[Scipy-svn] r5169 - trunk/scipy/cluster

scipy-svn at scipy.org scipy-svn at scipy.org
Sat Nov 22 14:09:37 EST 2008


Author: damian.eads
Date: 2008-11-22 13:09:35 -0600 (Sat, 22 Nov 2008)
New Revision: 5169

Modified:
   trunk/scipy/cluster/hierarchy.py
Log:
RSTifying more hierarchy docs.

Modified: trunk/scipy/cluster/hierarchy.py
===================================================================
--- trunk/scipy/cluster/hierarchy.py	2008-11-22 18:38:52 UTC (rev 5168)
+++ trunk/scipy/cluster/hierarchy.py	2008-11-22 19:09:35 UTC (rev 5169)
@@ -699,7 +699,7 @@
 
         :Returns:
            right : ClusterNode
-                  The left child of the target node.
+                   The right child of the target node.
         """
         return self.right
 
@@ -1696,7 +1696,7 @@
 def set_link_color_palette(palette):
     """
     Changes the list of matplotlib color codes to use when coloring
-    links with the dendrogram colorthreshold feature.
+    links with the dendrogram color_threshold feature.
 
     :Arguments:
         - palette : A list of matplotlib color codes. The order of
@@ -1716,205 +1716,224 @@
         _link_line_colors.remove(i)
     _link_line_colors.extend(list(palette))
 
-def dendrogram(Z, p=30, truncate_mode=None, colorthreshold=None,
+def dendrogram(Z, p=30, truncate_mode=None, color_threshold=None,
                get_leaves=True, orientation='top', labels=None,
                count_sort=False, distance_sort=False, show_leaf_counts=True,
                no_plot=False, no_labels=False, color_list=None,
                leaf_font_size=None, leaf_rotation=None, leaf_label_func=None,
                no_leaves=False, show_contracted=False,
                link_color_func=None):
-    """
-    R = dendrogram(Z)
+    r"""
+    Plots the hierarchical clustering defined by the linkage Z as a
+    dendrogram. The dendrogram illustrates how each cluster is
+    composed by drawing a U-shaped link between a non-singleton
+    cluster and its children. The height of the top of the U-link is
+    the distance between its children clusters. It is also the
+    cophenetic distance between original observations in the two
+    children clusters. It is expected that the distances in Z[:,2] be
+    monotonic, otherwise crossings appear in the dendrogram.
 
-      Plots the hiearchical clustering defined by the linkage Z as a
-      dendrogram. The dendrogram illustrates how each cluster is
-      composed by drawing a U-shaped link between a non-singleton
-      cluster and its children. The height of the top of the U-link
-      is the distance between its children clusters. It is also the
-      cophenetic distance between original observations in the
-      two children clusters. It is expected that the distances in
-      Z[:,2] be monotonic, otherwise crossings appear in the
-      dendrogram.
+    :Arguments:
 
-      R is a dictionary of the data structures computed to render the
-      dendrogram. Its keys are:
+      - Z : ndarray
+        The linkage matrix encoding the hierarchical clustering to
+        render as a dendrogram. See the ``linkage`` function for more
+        information on the format of ``Z``.
 
-         'icoords': a list of lists [I1, I2, ..., Ip] where Ik is a
-         list of 4 independent variable coordinates corresponding to
-         the line that represents the k'th link painted.
+      - truncate_mode : string
+        The dendrogram can be hard to read when the original
+        observation matrix from which the linkage is derived is
+        large. Truncation is used to condense the dendrogram. There
+        are several modes:
 
-         'dcoords': a list of lists [I2, I2, ..., Ip] where Ik is a
-         list of 4 independent variable coordinates corresponding to
-         the line that represents the k'th link painted.
+           * None/'none': no truncation is performed (Default)
 
-         'ivl': a list of labels corresponding to the leaf nodes
+           * 'lastp': the last ``p`` non-singleton formed in the linkage
+           are the only non-leaf nodes in the linkage; they correspond
+           to rows ``Z[n-p-2:end]`` in ``Z``. All other
+           non-singleton clusters are contracted into leaf nodes.
 
-    R = dendrogram(..., truncate_mode, p)
+           * 'mlab': This corresponds to MATLAB(TM) behavior. (not
+           implemented yet)
 
-      The dendrogram can be hard to read when the original observation
-      matrix from which the linkage is derived is large. Truncation
-      is used to condense the dendrogram. There are several modes:
+           * 'level'/'mtica': no more than ``p`` levels of the
+           dendrogram tree are displayed. This corresponds to
+           Mathematica(TM) behavior.
 
-       * None/'none': no truncation is performed
+       - p : int
+         The ``p`` parameter for ``truncate_mode``.
+`
+       - color_threshold : double
+         For brevity, let :math:`t` be the ``color_threshold``.
+         Colors all the descendent links below a cluster node
+         :math:`k` the same color if :math:`k` is the first node below
+         the cut threshold :math:`t`. All links connecting nodes with
+         distances greater than or equal to the threshold are colored
+         blue. If :math:`t` is less than or equal to zero, all nodes
+         are colored blue. If ``color_threshold`` is ``None`` or
+         'default', corresponding with MATLAB(TM) behavior, the
+         threshold is set to ``0.7*max(Z[:,2])``.
 
-       * 'lastp': the last p non-singleton formed in the linkage are
-       the only non-leaf nodes in the linkage; they correspond to
-       to rows Z[n-p-2:end] in Z. All other non-singleton clusters
-       are contracted into leaf nodes.
+       - get_leaves : bool
+         Includes a list ``R['leaves']=H`` in the result
+         dictionary. For each :math:`i`, ``H[i] == j``, cluster node
+         :math:`j` appears in the :math:`i` th position in the
+         left-to-right traversal of the leaves, where :math:`j < 2n-1`
+         and :math:`i < n`.
 
-       * 'mlab': This corresponds to MATLAB(TM) behavior. (not implemented yet)
+       - orientation : string
+         The direction to plot the dendrogram, which can be any
+         of the following strings
 
-       * 'level'/'mtica': no more than p levels of the dendrogram tree
-       are displayed. This corresponds to Mathematica(TM) behavior.
+           * 'top': plots the root at the top, and plot descendent
+           links going downwards. (default).
 
-    R = dendrogram(..., colorthreshold=t)
+           * 'bottom': plots the root at the bottom, and plot descendent
+           links going upwards.
 
-      Colors all the descendent links below a cluster node k the same color
-      if k is the first node below the cut threshold t. All links connecting
-      nodes with distances greater than or equal to the threshold are
-      colored blue. If t is less than or equal to zero, all nodes
-      are colored blue. If t is None or 'default', corresponding with
-      MATLAB(TM) behavior, the threshold is set to 0.7*max(Z[:,2]).
+           * 'left': plots the root at the left, and plot descendent
+           links going right.
 
-    R = dendrogram(..., get_leaves=True)
+           * 'right': plots the root at the right, and plot descendent
+           links going left.
 
-      Includes a list R['leaves']=H in the result dictionary. For each i,
-      H[i] == j, cluster node j appears in the i'th position in the
-      left-to-right traversal of the leaves, where j < 2n-1 and i < n.
+       - labels : ndarray
+         By default ``labels`` is ``None`` so the index of the
+         original observation is used to label the leaf nodes.
 
-    R = dendrogram(..., orientation)
+         Otherwise, this is an :math:`n` -sized list (or tuple). The
+         ``labels[i]`` value is the text to put under the :math:`i` th
+         leaf node only if it corresponds to an original observation
+         and not a non-singleton cluster.
 
-      Plots the dendrogram in a particular direction. The orientation
-      parameter can be any of:
+       - count_sort : string/bool
+         For each node n, the order (visually, from left-to-right) n's
+         two descendent links are plotted is determined by this
+         parameter, which can be any of the following values:
 
-        * 'top': plots the root at the top, and plot descendent
-          links going downwards. (default).
+            * False: nothing is done.
 
-        * 'bottom': plots the root at the bottom, and plot descendent
-          links going upwards.
+            * 'ascending'/True: the child with the minimum number of
+            original objects in its cluster is plotted first.
 
-        * 'left': plots the root at the left, and plot descendent
-          links going right.
+            * 'descending': the child with the maximum number of
+            original objects in its cluster is plotted first.
 
-        * 'right': plots the root at the right, and plot descendent
-          links going left.
+         Note ``distance_sort`` and ``count_sort`` cannot both be
+         ``True``.
 
-    R = dendrogram(..., labels=None)
+       - distance_sort : string/bool
+         For each node n, the order (visually, from left-to-right) n's
+         two descendent links are plotted is determined by this
+         parameter, which can be any of the following values:
 
-        The labels parameter is a n-sized list (or tuple). The labels[i]
-        value is the text to put under the i'th leaf node only if it
-        corresponds to an original observation and not a non-singleton
-        cluster.
+            * False: nothing is done.
 
-        When labels=None, the index of the original observation is used
-        used.
+            * 'ascending'/True: the child with the minimum distance
+            between its direct descendents is plotted first.
 
-    R = dendrogram(..., count_sort)
+            * 'descending': the child with the maximum distance
+            between its direct descendents is plotted first.
 
-        When plotting a cluster node and its directly descendent links,
-        the order the two descendent links and their descendents are
-        plotted is determined by the count_sort parameter. Valid values
-        of count_sort are:
+         Note ``distance_sort`` and ``count_sort`` cannot both be
+         ``True``.
 
-          * False: nothing is done.
+       - show_leaf_counts : bool
 
-          * 'ascending'/True: the child with the minimum number of
-          original objects in its cluster is plotted first.
+         When ``True``, leaf nodes representing :math:`k>1` original
+         observations are labeled with the number of observations they
+         contain in parentheses.
 
-          * 'descendent': the child with the maximum number of
-          original objects in its cluster is plotted first.
+       - no_plot : bool
+         When ``True``, the final rendering is not performed. This is
+         useful if only the data structures computed for the rendering
+         are needed or if matplotlib is not available.
 
-    R = dendrogram(..., distance_sort)
+       - no_labels : bool
+         When ``True``, no labels appear next to the leaf nodes in the
+         rendering of the dendrogram.
 
-        When plotting a cluster node and its directly descendent links,
-        the order the two descendent links and their descendents are
-        plotted is determined by the distance_sort parameter. Valid
-        values of count_sort are:
+       - leaf_rotation : double
 
-          * False: nothing is done.
+         Specifies the angle (in degrees) to rotate the leaf
+         labels. When unspecified, the rotation is based on the number of
+         nodes in the dendrogram. (Default=0)
 
-          * 'ascending'/True: the child with the minimum distance
-          between its direct descendents is plotted first.
+       - leaf_font_size : int
+         Specifies the font size (in points) of the leaf labels. When
+         unspecified, the size is based on the number of nodes in the
+         dendrogram.
 
-          * 'descending': the child with the maximum distance
-          between its direct descendents is plotted first.
+       - leaf_label_func : lambda or function
 
-        Note that either count_sort or distance_sort must be False.
+         When ``leaf_label_func`` is a callable function, it is
+         called for each leaf with cluster index :math:`k < 2n-1`.
+         The function is expected to return a string with the
+         label for the leaf.
 
-    R = dendrogram(..., show_leaf_counts)
+         Indices :math:`k < n` correspond to original observations
+         while indices :math:`k \geq n` correspond to non-singleton
+         clusters.
 
-        When show_leaf_counts=True, leaf nodes representing k>1
-        original observation are labeled with the number of observations
-        they contain in parentheses.
+         For example, to label singletons with their node id and
+         non-singletons with their id, count, and inconsistency
+         coefficient, simply do::
 
-    R = dendrogram(..., no_plot)
+           # First define the leaf label function.
+           def llf(id):
+               if id < n:
+                   return str(id)
+               else:
+                   return '[%d %d %1.2f]' % (id, count, R[n-id,3])
 
-        When no_plot=True, the final rendering is not performed. This is
-        useful if only the data structures computed for the rendering
-        are needed or if matplotlib is not available.
+           # The text for the leaf nodes is going to be big so force
+           # a rotation of 90 degrees.
+           dendrogram(Z, leaf_label_func=llf, leaf_rotation=90)
 
-    R = dendrogram(..., no_labels)
+       - show_contracted : bool
+         When ``True`` the heights of non-singleton nodes contracted
+         into a leaf node are plotted as crosses along the link
+         connecting that leaf node.  This really is only useful when
+         truncation is used (see ``truncate_mode`` parameter).
 
-        When no_labels=True, no labels appear next to the leaf nodes in
-        the rendering of the dendrogram.
+       - link_color_func : lambda/function
+         When a callable function, ``link_color_func`` is called with
+         each non-singleton id corresponding to each U-shaped link it
+         will paint. The function is expected to return the color to
+         paint the link, encoded as a matplotlib color string code.
 
-    R = dendrogram(..., leaf_label_rotation):
+         For example::
 
-        Specifies the angle to which the leaf labels are rotated. When
-        unspecified, the rotation based on the number of nodes in the
-        dendrogram.
+           dendrogram(Z, link_color_func=lambda k: colors[k])
 
-    R = dendrogram(..., leaf_font_size):
+         colors the direct links below each untruncated non-singleton node
+         ``k`` using ``colors[k]``.
 
-        Specifies the font size in points of the leaf labels. When
-        unspecified, the size  based on the number of nodes
-        in the dendrogram.
+    :Returns:
 
+       - R : dict
+         A dictionary of data structures computed to render the
+         dendrogram. It has the following keys:
 
-    R = dendrogram(..., leaf_label_func)
+           - 'icoord': a list of lists ``[I1, I2, ..., Ip]`` where
+           ``Ik`` is a list of 4 independent variable coordinates
+           corresponding to the line that represents the k'th link
+           painted.
 
-        When a callable function is passed, leaf_label_func is passed
-        cluster index k, and returns a string with the label for the
-        leaf.
+           - 'dcoord': a list of lists ``[D1, D2, ..., Dp]`` where
+           ``Dk`` is a list of 4 dependent variable coordinates
+           corresponding to the line that represents the k'th link
+           painted.
 
-        Indices k < n correspond to original observations while indices
-        k >= n correspond to non-singleton clusters.
+           - 'ivl': a list of labels corresponding to the leaf nodes.
 
-        For example, to label singletons with their node id and
-        non-singletons with their id, count, and inconsistency coefficient,
-        we simply do
-
-          # First define the leaf label function.
-          llf = lambda id:
-                   if id < n:
-                      return str(id)
-                   else:
-                      return '[%d %d %1.2f]' % (id, count, R[n-id,3])
-
-          # The text for the leaf nodes is going to be big so force
-          # a rotation of 90 degrees.
-          dendrogram(Z, leaf_label_func=llf, leaf_rotation=90)
-
-    R = dendrogram(..., show_contracted=True)
-
-        The heights of non-singleton nodes contracted into a leaf node
-        are plotted as crosses along the link connecting that leaf node.
-        This feature is only useful when truncation is used.
-
-    R = dendrogram(..., link_color_func)
-
-        When a link is painted, the function link_color_function is
-        called with the non-singleton id. This function is
-        expected to return a matplotlib color string, which represents
-        the color to paint the link.
-
-        For example:
-
-          dendrogram(Z, link_color_func=lambda k: colors[k])
-
-        colors the direct links below each untruncated non-singleton node
-        k using colors[k].
-
+           - 'leaves': a list ``H`` such that if ``H[i] == j``, cluster
+           node :math:`j` appears in the :math:`i` th position in the
+           left-to-right traversal of the leaves, where :math:`j < 2n-1`
+           and :math:`i < n`. If :math:`j` is less than :math:`n`, the
+           :math:`i` th leaf node corresponds to an original
+           observation.  Otherwise, it corresponds to a non-singleton
+           cluster.
     """
 
     # Features under consideration.
@@ -1959,9 +1978,9 @@
         ivl=None
     else:
         ivl=[]
-    if colorthreshold is None or \
-       (type(colorthreshold) == types.StringType and colorthreshold=='default'):
-        colorthreshold = max(Z[:,2])*0.7
+    if color_threshold is None or \
+       (type(color_threshold) == types.StringType and color_threshold=='default'):
+        color_threshold = max(Z[:,2])*0.7
     R={'icoord':icoord_list, 'dcoord':dcoord_list, 'ivl':ivl, 'leaves':lvs,
        'color_list':color_list}
     props = {'cbt': False, 'cc':0}
@@ -1971,7 +1990,7 @@
         contraction_marks = None
     _dendrogram_calculate_info(Z=Z, p=p,
                                truncate_mode=truncate_mode, \
-                               colorthreshold=colorthreshold, \
+                               color_threshold=color_threshold, \
                                get_leaves=get_leaves, \
                                orientation=orientation, \
                                labels=labels, \
@@ -2045,7 +2064,7 @@
 
 
 def _dendrogram_calculate_info(Z, p, truncate_mode, \
-                               colorthreshold=np.inf, get_leaves=True, \
+                               color_threshold=np.inf, get_leaves=True, \
                                orientation='top', labels=None, \
                                count_sort=False, distance_sort=False, \
                                show_leaf_counts=False, i=-1, iv=0.0, \
@@ -2220,7 +2239,7 @@
     (uiva, uwa, uah, uamd) = \
           _dendrogram_calculate_info(Z=Z, p=p, \
                                      truncate_mode=truncate_mode, \
-                                     colorthreshold=colorthreshold, \
+                                     color_threshold=color_threshold, \
                                      get_leaves=get_leaves, \
                                      orientation=orientation, \
                                      labels=labels, \
@@ -2238,7 +2257,7 @@
                                      link_color_func=link_color_func)
 
     h = Z[i-n, 2]
-    if h >= colorthreshold or colorthreshold <= 0:
+    if h >= color_threshold or color_threshold <= 0:
         c = 'b'
 
         if currently_below_threshold[0]:
@@ -2251,7 +2270,7 @@
     (uivb, uwb, ubh, ubmd) = \
           _dendrogram_calculate_info(Z=Z, p=p, \
                                      truncate_mode=truncate_mode, \
-                                     colorthreshold=colorthreshold, \
+                                     color_threshold=color_threshold, \
                                      get_leaves=get_leaves, \
                                      orientation=orientation, \
                                      labels=labels, \




More information about the Scipy-svn mailing list