[Doc-SIG] Re: docstrings for re.py?

Fri, 9 Jul 1999 11:09:52 -0700 (PDT)

Hello,

I went ahead and put docstrings into re.py.  For the most part, I just
pasted in the docs from the library reference, with some minor edits. 

I hope this is useful.

Jeff

Index: re.py
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Lib/re.py,v
retrieving revision 1.33
diff -c -r1.33 re.py
*** re.py	1998/08/21 18:39:38	1.33
--- re.py	1999/07/09 17:57:50
***************
*** 1,3 ****
--- 1,45 ----
+ # module 're' -- A collection of regular expression operations
+ 
+ """Support for regular expressions (RE).
+ 
+ This module provides regular expression matching operations similar to
+ those found in Perl. It's 8-bit clean: the strings being processed may
+ contain both null bytes and characters whose high bit is set. Regular
+ expression pattern strings may not contain null bytes, but can specify
+ the null byte using the \\number notation. Characters with the high
+ bit set may be included.
+ 
+ 
+ This module exports the following functions:
+ match    Match a regular expression pattern to the beginning of a string.
+ search   Search a string for the presence of a pattern.
+ sub      Substitute occurrences of a pattern found in a string.
+ subn     Same as sub, but also return the number of substitutions made.
+ split    Split a string by the occurrences of a pattern.
+ findall  Find all occurrences of a pattern in a string.
+ compile  Compile a pattern into a RegexObject.
+ escape   Backslash all non-alphanumerics in a string.
+ 
+ 
+ This module exports the following classes:
+ RegexObject  Holds a compiled regular expression pattern.
+ MatchObject  Contains information about pattern matches.
+ 
+ 
+ Some of the functions in this module take flags as optional parameters:
+ I  IGNORECASE
+ L  LOCALE
+ M  MULTILINE
+ S  DOTALL
+ X  VERBOSE
+ See __flags_doc__ for more information.
+ 
+ 
+ This module also defines an exception 'error'.
+ 
+ """
+ 
+ 
  import sys
  import string
  from pcre import *
***************
*** 17,22 ****
--- 59,107 ----
  S = DOTALL 
  X = VERBOSE 

+ 
+ __flags_doc__ = """
+ The expression's behaviour can be modified by specifying a flags
+ value. Values can be any of the following variables, combined
+ using bitwise OR (the | operator).
+ 
+ I 
+ IGNORECASE
+     Perform case-insensitive matching; expressions like [A-Z] will
+     match lowercase letters, too. This is not affected by the current
+     locale.
+ 
+ L 
+ LOCALE
+     Make \\w, \\W, \\b, \\B dependent on the current locale.
+ 
+ M 
+ MULTILINE
+     When specified, the pattern character "^" matches at the beginning
+     of the string and at the beginning of each line (immediately
+     following each newline); and the pattern character "$" matches at
+     the end of the string and at the end of each line (immediately
+     preceding each newline). By default, "^" matches only at the
+     beginning of the string, and "$" only at the end of the string and
+     immediately before the newline (if any) at the end of the string.
+ 
+ S 
+ DOTALL
+     Make the "." special character match any character at all,
+     including a newline; without this flag, "." will match anything
+     except a newline.
+ 
+ X 
+ VERBOSE
+     This flag allows you to write regular expressions that look
+     nicer. Whitespace within the pattern is ignored, except when in a
+     character class or preceded by an unescaped backslash, and, when a
+     line contains a "#" neither in a character class nor preceded by an
+     unescaped backslash, all characters from the leftmost such "#"
+     through the end of the line are ignored.
+ 
+ """
+     
  #
  #
  #
***************
*** 37,69 ****
      return value

  def match(pattern, string, flags=0):
      return _cachecompile(pattern, flags).match(string)

  def search(pattern, string, flags=0):
      return _cachecompile(pattern, flags).search(string)

  def sub(pattern, repl, string, count=0):
      if type(pattern) == type(''):
          pattern = _cachecompile(pattern)
      return pattern.sub(repl, string, count)

  def subn(pattern, repl, string, count=0):
      if type(pattern) == type(''):
          pattern = _cachecompile(pattern)
      return pattern.subn(repl, string, count)

  def split(pattern, string, maxsplit=0):
      if type(pattern) == type(''):
          pattern = _cachecompile(pattern)
      return pattern.split(string, maxsplit)

  def findall(pattern, string):
      if type(pattern) == type(''):
          pattern = _cachecompile(pattern)
      return pattern.findall(string)

  def escape(pattern):
!     "Escape all non-alphanumeric characters in pattern."
      result = list(pattern)
      alphanum=string.letters+'_'+string.digits
      for i in range(len(pattern)):
--- 122,225 ----
      return value

  def match(pattern, string, flags=0):
+     """match(pattern, string[, flags]) -> MatchObject or None
+     
+     If zero or more characters at the beginning of string match the
+     regular expression pattern, return a corresponding MatchObject
+     instance. Return None if the string does not match the pattern;
+     note that this is different from a zero-length match.
+ 
+     Note: If you want to locate a match anywhere in string, use
+     search() instead.
+ 
+     """
+     
      return _cachecompile(pattern, flags).match(string)

  def search(pattern, string, flags=0):
+     """search(pattern, string[, flags]) -> MatchObject or None
+     
+     Scan through string looking for a location where the regular
+     expression pattern produces a match, and return a corresponding
+     MatchObject instance. Return None if no position in the string
+     matches the pattern; note that this is different from finding a
+     zero-length match at some point in the string.
+ 
+     """
      return _cachecompile(pattern, flags).search(string)

  def sub(pattern, repl, string, count=0):
+     """sub(pattern, repl, string[, count=0]) -> string
+     
+     Return the string obtained by replacing the leftmost
+     non-overlapping occurrences of pattern in string by the
+     replacement repl. If the pattern isn't found, string is returned
+     unchanged. repl can be a string or a function; if a function, it
+     is called for every non-overlapping occurrence of pattern. The
+     function takes a single match object argument, and returns the
+     replacement string.
+ 
+     The pattern may be a string or a regex object; if you need to
+     specify regular expression flags, you must use a regex object, or
+     use embedded modifiers in a pattern; e.g.
+     sub("(?i)b+", "x", "bbbb BBBB") returns 'x x'.
+ 
+     The optional argument count is the maximum number of pattern
+     occurrences to be replaced; count must be a non-negative integer,
+     and the default value of 0 means to replace all occurrences.
+ 
+     """
      if type(pattern) == type(''):
          pattern = _cachecompile(pattern)
      return pattern.sub(repl, string, count)

  def subn(pattern, repl, string, count=0):
+     """subn(pattern, repl, string[, count=0]) -> (string, num substitutions)
+     
+     Perform the same operation as sub(), but return a tuple
+     (new_string, number_of_subs_made).
+ 
+     """
      if type(pattern) == type(''):
          pattern = _cachecompile(pattern)
      return pattern.subn(repl, string, count)

  def split(pattern, string, maxsplit=0):
+     """split(pattern, string[, maxsplit=0]) -> list of strings
+     
+     Split string by the occurrences of pattern. If capturing
+     parentheses are used in pattern, then the text of all groups in
+     the pattern is also returned as part of the resulting list. If
+     maxsplit is nonzero, at most maxsplit splits occur, and the
+     remainder of the string is returned as the final element of the
+     list.
+ 
+     """
      if type(pattern) == type(''):
          pattern = _cachecompile(pattern)
      return pattern.split(string, maxsplit)

  def findall(pattern, string):
+     """findall(pattern, string) -> list
+     
+     Return a list of all non-overlapping matches of pattern in
+     string. If one or more groups are present in the pattern, return a
+     list of groups; this will be a list of tuples if the pattern has
+     more than one group. Empty matches are included in the result.
+ 
+     """
      if type(pattern) == type(''):
          pattern = _cachecompile(pattern)
      return pattern.findall(string)

  def escape(pattern):
!     """escape(pattern) -> string
!     
!     Return pattern with all non-alphanumerics backslashed; this is
!     useful if you want to match an arbitrary literal string that may
!     have regular expression metacharacters in it.
! 
!     """
      result = list(pattern)
      alphanum=string.letters+'_'+string.digits
      for i in range(len(pattern)):
***************
*** 74,80 ****
      return string.join(result, '')

  def compile(pattern, flags=0):
!     "Compile a regular expression pattern, returning a RegexObject."
      groupindex={}
      code=pcre_compile(pattern, flags, groupindex)
      return RegexObject(pattern, flags, code, groupindex)
--- 230,242 ----
      return string.join(result, '')

  def compile(pattern, flags=0):
!     """compile(pattern[, flags]) -> RegexObject
! 
!     Compile a regular expression pattern into a regular expression
!     object, which can be used for matching using its match() and
!     search() methods.
! 
!     """
      groupindex={}
      code=pcre_compile(pattern, flags, groupindex)
      return RegexObject(pattern, flags, code, groupindex)
***************
*** 85,90 ****
--- 247,263 ----
  #

  class RegexObject:
+     """Holds a compiled regular expression pattern.
+ 
+     Methods:
+     match    Match the pattern to the beginning of a string.
+     search   Search a string for the presence of the pattern.
+     sub      Substitute occurrences of the pattern found in a string.
+     subn     Same as sub, but also return the number of substitutions made.
+     split    Split a string by the occurrences of the pattern.
+     findall  Find all occurrences of the pattern in a string.
+     
+     """

      def __init__(self, pattern, flags, code, groupindex):
          self.code = code 
***************
*** 93,101 ****
          self.groupindex = groupindex

      def search(self, string, pos=0, endpos=None):
!         """Scan through string looking for a match to the pattern, returning
!         a MatchObject instance, or None if no match was found."""
! 
          if endpos is None or endpos>len(string): 
              endpos=len(string)
          if endpos<pos: endpos=pos
--- 266,282 ----
          self.groupindex = groupindex

      def search(self, string, pos=0, endpos=None):
!         """search(string[, pos][, endpos]) -> MatchObject or None
!         
!         Scan through string looking for a location where this regular
!         expression produces a match, and return a corresponding
!         MatchObject instance. Return None if no position in the string
!         matches the pattern; note that this is different from finding
!         a zero-length match at some point in the string. The optional
!         pos and endpos parameters have the same meaning as for the
!         match() method.
!     
!         """
          if endpos is None or endpos>len(string): 
              endpos=len(string)
          if endpos<pos: endpos=pos
***************
*** 110,118 ****
                             regs)

      def match(self, string, pos=0, endpos=None):
!         """Try to apply the pattern at the start of the string, returning
!         a MatchObject instance, or None if no match was found."""

          if endpos is None or endpos>len(string): 
              endpos=len(string)
          if endpos<pos: endpos=pos
--- 291,319 ----
                             regs)

      def match(self, string, pos=0, endpos=None):
!         """match(string[, pos][, endpos]) -> MatchObject or None
!         
!         If zero or more characters at the beginning of string match
!         this regular expression, return a corresponding MatchObject
!         instance. Return None if the string does not match the
!         pattern; note that this is different from a zero-length match.
! 
!         Note: If you want to locate a match anywhere in string, use
!         search() instead.
! 
!         The optional second parameter pos gives an index in the string
!         where the search is to start; it defaults to 0.  This is not
!         completely equivalent to slicing the string; the '^' pattern
!         character matches at the real beginning of the string and at
!         positions just after a newline, but not necessarily at the
!         index where the search is to start.
! 
!         The optional parameter endpos limits how far the string will
!         be searched; it will be as if the string is endpos characters
!         long, so only the characters from pos to endpos will be
!         searched for a match.

+         """
          if endpos is None or endpos>len(string): 
              endpos=len(string)
          if endpos<pos: endpos=pos
***************
*** 126,144 ****
                             regs)

      def sub(self, repl, string, count=0):
!         """Return the string obtained by replacing the leftmost
!         non-overlapping occurrences of the pattern in string by the
!         replacement repl""" 

          return self.subn(repl, string, count)[0]

      def subn(self, repl, source, count=0): 
!         """Return a 2-tuple containing (new_string, number).
!         new_string is the string obtained by replacing the leftmost
!         non-overlapping occurrences of the pattern in the source
!         string by the replacement repl.  number is the number of
!         substitutions that were made."""

          if count < 0:
              raise error, "negative substitution count"
          if count == 0:
--- 327,351 ----
                             regs)

      def sub(self, repl, string, count=0):
!         """sub(repl, string[, count=0]) -> string
!         
!         Return the string obtained by replacing the leftmost
!         non-overlapping occurrences of the compiled pattern in string
!         by the replacement repl. If the pattern isn't found, string is
!         returned unchanged.

+         Identical to the sub() function, using the compiled pattern.
+         
+         """
          return self.subn(repl, string, count)[0]

      def subn(self, repl, source, count=0): 
!         """subn(repl, source[, count=0]) -> tuple

+         Perform the same operation as sub(), but return a tuple
+         (new_string, number_of_subs_made).
+ 
+         """
          if count < 0:
              raise error, "negative substitution count"
          if count == 0:
***************
*** 192,200 ****
          return (string.join(results, ''), n)

      def split(self, source, maxsplit=0):
!         """Split the source string by the occurrences of the pattern,
!         returning a list containing the resulting substrings."""
! 
          if maxsplit < 0:
              raise error, "negative split count"
          if maxsplit == 0:
--- 399,414 ----
          return (string.join(results, ''), n)

      def split(self, source, maxsplit=0):
!         """split(source[, maxsplit=0]) -> list of strings
!     
!         Split string by the occurrences of the compiled pattern. If
!         capturing parentheses are used in the pattern, then the text
!         of all groups in the pattern is also returned as part of the
!         resulting list. If maxsplit is nonzero, at most maxsplit
!         splits occur, and the remainder of the string is returned as
!         the final element of the list.
!         
!         """
          if maxsplit < 0:
              raise error, "negative split count"
          if maxsplit == 0:
***************
*** 232,244 ****
          return results

      def findall(self, source):
!         """Return a list of all non-overlapping matches in the string.
! 
!         If one or more groups are present in the pattern, return a
!         list of groups; this will be a list of tuples if the pattern
!         has more than one group.
! 
!         Empty matches are included in the result.

          """
          pos = 0
--- 446,458 ----
          return results

      def findall(self, source):
!         """findall(source) -> list
!     
!         Return a list of all non-overlapping matches of the compiled
!         pattern in string. If one or more groups are present in the
!         pattern, return a list of groups; this will be a list of
!         tuples if the pattern has more than one group. Empty matches
!         are included in the result.

          """
          pos = 0
***************
*** 286,291 ****
--- 500,516 ----
      group = None

  class MatchObject:
+     """Contains information about pattern matches.
+ 
+     Methods:
+     start      Return the index of the start of a matched substring.
+     end        Return the index of the end of a matched substring.
+     span       Return a tuple of (start, end) of a matched substring.
+     groups     Return a tuple of all the subgroups of the match.
+     group      Return one or more subgroups of the match.
+     groupdict  Return a dictionary of all the named subgroups of the match.
+ 
+     """

      def __init__(self, re, string, pos, endpos, regs):
          self.re = re
***************
*** 295,301 ****
          self.regs = regs

      def start(self, g = 0):
!         "Return the start of the substring matched by group g"
          if type(g) == type(''):
              try:
                  g = self.re.groupindex[g]
--- 520,533 ----
          self.regs = regs

      def start(self, g = 0):
!         """start([group=0]) -> int or None
!         
!         Return the index of the start of the substring matched by
!         group; group defaults to zero (meaning the whole matched
!         substring). Return None if group exists but did not contribute
!         to the match.
! 
!         """
          if type(g) == type(''):
              try:
                  g = self.re.groupindex[g]
***************
*** 304,310 ****
          return self.regs[g][0]

      def end(self, g = 0):
!         "Return the end of the substring matched by group g"
          if type(g) == type(''):
              try:
                  g = self.re.groupindex[g]
--- 536,549 ----
          return self.regs[g][0]

      def end(self, g = 0):
!         """end([group=0]) -> int or None
!         
!         Return the index of the end of the substring matched by
!         group; group defaults to zero (meaning the whole matched
!         substring). Return None if group exists but did not contribute
!         to the match.
! 
!         """
          if type(g) == type(''):
              try:
                  g = self.re.groupindex[g]
***************
*** 313,319 ****
          return self.regs[g][1]

      def span(self, g = 0):
!         "Return (start, end) of the substring matched by group g"
          if type(g) == type(''):
              try:
                  g = self.re.groupindex[g]
--- 552,565 ----
          return self.regs[g][1]

      def span(self, g = 0):
!         """span([group=0]) -> tuple
!         
!         Return the 2-tuple (m.start(group), m.end(group)). Note that
!         if group did not contribute to the match, this is (None,
!         None). Group defaults to zero (meaning the whole matched
!         substring).
! 
!         """
          if type(g) == type(''):
              try:
                  g = self.re.groupindex[g]
***************
*** 322,328 ****
          return self.regs[g]

      def groups(self, default=None):
!         "Return a tuple containing all subgroups of the match object"
          result = []
          for g in range(1, self.re._num_regs):
              a, b = self.regs[g]
--- 568,581 ----
          return self.regs[g]

      def groups(self, default=None):
!         """groups([default=None]) -> tuple
!         
!         Return a tuple containing all the subgroups of the match, from
!         1 up to however many groups are in the pattern. The default
!         argument is used for groups that did not participate in the
!         match.
! 
!         """
          result = []
          for g in range(1, self.re._num_regs):
              a, b = self.regs[g]
***************
*** 333,339 ****
          return tuple(result)

      def group(self, *groups):
!         "Return one or more groups of the match"
          if len(groups) == 0:
              groups = (0,)
          result = []
--- 586,614 ----
          return tuple(result)

      def group(self, *groups):
!         """group([group1, group2, ...]) -> string or tuple
!         
!         Return one or more subgroups of the match. If there is a
!         single argument, the result is a single string; if there are
!         multiple arguments, the result is a tuple with one item per
!         argument. Without arguments, group1 defaults to zero (i.e. the
!         whole match is returned). If a groupN argument is zero, the
!         corresponding return value is the entire matching string; if
!         it is in the inclusive range [1..99], it is the string
!         matching the corresponding parenthesized group. If a group
!         number is negative or larger than the number of groups defined
!         in the pattern, an IndexError exception is raised. If a group
!         is contained in a part of the pattern that did not match, the
!         corresponding result is None. If a group is contained in a
!         part of the pattern that matched multiple times, the last
!         match is returned.
! 
!         If the regular expression uses the (?P<name>...) syntax, the
!         groupN arguments may also be strings identifying groups by
!         their group name. If a string argument is not used as a group
!         name in the pattern, an IndexError exception is raised.
! 
!         """
          if len(groups) == 0:
              groups = (0,)
          result = []
***************
*** 358,364 ****
              return ()

      def groupdict(self, default=None):
!         "Return a dictionary containing all named subgroups of the match"
          dict = {}
          for name, index in self.re.groupindex.items():
              a, b = self.regs[index]
--- 633,645 ----
              return ()

      def groupdict(self, default=None):
!         """groupdict([default=None]) -> dictionary
!         
!         Return a dictionary containing all the named subgroups of the
!         match, keyed by the subgroup name. The default argument is
!         used for groups that did not participate in the match.
! 
!         """
          dict = {}
          for name, index in self.re.groupindex.items():
              a, b = self.regs[index]