[Doc-SIG] Re: docstrings for re.py?
Jeffrey Chang
jefftc@leland.Stanford.EDU
Fri, 9 Jul 1999 11:09:52 -0700 (PDT)
Hello,
I went ahead and put docstrings into re.py. For the most part, I just
pasted in the docs from the library reference, with some minor edits.
I hope this is useful.
Jeff
Index: re.py
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Lib/re.py,v
retrieving revision 1.33
diff -c -r1.33 re.py
*** re.py 1998/08/21 18:39:38 1.33
--- re.py 1999/07/09 17:57:50
***************
*** 1,3 ****
--- 1,45 ----
+ # module 're' -- A collection of regular expression operations
+
+ """Support for regular expressions (RE).
+
+ This module provides regular expression matching operations similar to
+ those found in Perl. It's 8-bit clean: the strings being processed may
+ contain both null bytes and characters whose high bit is set. Regular
+ expression pattern strings may not contain null bytes, but can specify
+ the null byte using the \\number notation. Characters with the high
+ bit set may be included.
+
+
+ This module exports the following functions:
+ match Match a regular expression pattern to the beginning of a string.
+ search Search a string for the presence of a pattern.
+ sub Substitute occurrences of a pattern found in a string.
+ subn Same as sub, but also return the number of substitutions made.
+ split Split a string by the occurrences of a pattern.
+ findall Find all occurrences of a pattern in a string.
+ compile Compile a pattern into a RegexObject.
+ escape Backslash all non-alphanumerics in a string.
+
+
+ This module exports the following classes:
+ RegexObject Holds a compiled regular expression pattern.
+ MatchObject Contains information about pattern matches.
+
+
+ Some of the functions in this module take flags as optional parameters:
+ I IGNORECASE
+ L LOCALE
+ M MULTILINE
+ S DOTALL
+ X VERBOSE
+ See __flags_doc__ for more information.
+
+
+ This module also defines an exception 'error'.
+
+ """
+
+
import sys
import string
from pcre import *
***************
*** 17,22 ****
--- 59,107 ----
S = DOTALL
X = VERBOSE
+
+ __flags_doc__ = """
+ The expression's behaviour can be modified by specifying a flags
+ value. Values can be any of the following variables, combined
+ using bitwise OR (the | operator).
+
+ I
+ IGNORECASE
+ Perform case-insensitive matching; expressions like [A-Z] will
+ match lowercase letters, too. This is not affected by the current
+ locale.
+
+ L
+ LOCALE
+ Make \\w, \\W, \\b, and \\B dependent on the current locale.
+
+ M
+ MULTILINE
+ When specified, the pattern character "^" matches at the beginning
+ of the string and at the beginning of each line (immediately
+ following each newline); and the pattern character "$" matches at
+ the end of the string and at the end of each line (immediately
+ preceding each newline). By default, "^" matches only at the
+ beginning of the string, and "$" only at the end of the string and
+ immediately before the newline (if any) at the end of the string.
+
+ S
+ DOTALL
+ Make the "." special character match any character at all,
+ including a newline; without this flag, "." will match anything
+ except a newline.
+
+ X
+ VERBOSE
+ This flag allows you to write regular expressions that look
+ nicer. Whitespace within the pattern is ignored, except when in a
+ character class or preceded by an unescaped backslash, and, when a
+ line contains a "#" neither in a character class nor preceded by an
+ unescaped backslash, all characters from the leftmost such "#"
+ through the end of the line are ignored.
+
+ """
+
#
#
#
***************
*** 37,69 ****
return value
def match(pattern, string, flags=0):
return _cachecompile(pattern, flags).match(string)
def search(pattern, string, flags=0):
return _cachecompile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0):
if type(pattern) == type(''):
pattern = _cachecompile(pattern)
return pattern.sub(repl, string, count)
def subn(pattern, repl, string, count=0):
if type(pattern) == type(''):
pattern = _cachecompile(pattern)
return pattern.subn(repl, string, count)
def split(pattern, string, maxsplit=0):
if type(pattern) == type(''):
pattern = _cachecompile(pattern)
return pattern.split(string, maxsplit)
def findall(pattern, string):
if type(pattern) == type(''):
pattern = _cachecompile(pattern)
return pattern.findall(string)
def escape(pattern):
! "Escape all non-alphanumeric characters in pattern."
result = list(pattern)
alphanum=string.letters+'_'+string.digits
for i in range(len(pattern)):
--- 122,225 ----
return value
def match(pattern, string, flags=0):
+ """match (pattern, string[, flags]) -> MatchObject or None
+
+ If zero or more characters at the beginning of string match the
+ regular expression pattern, return a corresponding MatchObject
+ instance. Return None if the string does not match the pattern;
+ note that this is different from a zero-length match.
+
+ Note: If you want to locate a match anywhere in string, use
+ search() instead.
+
+ """
+
return _cachecompile(pattern, flags).match(string)
def search(pattern, string, flags=0):
+ """search (pattern, string[, flags]) -> MatchObject or None
+
+ Scan through string looking for a location where the regular
+ expression pattern produces a match, and return a corresponding
+ MatchObject instance. Return None if no position in the string
+ matches the pattern; note that this is different from finding a
+ zero-length match at some point in the string.
+
+ """
return _cachecompile(pattern, flags).search(string)
def sub(pattern, repl, string, count=0):
+ """sub(pattern, repl, string[, count=0]) -> string
+
+ Return the string obtained by replacing the leftmost
+ non-overlapping occurrences of pattern in string by the
+ replacement repl. If the pattern isn't found, string is returned
+ unchanged. repl can be a string or a function; if a function, it
+ is called for every non-overlapping occurrence of pattern. The
+ function takes a single match object argument, and returns the
+ replacement string.
+
+ The pattern may be a string or a regex object; if you need to
+ specify regular expression flags, you must use a regex object, or
+ use embedded modifiers in a pattern; e.g.
+ sub("(?i)b+", "x", "bbbb BBBB") returns 'x x'.
+
+ The optional argument count is the maximum number of pattern
+ occurrences to be replaced; count must be a non-negative integer,
+ and the default value of 0 means to replace all occurrences.
+
+ """
if type(pattern) == type(''):
pattern = _cachecompile(pattern)
return pattern.sub(repl, string, count)
def subn(pattern, repl, string, count=0):
+ """subn(pattern, repl, string[, count=0]) -> (string, num substitutions)
+
+ Perform the same operation as sub(), but return a tuple
+ (new_string, number_of_subs_made).
+
+ """
if type(pattern) == type(''):
pattern = _cachecompile(pattern)
return pattern.subn(repl, string, count)
def split(pattern, string, maxsplit=0):
+ """split(pattern, string[, maxsplit=0]) -> list of strings
+
+ Split string by the occurrences of pattern. If capturing
+ parentheses are used in pattern, then the text of all groups in
+ the pattern are also returned as part of the resulting list. If
+ maxsplit is nonzero, at most maxsplit splits occur, and the
+ remainder of the string is returned as the final element of the
+ list.
+
+ """
if type(pattern) == type(''):
pattern = _cachecompile(pattern)
return pattern.split(string, maxsplit)
def findall(pattern, string):
+ """findall(pattern, string) -> list
+
+ Return a list of all non-overlapping matches of pattern in
+ string. If one or more groups are present in the pattern, return a
+ list of groups; this will be a list of tuples if the pattern has
+ more than one group. Empty matches are included in the result.
+
+ """
if type(pattern) == type(''):
pattern = _cachecompile(pattern)
return pattern.findall(string)
def escape(pattern):
! """escape(string) -> string
!
! Return string with all non-alphanumerics backslashed; this is
! useful if you want to match an arbitrary literal string that may
! have regular expression metacharacters in it.
!
! """
result = list(pattern)
alphanum=string.letters+'_'+string.digits
for i in range(len(pattern)):
***************
*** 74,80 ****
return string.join(result, '')
def compile(pattern, flags=0):
! "Compile a regular expression pattern, returning a RegexObject."
groupindex={}
code=pcre_compile(pattern, flags, groupindex)
return RegexObject(pattern, flags, code, groupindex)
--- 230,242 ----
return string.join(result, '')
def compile(pattern, flags=0):
! """compile(pattern[, flags]) -> RegexObject
!
! Compile a regular expression pattern into a regular expression
! object, which can be used for matching using its match() and
! search() methods.
!
! """
groupindex={}
code=pcre_compile(pattern, flags, groupindex)
return RegexObject(pattern, flags, code, groupindex)
***************
*** 85,90 ****
--- 247,263 ----
#
class RegexObject:
+ """Holds a compiled regular expression pattern.
+
+ Methods:
+ match Match the pattern to the beginning of a string.
+ search Search a string for the presence of the pattern.
+ sub Substitute occurrences of the pattern found in a string.
+ subn Same as sub, but also return the number of substitutions made.
+ split Split a string by the occurrences of the pattern.
+ findall Find all occurrences of the pattern in a string.
+
+ """
def __init__(self, pattern, flags, code, groupindex):
self.code = code
***************
*** 93,101 ****
self.groupindex = groupindex
def search(self, string, pos=0, endpos=None):
! """Scan through string looking for a match to the pattern, returning
! a MatchObject instance, or None if no match was found."""
!
if endpos is None or endpos>len(string):
endpos=len(string)
if endpos<pos: endpos=pos
--- 266,282 ----
self.groupindex = groupindex
def search(self, string, pos=0, endpos=None):
! """search(string[, pos][, endpos]) -> MatchObject or None
!
! Scan through string looking for a location where this regular
! expression produces a match, and return a corresponding
! MatchObject instance. Return None if no position in the string
! matches the pattern; note that this is different from finding
! a zero-length match at some point in the string. The optional
! pos and endpos parameters have the same meaning as for the
! match() method.
!
! """
if endpos is None or endpos>len(string):
endpos=len(string)
if endpos<pos: endpos=pos
***************
*** 110,118 ****
regs)
def match(self, string, pos=0, endpos=None):
! """Try to apply the pattern at the start of the string, returning
! a MatchObject instance, or None if no match was found."""
if endpos is None or endpos>len(string):
endpos=len(string)
if endpos<pos: endpos=pos
--- 291,319 ----
regs)
def match(self, string, pos=0, endpos=None):
! """match(string[, pos][, endpos]) -> MatchObject or None
!
! If zero or more characters at the beginning of string match
! this regular expression, return a corresponding MatchObject
! instance. Return None if the string does not match the
! pattern; note that this is different from a zero-length match.
!
! Note: If you want to locate a match anywhere in string, use
! search() instead.
!
! The optional second parameter pos gives an index in the string
! where the search is to start; it defaults to 0. This is not
! completely equivalent to slicing the string; the '^' pattern
! character matches at the real beginning of the string and at
! positions just after a newline, but not necessarily at the
! index where the search is to start.
!
! The optional parameter endpos limits how far the string will
! be searched; it will be as if the string is endpos characters
! long, so only the characters from pos to endpos will be
! searched for a match.
+ """
if endpos is None or endpos>len(string):
endpos=len(string)
if endpos<pos: endpos=pos
***************
*** 126,144 ****
regs)
def sub(self, repl, string, count=0):
! """Return the string obtained by replacing the leftmost
! non-overlapping occurrences of the pattern in string by the
! replacement repl"""
return self.subn(repl, string, count)[0]
def subn(self, repl, source, count=0):
! """Return a 2-tuple containing (new_string, number).
! new_string is the string obtained by replacing the leftmost
! non-overlapping occurrences of the pattern in the source
! string by the replacement repl. number is the number of
! substitutions that were made."""
if count < 0:
raise error, "negative substitution count"
if count == 0:
--- 327,351 ----
regs)
def sub(self, repl, string, count=0):
! """sub(repl, string[, count=0]) -> string
!
! Return the string obtained by replacing the leftmost
! non-overlapping occurrences of the compiled pattern in string
! by the replacement repl. If the pattern isn't found, string is
! returned unchanged.
+ Identical to the sub() function, using the compiled pattern.
+
+ """
return self.subn(repl, string, count)[0]
def subn(self, repl, source, count=0):
! """subn(repl, string[, count=0]) -> tuple
+ Perform the same operation as sub(), but return a tuple
+ (new_string, number_of_subs_made).
+
+ """
if count < 0:
raise error, "negative substitution count"
if count == 0:
***************
*** 192,200 ****
return (string.join(results, ''), n)
def split(self, source, maxsplit=0):
! """Split the source string by the occurrences of the pattern,
! returning a list containing the resulting substrings."""
!
if maxsplit < 0:
raise error, "negative split count"
if maxsplit == 0:
--- 399,414 ----
return (string.join(results, ''), n)
def split(self, source, maxsplit=0):
! """split(source[, maxsplit=0]) -> list of strings
!
! Split string by the occurrences of the compiled pattern. If
! capturing parentheses are used in the pattern, then the text
! of all groups in the pattern are also returned as part of the
! resulting list. If maxsplit is nonzero, at most maxsplit
! splits occur, and the remainder of the string is returned as
! the final element of the list.
!
! """
if maxsplit < 0:
raise error, "negative split count"
if maxsplit == 0:
***************
*** 232,244 ****
return results
def findall(self, source):
! """Return a list of all non-overlapping matches in the string.
!
! If one or more groups are present in the pattern, return a
! list of groups; this will be a list of tuples if the pattern
! has more than one group.
!
! Empty matches are included in the result.
"""
pos = 0
--- 446,458 ----
return results
def findall(self, source):
! """findall(source) -> list
!
! Return a list of all non-overlapping matches of the compiled
! pattern in string. If one or more groups are present in the
! pattern, return a list of groups; this will be a list of
! tuples if the pattern has more than one group. Empty matches
! are included in the result.
"""
pos = 0
***************
*** 286,291 ****
--- 500,516 ----
group = None
class MatchObject:
+ """Contains information about pattern matches.
+
+ Methods:
+ start Return the index of the start of a matched substring.
+ end Return the index of the end of a matched substring.
+ span Return a tuple of (start, end) of a matched substring.
+ groups Return a tuple of all the subgroups of the match.
+ group Return one or more subgroups of the match.
+ groupdict Return a dictionary of all the named subgroups of the match.
+
+ """
def __init__(self, re, string, pos, endpos, regs):
self.re = re
***************
*** 295,301 ****
self.regs = regs
def start(self, g = 0):
! "Return the start of the substring matched by group g"
if type(g) == type(''):
try:
g = self.re.groupindex[g]
--- 520,533 ----
self.regs = regs
def start(self, g = 0):
! """start([group=0]) -> int or None
!
! Return the index of the start of the substring matched by
! group; group defaults to zero (meaning the whole matched
! substring). Return None if group exists but did not contribute
! to the match.
!
! """
if type(g) == type(''):
try:
g = self.re.groupindex[g]
***************
*** 304,310 ****
return self.regs[g][0]
def end(self, g = 0):
! "Return the end of the substring matched by group g"
if type(g) == type(''):
try:
g = self.re.groupindex[g]
--- 536,549 ----
return self.regs[g][0]
def end(self, g = 0):
! """end([group=0]) -> int or None
!
! Return the index of the end of the substring matched by
! group; group defaults to zero (meaning the whole matched
! substring). Return None if group exists but did not contribute
! to the match.
!
! """
if type(g) == type(''):
try:
g = self.re.groupindex[g]
***************
*** 313,319 ****
return self.regs[g][1]
def span(self, g = 0):
! "Return (start, end) of the substring matched by group g"
if type(g) == type(''):
try:
g = self.re.groupindex[g]
--- 552,565 ----
return self.regs[g][1]
def span(self, g = 0):
! """span([group=0]) -> tuple
!
! Return the 2-tuple (m.start(group), m.end(group)). Note that
! if group did not contribute to the match, this is (None,
! None). Group defaults to zero (meaning the whole matched
! substring).
!
! """
if type(g) == type(''):
try:
g = self.re.groupindex[g]
***************
*** 322,328 ****
return self.regs[g]
def groups(self, default=None):
! "Return a tuple containing all subgroups of the match object"
result = []
for g in range(1, self.re._num_regs):
a, b = self.regs[g]
--- 568,581 ----
return self.regs[g]
def groups(self, default=None):
! """groups([default=None]) -> tuple
!
! Return a tuple containing all the subgroups of the match, from
! 1 up to however many groups are in the pattern. The default
! argument is used for groups that did not participate in the
! match.
!
! """
result = []
for g in range(1, self.re._num_regs):
a, b = self.regs[g]
***************
*** 333,339 ****
return tuple(result)
def group(self, *groups):
! "Return one or more groups of the match"
if len(groups) == 0:
groups = (0,)
result = []
--- 586,614 ----
return tuple(result)
def group(self, *groups):
! """group([group1, group2, ...]) -> string or tuple
!
! Return one or more subgroups of the match. If there is a
! single argument, the result is a single string; if there are
! multiple arguments, the result is a tuple with one item per
! argument. Without arguments, group1 defaults to zero (i.e. the
! whole match is returned). If a groupN argument is zero, the
! corresponding return value is the entire matching string; if
! it is in the inclusive range [1..99], it is the string
! matching the corresponding parenthesized group. If a group
! number is negative or larger than the number of groups defined
! in the pattern, an IndexError exception is raised. If a group
! is contained in a part of the pattern that did not match, the
! corresponding result is None. If a group is contained in a
! part of the pattern that matched multiple times, the last
! match is returned.
!
! If the regular expression uses the (?P<name>...) syntax, the
! groupN arguments may also be strings identifying groups by
! their group name. If a string argument is not used as a group
! name in the pattern, an IndexError exception is raised.
!
! """
if len(groups) == 0:
groups = (0,)
result = []
***************
*** 358,364 ****
return ()
def groupdict(self, default=None):
! "Return a dictionary containing all named subgroups of the match"
dict = {}
for name, index in self.re.groupindex.items():
a, b = self.regs[index]
--- 633,645 ----
return ()
def groupdict(self, default=None):
! """groupdict([default=None]) -> dictionary
!
! Return a dictionary containing all the named subgroups of the
! match, keyed by the subgroup name. The default argument is
! used for groups that did not participate in the match.
!
! """
dict = {}
for name, index in self.re.groupindex.items():
a, b = self.regs[index]