[Python-checkins] r45595 - in python/branches/p3yk: Lib/re.py Lib/sre.py Lib/test/regrtest.py Lib/test/test___all__.py Lib/test/test_re.py Modules/_sre.c

thomas.wouters python-checkins at python.org
Fri Apr 21 11:47:09 CEST 2006


Author: thomas.wouters
Date: Fri Apr 21 11:47:09 2006
New Revision: 45595

Added:
   python/branches/p3yk/Lib/re.py
      - copied unchanged from r43069, python/trunk/Lib/re.py
Removed:
   python/branches/p3yk/Lib/sre.py
Modified:
   python/branches/p3yk/   (props changed)
   python/branches/p3yk/Lib/test/regrtest.py
   python/branches/p3yk/Lib/test/test___all__.py
   python/branches/p3yk/Lib/test/test_re.py
   python/branches/p3yk/Modules/_sre.c
Log:

Merge trunk up to 43069, putting re.py back and hopefully making the branch
usable again.



Deleted: /python/branches/p3yk/Lib/sre.py
==============================================================================
--- /python/branches/p3yk/Lib/sre.py	Fri Apr 21 11:47:09 2006
+++ (empty file)
@@ -1,315 +0,0 @@
-#
-# Secret Labs' Regular Expression Engine
-#
-# re-compatible interface for the sre matching engine
-#
-# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.
-#
-# This version of the SRE library can be redistributed under CNRI's
-# Python 1.6 license.  For any other use, please contact Secret Labs
-# AB (info at pythonware.com).
-#
-# Portions of this engine have been developed in cooperation with
-# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
-# other compatibility work.
-#
-
-r"""Support for regular expressions (RE).
-
-This module provides regular expression matching operations similar to
-those found in Perl.  It supports both 8-bit and Unicode strings; both
-the pattern and the strings being processed can contain null bytes and
-characters outside the US ASCII range.
-
-Regular expressions can contain both special and ordinary characters.
-Most ordinary characters, like "A", "a", or "0", are the simplest
-regular expressions; they simply match themselves.  You can
-concatenate ordinary characters, so last matches the string 'last'.
-
-The special characters are:
-    "."      Matches any character except a newline.
-    "^"      Matches the start of the string.
-    "$"      Matches the end of the string.
-    "*"      Matches 0 or more (greedy) repetitions of the preceding RE.
-             Greedy means that it will match as many repetitions as possible.
-    "+"      Matches 1 or more (greedy) repetitions of the preceding RE.
-    "?"      Matches 0 or 1 (greedy) of the preceding RE.
-    *?,+?,?? Non-greedy versions of the previous three special characters.
-    {m,n}    Matches from m to n repetitions of the preceding RE.
-    {m,n}?   Non-greedy version of the above.
-    "\\"      Either escapes special characters or signals a special sequence.
-    []       Indicates a set of characters.
-             A "^" as the first character indicates a complementing set.
-    "|"      A|B, creates an RE that will match either A or B.
-    (...)    Matches the RE inside the parentheses.
-             The contents can be retrieved or matched later in the string.
-    (?iLmsux) Set the I, L, M, S, U, or X flag for the RE (see below).
-    (?:...)  Non-grouping version of regular parentheses.
-    (?P<name>...) The substring matched by the group is accessible by name.
-    (?P=name)     Matches the text matched earlier by the group named name.
-    (?#...)  A comment; ignored.
-    (?=...)  Matches if ... matches next, but doesn't consume the string.
-    (?!...)  Matches if ... doesn't match next.
-
-The special sequences consist of "\\" and a character from the list
-below.  If the ordinary character is not on the list, then the
-resulting RE will match the second character.
-    \number  Matches the contents of the group of the same number.
-    \A       Matches only at the start of the string.
-    \Z       Matches only at the end of the string.
-    \b       Matches the empty string, but only at the start or end of a word.
-    \B       Matches the empty string, but not at the start or end of a word.
-    \d       Matches any decimal digit; equivalent to the set [0-9].
-    \D       Matches any non-digit character; equivalent to the set [^0-9].
-    \s       Matches any whitespace character; equivalent to [ \t\n\r\f\v].
-    \S       Matches any non-whitespace character; equiv. to [^ \t\n\r\f\v].
-    \w       Matches any alphanumeric character; equivalent to [a-zA-Z0-9_].
-             With LOCALE, it will match the set [0-9_] plus characters defined
-             as letters for the current locale.
-    \W       Matches the complement of \w.
-    \\       Matches a literal backslash.
-
-This module exports the following functions:
-    match    Match a regular expression pattern to the beginning of a string.
-    search   Search a string for the presence of a pattern.
-    sub      Substitute occurrences of a pattern found in a string.
-    subn     Same as sub, but also return the number of substitutions made.
-    split    Split a string by the occurrences of a pattern.
-    findall  Find all occurrences of a pattern in a string.
-    compile  Compile a pattern into a RegexObject.
-    purge    Clear the regular expression cache.
-    escape   Backslash all non-alphanumerics in a string.
-
-Some of the functions in this module takes flags as optional parameters:
-    I  IGNORECASE  Perform case-insensitive matching.
-    L  LOCALE      Make \w, \W, \b, \B, dependent on the current locale.
-    M  MULTILINE   "^" matches the beginning of lines as well as the string.
-                   "$" matches the end of lines as well as the string.
-    S  DOTALL      "." matches any character at all, including the newline.
-    X  VERBOSE     Ignore whitespace and comments for nicer looking RE's.
-    U  UNICODE     Make \w, \W, \b, \B, dependent on the Unicode locale.
-
-This module also defines an exception 'error'.
-
-"""
-
-import sys
-import sre_compile
-import sre_parse
-
-# public symbols
-__all__ = [ "match", "search", "sub", "subn", "split", "findall",
-    "compile", "purge", "template", "escape", "I", "L", "M", "S", "X",
-    "U", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE",
-    "UNICODE", "error" ]
-
-__version__ = "2.2.1"
-
-# flags
-I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE # ignore case
-L = LOCALE = sre_compile.SRE_FLAG_LOCALE # assume current 8-bit locale
-U = UNICODE = sre_compile.SRE_FLAG_UNICODE # assume unicode locale
-M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE # make anchors look for newline
-S = DOTALL = sre_compile.SRE_FLAG_DOTALL # make dot match newline
-X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE # ignore whitespace and comments
-
-# sre extensions (experimental, don't rely on these)
-T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE # disable backtracking
-DEBUG = sre_compile.SRE_FLAG_DEBUG # dump pattern after compilation
-
-# sre exception
-error = sre_compile.error
-
-# --------------------------------------------------------------------
-# public interface
-
-def match(pattern, string, flags=0):
-    """Try to apply the pattern at the start of the string, returning
-    a match object, or None if no match was found."""
-    return _compile(pattern, flags).match(string)
-
-def search(pattern, string, flags=0):
-    """Scan through string looking for a match to the pattern, returning
-    a match object, or None if no match was found."""
-    return _compile(pattern, flags).search(string)
-
-def sub(pattern, repl, string, count=0):
-    """Return the string obtained by replacing the leftmost
-    non-overlapping occurrences of the pattern in string by the
-    replacement repl.  repl can be either a string or a callable;
-    if a callable, it's passed the match object and must return
-    a replacement string to be used."""
-    return _compile(pattern, 0).sub(repl, string, count)
-
-def subn(pattern, repl, string, count=0):
-    """Return a 2-tuple containing (new_string, number).
-    new_string is the string obtained by replacing the leftmost
-    non-overlapping occurrences of the pattern in the source
-    string by the replacement repl.  number is the number of
-    substitutions that were made. repl can be either a string or a
-    callable; if a callable, it's passed the match object and must
-    return a replacement string to be used."""
-    return _compile(pattern, 0).subn(repl, string, count)
-
-def split(pattern, string, maxsplit=0):
-    """Split the source string by the occurrences of the pattern,
-    returning a list containing the resulting substrings."""
-    return _compile(pattern, 0).split(string, maxsplit)
-
-def findall(pattern, string, flags=0):
-    """Return a list of all non-overlapping matches in the string.
-
-    If one or more groups are present in the pattern, return a
-    list of groups; this will be a list of tuples if the pattern
-    has more than one group.
-
-    Empty matches are included in the result."""
-    return _compile(pattern, flags).findall(string)
-
-if sys.hexversion >= 0x02020000:
-    __all__.append("finditer")
-    def finditer(pattern, string, flags=0):
-        """Return an iterator over all non-overlapping matches in the
-        string.  For each match, the iterator returns a match object.
-
-        Empty matches are included in the result."""
-        return _compile(pattern, flags).finditer(string)
-
-def compile(pattern, flags=0):
-    "Compile a regular expression pattern, returning a pattern object."
-    return _compile(pattern, flags)
-
-def purge():
-    "Clear the regular expression cache"
-    _cache.clear()
-    _cache_repl.clear()
-
-def template(pattern, flags=0):
-    "Compile a template pattern, returning a pattern object"
-    return _compile(pattern, flags|T)
-
-_alphanum = {}
-for c in 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890':
-    _alphanum[c] = 1
-del c
-
-def escape(pattern):
-    "Escape all non-alphanumeric characters in pattern."
-    s = list(pattern)
-    alphanum = _alphanum
-    for i in range(len(pattern)):
-        c = pattern[i]
-        if c not in alphanum:
-            if c == "\000":
-                s[i] = "\\000"
-            else:
-                s[i] = "\\" + c
-    return pattern[:0].join(s)
-
-# --------------------------------------------------------------------
-# internals
-
-_cache = {}
-_cache_repl = {}
-
-_pattern_type = type(sre_compile.compile("", 0))
-
-_MAXCACHE = 100
-
-def _compile(*key):
-    # internal: compile pattern
-    cachekey = (type(key[0]),) + key
-    p = _cache.get(cachekey)
-    if p is not None:
-        return p
-    pattern, flags = key
-    if isinstance(pattern, _pattern_type):
-        return pattern
-    if not sre_compile.isstring(pattern):
-        raise TypeError, "first argument must be string or compiled pattern"
-    try:
-        p = sre_compile.compile(pattern, flags)
-    except error, v:
-        raise error, v # invalid expression
-    if len(_cache) >= _MAXCACHE:
-        _cache.clear()
-    _cache[cachekey] = p
-    return p
-
-def _compile_repl(*key):
-    # internal: compile replacement pattern
-    p = _cache_repl.get(key)
-    if p is not None:
-        return p
-    repl, pattern = key
-    try:
-        p = sre_parse.parse_template(repl, pattern)
-    except error, v:
-        raise error, v # invalid expression
-    if len(_cache_repl) >= _MAXCACHE:
-        _cache_repl.clear()
-    _cache_repl[key] = p
-    return p
-
-def _expand(pattern, match, template):
-    # internal: match.expand implementation hook
-    template = sre_parse.parse_template(template, pattern)
-    return sre_parse.expand_template(template, match)
-
-def _subx(pattern, template):
-    # internal: pattern.sub/subn implementation helper
-    template = _compile_repl(template, pattern)
-    if not template[0] and len(template[1]) == 1:
-        # literal replacement
-        return template[1][0]
-    def filter(match, template=template):
-        return sre_parse.expand_template(template, match)
-    return filter
-
-# register myself for pickling
-
-import copy_reg
-
-def _pickle(p):
-    return _compile, (p.pattern, p.flags)
-
-copy_reg.pickle(_pattern_type, _pickle, _compile)
-
-# --------------------------------------------------------------------
-# experimental stuff (see python-dev discussions for details)
-
-class Scanner:
-    def __init__(self, lexicon, flags=0):
-        from sre_constants import BRANCH, SUBPATTERN
-        self.lexicon = lexicon
-        # combine phrases into a compound pattern
-        p = []
-        s = sre_parse.Pattern()
-        s.flags = flags
-        for phrase, action in lexicon:
-            p.append(sre_parse.SubPattern(s, [
-                (SUBPATTERN, (len(p)+1, sre_parse.parse(phrase, flags))),
-                ]))
-        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
-        s.groups = len(p)
-        self.scanner = sre_compile.compile(p)
-    def scan(self, string):
-        result = []
-        append = result.append
-        match = self.scanner.scanner(string).match
-        i = 0
-        while 1:
-            m = match()
-            if not m:
-                break
-            j = m.end()
-            if i == j:
-                break
-            action = self.lexicon[m.lastindex-1][1]
-            if callable(action):
-                self.match = m
-                action = action(self, m.group())
-            if action is not None:
-                append(action)
-            i = j
-        return result, string[i:]

Modified: python/branches/p3yk/Lib/test/regrtest.py
==============================================================================
--- python/branches/p3yk/Lib/test/regrtest.py	(original)
+++ python/branches/p3yk/Lib/test/regrtest.py	Fri Apr 21 11:47:09 2006
@@ -110,7 +110,7 @@
 import getopt
 import random
 import warnings
-import sre
+import re
 import cStringIO
 import traceback
 
@@ -525,7 +525,7 @@
                     _path_created.clear()
                     warnings.filters[:] = fs
                     gc.collect()
-                    sre.purge()
+                    re.purge()
                     _strptime._regex_cache.clear()
                     urlparse.clear_cache()
                     urllib.urlcleanup()

Modified: python/branches/p3yk/Lib/test/test___all__.py
==============================================================================
--- python/branches/p3yk/Lib/test/test___all__.py	(original)
+++ python/branches/p3yk/Lib/test/test___all__.py	Fri Apr 21 11:47:09 2006
@@ -145,7 +145,6 @@
         self.check_all("smtplib")
         self.check_all("sndhdr")
         self.check_all("socket")
-        self.check_all("sre")
         self.check_all("_strptime")
         self.check_all("symtable")
         self.check_all("tabnanny")

Modified: python/branches/p3yk/Lib/test/test_re.py
==============================================================================
--- python/branches/p3yk/Lib/test/test_re.py	(original)
+++ python/branches/p3yk/Lib/test/test_re.py	Fri Apr 21 11:47:09 2006
@@ -3,7 +3,7 @@
 
 from test.test_support import verbose, run_unittest
 import re
-from sre import Scanner
+from re import Scanner
 import sys, os, traceback
 from weakref import proxy
 

Modified: python/branches/p3yk/Modules/_sre.c
==============================================================================
--- python/branches/p3yk/Modules/_sre.c	(original)
+++ python/branches/p3yk/Modules/_sre.c	Fri Apr 21 11:47:09 2006
@@ -51,6 +51,8 @@
 #define SRE_MODULE "sre"
 #endif
 
+#define SRE_PY_MODULE "re"
+
 /* defining this one enables tracing */
 #undef VERBOSE
 
@@ -2455,7 +2457,7 @@
         } else {
             /* not a literal; hand it over to the template compiler */
             filter = call(
-                SRE_MODULE, "_subx",
+                SRE_PY_MODULE, "_subx",
                 PyTuple_Pack(2, self, template)
                 );
             if (!filter)
@@ -2872,7 +2874,7 @@
 
     /* delegate to Python code */
     return call(
-        SRE_MODULE, "_expand",
+        SRE_PY_MODULE, "_expand",
         PyTuple_Pack(3, self->pattern, self, template)
         );
 }


More information about the Python-checkins mailing list