[Python-3000-checkins] r65559 - in python/branches/py3k: Lib/test/test_urllib.py Lib/urllib/parse.py Modules/_sre.c

Wed Aug 6 21:32:25 CEST 2008

Yes, I know the two Lib files shouldn't have been committed. I've
already reverted them, r66650.

On Wed, Aug 6, 2008 at 12:29 PM, guido.van.rossum
<python-3000-checkins at python.org> wrote:
> Author: guido.van.rossum
> Date: Wed Aug  6 21:29:14 2008
> New Revision: 65559
>
> Log:
> Merged revisions 65544 via svnmerge from
> svn+ssh://pythondev@svn.python.org/python/trunk
>
> ........
>  r65544 | guido.van.rossum | 2008-08-04 20:39:21 -0700 (Mon, 04 Aug 2008) | 28 lines
>
>  Tracker issue 3487: sre "bytecode" verifier.
>
>  This is a verifier for the binary code used by the _sre module (this
>  is often called bytecode, though to distinguish it from Python bytecode
>  I put it in quotes).
>
>  I wrote this for Google App Engine, and am making the patch available as
>  open source under the Apache 2 license.  Below are the copyright
>  statement and license, for completeness.
>
>  # Copyright 2008 Google Inc.
>  #
>  # Licensed under the Apache License, Version 2.0 (the "License");
>  # you may not use this file except in compliance with the License.
>  # You may obtain a copy of the License at
>  #
>  #     http://www.apache.org/licenses/LICENSE-2.0
>  #
>  # Unless required by applicable law or agreed to in writing, software
>  # distributed under the License is distributed on an "AS IS" BASIS,
>  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>  # See the License for the specific language governing permissions and
>  # limitations under the License.
>
>  It's not necessary to include these copyrights and bytecode in the
>  source file.  Google has signed a contributor's agreement with the PSF
>  already.
> ........
>
>
> Modified:
>   python/branches/py3k/   (props changed)
>   python/branches/py3k/Lib/test/test_urllib.py
>   python/branches/py3k/Lib/urllib/parse.py
>   python/branches/py3k/Modules/_sre.c
>
> Modified: python/branches/py3k/Lib/test/test_urllib.py
> ==============================================================================
> --- python/branches/py3k/Lib/test/test_urllib.py        (original)
> +++ python/branches/py3k/Lib/test/test_urllib.py        Wed Aug  6 21:29:14 2008
> @@ -465,7 +465,7 @@
>
>     def test_unquote_with_unicode(self):
>         r = urllib.parse.unquote('br%C3%BCckner_sapporo_20050930.doc')
> -        self.assertEqual(r, 'br\xc3\xbcckner_sapporo_20050930.doc')
> +        self.assertEqual(r, 'br\u00FCckner_sapporo_20050930.doc')
>
>  class urlencode_Tests(unittest.TestCase):
>     """Tests for urlencode()"""
>
> Modified: python/branches/py3k/Lib/urllib/parse.py
> ==============================================================================
> --- python/branches/py3k/Lib/urllib/parse.py    (original)
> +++ python/branches/py3k/Lib/urllib/parse.py    Wed Aug  6 21:29:14 2008
> @@ -261,84 +261,74 @@
>         return url, ''
>
>
> -_hextochr = dict(('%02x' % i, chr(i)) for i in range(256))
> -_hextochr.update(('%02X' % i, chr(i)) for i in range(256))
> +def unquote_as_string (s, plus=False, charset=None):
> +    if charset is None:
> +        charset = "UTF-8"
> +    return str(unquote_as_bytes(s, plus=plus), charset, 'strict')
>
> -def unquote(s):
> +def unquote_as_bytes (s, plus=False):
>     """unquote('abc%20def') -> 'abc def'."""
> +    if plus:
> +        s = s.replace('+', ' ')
>     res = s.split('%')
> +    res[0] = res[0].encode('ASCII', 'strict')
>     for i in range(1, len(res)):
> -        item = res[i]
> -        try:
> -            res[i] = _hextochr[item[:2]] + item[2:]
> -        except KeyError:
> -            res[i] = '%' + item
> -        except UnicodeDecodeError:
> -            res[i] = chr(int(item[:2], 16)) + item[2:]
> -    return "".join(res)
> -
> -def unquote_plus(s):
> -    """unquote('%7e/abc+def') -> '~/abc def'"""
> -    s = s.replace('+', ' ')
> -    return unquote(s)
> -
> -always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
> -               'abcdefghijklmnopqrstuvwxyz'
> -               '0123456789' '_.-')
> -_safe_quoters= {}
> -
> -class Quoter:
> -    def __init__(self, safe):
> -        self.cache = {}
> -        self.safe = safe + always_safe
> +        res[i] = (bytes.fromhex(res[i][:2]) +
> +                  res[i][2:].encode('ASCII', 'strict'))
> +    return b''.join(res)
> +
> +_always_safe = (b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
> +                b'abcdefghijklmnopqrstuvwxyz'
> +                b'0123456789'
> +                b'_.-')
> +
> +_percent_code = ord('%')
> +
> +_hextable = b'0123456789ABCDEF'
> +
> +def quote_as_bytes(s, safe = '/', plus=False):
> +    """quote(b'abc@def') -> 'abc%40def'"""
> +
> +    if isinstance(s, str):
> +        s = s.encode("UTF-8", "strict")
> +    if not (isinstance(s, bytes) or isinstance(s, bytearray)):
> +        raise ValueError("Argument to quote must be either bytes "
> +                         "or bytearray; string arguments will be "
> +                         "converted to UTF-8 bytes")
> +
> +    safeset = _always_safe + safe.encode('ASCII', 'strict')
> +    if plus:
> +        safeset += b' '
> +
> +    result = bytearray()
> +    for i in s:
> +        if i not in safeset:
> +            result.append(_percent_code)
> +            result.append(_hextable[(i >> 4) & 0xF])
> +            result.append(_hextable[i & 0xF])
> +        else:
> +            result.append(i)
> +    if plus:
> +        result = result.replace(b' ', b'+')
> +    return result
>
> -    def __call__(self, c):
> -        try:
> -            return self.cache[c]
> -        except KeyError:
> -            if ord(c) < 256:
> -                res = (c in self.safe) and c or ('%%%02X' % ord(c))
> -                self.cache[c] = res
> -                return res
> -            else:
> -                return "".join(['%%%02X' % i for i in c.encode("utf-8")])
> +def quote_as_string(s, safe = '/', plus=False):
> +    return str(quote_as_bytes(s, safe=safe, plus=plus), 'ASCII', 'strict')
>
> -def quote(s, safe = '/'):
> -    """quote('abc def') -> 'abc%20def'
> +# finally, define defaults for 'quote' and 'unquote'
>
> -    Each part of a URL, e.g. the path info, the query, etc., has a
> -    different set of reserved characters that must be quoted.
> +def quote(s, safe='/'):
> +    return quote_as_string(s, safe=safe)
>
> -    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
> -    the following reserved characters.
> +def quote_plus(s, safe=''):
> +    return quote_as_string(s, safe=safe, plus=True)
>
> -    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
> -                  "$" | ","
> +def unquote(s):
> +    return unquote_as_string(s)
>
> -    Each of these characters is reserved in some component of a URL,
> -    but not necessarily in all of them.
> +def unquote_plus(s):
> +    return unquote_as_string(s, plus=True)
>
> -    By default, the quote function is intended for quoting the path
> -    section of a URL.  Thus, it will not encode '/'.  This character
> -    is reserved, but in typical usage the quote function is being
> -    called on a path where the existing slash characters are used as
> -    reserved characters.
> -    """
> -    cachekey = (safe, always_safe)
> -    try:
> -        quoter = _safe_quoters[cachekey]
> -    except KeyError:
> -        quoter = Quoter(safe)
> -        _safe_quoters[cachekey] = quoter
> -    res = map(quoter, s)
> -    return ''.join(res)
> -
> -def quote_plus(s, safe = ''):
> -    """Quote the query fragment of a URL; replacing ' ' with '+'"""
> -    if ' ' in s:
> -        s = quote(s, safe + ' ')
> -        return s.replace(' ', '+')
> -    return quote(s, safe)
>
>  def urlencode(query,doseq=0):
>     """Encode a sequence of two-element tuples or dictionary into a URL query string.
> @@ -387,7 +377,7 @@
>                 # is there a reasonable way to convert to ASCII?
>                 # encode generates a string, but "replace" or "ignore"
>                 # lose information and "strict" can raise UnicodeError
> -                v = quote_plus(v.encode("ASCII","replace"))
> +                v = quote_plus(v)
>                 l.append(k + '=' + v)
>             else:
>                 try:
> @@ -474,7 +464,8 @@
>         _userprog = re.compile('^(.*)@(.*)$')
>
>     match = _userprog.match(host)
> -    if match: return map(unquote, match.group(1, 2))
> +    if match:
> +        return map(unquote, match.group(1, 2))
>     return None, host
>
>  _passwdprog = None
>
> Modified: python/branches/py3k/Modules/_sre.c
> ==============================================================================
> --- python/branches/py3k/Modules/_sre.c (original)
> +++ python/branches/py3k/Modules/_sre.c Wed Aug  6 21:29:14 2008
> @@ -2637,6 +2637,8 @@
>     pattern_members,                   /* tp_members */
>  };
>
> +static int _validate(PatternObject *self); /* Forward */
> +
>  static PyObject *
>  _compile(PyObject* self_, PyObject* args)
>  {
> @@ -2695,10 +2697,482 @@
>
>     self->weakreflist = NULL;
>
> +    if (!_validate(self)) {
> +        Py_DECREF(self);
> +        return NULL;
> +    }
> +
>     return (PyObject*) self;
>  }
>
>  /* -------------------------------------------------------------------- */
> +/* Code validation */
> +
> +/* To learn more about this code, have a look at the _compile() function in
> +   Lib/sre_compile.py.  The validation functions below check the code array
> +   for conformance with the code patterns generated there.
> +
> +   The nice thing about the generated code is that it is position-independent:
> +   all jumps are relative jumps forward.  Also, jumps don't cross each other:
> +   the target of a later jump is always earlier than the target of an earlier
> +   jump.  IOW, this is okay:
> +
> +   J---------J-------T--------T
> +    \         \_____/        /
> +     \______________________/
> +
> +   but this is not:
> +
> +   J---------J-------T--------T
> +    \_________\_____/        /
> +               \____________/
> +
> +   It also helps that SRE_CODE is always an unsigned type, either 2 bytes or 4
> +   bytes wide (the latter if Python is compiled for "wide" unicode support).
> +*/
> +
> +/* Defining this one enables tracing of the validator */
> +#undef VVERBOSE
> +
> +/* Trace macro for the validator */
> +#if defined(VVERBOSE)
> +#define VTRACE(v) printf v
> +#else
> +#define VTRACE(v)
> +#endif
> +
> +/* Report failure */
> +#define FAIL do { VTRACE(("FAIL: %d\n", __LINE__)); return 0; } while (0)
> +
> +/* Extract opcode, argument, or skip count from code array */
> +#define GET_OP                                          \
> +    do {                                                \
> +        VTRACE(("%p: ", code));                         \
> +        if (code >= end) FAIL;                          \
> +        op = *code++;                                   \
> +        VTRACE(("%lu (op)\n", (unsigned long)op));      \
> +    } while (0)
> +#define GET_ARG                                         \
> +    do {                                                \
> +        VTRACE(("%p= ", code));                         \
> +        if (code >= end) FAIL;                          \
> +        arg = *code++;                                  \
> +        VTRACE(("%lu (arg)\n", (unsigned long)arg));    \
> +    } while (0)
> +#define GET_SKIP                                        \
> +    do {                                                \
> +        VTRACE(("%p= ", code));                         \
> +        if (code >= end) FAIL;                          \
> +        skip = *code;                                   \
> +        VTRACE(("%lu (skip to %p)\n",                   \
> +               (unsigned long)skip, code+skip));        \
> +        if (code+skip < code || code+skip > end)        \
> +            FAIL;                                       \
> +        code++;                                         \
> +    } while (0)
> +
> +static int
> +_validate_charset(SRE_CODE *code, SRE_CODE *end)
> +{
> +    /* Some variables are manipulated by the macros above */
> +    SRE_CODE op;
> +    SRE_CODE arg;
> +    SRE_CODE offset;
> +    int i;
> +
> +    while (code < end) {
> +        GET_OP;
> +        switch (op) {
> +
> +        case SRE_OP_NEGATE:
> +            break;
> +
> +        case SRE_OP_LITERAL:
> +            GET_ARG;
> +            break;
> +
> +        case SRE_OP_RANGE:
> +            GET_ARG;
> +            GET_ARG;
> +            break;
> +
> +        case SRE_OP_CHARSET:
> +            offset = 32/sizeof(SRE_CODE); /* 32-byte bitmap */
> +            if (code+offset < code || code+offset > end)
> +                FAIL;
> +            code += offset;
> +            break;
> +
> +        case SRE_OP_BIGCHARSET:
> +            GET_ARG; /* Number of blocks */
> +            offset = 256/sizeof(SRE_CODE); /* 256-byte table */
> +            if (code+offset < code || code+offset > end)
> +                FAIL;
> +            /* Make sure that each byte points to a valid block */
> +            for (i = 0; i < 256; i++) {
> +                if (((unsigned char *)code)[i] >= arg)
> +                    FAIL;
> +            }
> +            code += offset;
> +            offset = arg * 32/sizeof(SRE_CODE); /* 32-byte bitmap times arg */
> +            if (code+offset < code || code+offset > end)
> +                FAIL;
> +            code += offset;
> +            break;
> +
> +        case SRE_OP_CATEGORY:
> +            GET_ARG;
> +            switch (arg) {
> +            case SRE_CATEGORY_DIGIT:
> +            case SRE_CATEGORY_NOT_DIGIT:
> +            case SRE_CATEGORY_SPACE:
> +            case SRE_CATEGORY_NOT_SPACE:
> +            case SRE_CATEGORY_WORD:
> +            case SRE_CATEGORY_NOT_WORD:
> +            case SRE_CATEGORY_LINEBREAK:
> +            case SRE_CATEGORY_NOT_LINEBREAK:
> +            case SRE_CATEGORY_LOC_WORD:
> +            case SRE_CATEGORY_LOC_NOT_WORD:
> +            case SRE_CATEGORY_UNI_DIGIT:
> +            case SRE_CATEGORY_UNI_NOT_DIGIT:
> +            case SRE_CATEGORY_UNI_SPACE:
> +            case SRE_CATEGORY_UNI_NOT_SPACE:
> +            case SRE_CATEGORY_UNI_WORD:
> +            case SRE_CATEGORY_UNI_NOT_WORD:
> +            case SRE_CATEGORY_UNI_LINEBREAK:
> +            case SRE_CATEGORY_UNI_NOT_LINEBREAK:
> +                break;
> +            default:
> +                FAIL;
> +            }
> +            break;
> +
> +        default:
> +            FAIL;
> +
> +        }
> +    }
> +
> +    return 1;
> +}
> +
> +static int
> +_validate_inner(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
> +{
> +    /* Some variables are manipulated by the macros above */
> +    SRE_CODE op;
> +    SRE_CODE arg;
> +    SRE_CODE skip;
> +
> +    VTRACE(("code=%p, end=%p\n", code, end));
> +
> +    if (code > end)
> +        FAIL;
> +
> +    while (code < end) {
> +        GET_OP;
> +        switch (op) {
> +
> +        case SRE_OP_MARK:
> +            /* We don't check whether marks are properly nested; the
> +               sre_match() code is robust even if they aren't, and the worst
> +               you can get is nonsensical match results. */
> +            GET_ARG;
> +            if (arg > 2*groups+1) {
> +                VTRACE(("arg=%d, groups=%d\n", (int)arg, (int)groups));
> +                FAIL;
> +            }
> +            break;
> +
> +        case SRE_OP_LITERAL:
> +        case SRE_OP_NOT_LITERAL:
> +        case SRE_OP_LITERAL_IGNORE:
> +        case SRE_OP_NOT_LITERAL_IGNORE:
> +            GET_ARG;
> +            /* The arg is just a character, nothing to check */
> +            break;
> +
> +        case SRE_OP_SUCCESS:
> +        case SRE_OP_FAILURE:
> +            /* Nothing to check; these normally end the matching process */
> +            break;
> +
> +        case SRE_OP_AT:
> +            GET_ARG;
> +            switch (arg) {
> +            case SRE_AT_BEGINNING:
> +            case SRE_AT_BEGINNING_STRING:
> +            case SRE_AT_BEGINNING_LINE:
> +            case SRE_AT_END:
> +            case SRE_AT_END_LINE:
> +            case SRE_AT_END_STRING:
> +            case SRE_AT_BOUNDARY:
> +            case SRE_AT_NON_BOUNDARY:
> +            case SRE_AT_LOC_BOUNDARY:
> +            case SRE_AT_LOC_NON_BOUNDARY:
> +            case SRE_AT_UNI_BOUNDARY:
> +            case SRE_AT_UNI_NON_BOUNDARY:
> +                break;
> +            default:
> +                FAIL;
> +            }
> +            break;
> +
> +        case SRE_OP_ANY:
> +        case SRE_OP_ANY_ALL:
> +            /* These have no operands */
> +            break;
> +
> +        case SRE_OP_IN:
> +        case SRE_OP_IN_IGNORE:
> +            GET_SKIP;
> +            /* Stop 1 before the end; we check the FAILURE below */
> +            if (!_validate_charset(code, code+skip-2))
> +                FAIL;
> +            if (code[skip-2] != SRE_OP_FAILURE)
> +                FAIL;
> +            code += skip-1;
> +            break;
> +
> +        case SRE_OP_INFO:
> +            {
> +                /* A minimal info field is
> +                   <INFO> <1=skip> <2=flags> <3=min> <4=max>;
> +                   If SRE_INFO_PREFIX or SRE_INFO_CHARSET is in the flags,
> +                   more follows. */
> +                SRE_CODE flags, min, max, i;
> +                SRE_CODE *newcode;
> +                GET_SKIP;
> +                newcode = code+skip-1;
> +                GET_ARG; flags = arg;
> +                GET_ARG; min = arg;
> +                GET_ARG; max = arg;
> +                /* Check that only valid flags are present */
> +                if ((flags & ~(SRE_INFO_PREFIX |
> +                               SRE_INFO_LITERAL |
> +                               SRE_INFO_CHARSET)) != 0)
> +                    FAIL;
> +                /* PREFIX and CHARSET are mutually exclusive */
> +                if ((flags & SRE_INFO_PREFIX) &&
> +                    (flags & SRE_INFO_CHARSET))
> +                    FAIL;
> +                /* LITERAL implies PREFIX */
> +                if ((flags & SRE_INFO_LITERAL) &&
> +                    !(flags & SRE_INFO_PREFIX))
> +                    FAIL;
> +                /* Validate the prefix */
> +                if (flags & SRE_INFO_PREFIX) {
> +                    SRE_CODE prefix_len, prefix_skip;
> +                    GET_ARG; prefix_len = arg;
> +                    GET_ARG; prefix_skip = arg;
> +                    /* Here comes the prefix string */
> +                    if (code+prefix_len < code || code+prefix_len > newcode)
> +                        FAIL;
> +                    code += prefix_len;
> +                    /* And here comes the overlap table */
> +                    if (code+prefix_len < code || code+prefix_len > newcode)
> +                        FAIL;
> +                    /* Each overlap value should be < prefix_len */
> +                    for (i = 0; i < prefix_len; i++) {
> +                        if (code[i] >= prefix_len)
> +                            FAIL;
> +                    }
> +                    code += prefix_len;
> +                }
> +                /* Validate the charset */
> +                if (flags & SRE_INFO_CHARSET) {
> +                    if (!_validate_charset(code, newcode-1))
> +                        FAIL;
> +                    if (newcode[-1] != SRE_OP_FAILURE)
> +                        FAIL;
> +                    code = newcode;
> +                }
> +                else if (code != newcode) {
> +                  VTRACE(("code=%p, newcode=%p\n", code, newcode));
> +                    FAIL;
> +                }
> +            }
> +            break;
> +
> +        case SRE_OP_BRANCH:
> +            {
> +                SRE_CODE *target = NULL;
> +                for (;;) {
> +                    GET_SKIP;
> +                    if (skip == 0)
> +                        break;
> +                    /* Stop 2 before the end; we check the JUMP below */
> +                    if (!_validate_inner(code, code+skip-3, groups))
> +                        FAIL;
> +                    code += skip-3;
> +                    /* Check that it ends with a JUMP, and that each JUMP
> +                       has the same target */
> +                    GET_OP;
> +                    if (op != SRE_OP_JUMP)
> +                        FAIL;
> +                    GET_SKIP;
> +                    if (target == NULL)
> +                        target = code+skip-1;
> +                    else if (code+skip-1 != target)
> +                        FAIL;
> +                }
> +            }
> +            break;
> +
> +        case SRE_OP_REPEAT_ONE:
> +        case SRE_OP_MIN_REPEAT_ONE:
> +            {
> +                SRE_CODE min, max;
> +                GET_SKIP;
> +                GET_ARG; min = arg;
> +                GET_ARG; max = arg;
> +                if (min > max)
> +                    FAIL;
> +#ifdef Py_UNICODE_WIDE
> +                if (max > 65535)
> +                    FAIL;
> +#endif
> +                if (!_validate_inner(code, code+skip-4, groups))
> +                    FAIL;
> +                code += skip-4;
> +                GET_OP;
> +                if (op != SRE_OP_SUCCESS)
> +                    FAIL;
> +            }
> +            break;
> +
> +        case SRE_OP_REPEAT:
> +            {
> +                SRE_CODE min, max;
> +                GET_SKIP;
> +                GET_ARG; min = arg;
> +                GET_ARG; max = arg;
> +                if (min > max)
> +                    FAIL;
> +#ifdef Py_UNICODE_WIDE
> +                if (max > 65535)
> +                    FAIL;
> +#endif
> +                if (!_validate_inner(code, code+skip-3, groups))
> +                    FAIL;
> +                code += skip-3;
> +                GET_OP;
> +                if (op != SRE_OP_MAX_UNTIL && op != SRE_OP_MIN_UNTIL)
> +                    FAIL;
> +            }
> +            break;
> +
> +        case SRE_OP_GROUPREF:
> +        case SRE_OP_GROUPREF_IGNORE:
> +            GET_ARG;
> +            if (arg >= groups)
> +                FAIL;
> +            break;
> +
> +        case SRE_OP_GROUPREF_EXISTS:
> +            /* The regex syntax for this is: '(?(group)then|else)', where
> +               'group' is either an integer group number or a group name,
> +               'then' and 'else' are sub-regexes, and 'else' is optional. */
> +            GET_ARG;
> +            if (arg >= groups)
> +                FAIL;
> +            GET_SKIP;
> +            code--; /* The skip is relative to the first arg! */
> +            /* There are two possibilities here: if there is both a 'then'
> +               part and an 'else' part, the generated code looks like:
> +
> +               GROUPREF_EXISTS
> +               <group>
> +               <skipyes>
> +               ...then part...
> +               JUMP
> +               <skipno>
> +               (<skipyes> jumps here)
> +               ...else part...
> +               (<skipno> jumps here)
> +
> +               If there is only a 'then' part, it looks like:
> +
> +               GROUPREF_EXISTS
> +               <group>
> +               <skip>
> +               ...then part...
> +               (<skip> jumps here)
> +
> +               There is no direct way to decide which it is, and we don't want
> +               to allow arbitrary jumps anywhere in the code; so we just look
> +               for a JUMP opcode preceding our skip target.
> +            */
> +            if (skip >= 3 && code+skip-3 >= code &&
> +                code[skip-3] == SRE_OP_JUMP)
> +            {
> +                VTRACE(("both then and else parts present\n"));
> +                if (!_validate_inner(code+1, code+skip-3, groups))
> +                    FAIL;
> +                code += skip-2; /* Position after JUMP, at <skipno> */
> +                GET_SKIP;
> +                if (!_validate_inner(code, code+skip-1, groups))
> +                    FAIL;
> +                code += skip-1;
> +            }
> +            else {
> +                VTRACE(("only a then part present\n"));
> +                if (!_validate_inner(code+1, code+skip-1, groups))
> +                    FAIL;
> +                code += skip-1;
> +            }
> +            break;
> +
> +        case SRE_OP_ASSERT:
> +        case SRE_OP_ASSERT_NOT:
> +            GET_SKIP;
> +            GET_ARG; /* 0 for lookahead, width for lookbehind */
> +            code--; /* Back up over arg to simplify math below */
> +            if (arg & 0x80000000)
> +                FAIL; /* Width too large */
> +            /* Stop 1 before the end; we check the SUCCESS below */
> +            if (!_validate_inner(code+1, code+skip-2, groups))
> +                FAIL;
> +            code += skip-2;
> +            GET_OP;
> +            if (op != SRE_OP_SUCCESS)
> +                FAIL;
> +            break;
> +
> +        default:
> +            FAIL;
> +
> +        }
> +    }
> +
> +    VTRACE(("okay\n"));
> +    return 1;
> +}
> +
> +static int
> +_validate_outer(SRE_CODE *code, SRE_CODE *end, Py_ssize_t groups)
> +{
> +    if (groups < 0 || groups > 100 || code >= end || end[-1] != SRE_OP_SUCCESS)
> +        FAIL;
> +    if (groups == 0)  /* fix for simplejson */
> +        groups = 100; /* 100 groups should always be safe */
> +    return _validate_inner(code, end-1, groups);
> +}
> +
> +static int
> +_validate(PatternObject *self)
> +{
> +    if (!_validate_outer(self->code, self->code+self->codesize, self->groups))
> +    {
> +        PyErr_SetString(PyExc_RuntimeError, "invalid SRE code");
> +        return 0;
> +    }
> +    else
> +        VTRACE(("Success!\n"));
> +    return 1;
> +}
> +
> +/* -------------------------------------------------------------------- */
>  /* match methods */
>
>  static void
> _______________________________________________
> Python-3000-checkins mailing list
> Python-3000-checkins at python.org
> http://mail.python.org/mailman/listinfo/python-3000-checkins
>

-- 
--Guido van Rossum (home page: http://www.python.org/~guido/)