[pypy-svn] r15943 - in pypy/dist/pypy/module/_sre: . test

nik at codespeak.net nik at codespeak.net
Wed Aug 10 23:05:22 CEST 2005


Author: nik
Date: Wed Aug 10 23:05:20 2005
New Revision: 15943

Added:
   pypy/dist/pypy/module/_sre/test/autopath.py   (contents, props changed)
   pypy/dist/pypy/module/_sre/test/test_interp_sre.py   (contents, props changed)
Modified:
   pypy/dist/pypy/module/_sre/__init__.py
   pypy/dist/pypy/module/_sre/app_sre.py
   pypy/dist/pypy/module/_sre/interp_sre.py
Log:
moved whole category matching/dispatching to interp-level. added the
interp-level tests (missed them some commits ago)


Modified: pypy/dist/pypy/module/_sre/__init__.py
==============================================================================
--- pypy/dist/pypy/module/_sre/__init__.py	(original)
+++ pypy/dist/pypy/module/_sre/__init__.py	Wed Aug 10 23:05:20 2005
@@ -18,11 +18,12 @@
     }
 
     interpleveldefs = {
-        '_is_digit':      'interp_sre._is_digit',
-        '_is_space':      'interp_sre._is_space',
-        '_is_word':       'interp_sre._is_word',
-        '_is_uni_word':   'interp_sre._is_uni_word',
-        '_is_loc_word':   'interp_sre._is_loc_word',
-        '_is_linebreak':  'interp_sre._is_linebreak',
-        '_is_uni_linebreak': 'interp_sre._is_uni_linebreak',
+        '_category_dispatch': 'interp_sre.category_dispatch',
+        '_is_digit':      'interp_sre.is_digit',
+        '_is_space':      'interp_sre.is_space',
+        '_is_word':       'interp_sre.is_word',
+        '_is_uni_word':   'interp_sre.is_uni_word',
+        '_is_loc_word':   'interp_sre.is_loc_word',
+        '_is_linebreak':  'interp_sre.is_linebreak',
+        '_is_uni_linebreak': 'interp_sre.is_uni_linebreak',
     }

Modified: pypy/dist/pypy/module/_sre/app_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/app_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/app_sre.py	Wed Aug 10 23:05:20 2005
@@ -549,7 +549,6 @@
     def __init__(self):
         self.executing_contexts = {}
         self.at_dispatcher = _AtcodeDispatcher()
-        self.ch_dispatcher = _ChcodeDispatcher()
         self.set_dispatcher = _CharsetDispatcher()
         
     def match(self, context):
@@ -643,7 +642,8 @@
         # match at given category
         # <CATEGORY> <code>
         #self._log(ctx, "CATEGORY", ctx.peek_code(1))
-        if ctx.at_end() or not self.ch_dispatcher.dispatch(ctx.peek_code(1), ctx):
+        if ctx.at_end() or \
+                 not _sre._category_dispatch(ctx.peek_code(1), ctx.peek_char()):
             ctx.has_matched = False
             return True
         ctx.skip_code(2)
@@ -1083,9 +1083,6 @@
 
 class _CharsetDispatcher(_Dispatcher):
 
-    def __init__(self):
-        self.ch_dispatcher = _ChcodeDispatcher()
-
     def reset(self, char):
         self.char = char
         self.ok = True
@@ -1100,7 +1097,7 @@
             ctx.skip_code(2)
     def set_category(self, ctx):
         # <CATEGORY> <code>
-        if self.ch_dispatcher.dispatch(ctx.peek_code(1), ctx):
+        if _sre._category_dispatch(ctx.peek_code(1), ctx.peek_char()):
             return self.ok
         else:
             ctx.skip_code(2)
@@ -1183,50 +1180,6 @@
 _AtcodeDispatcher.build_dispatch_table(ATCODES, "")
 
 
-class _ChcodeDispatcher(_Dispatcher):
-
-    def category_digit(self, ctx):
-        return _sre._is_digit(ctx.peek_char())
-    def category_not_digit(self, ctx):
-        return not _sre._is_digit(ctx.peek_char())
-    def category_space(self, ctx):
-        return _sre._is_space(ctx.peek_char())
-    def category_not_space(self, ctx):
-        return not _sre._is_space(ctx.peek_char())
-    def category_word(self, ctx):
-        return _sre._is_word(ctx.peek_char())
-    def category_not_word(self, ctx):
-        return not _sre._is_word(ctx.peek_char())
-    def category_linebreak(self, ctx):
-        return _sre._is_linebreak(ctx.peek_char())
-    def category_not_linebreak(self, ctx):
-        return not _sre._is_linebreak(ctx.peek_char())
-    def category_loc_word(self, ctx):
-        return _sre._is_loc_word(ctx.peek_char())
-    def category_loc_not_word(self, ctx):
-        return not _sre._is_loc_word(ctx.peek_char())
-    def category_uni_digit(self, ctx):
-        return ctx.peek_char().isdigit()
-    def category_uni_not_digit(self, ctx):
-        return not ctx.peek_char().isdigit()
-    def category_uni_space(self, ctx):
-        return ctx.peek_char().isspace()
-    def category_uni_not_space(self, ctx):
-        return not ctx.peek_char().isspace()
-    def category_uni_word(self, ctx):
-        return _sre._is_uni_word(ctx.peek_char())
-    def category_uni_not_word(self, ctx):
-        return not _sre._is_uni_word(ctx.peek_char())
-    def category_uni_linebreak(self, ctx):
-        return _sre._is_uni_linebreak(ctx.peek_char())
-    def category_uni_not_linebreak(self, ctx):
-        return not _sre._is_uni_linebreak(ctx.peek_char())
-    def unknown(self, ctx):
-        return False
-
-_ChcodeDispatcher.build_dispatch_table(CHCODES, "")
-
-
 def _log(message):
     if 0:
         print message

Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py	(original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py	Wed Aug 10 23:05:20 2005
@@ -1,5 +1,7 @@
 from pypy.interpreter.baseobjspace import ObjSpace
 
+#### Category helpers
+
 ascii_char_info = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0,
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 25, 25, 25, 25, 25, 25, 25, 25,
@@ -16,25 +18,31 @@
 uni_linebreaks = {10: True, 13: True, 28: True, 29: True, 30: True, 133: True,
                   8232: True, 8233: True}
 
-def _is_digit(space, w_char):
+def is_digit(space, w_char):
     code = space.int_w(space.ord(w_char))
     return space.newbool(code < 128 and ascii_char_info[code] & 1)
 
-def _is_space(space, w_char):
+def is_uni_digit(space, w_char):  # digit test delegated to w_char's own isdigit() method
+    return space.newbool(space.is_true(space.call_method(w_char, "isdigit")))
+
+def is_space(space, w_char):
     code = space.int_w(space.ord(w_char))
     return space.newbool(code < 128 and ascii_char_info[code] & 2)
 
-def _is_word(space, w_char):
+def is_uni_space(space, w_char):  # whitespace test delegated to w_char's own isspace() method
+    return space.newbool(space.is_true(space.call_method(w_char, "isspace")))
+
+def is_word(space, w_char):
     code = space.int_w(space.ord(w_char))
     return space.newbool(code < 128 and ascii_char_info[code] & 16)
 
-def _is_uni_word(space, w_char):
+def is_uni_word(space, w_char):
     code = space.int_w(space.ord(w_char))
     w_unichar = space.newunicode([code])
     isalnum = space.is_true(space.call_method(w_unichar, "isalnum"))
     return space.newbool(isalnum or code == underline)
 
-def _is_loc_word(space, w_char):
+def is_loc_word(space, w_char):
     code = space.int_w(space.ord(w_char))
     if code > 255:
         return space.newbool(False)
@@ -44,9 +52,34 @@
     isalnum = space.is_true(space.call_method(w_char_not_uni, "isalnum"))
     return space.newbool(isalnum or code == underline)
 
-def _is_linebreak(space, w_char):
+def is_linebreak(space, w_char):
     return space.newbool(space.int_w(space.ord(w_char)) == linebreak)
 
-def _is_uni_linebreak(space, w_char):
+def is_uni_linebreak(space, w_char):
     code = space.int_w(space.ord(w_char))
     return space.newbool(uni_linebreaks.has_key(code))
+
+
+#### Category dispatch
+
+def category_dispatch(space, w_chcode, w_char):  # wrapped bool: does w_char match category w_chcode?
+    chcode = space.int_w(w_chcode)
+    if chcode < 0 or chcode >= len(category_dispatch_table):  # unknown code; a negative one would wrap around
+        return space.newbool(False)
+    w_function, negate = category_dispatch_table[chcode]  # (helper, invert-result flag)
+    w_result = w_function(space, w_char)
+    if negate:
+        return space.newbool(not space.is_true(w_result))  # re-wrap the inverted answer
+    else:
+        return w_result
+
+# Maps opcodes by indices to (function, negate) tuples.
+category_dispatch_table = [  # list position is the category code; negate=True inverts the helper's answer
+    (is_digit, False), (is_digit, True), (is_space, False),
+    (is_space, True), (is_word, False), (is_word, True),
+    (is_linebreak, False), (is_linebreak, True), (is_loc_word, False),
+    (is_loc_word, True), (is_uni_digit, False), (is_uni_digit, True),
+    (is_uni_space, False), (is_uni_space, True), (is_uni_word, False),
+    (is_uni_word, True), (is_uni_linebreak, False),
+    (is_uni_linebreak, True)
+]

Added: pypy/dist/pypy/module/_sre/test/autopath.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/module/_sre/test/autopath.py	Wed Aug 10 23:05:20 2005
@@ -0,0 +1,120 @@
+"""
+self cloning, automatic path configuration 
+
+copy this into any subdirectory of pypy from which scripts need 
+to be run, typically all of the test subdirs. 
+The idea is that any such script simply issues
+
+    import autopath
+
+and this will make sure that the parent directory containing "pypy"
+is in sys.path. 
+
+If you modify the master "autopath.py" version (in pypy/tool/autopath.py) 
+you can directly run it which will copy itself on all autopath.py files
+it finds under the pypy root directory. 
+
+This module always provides these attributes:
+
+    pypydir    pypy root directory path 
+    this_dir   directory where this autopath.py resides 
+
+"""
+
+
+def __dirinfo(part):
+    """ return (partdir, this_dir) and insert parent of partdir
+    into sys.path.  If the parent directories don't have the part
+    an EnvironmentError is raised."""
+
+    import sys, os
+    try:
+        head = this_dir = os.path.realpath(os.path.dirname(__file__))
+    except NameError:  # no __file__ available: use the script's location instead
+        head = this_dir = os.path.realpath(os.path.dirname(sys.argv[0]))
+
+    while head:  # climb one component per pass; NOTE(review): split('/') yields ('/', '') -- confirm this terminates at the root
+        partdir = head
+        head, tail = os.path.split(head)
+        if tail == part:
+            break  # partdir is the directory named part, head is its parent
+    else:  # loop ran out without finding a component named part
+        raise EnvironmentError, "'%s' missing in '%r'" % (partdir, this_dir)  # NOTE(review): formats partdir, not part -- confirm
+    
+    checkpaths = sys.path[:]  # iterate over a copy because sys.path is mutated below
+    pypy_root = os.path.join(head, '')  # head with a trailing separator, for prefix tests
+    
+    while checkpaths:
+        orig = checkpaths.pop()
+        fullorig = os.path.join(os.path.realpath(orig), '')
+        if fullorig.startswith(pypy_root):
+            if os.path.exists(os.path.join(fullorig, '__init__.py')):
+                sys.path.remove(orig)  # drop package directories under pypy_root from sys.path
+    try:
+        sys.path.remove(head)
+    except ValueError:
+        pass  # head was not on sys.path yet
+    sys.path.insert(0, head)  # the parent of partdir always ends up first
+
+    munged = {}  # dotted module path -> module object, registered in sys.modules below
+    for name, mod in sys.modules.items():
+        fn = getattr(mod, '__file__', None)
+        if '.' in name or not isinstance(fn, str):
+            continue  # only top-level module names backed by a file are considered
+        newname = os.path.splitext(os.path.basename(fn))[0]
+        if not newname.startswith(part + '.'):  # NOTE(review): newname is a file basename; presumably this rarely matches -- confirm
+            continue
+        path = os.path.join(os.path.dirname(os.path.realpath(fn)), '')
+        if path.startswith(pypy_root) and newname != part:
+            modpaths = os.path.normpath(path[len(pypy_root):]).split(os.sep)  # path relative to pypy_root, as components
+            if newname != '__init__':
+                modpaths.append(newname)
+            modpath = '.'.join(modpaths)
+            if modpath not in sys.modules:
+                munged[modpath] = mod
+
+    for name, mod in munged.iteritems():
+        if name not in sys.modules:
+            sys.modules[name] = mod  # alias the already-loaded module under its dotted name
+        if '.' in name:
+            prename = name[:name.rfind('.')]  # parent package path
+            postname = name[len(prename)+1:]  # last component
+            if prename not in sys.modules:
+                __import__(prename)
+                if not hasattr(sys.modules[prename], postname):
+                    setattr(sys.modules[prename], postname, mod)  # attach as attribute of the parent package
+
+    return partdir, this_dir
+
+def __clone():
+    """ clone master version of autopath.py into all subdirs """
+    from os.path import join, walk
+    if not this_dir.endswith(join('pypy','tool')):  # refuse unless this file lives in a pypy/tool directory
+        raise EnvironmentError("can only clone master version "
+                               "'%s'" % join(pypydir, 'tool',_myname))
+
+
+    def sync_walker(arg, dirname, fnames):  # walk() callback; arg is the master file's text
+        if _myname in fnames:
+            fn = join(dirname, _myname)
+            f = open(fn, 'rwb+')  # NOTE(review): unusual mode string 'rwb+' -- confirm it is accepted everywhere
+            try:
+                if f.read() == arg:
+                    print "checkok", fn
+                else:
+                    print "syncing", fn
+                    f = open(fn, 'w')  # NOTE(review): rebinds f; the first handle is never closed explicitly -- confirm
+                    f.write(arg)
+            finally:
+                f.close()  # closes whichever handle f names at this point
+    s = open(join(pypydir, 'tool', _myname), 'rb').read()  # text of the master copy
+    walk(pypydir, sync_walker, s)  # visit the tree under pypydir, passing s as arg
+
+_myname = 'autopath.py'  # file name looked for in each directory by __clone
+
+# set guaranteed attributes
+
+pypydir, this_dir = __dirinfo('pypy')  # runs at import time and also rewrites sys.path
+
+if __name__ == '__main__':  # executed as a script: push this file's text to the other copies
+    __clone()

Added: pypy/dist/pypy/module/_sre/test/test_interp_sre.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/module/_sre/test/test_interp_sre.py	Wed Aug 10 23:05:20 2005
@@ -0,0 +1,50 @@
+"""Interp-level _sre tests."""
+import autopath
+from py.test import raises
+import pypy.module._sre.interp_sre as isre
+
+EM_SPACE = u"\u2001"
+INDIAN_DIGIT = u"\u0966"
+
+def test_is_uni_linebreak(space):  # each char is checked via space.wrap and via space.newunicode
+    for char in ["\n", "\r"]:
+        assert space.is_true(isre.is_uni_linebreak(space, space.wrap(char)))
+        assert space.is_true(isre.is_uni_linebreak(space, space.newunicode([ord(char)])))
+    for char in [" ", "b"]:
+        assert not space.is_true(isre.is_uni_linebreak(space, space.wrap(char)))
+        assert not space.is_true(isre.is_uni_linebreak(space, space.newunicode([ord(char)])))
+    assert space.is_true(isre.is_uni_linebreak(space, space.newunicode([8232])))  # 8232 == 0x2028, listed in uni_linebreaks
+
+def test_is_uni_word(space):  # word chars: letters and "_"; punctuation and spaces must be rejected
+    for char in ["a", "_", "\xe4"]:
+        assert space.is_true(isre.is_uni_word(space, space.wrap(char)))
+    for char in ["a", "_", "\xe4", u"\u00e4", u"\u03a0"]:
+        assert space.is_true(isre.is_uni_word(space, space.newunicode([ord(char)])))
+    for char in [".", " "]:
+        assert not space.is_true(isre.is_uni_word(space, space.wrap(char)))
+    for char in [".", " ", EM_SPACE]:
+        assert not space.is_true(isre.is_uni_word(space, space.newunicode([ord(char)])))
+
+def test_is_loc_word(space):  # covers ASCII digits plus a few rejected chars, one of them above code 255
+    # should also test chars actually affected by locale (between 128 and 256)
+    for char in ["1", "2"]:
+        assert space.is_true(isre.is_loc_word(space, space.wrap(char)))
+        assert space.is_true(isre.is_loc_word(space, space.newunicode([ord(char)])))
+    for char in [" ", u".", u"\u03a0"]:
+        assert not space.is_true(isre.is_loc_word(space, space.newunicode([ord(char)])))
+
+def test_is_uni_digit(space):  # ASCII digits plus one non-ASCII digit (INDIAN_DIGIT, U+0966)
+    for char in ["0", "9"]:
+        assert space.is_true(isre.is_uni_digit(space, space.wrap(char)))
+    for char in ["0", "9", INDIAN_DIGIT]:
+        assert space.is_true(isre.is_uni_digit(space, space.newunicode([ord(char)])))
+    for char in [" ", "s"]:
+        assert not space.is_true(isre.is_uni_digit(space, space.wrap(char)))
+
+def test_is_uni_space(space):  # ASCII whitespace plus one non-ASCII space (EM_SPACE, U+2001)
+    for char in [" ", "\t"]:
+        assert space.is_true(isre.is_uni_space(space, space.wrap(char)))
+    for char in ["\v", "\n", EM_SPACE]:
+        assert space.is_true(isre.is_uni_space(space, space.newunicode([ord(char)])))
+    for char in ["a", "1"]:
+        assert not space.is_true(isre.is_uni_space(space, space.wrap(char)))



More information about the Pypy-commit mailing list