[pypy-svn] r71032 - in pypy/branch/rsre/pypy/rlib/rsre: . test

arigo at codespeak.net arigo at codespeak.net
Mon Feb 1 14:09:26 CET 2010


Author: arigo
Date: Mon Feb  1 14:09:25 2010
New Revision: 71032

Added:
   pypy/branch/rsre/pypy/rlib/rsre/test/make_x.py   (contents, props changed)
   pypy/branch/rsre/pypy/rlib/rsre/test/targetrsre.py   (contents, props changed)
Modified:
   pypy/branch/rsre/pypy/rlib/rsre/rsre_char.py
Log:
A translation target and some translation fixes.
This is all to be run manually for now, but the
target currently runs 5x slower on rlib/rsre than
on CPython, so it's a good starting point for
investigation.


Modified: pypy/branch/rsre/pypy/rlib/rsre/rsre_char.py
==============================================================================
--- pypy/branch/rsre/pypy/rlib/rsre/rsre_char.py	(original)
+++ pypy/branch/rsre/pypy/rlib/rsre/rsre_char.py	Mon Feb  1 14:09:25 2010
@@ -47,6 +47,7 @@
 
 def getlower(char_ord, flags):
     if flags & SRE_FLAG_UNICODE:
+        assert unicodedb is not None
         char_ord = unicodedb.tolower(char_ord)
     elif flags & SRE_FLAG_LOCALE:
         return tolower(char_ord)
@@ -89,18 +90,21 @@
     return code < 128 and (ascii_char_info[code] & 1 != 0)
 
 def is_uni_digit(code):
+    assert unicodedb is not None
     return unicodedb.isdigit(code)
 
 def is_space(code):
     return code < 128 and (ascii_char_info[code] & 2 != 0)
 
 def is_uni_space(code):
+    assert unicodedb is not None
     return unicodedb.isspace(code)
 
 def is_word(code):
     return code < 128 and (ascii_char_info[code] & 16 != 0)
 
 def is_uni_word(code):
+    assert unicodedb is not None
     return unicodedb.isalnum(code) or code == underline
 
 def is_loc_alnum(code):
@@ -113,6 +117,7 @@
     return code == linebreak
 
 def is_uni_linebreak(code):
+    assert unicodedb is not None
     return unicodedb.islinebreak(code)
 
 

Added: pypy/branch/rsre/pypy/rlib/rsre/test/make_x.py
==============================================================================
--- (empty file)
+++ pypy/branch/rsre/pypy/rlib/rsre/test/make_x.py	Mon Feb  1 14:09:25 2010
@@ -0,0 +1,15 @@
+
+
+g = open('x', 'w')
+
+for i in range(100000):
+    if i == 74747:
+        tag = 'title'
+    else:
+        tag = 'titl'
+    print >> g
+    print >> g, '<item>'
+    print >> g, '  <%s>FooBar%d</%s>' % (tag, i, tag)
+    print >> g, '</item>'
+
+g.close()

Added: pypy/branch/rsre/pypy/rlib/rsre/test/targetrsre.py
==============================================================================
--- (empty file)
+++ pypy/branch/rsre/pypy/rlib/rsre/test/targetrsre.py	Mon Feb  1 14:09:25 2010
@@ -0,0 +1,66 @@
+from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rsre import rsre
+import os, time
+
+
+# <item>\s*<title>(.*?)</title>
+r_code1 = [17, 18, 1, 21, 131091, 6, 6, 60, 105, 116, 101, 109, 62, 0,
+0, 0, 0, 0, 0, 19, 60, 19, 105, 19, 116, 19, 101, 19, 109, 19, 62, 29,
+9, 0, 65535, 15, 4, 9, 2, 0, 1, 19, 60, 19, 116, 19, 105, 19, 116, 19,
+108, 19, 101, 19, 62, 21, 0, 31, 5, 0, 65535, 2, 1, 21, 1, 19, 60, 19,
+47, 19, 116, 19, 105, 19, 116, 19, 108, 19, 101, 19, 62, 1]
+
+
+def read(filename):
+    fd = os.open(filename, os.O_RDONLY, 0666)
+    if fd < 0:
+        raise OSError
+    end = os.lseek(fd, 0, 2)
+    os.lseek(fd, 0, 0)
+    data = os.read(fd, intmask(end))
+    os.close(fd)
+    return data
+
+def search_in_file(filename):
+    data = read(filename)
+    p = 0
+    while True:
+        state = rsre.SimpleStringState(data, p)
+        res = state.search(r_code1)
+        if not res:
+            break
+        groups = state.create_regs(1)
+        matchstart, matchstop = groups[1]
+        assert 0 <= matchstart <= matchstop
+        print '%s: %s' % (filename, data[matchstart:matchstop])
+        p = groups[0][1]
+
+# __________  Entry point  __________
+
+def entry_point(argv):
+    start = time.time()
+    for fn in argv[1:]:
+        search_in_file(fn)
+    stop = time.time()
+    print stop - start
+    return 0
+
+# _____ Define and setup target ___
+
+def target(*args):
+    return entry_point, None
+
+# _____ Pure Python equivalent _____
+
+if __name__ == '__main__':
+    import re, sys
+    r = re.compile(r"<item>\s*<title>(.*?)</title>")
+    start = time.time()
+    for fn in sys.argv[1:]:
+        f = open(fn, 'rb')
+        data = f.read()
+        f.close()
+        for title in r.findall(data):
+            print '%s: %s' % (fn, title)
+    stop = time.time()
+    print '%.4fs' % (stop - start,)



More information about the Pypy-commit mailing list