[pypy-svn] r71059 - pypy/branch/rsre/pypy/rlib/rsre

arigo at codespeak.net arigo at codespeak.net
Tue Feb 2 15:02:14 CET 2010


Author: arigo
Date: Tue Feb  2 15:02:14 2010
New Revision: 71059

Modified:
   pypy/branch/rsre/pypy/rlib/rsre/rsre_char.py
Log:
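Optimize check_charset() a bit: the set_* helpers now take
(pattern_codes, index, char_code) and return either the next index or a
negative SET_OK / SET_NOT_OK result, and NEGATE is handled inline in
check_charset().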


Modified: pypy/branch/rsre/pypy/rlib/rsre/rsre_char.py
==============================================================================
--- pypy/branch/rsre/pypy/rlib/rsre/rsre_char.py	(original)
+++ pypy/branch/rsre/pypy/rlib/rsre/rsre_char.py	Tue Feb  2 15:02:14 2010
@@ -150,96 +150,88 @@
 
 ##### Charset evaluation
 
-SET_OK = 1
-SET_NOT_OK = -1
-SET_NOT_FINISHED = 0
+SET_OK = -1
+SET_NOT_OK = -2
 
 def check_charset(char_code, context):
     """Checks whether a character matches set of arbitrary length. Currently
     assumes the set starts at the first member of pattern_codes."""
-    result = SET_NOT_FINISHED
-    context.set_ok = SET_OK
-    backup_code_position = context.code_position
-    while result == SET_NOT_FINISHED:
-        opcode = context.peek_code()
+    pattern_codes = context.pattern_codes
+    index = context.code_position
+    negated = SET_OK
+    while index >= 0:
+        opcode = pattern_codes[index]
         for i, function in set_dispatch_unroll:
             if function is not None and opcode == i:
-                result = function(context, char_code)
+                index = function(pattern_codes, index, char_code)
                 break
         else:
-            return False
-    context.code_position = backup_code_position
-    return result == SET_OK
+            if opcode == 26:   # NEGATE
+                negated ^= (SET_OK ^ SET_NOT_OK)
+                index += 1
+            else:
+                return False
+    return index == negated
 
-def set_failure(ctx, char_code):
-    return -ctx.set_ok
+def set_failure(pat, index, char_code):
+    return SET_NOT_OK
 
-def set_literal(ctx, char_code):
+def set_literal(pat, index, char_code):
     # <LITERAL> <code>
-    if ctx.peek_code(1) == char_code:
-        return ctx.set_ok
+    if pat[index+1] == char_code:
+        return SET_OK
     else:
-        ctx.skip_code(2)
-        return SET_NOT_FINISHED
+        return index + 2
 
-def set_category(ctx, char_code):
+def set_category(pat, index, char_code):
     # <CATEGORY> <code>
-    if category_dispatch(ctx.peek_code(1), char_code):
-        return ctx.set_ok
+    if category_dispatch(pat[index+1], char_code):
+        return SET_OK
     else:
-        ctx.skip_code(2)
-        return SET_NOT_FINISHED
+        return index + 2
 
-def set_charset(ctx, char_code):
+def set_charset(pat, index, char_code):
     # <CHARSET> <bitmap> (16 bits per code word)
-    ctx.skip_code(1) # point to beginning of bitmap
     if CODESIZE == 2:
-        if char_code < 256 and ctx.peek_code(char_code >> 4) \
+        if char_code < 256 and pat[index+1+(char_code >> 4)] \
                                         & (1 << (char_code & 15)):
-            return ctx.set_ok
-        ctx.skip_code(16) # skip bitmap
+            return SET_OK
+        return index + 17  # skip bitmap
     else:
-        if char_code < 256 and ctx.peek_code(char_code >> 5) \
+        if char_code < 256 and pat[index+1+(char_code >> 5)] \
                                         & (1 << (char_code & 31)):
-            return ctx.set_ok
-        ctx.skip_code(8) # skip bitmap
-    return SET_NOT_FINISHED
+            return SET_OK
+        return index + 9   # skip bitmap
 
-def set_range(ctx, char_code):
+def set_range(pat, index, char_code):
     # <RANGE> <lower> <upper>
-    if ctx.peek_code(1) <= char_code <= ctx.peek_code(2):
-        return ctx.set_ok
-    ctx.skip_code(3)
-    return SET_NOT_FINISHED
-
-def set_negate(ctx, char_code):
-    ctx.set_ok = -ctx.set_ok
-    ctx.skip_code(1)
-    return SET_NOT_FINISHED
+    if pat[index+1] <= char_code <= pat[index+2]:
+        return SET_OK
+    return index + 3
 
-def set_bigcharset(ctx, char_code):
+def set_bigcharset(pat, index, char_code):
     # <BIGCHARSET> <blockcount> <256 blockindices> <blocks>
     # XXX this function probably needs a makeover
-    count = ctx.peek_code(1)
-    ctx.skip_code(2)
+    count = pat[index+1]
+    index += 2
     if char_code < 65536:
         block_index = char_code >> 8
         # NB: there are CODESIZE block indices per bytecode
-        a = to_byte_array(ctx.peek_code(block_index / CODESIZE))
+        a = to_byte_array(pat[index+(block_index / CODESIZE)])
         block = a[block_index % CODESIZE]
-        ctx.skip_code(256 / CODESIZE) # skip block indices
+        index += 256 / CODESIZE  # skip block indices
         if CODESIZE == 2:
             shift = 4
         else:
             shift = 5
-        block_value = ctx.peek_code(block * (32 / CODESIZE)
-                                                + ((char_code & 255) >> shift))
+        block_value = pat[index+(block * (32 / CODESIZE)
+                                 + ((char_code & 255) >> shift))]
         if block_value & (1 << (char_code & ((8 * CODESIZE) - 1))):
-            return ctx.set_ok
+            return SET_OK
     else:
-        ctx.skip_code(256 / CODESIZE) # skip block indices
-    ctx.skip_code(count * (32 / CODESIZE)) # skip blocks
-    return SET_NOT_FINISHED
+        index += 256 / CODESIZE  # skip block indices
+    index += count * (32 / CODESIZE)  # skip blocks
+    return index
 
 def to_byte_array(int_value):
     """Creates a list of bytes out of an integer representing data that is
@@ -256,6 +248,8 @@
     set_failure, None, None, None, None, None, None, None, None,
     set_category, set_charset, set_bigcharset, None, None, None,
     None, None, None, None, set_literal, None, None, None, None,
-    None, None, set_negate, set_range
+    None, None,
+    None,  # NEGATE
+    set_range
 ]
 set_dispatch_unroll = unrolling_iterable(enumerate(set_dispatch_table))



More information about the Pypy-commit mailing list