[pypy-svn] r16266 - in pypy/dist/pypy/module/_sre: . test
nik at codespeak.net
nik at codespeak.net
Tue Aug 23 15:13:43 CEST 2005
Author: nik
Date: Tue Aug 23 15:13:42 2005
New Revision: 16266
Modified:
pypy/dist/pypy/module/_sre/interp_sre.py
pypy/dist/pypy/module/_sre/test/test_interp_sre.py
Log:
Various fixes towards annotatability. Removed the use of the app-level array module.
Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py (original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py Tue Aug 23 15:13:42 2005
@@ -1,7 +1,6 @@
from pypy.interpreter.baseobjspace import ObjSpace, Wrappable
# XXX is it allowed to import app-level module like this?
from pypy.module._sre.app_info import CODESIZE
-from pypy.module.array.app_array import array
from pypy.interpreter.typedef import GetSetProperty, TypeDef
from pypy.interpreter.gateway import interp2app
@@ -62,7 +61,7 @@
# This id marks the end of a group.
self.lastindex = mark_nr / 2 + 1
if mark_nr >= len(self.marks):
- self.marks.extend([None] * (mark_nr - len(self.marks) + 1))
+ self.marks.extend([-1] * (mark_nr - len(self.marks) + 1))
self.marks[mark_nr] = self.space.int_w(w_position)
def get_marks(self, w_group_index):
@@ -79,9 +78,7 @@
regs = [self.space.newtuple([self.space.wrap(self.start), self.space.wrap(self.string_position)])]
for group in range(self.space.int_w(w_group_count)):
mark_index = 2 * group
- if mark_index + 1 < len(self.marks) \
- and self.marks[mark_index] is not None \
- and self.marks[mark_index + 1] is not None:
+ if mark_index + 1 < len(self.marks):
regs.append(self.space.newtuple([self.space.wrap(self.marks[mark_index]),
self.space.wrap(self.marks[mark_index + 1])]))
else:
@@ -128,7 +125,6 @@
lower = interp2app(W_State.lower),
)
-
#### Category helpers
ascii_char_info = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 6, 2,
@@ -143,9 +139,7 @@
underline = ord("_")
# Static list of all unicode codepoints reported by Py_UNICODE_ISLINEBREAK.
-# Using a dict as a poor man's set.
-uni_linebreaks = {10: True, 13: True, 28: True, 29: True, 30: True, 133: True,
- 8232: True, 8233: True}
+uni_linebreaks = [10, 13, 28, 29, 30, 133, 8232, 8233]
def is_digit(space, w_char):
code = space.int_w(space.ord(w_char))
@@ -186,7 +180,7 @@
def is_uni_linebreak(space, w_char):
code = space.int_w(space.ord(w_char))
- return uni_linebreaks.has_key(code)
+ return code in uni_linebreaks
#### Category dispatch
@@ -217,6 +211,11 @@
class MatchContext:
# XXX This is not complete. It's tailored to at dispatch currently.
+
+ # XXX These constants should maybe not be here
+ OK = 1
+ NOT_OK = -1
+ NOT_FINISHED = 0
def __init__(self, space, pattern_codes, w_string, string_position, end):
self.space = space
@@ -225,7 +224,7 @@
self.string_position = string_position
self.end = end
self.code_position = 0
- self.set_ok = True # XXX maybe get rid of this
+ self.set_ok = self.OK # XXX maybe get rid of this
def peek_char(self, peek=0):
return self.space.getitem(self.w_string,
@@ -318,17 +317,18 @@
char_code = space.int_w(w_char_code)
context = MatchContext(space, pattern_codes, w_string,
space.int_w(w_string_position), space.int_w(space.len(w_string)))
- result = None
- while result is None:
+ result = MatchContext.NOT_FINISHED
+ while result == MatchContext.NOT_FINISHED:
opcode = context.peek_code()
if opcode >= len(set_dispatch_table):
return space.newbool(False)
function = set_dispatch_table[opcode]
result = function(space, context, char_code)
- return space.newbool(result)
+ print result
+ return space.newbool(result == MatchContext.OK)
def set_failure(space, ctx, char_code):
- return not ctx.set_ok
+ return -ctx.set_ok
def set_literal(space, ctx, char_code):
# <LITERAL> <code>
@@ -336,6 +336,7 @@
return ctx.set_ok
else:
ctx.skip_code(2)
+ return MatchContext.NOT_FINISHED
def set_category(space, ctx, char_code):
# <CATEGORY> <code>
@@ -344,6 +345,7 @@
return ctx.set_ok
else:
ctx.skip_code(2)
+ return MatchContext.NOT_FINISHED
def set_charset(space, ctx, char_code):
# <CHARSET> <bitmap> (16 bits per code word)
@@ -358,16 +360,19 @@
& (1 << (char_code & 31)):
return ctx.set_ok
ctx.skip_code(8) # skip bitmap
+ return MatchContext.NOT_FINISHED
def set_range(space, ctx, char_code):
# <RANGE> <lower> <upper>
if ctx.peek_code(1) <= char_code <= ctx.peek_code(2):
return ctx.set_ok
ctx.skip_code(3)
+ return MatchContext.NOT_FINISHED
def set_negate(space, ctx, char_code):
- ctx.set_ok = not ctx.set_ok
+ ctx.set_ok = -ctx.set_ok
ctx.skip_code(1)
+ return MatchContext.NOT_FINISHED
def set_bigcharset(space, ctx, char_code):
# <BIGCHARSET> <blockcount> <256 blockindices> <blocks>
@@ -377,19 +382,37 @@
if char_code < 65536:
block_index = char_code >> 8
# NB: there are CODESIZE block indices per bytecode
- # XXX can we really use array here?
- a = array("B")
- a.fromstring(array(CODESIZE == 2 and "H" or "I",
- [ctx.peek_code(block_index / CODESIZE)]).tostring())
+ a = to_byte_array(ctx.peek_code(block_index / CODESIZE))
block = a[block_index % CODESIZE]
ctx.skip_code(256 / CODESIZE) # skip block indices
+ if CODESIZE == 2:
+ shift = 4
+ else:
+ shift = 5
block_value = ctx.peek_code(block * (32 / CODESIZE)
- + ((char_code & 255) >> (CODESIZE == 2 and 4 or 5)))
+ + ((char_code & 255) >> shift))
if block_value & (1 << (char_code & ((8 * CODESIZE) - 1))):
return ctx.set_ok
else:
ctx.skip_code(256 / CODESIZE) # skip block indices
ctx.skip_code(count * (32 / CODESIZE)) # skip blocks
+ return MatchContext.NOT_FINISHED
+
def to_byte_array(int_value, size=0, byteorder=None):
    """Split an integer into a list of its bytes, laid out in memory order.

    int_value -- the integer to split; only its lowest ``size`` bytes are
                 looked at.
    size      -- how many bytes to produce. 0 (the default) means CODESIZE,
                 which is what the original one-argument form always used.
    byteorder -- "big" for most-significant byte first; any other string is
                 treated as little-endian. None (the default) means the
                 host's sys.byteorder.

    Returns a new list of ``size`` ints, each in the range 0..255.
    """
    import sys
    # An int sentinel (rather than None) keeps ``size`` a plain integer;
    # NOTE(review): presumably this matters for the annotator -- confirm.
    if size <= 0:
        size = CODESIZE
    if byteorder is None:
        byteorder = sys.byteorder
    # Peel off the bytes least-significant first (little-endian layout).
    byte_array = [0] * size
    for i in range(size):
        byte_array[i] = int_value & 0xff
        int_value = int_value >> 8
    if byteorder == "big":
        # Big-endian layout is the mirror image of what was built above.
        byte_array.reverse()
    return byte_array
set_dispatch_table = [
set_failure, None, None, None, None, None, None, None, None,
Modified: pypy/dist/pypy/module/_sre/test/test_interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/test/test_interp_sre.py (original)
+++ pypy/dist/pypy/module/_sre/test/test_interp_sre.py Tue Aug 23 15:13:42 2005
@@ -1,5 +1,6 @@
"""Interp-level _sre tests."""
import autopath
+import sys
from py.test import raises
import pypy.module._sre.interp_sre as isre
@@ -80,3 +81,23 @@
def test_getlower(space):
assert space.int_w(isre.getlower(space, space.wrap(ord("A")), space.wrap(0))) == ord("a")
+
def test_get_byte_array(space):
    """to_byte_array must split a code word into CODESIZE bytes in host order."""
    # Expected results are written once, in little-endian order, for the
    # configured code word width; the big-endian expectation is the mirror
    # image, so it is derived instead of being spelled out a second time.
    if isre.CODESIZE == 2:
        cases = [
            (1, [1, 0]),
            (256, [0, 1]),
            (258, [2, 1]),
        ]
    else:
        cases = [
            (1, [1, 0, 0, 0]),
            (256, [0, 1, 0, 0]),
            (0x01020304, [4, 3, 2, 1]),
        ]
    for value, little_endian_bytes in cases:
        expected = list(little_endian_bytes)
        if sys.byteorder == "big":
            expected.reverse()
        assert expected == isre.to_byte_array(value)
More information about the Pypy-commit
mailing list