[pypy-svn] r75786 - in pypy/branch/rsre2/pypy/rlib/rsre: . test

arigo at codespeak.net arigo at codespeak.net
Fri Jul 2 19:40:01 CEST 2010


Author: arigo
Date: Fri Jul  2 19:39:59 2010
New Revision: 75786

Modified:
   pypy/branch/rsre2/pypy/rlib/rsre/rsre.py
   pypy/branch/rsre2/pypy/rlib/rsre/test/test_match.py
Log:
Implement the GROUPREF opcode (backreference matching).


Modified: pypy/branch/rsre2/pypy/rlib/rsre/rsre.py
==============================================================================
--- pypy/branch/rsre2/pypy/rlib/rsre/rsre.py	(original)
+++ pypy/branch/rsre2/pypy/rlib/rsre/rsre.py	Fri Jul  2 19:39:59 2010
@@ -10,7 +10,10 @@
 OPCODE_AT                 = 6
 OPCODE_BRANCH             = 7
 #OPCODE_CALL              = 8
-OPCODE_CATEGORY           = 9
+#OPCODE_CATEGORY          = 9
+#OPCODE_CHARSET           = 10
+#OPCODE_BIGCHARSET        = 11
+OPCODE_GROUPREF           = 12
 OPCODE_IN                 = 15
 OPCODE_INFO               = 17
 OPCODE_JUMP               = 18
@@ -42,12 +45,7 @@
 
     def get_mark(self, gid):
         """Use this for testing."""
-        mark = self.match_marks
-        while mark is not None:
-            if mark.gid == gid:
-                return mark.position
-            mark = mark.prev
-        return -1
+        return find_mark(self.match_marks, gid)
 
 
 class Mark(object):
@@ -58,6 +56,13 @@
         self.position = position
         self.prev = prev      # chained list
 
+def find_mark(mark, gid):
+    while mark is not None:
+        if mark.gid == gid:
+            return mark.position
+        mark = mark.prev
+    return -1
+
 
 def match(pattern, string):
     ctx = MatchContext(pattern, string)
@@ -126,6 +131,22 @@
         #elif op == OPCODE_CATEGORY:
         #   seems to be never produced
 
+        elif op == OPCODE_GROUPREF:
+            # match backreference
+            # <GROUPREF> <groupnum>
+            gid = ctx.pat(ppos) * 2
+            startptr = find_mark(marks, gid)
+            if startptr < 0:
+                return False
+            endptr = find_mark(marks, gid + 1)
+            if endptr < startptr:   # also includes the case "endptr == -1"
+                return False
+            for i in range(startptr, endptr):
+                if ptr >= ctx.end or ctx.str(ptr) != ctx.str(i):
+                    return False
+                ptr += 1
+            ppos += 1
+
         elif op == OPCODE_IN:
             # match set member (or non_member)
             # <IN> <skip> <set>

Modified: pypy/branch/rsre2/pypy/rlib/rsre/test/test_match.py
==============================================================================
--- pypy/branch/rsre2/pypy/rlib/rsre/test/test_match.py	(original)
+++ pypy/branch/rsre2/pypy/rlib/rsre/test/test_match.py	Fri Jul  2 19:39:59 2010
@@ -101,3 +101,14 @@
         assert rsre.match(r, "x")
         assert rsre.match(r, " ")
         assert not rsre.match(r, "n")
+
+    def test_groupref(self):
+        r, _ = get_code(r"(xx+)\1+$")     # match non-prime numbers of x
+        assert not rsre.match(r, "xx")
+        assert not rsre.match(r, "xxx")
+        assert     rsre.match(r, "xxxx")
+        assert not rsre.match(r, "xxxxx")
+        assert     rsre.match(r, "xxxxxx")
+        assert not rsre.match(r, "xxxxxxx")
+        assert     rsre.match(r, "xxxxxxxx")
+        assert     rsre.match(r, "xxxxxxxxx")



More information about the Pypy-commit mailing list