[pypy-commit] pypy numpy-multidim: make compile use a real tokenizer - breaks test_zjit for now

fijal noreply at buildbot.pypy.org
Thu Nov 3 19:50:59 CET 2011


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: numpy-multidim
Changeset: r48712:6d64103f1147
Date: 2011-11-03 19:28 +0100
http://bitbucket.org/pypy/pypy/changeset/6d64103f1147/

Log:	make compile use a real tokenizer - breaks test_zjit for now

diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -23,6 +23,12 @@
 class WrongFunctionName(Exception):
     pass
 
+class TokenizerError(Exception):
+    pass
+
+class BadToken(Exception):
+    pass
+
 SINGLE_ARG_FUNCTIONS = ["sum", "prod", "max", "min", "all", "any", "unegative"]
 
 class FakeSpace(object):
@@ -192,7 +198,7 @@
         interp.variables[self.name] = self.expr.execute(interp)
 
     def __repr__(self):
-        return "%% = %r" % (self.name, self.expr)
+        return "%r = %r" % (self.name, self.expr)
 
 class ArrayAssignment(Node):
     def __init__(self, name, index, expr):
@@ -214,7 +220,7 @@
 
 class Variable(Node):
     def __init__(self, name):
-        self.name = name
+        self.name = name.strip()
 
     def execute(self, interp):
         return interp.variables[self.name]
@@ -332,7 +338,7 @@
 
 class FunctionCall(Node):
     def __init__(self, name, args):
-        self.name = name
+        self.name = name.strip()
         self.args = args
 
     def __repr__(self):
@@ -375,118 +381,174 @@
         else:
             raise WrongFunctionName
 
+import re
+
+_REGEXES = [
+    ('-?[\d]+', 'number'),
+    ('\[', 'array_left'),
+    (':', 'colon'),
+    ('\w+', 'identifier'),
+    ('\]', 'array_right'),
+    ('(->)|[\+\-\*\/]', 'operator'),
+    ('=', 'assign'),
+    (',', 'coma'),
+    ('\|', 'pipe'),
+    ('\(', 'paren_left'),
+    ('\)', 'paren_right'),
+]
+REGEXES = []
+
+for r, name in _REGEXES:
+    REGEXES.append((re.compile(' *(' + r + ')'), name))
+del _REGEXES
+
+class Token(object):
+    def __init__(self, name, v):
+        self.name = name
+        self.v = v
+
+    def __repr__(self):
+        return '(%s, %s)' % (self.name, self.v)
+
+empty = Token('', '')
+
+class TokenStack(object):
+    def __init__(self, tokens):
+        self.tokens = tokens
+        self.c = 0
+
+    def pop(self):
+        token = self.tokens[self.c]
+        self.c += 1
+        return token
+
+    def get(self, i):
+        if self.c + i >= len(self.tokens):
+            return empty
+        return self.tokens[self.c + i]
+
+    def remaining(self):
+        return len(self.tokens) - self.c
+
+    def push(self):
+        self.c -= 1
+
+    def __repr__(self):
+        return repr(self.tokens[self.c:])
+
 class Parser(object):
-    def parse_identifier(self, id):
-        id = id.strip(" ")
-        #assert id.isalpha()
-        return Variable(id)
+    def tokenize(self, line):
+        tokens = []
+        while True:
+            for r, name in REGEXES:
+                m = r.match(line)
+                if m is not None:
+                    g = m.group(0)
+                    tokens.append(Token(name, g))
+                    line = line[len(g):]
+                    if not line:
+                        return TokenStack(tokens)
+                    break
+            else:
+                raise TokenizerError(line)
 
-    def parse_expression(self, expr):
-        tokens = [i for i in expr.split(" ") if i]
-        if len(tokens) == 1:
-            return self.parse_constant_or_identifier(tokens[0])
+    def parse_number_or_slice(self, tokens):
+        start_tok = tokens.pop()
+        if start_tok.name == 'colon':
+            start = 0
+        else:
+            start = int(start_tok.v)
+            if tokens.get(0).name != 'colon':
+                return FloatConstant(start)
+            tokens.pop()
+        if not tokens.get(0).name in ['colon', 'number']:
+            stop = -1
+            step = 1
+        else:
+            next = tokens.pop()
+            if next.name == 'colon':
+                stop = -1
+                step = int(tokens.pop().v)
+            else:
+                stop = int(next.v)
+                if tokens.get(0).name == 'colon':
+                    tokens.pop()
+                    step = int(tokens.pop().v)
+                else:
+                    step = 1
+        return SliceConstant(start, stop, step)
+            
+        
+    def parse_expression(self, tokens):
         stack = []
-        tokens.reverse()
-        while tokens:
+        while tokens.remaining():
             token = tokens.pop()
-            if token == ')':
-                raise NotImplementedError
-            elif self.is_identifier_or_const(token):
-                if stack:
-                    name = stack.pop().name
-                    lhs = stack.pop()
-                    rhs = self.parse_constant_or_identifier(token)
-                    stack.append(Operator(lhs, name, rhs))
+            if token.name == 'identifier':
+                if tokens.remaining() and tokens.get(0).name == 'paren_left':
+                    stack.append(self.parse_function_call(token.v, tokens))
                 else:
-                    stack.append(self.parse_constant_or_identifier(token))
+                    stack.append(Variable(token.v))
+            elif token.name == 'array_left':
+                stack.append(ArrayConstant(self.parse_array_const(tokens)))
+            elif token.name == 'operator':
+                stack.append(Variable(token.v))
+            elif token.name == 'number' or token.name == 'colon':
+                tokens.push()
+                stack.append(self.parse_number_or_slice(tokens))
+            elif token.name == 'pipe':
+                stack.append(RangeConstant(tokens.pop().v))
+                end = tokens.pop()
+                assert end.name == 'pipe'
             else:
-                stack.append(Variable(token))
-        assert len(stack) == 1
-        return stack[-1]
+                tokens.push()
+                break
+        stack.reverse()
+        lhs = stack.pop()
+        while stack:
+            op = stack.pop()
+            assert isinstance(op, Variable)
+            rhs = stack.pop()
+            lhs = Operator(lhs, op.name, rhs)
+        return lhs
 
-    def parse_constant(self, v):
-        lgt = len(v)-1
-        assert lgt >= 0
-        if ':' in v:
-            # a slice
-            if v == ':':
-                return SliceConstant(0, 0, 0)
-            else:
-                l = v.split(':')
-                if len(l) == 2:
-                    one = l[0]
-                    two = l[1]
-                    if not one:
-                        one = 0
-                    else:
-                        one = int(one)
-                    return SliceConstant(int(l[0]), int(l[1]), 1)
-                else:
-                    three = int(l[2])
-                    # all can be empty
-                    if l[0]:
-                        one = int(l[0])
-                    else:
-                        one = 0
-                    if l[1]:
-                        two = int(l[1])
-                    else:
-                        two = -1
-                    return SliceConstant(one, two, three)
-                
-        if v[0] == '[':
-            return ArrayConstant([self.parse_constant(elem)
-                                  for elem in v[1:lgt].split(",")])
-        if v[0] == '|':
-            return RangeConstant(v[1:lgt])
-        return FloatConstant(v)
-
-    def is_identifier_or_const(self, v):
-        c = v[0]
-        if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or
-            (c >= '0' and c <= '9') or c in '-.[|:'):
-            if v == '-' or v == "->":
-                return False
-            return True
-        return False
-
-    def parse_function_call(self, v):
-        l = v.split('(')
-        assert len(l) == 2
-        name = l[0]
-        cut = len(l[1]) - 1
-        assert cut >= 0
-        args = [self.parse_constant_or_identifier(id)
-                for id in l[1][:cut].split(",")]
+    def parse_function_call(self, name, tokens):
+        args = []
+        tokens.pop() # lparen
+        while tokens.get(0).name != 'paren_right':
+            args.append(self.parse_expression(tokens))
         return FunctionCall(name, args)
 
-    def parse_constant_or_identifier(self, v):
-        c = v[0]
-        if (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z'):
-            if '(' in v:
-                return self.parse_function_call(v)
-            return self.parse_identifier(v)
-        return self.parse_constant(v)
-
-    def parse_array_subscript(self, v):
-        v = v.strip(" ")
-        l = v.split("[")
-        lgt = len(l[1]) - 1
-        assert lgt >= 0
-        rhs = self.parse_constant_or_identifier(l[1][:lgt])
-        return l[0], rhs
+    def parse_array_const(self, tokens):
+        elems = []
+        while True:
+            token = tokens.pop()
+            if token.name == 'number':
+                elems.append(FloatConstant(token.v))
+            elif token.name == 'array_left':
+                elems.append(ArrayConstant(self.parse_array_const(tokens)))
+            else:
+                raise BadToken()
+            token = tokens.pop()
+            if token.name == 'array_right':
+                return elems
+            assert token.name == 'coma'
         
-    def parse_statement(self, line):
-        if '=' in line:
-            lhs, rhs = line.split("=")
-            lhs = lhs.strip(" ")
-            if '[' in lhs:
-                name, index = self.parse_array_subscript(lhs)
-                return ArrayAssignment(name, index, self.parse_expression(rhs))
-            else: 
-                return Assignment(lhs, self.parse_expression(rhs))
-        else:
-            return Execute(self.parse_expression(line))
+    def parse_statement(self, tokens):
+        if (tokens.get(0).name == 'identifier' and
+            tokens.get(1).name == 'assign'):
+            lhs = tokens.pop().v
+            tokens.pop()
+            rhs = self.parse_expression(tokens)
+            return Assignment(lhs, rhs)
+        elif (tokens.get(0).name == 'identifier' and
+              tokens.get(1).name == 'array_left'):
+            name = tokens.pop().v
+            tokens.pop()
+            index = self.parse_expression(tokens)
+            tokens.pop()
+            tokens.pop()
+            return ArrayAssignment(name, index, self.parse_expression(tokens))
+        return Execute(self.parse_expression(tokens))
 
     def parse(self, code):
         statements = []
@@ -495,7 +557,8 @@
                 line = line.split('#', 1)[0]
             line = line.strip(" ")
             if line:
-                statements.append(self.parse_statement(line))
+                tokens = self.tokenize(line)
+                statements.append(self.parse_statement(tokens))
         return Code(statements)
 
 def numpy_compile(code):
diff --git a/pypy/module/micronumpy/test/test_compile.py b/pypy/module/micronumpy/test/test_compile.py
--- a/pypy/module/micronumpy/test/test_compile.py
+++ b/pypy/module/micronumpy/test/test_compile.py
@@ -177,3 +177,9 @@
         """)
         assert interp.results[0].value.val == 6
         
+    def test_multidim_getitem(self):
+        interp = self.run("""
+        a = [[1,2]]
+        a -> 0 -> 1
+        """)
+        assert interp.results[0].value.val == 2


More information about the pypy-commit mailing list