[pypy-commit] pypy py3k: Now reject u'' literals, expect to break many tests here and there...

Sat Jan 14 21:48:39 CET 2012

Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch: py3k
Changeset: r51328:662eb2c58644
Date: 2011-12-27 00:29 +0100
http://bitbucket.org/pypy/pypy/changeset/662eb2c58644/

Log:	Now reject u'' literals; expect this to break many tests here and
	there...

diff --git a/pypy/interpreter/pyparser/genpytokenize.py b/pypy/interpreter/pyparser/genpytokenize.py
--- a/pypy/interpreter/pyparser/genpytokenize.py
+++ b/pypy/interpreter/pyparser/genpytokenize.py
@@ -141,7 +141,7 @@
     # ____________________________________________________________
     def makeStrPrefix ():
         return chain(states,
-                     maybe(states, groupStr(states, "uUbB")),
+                     maybe(states, groupStr(states, "bB")),
                      maybe(states, groupStr(states, "rR")))
     # ____________________________________________________________
     contStr = group(states,
diff --git a/pypy/interpreter/pyparser/parsestring.py b/pypy/interpreter/pyparser/parsestring.py
--- a/pypy/interpreter/pyparser/parsestring.py
+++ b/pypy/interpreter/pyparser/parsestring.py
@@ -21,10 +21,6 @@
             ps += 1
             quote = s[ps]
             unicode = False
-        elif quote == 'u' or quote == 'U':
-            ps += 1
-            quote = s[ps]
-            unicode = True
         if quote == 'r' or quote == 'R':
             ps += 1
             quote = s[ps]
diff --git a/pypy/interpreter/pyparser/pytokenize.py b/pypy/interpreter/pyparser/pytokenize.py
--- a/pypy/interpreter/pyparser/pytokenize.py
+++ b/pypy/interpreter/pyparser/pytokenize.py
@@ -45,7 +45,7 @@
      'I': 1, 'J': 1, 'K': 1, 'L': 1,
      'M': 1, 'N': 1, 'O': 1, 'P': 1,
      'Q': 1, 'R': 3, 'S': 1, 'T': 1,
-     'U': 2, 'V': 1, 'W': 1, 'X': 1,
+     'U': 1, 'V': 1, 'W': 1, 'X': 1,
      'Y': 1, 'Z': 1, '[': 13, '\\': 17,
      ']': 13, '^': 12, '_': 1, '`': 13,
      'a': 1, 'b': 2, 'c': 1, 'd': 1,
@@ -53,7 +53,7 @@
      'i': 1, 'j': 1, 'k': 1, 'l': 1,
      'm': 1, 'n': 1, 'o': 1, 'p': 1,
      'q': 1, 'r': 3, 's': 1, 't': 1,
-     'u': 2, 'v': 1, 'w': 1, 'x': 1,
+     'u': 1, 'v': 1, 'w': 1, 'x': 1,
      'y': 1, 'z': 1, '{': 13, '|': 12,
      '}': 13, '~': 13},
     # 1
@@ -311,12 +311,10 @@
            '"' : doubleDFA,
            'r' : None,
            'R' : None,
-           'u' : None,
-           'U' : None,
            'b' : None,
            'B' : None}
 
-for uniPrefix in ("", "u", "U", "b", "B"):
+for uniPrefix in ("", "b", "B"):
     for rawPrefix in ("", "r", "R"):
         prefix = uniPrefix + rawPrefix
         endDFAs[prefix + "'''"] = single3DFA
@@ -332,20 +330,14 @@
 triple_quoted = {}
 for t in ("'''", '"""',
           "r'''", 'r"""', "R'''", 'R"""',
-          "u'''", 'u"""', "U'''", 'U"""',
           "b'''", 'b"""', "B'''", 'B"""',
-          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
-          "uR'''", 'uR"""', "UR'''", 'UR"""',
           "br'''", 'br"""', "Br'''", 'Br"""',
           "bR'''", 'bR"""', "BR'''", 'BR"""'):
     triple_quoted[t] = t
 single_quoted = {}
 for t in ("'", '"',
           "r'", 'r"', "R'", 'R"',
-          "u'", 'u"', "U'", 'U"',
           "b'", 'b"', "B'", 'B"',
-          "ur'", 'ur"', "Ur'", 'Ur"',
-          "uR'", 'uR"', "UR'", 'UR"',
           "br'", 'br"', "Br'", 'Br"',
           "bR'", 'bR"', "BR'", 'BR"'):
     single_quoted[t] = t
diff --git a/pypy/interpreter/pyparser/test/test_parsestring.py b/pypy/interpreter/pyparser/test/test_parsestring.py
--- a/pypy/interpreter/pyparser/test/test_parsestring.py
+++ b/pypy/interpreter/pyparser/test/test_parsestring.py
@@ -6,8 +6,8 @@
         space = self.space
         w_ret = parsestring.parsestr(space, None, literal)
         if isinstance(value, str):
-            assert space.type(w_ret) == space.w_str
-            assert space.str_w(w_ret) == value
+            assert space.type(w_ret) == space.w_bytes
+            assert space.bytes_w(w_ret) == value
         elif isinstance(value, unicode):
             assert space.type(w_ret) == space.w_unicode
             assert space.unicode_w(w_ret) == value
@@ -17,49 +17,49 @@
     def test_simple(self):
         space = self.space
         for s in ['hello world', 'hello\n world']:
-            self.parse_and_compare(repr(s), s)
+            self.parse_and_compare('b' + repr(s), s)
 
-        self.parse_and_compare("'''hello\\x42 world'''", 'hello\x42 world')
+        self.parse_and_compare("b'''hello\\x42 world'''", 'hello\x42 world')
 
         # octal
-        self.parse_and_compare(r'"\0"', chr(0))
-        self.parse_and_compare(r'"\07"', chr(7))
-        self.parse_and_compare(r'"\123"', chr(0123))
-        self.parse_and_compare(r'"\400"', chr(0))
-        self.parse_and_compare(r'"\9"', '\\' + '9')
-        self.parse_and_compare(r'"\08"', chr(0) + '8')
+        self.parse_and_compare(r'b"\0"', chr(0))
+        self.parse_and_compare(r'b"\07"', chr(7))
+        self.parse_and_compare(r'b"\123"', chr(0123))
+        self.parse_and_compare(r'b"\400"', chr(0))
+        self.parse_and_compare(r'b"\9"', '\\' + '9')
+        self.parse_and_compare(r'b"\08"', chr(0) + '8')
 
         # hexadecimal
-        self.parse_and_compare(r'"\xfF"', chr(0xFF))
-        self.parse_and_compare(r'"\""', '"')
-        self.parse_and_compare(r"'\''", "'")
-        for s in (r'"\x"', r'"\x7"', r'"\x7g"'):
+        self.parse_and_compare(r'b"\xfF"', chr(0xFF))
+        self.parse_and_compare(r'b"\""', '"')
+        self.parse_and_compare(r"b'\''", "'")
+        for s in (r'b"\x"', r'b"\x7"', r'b"\x7g"'):
             space.raises_w(space.w_ValueError,
                            parsestring.parsestr, space, None, s)
 
     def test_unicode(self):
         space = self.space
-        for s in [u'hello world', u'hello\n world']:
-            self.parse_and_compare(repr(s), s)
+        for s in ['hello world', 'hello\n world']:
+            self.parse_and_compare(repr(s), unicode(s))
 
-        self.parse_and_compare("u'''hello\\x42 world'''",
+        self.parse_and_compare("'''hello\\x42 world'''",
                                u'hello\x42 world')
-        self.parse_and_compare("u'''hello\\u0842 world'''",
+        self.parse_and_compare("'''hello\\u0842 world'''",
                                u'hello\u0842 world')
 
         s = "u'\x81'"
-        s = s.decode("koi8-u").encode("utf8")
+        s = s.decode("koi8-u").encode("utf8")[1:]
         w_ret = parsestring.parsestr(self.space, 'koi8-u', s)
         ret = space.unwrap(w_ret)
         assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'")
 
     def test_unicode_literals(self):
         space = self.space
-        w_ret = parsestring.parsestr(space, None, repr("hello"), True)
+        w_ret = parsestring.parsestr(space, None, repr("hello"))
         assert space.isinstance_w(w_ret, space.w_unicode)
-        w_ret = parsestring.parsestr(space, None, "b'hi'", True)
+        w_ret = parsestring.parsestr(space, None, "b'hi'")
         assert space.isinstance_w(w_ret, space.w_str)
-        w_ret = parsestring.parsestr(space, None, "r'hi'", True)
+        w_ret = parsestring.parsestr(space, None, "r'hi'")
         assert space.isinstance_w(w_ret, space.w_unicode)
 
     def test_bytes(self):
@@ -77,7 +77,7 @@
         s = s.decode("koi8-u").encode("utf8")
         w_ret = parsestring.parsestr(self.space, 'koi8-u', s)
         ret = space.unwrap(w_ret)
-        assert ret == eval("# -*- coding: koi8-u -*-\n'\x81'") 
+        assert ret == eval("# -*- coding: koi8-u -*-\nu'\x81'") 
 
     def test_multiline_unicode_strings_with_backslash(self):
         space = self.space