[pypy-commit] pypy py3k: add a new %8 formatting code, to indicate that the argument is a utf-8 encoded string and that it should be decoded when building the exception message

antocuni noreply at buildbot.pypy.org
Thu Aug 30 17:16:05 CEST 2012


Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: py3k
Changeset: r56945:73f75e9e27f2
Date: 2012-08-30 10:47 +0200
http://bitbucket.org/pypy/pypy/changeset/73f75e9e27f2/

Log:	add a new %8 formatting code, to indicate that the argument is a
	utf-8 encoded string and that it should be decoded when building the
	exception message

diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -308,7 +308,8 @@
     parts = valuefmt.split('%')
     i = 1
     while i < len(parts):
-        if parts[i].startswith('s') or parts[i].startswith('d'):
+        if (parts[i].startswith('s') or parts[i].startswith('d') or
+            parts[i].startswith('8')):
             formats.append(parts[i][0])
             parts[i] = parts[i][1:]
             i += 1
@@ -316,11 +317,12 @@
             parts[i-1] += '%' + parts[i+1]
             del parts[i:i+2]
         else:
-            raise ValueError("invalid format string (only %s or %d supported)")
+            raise ValueError("invalid format string (only %s, %d or %8 supported)")
     assert len(formats) > 0, "unsupported: no % command found"
     return tuple(parts), tuple(formats)
 
 def get_operrcls2(valuefmt):
+    from pypy.rlib.runicode import str_decode_utf_8
     valuefmt = valuefmt.decode('ascii')
     strings, formats = decompose_valuefmt(valuefmt)
     assert len(strings) == len(formats) + 1
@@ -349,6 +351,9 @@
                     lst[i+i] = string
                     if fmt == 'd':
                         lst[i+i+1] = str(value).decode('ascii')
+                    elif fmt == '8':
+                        univalue, _ = str_decode_utf_8(value, len(value), 'strict')
+                        lst[i+i+1] = univalue
                     else:
                         lst[i+i+1] = unicode(value)
                 lst[-1] = self.xstrings[-1]
diff --git a/pypy/interpreter/test/test_error.py b/pypy/interpreter/test/test_error.py
--- a/pypy/interpreter/test/test_error.py
+++ b/pypy/interpreter/test/test_error.py
@@ -45,6 +45,12 @@
     val = operr._compute_value()
     assert val == u"abc àèìòù"
 
+def test_operationerrfmt_utf8():
+    arg = u"àèìòù".encode('utf-8')
+    operr = operationerrfmt("w_type", "abc %8", arg)
+    val = operr._compute_value()
+    assert val == u"abc àèìòù"
+
 def test_errorstr(space):
     operr = OperationError(space.w_ValueError, space.wrap("message"))
     assert operr.errorstr(space) == "ValueError: message"


More information about the pypy-commit mailing list