[pypy-svn] r79019 - in pypy/branch/fast-forward/pypy/module/pyexpat: . test

afa at codespeak.net afa at codespeak.net
Thu Nov 11 23:53:21 CET 2010


Author: afa
Date: Thu Nov 11 23:53:20 2010
New Revision: 79019

Modified:
   pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py
   pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py
Log:
pyexpat: correctly parse strings with non-utf8 encoding.


Modified: pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py	(original)
+++ pypy/branch/fast-forward/pypy/module/pyexpat/interp_pyexpat.py	Thu Nov 11 23:53:20 2010
@@ -278,10 +278,7 @@
 
     def __init__(self, encoding, namespace_separator, w_intern,
                  _from_external_entity=False):
-        if encoding:
-            self.encoding = encoding
-        else:
-            self.encoding = 'utf-8'
+        self.encoding = encoding
         self.namespace_separator = namespace_separator
 
         self.w_intern = w_intern
@@ -339,10 +336,9 @@
 
     def w_convert(self, space, s):
         if self.returns_unicode:
-            return space.call_function(
-                space.getattr(space.wrap(s), space.wrap("decode")),
-                space.wrap(self.encoding),
-                space.wrap("strict"))
+            from pypy.rlib.runicode import str_decode_utf_8
+            return space.wrap(str_decode_utf_8(
+                s, len(s), "strict")[0])
         else:
             return space.wrap(s)
 

Modified: pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py
==============================================================================
--- pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py	(original)
+++ pypy/branch/fast-forward/pypy/module/pyexpat/test/test_parser.py	Thu Nov 11 23:53:20 2010
@@ -31,3 +31,25 @@
         p.buffer_size = 150
         assert p.buffer_size == 150
         raises(TypeError, setattr, p, 'buffer_size', sys.maxint + 1)
+
+    def test_encoding(self):
+        # use one of the few encodings built into expat
+        xml = "<?xml version='1.0' encoding='iso-8859-1'?><s>caf\xe9</s>"
+        import pyexpat
+        p = pyexpat.ParserCreate()
+        def gotText(text):
+            assert text == u"caf\xe9"
+        p.CharacterDataHandler = gotText
+        assert p.returns_unicode
+        p.Parse(xml)
+
+    def test_explicit_encoding(self):
+        xml = "<?xml version='1.0'?><s>caf\xe9</s>"
+        import pyexpat
+        p = pyexpat.ParserCreate(encoding='iso-8859-1')
+        def gotText(text):
+            assert text == u"caf\xe9"
+        p.CharacterDataHandler = gotText
+        assert p.returns_unicode
+        p.Parse(xml)
+



More information about the Pypy-commit mailing list