[Python-checkins] python/dist/src/Lib/test test_codecs.py, 1.12, 1.13

Tue Sep 7 22:24:24 CEST 2004

Update of /cvsroot/python/python/dist/src/Lib/test
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7801/Lib/test

Modified Files:
	test_codecs.py 
Log Message:
SF patch #998993: The UTF-8 and the UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted).
codecs.StreamReader now implements buffering, which enables proper
readline support for the UTF-16 decoders. codecs.StreamReader.read()
has a new argument chars which specifies the number of characters to
return. codecs.StreamReader.readline() and codecs.StreamReader.readlines()
have a new argument keepends. Trailing "\n"s will be stripped from the lines
if keepends is false. Added C APIs PyUnicode_DecodeUTF8Stateful and
PyUnicode_DecodeUTF16Stateful.


Index: test_codecs.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/test/test_codecs.py,v
retrieving revision 1.12
retrieving revision 1.13
diff -u -d -r1.12 -r1.13

--- test_codecs.py	10 Jul 2004 12:06:10 -0000	1.12
+++ test_codecs.py	7 Sep 2004 20:24:21 -0000	1.13
@@ -3,7 +3,45 @@
 import codecs
 import StringIO
 
-class UTF16Test(unittest.TestCase):
+class Queue(object):
+    """
+    queue: write bytes at one end, read bytes from the other end
+    """
+    def __init__(self):
+        self._buffer = ""
+
+    def write(self, chars):
+        self._buffer += chars
+
+    def read(self, size=-1):
+        if size<0:
+            s = self._buffer
+            self._buffer = ""
+            return s
+        else:
+            s = self._buffer[:size]
+            self._buffer = self._buffer[size:]
+            return s
+
+class PartialReadTest(unittest.TestCase):
+    def check_partial(self, encoding, input, partialresults):
+        # get a StreamReader for the encoding and feed the bytestring version
+        # of input to the reader byte by byte. Read everything available from
+        # the StreamReader and check that the results equal the appropriate
+        # entries from partialresults.
+        q = Queue()
+        r = codecs.getreader(encoding)(q)
+        result = u""
+        for (c, partialresult) in zip(input.encode(encoding), partialresults):
+            q.write(c)
+            result += r.read()
+            self.assertEqual(result, partialresult)
+        # check that there's nothing left in the buffers
+        self.assertEqual(r.read(), u"")
+        self.assertEqual(r.bytebuffer, "")
+        self.assertEqual(r.charbuffer, u"")
+
+class UTF16Test(PartialReadTest):
 
     spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
     spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'
@@ -23,6 +61,81 @@
         f = reader(s)
         self.assertEquals(f.read(), u"spamspam")
 
+    def test_partial(self):
+        self.check_partial(
+            "utf-16",
+            u"\x00\xff\u0100\uffff",
+            [
+                u"", # first byte of BOM read
+                u"", # second byte of BOM read => byteorder known
+                u"",
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100\uffff",
+            ]
+        )
+
+class UTF16LETest(PartialReadTest):
+
+    def test_partial(self):
+        self.check_partial(
+            "utf-16-le",
+            u"\x00\xff\u0100\uffff",
+            [
+                u"",
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100\uffff",
+            ]
+        )
+
+class UTF16BETest(PartialReadTest):
+
+    def test_partial(self):
+        self.check_partial(
+            "utf-16-be",
+            u"\x00\xff\u0100\uffff",
+            [
+                u"",
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100",
+                u"\x00\xff\u0100\uffff",
+            ]
+        )
+
+class UTF8Test(PartialReadTest):
+
+    def test_partial(self):
+        self.check_partial(
+            "utf-8",
+            u"\x00\xff\u07ff\u0800\uffff",
+            [
+                u"\x00",
+                u"\x00",
+                u"\x00\xff",
+                u"\x00\xff",
+                u"\x00\xff\u07ff",
+                u"\x00\xff\u07ff",
+                u"\x00\xff\u07ff",
+                u"\x00\xff\u07ff\u0800",
+                u"\x00\xff\u07ff\u0800",
+                u"\x00\xff\u07ff\u0800",
+                u"\x00\xff\u07ff\u0800\uffff",
+            ]
+        )
+
 class EscapeDecodeTest(unittest.TestCase):
     def test_empty_escape_decode(self):
         self.assertEquals(codecs.escape_decode(""), ("", 0))
@@ -348,6 +461,9 @@
 def test_main():
     test_support.run_unittest(
         UTF16Test,
+        UTF16LETest,
+        UTF16BETest,
+        UTF8Test,
         EscapeDecodeTest,
         RecodingTest,
         PunycodeTest,