[Python-checkins] python/dist/src/Lib/encodings utf_16.py, 1.4, 1.5 utf_16_be.py, 1.3, 1.4 utf_16_le.py, 1.3, 1.4 utf_8.py, 1.2, 1.3

doerwalter at users.sourceforge.net
Tue Sep 7 22:24:23 CEST 2004


Update of /cvsroot/python/python/dist/src/Lib/encodings
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv7801/Lib/encodings

Modified Files:
	utf_16.py utf_16_be.py utf_16_le.py utf_8.py 
Log Message:
SF patch #998993: The UTF-8 and UTF-16 stateful decoders now support
decoding incomplete input (when the input stream is temporarily exhausted).
codecs.StreamReader now implements buffering, which enables proper
readline support for the UTF-16 decoders. codecs.StreamReader.read()
has a new argument, chars, which specifies the number of characters to
return. codecs.StreamReader.readline() and codecs.StreamReader.readlines()
have a new argument, keepends. Trailing "\n"s will be stripped from the lines
if keepends is false. Added the C APIs PyUnicode_DecodeUTF8Stateful and
PyUnicode_DecodeUTF16Stateful.


Index: utf_16.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/utf_16.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -d -r1.4 -r1.5
--- utf_16.py	8 Aug 2002 20:19:19 -0000	1.4
+++ utf_16.py	7 Sep 2004 20:24:21 -0000	1.5
@@ -10,54 +10,40 @@
 
 ### Codec APIs
 
-class Codec(codecs.Codec):
+encode = codecs.utf_16_encode
 
-    # Note: Binding these as C functions will result in the class not
-    # converting them to methods. This is intended.
-    encode = codecs.utf_16_encode
-    decode = codecs.utf_16_decode
+def decode(input, errors='strict'):
+    return codecs.utf_16_decode(input, errors, True)
 
-class StreamWriter(Codec,codecs.StreamWriter):
+class StreamWriter(codecs.StreamWriter):
     def __init__(self, stream, errors='strict'):
-        self.bom_written = 0
+        self.bom_written = False
         codecs.StreamWriter.__init__(self, stream, errors)
 
-    def write(self, data):
-        result = codecs.StreamWriter.write(self, data)
-        if not self.bom_written:
-            self.bom_written = 1
-            if sys.byteorder == 'little':
-                self.encode = codecs.utf_16_le_encode
-            else:
-                self.encode = codecs.utf_16_be_encode
+    def encode(self, input, errors='strict'):
+        self.bom_written = True
+        result = codecs.utf_16_encode(input, errors)
+        if sys.byteorder == 'little':
+            self.encode = codecs.utf_16_le_encode
+        else:
+            self.encode = codecs.utf_16_be_encode
         return result
 
-class StreamReader(Codec,codecs.StreamReader):
-    def __init__(self, stream, errors='strict'):
-        self.bom_read = 0
-        codecs.StreamReader.__init__(self, stream, errors)
-
-    def read(self, size=-1):
-        if not self.bom_read:
-            signature = self.stream.read(2)
-            if signature == codecs.BOM_BE:
-                self.decode = codecs.utf_16_be_decode
-            elif signature == codecs.BOM_LE:
-                self.decode = codecs.utf_16_le_decode
-            else:
-                raise UnicodeError,"UTF-16 stream does not start with BOM"
-            if size > 2:
-                size -= 2
-            elif size >= 0:
-                size = 0
-            self.bom_read = 1
-        return codecs.StreamReader.read(self, size)
+class StreamReader(codecs.StreamReader):
 
-    def readline(self, size=None):
-        raise NotImplementedError, '.readline() is not implemented for UTF-16'
+    def decode(self, input, errors='strict'):
+        (object, consumed, byteorder) = \
+            codecs.utf_16_ex_decode(input, errors, 0, False)
+        if byteorder == -1:
+            self.decode = codecs.utf_16_le_decode
+        elif byteorder == 1:
+            self.decode = codecs.utf_16_be_decode
+        elif consumed>=2:
+            raise UnicodeError,"UTF-16 stream does not start with BOM"
+        return (object, consumed)
 
 ### encodings module API
 
 def getregentry():
 
-    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
+    return (encode,decode,StreamReader,StreamWriter)

Index: utf_16_be.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/utf_16_be.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- utf_16_be.py	8 Aug 2002 20:19:19 -0000	1.3
+++ utf_16_be.py	7 Sep 2004 20:24:21 -0000	1.4
@@ -10,23 +10,19 @@
 
 ### Codec APIs
 
-class Codec(codecs.Codec):
-
-    # Note: Binding these as C functions will result in the class not
-    # converting them to methods. This is intended.
-    encode = codecs.utf_16_be_encode
-    decode = codecs.utf_16_be_decode
+encode = codecs.utf_16_be_encode
 
-class StreamWriter(Codec,codecs.StreamWriter):
-    pass
+def decode(input, errors='strict'):
+    return codecs.utf_16_be_decode(input, errors, True)
 
-class StreamReader(Codec,codecs.StreamReader):
+class StreamWriter(codecs.StreamWriter):
+    encode = codecs.utf_16_be_encode
 
-    def readline(self, size=None):
-        raise NotImplementedError, '.readline() is not implemented for UTF-16-BE'
+class StreamReader(codecs.StreamReader):
+    decode = codecs.utf_16_be_decode
 
 ### encodings module API
 
 def getregentry():
 
-    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
+    return (encode,decode,StreamReader,StreamWriter)

Index: utf_16_le.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/utf_16_le.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -d -r1.3 -r1.4
--- utf_16_le.py	8 Aug 2002 20:19:19 -0000	1.3
+++ utf_16_le.py	7 Sep 2004 20:24:21 -0000	1.4
@@ -10,23 +10,20 @@
 
 ### Codec APIs
 
-class Codec(codecs.Codec):
+encode = codecs.utf_16_le_encode
 
-    # Note: Binding these as C functions will result in the class not
-    # converting them to methods. This is intended.
-    encode = codecs.utf_16_le_encode
-    decode = codecs.utf_16_le_decode
+def decode(input, errors='strict'):
+    return codecs.utf_16_le_decode(input, errors, True)
 
-class StreamWriter(Codec,codecs.StreamWriter):
-    pass
+class StreamWriter(codecs.StreamWriter):
+    encode = codecs.utf_16_le_encode
 
-class StreamReader(Codec,codecs.StreamReader):
+class StreamReader(codecs.StreamReader):
+    decode = codecs.utf_16_le_decode
 
-    def readline(self, size=None):
-        raise NotImplementedError, '.readline() is not implemented for UTF-16-LE'
 
 ### encodings module API
 
 def getregentry():
 
-    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
+    return (encode,decode,StreamReader,StreamWriter)

Index: utf_8.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/utf_8.py,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -d -r1.2 -r1.3
--- utf_8.py	8 Aug 2002 20:19:19 -0000	1.2
+++ utf_8.py	7 Sep 2004 20:24:21 -0000	1.3
@@ -10,21 +10,19 @@
 
 ### Codec APIs
 
-class Codec(codecs.Codec):
+encode = codecs.utf_8_encode
 
-    # Note: Binding these as C functions will result in the class not
-    # converting them to methods. This is intended.
-    encode = codecs.utf_8_encode
-    decode = codecs.utf_8_decode
+def decode(input, errors='strict'):
+    return codecs.utf_8_decode(input, errors, True)
 
-class StreamWriter(Codec,codecs.StreamWriter):
-    pass
+class StreamWriter(codecs.StreamWriter):
+    encode = codecs.utf_8_encode
 
-class StreamReader(Codec,codecs.StreamReader):
-    pass
+class StreamReader(codecs.StreamReader):
+    decode = codecs.utf_8_decode
 
 ### encodings module API
 
 def getregentry():
 
-    return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
+    return (encode,decode,StreamReader,StreamWriter)



More information about the Python-checkins mailing list