[Python-checkins] cpython: Close #7475: Restore binary & text transform codecs

nick.coghlan python-checkins at python.org
Sat Nov 23 02:14:37 CET 2013


http://hg.python.org/cpython/rev/5e960d2c2156
changeset:   87388:5e960d2c2156
user:        Nick Coghlan <ncoghlan at gmail.com>
date:        Sat Nov 23 11:13:36 2013 +1000
summary:
  Close #7475: Restore binary & text transform codecs

The codecs themselves were restored in Python 3.2; this
completes the restoration by adding back the convenience
aliases.

These aliases were originally left out due to confusing
errors when attempting to use them with the text encoding
specific convenience methods. Python 3.4 includes several
improvements to those errors, thus permitting the aliases
to be restored as well.

files:
  Doc/library/codecs.rst   |  110 ++++++++++++++++----------
  Doc/whatsnew/3.4.rst     |   50 ++++++++---
  Lib/encodings/aliases.py |   36 ++++----
  Lib/test/test_codecs.py  |   20 ++++
  4 files changed, 139 insertions(+), 77 deletions(-)


diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -1188,6 +1188,9 @@
 arbitrary data transforms rather than just text encodings).  For asymmetric
 codecs, the stated purpose describes the encoding direction.
 
+Text Encodings
+^^^^^^^^^^^^^^
+
 The following codecs provide :class:`str` to :class:`bytes` encoding and
 :term:`bytes-like object` to :class:`str` decoding, similar to the Unicode text
 encodings.
@@ -1234,62 +1237,83 @@
 |                    |         | .. deprecated:: 3.3       |
 +--------------------+---------+---------------------------+
 
-The following codecs provide :term:`bytes-like object` to :class:`bytes`
-mappings.
+.. _binary-transforms:
 
+Binary Transforms
+^^^^^^^^^^^^^^^^^
 
-.. tabularcolumns:: |l|L|L|
+The following codecs provide binary transforms: :term:`bytes-like object`
+to :class:`bytes` mappings.
 
-+----------------------+------------------------------+------------------------------+
-| Codec                | Purpose                      | Encoder / decoder            |
-+======================+==============================+==============================+
-| base64_codec [#b64]_ | Convert operand to MIME      | :meth:`base64.b64encode` /   |
-|                      | base64 (the result always    | :meth:`base64.b64decode`     |
-|                      | includes a trailing          |                              |
-|                      | ``'\n'``)                    |                              |
-|                      |                              |                              |
-|                      | .. versionchanged:: 3.4      |                              |
-|                      |    accepts any               |                              |
-|                      |    :term:`bytes-like object` |                              |
-|                      |    as input for encoding and |                              |
-|                      |    decoding                  |                              |
-+----------------------+------------------------------+------------------------------+
-| bz2_codec            | Compress the operand         | :meth:`bz2.compress` /       |
-|                      | using bz2                    | :meth:`bz2.decompress`       |
-+----------------------+------------------------------+------------------------------+
-| hex_codec            | Convert operand to           | :meth:`base64.b16encode` /   |
-|                      | hexadecimal                  | :meth:`base64.b16decode`     |
-|                      | representation, with two     |                              |
-|                      | digits per byte              |                              |
-+----------------------+------------------------------+------------------------------+
-| quopri_codec         | Convert operand to MIME      | :meth:`quopri.encodestring` /|
-|                      | quoted printable             | :meth:`quopri.decodestring`  |
-+----------------------+------------------------------+------------------------------+
-| uu_codec             | Convert the operand using    | :meth:`uu.encode` /          |
-|                      | uuencode                     | :meth:`uu.decode`            |
-+----------------------+------------------------------+------------------------------+
-| zlib_codec           | Compress the operand         | :meth:`zlib.compress` /      |
-|                      | using gzip                   | :meth:`zlib.decompress`      |
-+----------------------+------------------------------+------------------------------+
+
+.. tabularcolumns:: |l|L|L|L|
+
++----------------------+------------------+------------------------------+------------------------------+
+| Codec                | Aliases          | Purpose                      | Encoder / decoder            |
++======================+==================+==============================+==============================+
+| base64_codec [#b64]_ | base64, base_64  | Convert operand to MIME      | :meth:`base64.b64encode` /   |
+|                      |                  | base64 (the result always    | :meth:`base64.b64decode`     |
+|                      |                  | includes a trailing          |                              |
+|                      |                  | ``'\n'``)                    |                              |
+|                      |                  |                              |                              |
+|                      |                  | .. versionchanged:: 3.4      |                              |
+|                      |                  |    accepts any               |                              |
+|                      |                  |    :term:`bytes-like object` |                              |
+|                      |                  |    as input for encoding and |                              |
+|                      |                  |    decoding                  |                              |
++----------------------+------------------+------------------------------+------------------------------+
+| bz2_codec            | bz2              | Compress the operand         | :meth:`bz2.compress` /       |
+|                      |                  | using bz2                    | :meth:`bz2.decompress`       |
++----------------------+------------------+------------------------------+------------------------------+
+| hex_codec            | hex              | Convert operand to           | :meth:`base64.b16encode` /   |
+|                      |                  | hexadecimal                  | :meth:`base64.b16decode`     |
+|                      |                  | representation, with two     |                              |
+|                      |                  | digits per byte              |                              |
++----------------------+------------------+------------------------------+------------------------------+
+| quopri_codec         | quopri,          | Convert operand to MIME      | :meth:`quopri.encodestring` /|
+|                      | quotedprintable, | quoted printable             | :meth:`quopri.decodestring`  |
+|                      | quoted_printable |                              |                              |
++----------------------+------------------+------------------------------+------------------------------+
+| uu_codec             | uu               | Convert the operand using    | :meth:`uu.encode` /          |
+|                      |                  | uuencode                     | :meth:`uu.decode`            |
++----------------------+------------------+------------------------------+------------------------------+
+| zlib_codec           | zip, zlib        | Compress the operand         | :meth:`zlib.compress` /      |
+|                      |                  | using gzip                   | :meth:`zlib.decompress`      |
++----------------------+------------------+------------------------------+------------------------------+
 
 .. [#b64] In addition to :term:`bytes-like objects <bytes-like object>`,
    ``'base64_codec'`` also accepts ASCII-only instances of :class:`str` for
    decoding
 
+.. versionadded:: 3.2
+   Restoration of the binary transforms.
 
-The following codecs provide :class:`str` to :class:`str` mappings.
+.. versionchanged:: 3.4
+   Restoration of the aliases for the binary transforms.
 
-.. tabularcolumns:: |l|L|
 
-+--------------------+---------------------------+
-| Codec              | Purpose                   |
-+====================+===========================+
-| rot_13             | Returns the Caesar-cypher |
-|                    | encryption of the operand |
-+--------------------+---------------------------+
+.. _text-transforms:
+
+Text Transforms
+^^^^^^^^^^^^^^^
+
+The following codec provides a text transform: a :class:`str` to :class:`str`
+mapping.
+
+.. tabularcolumns:: |l|l|L|
+
++--------------------+---------+---------------------------+
+| Codec              | Aliases | Purpose                   |
++====================+=========+===========================+
+| rot_13             | rot13   | Returns the Caesar-cypher |
+|                    |         | encryption of the operand |
++--------------------+---------+---------------------------+
 
 .. versionadded:: 3.2
-   bytes-to-bytes and str-to-str codecs.
+   Restoration of the ``rot_13`` text transform.
+
+.. versionchanged:: 3.4
+   Restoration of the ``rot13`` alias.
 
 
 :mod:`encodings.idna` --- Internationalized Domain Names in Applications
diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst
--- a/Doc/whatsnew/3.4.rst
+++ b/Doc/whatsnew/3.4.rst
@@ -103,7 +103,8 @@
 * :ref:`PEP 446: Make newly created file descriptors non-inheritable <pep-446>`.
 * command line option for :ref:`isolated mode <using-on-misc-options>`,
   (:issue:`16499`).
-* improvements to handling of non-Unicode codecs
+* :ref:`improvements <codec-handling-improvements>` in the handling of
+  codecs that are not text encodings
 
 Significantly Improved Library Modules:
 
@@ -173,8 +174,10 @@
       PEP written and implemented by Victor Stinner.
 
 
-Improvements to handling of non-Unicode codecs
-==============================================
+.. _codec-handling-improvements:
+
+Improvements to codec handling
+==============================
 
 Since it was first introduced, the :mod:`codecs` module has always been
 intended to operate as a type-neutral dynamic encoding and decoding
@@ -186,7 +189,7 @@
 As a key step in clarifying the situation, the :meth:`codecs.encode` and
 :meth:`codecs.decode` convenience functions are now properly documented in
 Python 2.7, 3.3 and 3.4. These functions have existed in the :mod:`codecs`
-module and have been covered by the regression test suite since Python 2.4,
+module (and have been covered by the regression test suite) since Python 2.4,
 but were previously only discoverable through runtime introspection.
 
 Unlike the convenience methods on :class:`str`, :class:`bytes` and
@@ -199,43 +202,58 @@
 encodings provided in the standard library and direct users towards these
 general purpose convenience functions when appropriate::
 
-    >>> import codecs
-
-    >>> b"abcdef".decode("hex_codec")
+    >>> b"abcdef".decode("hex")
     Traceback (most recent call last):
       File "<stdin>", line 1, in <module>
-    LookupError: 'hex_codec' is not a text encoding; use codecs.decode() to handle arbitrary codecs
+    LookupError: 'hex' is not a text encoding; use codecs.decode() to handle arbitrary codecs
 
-    >>> "hello".encode("rot_13")
+    >>> "hello".encode("rot13")
     Traceback (most recent call last):
       File "<stdin>", line 1, in <module>
-    LookupError: 'rot_13' is not a text encoding; use codecs.encode() to handle arbitrary codecs
+    LookupError: 'rot13' is not a text encoding; use codecs.encode() to handle arbitrary codecs
 
 In a related change, whenever it is feasible without breaking backwards
 compatibility, exceptions raised during encoding and decoding operations
 will be wrapped in a chained exception of the same type that mentions the
 name of the codec responsible for producing the error::
 
-    >>> codecs.decode(b"abcdefgh", "hex_codec")
+    >>> import codecs
+
+    >>> codecs.decode(b"abcdefgh", "hex")
     binascii.Error: Non-hexadecimal digit found
 
     The above exception was the direct cause of the following exception:
 
     Traceback (most recent call last):
       File "<stdin>", line 1, in <module>
-    binascii.Error: decoding with 'hex_codec' codec failed (Error: Non-hexadecimal digit found)
+    binascii.Error: decoding with 'hex' codec failed (Error: Non-hexadecimal digit found)
 
-    >>> codecs.encode("hello", "bz2_codec")
+    >>> codecs.encode("hello", "bz2")
     TypeError: 'str' does not support the buffer interface
 
     The above exception was the direct cause of the following exception:
 
     Traceback (most recent call last):
       File "<stdin>", line 1, in <module>
-    TypeError: encoding with 'bz2_codec' codec failed (TypeError: 'str' does not support the buffer interface)
+    TypeError: encoding with 'bz2' codec failed (TypeError: 'str' does not support the buffer interface)
 
-(Contributed by Nick Coghlan in :issue:`17827`, :issue:`17828` and
-:issue:`19619`)
+Finally, as the examples above show, these improvements have permitted
+the restoration of the convenience aliases for the non-Unicode codecs that
+were themselves restored in Python 3.2. This means that encoding binary data
+to and from its hexadecimal representation (for example) can now be written
+as::
+
+    >>> from codecs import encode, decode
+    >>> encode(b"hello", "hex")
+    b'68656c6c6f'
+    >>> decode(b"68656c6c6f", "hex")
+    b'hello'
+
+The binary and text transforms provided in the standard library are detailed
+in :ref:`binary-transforms` and :ref:`text-transforms`.
+
+(Contributed by Nick Coghlan in :issue:`7475`, :issue:`17827`,
+:issue:`17828` and :issue:`19619`)
 
 .. _pep-451:
 
diff --git a/Lib/encodings/aliases.py b/Lib/encodings/aliases.py
--- a/Lib/encodings/aliases.py
+++ b/Lib/encodings/aliases.py
@@ -33,9 +33,9 @@
     'us'                 : 'ascii',
     'us_ascii'           : 'ascii',
 
-    ## base64_codec codec
-    #'base64'             : 'base64_codec',
-    #'base_64'            : 'base64_codec',
+    # base64_codec codec
+    'base64'             : 'base64_codec',
+    'base_64'            : 'base64_codec',
 
     # big5 codec
     'big5_tw'            : 'big5',
@@ -45,8 +45,8 @@
     'big5_hkscs'         : 'big5hkscs',
     'hkscs'              : 'big5hkscs',
 
-    ## bz2_codec codec
-    #'bz2'                : 'bz2_codec',
+    # bz2_codec codec
+    'bz2'                : 'bz2_codec',
 
     # cp037 codec
     '037'                : 'cp037',
@@ -248,8 +248,8 @@
     'cp936'              : 'gbk',
     'ms936'              : 'gbk',
 
-    ## hex_codec codec
-    #'hex'                : 'hex_codec',
+    # hex_codec codec
+    'hex'                : 'hex_codec',
 
     # hp_roman8 codec
     'roman8'             : 'hp_roman8',
@@ -450,13 +450,13 @@
     'cp154'              : 'ptcp154',
     'cyrillic_asian'     : 'ptcp154',
 
-    ## quopri_codec codec
-    #'quopri'             : 'quopri_codec',
-    #'quoted_printable'   : 'quopri_codec',
-    #'quotedprintable'    : 'quopri_codec',
+    # quopri_codec codec
+    'quopri'             : 'quopri_codec',
+    'quoted_printable'   : 'quopri_codec',
+    'quotedprintable'    : 'quopri_codec',
 
-    ## rot_13 codec
-    #'rot13'              : 'rot_13',
+    # rot_13 codec
+    'rot13'              : 'rot_13',
 
     # shift_jis codec
     'csshiftjis'         : 'shift_jis',
@@ -518,12 +518,12 @@
     'utf8_ucs2'          : 'utf_8',
     'utf8_ucs4'          : 'utf_8',
 
-    ## uu_codec codec
-    #'uu'                 : 'uu_codec',
+    # uu_codec codec
+    'uu'                 : 'uu_codec',
 
-    ## zlib_codec codec
-    #'zip'                : 'zlib_codec',
-    #'zlib'               : 'zlib_codec',
+    # zlib_codec codec
+    'zip'                : 'zlib_codec',
+    'zlib'               : 'zlib_codec',
 
     # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
     'x_mac_japanese'      : 'shift_jis',
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -2320,18 +2320,29 @@
     "quopri_codec",
     "hex_codec",
 ]
+
+transform_aliases = {
+    "base64_codec": ["base64", "base_64"],
+    "uu_codec": ["uu"],
+    "quopri_codec": ["quopri", "quoted_printable", "quotedprintable"],
+    "hex_codec": ["hex"],
+    "rot_13": ["rot13"],
+}
+
 try:
     import zlib
 except ImportError:
     pass
 else:
     bytes_transform_encodings.append("zlib_codec")
+    transform_aliases["zlib_codec"] = ["zip", "zlib"]
 try:
     import bz2
 except ImportError:
     pass
 else:
     bytes_transform_encodings.append("bz2_codec")
+    transform_aliases["bz2_codec"] = ["bz2"]
 
 class TransformCodecTest(unittest.TestCase):
 
@@ -2445,6 +2456,15 @@
     # Unfortunately, the bz2 module throws OSError, which the codec
     # machinery currently can't wrap :(
 
+    # Ensure codec aliases from http://bugs.python.org/issue7475 work
+    def test_aliases(self):
+        for codec_name, aliases in transform_aliases.items():
+            expected_name = codecs.lookup(codec_name).name
+            for alias in aliases:
+                with self.subTest(alias=alias):
+                    info = codecs.lookup(alias)
+                    self.assertEqual(info.name, expected_name)
+
 
 # The codec system tries to wrap exceptions in order to ensure the error
 # mentions the operation being performed and the codec involved. We

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list