[Python-checkins] r80824 - in python/branches/py3k: Doc/library/tarfile.rst Lib/tarfile.py Lib/test/test_tarfile.py Misc/NEWS
victor.stinner
python-checkins at python.org
Wed May 5 23:43:58 CEST 2010
Author: victor.stinner
Date: Wed May 5 23:43:57 2010
New Revision: 80824
Log:
Issue #8390: tarfile uses surrogateescape as the default error handler
(instead of replace in read mode or strict in write mode)
Modified:
python/branches/py3k/Doc/library/tarfile.rst
python/branches/py3k/Lib/tarfile.py
python/branches/py3k/Lib/test/test_tarfile.py
python/branches/py3k/Misc/NEWS
Modified: python/branches/py3k/Doc/library/tarfile.rst
==============================================================================
--- python/branches/py3k/Doc/library/tarfile.rst (original)
+++ python/branches/py3k/Doc/library/tarfile.rst Wed May 5 23:43:57 2010
@@ -218,7 +218,7 @@
.. versionadded:: 3.2
Added support for the context manager protocol.
-.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors=None, pax_headers=None, debug=0, errorlevel=0)
+.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=0)
All following arguments are optional and can be accessed as instance attributes
as well.
@@ -267,6 +267,9 @@
to be handled. The default settings will work for most users.
See section :ref:`tar-unicode` for in-depth information.
+ .. versionchanged:: 3.2
+ Use ``'surrogateescape'`` as the default for the *errors* argument.
+
The *pax_headers* argument is an optional dictionary of strings which
will be added as a pax global header if *format* is :const:`PAX_FORMAT`.
@@ -449,11 +452,14 @@
a :class:`TarInfo` object.
-.. method:: TarInfo.tobuf(format=DEFAULT_FORMAT, encoding=ENCODING, errors='strict')
+.. method:: TarInfo.tobuf(format=DEFAULT_FORMAT, encoding=ENCODING, errors='surrogateescape')
Create a string buffer from a :class:`TarInfo` object. For information on the
arguments see the constructor of the :class:`TarFile` class.
+ .. versionchanged:: 3.2
+ Use ``'surrogateescape'`` as the default for the *errors* argument.
+
A ``TarInfo`` object has the following public data attributes:
@@ -701,11 +707,10 @@
appropriately, this conversion may fail.
The *errors* argument defines how characters are treated that cannot be
-converted. Possible values are listed in section :ref:`codec-base-classes`. In
-read mode the default scheme is ``'replace'``. This avoids unexpected
-:exc:`UnicodeError` exceptions and guarantees that an archive can always be
-read. In write mode the default value for *errors* is ``'strict'``. This
-ensures that name information is not altered unnoticed.
+converted. Possible values are listed in section :ref:`codec-base-classes`.
+The default scheme is ``'surrogateescape'`` which Python also uses for its
+file system calls, see :ref:`os-filenames`.
In case of writing :const:`PAX_FORMAT` archives, *encoding* is ignored because
-non-ASCII metadata is stored using *UTF-8*.
+non-ASCII metadata is stored using *UTF-8*. Storing surrogate characters is not
+possible and will raise a :exc:`UnicodeEncodeError`.
Modified: python/branches/py3k/Lib/tarfile.py
==============================================================================
--- python/branches/py3k/Lib/tarfile.py (original)
+++ python/branches/py3k/Lib/tarfile.py Wed May 5 23:43:57 2010
@@ -978,7 +978,7 @@
return info
- def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
+ def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):
"""Return a tar header as a string of 512 byte blocks.
"""
info = self.get_info()
@@ -1490,7 +1490,7 @@
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
- errors=None, pax_headers=None, debug=None, errorlevel=None):
+ errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None):
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
read from an existing archive, 'a' to append data to an existing
file or 'w' to create a new file overwriting an existing one. `mode'
@@ -1531,13 +1531,7 @@
self.ignore_zeros = ignore_zeros
if encoding is not None:
self.encoding = encoding
-
- if errors is not None:
- self.errors = errors
- elif mode == "r":
- self.errors = "replace"
- else:
- self.errors = "strict"
+ self.errors = errors
if pax_headers is not None and self.format == PAX_FORMAT:
self.pax_headers = pax_headers
Modified: python/branches/py3k/Lib/test/test_tarfile.py
==============================================================================
--- python/branches/py3k/Lib/test/test_tarfile.py (original)
+++ python/branches/py3k/Lib/test/test_tarfile.py Wed May 5 23:43:57 2010
@@ -1118,8 +1118,8 @@
if self.format != tarfile.PAX_FORMAT:
tar = tarfile.open(tmpname, encoding="ascii")
t = tar.getmember("foo")
- self.assertEqual(t.uname, "\ufffd\ufffd\ufffd")
- self.assertEqual(t.gname, "\ufffd\ufffd\ufffd")
+ self.assertEqual(t.uname, "\udce4\udcf6\udcfc")
+ self.assertEqual(t.gname, "\udce4\udcf6\udcfc")
class GNUUnicodeTest(UstarUnicodeTest):
Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Wed May 5 23:43:57 2010
@@ -348,6 +348,9 @@
Library
-------
+- Issue #8390: tarfile uses surrogateescape as the default error handler
+ (instead of replace in read mode or strict in write mode)
+
- Issue #7755: Use an unencumbered audio file for tests.
- Issue #8621: uuid.uuid4() returned the same sequence of values in the
More information about the Python-checkins
mailing list