[Python-checkins] cpython (2.7): Issue #17656: Fix extraction of zip files with unicode member paths.

serhiy.storchaka python-checkins at python.org
Sat Apr 13 11:29:04 CEST 2013


http://hg.python.org/cpython/rev/d02507c9f973
changeset:   83282:d02507c9f973
branch:      2.7
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sat Apr 13 12:28:17 2013 +0300
summary:
  Issue #17656: Fix extraction of zip files with unicode member paths.

files:
  Lib/test/test_zipfile.py |  21 ++++++++++++++++++++-
  Lib/zipfile.py           |   5 ++++-
  Misc/NEWS                |   2 ++
  3 files changed, 26 insertions(+), 2 deletions(-)


diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -18,7 +18,7 @@
 from random import randint, random
 from unittest import skipUnless
 
-from test.test_support import TESTFN, run_unittest, findfile, unlink
+from test.test_support import TESTFN, TESTFN_UNICODE, run_unittest, findfile, unlink
 
 TESTFN2 = TESTFN + "2"
 TESTFNDIR = TESTFN + "d"
@@ -424,6 +424,25 @@
         with open(filename, 'rb') as f:
             self.assertEqual(f.read(), content)
 
+    def test_extract_unicode_filenames(self):
+        fnames = [u'foo.txt', os.path.basename(TESTFN_UNICODE)]
+        content = 'Test for unicode filename'
+        with zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED) as zipfp:
+            for fname in fnames:
+                zipfp.writestr(fname, content)
+
+        with zipfile.ZipFile(TESTFN2, "r") as zipfp:
+            for fname in fnames:
+                writtenfile = zipfp.extract(fname)
+
+                # make sure it was written to the right place
+                correctfile = os.path.join(os.getcwd(), fname)
+                correctfile = os.path.normpath(correctfile)
+                self.assertEqual(writtenfile, correctfile)
+
+                self.check_file(writtenfile, content)
+                os.remove(writtenfile)
+
     def test_extract_hackers_arcnames(self):
         hacknames = [
             ('../foo/bar', 'foo/bar'),
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1053,7 +1053,10 @@
         if os.path.sep == '\\':
             # filter illegal characters on Windows
             illegal = ':<>|"?*'
-            table = string.maketrans(illegal, '_' * len(illegal))
+            if isinstance(arcname, unicode):
+                table = {ord(c): ord('_') for c in illegal}
+            else:
+                table = string.maketrans(illegal, '_' * len(illegal))
             arcname = arcname.translate(table)
             # remove trailing dots
             arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -22,6 +22,8 @@
 Library
 -------
 
+- Issue #17656: Fix extraction of zip files with unicode member paths.
+
 - Issue #13355: Raise ValueError on random.triangular call with invalid params.
   Initial patch by Yuriy Senko.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list