[Python-checkins] cpython (3.2): Fixes Issue #6972: The zipfile module no longer overwrites files outside of

gregory.p.smith python-checkins at python.org
Fri Feb 1 20:40:33 CET 2013


http://hg.python.org/cpython/rev/0c5fa35c9f12
changeset:   81871:0c5fa35c9f12
branch:      3.2
parent:      81866:349419bb6283
user:        Gregory P. Smith <greg at krypto.org>
date:        Fri Feb 01 11:22:43 2013 -0800
summary:
  Fixes Issue #6972: The zipfile module no longer overwrites files outside of
its destination path when extracting malicious zip files.

files:
  Doc/library/zipfile.rst  |  17 +++-
  Lib/test/test_zipfile.py |  86 +++++++++++++++++++++++++--
  Lib/zipfile.py           |  23 ++++--
  Misc/NEWS                |   3 +
  4 files changed, 106 insertions(+), 23 deletions(-)


diff --git a/Doc/library/zipfile.rst b/Doc/library/zipfile.rst
--- a/Doc/library/zipfile.rst
+++ b/Doc/library/zipfile.rst
@@ -214,6 +214,16 @@
    to extract to.  *member* can be a filename or a :class:`ZipInfo` object.
    *pwd* is the password used for encrypted files.
 
+   .. note::
+
+      If a member filename is an absolute path, a drive/UNC sharepoint and
+      leading (back)slashes will be stripped, e.g.: ``///foo/bar`` becomes
+      ``foo/bar`` on Unix, and ``С:\foo\bar`` becomes ``foo\bar`` on Windows.
+      And all ``".."`` components in a member filename will be removed, e.g.:
+      ``../../foo../../ba..r`` becomes ``foo../ba..r``.  On Windows illegal
+      characters (``:``, ``<``, ``>``, ``|``, ``"``, ``?``, and ``*``)
+      replaced by underscore (``_``).
+
 
 .. method:: ZipFile.extractall(path=None, members=None, pwd=None)
 
@@ -222,12 +232,9 @@
    be a subset of the list returned by :meth:`namelist`.  *pwd* is the password
    used for encrypted files.
 
-   .. warning::
+   .. note::
 
-      Never extract archives from untrusted sources without prior inspection.
-      It is possible that files are created outside of *path*, e.g. members
-      that have absolute filenames starting with ``"/"`` or filenames with two
-      dots ``".."``.
+      See :meth:`extract` note.
 
 
 .. method:: ZipFile.printdir()
diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py
--- a/Lib/test/test_zipfile.py
+++ b/Lib/test/test_zipfile.py
@@ -29,7 +29,7 @@
 
 SMALL_TEST_DATA = [('_ziptest1', '1q2w3e4r5t'),
                    ('ziptest2dir/_ziptest2', 'qawsedrftg'),
-                   ('/ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'),
+                   ('ziptest2dir/ziptest3dir/_ziptest3', 'azsxdcfvgb'),
                    ('ziptest2dir/ziptest3dir/ziptest4dir/_ziptest3', '6y7u8i9o0p')]
 
 
@@ -409,10 +409,7 @@
                 writtenfile = zipfp.extract(fpath)
 
                 # make sure it was written to the right place
-                if os.path.isabs(fpath):
-                    correctfile = os.path.join(os.getcwd(), fpath[1:])
-                else:
-                    correctfile = os.path.join(os.getcwd(), fpath)
+                correctfile = os.path.join(os.getcwd(), fpath)
                 correctfile = os.path.normpath(correctfile)
 
                 self.assertEqual(writtenfile, correctfile)
@@ -434,10 +431,7 @@
         with zipfile.ZipFile(TESTFN2, "r") as zipfp:
             zipfp.extractall()
             for fpath, fdata in SMALL_TEST_DATA:
-                if os.path.isabs(fpath):
-                    outfile = os.path.join(os.getcwd(), fpath[1:])
-                else:
-                    outfile = os.path.join(os.getcwd(), fpath)
+                outfile = os.path.join(os.getcwd(), fpath)
 
                 with open(outfile, "rb") as f:
                     self.assertEqual(fdata.encode(), f.read())
@@ -447,6 +441,80 @@
         # remove the test file subdirectories
         shutil.rmtree(os.path.join(os.getcwd(), 'ziptest2dir'))
 
+    def check_file(self, filename, content):
+        self.assertTrue(os.path.isfile(filename))
+        with open(filename, 'rb') as f:
+            self.assertEqual(f.read(), content)
+
+    def test_extract_hackers_arcnames(self):
+        hacknames = [
+            ('../foo/bar', 'foo/bar'),
+            ('foo/../bar', 'foo/bar'),
+            ('foo/../../bar', 'foo/bar'),
+            ('foo/bar/..', 'foo/bar'),
+            ('./../foo/bar', 'foo/bar'),
+            ('/foo/bar', 'foo/bar'),
+            ('/foo/../bar', 'foo/bar'),
+            ('/foo/../../bar', 'foo/bar'),
+            ('//foo/bar', 'foo/bar'),
+            ('../../foo../../ba..r', 'foo../ba..r'),
+        ]
+        if os.path.sep == '\\':  # Windows.
+            hacknames.extend([
+                (r'..\foo\bar', 'foo/bar'),
+                (r'..\/foo\/bar', 'foo/bar'),
+                (r'foo/\..\/bar', 'foo/bar'),
+                (r'foo\/../\bar', 'foo/bar'),
+                (r'C:foo/bar', 'foo/bar'),
+                (r'C:/foo/bar', 'foo/bar'),
+                (r'C://foo/bar', 'foo/bar'),
+                (r'C:\foo\bar', 'foo/bar'),
+                (r'//conky/mountpoint/foo/bar', 'foo/bar'),
+                (r'\\conky\mountpoint\foo\bar', 'foo/bar'),
+                (r'///conky/mountpoint/foo/bar', 'conky/mountpoint/foo/bar'),
+                (r'\\\conky\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'),
+                (r'//conky//mountpoint/foo/bar', 'conky/mountpoint/foo/bar'),
+                (r'\\conky\\mountpoint\foo\bar', 'conky/mountpoint/foo/bar'),
+                (r'//?/C:/foo/bar', 'foo/bar'),
+                (r'\\?\C:\foo\bar', 'foo/bar'),
+                (r'C:/../C:/foo/bar', 'C_/foo/bar'),
+                (r'a:b\c<d>e|f"g?h*i', 'b/c_d_e_f_g_h_i'),
+            ])
+
+        for arcname, fixedname in hacknames:
+            content = b'foobar' + arcname.encode()
+            with zipfile.ZipFile(TESTFN2, 'w', zipfile.ZIP_STORED) as zipfp:
+                zipfp.writestr(arcname, content)
+
+            targetpath = os.path.join('target', 'subdir', 'subsub')
+            correctfile = os.path.join(targetpath, *fixedname.split('/'))
+
+            with zipfile.ZipFile(TESTFN2, 'r') as zipfp:
+                writtenfile = zipfp.extract(arcname, targetpath)
+                self.assertEqual(writtenfile, correctfile)
+            self.check_file(correctfile, content)
+            shutil.rmtree('target')
+
+            with zipfile.ZipFile(TESTFN2, 'r') as zipfp:
+                zipfp.extractall(targetpath)
+            self.check_file(correctfile, content)
+            shutil.rmtree('target')
+
+            correctfile = os.path.join(os.getcwd(), *fixedname.split('/'))
+
+            with zipfile.ZipFile(TESTFN2, 'r') as zipfp:
+                writtenfile = zipfp.extract(arcname)
+                self.assertEqual(writtenfile, correctfile)
+            self.check_file(correctfile, content)
+            shutil.rmtree(fixedname.split('/')[0])
+
+            with zipfile.ZipFile(TESTFN2, 'r') as zipfp:
+                zipfp.extractall()
+            self.check_file(correctfile, content)
+            shutil.rmtree(fixedname.split('/')[0])
+
+            os.remove(TESTFN2)
+
     def test_writestr_compression(self):
         zipfp = zipfile.ZipFile(TESTFN2, "w")
         zipfp.writestr("a.txt", "hello world", compress_type=zipfile.ZIP_STORED)
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -1062,17 +1062,22 @@
         """
         # build the destination pathname, replacing
         # forward slashes to platform specific separators.
-        # Strip trailing path separator, unless it represents the root.
-        if (targetpath[-1:] in (os.path.sep, os.path.altsep)
-            and len(os.path.splitdrive(targetpath)[1]) > 1):
-            targetpath = targetpath[:-1]
+        arcname = member.filename.replace('/', os.path.sep)
 
-        # don't include leading "/" from file name if present
-        if member.filename[0] == '/':
-            targetpath = os.path.join(targetpath, member.filename[1:])
-        else:
-            targetpath = os.path.join(targetpath, member.filename)
+        if os.path.altsep:
+            arcname = arcname.replace(os.path.altsep, os.path.sep)
+        # interpret absolute pathname as relative, remove drive letter or
+        # UNC path, redundant separators, "." and ".." components.
+        arcname = os.path.splitdrive(arcname)[1]
+        arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
+                    if x not in ('', os.path.curdir, os.path.pardir))
+        # filter illegal characters on Windows
+        if os.path.sep == '\\':
+            illegal = ':<>|"?*'
+            table = str.maketrans(illegal, '_' * len(illegal))
+            arcname = arcname.translate(table)
 
+        targetpath = os.path.join(targetpath, arcname)
         targetpath = os.path.normpath(targetpath)
 
         # Create all upper directories if necessary.
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -216,6 +216,9 @@
 Library
 -------
 
+- Issue #6972: The zipfile module no longer overwrites files outside of
+  its destination path when extracting malicious zip files.
+
 - Issue #4844: ZipFile now raises BadZipFile when opens a ZIP file with an
   incomplete "End of Central Directory" record.  Original patch by Guilherme
   Polo and Alan McIntyre.

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list