[Python-checkins] bpo-39039: tarfile raises descriptive exception from zlib.error (GH-27766)

ambv webhook-mailer at python.org
Wed Sep 29 05:25:52 EDT 2021

commit: b6fe8572509b77d2002eaddf99d718e9b4835684
branch: main
author: Jack DeVries <58614260+jdevries3133 at users.noreply.github.com>
committer: ambv <lukasz at langa.pl>
date: 2021-09-29T11:25:48+02:00

bpo-39039: tarfile raises descriptive exception from zlib.error (GH-27766)

* during tarfile parsing, a zlib error indicates invalid data
* tarfile.open now raises a descriptive exception from the zlib error
* this makes it clear to the user that they may be trying to open a
  corrupted tar file

A Misc/NEWS.d/next/Library/2021-08-18-10-36-14.bpo-39039.A63LYh.rst
M Lib/tarfile.py
M Lib/test/test_tarfile.py

diff --git a/Lib/tarfile.py b/Lib/tarfile.py
index 18d415adf5441..c1ee1222e09b5 100755
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -2349,6 +2349,15 @@ def next(self):
                     raise ReadError(str(e)) from None
             except SubsequentHeaderError as e:
                 raise ReadError(str(e)) from None
+            except Exception as e:
+                try:
+                    import zlib
+                    if isinstance(e, zlib.error):
+                        raise ReadError(f'zlib error: {e}') from None
+                    else:
+                        raise e
+                except ImportError:
+                    raise e
         if tarinfo is not None:
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
index cfdda24a269f5..e4b5c52bf1eaf 100644
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -19,6 +19,10 @@
     import gzip
 except ImportError:
     gzip = None
+try:
+    import zlib
+except ImportError:
+    zlib = None
 try:
     import bz2
 except ImportError:
@@ -687,6 +691,16 @@ def test_parallel_iteration(self):
                 self.assertEqual(m1.offset, m2.offset)
                 self.assertEqual(m1.get_info(), m2.get_info())
+    @unittest.skipIf(zlib is None, "requires zlib")
+    def test_zlib_error_does_not_leak(self):
+        # bpo-39039: tarfile.open allowed zlib exceptions to bubble up when
+        # parsing certain types of invalid data
+        with unittest.mock.patch("tarfile.TarInfo.fromtarfile") as mock:
+            mock.side_effect = zlib.error
+            with self.assertRaises(tarfile.ReadError):
+                tarfile.open(self.tarname)
 class MiscReadTest(MiscReadTestBase, unittest.TestCase):
     test_fail_comp = None
diff --git a/Misc/NEWS.d/next/Library/2021-08-18-10-36-14.bpo-39039.A63LYh.rst b/Misc/NEWS.d/next/Library/2021-08-18-10-36-14.bpo-39039.A63LYh.rst
new file mode 100644
index 0000000000000..7250055c2a4a9
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2021-08-18-10-36-14.bpo-39039.A63LYh.rst
@@ -0,0 +1,2 @@
+tarfile.open raises :exc:`~tarfile.ReadError` when a zlib error occurs
+during file extraction.

More information about the Python-checkins mailing list