[Python-checkins] cpython (3.2): Correctly detect bzip2 compressed streams with blocksizes other than 900k.

lars.gustaebel python-checkins at python.org
Tue Dec 6 13:10:22 CET 2011


http://hg.python.org/cpython/rev/80876df8adce
changeset:   73868:80876df8adce
branch:      3.2
parent:      73866:4579cd952156
user:        Lars Gustäbel <lars at gustaebel.de>
date:        Tue Dec 06 12:56:38 2011 +0100
summary:
  Correctly detect bzip2 compressed streams with blocksizes other than 900k.

files:
  Lib/tarfile.py           |   2 +-
  Lib/test/test_tarfile.py |  22 ++++++++++++++++++----
  Misc/NEWS                |   3 +++
  3 files changed, 22 insertions(+), 5 deletions(-)


diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -627,7 +627,7 @@
     def getcomptype(self):
         if self.buf.startswith(b"\037\213\010"):
             return "gz"
-        if self.buf.startswith(b"BZh91"):
+        if self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":
             return "bz2"
         return "tar"
 
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -529,6 +529,23 @@
     def test_detect_fileobj(self):
         self._test_modes(self._testfunc_fileobj)
 
+    def test_detect_stream_bz2(self):
+        # Originally, tarfile's stream detection looked for the string
+        # "BZh91" at the start of the file. This is incorrect because
+        # the '9' represents the blocksize (900kB). If the file was
+        # compressed using another blocksize, autodetection fails.
+        if not bz2:
+            return
+
+        with open(tarname, "rb") as fobj:
+            data = fobj.read()
+
+        # Compress with blocksize 100kB, the file starts with "BZh11".
+        with bz2.BZ2File(tmpname, "wb", compresslevel=1) as fobj:
+            fobj.write(data)
+
+        self._testfunc_file(tmpname, "r|*")
+
 
 class MemberReadTest(ReadTest):
 
@@ -1818,11 +1835,8 @@
     if bz2:
         # Create testtar.tar.bz2 and add bz2-specific tests.
         support.unlink(bz2name)
-        tar = bz2.BZ2File(bz2name, "wb")
-        try:
+        with bz2.BZ2File(bz2name, "wb") as tar:
             tar.write(data)
-        finally:
-            tar.close()
 
         tests += [
             Bz2MiscReadTest,
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -90,6 +90,9 @@
 Library
 -------
 
+- tarfile.py: Correctly detect bzip2 compressed streams with blocksizes
+  other than 900k.
+
 - Issue #13439: Fix many errors in turtle docstrings.
 
 - Issue #13487: Make inspect.getmodule robust against changes done to

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list