[Python-checkins] cpython (3.4): Issue #24259: tarfile now raises a ReadError if an archive is truncated inside a data segment.

lars.gustaebel python-checkins at python.org
Mon Jul 6 09:33:11 CEST 2015


https://hg.python.org/cpython/rev/c7f4f61697b7
changeset:   96845:c7f4f61697b7
branch:      3.4
parent:      96838:0deca75537ec
user:        Lars Gustäbel <lars at gustaebel.de>
date:        Mon Jul 06 09:27:24 2015 +0200
summary:
  Issue #24259: tarfile now raises a ReadError if an archive is truncated inside a data segment.

files:
  Lib/tarfile.py           |  22 +++++++++++++++-------
  Lib/test/test_tarfile.py |  23 +++++++++++++++++++++++
  Misc/NEWS                |   3 +++
  3 files changed, 41 insertions(+), 7 deletions(-)


diff --git a/Lib/tarfile.py b/Lib/tarfile.py
--- a/Lib/tarfile.py
+++ b/Lib/tarfile.py
@@ -225,7 +225,7 @@
     signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))
     return unsigned_chksum, signed_chksum
 
-def copyfileobj(src, dst, length=None):
+def copyfileobj(src, dst, length=None, exception=OSError):
     """Copy length bytes from fileobj src to fileobj dst.
        If length is None, copy the entire content.
     """
@@ -240,13 +240,13 @@
     for b in range(blocks):
         buf = src.read(BUFSIZE)
         if len(buf) < BUFSIZE:
-            raise OSError("end of file reached")
+            raise exception("unexpected end of data")
         dst.write(buf)
 
     if remainder != 0:
         buf = src.read(remainder)
         if len(buf) < remainder:
-            raise OSError("end of file reached")
+            raise exception("unexpected end of data")
         dst.write(buf)
     return
 
@@ -690,7 +690,10 @@
             length = min(size, stop - self.position)
             if data:
                 self.fileobj.seek(offset + (self.position - start))
-                buf += self.fileobj.read(length)
+                b = self.fileobj.read(length)
+                if len(b) != length:
+                    raise ReadError("unexpected end of data")
+                buf += b
             else:
                 buf += NUL * length
             size -= length
@@ -2132,9 +2135,9 @@
             if tarinfo.sparse is not None:
                 for offset, size in tarinfo.sparse:
                     target.seek(offset)
-                    copyfileobj(source, target, size)
+                    copyfileobj(source, target, size, ReadError)
             else:
-                copyfileobj(source, target, tarinfo.size)
+                copyfileobj(source, target, tarinfo.size, ReadError)
             target.seek(tarinfo.size)
             target.truncate()
 
@@ -2244,8 +2247,13 @@
             self.firstmember = None
             return m
 
+        # Advance the file pointer.
+        if self.offset != self.fileobj.tell():
+            self.fileobj.seek(self.offset - 1)
+            if not self.fileobj.read(1):
+                raise ReadError("unexpected end of data")
+
         # Read the next block.
-        self.fileobj.seek(self.offset)
         tarinfo = None
         while True:
             try:
diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py
--- a/Lib/test/test_tarfile.py
+++ b/Lib/test/test_tarfile.py
@@ -349,6 +349,29 @@
             finally:
                 tar.close()
 
+    def test_premature_end_of_archive(self):
+        for size in (512, 600, 1024, 1200):
+            with tarfile.open(tmpname, "w:") as tar:
+                t = tarfile.TarInfo("foo")
+                t.size = 1024
+                tar.addfile(t, io.BytesIO(b"a" * 1024))
+
+            with open(tmpname, "r+b") as fobj:
+                fobj.truncate(size)
+
+            with tarfile.open(tmpname) as tar:
+                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
+                    for t in tar:
+                        pass
+
+            with tarfile.open(tmpname) as tar:
+                t = tar.next()
+
+                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
+                    tar.extract(t, TEMPDIR)
+
+                with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"):
+                    tar.extractfile(t).read()
 
 class MiscReadTestBase(CommonReadTest):
     def requires_name_attribute(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -66,6 +66,9 @@
 Library
 -------
 
+- Issue #24259: tarfile now raises a ReadError if an archive is truncated
+  inside a data segment.
+
 - Issue #24552: Fix use after free in an error case of the _pickle module.
 
 - Issue #24514: tarfile now tolerates number fields consisting of only

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list