[Python-checkins] r52525 - in python/branches/release25-maint: Lib/tarfile.py Misc/NEWS

georg.brandl python-checkins at python.org
Sun Oct 29 10:16:16 CET 2006


Author: georg.brandl
Date: Sun Oct 29 10:16:15 2006
New Revision: 52525

Modified:
   python/branches/release25-maint/Lib/tarfile.py
   python/branches/release25-maint/Misc/NEWS
Log:
Patch #1583880: fix tarfile's problems with long names and posix/
GNU modes.
 (backport from rev. 52524)

Modified: python/branches/release25-maint/Lib/tarfile.py
==============================================================================
--- python/branches/release25-maint/Lib/tarfile.py	(original)
+++ python/branches/release25-maint/Lib/tarfile.py	Sun Oct 29 10:16:15 2006
@@ -49,6 +49,7 @@
 import errno
 import time
 import struct
+import copy
 
 if sys.platform == 'mac':
     # This module needs work for MacOS9, especially in the area of pathname
@@ -793,7 +794,6 @@
         """Construct a TarInfo object. name is the optional name
            of the member.
         """
-
         self.name = name        # member name (dirnames must end with '/')
         self.mode = 0666        # file permissions
         self.uid = 0            # user id
@@ -807,8 +807,6 @@
         self.gname = "group"    # group name
         self.devmajor = 0       # device major number
         self.devminor = 0       # device minor number
-        self.prefix = ""        # prefix to filename or information
-                                # about sparse files
 
         self.offset = 0         # the tar header starts here
         self.offset_data = 0    # the file's data starts here
@@ -840,24 +838,70 @@
         tarinfo.gname = buf[297:329].rstrip(NUL)
         tarinfo.devmajor = nti(buf[329:337])
         tarinfo.devminor = nti(buf[337:345])
-        tarinfo.prefix = buf[345:500]
+        prefix = buf[345:500].rstrip(NUL)
+
+        if prefix and not tarinfo.issparse():
+            tarinfo.name = prefix + "/" + tarinfo.name
 
         if tarinfo.chksum not in calc_chksums(buf):
             raise ValueError("invalid header")
         return tarinfo
 
     def tobuf(self, posix=False):
-        """Return a tar header block as a 512 byte string.
+        """Return a tar header as a string of 512 byte blocks.
         """
+        buf = ""
+        type = self.type
+        prefix = ""
+
+        if self.name.endswith("/"):
+            type = DIRTYPE
+
+        name = normpath(self.name)
+
+        if type == DIRTYPE:
+            # directories should end with '/'
+            name += "/"
+
+        linkname = self.linkname
+        if linkname:
+            # if linkname is empty we end up with a '.'
+            linkname = normpath(linkname)
+
+        if posix:
+            if self.size > MAXSIZE_MEMBER:
+                raise ValueError("file is too large (>= 8 GB)")
+
+            if len(self.linkname) > LENGTH_LINK:
+                raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
+
+            if len(name) > LENGTH_NAME:
+                prefix = name[:LENGTH_PREFIX + 1]
+                while prefix and prefix[-1] != "/":
+                    prefix = prefix[:-1]
+
+                name = name[len(prefix):]
+                prefix = prefix[:-1]
+
+                if not prefix or len(name) > LENGTH_NAME:
+                    raise ValueError("name is too long")
+
+        else:
+            if len(self.linkname) > LENGTH_LINK:
+                buf += self._create_gnulong(self.linkname, GNUTYPE_LONGLINK)
+
+            if len(name) > LENGTH_NAME:
+                buf += self._create_gnulong(name, GNUTYPE_LONGNAME)
+
         parts = [
-            stn(self.name, 100),
+            stn(name, 100),
             itn(self.mode & 07777, 8, posix),
             itn(self.uid, 8, posix),
             itn(self.gid, 8, posix),
             itn(self.size, 12, posix),
             itn(self.mtime, 12, posix),
             "        ", # checksum field
-            self.type,
+            type,
             stn(self.linkname, 100),
             stn(MAGIC, 6),
             stn(VERSION, 2),
@@ -865,15 +909,38 @@
             stn(self.gname, 32),
             itn(self.devmajor, 8, posix),
             itn(self.devminor, 8, posix),
-            stn(self.prefix, 155)
+            stn(prefix, 155)
         ]
 
-        buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
+        buf += struct.pack("%ds" % BLOCKSIZE, "".join(parts))
         chksum = calc_chksums(buf)[0]
-        buf = buf[:148] + "%06o\0" % chksum + buf[155:]
+        buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
         self.buf = buf
         return buf
 
+    def _create_gnulong(self, name, type):
+        """Create a GNU longname/longlink header from name.
+           It consists of an extended tar header, with the length
+           of the longname as size, followed by data blocks,
+           which contain the longname as a null terminated string.
+        """
+        name += NUL
+
+        tarinfo = self.__class__()
+        tarinfo.name = "././@LongLink"
+        tarinfo.type = type
+        tarinfo.mode = 0
+        tarinfo.size = len(name)
+
+        # create extended header
+        buf = tarinfo.tobuf()
+        # create name blocks
+        buf += name
+        blocks, remainder = divmod(len(name), BLOCKSIZE)
+        if remainder > 0:
+            buf += (BLOCKSIZE - remainder) * NUL
+        return buf
+
     def isreg(self):
         return self.type in REGULAR_TYPES
     def isfile(self):
@@ -1377,50 +1444,11 @@
         """
         self._check("aw")
 
-        tarinfo.name = normpath(tarinfo.name)
-        if tarinfo.isdir():
-            # directories should end with '/'
-            tarinfo.name += "/"
-
-        if tarinfo.linkname:
-            tarinfo.linkname = normpath(tarinfo.linkname)
-
-        if tarinfo.size > MAXSIZE_MEMBER:
-            if self.posix:
-                raise ValueError("file is too large (>= 8 GB)")
-            else:
-                self._dbg(2, "tarfile: Created GNU tar largefile header")
-
-
-        if len(tarinfo.linkname) > LENGTH_LINK:
-            if self.posix:
-                raise ValueError("linkname is too long (>%d)" % (LENGTH_LINK))
-            else:
-                self._create_gnulong(tarinfo.linkname, GNUTYPE_LONGLINK)
-                tarinfo.linkname = tarinfo.linkname[:LENGTH_LINK -1]
-                self._dbg(2, "tarfile: Created GNU tar extension LONGLINK")
-
-        if len(tarinfo.name) > LENGTH_NAME:
-            if self.posix:
-                prefix = tarinfo.name[:LENGTH_PREFIX + 1]
-                while prefix and prefix[-1] != "/":
-                    prefix = prefix[:-1]
-
-                name = tarinfo.name[len(prefix):]
-                prefix = prefix[:-1]
+        tarinfo = copy.copy(tarinfo)
 
-                if not prefix or len(name) > LENGTH_NAME:
-                    raise ValueError("name is too long (>%d)" % (LENGTH_NAME))
-
-                tarinfo.name   = name
-                tarinfo.prefix = prefix
-            else:
-                self._create_gnulong(tarinfo.name, GNUTYPE_LONGNAME)
-                tarinfo.name = tarinfo.name[:LENGTH_NAME - 1]
-                self._dbg(2, "tarfile: Created GNU tar extension LONGNAME")
-
-        self.fileobj.write(tarinfo.tobuf(self.posix))
-        self.offset += BLOCKSIZE
+        buf = tarinfo.tobuf(self.posix)
+        self.fileobj.write(buf)
+        self.offset += len(buf)
 
         # If there's data to follow, append it.
         if fileobj is not None:
@@ -1779,12 +1807,6 @@
         if tarinfo.isreg() and tarinfo.name.endswith("/"):
             tarinfo.type = DIRTYPE
 
-        # The prefix field is used for filenames > 100 in
-        # the POSIX standard.
-        # name = prefix + '/' + name
-        tarinfo.name = normpath(os.path.join(tarinfo.prefix.rstrip(NUL),
-                                             tarinfo.name))
-
         # Directory names should have a '/' at the end.
         if tarinfo.isdir():
             tarinfo.name += "/"
@@ -1909,10 +1931,6 @@
         self.offset += self._block(tarinfo.size)
         tarinfo.size = origsize
 
-        # Clear the prefix field so that it is not used
-        # as a pathname in next().
-        tarinfo.prefix = ""
-
         return tarinfo
 
     #--------------------------------------------------------------------------
@@ -1970,31 +1988,6 @@
         else:
             return TarIter(self)
 
-    def _create_gnulong(self, name, type):
-        """Write a GNU longname/longlink member to the TarFile.
-           It consists of an extended tar header, with the length
-           of the longname as size, followed by data blocks,
-           which contain the longname as a null terminated string.
-        """
-        name += NUL
-
-        tarinfo = TarInfo()
-        tarinfo.name = "././@LongLink"
-        tarinfo.type = type
-        tarinfo.mode = 0
-        tarinfo.size = len(name)
-
-        # write extended header
-        self.fileobj.write(tarinfo.tobuf())
-        self.offset += BLOCKSIZE
-        # write name blocks
-        self.fileobj.write(name)
-        blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
-        if remainder > 0:
-            self.fileobj.write(NUL * (BLOCKSIZE - remainder))
-            blocks += 1
-        self.offset += blocks * BLOCKSIZE
-
     def _dbg(self, level, msg):
         """Write debugging output to sys.stderr.
         """

Modified: python/branches/release25-maint/Misc/NEWS
==============================================================================
--- python/branches/release25-maint/Misc/NEWS	(original)
+++ python/branches/release25-maint/Misc/NEWS	Sun Oct 29 10:16:15 2006
@@ -90,6 +90,9 @@
 Library
 -------
 
+- Patch #1583880: fix tarfile's problems with long names and posix/
+  GNU modes.
+
 - Fix codecs.EncodedFile which did not use file_encoding in 2.5.0, and
   fix all codecs file wrappers to work correctly with the "with"
   statement (bug #1586513).


More information about the Python-checkins mailing list