[Python-checkins] r81670 - in python/branches/py3k: Lib/tarfile.py Lib/test/test_tarfile.py Lib/test/testtar.tar

lars.gustaebel python-checkins at python.org
Thu Jun 3 14:45:17 CEST 2010


Author: lars.gustaebel
Date: Thu Jun  3 14:45:16 2010
New Revision: 81670

Log:
Merged revisions 81667 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r81667 | lars.gustaebel | 2010-06-03 14:34:14 +0200 (Thu, 03 Jun 2010) | 8 lines
  
  Issue #8741: Fixed the TarFile.makelink() method that is responsible
  for extracting symbolic and hard link entries as regular files as a
  work-around on platforms that do not support filesystem links.
  
  This stopped working reliably after a change in r74571. I also added
  a few tests for this functionality.
........


Modified:
   python/branches/py3k/   (props changed)
   python/branches/py3k/Lib/tarfile.py
   python/branches/py3k/Lib/test/test_tarfile.py
   python/branches/py3k/Lib/test/testtar.tar

Modified: python/branches/py3k/Lib/tarfile.py
==============================================================================
--- python/branches/py3k/Lib/tarfile.py	(original)
+++ python/branches/py3k/Lib/tarfile.py	Thu Jun  3 14:45:16 2010
@@ -2163,8 +2163,7 @@
                 raise StreamError("cannot extract (sym)link as file object")
             else:
                 # A (sym)link's file object is its target's file object.
-                return self.extractfile(self._getmember(tarinfo.linkname,
-                                                        tarinfo))
+                return self.extractfile(self._find_link_target(tarinfo))
         else:
             # If there's no data associated with the member (directory, chrdev,
             # blkdev, etc.), return None instead of a file object.
@@ -2273,27 +2272,21 @@
           (platform limitation), we try to make a copy of the referenced file
           instead of a link.
         """
-        try:
+        if hasattr(os, "symlink") and hasattr(os, "link"):
+            # For systems that support symbolic and hard links.
             if tarinfo.issym():
                 os.symlink(tarinfo.linkname, targetpath)
             else:
                 # See extract().
-                os.link(tarinfo._link_target, targetpath)
-        except AttributeError:
-            if tarinfo.issym():
-                linkpath = os.path.dirname(tarinfo.name) + "/" + \
-                                        tarinfo.linkname
-            else:
-                linkpath = tarinfo.linkname
-
+                if os.path.exists(tarinfo._link_target):
+                    os.link(tarinfo._link_target, targetpath)
+                else:
+                    self._extract_member(self._find_link_target(tarinfo), targetpath)
+        else:
             try:
-                self._extract_member(self.getmember(linkpath), targetpath)
-            except (EnvironmentError, KeyError) as e:
-                linkpath = linkpath.replace("/", os.sep)
-                try:
-                    shutil.copy2(linkpath, targetpath)
-                except EnvironmentError as e:
-                    raise IOError("link could not be created")
+                self._extract_member(self._find_link_target(tarinfo), targetpath)
+            except KeyError:
+                raise ExtractError("unable to resolve link inside archive")
 
     def chown(self, tarinfo, targetpath):
         """Set owner of targetpath according to tarinfo.
@@ -2392,21 +2385,28 @@
     #--------------------------------------------------------------------------
     # Little helper methods:
 
-    def _getmember(self, name, tarinfo=None):
+    def _getmember(self, name, tarinfo=None, normalize=False):
         """Find an archive member by name from bottom to top.
            If tarinfo is given, it is used as the starting point.
         """
         # Ensure that all members have been loaded.
         members = self.getmembers()
 
-        if tarinfo is None:
-            end = len(members)
-        else:
-            end = members.index(tarinfo)
+        # Limit the member search list up to tarinfo.
+        if tarinfo is not None:
+            members = members[:members.index(tarinfo)]
 
-        for i in range(end - 1, -1, -1):
-            if name == members[i].name:
-                return members[i]
+        if normalize:
+            name = os.path.normpath(name)
+
+        for member in reversed(members):
+            if normalize:
+                member_name = os.path.normpath(member.name)
+            else:
+                member_name = member.name
+
+            if name == member_name:
+                return member
 
     def _load(self):
         """Read through the entire archive file and look for readable
@@ -2427,6 +2427,25 @@
         if mode is not None and self.mode not in mode:
             raise IOError("bad operation for mode %r" % self.mode)
 
+    def _find_link_target(self, tarinfo):
+        """Find the target member of a symlink or hardlink member in the
+           archive.
+        """
+        if tarinfo.issym():
+            # Always search the entire archive.
+            linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
+            limit = None
+        else:
+            # Search the archive before the link, because a hard link is
+            # just a reference to an already archived file.
+            linkname = tarinfo.linkname
+            limit = tarinfo
+
+        member = self._getmember(linkname, tarinfo=limit, normalize=True)
+        if member is None:
+            raise KeyError("linkname %r not found" % linkname)
+        return member
+
     def __iter__(self):
         """Provide an iterator object.
         """

Modified: python/branches/py3k/Lib/test/test_tarfile.py
==============================================================================
--- python/branches/py3k/Lib/test/test_tarfile.py	(original)
+++ python/branches/py3k/Lib/test/test_tarfile.py	Thu Jun  3 14:45:16 2010
@@ -133,6 +133,26 @@
                      "read() after readline() failed")
         fobj.close()
 
+    # Test if symbolic and hard links are resolved by extractfile().  The
+    # test link members each point to a regular member whose data is
+    # supposed to be exported.
+    def _test_fileobj_link(self, lnktype, regtype):
+        a = self.tar.extractfile(lnktype)
+        b = self.tar.extractfile(regtype)
+        self.assertEqual(a.name, b.name)
+
+    def test_fileobj_link1(self):
+        self._test_fileobj_link("ustar/lnktype", "ustar/regtype")
+
+    def test_fileobj_link2(self):
+        self._test_fileobj_link("./ustar/linktest2/lnktype", "ustar/linktest1/regtype")
+
+    def test_fileobj_symlink1(self):
+        self._test_fileobj_link("ustar/symtype", "ustar/regtype")
+
+    def test_fileobj_symlink2(self):
+        self._test_fileobj_link("./ustar/linktest2/symtype", "ustar/linktest1/regtype")
+
 
 class CommonReadTest(ReadTest):
 
@@ -1378,6 +1398,29 @@
         fobj.close()
 
 
+class LinkEmulationTest(ReadTest):
+
+    # Test for issue #8741 regression. On platforms that do not support
+    # symbolic or hard links tarfile tries to extract these types of members as
+    # the regular files they point to.
+    def _test_link_extraction(self, name):
+        self.tar.extract(name, TEMPDIR)
+        data = open(os.path.join(TEMPDIR, name), "rb").read()
+        self.assertEqual(md5sum(data), md5_regtype)
+
+    def test_hardlink_extraction1(self):
+        self._test_link_extraction("ustar/lnktype")
+
+    def test_hardlink_extraction2(self):
+        self._test_link_extraction("./ustar/linktest2/lnktype")
+
+    def test_symlink_extraction1(self):
+        self._test_link_extraction("ustar/symtype")
+
+    def test_symlink_extraction2(self):
+        self._test_link_extraction("./ustar/linktest2/symtype")
+
+
 class GzipMiscReadTest(MiscReadTest):
     tarname = gzipname
     mode = "r:gz"
@@ -1463,6 +1506,8 @@
 
     if hasattr(os, "link"):
         tests.append(HardlinkTest)
+    else:
+        tests.append(LinkEmulationTest)
 
     fobj = open(tarname, "rb")
     data = fobj.read()

Modified: python/branches/py3k/Lib/test/testtar.tar
==============================================================================
Binary files. No diff available.


More information about the Python-checkins mailing list