[Python-checkins] cpython (3.2): Issue #6975: os.path.realpath() now correctly resolves multiple nested symlinks

serhiy.storchaka python-checkins at python.org
Sun Feb 10 11:27:48 CET 2013


http://hg.python.org/cpython/rev/c5f4fa02fc86
changeset:   82119:c5f4fa02fc86
branch:      3.2
parent:      82110:38830281d43b
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sun Feb 10 12:22:07 2013 +0200
summary:
  Issue #6975: os.path.realpath() now correctly resolves multiple nested symlinks on POSIX platforms.

files:
  Lib/posixpath.py           |  84 ++++++++++++++-----------
  Lib/test/test_posixpath.py |  55 +++++++++++++++++
  Misc/NEWS                  |   3 +
  3 files changed, 104 insertions(+), 38 deletions(-)


diff --git a/Lib/posixpath.py b/Lib/posixpath.py
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -390,52 +390,60 @@
 def realpath(filename):
     """Return the canonical path of the specified filename, eliminating any
 symbolic links encountered in the path."""
-    if isinstance(filename, bytes):
+    path, ok = _joinrealpath(filename[:0], filename, {})
+    return abspath(path)
+
+# Join two paths, normalizing and eliminating any symbolic links
+# encountered in the second path.
+def _joinrealpath(path, rest, seen):
+    if isinstance(path, bytes):
         sep = b'/'
-        empty = b''
+        curdir = b'.'
+        pardir = b'..'
     else:
         sep = '/'
-        empty = ''
-    if isabs(filename):
-        bits = [sep] + filename.split(sep)[1:]
-    else:
-        bits = [empty] + filename.split(sep)
+        curdir = '.'
+        pardir = '..'
 
-    for i in range(2, len(bits)+1):
-        component = join(*bits[0:i])
-        # Resolve symbolic links.
-        if islink(component):
-            resolved = _resolve_link(component)
-            if resolved is None:
-                # Infinite loop -- return original component + rest of the path
-                return abspath(join(*([component] + bits[i:])))
+    if isabs(rest):
+        rest = rest[1:]
+        path = sep
+
+    while rest:
+        name, _, rest = rest.partition(sep)
+        if not name or name == curdir:
+            # current dir
+            continue
+        if name == pardir:
+            # parent dir
+            if path:
+                path = dirname(path)
             else:
-                newpath = join(*([resolved] + bits[i:]))
-                return realpath(newpath)
+                path = name
+            continue
+        newpath = join(path, name)
+        if not islink(newpath):
+            path = newpath
+            continue
+        # Resolve the symbolic link
+        if newpath in seen:
+            # Already seen this path
+            path = seen[newpath]
+            if path is not None:
+                # use cached value
+                continue
+            # The symlink is not resolved, so we must have a symlink loop.
+            # Return already resolved part + rest of the path unchanged.
+            return join(newpath, rest), False
+        seen[newpath] = None # not resolved symlink
+        path, ok = _joinrealpath(path, os.readlink(newpath), seen)
+        if not ok:
+            return join(path, rest), False
+        seen[newpath] = path # resolved symlink
 
-    return abspath(filename)
+    return path, True
 
 
-def _resolve_link(path):
-    """Internal helper function.  Takes a path and follows symlinks
-    until we either arrive at something that isn't a symlink, or
-    encounter a path we've seen before (meaning that there's a loop).
-    """
-    paths_seen = set()
-    while islink(path):
-        if path in paths_seen:
-            # Already seen this path, so we must have a symlink loop
-            return None
-        paths_seen.add(path)
-        # Resolve where the link points to
-        resolved = os.readlink(path)
-        if not isabs(resolved):
-            dir = dirname(path)
-            path = normpath(join(dir, resolved))
-        else:
-            path = normpath(resolved)
-    return path
-
 supports_unicode_filenames = (sys.platform == 'darwin')
 
 def relpath(path, start=None):
diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py
--- a/Lib/test/test_posixpath.py
+++ b/Lib/test/test_posixpath.py
@@ -375,6 +375,22 @@
             self.assertEqual(realpath(ABSTFN+"1"), ABSTFN+"1")
             self.assertEqual(realpath(ABSTFN+"2"), ABSTFN+"2")
 
+            self.assertEqual(realpath(ABSTFN+"1/x"), ABSTFN+"1/x")
+            self.assertEqual(realpath(ABSTFN+"1/.."), dirname(ABSTFN))
+            self.assertEqual(realpath(ABSTFN+"1/../x"), dirname(ABSTFN) + "/x")
+            os.symlink(ABSTFN+"x", ABSTFN+"y")
+            self.assertEqual(realpath(ABSTFN+"1/../" + basename(ABSTFN) + "y"),
+                             ABSTFN + "y")
+            self.assertEqual(realpath(ABSTFN+"1/../" + basename(ABSTFN) + "1"),
+                             ABSTFN + "1")
+
+            os.symlink(basename(ABSTFN) + "a/b", ABSTFN+"a")
+            self.assertEqual(realpath(ABSTFN+"a"), ABSTFN+"a/b")
+
+            os.symlink("../" + basename(dirname(ABSTFN)) + "/" +
+                       basename(ABSTFN) + "c", ABSTFN+"c")
+            self.assertEqual(realpath(ABSTFN+"c"), ABSTFN+"c")
+
             # Test using relative path as well.
             os.chdir(dirname(ABSTFN))
             self.assertEqual(realpath(basename(ABSTFN)), ABSTFN)
@@ -383,6 +399,45 @@
             support.unlink(ABSTFN)
             support.unlink(ABSTFN+"1")
             support.unlink(ABSTFN+"2")
+            support.unlink(ABSTFN+"y")
+            support.unlink(ABSTFN+"c")
+
+    @unittest.skipUnless(hasattr(os, "symlink"),
+                         "Missing symlink implementation")
+    @skip_if_ABSTFN_contains_backslash
+    def test_realpath_repeated_indirect_symlinks(self):
+        # Issue #6975.
+        try:
+            os.mkdir(ABSTFN)
+            os.symlink('../' + basename(ABSTFN), ABSTFN + '/self')
+            os.symlink('self/self/self', ABSTFN + '/link')
+            self.assertEqual(realpath(ABSTFN + '/link'), ABSTFN)
+        finally:
+            support.unlink(ABSTFN + '/self')
+            support.unlink(ABSTFN + '/link')
+            safe_rmdir(ABSTFN)
+
+    @unittest.skipUnless(hasattr(os, "symlink"),
+                         "Missing symlink implementation")
+    @skip_if_ABSTFN_contains_backslash
+    def test_realpath_deep_recursion(self):
+        depth = 10
+        old_path = abspath('.')
+        try:
+            os.mkdir(ABSTFN)
+            for i in range(depth):
+                os.symlink('/'.join(['%d' % i] * 10), ABSTFN + '/%d' % (i + 1))
+            os.symlink('.', ABSTFN + '/0')
+            self.assertEqual(realpath(ABSTFN + '/%d' % depth), ABSTFN)
+
+            # Test using relative path as well.
+            os.chdir(ABSTFN)
+            self.assertEqual(realpath('%d' % depth), ABSTFN)
+        finally:
+            os.chdir(old_path)
+            for i in range(depth + 1):
+                support.unlink(ABSTFN + '/%d' % i)
+            safe_rmdir(ABSTFN)
 
     @unittest.skipUnless(hasattr(os, "symlink"),
                          "Missing symlink implementation")
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -218,6 +218,9 @@
 Library
 -------
 
+- Issue #6975: os.path.realpath() now correctly resolves multiple nested
+  symlinks on POSIX platforms.
+
 - Issue #17156: pygettext.py now uses an encoding of source file and correctly
   writes and escapes non-ascii characters.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list