[Python-checkins] cpython: Issue #10395: Added os.path.commonpath(). Implemented in posixpath and ntpath.

serhiy.storchaka python-checkins at python.org
Tue Mar 31 14:32:42 CEST 2015


https://hg.python.org/cpython/rev/ec6c812fbc1f
changeset:   95326:ec6c812fbc1f
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Tue Mar 31 15:31:53 2015 +0300
summary:
  Issue #10395: Added os.path.commonpath().  Implemented in posixpath and ntpath.
Based on patch by Rafik Draoui.

files:
  Doc/library/os.path.rst    |  19 ++++++-
  Doc/whatsnew/3.5.rst       |   7 ++
  Lib/ntpath.py              |  63 +++++++++++++++++++++++-
  Lib/posixpath.py           |  45 ++++++++++++++++-
  Lib/test/test_ntpath.py    |  69 ++++++++++++++++++++++++++
  Lib/test/test_posixpath.py |  54 ++++++++++++++++++++
  Misc/NEWS                  |   3 +
  7 files changed, 255 insertions(+), 5 deletions(-)


diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
--- a/Doc/library/os.path.rst
+++ b/Doc/library/os.path.rst
@@ -66,11 +66,24 @@
    empty string (``''``).
 
 
+.. function:: commonpath(paths)
+
+   Return the longest common sub-path of each pathname in the sequence
+   *paths*.  Raise :exc:`ValueError` if *paths* contains both absolute and
+   relative pathnames, if the paths are on different drives, or if *paths* is
+   empty.  Unlike :func:`commonprefix`, this returns a valid path.
+
+   Availability: Unix, Windows
+
+   .. versionadded:: 3.5
+
+
 .. function:: commonprefix(list)
 
-   Return the longest path prefix (taken character-by-character) that is a prefix
-   of all paths in  *list*.  If *list* is empty, return the empty string (``''``).
-   Note that this may return invalid paths because it works a character at a time.
+   Return the longest path prefix (taken character-by-character) that is a
+   prefix of all paths in  *list*.  If *list* is empty, return the empty string
+   (``''``).  Note that this may return invalid paths because it works a
+   character at a time.  To obtain a valid path, see :func:`commonpath`.
 
 
 .. function:: dirname(path)
diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -370,6 +370,13 @@
 * :class:`os.stat_result` now has a :attr:`~os.stat_result.st_file_attributes`
   attribute on Windows.  (Contributed by Ben Hoyt in :issue:`21719`.)
 
+os.path
+-------
+
+* New :func:`~os.path.commonpath` function that extracts the common path
+  prefix.  Unlike the :func:`~os.path.commonprefix` function, it always returns
+  a valid path.  (Contributed by Rafik Draoui and Serhiy Storchaka in :issue:`10395`.)
+
 pickle
 ------
 
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -17,7 +17,7 @@
            "ismount", "expanduser","expandvars","normpath","abspath",
            "splitunc","curdir","pardir","sep","pathsep","defpath","altsep",
            "extsep","devnull","realpath","supports_unicode_filenames","relpath",
-           "samefile", "sameopenfile", "samestat",]
+           "samefile", "sameopenfile", "samestat", "commonpath"]
 
 # strings representing various path-related bits and pieces
 # These are primarily for export; internally, they are hardcoded.
@@ -589,6 +589,67 @@
         raise
 
 
+# Return the longest common sub-path of the sequence of paths given as input.
+# The function is case-insensitive and 'separator-insensitive', i.e. if the
+# only difference between two paths is the use of '\' versus '/' as separator,
+# they are deemed to be equal.
+#
+# However, the returned path will have the standard '\' separator (even if the
+# given paths had the alternative '/' separator) and will have the case of the
+# first path given in the sequence. Additionally, any trailing separator is
+# stripped from the returned path.
+
+def commonpath(paths):
+    """Given a sequence of path names, returns the longest common sub-path."""
+
+    if not paths:
+        raise ValueError('commonpath() arg is an empty sequence')
+
+    if isinstance(paths[0], bytes):
+        sep = b'\\'
+        altsep = b'/'
+        curdir = b'.'
+    else:
+        sep = '\\'
+        altsep = '/'
+        curdir = '.'
+
+    try:
+        drivesplits = [splitdrive(p.replace(altsep, sep).lower()) for p in paths]
+        split_paths = [p.split(sep) for d, p in drivesplits]
+
+        try:
+            isabs, = set(p[:1] == sep for d, p in drivesplits)
+        except ValueError:
+            raise ValueError("Can't mix absolute and relative paths") from None
+
+        # Check that all drive letters or UNC paths match. The check is made only
+        # now otherwise type errors for mixing strings and bytes would not be
+        # caught.
+        if len(set(d for d, p in drivesplits)) != 1:
+            raise ValueError("Paths don't have the same drive")
+
+        drive, path = splitdrive(paths[0].replace(altsep, sep))
+        common = path.split(sep)
+        common = [c for c in common if c and c != curdir]
+
+        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
+        s1 = min(split_paths)
+        s2 = max(split_paths)
+        for i, c in enumerate(s1):
+            if c != s2[i]:
+                common = common[:i]
+                break
+        else:
+            common = common[:len(s1)]
+
+        prefix = drive + sep if isabs else drive
+        return prefix + sep.join(common)
+    except (TypeError, AttributeError):
+        genericpath._check_arg_types('commonpath', *paths)
+        raise
+
+
 # determine if two files are in fact the same file
 try:
     # GetFinalPathNameByHandle is available starting with Windows 6.0.
diff --git a/Lib/posixpath.py b/Lib/posixpath.py
--- a/Lib/posixpath.py
+++ b/Lib/posixpath.py
@@ -22,7 +22,8 @@
            "ismount", "expanduser","expandvars","normpath","abspath",
            "samefile","sameopenfile","samestat",
            "curdir","pardir","sep","pathsep","defpath","altsep","extsep",
-           "devnull","realpath","supports_unicode_filenames","relpath"]
+           "devnull","realpath","supports_unicode_filenames","relpath",
+           "commonpath"]
 
 # Strings representing various path-related bits and pieces.
 # These are primarily for export; internally, they are hardcoded.
@@ -455,3 +456,45 @@
     except (TypeError, AttributeError, BytesWarning, DeprecationWarning):
         genericpath._check_arg_types('relpath', path, start)
         raise
+
+
+# Return the longest common sub-path of the sequence of paths given as input.
+# The paths are not normalized before comparing them (this is the
+# responsibility of the caller). Any trailing separator is stripped from the
+# returned path.
+
+def commonpath(paths):
+    """Given a sequence of path names, returns the longest common sub-path."""
+
+    if not paths:
+        raise ValueError('commonpath() arg is an empty sequence')
+
+    if isinstance(paths[0], bytes):
+        sep = b'/'
+        curdir = b'.'
+    else:
+        sep = '/'
+        curdir = '.'
+
+    try:
+        split_paths = [path.split(sep) for path in paths]
+
+        try:
+            isabs, = set(p[:1] == sep for p in paths)
+        except ValueError:
+            raise ValueError("Can't mix absolute and relative paths") from None
+
+        split_paths = [[c for c in s if c and c != curdir] for s in split_paths]
+        s1 = min(split_paths)
+        s2 = max(split_paths)
+        common = s1
+        for i, c in enumerate(s1):
+            if c != s2[i]:
+                common = s1[:i]
+                break
+
+        prefix = sep if isabs else sep[:0]
+        return prefix + sep.join(common)
+    except (TypeError, AttributeError):
+        genericpath._check_arg_types('commonpath', *paths)
+        raise
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -330,6 +330,75 @@
         tester('ntpath.relpath("/a/b", "/a/b")', '.')
         tester('ntpath.relpath("c:/foo", "C:/FOO")', '.')
 
+    def test_commonpath(self):
+        def check(paths, expected):
+            tester(('ntpath.commonpath(%r)' % paths).replace('\\\\', '\\'),
+                   expected)
+        def check_error(exc, paths):
+            self.assertRaises(exc, ntpath.commonpath, paths)
+            self.assertRaises(exc, ntpath.commonpath,
+                              [os.fsencode(p) for p in paths])
+
+        self.assertRaises(ValueError, ntpath.commonpath, [])
+        check_error(ValueError, ['C:\\Program Files', 'Program Files'])
+        check_error(ValueError, ['C:\\Program Files', 'C:Program Files'])
+        check_error(ValueError, ['\\Program Files', 'Program Files'])
+        check_error(ValueError, ['Program Files', 'C:\\Program Files'])
+        check(['C:\\Program Files'], 'C:\\Program Files')
+        check(['C:\\Program Files', 'C:\\Program Files'], 'C:\\Program Files')
+        check(['C:\\Program Files\\', 'C:\\Program Files'],
+              'C:\\Program Files')
+        check(['C:\\Program Files\\', 'C:\\Program Files\\'],
+              'C:\\Program Files')
+        check(['C:\\\\Program Files', 'C:\\Program Files\\\\'],
+              'C:\\Program Files')
+        check(['C:\\.\\Program Files', 'C:\\Program Files\\.'],
+              'C:\\Program Files')
+        check(['C:\\', 'C:\\bin'], 'C:\\')
+        check(['C:\\Program Files', 'C:\\bin'], 'C:\\')
+        check(['C:\\Program Files', 'C:\\Program Files\\Bar'],
+              'C:\\Program Files')
+        check(['C:\\Program Files\\Foo', 'C:\\Program Files\\Bar'],
+              'C:\\Program Files')
+        check(['C:\\Program Files', 'C:\\Projects'], 'C:\\')
+        check(['C:\\Program Files\\', 'C:\\Projects'], 'C:\\')
+
+        check(['C:\\Program Files\\Foo', 'C:/Program Files/Bar'],
+              'C:\\Program Files')
+        check(['C:\\Program Files\\Foo', 'c:/program files/bar'],
+              'C:\\Program Files')
+        check(['c:/program files/bar', 'C:\\Program Files\\Foo'],
+              'c:\\program files')
+
+        check_error(ValueError, ['C:\\Program Files', 'D:\\Program Files'])
+
+        check(['spam'], 'spam')
+        check(['spam', 'spam'], 'spam')
+        check(['spam', 'alot'], '')
+        check(['and\\jam', 'and\\spam'], 'and')
+        check(['and\\\\jam', 'and\\spam\\\\'], 'and')
+        check(['and\\.\\jam', '.\\and\\spam'], 'and')
+        check(['and\\jam', 'and\\spam', 'alot'], '')
+        check(['and\\jam', 'and\\spam', 'and'], 'and')
+        check(['C:and\\jam', 'C:and\\spam'], 'C:and')
+
+        check([''], '')
+        check(['', 'spam\\alot'], '')
+        check_error(ValueError, ['', '\\spam\\alot'])
+
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          [b'C:\\Program Files', 'C:\\Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          [b'C:\\Program Files', 'Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          [b'Program Files', 'C:\\Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          ['C:\\Program Files', b'C:\\Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          ['C:\\Program Files', b'Program Files\\Foo'])
+        self.assertRaises(TypeError, ntpath.commonpath,
+                          ['Program Files', b'C:\\Program Files\\Foo'])
+
     def test_sameopenfile(self):
         with TemporaryFile() as tf1, TemporaryFile() as tf2:
             # Make sure the same file is really the same
diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py
--- a/Lib/test/test_posixpath.py
+++ b/Lib/test/test_posixpath.py
@@ -522,6 +522,60 @@
         finally:
             os.getcwdb = real_getcwdb
 
+    def test_commonpath(self):
+        def check(paths, expected):
+            self.assertEqual(posixpath.commonpath(paths), expected)
+            self.assertEqual(posixpath.commonpath([os.fsencode(p) for p in paths]),
+                             os.fsencode(expected))
+        def check_error(exc, paths):
+            self.assertRaises(exc, posixpath.commonpath, paths)
+            self.assertRaises(exc, posixpath.commonpath,
+                              [os.fsencode(p) for p in paths])
+
+        self.assertRaises(ValueError, posixpath.commonpath, [])
+        check_error(ValueError, ['/usr', 'usr'])
+        check_error(ValueError, ['usr', '/usr'])
+
+        check(['/usr/local'], '/usr/local')
+        check(['/usr/local', '/usr/local'], '/usr/local')
+        check(['/usr/local/', '/usr/local'], '/usr/local')
+        check(['/usr/local/', '/usr/local/'], '/usr/local')
+        check(['/usr//local', '//usr/local'], '/usr/local')
+        check(['/usr/./local', '/./usr/local'], '/usr/local')
+        check(['/', '/dev'], '/')
+        check(['/usr', '/dev'], '/')
+        check(['/usr/lib/', '/usr/lib/python3'], '/usr/lib')
+        check(['/usr/lib/', '/usr/lib64/'], '/usr')
+
+        check(['/usr/lib', '/usr/lib64'], '/usr')
+        check(['/usr/lib/', '/usr/lib64'], '/usr')
+
+        check(['spam'], 'spam')
+        check(['spam', 'spam'], 'spam')
+        check(['spam', 'alot'], '')
+        check(['and/jam', 'and/spam'], 'and')
+        check(['and//jam', 'and/spam//'], 'and')
+        check(['and/./jam', './and/spam'], 'and')
+        check(['and/jam', 'and/spam', 'alot'], '')
+        check(['and/jam', 'and/spam', 'and'], 'and')
+
+        check([''], '')
+        check(['', 'spam/alot'], '')
+        check_error(ValueError, ['', '/spam/alot'])
+
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          [b'/usr/lib/', '/usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          [b'/usr/lib/', 'usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          [b'usr/lib/', '/usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          ['/usr/lib/', b'/usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          ['/usr/lib/', b'usr/lib/python3'])
+        self.assertRaises(TypeError, posixpath.commonpath,
+                          ['usr/lib/', b'/usr/lib/python3'])
+
 
 class PosixCommonTest(test_genericpath.CommonTest, unittest.TestCase):
     pathmodule = posixpath
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,9 @@
 Library
 -------
 
+- Issue #10395: Added os.path.commonpath(). Implemented in posixpath and ntpath.
+  Based on patch by Rafik Draoui.
+
 - Issue #23611: Serializing more "lookupable" objects (such as unbound methods
   or nested classes) now are supported with pickle protocols < 4.
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list