[Python-checkins] gh-81790: support "UNC" device paths in `ntpath.splitdrive()` (GH-91882)
zooba
webhook-mailer at python.org
Fri Jun 10 12:00:00 EDT 2022
https://github.com/python/cpython/commit/2ba0fd5767577954f331ecbd53596cd8035d7186
commit: 2ba0fd5767577954f331ecbd53596cd8035d7186
branch: main
author: Barney Gale <barney.gale at gmail.com>
committer: zooba <steve.dower at microsoft.com>
date: 2022-06-10T16:59:55+01:00
summary:
gh-81790: support "UNC" device paths in `ntpath.splitdrive()` (GH-91882)
files:
A Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst
M Doc/library/os.path.rst
M Lib/ntpath.py
M Lib/pathlib.py
M Lib/test/test_ntpath.py
diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst
index ce7913e3712d7..85989ef32d491 100644
--- a/Doc/library/os.path.rst
+++ b/Doc/library/os.path.rst
@@ -469,7 +469,7 @@ the :mod:`glob` module.)
("c:", "/dir")
If the path contains a UNC path, drive will contain the host name
- and share, up to but not including the fourth separator::
+ and share::
>>> splitdrive("//host/computer/dir")
("//host/computer", "/dir")
diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index 73b1bd12ddca7..959bcd0983118 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -172,17 +172,23 @@ def splitdrive(p):
sep = b'\\'
altsep = b'/'
colon = b':'
+ unc_prefix = b'\\\\?\\UNC'
else:
sep = '\\'
altsep = '/'
colon = ':'
+ unc_prefix = '\\\\?\\UNC'
normp = p.replace(altsep, sep)
if (normp[0:2] == sep*2) and (normp[2:3] != sep):
# is a UNC path:
# vvvvvvvvvvvvvvvvvvvv drive letter or UNC path
# \\machine\mountpoint\directory\etc\...
# directory ^^^^^^^^^^^^^^^
- index = normp.find(sep, 2)
+ if normp[:8].upper().rstrip(sep) == unc_prefix:
+ start = 8
+ else:
+ start = 2
+ index = normp.find(sep, start)
if index == -1:
return p[:0], p
index2 = normp.find(sep, index + 1)
diff --git a/Lib/pathlib.py b/Lib/pathlib.py
index 26da4eda92719..bb440c9d57216 100644
--- a/Lib/pathlib.py
+++ b/Lib/pathlib.py
@@ -120,68 +120,18 @@ class _WindowsFlavour(_Flavour):
is_supported = (os.name == 'nt')
- drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')
- ext_namespace_prefix = '\\\\?\\'
-
reserved_names = (
{'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} |
{'COM%s' % c for c in '123456789\xb9\xb2\xb3'} |
{'LPT%s' % c for c in '123456789\xb9\xb2\xb3'}
)
- # Interesting findings about extended paths:
- # * '\\?\c:\a' is an extended path, which bypasses normal Windows API
- # path processing. Thus relative paths are not resolved and slash is not
- # translated to backslash. It has the native NT path limit of 32767
- # characters, but a bit less after resolving device symbolic links,
- # such as '\??\C:' => '\Device\HarddiskVolume2'.
- # * '\\?\c:/a' looks for a device named 'C:/a' because slash is a
- # regular name character in the object namespace.
- # * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems.
- # The only path separator at the filesystem level is backslash.
- # * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and
- # thus limited to MAX_PATH.
- # * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH,
- # even with the '\\?\' prefix.
-
def splitroot(self, part, sep=sep):
- first = part[0:1]
- second = part[1:2]
- if (second == sep and first == sep):
- # XXX extended paths should also disable the collapsing of "."
- # components (according to MSDN docs).
- prefix, part = self._split_extended_path(part)
- first = part[0:1]
- second = part[1:2]
+ drv, rest = self.pathmod.splitdrive(part)
+ if drv[:1] == sep or rest[:1] == sep:
+ return drv, sep, rest.lstrip(sep)
else:
- prefix = ''
- third = part[2:3]
- if (second == sep and first == sep and third != sep):
- # is a UNC path:
- # vvvvvvvvvvvvvvvvvvvvv root
- # \\machine\mountpoint\directory\etc\...
- # directory ^^^^^^^^^^^^^^
- index = part.find(sep, 2)
- if index != -1:
- index2 = part.find(sep, index + 1)
- # a UNC path can't have two slashes in a row
- # (after the initial two)
- if index2 != index + 1:
- if index2 == -1:
- index2 = len(part)
- if prefix:
- return prefix + part[1:index2], sep, part[index2+1:]
- else:
- return part[:index2], sep, part[index2+1:]
- drv = root = ''
- if second == ':' and first in self.drive_letters:
- drv = part[:2]
- part = part[2:]
- first = third
- if first == sep:
- root = first
- part = part.lstrip(sep)
- return prefix + drv, root, part
+ return drv, '', rest
def casefold(self, s):
return s.lower()
@@ -192,16 +142,6 @@ def casefold_parts(self, parts):
def compile_pattern(self, pattern):
return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch
- def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix):
- prefix = ''
- if s.startswith(ext_prefix):
- prefix = s[:4]
- s = s[4:]
- if s.startswith('UNC\\'):
- prefix += s[:3]
- s = '\\' + s[3:]
- return prefix, s
-
def is_reserved(self, parts):
# NOTE: the rules for reserved names seem somewhat complicated
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index ab3603bdd7301..d51946322c805 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -117,6 +117,31 @@ def test_splitdrive(self):
# Issue #19911: UNC part containing U+0130
self.assertEqual(ntpath.splitdrive('//conky/MOUNTPOİNT/foo/bar'),
('//conky/MOUNTPOİNT', '/foo/bar'))
+ # gh-81790: support device namespace, including UNC drives.
+ tester('ntpath.splitdrive("//?/c:")', ("//?/c:", ""))
+ tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/"))
+ tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir"))
+ tester('ntpath.splitdrive("//?/UNC")', ("", "//?/UNC"))
+ tester('ntpath.splitdrive("//?/UNC/")', ("", "//?/UNC/"))
+ tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", ""))
+ tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", ""))
+ tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir"))
+ tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")',
+ ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam'))
+ tester('ntpath.splitdrive("//?/BootPartition/")', ("//?/BootPartition", "/"))
+
+ tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", ""))
+ tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\"))
+ tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir"))
+ tester('ntpath.splitdrive("\\\\?\\UNC")', ("", "\\\\?\\UNC"))
+ tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("", "\\\\?\\UNC\\"))
+ tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", ""))
+ tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", ""))
+ tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")',
+ ("\\\\?\\UNC\\server\\share", "\\dir"))
+ tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")',
+ ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam'))
+ tester('ntpath.splitdrive("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\"))
def test_split(self):
tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar'))
diff --git a/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst b/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst
new file mode 100644
index 0000000000000..8894493e97410
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst
@@ -0,0 +1,2 @@
+:func:`os.path.splitdrive` now understands DOS device paths with UNC
+links (beginning ``\\?\UNC\``). Contributed by Barney Gale.
More information about the Python-checkins mailing list