[pypy-commit] pypy default: Add os.path.normpath(), and call it automatically from _posix_rabspath().

arigo noreply at buildbot.pypy.org
Sun Aug 31 13:59:07 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r73225:5a0be08136b5
Date: 2014-08-31 13:58 +0200
http://bitbucket.org/pypy/pypy/changeset/5a0be08136b5/

Log:	Add os.path.normpath(), and call it automatically from
	_posix_rabspath().

diff --git a/rpython/flowspace/specialcase.py b/rpython/flowspace/specialcase.py
--- a/rpython/flowspace/specialcase.py
+++ b/rpython/flowspace/specialcase.py
@@ -66,6 +66,7 @@
 
 redirect_function(os.path.isdir,   'rpython.rlib.rpath.risdir')
 redirect_function(os.path.isabs,   'rpython.rlib.rpath.risabs')
+redirect_function(os.path.normpath,'rpython.rlib.rpath.rnormpath')
 redirect_function(os.path.abspath, 'rpython.rlib.rpath.rabspath')
 redirect_function(os.path.join,    'rpython.rlib.rpath.rjoin')
 if hasattr(os.path, 'splitdrive'):
diff --git a/rpython/rlib/rpath.py b/rpython/rlib/rpath.py
--- a/rpython/rlib/rpath.py
+++ b/rpython/rlib/rpath.py
@@ -29,6 +29,33 @@
     """Test whether a path is absolute"""
     return s.startswith('/')
 
+def _posix_rnormpath(path):
+    """Normalize path, eliminating double slashes, etc."""
+    slash, dot = '/', '.'
+    if path == '':
+        return dot
+    initial_slashes = path.startswith('/')
+    # POSIX allows one or two initial slashes, but treats three or more
+    # as single slash.
+    if (initial_slashes and
+        path.startswith('//') and not path.startswith('///')):
+        initial_slashes = 2
+    comps = path.split('/')
+    new_comps = []
+    for comp in comps:
+        if comp == '' or comp == '.':
+            continue
+        if (comp != '..' or (not initial_slashes and not new_comps) or
+             (new_comps and new_comps[-1] == '..')):
+            new_comps.append(comp)
+        elif new_comps:
+            new_comps.pop()
+    comps = new_comps
+    path = slash.join(comps)
+    if initial_slashes:
+        path = slash*initial_slashes + path
+    return path or dot
+
 def _posix_rabspath(path):
     """Return an absolute, **non-normalized** path.
       **This version does not let exceptions propagate.**"""
@@ -36,7 +63,7 @@
         if not _posix_risabs(path):
             cwd = os.getcwd()
             path = _posix_rjoin(cwd, path)
-        return path
+        return _posix_rnormpath(path)
     except OSError:
         return path
 
@@ -65,6 +92,56 @@
     s = _nt_rsplitdrive(s)[1]
     return s.startswith('/') or s.startswith('\\')
 
+def _nt_rnormpath(path):
+    """Normalize path, eliminating double slashes, etc."""
+    backslash, dot = '\\', '.'
+    if path.startswith(('\\\\.\\', '\\\\?\\')):
+        # in the case of paths with these prefixes:
+        # \\.\ -> device names
+        # \\?\ -> literal paths
+        # do not do any normalization, but return the path unchanged
+        return path
+    path = path.replace("/", "\\")
+    prefix, path = _nt_rsplitdrive(path)
+    # We need to be careful here. If the prefix is empty, and the path starts
+    # with a backslash, it could either be an absolute path on the current
+    # drive (\dir1\dir2\file) or a UNC filename (\\server\mount\dir1\file). It
+    # is therefore imperative NOT to collapse multiple backslashes blindly in
+    # that case.
+    # The code below preserves multiple backslashes when there is no drive
+    # letter. This means that the invalid filename \\\a\b is preserved
+    # unchanged, where a\\\b is normalised to a\b. It's not clear that there
+    # is any better behaviour for such edge cases.
+    if prefix == '':
+        # No drive letter - preserve initial backslashes
+        while path.startswith("\\"):
+            prefix = prefix + backslash
+            path = path[1:]
+    else:
+        # We have a drive letter - collapse initial backslashes
+        if path.startswith("\\"):
+            prefix = prefix + backslash
+            path = path.lstrip("\\")
+    comps = path.split("\\")
+    i = 0
+    while i < len(comps):
+        if comps[i] in ('.', ''):
+            del comps[i]
+        elif comps[i] == '..':
+            if i > 0 and comps[i-1] != '..':
+                del comps[i-1:i+1]
+                i -= 1
+            elif i == 0 and prefix.endswith("\\"):
+                del comps[i]
+            else:
+                i += 1
+        else:
+            i += 1
+    # If the path is now empty, substitute '.'
+    if not prefix and not comps:
+        comps.append(dot)
+    return prefix + backslash.join(comps)
+
 def _nt_rabspath(path):
     try:
         if path == '':
@@ -138,11 +215,13 @@
 if os.name == 'posix':
     sep = altsep = '/'
     risabs      = _posix_risabs
+    rnormpath   = _posix_rnormpath
     rabspath    = _posix_rabspath
     rjoin       = _posix_rjoin
 elif os.name == 'nt':
     sep, altsep = '\\', '/'
     risabs      = _nt_risabs
+    rnormpath   = _nt_rnormpath
     rabspath    = _nt_rabspath
     rsplitdrive = _nt_rsplitdrive
     rjoin       = _nt_rjoin
diff --git a/rpython/rlib/test/test_rpath.py b/rpython/rlib/test/test_rpath.py
--- a/rpython/rlib/test/test_rpath.py
+++ b/rpython/rlib/test/test_rpath.py
@@ -2,12 +2,48 @@
 import os
 from rpython.rlib import rpath
 
+def test_rnormpath_posix():
+    assert rpath._posix_rnormpath('///foo') == '/foo'
+    assert rpath._posix_rnormpath("") == "."
+    assert rpath._posix_rnormpath("/") == "/"
+    assert rpath._posix_rnormpath("//") == "//"
+    assert rpath._posix_rnormpath("///") == "/"
+    assert rpath._posix_rnormpath("///foo/.//bar//") == "/foo/bar"
+    assert rpath._posix_rnormpath("///foo/.//bar//.//..//.//baz") == "/foo/baz"
+    assert rpath._posix_rnormpath("///..//./foo/.//bar") == "/foo/bar"
+
+def test_rnormpath_nt():
+    assert rpath._nt_rnormpath('A//////././//.//B') == r'A\B'
+    assert rpath._nt_rnormpath('A/./B') == r'A\B'
+    assert rpath._nt_rnormpath('A/foo/../B') == r'A\B'
+    assert rpath._nt_rnormpath('C:A//B') == r'C:A\B'
+    assert rpath._nt_rnormpath('D:A/./B') == r'D:A\B'
+    assert rpath._nt_rnormpath('e:A/foo/../B') == r'e:A\B'
+    assert rpath._nt_rnormpath('C:///A//B') == r'C:\A\B'
+    assert rpath._nt_rnormpath('D:///A/./B') == r'D:\A\B'
+    assert rpath._nt_rnormpath('e:///A/foo/../B') == r'e:\A\B'
+    assert rpath._nt_rnormpath('..') == r'..'
+    assert rpath._nt_rnormpath('.') == r'.'
+    assert rpath._nt_rnormpath('') == r'.'
+    assert rpath._nt_rnormpath('/') == '\\'
+    assert rpath._nt_rnormpath('c:/') == 'c:\\'
+    assert rpath._nt_rnormpath('/../.././..') == '\\'
+    assert rpath._nt_rnormpath('c:/../../..') == 'c:\\'
+    assert rpath._nt_rnormpath('../.././..') == r'..\..\..'
+    assert rpath._nt_rnormpath('K:../.././..') == r'K:..\..\..'
+    assert rpath._nt_rnormpath('C:////a/b') == r'C:\a\b'
+    assert rpath._nt_rnormpath('//machine/share//a/b') == r'\\machine\share\a\b'
+    assert rpath._nt_rnormpath('\\\\.\\NUL') == r'\\.\NUL'
+    assert rpath._nt_rnormpath('\\\\?\\D:/XY\\Z') == r'\\?\D:/XY\Z'
+
 def test_rabspath_relative(tmpdir):
     tmpdir.chdir()
     assert rpath.rabspath('foo') == os.path.realpath(str(tmpdir.join('foo')))
 
 def test_rabspath_absolute_posix():
     assert rpath._posix_rabspath('/foo') == '/foo'
+    assert rpath._posix_rabspath('/foo/bar/..') == '/foo'
+    assert rpath._posix_rabspath('/foo/bar/../x') == '/foo/x'
 
 @py.test.mark.skipif("os.name == 'nt'")
 def test_missing_current_dir(tmpdir):
@@ -28,6 +64,9 @@
 
 @py.test.mark.skipif("os.name != 'nt'")
 def test_rabspath_absolute_nt():
+    assert rpath._nt_rabspath('d:\\foo') == 'd:\\foo'
+    assert rpath._nt_rabspath('d:\\foo\\bar\\..') == 'd:\\foo'
+    assert rpath._nt_rabspath('d:\\foo\\bar\\..\\x') == 'd:\\foo\\x'
     curdrive = _ = rpath._nt_rsplitdrive(os.getcwd())
     assert len(curdrive) == 2 and curdrive[1] == ':'
     assert rpath.rabspath('\\foo') == '%s\\foo' % curdrive
@@ -46,6 +85,7 @@
     assert not rpath._nt_risabs('C:FOO')
 
 def test_risdir(tmpdir):
+    tmpdir = str(tmpdir)
     assert rpath.risdir(tmpdir)
     assert not rpath.risdir('_some_non_existant_file_')
     assert not rpath.risdir(os.path.join(tmpdir, '_some_non_existant_file_'))


More information about the pypy-commit mailing list