[Python-checkins] bpo-28564: Use os.scandir() in shutil.rmtree(). (#4085)

Serhiy Storchaka webhook-mailer at python.org
Sat Nov 4 08:16:39 EDT 2017


https://github.com/python/cpython/commit/d4d79bc1ff91b04625c312f0219c89aabcd19ce4
commit: d4d79bc1ff91b04625c312f0219c89aabcd19ce4
branch: master
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2017-11-04T14:16:35+02:00
summary:

bpo-28564: Use os.scandir() in shutil.rmtree(). (#4085)

This speeds up it to 20-40%.

files:
A Misc/NEWS.d/next/Library/2017-10-23-16-22-54.bpo-28564.Tx-l-I.rst
M Doc/whatsnew/3.7.rst
M Lib/shutil.py
M Lib/test/test_shutil.py

diff --git a/Doc/whatsnew/3.7.rst b/Doc/whatsnew/3.7.rst
index eb64c6a2b64..d1792dc97ef 100644
--- a/Doc/whatsnew/3.7.rst
+++ b/Doc/whatsnew/3.7.rst
@@ -440,6 +440,10 @@ Optimizations
   using the :func:`os.scandir` function.
   (Contributed by Serhiy Storchaka in :issue:`25996`.)
 
+* The :func:`shutil.rmtree` function has been sped up to 20--40%.
+  This was done using the :func:`os.scandir` function.
+  (Contributed by Serhiy Storchaka in :issue:`28564`.)
+
 * Optimized case-insensitive matching and searching of :mod:`regular
   expressions <re>`.  Searching some patterns can now be up to 20 times faster.
   (Contributed by Serhiy Storchaka in :issue:`30285`.)
@@ -656,6 +660,11 @@ Changes in the Python API
 * ``repr`` for :class:`datetime.timedelta` has changed to include keyword arguments
   in the output. (Contributed by Utkarsh Upadhyay in :issue:`30302`.)
 
+* Because :func:`shutil.rmtree` is now implemented using the :func:`os.scandir`
+  function, the user specified handler *onerror* is now called with the first
+  argument ``os.scandir`` instead of ``os.listdir`` when listing the direcory
+  is failed.
+
 
 Changes in the C API
 --------------------
diff --git a/Lib/shutil.py b/Lib/shutil.py
index 464ee912f5c..3c02776a406 100644
--- a/Lib/shutil.py
+++ b/Lib/shutil.py
@@ -362,25 +362,27 @@ def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2,
 # version vulnerable to race conditions
 def _rmtree_unsafe(path, onerror):
     try:
-        if os.path.islink(path):
-            # symlinks to directories are forbidden, see bug #1669
-            raise OSError("Cannot call rmtree on a symbolic link")
+        with os.scandir(path) as scandir_it:
+            entries = list(scandir_it)
     except OSError:
-        onerror(os.path.islink, path, sys.exc_info())
-        # can't continue even if onerror hook returns
-        return
-    names = []
-    try:
-        names = os.listdir(path)
-    except OSError:
-        onerror(os.listdir, path, sys.exc_info())
-    for name in names:
-        fullname = os.path.join(path, name)
+        onerror(os.scandir, path, sys.exc_info())
+        entries = []
+    for entry in entries:
+        fullname = entry.path
         try:
-            mode = os.lstat(fullname).st_mode
+            is_dir = entry.is_dir(follow_symlinks=False)
         except OSError:
-            mode = 0
-        if stat.S_ISDIR(mode):
+            is_dir = False
+        if is_dir:
+            try:
+                if entry.is_symlink():
+                    # This can only happen if someone replaces
+                    # a directory with a symlink after the call to
+                    # os.scandir or entry.is_dir above.
+                    raise OSError("Cannot call rmtree on a symbolic link")
+            except OSError:
+                onerror(os.path.islink, fullname, sys.exc_info())
+                continue
             _rmtree_unsafe(fullname, onerror)
         else:
             try:
@@ -394,22 +396,25 @@ def _rmtree_unsafe(path, onerror):
 
 # Version using fd-based APIs to protect against races
 def _rmtree_safe_fd(topfd, path, onerror):
-    names = []
     try:
-        names = os.listdir(topfd)
+        with os.scandir(topfd) as scandir_it:
+            entries = list(scandir_it)
     except OSError as err:
         err.filename = path
-        onerror(os.listdir, path, sys.exc_info())
-    for name in names:
-        fullname = os.path.join(path, name)
+        onerror(os.scandir, path, sys.exc_info())
+        return
+    for entry in entries:
+        fullname = os.path.join(path, entry.name)
         try:
-            orig_st = os.stat(name, dir_fd=topfd, follow_symlinks=False)
-            mode = orig_st.st_mode
+            is_dir = entry.is_dir(follow_symlinks=False)
+            if is_dir:
+                orig_st = entry.stat(follow_symlinks=False)
+                is_dir = stat.S_ISDIR(orig_st.st_mode)
         except OSError:
-            mode = 0
-        if stat.S_ISDIR(mode):
+            is_dir = False
+        if is_dir:
             try:
-                dirfd = os.open(name, os.O_RDONLY, dir_fd=topfd)
+                dirfd = os.open(entry.name, os.O_RDONLY, dir_fd=topfd)
             except OSError:
                 onerror(os.open, fullname, sys.exc_info())
             else:
@@ -417,14 +422,14 @@ def _rmtree_safe_fd(topfd, path, onerror):
                     if os.path.samestat(orig_st, os.fstat(dirfd)):
                         _rmtree_safe_fd(dirfd, fullname, onerror)
                         try:
-                            os.rmdir(name, dir_fd=topfd)
+                            os.rmdir(entry.name, dir_fd=topfd)
                         except OSError:
                             onerror(os.rmdir, fullname, sys.exc_info())
                     else:
                         try:
                             # This can only happen if someone replaces
                             # a directory with a symlink after the call to
-                            # stat.S_ISDIR above.
+                            # os.scandir or stat.S_ISDIR above.
                             raise OSError("Cannot call rmtree on a symbolic "
                                           "link")
                         except OSError:
@@ -433,13 +438,13 @@ def _rmtree_safe_fd(topfd, path, onerror):
                     os.close(dirfd)
         else:
             try:
-                os.unlink(name, dir_fd=topfd)
+                os.unlink(entry.name, dir_fd=topfd)
             except OSError:
                 onerror(os.unlink, fullname, sys.exc_info())
 
 _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <=
                      os.supports_dir_fd and
-                     os.listdir in os.supports_fd and
+                     os.scandir in os.supports_fd and
                      os.stat in os.supports_follow_symlinks)
 
 def rmtree(path, ignore_errors=False, onerror=None):
@@ -491,6 +496,14 @@ def onerror(*args):
         finally:
             os.close(fd)
     else:
+        try:
+            if os.path.islink(path):
+                # symlinks to directories are forbidden, see bug #1669
+                raise OSError("Cannot call rmtree on a symbolic link")
+        except OSError:
+            onerror(os.path.islink, path, sys.exc_info())
+            # can't continue even if onerror hook returns
+            return
         return _rmtree_unsafe(path, onerror)
 
 # Allow introspection of whether or not the hardening against symlink
diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py
index 3b4891ddcaf..4a72b4a0764 100644
--- a/Lib/test/test_shutil.py
+++ b/Lib/test/test_shutil.py
@@ -183,7 +183,7 @@ def onerror(*args):
             errors.append(args)
         shutil.rmtree(filename, onerror=onerror)
         self.assertEqual(len(errors), 2)
-        self.assertIs(errors[0][0], os.listdir)
+        self.assertIs(errors[0][0], os.scandir)
         self.assertEqual(errors[0][1], filename)
         self.assertIsInstance(errors[0][2][1], NotADirectoryError)
         self.assertIn(errors[0][2][1].filename, possible_args)
diff --git a/Misc/NEWS.d/next/Library/2017-10-23-16-22-54.bpo-28564.Tx-l-I.rst b/Misc/NEWS.d/next/Library/2017-10-23-16-22-54.bpo-28564.Tx-l-I.rst
new file mode 100644
index 00000000000..0889119db35
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-10-23-16-22-54.bpo-28564.Tx-l-I.rst
@@ -0,0 +1,2 @@
+The shutil.rmtree() function has been sped up to 20--40%. This was done
+using the os.scandir() function.



More information about the Python-checkins mailing list