[Python-checkins] bpo-42398: Fix "make regen-all" race condition (GH-23362) (GH-23367)

vstinner webhook-mailer at python.org
Wed Nov 18 11:11:18 EST 2020


https://github.com/python/cpython/commit/c53c3f400050a7edc92ccb7285a6d7eeb4c37fd2
commit: c53c3f400050a7edc92ccb7285a6d7eeb4c37fd2
branch: 3.9
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2020-11-18T17:11:09+01:00
summary:

bpo-42398: Fix "make regen-all" race condition (GH-23362) (GH-23367)

Fix a race condition in "make regen-all" when make -jN option is used
to run jobs in parallel. The clinic.py script now only use atomic
write to write files. Moveover, generated files are now left
unchanged if the content does not change, to not change the file
modification time.

The "make regen-all" command runs "make clinic" and "make
regen-importlib" targets:

* "make regen-importlib" builds object files (ex: Modules/_weakref.o)
  from source files (ex: Modules/_weakref.c) and clinic files (ex:
  Modules/clinic/_weakref.c.h)
* "make clinic" always rewrites all clinic files
  (ex: Modules/clinic/_weakref.c.h)

Since there is no dependency between "clinic" and "regen-importlib"
Makefile targets, these two targets can be run in parallel. Moreover,
half of clinic.py file writes are not atomic and so there is a race
condition when "make regen-all" runs jobs in parallel using make -jN
option (which can be passed in MAKEFLAGS environment variable).

Fix clinic.py to make all file writes atomic:

* Add write_file() function to ensure that all file writes are
  atomic: write into a temporary file and then use os.replace().
* Moreover, write_file() doesn't recreate or modify the file if the
  content does not change to avoid modifying the file modification
  file.
* Update test_clinic to verify these assertions with a functional
  test.
* Remove Clinic.force attribute which was no longer used, whereas
  Clinic.verify remains useful.

(cherry picked from commit 8fba9523cf08029dc2e280d9f48fdd57ab178c9d)

files:
A Misc/NEWS.d/next/Build/2020-11-18-11-58-44.bpo-42398.Yt5wO8.rst
M Lib/test/test_clinic.py
M Tools/clinic/clinic.py

diff --git a/Lib/test/test_clinic.py b/Lib/test/test_clinic.py
index 3d5dc4759d501..9989b148afbd7 100644
--- a/Lib/test/test_clinic.py
+++ b/Lib/test/test_clinic.py
@@ -794,17 +794,29 @@ class ClinicExternalTest(TestCase):
     maxDiff = None
 
     def test_external(self):
+        # bpo-42398: Test that the destination file is left unchanged if the
+        # content does not change. Moreover, check also that the file
+        # modification time does not change in this case.
         source = support.findfile('clinic.test')
         with open(source, 'r', encoding='utf-8') as f:
-            original = f.read()
-        with support.temp_dir() as testdir:
-            testfile = os.path.join(testdir, 'clinic.test.c')
+            orig_contents = f.read()
+
+        with support.temp_dir() as tmp_dir:
+            testfile = os.path.join(tmp_dir, 'clinic.test.c')
             with open(testfile, 'w', encoding='utf-8') as f:
-                f.write(original)
-            clinic.parse_file(testfile, force=True)
+                f.write(orig_contents)
+            old_mtime_ns = os.stat(testfile).st_mtime_ns
+
+            clinic.parse_file(testfile)
+
             with open(testfile, 'r', encoding='utf-8') as f:
-                result = f.read()
-            self.assertEqual(result, original)
+                new_contents = f.read()
+            new_mtime_ns = os.stat(testfile).st_mtime_ns
+
+        self.assertEqual(new_contents, orig_contents)
+        # Don't change the file modification time
+        # if the content does not change
+        self.assertEqual(new_mtime_ns, old_mtime_ns)
 
 
 if __name__ == "__main__":
diff --git a/Misc/NEWS.d/next/Build/2020-11-18-11-58-44.bpo-42398.Yt5wO8.rst b/Misc/NEWS.d/next/Build/2020-11-18-11-58-44.bpo-42398.Yt5wO8.rst
new file mode 100644
index 0000000000000..9ab99d0e69dd1
--- /dev/null
+++ b/Misc/NEWS.d/next/Build/2020-11-18-11-58-44.bpo-42398.Yt5wO8.rst
@@ -0,0 +1,4 @@
+Fix a race condition in "make regen-all" when make -jN option is used to run
+jobs in parallel. The clinic.py script now only use atomic write to write
+files. Moveover, generated files are now left unchanged if the content does not
+change, to not change the file modification time.
diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py
index b07ffdd928f15..34b58079281b6 100755
--- a/Tools/clinic/clinic.py
+++ b/Tools/clinic/clinic.py
@@ -1777,6 +1777,30 @@ def dump(self):
 # The callable should not call builtins.print.
 return_converters = {}
 
+
+def write_file(filename, new_contents):
+    try:
+        with open(filename, 'r', encoding="utf-8") as fp:
+            old_contents = fp.read()
+
+        if old_contents == new_contents:
+            # no change: avoid modifying the file modification time
+            return
+    except FileNotFoundError:
+        pass
+
+    # Atomic write using a temporary file and os.replace()
+    filename_new = f"{filename}.new"
+    with open(filename_new, "w", encoding="utf-8") as fp:
+        fp.write(new_contents)
+
+    try:
+        os.replace(filename_new, filename)
+    except:
+        os.unlink(filename_new)
+        raise
+
+
 clinic = None
 class Clinic:
 
@@ -1823,7 +1847,7 @@ class Clinic:
 
 """
 
-    def __init__(self, language, printer=None, *, force=False, verify=True, filename=None):
+    def __init__(self, language, printer=None, *, verify=True, filename=None):
         # maps strings to Parser objects.
         # (instantiated from the "parsers" global.)
         self.parsers = {}
@@ -1832,7 +1856,6 @@ def __init__(self, language, printer=None, *, force=False, verify=True, filename
             fail("Custom printers are broken right now")
         self.printer = printer or BlockPrinter(language)
         self.verify = verify
-        self.force = force
         self.filename = filename
         self.modules = collections.OrderedDict()
         self.classes = collections.OrderedDict()
@@ -1965,8 +1988,7 @@ def parse(self, input):
                     block.input = 'preserve\n'
                     printer_2 = BlockPrinter(self.language)
                     printer_2.print_block(block)
-                    with open(destination.filename, "wt") as f:
-                        f.write(printer_2.f.getvalue())
+                    write_file(destination.filename, printer_2.f.getvalue())
                     continue
         text = printer.f.getvalue()
 
@@ -2018,7 +2040,10 @@ def _module_and_class(self, fields):
         return module, cls
 
 
-def parse_file(filename, *, force=False, verify=True, output=None, encoding='utf-8'):
+def parse_file(filename, *, verify=True, output=None):
+    if not output:
+        output = filename
+
     extension = os.path.splitext(filename)[1][1:]
     if not extension:
         fail("Can't extract file type for file " + repr(filename))
@@ -2028,7 +2053,7 @@ def parse_file(filename, *, force=False, verify=True, output=None, encoding='utf
     except KeyError:
         fail("Can't identify file type for file " + repr(filename))
 
-    with open(filename, 'r', encoding=encoding) as f:
+    with open(filename, 'r', encoding="utf-8") as f:
         raw = f.read()
 
     # exit quickly if there are no clinic markers in the file
@@ -2036,19 +2061,10 @@ def parse_file(filename, *, force=False, verify=True, output=None, encoding='utf
     if not find_start_re.search(raw):
         return
 
-    clinic = Clinic(language, force=force, verify=verify, filename=filename)
+    clinic = Clinic(language, verify=verify, filename=filename)
     cooked = clinic.parse(raw)
-    if (cooked == raw) and not force:
-        return
-
-    directory = os.path.dirname(filename) or '.'
 
-    with tempfile.TemporaryDirectory(prefix="clinic", dir=directory) as tmpdir:
-        bytes = cooked.encode(encoding)
-        tmpfilename = os.path.join(tmpdir, os.path.basename(filename))
-        with open(tmpfilename, "wb") as f:
-            f.write(bytes)
-        os.replace(tmpfilename, output or filename)
+    write_file(output, cooked)
 
 
 def compute_checksum(input, length=None):
@@ -5087,7 +5103,7 @@ def main(argv):
                 path = os.path.join(root, filename)
                 if ns.verbose:
                     print(path)
-                parse_file(path, force=ns.force, verify=not ns.force)
+                parse_file(path, verify=not ns.force)
         return
 
     if not ns.filename:
@@ -5103,7 +5119,7 @@ def main(argv):
     for filename in ns.filename:
         if ns.verbose:
             print(filename)
-        parse_file(filename, output=ns.output, force=ns.force, verify=not ns.force)
+        parse_file(filename, output=ns.output, verify=not ns.force)
 
 
 if __name__ == "__main__":



More information about the Python-checkins mailing list