[Python-checkins] bpo-45850: Implement deep-freeze on Windows (#29648)

gvanrossum webhook-mailer at python.org
Mon Nov 22 13:09:57 EST 2021


https://github.com/python/cpython/commit/1037ca5a8ea001bfa2a198e08655620234e9befd
commit: 1037ca5a8ea001bfa2a198e08655620234e9befd
branch: main
author: Guido van Rossum <guido at python.org>
committer: gvanrossum <gvanrossum at gmail.com>
date: 2021-11-22T10:09:48-08:00
summary:

bpo-45850: Implement deep-freeze on Windows (#29648)

Implement changes to build with deep-frozen modules on Windows.
Note that we now require Python 3.10 as the "bootstrap" or "host" Python.
This causes a modest startup speedup (around 7%) on Windows.

files:
A Misc/NEWS.d/next/Windows/2021-11-20-00-06-59.bpo-45850.q9lofz.rst
A Tools/scripts/startuptime.py
A Tools/scripts/umarshal.py
M PCbuild/_freeze_module.vcxproj
M PCbuild/find_python.bat
M PCbuild/pythoncore.vcxproj
M Python/frozen.c
M Tools/scripts/deepfreeze.py
M Tools/scripts/freeze_modules.py

diff --git a/Misc/NEWS.d/next/Windows/2021-11-20-00-06-59.bpo-45850.q9lofz.rst b/Misc/NEWS.d/next/Windows/2021-11-20-00-06-59.bpo-45850.q9lofz.rst
new file mode 100644
index 0000000000000..a84e1feb75025
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2021-11-20-00-06-59.bpo-45850.q9lofz.rst
@@ -0,0 +1,2 @@
+Implement changes to build with deep-frozen modules on Windows.
+Note that we now require Python 3.10 as the "bootstrap" or "host" Python.
diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj
index 6a91776b9d8e9..54fef9ca629b1 100644
--- a/PCbuild/_freeze_module.vcxproj
+++ b/PCbuild/_freeze_module.vcxproj
@@ -236,101 +236,141 @@
       <ModName>importlib._bootstrap</ModName>
       <IntFile>$(IntDir)importlib._bootstrap.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\importlib._bootstrap.h</OutFile>
+      <DeepIntFile>$(IntDir)importlib._bootstrap.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.importlib._bootstrap.c</DeepOutFile>
     </None>
     <None Include="..\Lib\importlib\_bootstrap_external.py">
       <ModName>importlib._bootstrap_external</ModName>
       <IntFile>$(IntDir)importlib._bootstrap_external.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\importlib._bootstrap_external.h</OutFile>
+      <DeepIntFile>$(IntDir)importlib._bootstrap_external.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.importlib._bootstrap_external.c</DeepOutFile>
     </None>
     <None Include="..\Lib\zipimport.py">
       <ModName>zipimport</ModName>
       <IntFile>$(IntDir)zipimport.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\zipimport.h</OutFile>
+      <DeepIntFile>$(IntDir)zipimport.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.zipimport.c</DeepOutFile>
     </None>
     <None Include="..\Lib\abc.py">
       <ModName>abc</ModName>
       <IntFile>$(IntDir)abc.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\abc.h</OutFile>
+      <DeepIntFile>$(IntDir)abc.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.abc.c</DeepOutFile>
     </None>
     <None Include="..\Lib\codecs.py">
       <ModName>codecs</ModName>
       <IntFile>$(IntDir)codecs.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\codecs.h</OutFile>
+      <DeepIntFile>$(IntDir)codecs.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.codecs.c</DeepOutFile>
     </None>
     <None Include="..\Lib\io.py">
       <ModName>io</ModName>
       <IntFile>$(IntDir)io.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\io.h</OutFile>
+      <DeepIntFile>$(IntDir)io.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.io.c</DeepOutFile>
     </None>
     <None Include="..\Lib\_collections_abc.py">
       <ModName>_collections_abc</ModName>
       <IntFile>$(IntDir)_collections_abc.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\_collections_abc.h</OutFile>
+      <DeepIntFile>$(IntDir)_collections_abc.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df._collections_abc.c</DeepOutFile>
     </None>
     <None Include="..\Lib\_sitebuiltins.py">
       <ModName>_sitebuiltins</ModName>
       <IntFile>$(IntDir)_sitebuiltins.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\_sitebuiltins.h</OutFile>
+      <DeepIntFile>$(IntDir)_sitebuiltins.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df._sitebuiltins.c</DeepOutFile>
     </None>
     <None Include="..\Lib\genericpath.py">
       <ModName>genericpath</ModName>
       <IntFile>$(IntDir)genericpath.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\genericpath.h</OutFile>
+      <DeepIntFile>$(IntDir)genericpath.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.genericpath.c</DeepOutFile>
     </None>
     <None Include="..\Lib\ntpath.py">
       <ModName>ntpath</ModName>
       <IntFile>$(IntDir)ntpath.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\ntpath.h</OutFile>
+      <DeepIntFile>$(IntDir)ntpath.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.ntpath.c</DeepOutFile>
     </None>
     <None Include="..\Lib\posixpath.py">
       <ModName>posixpath</ModName>
       <IntFile>$(IntDir)posixpath.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\posixpath.h</OutFile>
+      <DeepIntFile>$(IntDir)posixpath.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.posixpath.c</DeepOutFile>
     </None>
     <None Include="..\Lib\os.py">
       <ModName>os</ModName>
       <IntFile>$(IntDir)os.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\os.h</OutFile>
+      <DeepIntFile>$(IntDir)os.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.os.c</DeepOutFile>
     </None>
     <None Include="..\Lib\site.py">
       <ModName>site</ModName>
       <IntFile>$(IntDir)site.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\site.h</OutFile>
+      <DeepIntFile>$(IntDir)site.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.site.c</DeepOutFile>
     </None>
     <None Include="..\Lib\stat.py">
       <ModName>stat</ModName>
       <IntFile>$(IntDir)stat.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\stat.h</OutFile>
+      <DeepIntFile>$(IntDir)stat.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.stat.c</DeepOutFile>
     </None>
     <None Include="..\Lib\__hello__.py">
       <ModName>__hello__</ModName>
       <IntFile>$(IntDir)__hello__.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\__hello__.h</OutFile>
+      <DeepIntFile>$(IntDir)__hello__.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__hello__.c</DeepOutFile>
     </None>
     <None Include="..\Lib\__phello__\__init__.py">
       <ModName>__phello__</ModName>
       <IntFile>$(IntDir)__phello__.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\__phello__.h</OutFile>
+      <DeepIntFile>$(IntDir)__phello__.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__phello__.c</DeepOutFile>
     </None>
     <None Include="..\Lib\__phello__\ham\__init__.py">
       <ModName>__phello__.ham</ModName>
       <IntFile>$(IntDir)__phello__.ham.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\__phello__.ham.h</OutFile>
+      <DeepIntFile>$(IntDir)__phello__.ham.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__phello__.ham.c</DeepOutFile>
     </None>
     <None Include="..\Lib\__phello__\ham\eggs.py">
       <ModName>__phello__.ham.eggs</ModName>
       <IntFile>$(IntDir)__phello__.ham.eggs.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\__phello__.ham.eggs.h</OutFile>
+      <DeepIntFile>$(IntDir)__phello__.ham.eggs.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__phello__.ham.eggs.c</DeepOutFile>
     </None>
     <None Include="..\Lib\__phello__\spam.py">
       <ModName>__phello__.spam</ModName>
       <IntFile>$(IntDir)__phello__.spam.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\__phello__.spam.h</OutFile>
+      <DeepIntFile>$(IntDir)__phello__.spam.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.__phello__.spam.c</DeepOutFile>
     </None>
     <None Include="..\Tools\freeze\flag.py">
       <ModName>frozen_only</ModName>
       <IntFile>$(IntDir)frozen_only.g.h</IntFile>
       <OutFile>$(PySourcePath)Python\frozen_modules\frozen_only.h</OutFile>
+      <DeepIntFile>$(IntDir)frozen_only.g.c</DeepIntFile>
+      <DeepOutFile>$(PySourcePath)Python\deepfreeze\df.frozen_only.c</DeepOutFile>
     </None>
     <!-- END frozen modules -->
   </ItemGroup>
@@ -338,17 +378,29 @@
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
   <Target Name="_RebuildFrozen" AfterTargets="AfterBuild" Condition="$(Configuration) != 'PGUpdate'">
-    <Exec Command='"$(TargetPath)" "%(None.ModName)" "%(None.FullPath)" "%(None.IntFile)"' />
+    <Exec Command='"$(TargetPath)" "%(None.ModName)" "%(None.FullPath)" "%(None.DeepIntFile)"' />
 
-    <Copy SourceFiles="%(None.IntFile)"
+    <Copy SourceFiles="%(None.DeepIntFile)"
           DestinationFiles="%(None.OutFile)"
-          Condition="!Exists(%(None.OutFile)) or (Exists(%(None.IntFile)) and '$([System.IO.File]::ReadAllText(%(None.OutFile)).Replace(`&#x0D;&#x0A;`, `&#x0A;`))' != '$([System.IO.File]::ReadAllText(%(None.IntFile)).Replace(`&#x0D;&#x0A;`, `&#x0A;`))')">
+          Condition="!Exists(%(None.OutFile)) or (Exists(%(None.DeepIntFile)) and '$([System.IO.File]::ReadAllText(%(None.OutFile)).Replace(`&#x0D;&#x0A;`, `&#x0A;`))' != '$([System.IO.File]::ReadAllText(%(None.DeepIntFile)).Replace(`&#x0D;&#x0A;`, `&#x0A;`))')">
       <Output TaskParameter="CopiedFiles" ItemName="_Updated" />
     </Copy>
 
     <Message Text="Updated files: @(_Updated->'%(Filename)%(Extension)',', ')"
              Condition="'@(_Updated)' != ''" Importance="high" />
   </Target>
+  <Target Name="_RebuildDeepFrozen" AfterTargets="_RebuildFrozen" Condition="$(Configuration) != 'PGUpdate'">
+    <Exec Command='$(PythonForBuild) "$(PySourcePath)Tools\scripts\deepfreeze.py" "%(None.OutFile)" "-m" "%(None.ModName)" -o "%(None.IntFile)"' />
+
+    <Copy SourceFiles="%(None.IntFile)"
+          DestinationFiles="%(None.DeepOutFile)"
+          Condition="!Exists(%(None.DeepOutFile)) or (Exists(%(None.IntFile)) and '$([System.IO.File]::ReadAllText(%(None.DeepOutFile)).Replace(`&#x0D;&#x0A;`, `&#x0A;`))' != '$([System.IO.File]::ReadAllText(%(None.IntFile)).Replace(`&#x0D;&#x0A;`, `&#x0A;`))')">
+      <Output TaskParameter="CopiedFiles" ItemName="_DeepUpdated" />
+    </Copy>
+
+    <Message Text="Updated files: @(_DeepUpdated->'%(Filename)%(Extension)',', ')"
+             Condition="'@(_DeepUpdated)' != ''" Importance="high" />
+  </Target>
   <Target Name="_CleanFrozen" BeforeTargets="CoreClean" Condition="$(Configuration) != 'PGUpdate'">
     <ItemGroup>
       <Clean Include="%(None.IntFile)" />
diff --git a/PCbuild/find_python.bat b/PCbuild/find_python.bat
index d0e4a86b01fd0..a9f14c5277ffa 100644
--- a/PCbuild/find_python.bat
+++ b/PCbuild/find_python.bat
@@ -31,13 +31,13 @@
 @if "%_Py_EXTERNALS_DIR%"=="" (set _Py_EXTERNALS_DIR=%~dp0\..\externals)
 
 @rem If we have Python in externals, use that one
- at if exist "%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" (set PYTHON="%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe") & (set _Py_Python_Source=found in externals directory) & goto :found
+ at if exist "%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" ("%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe" -Ec "import sys; assert sys.version_info[:2] >= (3, 10)" >nul 2>nul) && (set PYTHON="%_Py_EXTERNALS_DIR%\pythonx86\tools\python.exe") && (set _Py_Python_Source=found in externals directory) && goto :found || rmdir /Q /S "%_Py_EXTERNALS_DIR%\pythonx86"
 
 @rem If HOST_PYTHON is recent enough, use that
- at if NOT "%HOST_PYTHON%"=="" @%HOST_PYTHON% -Ec "import sys; assert sys.version_info[:2] >= (3, 8)" >nul 2>nul && (set PYTHON="%HOST_PYTHON%") && (set _Py_Python_Source=found as HOST_PYTHON) && goto :found
+ at if NOT "%HOST_PYTHON%"=="" @%HOST_PYTHON% -Ec "import sys; assert sys.version_info[:2] >= (3, 10)" >nul 2>nul && (set PYTHON="%HOST_PYTHON%") && (set _Py_Python_Source=found as HOST_PYTHON) && goto :found
 
 @rem If py.exe finds a recent enough version, use that one
- at for %%p in (3.9 3.8) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with py.exe) && goto :found
+ at for %%p in (3.10) do @py -%%p -EV >nul 2>&1 && (set PYTHON=py -%%p) && (set _Py_Python_Source=found %%p with py.exe) && goto :found
 
 @if NOT exist "%_Py_EXTERNALS_DIR%" mkdir "%_Py_EXTERNALS_DIR%"
 @set _Py_NUGET=%NUGET%
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index 70f05563fa391..e1d59de7bc8f2 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -502,6 +502,30 @@
     <ClCompile Include="..\Python\thread.c" />
     <ClCompile Include="..\Python\traceback.c" />
   </ItemGroup>
+  <ItemGroup>
+    <!-- BEGIN deepfreeze -->
+    <ClCompile Include="..\Python\deepfreeze\df.importlib._bootstrap.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.importlib._bootstrap_external.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.zipimport.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.abc.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.codecs.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.io.c" />
+    <ClCompile Include="..\Python\deepfreeze\df._collections_abc.c" />
+    <ClCompile Include="..\Python\deepfreeze\df._sitebuiltins.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.genericpath.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.ntpath.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.posixpath.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.os.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.site.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.stat.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.__hello__.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.__phello__.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.__phello__.ham.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.__phello__.ham.eggs.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.__phello__.spam.c" />
+    <ClCompile Include="..\Python\deepfreeze\df.frozen_only.c" />
+    <!-- END deepfreeze -->
+    </ItemGroup>
   <ItemGroup Condition="$(IncludeExternals)">
     <ClCompile Include="..\Modules\zlibmodule.c" />
     <ClCompile Include="$(zlibDir)\adler32.c" />
diff --git a/Python/frozen.c b/Python/frozen.c
index 1565c9a3d73f7..9f43db70886f7 100644
--- a/Python/frozen.c
+++ b/Python/frozen.c
@@ -61,12 +61,7 @@
 #include "frozen_modules/frozen_only.h"
 /* End includes */
 
-#ifdef MS_WINDOWS
-/* Deepfreeze isn't supported on Windows yet. */
-#define GET_CODE(name) NULL
-#else
 #define GET_CODE(name) _Py_get_##name##_toplevel
-#endif
 
 /* Start extern declarations */
 extern PyObject *_Py_get_importlib__bootstrap_toplevel(void);
diff --git a/Tools/scripts/deepfreeze.py b/Tools/scripts/deepfreeze.py
index 074127f949298..b6d52b7454965 100644
--- a/Tools/scripts/deepfreeze.py
+++ b/Tools/scripts/deepfreeze.py
@@ -1,13 +1,16 @@
 import argparse
+import ast
 import builtins
 import collections
 import contextlib
 import os
-import sys
+import re
 import time
 import types
 import typing
 
+import umarshal
+
 verbose = False
 
 
@@ -55,7 +58,8 @@ def get_localsplus_counts(code: types.CodeType,
             nplaincellvars += 1
         elif kind & CO_FAST_FREE:
             nfreevars += 1
-    assert nlocals == len(code.co_varnames) == code.co_nlocals
+    assert nlocals == len(code.co_varnames) == code.co_nlocals, \
+        (nlocals, len(code.co_varnames), code.co_nlocals)
     assert ncellvars == len(code.co_cellvars)
     assert nfreevars == len(code.co_freevars)
     assert len(names) == nlocals + nplaincellvars + nfreevars
@@ -274,14 +278,7 @@ def generate_tuple(self, name: str, t: tuple[object, ...]) -> str:
                             self.write(item + ",")
         return f"& {name}._object.ob_base.ob_base"
 
-    def generate_int(self, name: str, i: int) -> str:
-        maxint = sys.maxsize
-        if maxint == 2**31 - 1:
-            digit = 2**15
-        elif maxint == 2**63 - 1:
-            digit = 2**30
-        else:
-            assert False, f"What int size is this system?!? {maxint=}"
+    def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
         sign = -1 if i < 0 else 0 if i == 0 else +1
         i = abs(i)
         digits: list[int] = []
@@ -298,6 +295,20 @@ def generate_int(self, name: str, i: int) -> str:
             if digits:
                 ds = ", ".join(map(str, digits))
                 self.write(f".ob_digit = {{ {ds} }},")
+
+    def generate_int(self, name: str, i: int) -> str:
+        if abs(i) < 2**15:
+            self._generate_int_for_bits(name, i, 2**15)
+        else:
+            connective = "if"
+            for bits_in_digit in 15, 30:
+                self.write(f"#{connective} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
+                self._generate_int_for_bits(name, i, 2**bits_in_digit)
+                connective = "elif"
+            self.write("#else")
+            self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
+            self.write("#endif")
+            # If neither clause applies, it won't compile
         return f"& {name}.ob_base.ob_base"
 
     def generate_float(self, name: str, x: float) -> str:
@@ -326,7 +337,7 @@ def generate(self, name: str, obj: object) -> str:
             return self.cache[key]
         self.misses += 1
         match obj:
-            case types.CodeType() as code:
+            case types.CodeType() | umarshal.Code() as code:
                 val = self.generate_code(name, code)
             case tuple(t):
                 val = self.generate_tuple(name, t)
@@ -367,8 +378,31 @@ def generate(self, name: str, obj: object) -> str:
 }
 """
 
+FROZEN_COMMENT = "/* Auto-generated by Programs/_freeze_module.c */"
+
+FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"
+
+
+def is_frozen_header(source: str) -> bool:
+    return source.startswith(FROZEN_COMMENT)
+
+
+def decode_frozen_data(source: str) -> types.CodeType:
+    lines = source.splitlines()
+    while lines and re.match(FROZEN_DATA_LINE, lines[0]) is None:
+        del lines[0]
+    while lines and re.match(FROZEN_DATA_LINE, lines[-1]) is None:
+        del lines[-1]
+    values: tuple[int, ...] = ast.literal_eval("".join(lines))
+    data = bytes(values)
+    return umarshal.loads(data)
+
+
 def generate(source: str, filename: str, modname: str, file: typing.TextIO) -> None:
-    code = compile(source, filename, "exec")
+    if is_frozen_header(source):
+        code = decode_frozen_data(source)
+    else:
+        code = compile(source, filename, "exec")
     printer = Printer(file)
     printer.generate("toplevel", code)
     printer.write("")
diff --git a/Tools/scripts/freeze_modules.py b/Tools/scripts/freeze_modules.py
index ccea4e11ab6ca..61ccae61e4f96 100644
--- a/Tools/scripts/freeze_modules.py
+++ b/Tools/scripts/freeze_modules.py
@@ -11,7 +11,6 @@
 import platform
 import subprocess
 import sys
-import textwrap
 import time
 
 from update_file import updating_file_with_tmpfile, update_file_with_tmpfile
@@ -55,6 +54,7 @@ def find_tool():
 MAKEFILE = os.path.join(ROOT_DIR, 'Makefile.pre.in')
 PCBUILD_PROJECT = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj')
 PCBUILD_FILTERS = os.path.join(ROOT_DIR, 'PCbuild', '_freeze_module.vcxproj.filters')
+PCBUILD_PYTHONCORE = os.path.join(ROOT_DIR, 'PCbuild', 'pythoncore.vcxproj')
 
 
 OS_PATH = 'ntpath' if os.name == 'nt' else 'posixpath'
@@ -717,20 +717,28 @@ def regen_makefile(modules):
 def regen_pcbuild(modules):
     projlines = []
     filterlines = []
+    corelines = []
     for src in _iter_sources(modules):
         pyfile = relpath_for_windows_display(src.pyfile, ROOT_DIR)
         header = relpath_for_windows_display(src.frozenfile, ROOT_DIR)
+        deepbase = "df." + src.id
+        deepoutfile = f"Python\\deepfreeze\\{deepbase}.c"
         intfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.h'
+        deepintfile = ntpath.splitext(ntpath.basename(header))[0] + '.g.c'
         projlines.append(f'    <None Include="..\\{pyfile}">')
         projlines.append(f'      <ModName>{src.frozenid}</ModName>')
         projlines.append(f'      <IntFile>$(IntDir){intfile}</IntFile>')
         projlines.append(f'      <OutFile>$(PySourcePath){header}</OutFile>')
+        projlines.append(f'      <DeepIntFile>$(IntDir){deepintfile}</DeepIntFile>')
+        projlines.append(f'      <DeepOutFile>$(PySourcePath){deepoutfile}</DeepOutFile>')
         projlines.append(f'    </None>')
 
         filterlines.append(f'    <None Include="..\\{pyfile}">')
         filterlines.append('      <Filter>Python Files</Filter>')
         filterlines.append('    </None>')
 
+        corelines.append(f'    <ClCompile Include="..\\{deepoutfile}" />')
+
     print(f'# Updating {os.path.relpath(PCBUILD_PROJECT)}')
     with updating_file_with_tmpfile(PCBUILD_PROJECT) as (infile, outfile):
         lines = infile.readlines()
@@ -753,6 +761,17 @@ def regen_pcbuild(modules):
             PCBUILD_FILTERS,
         )
         outfile.writelines(lines)
+    print(f'# Updating {os.path.relpath(PCBUILD_PYTHONCORE)}')
+    with updating_file_with_tmpfile(PCBUILD_PYTHONCORE) as (infile, outfile):
+        lines = infile.readlines()
+        lines = replace_block(
+            lines,
+            '<!-- BEGIN deepfreeze -->',
+            '<!-- END deepfreeze -->',
+            corelines,
+            PCBUILD_FILTERS,
+        )
+        outfile.writelines(lines)
 
 
 #######################################
diff --git a/Tools/scripts/startuptime.py b/Tools/scripts/startuptime.py
new file mode 100644
index 0000000000000..1bb5b208f66e0
--- /dev/null
+++ b/Tools/scripts/startuptime.py
@@ -0,0 +1,22 @@
+# Quick script to time startup for various binaries
+
+import subprocess
+import sys
+import time
+
+NREPS = 100
+
+
+def main():
+    binaries = sys.argv[1:]
+    for bin in binaries:
+        t0 = time.time()
+        for _ in range(NREPS):
+            result = subprocess.run([bin, "-c", "pass"])
+            result.check_returncode()
+        t1 = time.time()
+        print(f"{(t1-t0)/NREPS:6.3f} {bin}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/scripts/umarshal.py b/Tools/scripts/umarshal.py
new file mode 100644
index 0000000000000..e0d18c89e7531
--- /dev/null
+++ b/Tools/scripts/umarshal.py
@@ -0,0 +1,328 @@
+# Implement marshal.loads() in pure Python
+
+import ast
+
+from typing import Any
+
+
+class Type:
+    # Adapted from marshal.c
+    NULL                = ord('0')
+    NONE                = ord('N')
+    FALSE               = ord('F')
+    TRUE                = ord('T')
+    STOPITER            = ord('S')
+    ELLIPSIS            = ord('.')
+    INT                 = ord('i')
+    INT64               = ord('I')
+    FLOAT               = ord('f')
+    BINARY_FLOAT        = ord('g')
+    COMPLEX             = ord('x')
+    BINARY_COMPLEX      = ord('y')
+    LONG                = ord('l')
+    STRING              = ord('s')
+    INTERNED            = ord('t')
+    REF                 = ord('r')
+    TUPLE               = ord('(')
+    LIST                = ord('[')
+    DICT                = ord('{')
+    CODE                = ord('c')
+    UNICODE             = ord('u')
+    UNKNOWN             = ord('?')
+    SET                 = ord('<')
+    FROZENSET           = ord('>')
+    ASCII               = ord('a')
+    ASCII_INTERNED      = ord('A')
+    SMALL_TUPLE         = ord(')')
+    SHORT_ASCII         = ord('z')
+    SHORT_ASCII_INTERNED = ord('Z')
+
+
+FLAG_REF = 0x80  # with a type, add obj to index
+
+NULL = object()  # marker
+
+# Cell kinds
+CO_FAST_LOCAL = 0x20
+CO_FAST_CELL = 0x40
+CO_FAST_FREE = 0x80
+
+
+class Code:
+    def __init__(self, **kwds: Any):
+        self.__dict__.update(kwds)
+
+    def __repr__(self) -> str:
+        return f"Code(**{self.__dict__})"
+
+    co_localsplusnames: tuple[str]
+    co_localspluskinds: tuple[int]
+
+    def get_localsplus_names(self, select_kind: int) -> tuple[str, ...]:
+        varnames: list[str] = []
+        for name, kind in zip(self.co_localsplusnames,
+                              self.co_localspluskinds):
+            if kind & select_kind:
+                varnames.append(name)
+        return tuple(varnames)
+
+    @property
+    def co_varnames(self) -> tuple[str, ...]:
+        return self.get_localsplus_names(CO_FAST_LOCAL)
+
+    @property
+    def co_cellvars(self) -> tuple[str, ...]:
+        return self.get_localsplus_names(CO_FAST_CELL)
+
+    @property
+    def co_freevars(self) -> tuple[str, ...]:
+        return self.get_localsplus_names(CO_FAST_FREE)
+
+    @property
+    def co_nlocals(self) -> int:
+        return len(self.co_varnames)
+
+
+class Reader:
+    # A fairly literal translation of the marshal reader.
+
+    def __init__(self, data: bytes):
+        self.data: bytes = data
+        self.end: int = len(self.data)
+        self.pos: int = 0
+        self.refs: list[Any] = []
+        self.level: int = 0
+
+    def r_string(self, n: int) -> bytes:
+        assert 0 <= n <= self.end - self.pos
+        buf = self.data[self.pos : self.pos + n]
+        self.pos += n
+        return buf
+
+    def r_byte(self) -> int:
+        buf = self.r_string(1)
+        return buf[0]
+
+    def r_short(self) -> int:
+        buf = self.r_string(2)
+        x = buf[0]
+        x |= buf[1] << 8
+        x |= -(x & (1<<15))  # Sign-extend
+        return x
+
+    def r_long(self) -> int:
+        buf = self.r_string(4)
+        x = buf[0]
+        x |= buf[1] << 8
+        x |= buf[2] << 16
+        x |= buf[3] << 24
+        x |= -(x & (1<<31))  # Sign-extend
+        return x
+
+    def r_long64(self) -> int:
+        buf = self.r_string(8)  # 8 bytes, little-endian, two's complement
+        x = buf[0]
+        x |= buf[1] << 8
+        x |= buf[2] << 16
+        x |= buf[3] << 24
+        x |= buf[4] << 32  # high half must come from bytes 4-7, not buf[1]
+        x |= buf[5] << 40
+        x |= buf[6] << 48
+        x |= buf[7] << 56
+        x |= -(x & (1<<63))  # Sign-extend
+        return x
+
+    def r_PyLong(self) -> int:
+        n = self.r_long()
+        size = abs(n)
+        x = 0
+        # Pray this is right
+        for i in range(size):
+            x |= self.r_short() << i*15
+        if n < 0:
+            x = -x
+        return x
+
+    def r_float_bin(self) -> float:
+        buf = self.r_string(8)
+        import struct  # Lazy import to avoid breaking UNIX build
+        return struct.unpack("d", buf)[0]
+
+    def r_float_str(self) -> float:
+        n = self.r_byte()
+        buf = self.r_string(n)
+        return ast.literal_eval(buf.decode("ascii"))
+
+    def r_ref_reserve(self, flag: int) -> int:
+        if flag:
+            idx = len(self.refs)
+            self.refs.append(None)
+            return idx
+        else:
+            return 0
+
+    def r_ref_insert(self, obj: Any, idx: int, flag: int) -> Any:
+        if flag:
+            self.refs[idx] = obj
+        return obj
+
+    def r_ref(self, obj: Any, flag: int) -> Any:
+        assert flag & FLAG_REF
+        self.refs.append(obj)
+        return obj
+
+    def r_object(self) -> Any:
+        old_level = self.level
+        try:
+            return self._r_object()
+        finally:
+            self.level = old_level
+
+    def _r_object(self) -> Any:
+        code = self.r_byte()
+        flag = code & FLAG_REF
+        type = code & ~FLAG_REF
+        # print("  "*self.level + f"{code} {flag} {type} {chr(type)!r}")
+        self.level += 1
+
+        def R_REF(obj: Any) -> Any:
+            if flag:
+                obj = self.r_ref(obj, flag)
+            return obj
+
+        match type:
+            case Type.NULL:
+                return NULL
+            case Type.NONE:
+                return None
+            case Type.ELLIPSIS:
+                return Ellipsis
+            case Type.FALSE:
+                return False
+            case Type.TRUE:
+                return True
+            case Type.INT:
+                return R_REF(self.r_long())
+            case Type.INT64:
+                return R_REF(self.r_long64())
+            case Type.LONG:
+                return R_REF(self.r_PyLong())
+            case Type.FLOAT:
+                return R_REF(self.r_float_str())
+            case Type.BINARY_FLOAT:
+                return R_REF(self.r_float_bin())
+            case Type.COMPLEX:
+                return R_REF(complex(self.r_float_str(),
+                                     self.r_float_str()))
+            case Type.BINARY_COMPLEX:
+                return R_REF(complex(self.r_float_bin(),
+                                     self.r_float_bin()))
+            case Type.STRING:
+                n = self.r_long()
+                return R_REF(self.r_string(n))
+            case Type.ASCII_INTERNED | Type.ASCII:
+                n = self.r_long()
+                return R_REF(self.r_string(n).decode("ascii"))
+            case Type.SHORT_ASCII_INTERNED | Type.SHORT_ASCII:
+                n = self.r_byte()
+                return R_REF(self.r_string(n).decode("ascii"))
+            case Type.INTERNED | Type.UNICODE:
+                n = self.r_long()
+                return R_REF(self.r_string(n).decode("utf8", "surrogatepass"))
+            case Type.SMALL_TUPLE:
+                n = self.r_byte()
+                idx = self.r_ref_reserve(flag)
+                retval: Any = tuple(self.r_object() for _ in range(n))
+                self.r_ref_insert(retval, idx, flag)
+                return retval
+            case Type.TUPLE:
+                n = self.r_long()
+                idx = self.r_ref_reserve(flag)
+                retval = tuple(self.r_object() for _ in range(n))
+                self.r_ref_insert(retval, idx, flag)
+                return retval
+            case Type.LIST:
+                n = self.r_long()
+                retval = R_REF([])
+                for _ in range(n):
+                    retval.append(self.r_object())
+                return retval
+            case Type.DICT:
+                retval = R_REF({})
+                while True:
+                    key = self.r_object()
+                    if key == NULL:
+                        break
+                    val = self.r_object()
+                    retval[key] = val
+                return retval
+            case Type.SET:
+                n = self.r_long()
+                retval = R_REF(set())
+                for _ in range(n):
+                    v = self.r_object()
+                    retval.add(v)
+                return retval
+            case Type.FROZENSET:
+                n = self.r_long()
+                s: set[Any] = set()
+                idx = self.r_ref_reserve(flag)
+                for _ in range(n):
+                    v = self.r_object()
+                    s.add(v)
+                retval = frozenset(s)
+                self.r_ref_insert(retval, idx, flag)
+                return retval
+            case Type.CODE:
+                retval = R_REF(Code())
+                retval.co_argcount = self.r_long()
+                retval.co_posonlyargcount = self.r_long()
+                retval.co_kwonlyargcount = self.r_long()
+                retval.co_stacksize = self.r_long()
+                retval.co_flags = self.r_long()
+                retval.co_code = self.r_object()
+                retval.co_consts = self.r_object()
+                retval.co_names = self.r_object()
+                retval.co_localsplusnames = self.r_object()
+                retval.co_localspluskinds = self.r_object()
+                retval.co_filename = self.r_object()
+                retval.co_name = self.r_object()
+                retval.co_qualname = self.r_object()
+                retval.co_firstlineno = self.r_long()
+                retval.co_linetable = self.r_object()
+                retval.co_endlinetable = self.r_object()
+                retval.co_columntable = self.r_object()
+                retval.co_exceptiontable = self.r_object()
+                return retval
+            case Type.REF:
+                n = self.r_long()
+                retval = self.refs[n]
+                assert retval is not None
+                return retval
+            case _:
+                breakpoint()
+                raise AssertionError(f"Unknown type {type} {chr(type)!r}")
+
+
+def loads(data: bytes) -> Any:
+    assert isinstance(data, bytes)
+    r = Reader(data)
+    return r.r_object()
+
+
+def main():
+    # Test
+    import marshal, pprint
+    sample = {'foo': {(42, "bar", 3.14)}}
+    data = marshal.dumps(sample)
+    retval = loads(data)
+    assert retval == sample, retval
+    sample = main.__code__
+    data = marshal.dumps(sample)
+    retval = loads(data)
+    assert isinstance(retval, Code), retval
+    pprint.pprint(retval.__dict__)
+
+
+if __name__ == "__main__":
+    main()



More information about the Python-checkins mailing list