[Python-checkins] bpo-40280: WASM docs and smaller browser builds (GH-32412)

tiran webhook-mailer at python.org
Sun Apr 10 03:30:12 EDT 2022


https://github.com/python/cpython/commit/defbbd68f7f68f4edb3a6b256f26e0532727b3da
commit: defbbd68f7f68f4edb3a6b256f26e0532727b3da
branch: main
author: Christian Heimes <christian at python.org>
committer: tiran <christian at python.org>
date: 2022-04-10T09:29:51+02:00
summary:

bpo-40280: WASM docs and smaller browser builds (GH-32412)

Co-authored-by: Brett Cannon <brett at python.org>

files:
A Tools/wasm/Setup.local.example
M Doc/library/sys.rst
M Makefile.pre.in
M Tools/wasm/README.md
M Tools/wasm/wasm_assets.py
M Tools/wasm/wasm_webserver.py

diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst
index 3b09a4cdf8b98..126da31b5bd32 100644
--- a/Doc/library/sys.rst
+++ b/Doc/library/sys.rst
@@ -1183,7 +1183,9 @@ always available.
    System           ``platform`` value
    ================ ===========================
    AIX              ``'aix'``
+   Emscripten       ``'emscripten'``
    Linux            ``'linux'``
+   WASI             ``'wasi'``
    Windows          ``'win32'``
    Windows/Cygwin   ``'cygwin'``
    macOS            ``'darwin'``
diff --git a/Makefile.pre.in b/Makefile.pre.in
index cb6e962045a04..22a68a7048792 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -812,8 +812,9 @@ $(DLLLIBRARY) libpython$(LDVERSION).dll.a: $(LIBRARY_OBJS)
 # --preload-file turns a relative asset path into an absolute path.
 
 $(WASM_STDLIB): $(srcdir)/Lib/*.py $(srcdir)/Lib/*/*.py \
-                pybuilddir.txt $(srcdir)/Tools/wasm/wasm_assets.py \
-				python.html python.worker.js
+	    $(srcdir)/Tools/wasm/wasm_assets.py \
+	    Makefile pybuilddir.txt Modules/Setup.local \
+	    python.html python.worker.js
 	$(PYTHON_FOR_BUILD) $(srcdir)/Tools/wasm/wasm_assets.py \
 	    --builddir . --prefix $(prefix)
 
diff --git a/Tools/wasm/README.md b/Tools/wasm/README.md
index 40b82e8f9397a..fa99703acfc5b 100644
--- a/Tools/wasm/README.md
+++ b/Tools/wasm/README.md
@@ -9,9 +9,21 @@ possible to build for *wasm32-wasi* out-of-the-box yet.
 
 ## wasm32-emscripten build
 
-Cross compiling to wasm32-emscripten platform needs the [Emscripten](https://emscripten.org/)
-tool chain and a build Python interpreter.
-All commands below are relative to a repository checkout.
+Cross compiling to the wasm32-emscripten platform needs the
+[Emscripten](https://emscripten.org/) SDK and a build Python interpreter.
+Emscripten 3.1.8 or newer is recommended. All commands below are relative
+to a repository checkout.
+
+Christian Heimes maintains a container image with Emscripten SDK, Python
+build dependencies, WASI-SDK, wasmtime, and several additional tools.
+
+```
+# Fedora, RHEL, CentOS
+podman run --rm -ti -v $(pwd):/python-wasm/cpython:Z quay.io/tiran/cpythonbuild:emsdk3
+
+# other
+docker run --rm -ti -v $(pwd):/python-wasm/cpython quay.io/tiran/cpythonbuild:emsdk3
+```
 
 ### Compile a build Python interpreter
 
@@ -167,3 +179,77 @@ linker options.
 - pthread support requires WASM threads and SharedArrayBuffer (bulk memory).
   The runtime keeps a pool of web workers around. Each web worker uses
   several file descriptors (eventfd, epoll, pipe).
+
+# Hosting Python WASM builds
+
+The simple REPL terminal uses SharedArrayBuffer. For security reasons
+browsers only provide the feature in secure environments with cross-origin
+isolation. The webserver must send cross-origin headers and correct MIME types
+for the JavaScript and WASM files. Otherwise the terminal will fail to load
+with an error message like ``Browsers disable shared array buffer``.
+
+## Apache HTTP .htaccess
+
+Place a ``.htaccess`` file in the same directory as ``python.wasm``.
+
+```
+# .htaccess
+Header set Cross-Origin-Opener-Policy same-origin
+Header set Cross-Origin-Embedder-Policy require-corp
+
+AddType application/javascript js
+AddType application/wasm wasm
+
+<IfModule mod_deflate.c>
+    AddOutputFilterByType DEFLATE text/html application/javascript application/wasm
+</IfModule>
+```
+
+# Detect WebAssembly builds
+
+## Python code
+
+```python
+import os, sys
+
+if sys.platform == "emscripten":
+    # Python on Emscripten
+if sys.platform == "wasi":
+    # Python on WASI
+
+if os.name == "posix":
+    # WASM platforms identify as POSIX-like.
+    # Windows does not provide os.uname().
+    machine = os.uname().machine
+    if machine.startswith("wasm"):
+        # WebAssembly (wasm32 or wasm64)
+```
+
+## C code
+
+Emscripten SDK and WASI SDK define several built-in macros. You can dump a
+full list of built-ins with ``emcc -dM -E - < /dev/null`` and
+``/path/to/wasi-sdk/bin/clang -dM -E - < /dev/null``.
+
+```c
+#ifdef __EMSCRIPTEN__
+    // Python on Emscripten
+#endif
+```
+
+* WebAssembly ``__wasm__`` (also ``__wasm``)
+* wasm32 ``__wasm32__`` (also ``__wasm32``)
+* wasm64 ``__wasm64__``
+* Emscripten ``__EMSCRIPTEN__`` (also ``EMSCRIPTEN``)
+* Emscripten version ``__EMSCRIPTEN_major__``, ``__EMSCRIPTEN_minor__``, ``__EMSCRIPTEN_tiny__``
+* WASI ``__wasi__``
+
+Feature detection flags:
+
+* ``__EMSCRIPTEN_PTHREADS__``
+* ``__EMSCRIPTEN_SHARED_MEMORY__``
+* ``__wasm_simd128__``
+* ``__wasm_sign_ext__``
+* ``__wasm_bulk_memory__``
+* ``__wasm_atomics__``
+* ``__wasm_mutable_globals__``
diff --git a/Tools/wasm/Setup.local.example b/Tools/wasm/Setup.local.example
new file mode 100644
index 0000000000000..ad58c31a2efe3
--- /dev/null
+++ b/Tools/wasm/Setup.local.example
@@ -0,0 +1,15 @@
+# Modules/Setup.local with reduced stdlib
+*disabled*
+_asyncio
+audioop
+_bz2
+_crypt
+_decimal
+_pickle
+pyexpat _elementtree
+_sha3 _blake2
+_zoneinfo
+xxsubtype
+
+# cjk codecs
+#_multibytecodec _codecs_cn _codecs_hk _codecs_iso2022 _codecs_jp _codecs_kr _codecs_tw
diff --git a/Tools/wasm/wasm_assets.py b/Tools/wasm/wasm_assets.py
index bb1983af4c7a7..fba70b9c9d042 100755
--- a/Tools/wasm/wasm_assets.py
+++ b/Tools/wasm/wasm_assets.py
@@ -20,7 +20,11 @@
 SRCDIR_LIB = SRCDIR / "Lib"
 
 # sysconfig data relative to build dir.
-SYSCONFIGDATA_GLOB = "build/lib.*/_sysconfigdata_*.py"
+SYSCONFIGDATA = pathlib.PurePath(
+    "build",
+    f"lib.emscripten-wasm32-{sys.version_info.major}.{sys.version_info.minor}",
+    "_sysconfigdata__emscripten_wasm32-emscripten.py",
+)
 
 # Library directory relative to $(prefix).
 WASM_LIB = pathlib.PurePath("lib")
@@ -38,33 +42,44 @@
 OMIT_FILES = (
     # regression tests
     "test/",
-    # user interfaces: TK, curses
-    "curses/",
-    "idlelib/",
-    "tkinter/",
-    "turtle.py",
-    "turtledemo/",
     # package management
     "ensurepip/",
     "venv/",
     # build system
     "distutils/",
     "lib2to3/",
-    # concurrency
-    "concurrent/",
-    "multiprocessing/",
     # deprecated
     "asyncore.py",
     "asynchat.py",
-    # Synchronous network I/O and protocols are not supported; for example,
-    # socket.create_connection() raises an exception:
-    # "BlockingIOError: [Errno 26] Operation in progress".
+    "uu.py",
+    "xdrlib.py",
+    # other platforms
+    "_aix_support.py",
+    "_bootsubprocess.py",
+    "_osx_support.py",
+    # webbrowser
+    "antigravity.py",
+    "webbrowser.py",
+    # Pure Python implementations of C extensions
+    "_pydecimal.py",
+    "_pyio.py",
+    # Misc unused or large files
+    "pydoc_data/",
+    "msilib/",
+)
+
+# Synchronous network I/O and protocols are not supported; for example,
+# socket.create_connection() raises an exception:
+# "BlockingIOError: [Errno 26] Operation in progress".
+OMIT_NETWORKING_FILES = (
     "cgi.py",
     "cgitb.py",
     "email/",
     "ftplib.py",
     "http/",
     "imaplib.py",
+    "mailbox.py",
+    "mailcap.py",
     "nntplib.py",
     "poplib.py",
     "smtpd.py",
@@ -77,26 +92,28 @@
     "urllib/response.py",
     "urllib/robotparser.py",
     "wsgiref/",
-    "xmlrpc/",
-    # dbm / gdbm
-    "dbm/",
-    # other platforms
-    "_aix_support.py",
-    "_bootsubprocess.py",
-    "_osx_support.py",
-    # webbrowser
-    "antigravity.py",
-    "webbrowser.py",
-    # ctypes
-    "ctypes/",
-    # Pure Python implementations of C extensions
-    "_pydecimal.py",
-    "_pyio.py",
-    # Misc unused or large files
-    "pydoc_data/",
-    "msilib/",
 )
 
+OMIT_MODULE_FILES = {
+    "_asyncio": ["asyncio/"],
+    "audioop": ["aifc.py", "sunau.py", "wave.py"],
+    "_crypt": ["crypt.py"],
+    "_curses": ["curses/"],
+    "_ctypes": ["ctypes/"],
+    "_decimal": ["decimal.py"],
+    "_dbm": ["dbm/ndbm.py"],
+    "_gdbm": ["dbm/gnu.py"],
+    "_json": ["json/"],
+    "_multiprocessing": ["concurrent/", "multiprocessing/"],
+    "pyexpat": ["xml/", "xmlrpc/"],
+    "readline": ["rlcompleter.py"],
+    "_sqlite3": ["sqlite3/"],
+    "_ssl": ["ssl.py"],
+    "_tkinter": ["idlelib/", "tkinter/", "turtle.py", "turtledemo/"],
+
+    "_zoneinfo": ["zoneinfo/"],
+}
+
 # regression test sub directories
 OMIT_SUBDIRS = (
     "ctypes/test/",
@@ -105,34 +122,59 @@
 )
 
 
-OMIT_ABSOLUTE = {SRCDIR_LIB / name for name in OMIT_FILES}
-OMIT_SUBDIRS_ABSOLUTE = tuple(str(SRCDIR_LIB / name) for name in OMIT_SUBDIRS)
-
-
-def filterfunc(name: str) -> bool:
-    return not name.startswith(OMIT_SUBDIRS_ABSOLUTE)
-
-
 def create_stdlib_zip(
-    args: argparse.Namespace, compression: int = zipfile.ZIP_DEFLATED, *, optimize: int = 0
+    args: argparse.Namespace,
+    *,
+    optimize: int = 0,
 ) -> None:
-    sysconfig_data = list(args.builddir.glob(SYSCONFIGDATA_GLOB))
-    if not sysconfig_data:
-        raise ValueError("No sysconfigdata file found")
+    def filterfunc(name: str) -> bool:
+        return not name.startswith(args.omit_subdirs_absolute)
 
     with zipfile.PyZipFile(
-        args.wasm_stdlib_zip, mode="w", compression=compression, optimize=0
+        args.wasm_stdlib_zip, mode="w", compression=args.compression, optimize=optimize
     ) as pzf:
+        if args.compresslevel is not None:
+            pzf.compresslevel = args.compresslevel
+        pzf.writepy(args.sysconfig_data)
         for entry in sorted(args.srcdir_lib.iterdir()):
             if entry.name == "__pycache__":
                 continue
-            if entry in OMIT_ABSOLUTE:
+            if entry in args.omit_files_absolute:
                 continue
             if entry.name.endswith(".py") or entry.is_dir():
                 # writepy() writes .pyc files (bytecode).
                 pzf.writepy(entry, filterfunc=filterfunc)
-        for entry in sysconfig_data:
-            pzf.writepy(entry)
+
+
+def detect_extension_modules(args: argparse.Namespace):
+    modules = {}
+
+    # disabled by Modules/Setup.local ?
+    with open(args.builddir / "Makefile") as f:
+        for line in f:
+            if line.startswith("MODDISABLED_NAMES="):
+                disabled = line.split("=", 1)[1].strip().split()
+                for modname in disabled:
+                    modules[modname] = False
+                break
+
+    # disabled by configure?
+    with open(args.sysconfig_data) as f:
+        data = f.read()
+    loc = {}
+    exec(data, globals(), loc)
+
+    for name, value in loc["build_time_vars"].items():
+        if value not in {"yes", "missing", "disabled", "n/a"}:
+            continue
+        if not name.startswith("MODULE_"):
+            continue
+        if name.endswith(("_CFLAGS", "_DEPS", "_LDFLAGS")):
+            continue
+        modname = name.removeprefix("MODULE_").lower()
+        if modname not in modules:
+            modules[modname] = value == "yes"
+    return modules
 
 
 def path(val: str) -> pathlib.Path:
@@ -147,7 +189,10 @@ def path(val: str) -> pathlib.Path:
     type=path,
 )
 parser.add_argument(
-    "--prefix", help="install prefix", default=pathlib.Path("/usr/local"), type=path
+    "--prefix",
+    help="install prefix",
+    default=pathlib.Path("/usr/local"),
+    type=path,
 )
 
 
@@ -162,6 +207,27 @@ def main():
     args.wasm_stdlib = args.wasm_root / WASM_STDLIB
     args.wasm_dynload = args.wasm_root / WASM_DYNLOAD
 
+    # bpo-17004: zipimport supports only zlib compression.
+    # Emscripten ZIP_STORED + -sLZ4=1 linker flags result in a larger file.
+    args.compression = zipfile.ZIP_DEFLATED
+    args.compresslevel = 9
+
+    args.sysconfig_data = args.builddir / SYSCONFIGDATA
+    if not args.sysconfig_data.is_file():
+        raise ValueError(f"sysconfigdata file {SYSCONFIGDATA} missing.")
+
+    extmods = detect_extension_modules(args)
+    omit_files = list(OMIT_FILES)
+    omit_files.extend(OMIT_NETWORKING_FILES)
+    for modname, modfiles in OMIT_MODULE_FILES.items():
+        if not extmods.get(modname):
+            omit_files.extend(modfiles)
+
+    args.omit_files_absolute = {args.srcdir_lib / name for name in omit_files}
+    args.omit_subdirs_absolute = tuple(
+        str(args.srcdir_lib / name) for name in OMIT_SUBDIRS
+    )
+
     # Empty, unused directory for dynamic libs, but required for site initialization.
     args.wasm_dynload.mkdir(parents=True, exist_ok=True)
     marker = args.wasm_dynload / ".empty"
@@ -170,7 +236,7 @@ def main():
     shutil.copy(args.srcdir_lib / "os.py", args.wasm_stdlib)
     # The rest of stdlib that's useful in a WASM context.
     create_stdlib_zip(args)
-    size = round(args.wasm_stdlib_zip.stat().st_size / 1024 ** 2, 2)
+    size = round(args.wasm_stdlib_zip.stat().st_size / 1024**2, 2)
     parser.exit(0, f"Created {args.wasm_stdlib_zip} ({size} MiB)\n")
 
 
diff --git a/Tools/wasm/wasm_webserver.py b/Tools/wasm/wasm_webserver.py
index ef642bf8a5be8..186bd57fc2067 100755
--- a/Tools/wasm/wasm_webserver.py
+++ b/Tools/wasm/wasm_webserver.py
@@ -14,6 +14,13 @@
 
 
 class MyHTTPRequestHandler(server.SimpleHTTPRequestHandler):
+    extensions_map = server.SimpleHTTPRequestHandler.extensions_map.copy()
+    extensions_map.update(
+        {
+            ".wasm": "application/wasm",
+        }
+    )
+
     def end_headers(self):
         self.send_my_headers()
         super().end_headers()



More information about the Python-checkins mailing list