[Python-checkins] cpython (merge 3.4 -> default): (Merge 3.4) Issue #20879: Delay the initialization of encoding and decoding

Mon Mar 17 22:40:09 CET 2014

http://hg.python.org/cpython/rev/06d646935c9a
changeset:   89831:06d646935c9a
parent:      89829:cb161cd94e6e
parent:      89830:7093d5758954
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Mon Mar 17 22:39:49 2014 +0100
summary:
  (Merge 3.4) Issue #20879: Delay the initialization of encoding and decoding
tables for base32, ascii85 and base85 codecs in the base64 module, and delay
the initialization of the unquote_to_bytes() table of the urllib.parse module,
to avoid wasting memory when the corresponding functions are never called.

files:
  Lib/base64.py       |  59 +++++++++++++++++++++++---------
  Lib/urllib/parse.py |   9 +++-
  Misc/NEWS           |   5 ++
  3 files changed, 54 insertions(+), 19 deletions(-)

diff --git a/Lib/base64.py b/Lib/base64.py
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -138,15 +138,22 @@
 
 # Base32 encoding/decoding must be done in Python
 _b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
-_b32tab = [bytes([i]) for i in _b32alphabet]
-_b32tab2 = [a + b for a in _b32tab for b in _b32tab]
-_b32rev = {v: k for k, v in enumerate(_b32alphabet)}
+_b32tab2 = None
+_b32rev = None
 
 def b32encode(s):
     """Encode a byte string using Base32.
 
     s is the byte string to encode.  The encoded byte string is returned.
     """
+    global _b32tab2
+    # Delay the initialization of the table to not waste memory
+    # if the function is never called
+    if _b32tab2 is None:
+        b32tab = [bytes((i,)) for i in _b32alphabet]
+        _b32tab2 = [a + b for a in b32tab for b in b32tab]
+        b32tab = None
+
     if not isinstance(s, bytes_types):
         s = memoryview(s).tobytes()
     leftover = len(s) % 5
@@ -193,6 +200,11 @@
     the input is incorrectly padded or if there are non-alphabet
     characters present in the input.
     """
+    global _b32rev
+    # Delay the initialization of the table to not waste memory
+    # if the function is never called
+    if _b32rev is None:
+        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
     s = _bytes_from_decode_data(s)
     if len(s) % 8:
         raise binascii.Error('Incorrect padding')
@@ -274,6 +286,11 @@
 # Ascii85 encoding/decoding
 #
 
+_a85chars = None
+_a85chars2 = None
+_A85START = b"<~"
+_A85END = b"~>"
+
 def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
     # Helper function for a85encode and b85encode
     if not isinstance(b, bytes_types):
@@ -284,8 +301,6 @@
         b = b + b'\0' * padding
     words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)
 
-    a85chars2 = _a85chars2
-    a85chars = _a85chars
     chunks = [b'z' if foldnuls and not word else
               b'y' if foldspaces and word == 0x20202020 else
               (chars2[word // 614125] +
@@ -300,11 +315,6 @@
 
     return b''.join(chunks)
 
-_A85START = b"<~"
-_A85END = b"~>"
-_a85chars = [bytes([i]) for i in range(33, 118)]
-_a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
-
 def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
     """Encode a byte string using Ascii85.
 
@@ -324,6 +334,13 @@
     adobe controls whether the encoded byte sequence is framed with <~ and ~>,
     which is used by the Adobe implementation.
     """
+    global _a85chars, _a85chars2
+    # Delay the initialization of tables to not waste memory
+    # if the function is never called
+    if _a85chars is None:
+        _a85chars = [bytes((i,)) for i in range(33, 118)]
+        _a85chars2 = [(a + b) for a in _a85chars for b in _a85chars]
+
     result = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)
 
     if adobe:
@@ -408,10 +425,10 @@
 
 # The following code is originally taken (with permission) from Mercurial
 
-_b85chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
-            b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~"
-_b85chars = [bytes([i]) for i in _b85chars]
-_b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
+_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
+_b85chars = None
+_b85chars2 = None
 _b85dec = None
 
 def b85encode(b, pad=False):
@@ -420,17 +437,25 @@
     If pad is true, the input is padded with "\0" so its length is a multiple of
     4 characters before encoding.
     """
+    global _b85chars, _b85chars2
+    # Delay the initialization of tables to not waste memory
+    # if the function is never called
+    if _b85chars is None:
+        _b85chars = [bytes((i,)) for i in _b85alphabet]
+        _b85chars2 = [(a + b) for a in _b85chars for b in _b85chars]
     return _85encode(b, _b85chars, _b85chars2, pad)
 
 def b85decode(b):
     """Decode base85-encoded byte array"""
-    b = _bytes_from_decode_data(b)
     global _b85dec
+    # Delay the initialization of tables to not waste memory
+    # if the function is never called
     if _b85dec is None:
         _b85dec = [None] * 256
-        for i, c in enumerate(_b85chars):
-            _b85dec[c[0]] = i
+        for i, c in enumerate(_b85alphabet):
+            _b85dec[c] = i
 
+    b = _bytes_from_decode_data(b)
     padding = (-len(b)) % 5
     b = b + b'~' * padding
     out = []
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -472,8 +472,7 @@
     return _coerce_result(DefragResult(defrag, frag))
 
 _hexdig = '0123456789ABCDEFabcdef'
-_hextobyte = {(a + b).encode(): bytes([int(a + b, 16)])
-              for a in _hexdig for b in _hexdig}
+_hextobyte = None
 
 def unquote_to_bytes(string):
     """unquote_to_bytes('abc%20def') -> b'abc def'."""
@@ -490,6 +489,12 @@
         return string
     res = [bits[0]]
     append = res.append
+    # Delay the initialization of the table to not waste memory
+    # if the function is never called
+    global _hextobyte
+    if _hextobyte is None:
+        _hextobyte = {(a + b).encode(): bytes([int(a + b, 16)])
+                      for a in _hexdig for b in _hexdig}
     for item in bits[1:]:
         try:
             append(_hextobyte[item[:2]])
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,11 @@
 Library
 -------
 
+- Issue #20879: Delay the initialization of encoding and decoding tables for
+  base32, ascii85 and base85 codecs in the base64 module, and delay the
+  initialization of the unquote_to_bytes() table of the urllib.parse module, to
+  not waste memory if these modules are not used.
+
 
 What's New in Python 3.4.0?
 ===========================

-- 
Repository URL: http://hg.python.org/cpython