[Python-Dev] non-US zip archives support in zipfile.py

Sergey Dorofeev python at fidoman.ru
Mon Oct 14 22:55:05 CEST 2013


Hello,

I'd like to submit a patch to support zip archives created on systems that
use a non-US codepage (e.g. Russian CP866).
The codepage would be specified in an additional parameter of the ZipFile
constructor, named "codepage".
If it is not specified, the old behavior is preserved (CP437 is used).

--- zipfile.py-orig     2013-09-18 16:45:56.000000000 +0400
+++ zipfile.py  2013-10-15 00:24:06.105157572 +0400
@@ -885,7 +885,7 @@
      fp = None                   # Set here since __del__ checks it
      _windows_illegal_name_trans_table = None

-    def __init__(self, file, mode="r", compression=ZIP_STORED, 
allowZip64=False):
+    def __init__(self, file, mode="r", compression=ZIP_STORED, 
allowZip64=False, codepage='cp437'):
          """Open the ZIP file with mode read "r", write "w" or append 
"a"."""
          if mode not in ("r", "w", "a"):
              raise RuntimeError('ZipFile() requires mode "r", "w", or 
"a"')
@@ -901,6 +901,7 @@
          self.mode = key = mode.replace('b', '')[0]
          self.pwd = None
          self._comment = b''
+        self.codepage = codepage

          # Check if we were passed a file-like object
          if isinstance(file, str):
@@ -1002,7 +1003,7 @@
                  filename = filename.decode('utf-8')
              else:
                  # Historical ZIP filename encoding
-                filename = filename.decode('cp437')
+                filename = filename.decode(self.codepage)
              # Create ZipInfo instance to store file information
              x = ZipInfo(filename)
              x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
@@ -1157,7 +1158,7 @@
                  # UTF-8 filename
                  fname_str = fname.decode("utf-8")
              else:
-                fname_str = fname.decode("cp437")
+                fname_str = fname.decode(self.codepage)

              if fname_str != zinfo.orig_filename:
                  raise BadZipFile(



More information about the Python-Dev mailing list