[Python-checkins] bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-3062)

Steve Dower webhook-mailer at python.org
Mon Jun 24 19:47:03 EDT 2019


https://github.com/python/cpython/commit/9fc720e5e4f772598013ea48a3f0d22b2b6b04fa
commit: 9fc720e5e4f772598013ea48a3f0d22b2b6b04fa
branch: master
author: David K. Hess <david_k_hess at mac.com>
committer: Steve Dower <steve.dower at python.org>
date: 2019-06-24T16:46:59-07:00
summary:

bpo-4963: Fix for initialization and non-deterministic behavior issues in mimetypes (GH-3062)

files:
A Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst
M Doc/library/mimetypes.rst
M Lib/mimetypes.py
M Lib/test/test_mimetypes.py

diff --git a/Doc/library/mimetypes.rst b/Doc/library/mimetypes.rst
index 5728407cb34c..f610032acbe4 100644
--- a/Doc/library/mimetypes.rst
+++ b/Doc/library/mimetypes.rst
@@ -93,6 +93,10 @@ behavior of the module.
    Specifying an empty list for *files* will prevent the system defaults from
    being applied: only the well-known values will be present from a built-in list.
 
+   If *files* is ``None``, the internal data structure is completely rebuilt to its
+   initial default value. This is a stable operation and will produce the same results
+   when called multiple times.
+
    .. versionchanged:: 3.2
       Previously, Windows registry settings were ignored.
 
diff --git a/Lib/mimetypes.py b/Lib/mimetypes.py
index 8861b75362db..01a16fdf9aa1 100644
--- a/Lib/mimetypes.py
+++ b/Lib/mimetypes.py
@@ -66,13 +66,13 @@ class MimeTypes:
     def __init__(self, filenames=(), strict=True):
         if not inited:
             init()
-        self.encodings_map = encodings_map.copy()
-        self.suffix_map = suffix_map.copy()
+        self.encodings_map = _encodings_map_default.copy()
+        self.suffix_map = _suffix_map_default.copy()
         self.types_map = ({}, {}) # dict for (non-strict, strict)
         self.types_map_inv = ({}, {})
-        for (ext, type) in types_map.items():
+        for (ext, type) in _types_map_default.items():
             self.add_type(type, ext, True)
-        for (ext, type) in common_types.items():
+        for (ext, type) in _common_types_default.items():
             self.add_type(type, ext, False)
         for name in filenames:
             self.read(name, strict)
@@ -346,11 +346,19 @@ def init(files=None):
     global suffix_map, types_map, encodings_map, common_types
     global inited, _db
     inited = True    # so that MimeTypes.__init__() doesn't call us again
-    db = MimeTypes()
-    if files is None:
+
+    if files is None or _db is None:
+        db = MimeTypes()
         if _winreg:
             db.read_windows_registry()
-        files = knownfiles
+
+        if files is None:
+            files = knownfiles
+        else:
+            files = knownfiles + list(files)
+    else:
+        db = _db
+
     for file in files:
         if os.path.isfile(file):
             db.read(file)
@@ -374,12 +382,12 @@ def read_mime_types(file):
 
 
 def _default_mime_types():
-    global suffix_map
-    global encodings_map
-    global types_map
-    global common_types
+    global suffix_map, _suffix_map_default
+    global encodings_map, _encodings_map_default
+    global types_map, _types_map_default
+    global common_types, _common_types_default
 
-    suffix_map = {
+    suffix_map = _suffix_map_default = {
         '.svgz': '.svg.gz',
         '.tgz': '.tar.gz',
         '.taz': '.tar.gz',
@@ -388,7 +396,7 @@ def _default_mime_types():
         '.txz': '.tar.xz',
         }
 
-    encodings_map = {
+    encodings_map = _encodings_map_default = {
         '.gz': 'gzip',
         '.Z': 'compress',
         '.bz2': 'bzip2',
@@ -399,152 +407,155 @@ def _default_mime_types():
     # at http://www.iana.org/assignments/media-types
     # or extensions, i.e. using the x- prefix
 
-    # If you add to these, please keep them sorted!
-    types_map = {
+    # If you add to these, please keep them sorted by mime type.
+    # Make sure the entry with the preferred file extension for a particular mime type
+    # appears before any others of the same mime type.
+    types_map = _types_map_default = {
+        '.js'     : 'application/javascript',
+        '.mjs'    : 'application/javascript',
+        '.json'   : 'application/json',
+        '.doc'    : 'application/msword',
+        '.dot'    : 'application/msword',
+        '.wiz'    : 'application/msword',
+        '.bin'    : 'application/octet-stream',
         '.a'      : 'application/octet-stream',
+        '.dll'    : 'application/octet-stream',
+        '.exe'    : 'application/octet-stream',
+        '.o'      : 'application/octet-stream',
+        '.obj'    : 'application/octet-stream',
+        '.so'     : 'application/octet-stream',
+        '.oda'    : 'application/oda',
+        '.pdf'    : 'application/pdf',
+        '.p7c'    : 'application/pkcs7-mime',
+        '.ps'     : 'application/postscript',
         '.ai'     : 'application/postscript',
-        '.aif'    : 'audio/x-aiff',
-        '.aifc'   : 'audio/x-aiff',
-        '.aiff'   : 'audio/x-aiff',
-        '.au'     : 'audio/basic',
-        '.avi'    : 'video/x-msvideo',
-        '.bat'    : 'text/plain',
+        '.eps'    : 'application/postscript',
+        '.m3u'    : 'application/vnd.apple.mpegurl',
+        '.m3u8'   : 'application/vnd.apple.mpegurl',
+        '.xls'    : 'application/vnd.ms-excel',
+        '.xlb'    : 'application/vnd.ms-excel',
+        '.ppt'    : 'application/vnd.ms-powerpoint',
+        '.pot'    : 'application/vnd.ms-powerpoint',
+        '.ppa'    : 'application/vnd.ms-powerpoint',
+        '.pps'    : 'application/vnd.ms-powerpoint',
+        '.pwz'    : 'application/vnd.ms-powerpoint',
+        '.wasm'   : 'application/wasm',
         '.bcpio'  : 'application/x-bcpio',
-        '.bin'    : 'application/octet-stream',
-        '.bmp'    : 'image/bmp',
-        '.c'      : 'text/plain',
-        '.cdf'    : 'application/x-netcdf',
         '.cpio'   : 'application/x-cpio',
         '.csh'    : 'application/x-csh',
-        '.css'    : 'text/css',
-        '.csv'    : 'text/csv',
-        '.dll'    : 'application/octet-stream',
-        '.doc'    : 'application/msword',
-        '.dot'    : 'application/msword',
         '.dvi'    : 'application/x-dvi',
-        '.eml'    : 'message/rfc822',
-        '.eps'    : 'application/postscript',
-        '.etx'    : 'text/x-setext',
-        '.exe'    : 'application/octet-stream',
-        '.gif'    : 'image/gif',
         '.gtar'   : 'application/x-gtar',
-        '.h'      : 'text/plain',
         '.hdf'    : 'application/x-hdf',
-        '.htm'    : 'text/html',
-        '.html'   : 'text/html',
-        '.ico'    : 'image/vnd.microsoft.icon',
-        '.ief'    : 'image/ief',
-        '.jpe'    : 'image/jpeg',
-        '.jpeg'   : 'image/jpeg',
-        '.jpg'    : 'image/jpeg',
-        '.js'     : 'application/javascript',
-        '.json'   : 'application/json',
-        '.ksh'    : 'text/plain',
         '.latex'  : 'application/x-latex',
-        '.m1v'    : 'video/mpeg',
-        '.m3u'    : 'application/vnd.apple.mpegurl',
-        '.m3u8'   : 'application/vnd.apple.mpegurl',
-        '.man'    : 'application/x-troff-man',
-        '.me'     : 'application/x-troff-me',
-        '.mht'    : 'message/rfc822',
-        '.mhtml'  : 'message/rfc822',
         '.mif'    : 'application/x-mif',
-        '.mjs'    : 'application/javascript',
-        '.mov'    : 'video/quicktime',
-        '.movie'  : 'video/x-sgi-movie',
-        '.mp2'    : 'audio/mpeg',
-        '.mp3'    : 'audio/mpeg',
-        '.mp4'    : 'video/mp4',
-        '.mpa'    : 'video/mpeg',
-        '.mpe'    : 'video/mpeg',
-        '.mpeg'   : 'video/mpeg',
-        '.mpg'    : 'video/mpeg',
-        '.ms'     : 'application/x-troff-ms',
+        '.cdf'    : 'application/x-netcdf',
         '.nc'     : 'application/x-netcdf',
-        '.nws'    : 'message/rfc822',
-        '.o'      : 'application/octet-stream',
-        '.obj'    : 'application/octet-stream',
-        '.oda'    : 'application/oda',
         '.p12'    : 'application/x-pkcs12',
-        '.p7c'    : 'application/pkcs7-mime',
-        '.pbm'    : 'image/x-portable-bitmap',
-        '.pdf'    : 'application/pdf',
         '.pfx'    : 'application/x-pkcs12',
-        '.pgm'    : 'image/x-portable-graymap',
-        '.pl'     : 'text/plain',
-        '.png'    : 'image/png',
-        '.pnm'    : 'image/x-portable-anymap',
-        '.pot'    : 'application/vnd.ms-powerpoint',
-        '.ppa'    : 'application/vnd.ms-powerpoint',
-        '.ppm'    : 'image/x-portable-pixmap',
-        '.pps'    : 'application/vnd.ms-powerpoint',
-        '.ppt'    : 'application/vnd.ms-powerpoint',
-        '.ps'     : 'application/postscript',
-        '.pwz'    : 'application/vnd.ms-powerpoint',
-        '.py'     : 'text/x-python',
+        '.ram'    : 'application/x-pn-realaudio',
         '.pyc'    : 'application/x-python-code',
         '.pyo'    : 'application/x-python-code',
-        '.qt'     : 'video/quicktime',
-        '.ra'     : 'audio/x-pn-realaudio',
-        '.ram'    : 'application/x-pn-realaudio',
-        '.ras'    : 'image/x-cmu-raster',
-        '.rdf'    : 'application/xml',
-        '.rgb'    : 'image/x-rgb',
-        '.roff'   : 'application/x-troff',
-        '.rtx'    : 'text/richtext',
-        '.sgm'    : 'text/x-sgml',
-        '.sgml'   : 'text/x-sgml',
         '.sh'     : 'application/x-sh',
         '.shar'   : 'application/x-shar',
-        '.snd'    : 'audio/basic',
-        '.so'     : 'application/octet-stream',
-        '.src'    : 'application/x-wais-source',
+        '.swf'    : 'application/x-shockwave-flash',
         '.sv4cpio': 'application/x-sv4cpio',
         '.sv4crc' : 'application/x-sv4crc',
-        '.svg'    : 'image/svg+xml',
-        '.swf'    : 'application/x-shockwave-flash',
-        '.t'      : 'application/x-troff',
         '.tar'    : 'application/x-tar',
         '.tcl'    : 'application/x-tcl',
         '.tex'    : 'application/x-tex',
         '.texi'   : 'application/x-texinfo',
         '.texinfo': 'application/x-texinfo',
-        '.tif'    : 'image/tiff',
-        '.tiff'   : 'image/tiff',
+        '.roff'   : 'application/x-troff',
+        '.t'      : 'application/x-troff',
         '.tr'     : 'application/x-troff',
-        '.tsv'    : 'text/tab-separated-values',
-        '.txt'    : 'text/plain',
+        '.man'    : 'application/x-troff-man',
+        '.me'     : 'application/x-troff-me',
+        '.ms'     : 'application/x-troff-ms',
         '.ustar'  : 'application/x-ustar',
-        '.vcf'    : 'text/x-vcard',
-        '.wasm'   : 'application/wasm',
-        '.wav'    : 'audio/x-wav',
-        '.webm'   : 'video/webm',
-        '.wiz'    : 'application/msword',
+        '.src'    : 'application/x-wais-source',
+        '.xsl'    : 'application/xml',
+        '.rdf'    : 'application/xml',
         '.wsdl'   : 'application/xml',
-        '.xbm'    : 'image/x-xbitmap',
-        '.xlb'    : 'application/vnd.ms-excel',
-        '.xls'    : 'application/vnd.ms-excel',
-        '.xml'    : 'text/xml',
         '.xpdl'   : 'application/xml',
+        '.zip'    : 'application/zip',
+        '.au'     : 'audio/basic',
+        '.snd'    : 'audio/basic',
+        '.mp3'    : 'audio/mpeg',
+        '.mp2'    : 'audio/mpeg',
+        '.aif'    : 'audio/x-aiff',
+        '.aifc'   : 'audio/x-aiff',
+        '.aiff'   : 'audio/x-aiff',
+        '.ra'     : 'audio/x-pn-realaudio',
+        '.wav'    : 'audio/x-wav',
+        '.bmp'    : 'image/bmp',
+        '.gif'    : 'image/gif',
+        '.ief'    : 'image/ief',
+        '.jpg'    : 'image/jpeg',
+        '.jpe'    : 'image/jpeg',
+        '.jpeg'   : 'image/jpeg',
+        '.png'    : 'image/png',
+        '.svg'    : 'image/svg+xml',
+        '.tiff'   : 'image/tiff',
+        '.tif'    : 'image/tiff',
+        '.ico'    : 'image/vnd.microsoft.icon',
+        '.ras'    : 'image/x-cmu-raster',
+        '.bmp'    : 'image/x-ms-bmp',
+        '.pnm'    : 'image/x-portable-anymap',
+        '.pbm'    : 'image/x-portable-bitmap',
+        '.pgm'    : 'image/x-portable-graymap',
+        '.ppm'    : 'image/x-portable-pixmap',
+        '.rgb'    : 'image/x-rgb',
+        '.xbm'    : 'image/x-xbitmap',
         '.xpm'    : 'image/x-xpixmap',
-        '.xsl'    : 'application/xml',
         '.xwd'    : 'image/x-xwindowdump',
-        '.zip'    : 'application/zip',
+        '.eml'    : 'message/rfc822',
+        '.mht'    : 'message/rfc822',
+        '.mhtml'  : 'message/rfc822',
+        '.nws'    : 'message/rfc822',
+        '.css'    : 'text/css',
+        '.csv'    : 'text/csv',
+        '.html'   : 'text/html',
+        '.htm'    : 'text/html',
+        '.txt'    : 'text/plain',
+        '.bat'    : 'text/plain',
+        '.c'      : 'text/plain',
+        '.h'      : 'text/plain',
+        '.ksh'    : 'text/plain',
+        '.pl'     : 'text/plain',
+        '.rtx'    : 'text/richtext',
+        '.tsv'    : 'text/tab-separated-values',
+        '.py'     : 'text/x-python',
+        '.etx'    : 'text/x-setext',
+        '.sgm'    : 'text/x-sgml',
+        '.sgml'   : 'text/x-sgml',
+        '.vcf'    : 'text/x-vcard',
+        '.xml'    : 'text/xml',
+        '.mp4'    : 'video/mp4',
+        '.mpeg'   : 'video/mpeg',
+        '.m1v'    : 'video/mpeg',
+        '.mpa'    : 'video/mpeg',
+        '.mpe'    : 'video/mpeg',
+        '.mpg'    : 'video/mpeg',
+        '.mov'    : 'video/quicktime',
+        '.qt'     : 'video/quicktime',
+        '.webm'   : 'video/webm',
+        '.avi'    : 'video/x-msvideo',
+        '.movie'  : 'video/x-sgi-movie',
         }
 
     # These are non-standard types, commonly found in the wild.  They will
     # only match if strict=0 flag is given to the API methods.
 
     # Please sort these too
-    common_types = {
-        '.jpg' : 'image/jpg',
-        '.mid' : 'audio/midi',
+    common_types = _common_types_default = {
+        '.rtf' : 'application/rtf',
         '.midi': 'audio/midi',
+        '.mid' : 'audio/midi',
+        '.jpg' : 'image/jpg',
+        '.pict': 'image/pict',
         '.pct' : 'image/pict',
         '.pic' : 'image/pict',
-        '.pict': 'image/pict',
-        '.rtf' : 'application/rtf',
-        '.xul' : 'text/xul'
+        '.xul' : 'text/xul',
         }
 
 
diff --git a/Lib/test/test_mimetypes.py b/Lib/test/test_mimetypes.py
index c4b2fe2047a7..bfd5eeedaa77 100644
--- a/Lib/test/test_mimetypes.py
+++ b/Lib/test/test_mimetypes.py
@@ -79,6 +79,57 @@ def test_encoding(self):
                                           strict=True)
         self.assertEqual(exts, ['.g3', '.g\xb3'])
 
+    def test_init_reinitializes(self):
+        # Issue 4963: make sure an init starts clean
+        # First, put some poison into the types table
+        mimetypes.add_type('foo/bar', '.foobar')
+        self.assertEqual(mimetypes.guess_extension('foo/bar'), '.foobar')
+        # Reinitialize
+        mimetypes.init()
+        # Poison should be gone.
+        self.assertEqual(mimetypes.guess_extension('foo/bar'), None)
+
+    def test_preferred_extension(self):
+        def check_extensions():
+            self.assertEqual(mimetypes.guess_extension('application/octet-stream'), '.bin')
+            self.assertEqual(mimetypes.guess_extension('application/postscript'), '.ps')
+            self.assertEqual(mimetypes.guess_extension('application/vnd.apple.mpegurl'), '.m3u')
+            self.assertEqual(mimetypes.guess_extension('application/vnd.ms-excel'), '.xls')
+            self.assertEqual(mimetypes.guess_extension('application/vnd.ms-powerpoint'), '.ppt')
+            self.assertEqual(mimetypes.guess_extension('application/x-texinfo'), '.texi')
+            self.assertEqual(mimetypes.guess_extension('application/x-troff'), '.roff')
+            self.assertEqual(mimetypes.guess_extension('application/xml'), '.xsl')
+            self.assertEqual(mimetypes.guess_extension('audio/mpeg'), '.mp3')
+            self.assertEqual(mimetypes.guess_extension('image/jpeg'), '.jpg')
+            self.assertEqual(mimetypes.guess_extension('image/tiff'), '.tiff')
+            self.assertEqual(mimetypes.guess_extension('message/rfc822'), '.eml')
+            self.assertEqual(mimetypes.guess_extension('text/html'), '.html')
+            self.assertEqual(mimetypes.guess_extension('text/plain'), '.txt')
+            self.assertEqual(mimetypes.guess_extension('video/mpeg'), '.mpeg')
+            self.assertEqual(mimetypes.guess_extension('video/quicktime'), '.mov')
+
+        check_extensions()
+        mimetypes.init()
+        check_extensions()
+
+    def test_init_stability(self):
+        mimetypes.init()
+
+        suffix_map = mimetypes.suffix_map
+        encodings_map = mimetypes.encodings_map
+        types_map = mimetypes.types_map
+        common_types = mimetypes.common_types
+
+        mimetypes.init()
+        self.assertIsNot(suffix_map, mimetypes.suffix_map)
+        self.assertIsNot(encodings_map, mimetypes.encodings_map)
+        self.assertIsNot(types_map, mimetypes.types_map)
+        self.assertIsNot(common_types, mimetypes.common_types)
+        self.assertEqual(suffix_map, mimetypes.suffix_map)
+        self.assertEqual(encodings_map, mimetypes.encodings_map)
+        self.assertEqual(types_map, mimetypes.types_map)
+        self.assertEqual(common_types, mimetypes.common_types)
+
     def test_path_like_ob(self):
         filename = "LICENSE.txt"
         filepath = pathlib.Path(filename)
diff --git a/Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst b/Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst
new file mode 100644
index 000000000000..3b060052fd35
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-08-15-11-24-41.bpo-4963.LRYres.rst
@@ -0,0 +1,2 @@
+Fixed non-deterministic behavior related to mimetypes extension mapping and
+module reinitialization.



More information about the Python-checkins mailing list