[Python-checkins] CVS: python/dist/src/Lib mimetypes.py,1.14,1.15

Fred L. Drake fdrake@users.sourceforge.net
Fri, 03 Aug 2001 14:01:46 -0700


Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv21371

Modified Files:
	mimetypes.py 
Log Message:

Refactor so that it is easier to work with alternate MIME types databases,
and programmatically extend the database in different ways.

This closes the SF bug (feature request) #439710.


Index: mimetypes.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/mimetypes.py,v
retrieving revision 1.14
retrieving revision 1.15
diff -C2 -d -r1.14 -r1.15
*** mimetypes.py	2001/06/05 05:17:00	1.14
--- mimetypes.py	2001/08/03 21:01:44	1.15
***************
*** 13,17 ****
  knownfiles -- list of files to parse
  inited -- flag set when init() has been called
! suffixes_map -- dictionary mapping suffixes to suffixes
  encodings_map -- dictionary mapping suffixes to encodings
  types_map -- dictionary mapping suffixes to types
--- 13,17 ----
  knownfiles -- list of files to parse
  inited -- flag set when init() has been called
! suffix_map -- dictionary mapping suffixes to suffixes
  encodings_map -- dictionary mapping suffixes to encodings
  types_map -- dictionary mapping suffixes to types
***************
*** 24,27 ****
--- 24,28 ----
  """
  
+ import os
  import posixpath
  import urllib
***************
*** 38,41 ****
--- 39,153 ----
  inited = 0
  
+ 
+ class MimeTypes:
+     """MIME-types datastore.
+ 
+     This datastore can handle information from mime.types-style files
+     and supports basic determination of MIME type from a filename or
+     URL, and can guess a reasonable extension given a MIME type.
+     """
+ 
+     def __init__(self, filenames=()):
+         if not inited:
+             init()
+         self.encodings_map = encodings_map.copy()
+         self.suffix_map = suffix_map.copy()
+         self.types_map = types_map.copy()
+         for name in filenames:
+             self.read(name)
+ 
+     def guess_type(self, url):
+         """Guess the type of a file based on its URL.
+ 
+         Return value is a tuple (type, encoding) where type is None if
+         the type can't be guessed (no or unknown suffix) or a string
+         of the form type/subtype, usable for a MIME Content-type
+         header; and encoding is None for no encoding or the name of
+         the program used to encode (e.g. compress or gzip).  The
+         mappings are table driven.  Encoding suffixes are case
+         sensitive; type suffixes are first tried case sensitive, then
+         case insensitive.
+ 
+         The suffixes .tgz, .taz and .tz (case sensitive!) are all
+         mapped to '.tar.gz'.  (This is table-driven too, using the
+         dictionary suffix_map.)
+         """
+         scheme, url = urllib.splittype(url)
+         if scheme == 'data':
+             # syntax of data URLs:
+             # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
+             # mediatype := [ type "/" subtype ] *( ";" parameter )
+             # data      := *urlchar
+             # parameter := attribute "=" value
+             # type/subtype defaults to "text/plain"
+             comma = url.find(',')
+             if comma < 0:
+                 # bad data URL
+                 return None, None
+             semi = url.find(';', 0, comma)
+             if semi >= 0:
+                 type = url[:semi]
+             else:
+                 type = url[:comma]
+             if '=' in type or '/' not in type:
+                 type = 'text/plain'
+             return type, None           # never compressed, so encoding is None
+         base, ext = posixpath.splitext(url)
+         while self.suffix_map.has_key(ext):
+             base, ext = posixpath.splitext(base + self.suffix_map[ext])
+         if self.encodings_map.has_key(ext):
+             encoding = self.encodings_map[ext]
+             base, ext = posixpath.splitext(base)
+         else:
+             encoding = None
+         types_map = self.types_map
+         if types_map.has_key(ext):
+             return types_map[ext], encoding
+         elif types_map.has_key(ext.lower()):
+             return types_map[ext.lower()], encoding
+         else:
+             return None, encoding
+ 
+     def guess_extension(self, type):
+         """Guess the extension for a file based on its MIME type.
+ 
+         Return value is a string giving a filename extension,
+         including the leading dot ('.').  The extension is not
+         guaranteed to have been associated with any particular data
+         stream, but would be mapped to the MIME type `type' by
+         guess_type().  If no extension can be guessed for `type', None
+         is returned.
+         """
+         type = type.lower()
+         for ext, stype in self.types_map.items():
+             if type == stype:
+                 return ext
+         return None
+ 
+     def read(self, filename):
+         """Read a single mime.types-format file, specified by pathname."""
+         fp = open(filename)
+         self.readfp(fp)
+         fp.close()
+ 
+     def readfp(self):
+         """Read a single mime.types-format file."""
+         map = self.types_map
+         while 1:
+             line = f.readline()
+             if not line:
+                 break
+             words = line.split()
+             for i in range(len(words)):
+                 if words[i][0] == '#':
+                     del words[i:]
+                     break
+             if not words:
+                 continue
+             type, suffixes = words[0], words[1:]
+             for suff in suffixes:
+                 map['.' + suff] = type
+ 
+ 
  def guess_type(url):
      """Guess the type of a file based on its URL.
***************
*** 52,93 ****
      to ".tar.gz".  (This is table-driven too, using the dictionary
      suffix_map).
- 
      """
!     if not inited:
!         init()
!     scheme, url = urllib.splittype(url)
!     if scheme == 'data':
!         # syntax of data URLs:
!         # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
!         # mediatype := [ type "/" subtype ] *( ";" parameter )
!         # data      := *urlchar
!         # parameter := attribute "=" value
!         # type/subtype defaults to "text/plain"
!         comma = url.find(',')
!         if comma < 0:
!             # bad data URL
!             return None, None
!         semi = url.find(';', 0, comma)
!         if semi >= 0:
!             type = url[:semi]
!         else:
!             type = url[:comma]
!         if '=' in type or '/' not in type:
!             type = 'text/plain'
!         return type, None               # never compressed, so encoding is None
!     base, ext = posixpath.splitext(url)
!     while suffix_map.has_key(ext):
!         base, ext = posixpath.splitext(base + suffix_map[ext])
!     if encodings_map.has_key(ext):
!         encoding = encodings_map[ext]
!         base, ext = posixpath.splitext(base)
!     else:
!         encoding = None
!     if types_map.has_key(ext):
!         return types_map[ext], encoding
!     elif types_map.has_key(ext.lower()):
!         return types_map[ext.lower()], encoding
!     else:
!         return None, encoding
  
  def guess_extension(type):
--- 164,171 ----
      to ".tar.gz".  (This is table-driven too, using the dictionary
      suffix_map).
      """
!     init()
!     return guess_type(url)
! 
  
  def guess_extension(type):
***************
*** 100,120 ****
      `type', None is returned.
      """
!     global inited
!     if not inited:
!         init()
!     type = type.lower()
!     for ext, stype in types_map.items():
!         if type == stype:
!             return ext
!     return None
  
  def init(files=None):
      global inited
-     for file in files or knownfiles:
-         s = read_mime_types(file)
-         if s:
-             for key, value in s.items():
-                 types_map[key] = value
      inited = 1
  
  def read_mime_types(file):
--- 178,202 ----
      `type', None is returned.
      """
!     init()
!     return guess_extension(type)
  
+ 
  def init(files=None):
+     global guess_extension, guess_type
+     global suffix_map, types_map, encodings_map
      global inited
      inited = 1
+     db = MimeTypes()
+     if files is None:
+         files = knownfiles
+     for file in files:
+         if os.path.isfile(file):
+             db.readfp(open(file))
+     encodings_map = db.encodings_map
+     suffix_map = db.encodings_map
+     types_map = db.types_map
+     guess_extension = db.guess_extension
+     guess_type = db.guess_type
+ 
  
  def read_mime_types(file):
***************
*** 123,147 ****
      except IOError:
          return None
!     map = {}
!     while 1:
!         line = f.readline()
!         if not line: break
!         words = line.split()
!         for i in range(len(words)):
!             if words[i][0] == '#':
!                 del words[i:]
!                 break
!         if not words: continue
!         type, suffixes = words[0], words[1:]
!         for suff in suffixes:
!             map['.'+suff] = type
!     f.close()
!     return map
  
  suffix_map = {
      '.tgz': '.tar.gz',
      '.taz': '.tar.gz',
      '.tz': '.tar.gz',
! }
  
  encodings_map = {
--- 205,218 ----
      except IOError:
          return None
!     db = MimeTypes()
!     db.readfp(f)
!     return db.types_map
  
+ 
  suffix_map = {
      '.tgz': '.tar.gz',
      '.taz': '.tar.gz',
      '.tz': '.tar.gz',
!     }
  
  encodings_map = {