[Python-checkins] python/dist/src/Lib/encodings __init__.py,1.11,1.12

lemburg@users.sourceforge.net lemburg@users.sourceforge.net
Fri, 04 Oct 2002 04:45:40 -0700


Update of /cvsroot/python/python/dist/src/Lib/encodings
In directory usw-pr-cvs1:/tmp/cvs-serv20044/Lib/encodings

Modified Files:
	__init__.py 
Log Message:
Extending the encoding name normalization to handle more non-alphanumeric
characters.



Index: __init__.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/encodings/__init__.py,v
retrieving revision 1.11
retrieving revision 1.12
diff -C2 -d -r1.11 -r1.12
*** __init__.py	8 Aug 2002 20:19:18 -0000	1.11
--- __init__.py	4 Oct 2002 11:45:38 -0000	1.12
***************
*** 4,10 ****
      directory.
  
!     Codec modules must have names corresponding to standard lower-case
!     encoding names with hyphens mapped to underscores, e.g. 'utf-8' is
!     implemented by the module 'utf_8.py'.
  
      Each codec module must export the following interface:
--- 4,10 ----
      directory.
  
!     Codec modules must have names corresponding to normalized encoding
!     names as defined in the normalize_encoding() function below, e.g.
!     'utf-8' must be implemented by the module 'utf_8.py'.
  
      Each codec module must export the following interface:
***************
*** 19,25 ****
      * getaliases() -> sequence of encoding name strings to use as aliases
  
!     Alias names returned by getaliases() must be standard encoding
!     names as defined above (lower-case, hyphens converted to
!     underscores).
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
--- 19,24 ----
      * getaliases() -> sequence of encoding name strings to use as aliases
  
!     Alias names returned by getaliases() must be normalized encoding
!     names as defined by normalize_encoding().
  
  Written by Marc-Andre Lemburg (mal@lemburg.com).
***************
*** 29,37 ****
  """#"
  
! import codecs,exceptions
  
  _cache = {}
  _unknown = '--unknown--'
  _import_tail = ['*']
  
  class CodecRegistryError(exceptions.LookupError,
--- 28,37 ----
  """#"
  
! import codecs, exceptions, re
  
  _cache = {}
  _unknown = '--unknown--'
  _import_tail = ['*']
+ _norm_encoding_RE = re.compile('[^a-zA-Z0-9.]')
  
  class CodecRegistryError(exceptions.LookupError,
***************
*** 39,42 ****
--- 39,54 ----
      pass
  
+ def normalize_encoding(encoding):
+ 
+     """ Normalize an encoding name.
+ 
+         Normalization works as follows: all non-alphanumeric
+         characters except the dot used for Python package names are
+         collapsed and replaced with a single underscore, e.g. '  -;#'
+         becomes '_'.
+         
+     """
+     return '_'.join(_norm_encoding_RE.split(encoding))
+ 
  def search_function(encoding):
  
***************
*** 52,56 ****
      # default import module lookup scheme with the alias name.
      #
!     modname = encoding.replace('-', '_')
      try:
          mod = __import__('encodings.' + modname,
--- 64,68 ----
      # default import module lookup scheme with the alias name.
      #
!     modname = normalize_encoding(encoding)
      try:
          mod = __import__('encodings.' + modname,