[Python-3000-checkins] r55813 - python/branches/py3k-struni/Lib/encodings/__init__.py
guido.van.rossum
python-3000-checkins at python.org
Thu Jun 7 23:43:47 CEST 2007
Author: guido.van.rossum
Date: Thu Jun 7 23:43:46 2007
New Revision: 55813
Modified:
python/branches/py3k-struni/Lib/encodings/__init__.py
Log:
Change normalize_encodings() to avoid using .translate() or depending on
the string type. It will always return a Unicode string. The algorithm's
specification is unchanged.
Modified: python/branches/py3k-struni/Lib/encodings/__init__.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/__init__.py (original)
+++ python/branches/py3k-struni/Lib/encodings/__init__.py Thu Jun 7 23:43:46 2007
@@ -34,12 +34,6 @@
_cache = {}
_unknown = '--unknown--'
_import_tail = ['*']
-_norm_encoding_map = (' . '
- '0123456789 ABCDEFGHIJKLMNOPQRSTUVWXYZ '
- ' abcdefghijklmnopqrstuvwxyz '
- ' '
- ' '
- ' ')
_aliases = aliases.aliases
class CodecRegistryError(LookupError, SystemError):
@@ -58,14 +52,17 @@
non-ASCII characters, these must be Latin-1 compatible.
"""
- # Make sure we have an 8-bit string, because .translate() works
- # differently for Unicode strings.
- if isinstance(encoding, str):
- # Note that .encode('latin-1') does *not* use the codec
- # registry, so this call doesn't recurse. (See unicodeobject.c
- # PyUnicode_AsEncodedString() for details)
- encoding = encoding.encode('latin-1')
- return '_'.join(encoding.translate(_norm_encoding_map).split())
+ chars = []
+ punct = False
+ for c in encoding:
+ if c.isalnum() or c == '.':
+ if punct and chars:
+ chars.append('_')
+ chars.append(c)
+ punct = False
+ else:
+ punct = True
+ return ''.join(chars)
def search_function(encoding):
More information about the Python-3000-checkins
mailing list