[Python-checkins] r55079 - in python/branches/py3k-struni/Lib: HTMLParser.py codecs.py compiler/transformer.py cookielib.py copy.py ctypes/__init__.py ctypes/macholib/dyld.py ctypes/test/test_as_parameter.py ctypes/test/test_buffers.py ctypes/test/test_functions.py ctypes/test/test_parameters.py ctypes/test/test_prototypes.py ctypes/test/test_slicing.py ctypes/test/test_strings.py ctypes/test/test_structures.py ctypes/test/test_unicode.py distutils/command/bdist_wininst.py distutils/command/build_clib.py distutils/command/register.py doctest.py email/charset.py email/generator.py email/header.py email/message.py email/test/test_email.py email/test/test_email_codecs.py email/test/test_email_codecs_renamed.py email/test/test_email_renamed.py email/utils.py encodings/__init__.py encodings/cp037.py encodings/cp1006.py encodings/cp1026.py encodings/cp1140.py encodings/cp1250.py encodings/cp1251.py encodings/cp1252.py encodings/cp1253.py encodings/cp1254.py encodings/cp1255.py encodings/cp1256.py encodings/cp1257.py encodings/cp1258.py encodings/cp424.py encodings/cp437.py encodings/cp500.py encodings/cp737.py encodings/cp775.py encodings/cp850.py encodings/cp852.py encodings/cp855.py encodings/cp856.py encodings/cp857.py encodings/cp860.py encodings/cp861.py encodings/cp862.py encodings/cp863.py encodings/cp864.py encodings/cp865.py encodings/cp866.py encodings/cp869.py encodings/cp874.py encodings/cp875.py encodings/idna.py encodings/iso8859_1.py encodings/iso8859_10.py encodings/iso8859_11.py encodings/iso8859_13.py encodings/iso8859_14.py encodings/iso8859_15.py encodings/iso8859_16.py encodings/iso8859_2.py encodings/iso8859_3.py encodings/iso8859_4.py encodings/iso8859_5.py encodings/iso8859_6.py encodings/iso8859_7.py encodings/iso8859_8.py encodings/iso8859_9.py encodings/koi8_r.py encodings/koi8_u.py encodings/mac_arabic.py encodings/mac_centeuro.py encodings/mac_croatian.py encodings/mac_cyrillic.py encodings/mac_farsi.py encodings/mac_greek.py 
encodings/mac_iceland.py encodings/mac_roman.py encodings/mac_romanian.py encodings/mac_turkish.py encodings/punycode.py encodings/tis_620.py encodings/utf_8_sig.py gettext.py glob.py idlelib/EditorWindow.py idlelib/IOBinding.py idlelib/OutputWindow.py idlelib/PyParse.py idlelib/PyShell.py lib-tk/Tkinter.py msilib/schema.py msilib/sequence.py msilib/text.py pickle.py pickletools.py plat-mac/EasyDialogs.py plat-mac/FrameWork.py plat-mac/aepack.py plat-mac/buildtools.py plat-mac/macostools.py plat-mac/macresource.py plat-mac/plistlib.py sqlite3/test/dbapi.py sqlite3/test/factory.py sqlite3/test/types.py sqlite3/test/userfunctions.py stringprep.py tarfile.py test/bad_coding2.py test/pickletester.py test/string_tests.py test/test_StringIO.py test/test_array.py test/test_bigmem.py test/test_binascii.py test/test_bool.py test/test_builtin.py test/test_bytes.py test/test_cfgparser.py test/test_charmapcodec.py test/test_codeccallbacks.py test/test_codecencodings_cn.py test/test_codecencodings_hk.py test/test_codecencodings_jp.py test/test_codecencodings_kr.py test/test_codecencodings_tw.py test/test_codecmaps_jp.py test/test_codecmaps_kr.py test/test_codecmaps_tw.py test/test_codecs.py test/test_compile.py test/test_complex.py test/test_contains.py test/test_cookielib.py test/test_copy.py test/test_descr.py test/test_doctest2.py test/test_exceptions.py test/test_file.py test/test_fileinput.py test/test_fileio.py test/test_format.py test/test_getargs.py test/test_gettext.py test/test_glob.py test/test_htmlparser.py test/test_index.py test/test_io.py test/test_isinstance.py test/test_iter.py test/test_macfs.py test/test_marshal.py test/test_minidom.py test/test_module.py test/test_multibytecodec.py test/test_multibytecodec_support.py test/test_normalization.py test/test_optparse.py test/test_pep263.py test/test_pep277.py test/test_pep292.py test/test_pep352.py test/test_plistlib.py test/test_pprint.py test/test_pyexpat.py test/test_re.py test/test_set.py 
test/test_startfile.py test/test_str.py test/test_stringprep.py test/test_support.py test/test_tarfile.py test/test_textwrap.py test/test_timeout.py test/test_types.py test/test_ucn.py test/test_unicode.py test/test_unicode_file.py test/test_unicodedata.py test/test_urllib.py test/test_winreg.py test/test_xmlrpc.py test/testcodec.py textwrap.py types.py urllib.py xml/dom/minicompat.py xmlrpclib.py

guido.van.rossum python-checkins at python.org
Wed May 2 21:10:59 CEST 2007


Author: guido.van.rossum
Date: Wed May  2 21:09:54 2007
New Revision: 55079

Modified:
   python/branches/py3k-struni/Lib/HTMLParser.py
   python/branches/py3k-struni/Lib/codecs.py
   python/branches/py3k-struni/Lib/compiler/transformer.py
   python/branches/py3k-struni/Lib/cookielib.py
   python/branches/py3k-struni/Lib/copy.py
   python/branches/py3k-struni/Lib/ctypes/__init__.py
   python/branches/py3k-struni/Lib/ctypes/macholib/dyld.py
   python/branches/py3k-struni/Lib/ctypes/test/test_as_parameter.py
   python/branches/py3k-struni/Lib/ctypes/test/test_buffers.py
   python/branches/py3k-struni/Lib/ctypes/test/test_functions.py
   python/branches/py3k-struni/Lib/ctypes/test/test_parameters.py
   python/branches/py3k-struni/Lib/ctypes/test/test_prototypes.py
   python/branches/py3k-struni/Lib/ctypes/test/test_slicing.py
   python/branches/py3k-struni/Lib/ctypes/test/test_strings.py
   python/branches/py3k-struni/Lib/ctypes/test/test_structures.py
   python/branches/py3k-struni/Lib/ctypes/test/test_unicode.py
   python/branches/py3k-struni/Lib/distutils/command/bdist_wininst.py
   python/branches/py3k-struni/Lib/distutils/command/build_clib.py
   python/branches/py3k-struni/Lib/distutils/command/register.py
   python/branches/py3k-struni/Lib/doctest.py
   python/branches/py3k-struni/Lib/email/charset.py
   python/branches/py3k-struni/Lib/email/generator.py
   python/branches/py3k-struni/Lib/email/header.py
   python/branches/py3k-struni/Lib/email/message.py
   python/branches/py3k-struni/Lib/email/test/test_email.py
   python/branches/py3k-struni/Lib/email/test/test_email_codecs.py
   python/branches/py3k-struni/Lib/email/test/test_email_codecs_renamed.py
   python/branches/py3k-struni/Lib/email/test/test_email_renamed.py
   python/branches/py3k-struni/Lib/email/utils.py
   python/branches/py3k-struni/Lib/encodings/__init__.py
   python/branches/py3k-struni/Lib/encodings/cp037.py
   python/branches/py3k-struni/Lib/encodings/cp1006.py
   python/branches/py3k-struni/Lib/encodings/cp1026.py
   python/branches/py3k-struni/Lib/encodings/cp1140.py
   python/branches/py3k-struni/Lib/encodings/cp1250.py
   python/branches/py3k-struni/Lib/encodings/cp1251.py
   python/branches/py3k-struni/Lib/encodings/cp1252.py
   python/branches/py3k-struni/Lib/encodings/cp1253.py
   python/branches/py3k-struni/Lib/encodings/cp1254.py
   python/branches/py3k-struni/Lib/encodings/cp1255.py
   python/branches/py3k-struni/Lib/encodings/cp1256.py
   python/branches/py3k-struni/Lib/encodings/cp1257.py
   python/branches/py3k-struni/Lib/encodings/cp1258.py
   python/branches/py3k-struni/Lib/encodings/cp424.py
   python/branches/py3k-struni/Lib/encodings/cp437.py
   python/branches/py3k-struni/Lib/encodings/cp500.py
   python/branches/py3k-struni/Lib/encodings/cp737.py
   python/branches/py3k-struni/Lib/encodings/cp775.py
   python/branches/py3k-struni/Lib/encodings/cp850.py
   python/branches/py3k-struni/Lib/encodings/cp852.py
   python/branches/py3k-struni/Lib/encodings/cp855.py
   python/branches/py3k-struni/Lib/encodings/cp856.py
   python/branches/py3k-struni/Lib/encodings/cp857.py
   python/branches/py3k-struni/Lib/encodings/cp860.py
   python/branches/py3k-struni/Lib/encodings/cp861.py
   python/branches/py3k-struni/Lib/encodings/cp862.py
   python/branches/py3k-struni/Lib/encodings/cp863.py
   python/branches/py3k-struni/Lib/encodings/cp864.py
   python/branches/py3k-struni/Lib/encodings/cp865.py
   python/branches/py3k-struni/Lib/encodings/cp866.py
   python/branches/py3k-struni/Lib/encodings/cp869.py
   python/branches/py3k-struni/Lib/encodings/cp874.py
   python/branches/py3k-struni/Lib/encodings/cp875.py
   python/branches/py3k-struni/Lib/encodings/idna.py
   python/branches/py3k-struni/Lib/encodings/iso8859_1.py
   python/branches/py3k-struni/Lib/encodings/iso8859_10.py
   python/branches/py3k-struni/Lib/encodings/iso8859_11.py
   python/branches/py3k-struni/Lib/encodings/iso8859_13.py
   python/branches/py3k-struni/Lib/encodings/iso8859_14.py
   python/branches/py3k-struni/Lib/encodings/iso8859_15.py
   python/branches/py3k-struni/Lib/encodings/iso8859_16.py
   python/branches/py3k-struni/Lib/encodings/iso8859_2.py
   python/branches/py3k-struni/Lib/encodings/iso8859_3.py
   python/branches/py3k-struni/Lib/encodings/iso8859_4.py
   python/branches/py3k-struni/Lib/encodings/iso8859_5.py
   python/branches/py3k-struni/Lib/encodings/iso8859_6.py
   python/branches/py3k-struni/Lib/encodings/iso8859_7.py
   python/branches/py3k-struni/Lib/encodings/iso8859_8.py
   python/branches/py3k-struni/Lib/encodings/iso8859_9.py
   python/branches/py3k-struni/Lib/encodings/koi8_r.py
   python/branches/py3k-struni/Lib/encodings/koi8_u.py
   python/branches/py3k-struni/Lib/encodings/mac_arabic.py
   python/branches/py3k-struni/Lib/encodings/mac_centeuro.py
   python/branches/py3k-struni/Lib/encodings/mac_croatian.py
   python/branches/py3k-struni/Lib/encodings/mac_cyrillic.py
   python/branches/py3k-struni/Lib/encodings/mac_farsi.py
   python/branches/py3k-struni/Lib/encodings/mac_greek.py
   python/branches/py3k-struni/Lib/encodings/mac_iceland.py
   python/branches/py3k-struni/Lib/encodings/mac_roman.py
   python/branches/py3k-struni/Lib/encodings/mac_romanian.py
   python/branches/py3k-struni/Lib/encodings/mac_turkish.py
   python/branches/py3k-struni/Lib/encodings/punycode.py
   python/branches/py3k-struni/Lib/encodings/tis_620.py
   python/branches/py3k-struni/Lib/encodings/utf_8_sig.py
   python/branches/py3k-struni/Lib/gettext.py
   python/branches/py3k-struni/Lib/glob.py
   python/branches/py3k-struni/Lib/idlelib/EditorWindow.py
   python/branches/py3k-struni/Lib/idlelib/IOBinding.py
   python/branches/py3k-struni/Lib/idlelib/OutputWindow.py
   python/branches/py3k-struni/Lib/idlelib/PyParse.py
   python/branches/py3k-struni/Lib/idlelib/PyShell.py
   python/branches/py3k-struni/Lib/lib-tk/Tkinter.py
   python/branches/py3k-struni/Lib/msilib/schema.py
   python/branches/py3k-struni/Lib/msilib/sequence.py
   python/branches/py3k-struni/Lib/msilib/text.py
   python/branches/py3k-struni/Lib/pickle.py
   python/branches/py3k-struni/Lib/pickletools.py
   python/branches/py3k-struni/Lib/plat-mac/EasyDialogs.py
   python/branches/py3k-struni/Lib/plat-mac/FrameWork.py
   python/branches/py3k-struni/Lib/plat-mac/aepack.py
   python/branches/py3k-struni/Lib/plat-mac/buildtools.py
   python/branches/py3k-struni/Lib/plat-mac/macostools.py
   python/branches/py3k-struni/Lib/plat-mac/macresource.py
   python/branches/py3k-struni/Lib/plat-mac/plistlib.py
   python/branches/py3k-struni/Lib/sqlite3/test/dbapi.py
   python/branches/py3k-struni/Lib/sqlite3/test/factory.py
   python/branches/py3k-struni/Lib/sqlite3/test/types.py
   python/branches/py3k-struni/Lib/sqlite3/test/userfunctions.py
   python/branches/py3k-struni/Lib/stringprep.py
   python/branches/py3k-struni/Lib/tarfile.py
   python/branches/py3k-struni/Lib/test/bad_coding2.py
   python/branches/py3k-struni/Lib/test/pickletester.py
   python/branches/py3k-struni/Lib/test/string_tests.py
   python/branches/py3k-struni/Lib/test/test_StringIO.py
   python/branches/py3k-struni/Lib/test/test_array.py
   python/branches/py3k-struni/Lib/test/test_bigmem.py
   python/branches/py3k-struni/Lib/test/test_binascii.py
   python/branches/py3k-struni/Lib/test/test_bool.py
   python/branches/py3k-struni/Lib/test/test_builtin.py
   python/branches/py3k-struni/Lib/test/test_bytes.py
   python/branches/py3k-struni/Lib/test/test_cfgparser.py
   python/branches/py3k-struni/Lib/test/test_charmapcodec.py
   python/branches/py3k-struni/Lib/test/test_codeccallbacks.py
   python/branches/py3k-struni/Lib/test/test_codecencodings_cn.py
   python/branches/py3k-struni/Lib/test/test_codecencodings_hk.py
   python/branches/py3k-struni/Lib/test/test_codecencodings_jp.py
   python/branches/py3k-struni/Lib/test/test_codecencodings_kr.py
   python/branches/py3k-struni/Lib/test/test_codecencodings_tw.py
   python/branches/py3k-struni/Lib/test/test_codecmaps_jp.py
   python/branches/py3k-struni/Lib/test/test_codecmaps_kr.py
   python/branches/py3k-struni/Lib/test/test_codecmaps_tw.py
   python/branches/py3k-struni/Lib/test/test_codecs.py
   python/branches/py3k-struni/Lib/test/test_compile.py
   python/branches/py3k-struni/Lib/test/test_complex.py
   python/branches/py3k-struni/Lib/test/test_contains.py
   python/branches/py3k-struni/Lib/test/test_cookielib.py
   python/branches/py3k-struni/Lib/test/test_copy.py
   python/branches/py3k-struni/Lib/test/test_descr.py
   python/branches/py3k-struni/Lib/test/test_doctest2.py
   python/branches/py3k-struni/Lib/test/test_exceptions.py
   python/branches/py3k-struni/Lib/test/test_file.py
   python/branches/py3k-struni/Lib/test/test_fileinput.py
   python/branches/py3k-struni/Lib/test/test_fileio.py
   python/branches/py3k-struni/Lib/test/test_format.py
   python/branches/py3k-struni/Lib/test/test_getargs.py
   python/branches/py3k-struni/Lib/test/test_gettext.py
   python/branches/py3k-struni/Lib/test/test_glob.py
   python/branches/py3k-struni/Lib/test/test_htmlparser.py
   python/branches/py3k-struni/Lib/test/test_index.py
   python/branches/py3k-struni/Lib/test/test_io.py
   python/branches/py3k-struni/Lib/test/test_isinstance.py
   python/branches/py3k-struni/Lib/test/test_iter.py
   python/branches/py3k-struni/Lib/test/test_macfs.py
   python/branches/py3k-struni/Lib/test/test_marshal.py
   python/branches/py3k-struni/Lib/test/test_minidom.py
   python/branches/py3k-struni/Lib/test/test_module.py
   python/branches/py3k-struni/Lib/test/test_multibytecodec.py
   python/branches/py3k-struni/Lib/test/test_multibytecodec_support.py
   python/branches/py3k-struni/Lib/test/test_normalization.py
   python/branches/py3k-struni/Lib/test/test_optparse.py
   python/branches/py3k-struni/Lib/test/test_pep263.py
   python/branches/py3k-struni/Lib/test/test_pep277.py
   python/branches/py3k-struni/Lib/test/test_pep292.py
   python/branches/py3k-struni/Lib/test/test_pep352.py
   python/branches/py3k-struni/Lib/test/test_plistlib.py
   python/branches/py3k-struni/Lib/test/test_pprint.py
   python/branches/py3k-struni/Lib/test/test_pyexpat.py
   python/branches/py3k-struni/Lib/test/test_re.py
   python/branches/py3k-struni/Lib/test/test_set.py
   python/branches/py3k-struni/Lib/test/test_startfile.py
   python/branches/py3k-struni/Lib/test/test_str.py
   python/branches/py3k-struni/Lib/test/test_stringprep.py
   python/branches/py3k-struni/Lib/test/test_support.py
   python/branches/py3k-struni/Lib/test/test_tarfile.py
   python/branches/py3k-struni/Lib/test/test_textwrap.py
   python/branches/py3k-struni/Lib/test/test_timeout.py
   python/branches/py3k-struni/Lib/test/test_types.py
   python/branches/py3k-struni/Lib/test/test_ucn.py
   python/branches/py3k-struni/Lib/test/test_unicode.py
   python/branches/py3k-struni/Lib/test/test_unicode_file.py
   python/branches/py3k-struni/Lib/test/test_unicodedata.py
   python/branches/py3k-struni/Lib/test/test_urllib.py
   python/branches/py3k-struni/Lib/test/test_winreg.py
   python/branches/py3k-struni/Lib/test/test_xmlrpc.py
   python/branches/py3k-struni/Lib/test/testcodec.py
   python/branches/py3k-struni/Lib/textwrap.py
   python/branches/py3k-struni/Lib/types.py
   python/branches/py3k-struni/Lib/urllib.py
   python/branches/py3k-struni/Lib/xml/dom/minicompat.py
   python/branches/py3k-struni/Lib/xmlrpclib.py
Log:
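Rip out all the u"..." literals and calls to unicode(), replacing them with
plain "..." literals and str() respectively (references to the unicode type
become str).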


Modified: python/branches/py3k-struni/Lib/HTMLParser.py
==============================================================================
--- python/branches/py3k-struni/Lib/HTMLParser.py	(original)
+++ python/branches/py3k-struni/Lib/HTMLParser.py	Wed May  2 21:09:54 2007
@@ -376,7 +376,7 @@
                 # which is not part of HTML 4
                 import htmlentitydefs
                 if HTMLParser.entitydefs is None:
-                    entitydefs = HTMLParser.entitydefs = {'apos':u"'"}
+                    entitydefs = HTMLParser.entitydefs = {'apos':"'"}
                     for k, v in htmlentitydefs.name2codepoint.items():
                         entitydefs[k] = unichr(v)
                 try:

Modified: python/branches/py3k-struni/Lib/codecs.py
==============================================================================
--- python/branches/py3k-struni/Lib/codecs.py	(original)
+++ python/branches/py3k-struni/Lib/codecs.py	Wed May  2 21:09:54 2007
@@ -589,7 +589,7 @@
 
         """
         self.bytebuffer = ""
-        self.charbuffer = u""
+        self.charbuffer = ""
         self.linebuffer = None
 
     def seek(self, offset, whence=0):

Modified: python/branches/py3k-struni/Lib/compiler/transformer.py
==============================================================================
--- python/branches/py3k-struni/Lib/compiler/transformer.py	(original)
+++ python/branches/py3k-struni/Lib/compiler/transformer.py	Wed May  2 21:09:54 2007
@@ -740,7 +740,7 @@
             # hack... changes in compile.c:parsestr and
             # tokenizer.c must be reflected here.
             if self.encoding not in ['utf-8', 'iso-8859-1']:
-                lit = unicode(lit, 'utf-8').encode(self.encoding)
+                lit = str(lit, 'utf-8').encode(self.encoding)
             return eval("# coding: %s\n%s" % (self.encoding, lit))
         else:
             return eval(lit)
@@ -750,7 +750,7 @@
         for node in nodelist[1:]:
             k += self.decode_literal(node[1])
         if isinstance(k, bytes):
-          return Bytes(str(k), lineno=nodelist[0][2])
+            return Bytes(str(k), lineno=nodelist[0][2])
         return Const(k, lineno=nodelist[0][2])
 
     def atom_ellipsis(self, nodelist):
@@ -825,7 +825,7 @@
         else:
             annotation = None
         return SimpleArg(name, annotation, lineno)
-        
+
     def com_arglist(self, nodelist):
         # varargslist:
         #     (fpdef ['=' test] ',')*

Modified: python/branches/py3k-struni/Lib/cookielib.py
==============================================================================
--- python/branches/py3k-struni/Lib/cookielib.py	(original)
+++ python/branches/py3k-struni/Lib/cookielib.py	Wed May  2 21:09:54 2007
@@ -644,7 +644,7 @@
     # And here, kind of: draft-fielding-uri-rfc2396bis-03
     # (And in draft IRI specification: draft-duerst-iri-05)
     # (And here, for new URI schemes: RFC 2718)
-    if isinstance(path, unicode):
+    if isinstance(path, str):
         path = path.encode("utf-8")
     path = urllib.quote(path, HTTP_PATH_SAFE)
     path = ESCAPED_CHAR_RE.sub(uppercase_escaped_char, path)

Modified: python/branches/py3k-struni/Lib/copy.py
==============================================================================
--- python/branches/py3k-struni/Lib/copy.py	(original)
+++ python/branches/py3k-struni/Lib/copy.py	Wed May  2 21:09:54 2007
@@ -186,7 +186,7 @@
     pass
 d[str] = _deepcopy_atomic
 try:
-    d[unicode] = _deepcopy_atomic
+    d[str] = _deepcopy_atomic
 except NameError:
     pass
 try:

Modified: python/branches/py3k-struni/Lib/ctypes/__init__.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/__init__.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/__init__.py	Wed May  2 21:09:54 2007
@@ -59,7 +59,7 @@
     create_string_buffer(anInteger) -> character array
     create_string_buffer(aString, anInteger) -> character array
     """
-    if isinstance(init, (str, unicode)):
+    if isinstance(init, (str, str)):
         if size is None:
             size = len(init)+1
         buftype = c_char * size
@@ -281,7 +281,7 @@
         create_unicode_buffer(anInteger) -> character array
         create_unicode_buffer(aString, anInteger) -> character array
         """
-        if isinstance(init, (str, unicode)):
+        if isinstance(init, (str, str)):
             if size is None:
                 size = len(init)+1
             buftype = c_wchar * size

Modified: python/branches/py3k-struni/Lib/ctypes/macholib/dyld.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/macholib/dyld.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/macholib/dyld.py	Wed May  2 21:09:54 2007
@@ -33,7 +33,7 @@
 
 def ensure_utf8(s):
     """Not all of PyObjC and Python understand unicode paths very well yet"""
-    if isinstance(s, unicode):
+    if isinstance(s, str):
         return s.encode('utf8')
     return s
 

Modified: python/branches/py3k-struni/Lib/ctypes/test/test_as_parameter.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/test/test_as_parameter.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/test/test_as_parameter.py	Wed May  2 21:09:54 2007
@@ -24,7 +24,7 @@
             return
         f = dll._testfunc_i_bhilfd
         f.argtypes = [c_byte, c_wchar, c_int, c_long, c_float, c_double]
-        result = f(self.wrap(1), self.wrap(u"x"), self.wrap(3), self.wrap(4), self.wrap(5.0), self.wrap(6.0))
+        result = f(self.wrap(1), self.wrap("x"), self.wrap(3), self.wrap(4), self.wrap(5.0), self.wrap(6.0))
         self.failUnlessEqual(result, 139)
         self.failUnless(type(result), int)
 

Modified: python/branches/py3k-struni/Lib/ctypes/test/test_buffers.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/test/test_buffers.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/test/test_buffers.py	Wed May  2 21:09:54 2007
@@ -17,7 +17,7 @@
         self.failUnlessEqual(b[:], "abc\0")
 
     def test_string_conversion(self):
-        b = create_string_buffer(u"abc")
+        b = create_string_buffer("abc")
         self.failUnlessEqual(len(b), 4) # trailing nul char
         self.failUnlessEqual(sizeof(b), 4 * sizeof(c_char))
         self.failUnless(type(b[0]) is str)
@@ -33,21 +33,21 @@
             b = create_unicode_buffer(32)
             self.failUnlessEqual(len(b), 32)
             self.failUnlessEqual(sizeof(b), 32 * sizeof(c_wchar))
-            self.failUnless(type(b[0]) is unicode)
+            self.failUnless(type(b[0]) is str)
 
-            b = create_unicode_buffer(u"abc")
+            b = create_unicode_buffer("abc")
             self.failUnlessEqual(len(b), 4) # trailing nul char
             self.failUnlessEqual(sizeof(b), 4 * sizeof(c_wchar))
-            self.failUnless(type(b[0]) is unicode)
-            self.failUnlessEqual(b[0], u"a")
+            self.failUnless(type(b[0]) is str)
+            self.failUnlessEqual(b[0], "a")
             self.failUnlessEqual(b[:], "abc\0")
 
         def test_unicode_conversion(self):
             b = create_unicode_buffer("abc")
             self.failUnlessEqual(len(b), 4) # trailing nul char
             self.failUnlessEqual(sizeof(b), 4 * sizeof(c_wchar))
-            self.failUnless(type(b[0]) is unicode)
-            self.failUnlessEqual(b[0], u"a")
+            self.failUnless(type(b[0]) is str)
+            self.failUnlessEqual(b[0], "a")
             self.failUnlessEqual(b[:], "abc\0")
 
 if __name__ == "__main__":

Modified: python/branches/py3k-struni/Lib/ctypes/test/test_functions.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/test/test_functions.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/test/test_functions.py	Wed May  2 21:09:54 2007
@@ -70,7 +70,7 @@
             return
         f = dll._testfunc_i_bhilfd
         f.argtypes = [c_byte, c_wchar, c_int, c_long, c_float, c_double]
-        result = f(1, u"x", 3, 4, 5.0, 6.0)
+        result = f(1, "x", 3, 4, 5.0, 6.0)
         self.failUnlessEqual(result, 139)
         self.failUnlessEqual(type(result), int)
 
@@ -83,7 +83,7 @@
         f.argtypes = [c_byte, c_short, c_int, c_long, c_float, c_double]
         f.restype = c_wchar
         result = f(0, 0, 0, 0, 0, 0)
-        self.failUnlessEqual(result, u'\x00')
+        self.failUnlessEqual(result, '\x00')
 
     def test_voidresult(self):
         f = dll._testfunc_v

Modified: python/branches/py3k-struni/Lib/ctypes/test/test_parameters.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/test/test_parameters.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/test/test_parameters.py	Wed May  2 21:09:54 2007
@@ -58,8 +58,8 @@
         self.failUnless(c_char_p.from_param(s)._obj is s)
 
         # new in 0.9.1: convert (encode) unicode to ascii
-        self.failUnlessEqual(c_char_p.from_param(u"123")._obj, "123")
-        self.assertRaises(UnicodeEncodeError, c_char_p.from_param, u"123\377")
+        self.failUnlessEqual(c_char_p.from_param("123")._obj, "123")
+        self.assertRaises(UnicodeEncodeError, c_char_p.from_param, "123\377")
 
         self.assertRaises(TypeError, c_char_p.from_param, 42)
 
@@ -75,16 +75,16 @@
         except ImportError:
 ##            print "(No c_wchar_p)"
             return
-        s = u"123"
+        s = "123"
         if sys.platform == "win32":
             self.failUnless(c_wchar_p.from_param(s)._obj is s)
             self.assertRaises(TypeError, c_wchar_p.from_param, 42)
 
             # new in 0.9.1: convert (decode) ascii to unicode
-            self.failUnlessEqual(c_wchar_p.from_param("123")._obj, u"123")
+            self.failUnlessEqual(c_wchar_p.from_param("123")._obj, "123")
         self.assertRaises(UnicodeDecodeError, c_wchar_p.from_param, "123\377")
 
-        pa = c_wchar_p.from_param(c_wchar_p(u"123"))
+        pa = c_wchar_p.from_param(c_wchar_p("123"))
         self.failUnlessEqual(type(pa), c_wchar_p)
 
     def test_int_pointers(self):

Modified: python/branches/py3k-struni/Lib/ctypes/test/test_prototypes.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/test/test_prototypes.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/test/test_prototypes.py	Wed May  2 21:09:54 2007
@@ -123,7 +123,7 @@
             pass
         else:
             self.failUnlessEqual(None, func(c_wchar_p(None)))
-            self.failUnlessEqual(u"123", func(c_wchar_p(u"123")))
+            self.failUnlessEqual("123", func(c_wchar_p("123")))
 
     def test_instance(self):
         func = testdll._testfunc_p_p
@@ -157,24 +157,24 @@
             func.argtypes = POINTER(c_wchar),
 
             self.failUnlessEqual(None, func(None))
-            self.failUnlessEqual(u"123", func(u"123"))
+            self.failUnlessEqual("123", func("123"))
             self.failUnlessEqual(None, func(c_wchar_p(None)))
-            self.failUnlessEqual(u"123", func(c_wchar_p(u"123")))
+            self.failUnlessEqual("123", func(c_wchar_p("123")))
 
-            self.failUnlessEqual(u"123", func(c_wbuffer(u"123")))
+            self.failUnlessEqual("123", func(c_wbuffer("123")))
             ca = c_wchar("a")
-            self.failUnlessEqual(u"a", func(pointer(ca))[0])
-            self.failUnlessEqual(u"a", func(byref(ca))[0])
+            self.failUnlessEqual("a", func(pointer(ca))[0])
+            self.failUnlessEqual("a", func(byref(ca))[0])
 
         def test_c_wchar_p_arg(self):
             func = testdll._testfunc_p_p
             func.restype = c_wchar_p
             func.argtypes = c_wchar_p,
 
-            c_wchar_p.from_param(u"123")
+            c_wchar_p.from_param("123")
 
             self.failUnlessEqual(None, func(None))
-            self.failUnlessEqual("123", func(u"123"))
+            self.failUnlessEqual("123", func("123"))
             self.failUnlessEqual(None, func(c_wchar_p(None)))
             self.failUnlessEqual("123", func(c_wchar_p("123")))
 

Modified: python/branches/py3k-struni/Lib/ctypes/test/test_slicing.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/test/test_slicing.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/test/test_slicing.py	Wed May  2 21:09:54 2007
@@ -45,7 +45,7 @@
 
         import operator
         self.assertRaises(TypeError, operator.setslice,
-                          res, 0, 5, u"abcde")
+                          res, 0, 5, "abcde")
         dll.my_free(res)
 
         dll.my_strdup.restype = POINTER(c_byte)
@@ -88,7 +88,7 @@
         pass
     else:
         def test_wchar_ptr(self):
-            s = u"abcdefghijklmnopqrstuvwxyz\0"
+            s = "abcdefghijklmnopqrstuvwxyz\0"
 
             dll = CDLL(_ctypes_test.__file__)
             dll.my_wcsdup.restype = POINTER(c_wchar)
@@ -99,7 +99,7 @@
 
             import operator
             self.assertRaises(TypeError, operator.setslice,
-                              res, 0, 5, u"abcde")
+                              res, 0, 5, "abcde")
             dll.my_free(res)
 
             if sizeof(c_wchar) == sizeof(c_short):

Modified: python/branches/py3k-struni/Lib/ctypes/test/test_strings.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/test/test_strings.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/test/test_strings.py	Wed May  2 21:09:54 2007
@@ -62,17 +62,17 @@
         def test(self):
             BUF = c_wchar * 4
 
-            buf = BUF(u"a", u"b", u"c")
-            self.failUnlessEqual(buf.value, u"abc")
+            buf = BUF("a", "b", "c")
+            self.failUnlessEqual(buf.value, "abc")
 
-            buf.value = u"ABCD"
-            self.failUnlessEqual(buf.value, u"ABCD")
+            buf.value = "ABCD"
+            self.failUnlessEqual(buf.value, "ABCD")
 
-            buf.value = u"x"
-            self.failUnlessEqual(buf.value, u"x")
+            buf.value = "x"
+            self.failUnlessEqual(buf.value, "x")
 
-            buf[1] = u"Z"
-            self.failUnlessEqual(buf.value, u"xZCD")
+            buf[1] = "Z"
+            self.failUnlessEqual(buf.value, "xZCD")
 
 class StringTestCase(unittest.TestCase):
     def XX_test_basic_strings(self):
@@ -99,7 +99,7 @@
         self.failUnlessEqual(cs.value, "XY")
         self.failUnlessEqual(cs.raw, "XY\000\000\000\000\000")
 
-        self.assertRaises(TypeError, c_string, u"123")
+        self.assertRaises(TypeError, c_string, "123")
 
     def XX_test_sized_strings(self):
 
@@ -142,13 +142,13 @@
 else:
     class WStringTestCase(unittest.TestCase):
         def test_wchar(self):
-            c_wchar(u"x")
-            repr(byref(c_wchar(u"x")))
+            c_wchar("x")
+            repr(byref(c_wchar("x")))
             c_wchar("x")
 
 
         def X_test_basic_wstrings(self):
-            cs = c_wstring(u"abcdef")
+            cs = c_wstring("abcdef")
 
             # XXX This behaviour is about to change:
             # len returns the size of the internal buffer in bytes.
@@ -156,30 +156,30 @@
             self.failUnless(sizeof(cs) == 14)
 
             # The value property is the string up to the first terminating NUL.
-            self.failUnless(cs.value == u"abcdef")
-            self.failUnless(c_wstring(u"abc\000def").value == u"abc")
+            self.failUnless(cs.value == "abcdef")
+            self.failUnless(c_wstring("abc\000def").value == "abc")
 
-            self.failUnless(c_wstring(u"abc\000def").value == u"abc")
+            self.failUnless(c_wstring("abc\000def").value == "abc")
 
             # The raw property is the total buffer contents:
-            self.failUnless(cs.raw == u"abcdef\000")
-            self.failUnless(c_wstring(u"abc\000def").raw == u"abc\000def\000")
+            self.failUnless(cs.raw == "abcdef\000")
+            self.failUnless(c_wstring("abc\000def").raw == "abc\000def\000")
 
             # We can change the value:
-            cs.value = u"ab"
-            self.failUnless(cs.value == u"ab")
-            self.failUnless(cs.raw == u"ab\000\000\000\000\000")
+            cs.value = "ab"
+            self.failUnless(cs.value == "ab")
+            self.failUnless(cs.raw == "ab\000\000\000\000\000")
 
             self.assertRaises(TypeError, c_wstring, "123")
             self.assertRaises(ValueError, c_wstring, 0)
 
         def X_test_toolong(self):
-            cs = c_wstring(u"abcdef")
+            cs = c_wstring("abcdef")
             # Much too long string:
-            self.assertRaises(ValueError, setattr, cs, "value", u"123456789012345")
+            self.assertRaises(ValueError, setattr, cs, "value", "123456789012345")
 
             # One char too long values:
-            self.assertRaises(ValueError, setattr, cs, "value", u"1234567")
+            self.assertRaises(ValueError, setattr, cs, "value", "1234567")
 
 
 def run_test(rep, msg, func, arg):

Modified: python/branches/py3k-struni/Lib/ctypes/test/test_structures.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/test/test_structures.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/test/test_structures.py	Wed May  2 21:09:54 2007
@@ -269,15 +269,15 @@
             _fields_ = [("name", c_wchar * 12),
                         ("age", c_int)]
 
-        p = PersonW(u"Someone")
+        p = PersonW("Someone")
         self.failUnlessEqual(p.name, "Someone")
 
-        self.failUnlessEqual(PersonW(u"1234567890").name, u"1234567890")
-        self.failUnlessEqual(PersonW(u"12345678901").name, u"12345678901")
+        self.failUnlessEqual(PersonW("1234567890").name, "1234567890")
+        self.failUnlessEqual(PersonW("12345678901").name, "12345678901")
         # exact fit
-        self.failUnlessEqual(PersonW(u"123456789012").name, u"123456789012")
+        self.failUnlessEqual(PersonW("123456789012").name, "123456789012")
         #too long
-        self.assertRaises(ValueError, PersonW, u"1234567890123")
+        self.assertRaises(ValueError, PersonW, "1234567890123")
 
     def test_init_errors(self):
         class Phone(Structure):

Modified: python/branches/py3k-struni/Lib/ctypes/test/test_unicode.py
==============================================================================
--- python/branches/py3k-struni/Lib/ctypes/test/test_unicode.py	(original)
+++ python/branches/py3k-struni/Lib/ctypes/test/test_unicode.py	Wed May  2 21:09:54 2007
@@ -23,31 +23,31 @@
         def test_ascii_strict(self):
             ctypes.set_conversion_mode("ascii", "strict")
             # no conversions take place with unicode arguments
-            self.failUnlessEqual(wcslen(u"abc"), 3)
-            self.failUnlessEqual(wcslen(u"ab\u2070"), 3)
+            self.failUnlessEqual(wcslen("abc"), 3)
+            self.failUnlessEqual(wcslen("ab\u2070"), 3)
             # string args are converted
             self.failUnlessEqual(wcslen("abc"), 3)
             self.failUnlessRaises(ctypes.ArgumentError, wcslen, "abä")
 
         def test_ascii_replace(self):
             ctypes.set_conversion_mode("ascii", "replace")
-            self.failUnlessEqual(wcslen(u"abc"), 3)
-            self.failUnlessEqual(wcslen(u"ab\u2070"), 3)
+            self.failUnlessEqual(wcslen("abc"), 3)
+            self.failUnlessEqual(wcslen("ab\u2070"), 3)
             self.failUnlessEqual(wcslen("abc"), 3)
             self.failUnlessEqual(wcslen("abä"), 3)
 
         def test_ascii_ignore(self):
             ctypes.set_conversion_mode("ascii", "ignore")
-            self.failUnlessEqual(wcslen(u"abc"), 3)
-            self.failUnlessEqual(wcslen(u"ab\u2070"), 3)
+            self.failUnlessEqual(wcslen("abc"), 3)
+            self.failUnlessEqual(wcslen("ab\u2070"), 3)
             # ignore error mode skips non-ascii characters
             self.failUnlessEqual(wcslen("abc"), 3)
             self.failUnlessEqual(wcslen("äöüß"), 0)
 
         def test_latin1_strict(self):
             ctypes.set_conversion_mode("latin-1", "strict")
-            self.failUnlessEqual(wcslen(u"abc"), 3)
-            self.failUnlessEqual(wcslen(u"ab\u2070"), 3)
+            self.failUnlessEqual(wcslen("abc"), 3)
+            self.failUnlessEqual(wcslen("ab\u2070"), 3)
             self.failUnlessEqual(wcslen("abc"), 3)
             self.failUnlessEqual(wcslen("äöüß"), 4)
 
@@ -58,12 +58,12 @@
 
             ctypes.set_conversion_mode("ascii", "replace")
             buf = ctypes.create_unicode_buffer("abäöü")
-            self.failUnlessEqual(buf[:], u"ab\uFFFD\uFFFD\uFFFD\0")
+            self.failUnlessEqual(buf[:], "ab\uFFFD\uFFFD\uFFFD\0")
 
             ctypes.set_conversion_mode("ascii", "ignore")
             buf = ctypes.create_unicode_buffer("abäöü")
             # is that correct? not sure.  But with 'ignore', you get what you pay for..
-            self.failUnlessEqual(buf[:], u"ab\0\0\0\0")
+            self.failUnlessEqual(buf[:], "ab\0\0\0\0")
 
     import _ctypes_test
     func = ctypes.CDLL(_ctypes_test.__file__)._testfunc_p_p
@@ -82,32 +82,32 @@
         def test_ascii_replace(self):
             ctypes.set_conversion_mode("ascii", "strict")
             self.failUnlessEqual(func("abc"), "abc")
-            self.failUnlessEqual(func(u"abc"), "abc")
-            self.assertRaises(ctypes.ArgumentError, func, u"abä")
+            self.failUnlessEqual(func("abc"), "abc")
+            self.assertRaises(ctypes.ArgumentError, func, "abä")
 
         def test_ascii_ignore(self):
             ctypes.set_conversion_mode("ascii", "ignore")
             self.failUnlessEqual(func("abc"), "abc")
-            self.failUnlessEqual(func(u"abc"), "abc")
-            self.failUnlessEqual(func(u"äöüß"), "")
+            self.failUnlessEqual(func("abc"), "abc")
+            self.failUnlessEqual(func("äöüß"), "")
 
         def test_ascii_replace(self):
             ctypes.set_conversion_mode("ascii", "replace")
             self.failUnlessEqual(func("abc"), "abc")
-            self.failUnlessEqual(func(u"abc"), "abc")
-            self.failUnlessEqual(func(u"äöüß"), "????")
+            self.failUnlessEqual(func("abc"), "abc")
+            self.failUnlessEqual(func("äöüß"), "????")
 
         def test_buffers(self):
             ctypes.set_conversion_mode("ascii", "strict")
-            buf = ctypes.create_string_buffer(u"abc")
+            buf = ctypes.create_string_buffer("abc")
             self.failUnlessEqual(len(buf), 3+1)
 
             ctypes.set_conversion_mode("ascii", "replace")
-            buf = ctypes.create_string_buffer(u"abäöü")
+            buf = ctypes.create_string_buffer("abäöü")
             self.failUnlessEqual(buf[:], "ab???\0")
 
             ctypes.set_conversion_mode("ascii", "ignore")
-            buf = ctypes.create_string_buffer(u"abäöü")
+            buf = ctypes.create_string_buffer("abäöü")
             # is that correct? not sure.  But with 'ignore', you get what you pay for..
             self.failUnlessEqual(buf[:], "ab\0\0\0\0")
 

Modified: python/branches/py3k-struni/Lib/distutils/command/bdist_wininst.py
==============================================================================
--- python/branches/py3k-struni/Lib/distutils/command/bdist_wininst.py	(original)
+++ python/branches/py3k-struni/Lib/distutils/command/bdist_wininst.py	Wed May  2 21:09:54 2007
@@ -247,11 +247,11 @@
 
         # Convert cfgdata from unicode to ascii, mbcs encoded
         try:
-            unicode
+            str
         except NameError:
             pass
         else:
-            if isinstance(cfgdata, unicode):
+            if isinstance(cfgdata, str):
                 cfgdata = cfgdata.encode("mbcs")
 
         # Append the pre-install script

Modified: python/branches/py3k-struni/Lib/distutils/command/build_clib.py
==============================================================================
--- python/branches/py3k-struni/Lib/distutils/command/build_clib.py	(original)
+++ python/branches/py3k-struni/Lib/distutils/command/build_clib.py	Wed May  2 21:09:54 2007
@@ -147,7 +147,7 @@
                 raise DistutilsSetupError, \
                       "each element of 'libraries' must a 2-tuple"
 
-            if isinstance(lib[0], basestring) StringType:
+            if isinstance(lib[0], basestring):
                 raise DistutilsSetupError, \
                       "first element of each tuple in 'libraries' " + \
                       "must be a string (the library name)"

Modified: python/branches/py3k-struni/Lib/distutils/command/register.py
==============================================================================
--- python/branches/py3k-struni/Lib/distutils/command/register.py	(original)
+++ python/branches/py3k-struni/Lib/distutils/command/register.py	Wed May  2 21:09:54 2007
@@ -259,7 +259,7 @@
             if type(value) not in (type([]), type( () )):
                 value = [value]
             for value in value:
-                value = unicode(value).encode("utf-8")
+                value = str(value).encode("utf-8")
                 body.write(sep_boundary)
                 body.write('\nContent-Disposition: form-data; name="%s"'%key)
                 body.write("\n\n")

Modified: python/branches/py3k-struni/Lib/doctest.py
==============================================================================
--- python/branches/py3k-struni/Lib/doctest.py	(original)
+++ python/branches/py3k-struni/Lib/doctest.py	Wed May  2 21:09:54 2007
@@ -196,7 +196,7 @@
     """
     if inspect.ismodule(module):
         return module
-    elif isinstance(module, (str, unicode)):
+    elif isinstance(module, (str, str)):
         return __import__(module, globals(), locals(), ["*"])
     elif module is None:
         return sys.modules[sys._getframe(depth).f_globals['__name__']]

Modified: python/branches/py3k-struni/Lib/email/charset.py
==============================================================================
--- python/branches/py3k-struni/Lib/email/charset.py	(original)
+++ python/branches/py3k-struni/Lib/email/charset.py	Wed May  2 21:09:54 2007
@@ -202,10 +202,10 @@
         # is already a unicode, we leave it at that, but ensure that the
         # charset is ASCII, as the standard (RFC XXX) requires.
         try:
-            if isinstance(input_charset, unicode):
+            if isinstance(input_charset, str):
                 input_charset.encode('ascii')
             else:
-                input_charset = unicode(input_charset, 'ascii')
+                input_charset = str(input_charset, 'ascii')
         except UnicodeError:
             raise errors.CharsetError(input_charset)
         input_charset = input_charset.lower()
@@ -264,7 +264,7 @@
     def convert(self, s):
         """Convert a string from the input_codec to the output_codec."""
         if self.input_codec != self.output_codec:
-            return unicode(s, self.input_codec).encode(self.output_codec)
+            return str(s, self.input_codec).encode(self.output_codec)
         else:
             return s
 
@@ -281,10 +281,10 @@
         Characters that could not be converted to Unicode will be replaced
         with the Unicode replacement character U+FFFD.
         """
-        if isinstance(s, unicode) or self.input_codec is None:
+        if isinstance(s, str) or self.input_codec is None:
             return s
         try:
-            return unicode(s, self.input_codec, 'replace')
+            return str(s, self.input_codec, 'replace')
         except LookupError:
             # Input codec not installed on system, so return the original
             # string unchanged.
@@ -307,7 +307,7 @@
             codec = self.output_codec
         else:
             codec = self.input_codec
-        if not isinstance(ustr, unicode) or codec is None:
+        if not isinstance(ustr, str) or codec is None:
             return ustr
         try:
             return ustr.encode(codec, 'replace')

Modified: python/branches/py3k-struni/Lib/email/generator.py
==============================================================================
--- python/branches/py3k-struni/Lib/email/generator.py	(original)
+++ python/branches/py3k-struni/Lib/email/generator.py	Wed May  2 21:09:54 2007
@@ -23,7 +23,7 @@
 def _is8bitstring(s):
     if isinstance(s, str):
         try:
-            unicode(s, 'us-ascii')
+            str(s, 'us-ascii')
         except UnicodeError:
             return True
     return False

Modified: python/branches/py3k-struni/Lib/email/header.py
==============================================================================
--- python/branches/py3k-struni/Lib/email/header.py	(original)
+++ python/branches/py3k-struni/Lib/email/header.py	Wed May  2 21:09:54 2007
@@ -21,9 +21,9 @@
 
 NL = '\n'
 SPACE = ' '
-USPACE = u' '
+USPACE = ' '
 SPACE8 = ' ' * 8
-UEMPTYSTRING = u''
+UEMPTYSTRING = ''
 
 MAXLINELEN = 76
 
@@ -210,7 +210,7 @@
                 elif nextcs not in (None, 'us-ascii'):
                     uchunks.append(USPACE)
             lastcs = nextcs
-            uchunks.append(unicode(s, str(charset)))
+            uchunks.append(str(s, str(charset)))
         return UEMPTYSTRING.join(uchunks)
 
     # Rich comparison operators for equality only.  BAW: does it make sense to
@@ -257,13 +257,13 @@
                 # Possibly raise UnicodeError if the byte string can't be
                 # converted to a unicode with the input codec of the charset.
                 incodec = charset.input_codec or 'us-ascii'
-                ustr = unicode(s, incodec, errors)
+                ustr = str(s, incodec, errors)
                 # Now make sure that the unicode could be converted back to a
                 # byte string with the output codec, which may be different
                 # than the iput coded.  Still, use the original byte string.
                 outcodec = charset.output_codec or 'us-ascii'
                 ustr.encode(outcodec, errors)
-            elif isinstance(s, unicode):
+            elif isinstance(s, str):
                 # Now we have to be sure the unicode string can be converted
                 # to a byte string with a reasonable output codec.  We want to
                 # use the byte string in the chunk.

Modified: python/branches/py3k-struni/Lib/email/message.py
==============================================================================
--- python/branches/py3k-struni/Lib/email/message.py	(original)
+++ python/branches/py3k-struni/Lib/email/message.py	Wed May  2 21:09:54 2007
@@ -751,13 +751,13 @@
                 # LookupError will be raised if the charset isn't known to
                 # Python.  UnicodeError will be raised if the encoded text
                 # contains a character not in the charset.
-                charset = unicode(charset[2], pcharset).encode('us-ascii')
+                charset = str(charset[2], pcharset).encode('us-ascii')
             except (LookupError, UnicodeError):
                 charset = charset[2]
         # charset character must be in us-ascii range
         try:
             if isinstance(charset, str):
-                charset = unicode(charset, 'us-ascii')
+                charset = str(charset, 'us-ascii')
             charset = charset.encode('us-ascii')
         except UnicodeError:
             return failobj

Modified: python/branches/py3k-struni/Lib/email/test/test_email.py
==============================================================================
--- python/branches/py3k-struni/Lib/email/test/test_email.py	(original)
+++ python/branches/py3k-struni/Lib/email/test/test_email.py	Wed May  2 21:09:54 2007
@@ -505,7 +505,7 @@
         msg = Message()
         msg.set_charset('us-ascii')
         self.assertEqual('us-ascii', msg.get_content_charset())
-        msg.set_charset(u'us-ascii')
+        msg.set_charset('us-ascii')
         self.assertEqual('us-ascii', msg.get_content_charset())
 
 
@@ -583,7 +583,7 @@
         utf8 = Charset("utf-8")
         g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
         cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
-        utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
+        utf8_head = "\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
         h = Header(g_head, g, header_name='Subject')
         h.append(cz_head, cz)
         h.append(utf8_head, utf8)
@@ -1514,7 +1514,7 @@
         s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard at dom.ain>'
         dh = decode_header(s)
         eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard <pirard at dom.ain>', None)])
-        hu = unicode(make_header(dh)).encode('latin-1')
+        hu = str(make_header(dh)).encode('latin-1')
         eq(hu, 'Andr\xe9 Pirard <pirard at dom.ain>')
 
     def test_whitespace_eater_unicode_2(self):
@@ -1524,7 +1524,7 @@
         eq(dh, [('The', None), ('quick brown fox', 'iso-8859-1'),
                 ('jumped over the', None), ('lazy dog', 'iso-8859-1')])
         hu = make_header(dh).__unicode__()
-        eq(hu, u'The quick brown fox jumped over the lazy dog')
+        eq(hu, 'The quick brown fox jumped over the lazy dog')
 
     def test_rfc2047_without_whitespace(self):
         s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
@@ -2770,7 +2770,7 @@
         eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
 
     def test_unicode_charset_name(self):
-        charset = Charset(u'us-ascii')
+        charset = Charset('us-ascii')
         self.assertEqual(str(charset), 'us-ascii')
         self.assertRaises(Errors.CharsetError, Charset, 'asc\xffii')
 
@@ -2809,7 +2809,7 @@
         utf8 = Charset("utf-8")
         g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
         cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
-        utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
+        utf8_head = "\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
         h = Header(g_head, g)
         h.append(cz_head, cz)
         h.append(utf8_head, utf8)
@@ -2829,7 +2829,7 @@
         eq(decode_header(enc),
            [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
             (utf8_head, "utf-8")])
-        ustr = unicode(h)
+        ustr = str(h)
         eq(ustr.encode('utf-8'),
            'Die Mieter treten hier ein werden mit einem Foerderband '
            'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
@@ -2897,9 +2897,9 @@
 
     def test_utf8_shortest(self):
         eq = self.assertEqual
-        h = Header(u'p\xf6stal', 'utf-8')
+        h = Header('p\xf6stal', 'utf-8')
         eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
-        h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8')
+        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
         eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
 
     def test_bad_8bit_header(self):
@@ -3152,7 +3152,7 @@
 '''
         msg = email.message_from_string(m)
         self.assertEqual(msg.get_filename(),
-                         u'This is even more ***fun*** is it not.pdf\ufffd')
+                         'This is even more ***fun*** is it not.pdf\ufffd')
 
     def test_rfc2231_unknown_encoding(self):
         m = """\

Modified: python/branches/py3k-struni/Lib/email/test/test_email_codecs.py
==============================================================================
--- python/branches/py3k-struni/Lib/email/test/test_email_codecs.py	(original)
+++ python/branches/py3k-struni/Lib/email/test/test_email_codecs.py	Wed May  2 21:09:54 2007
@@ -13,7 +13,7 @@
 # We're compatible with Python 2.3, but it doesn't have the built-in Asian
 # codecs, so we have to skip all these tests.
 try:
-    unicode('foo', 'euc-jp')
+    str('foo', 'euc-jp')
 except LookupError:
     raise TestSkipped
 
@@ -57,7 +57,7 @@
         jcode  = 'euc-jp'
         msg = Message()
         msg.set_payload(jhello, jcode)
-        ustr = unicode(msg.get_payload(), msg.get_content_charset())
+        ustr = str(msg.get_payload(), msg.get_content_charset())
         self.assertEqual(jhello, ustr.encode(jcode))
 
 

Modified: python/branches/py3k-struni/Lib/email/test/test_email_codecs_renamed.py
==============================================================================
--- python/branches/py3k-struni/Lib/email/test/test_email_codecs_renamed.py	(original)
+++ python/branches/py3k-struni/Lib/email/test/test_email_codecs_renamed.py	Wed May  2 21:09:54 2007
@@ -13,7 +13,7 @@
 # We're compatible with Python 2.3, but it doesn't have the built-in Asian
 # codecs, so we have to skip all these tests.
 try:
-    unicode('foo', 'euc-jp')
+    str('foo', 'euc-jp')
 except LookupError:
     raise TestSkipped
 
@@ -57,7 +57,7 @@
         jcode  = 'euc-jp'
         msg = Message()
         msg.set_payload(jhello, jcode)
-        ustr = unicode(msg.get_payload(), msg.get_content_charset())
+        ustr = str(msg.get_payload(), msg.get_content_charset())
         self.assertEqual(jhello, ustr.encode(jcode))
 
 

Modified: python/branches/py3k-struni/Lib/email/test/test_email_renamed.py
==============================================================================
--- python/branches/py3k-struni/Lib/email/test/test_email_renamed.py	(original)
+++ python/branches/py3k-struni/Lib/email/test/test_email_renamed.py	Wed May  2 21:09:54 2007
@@ -564,7 +564,7 @@
         utf8 = Charset("utf-8")
         g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
         cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
-        utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
+        utf8_head = "\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
         h = Header(g_head, g, header_name='Subject')
         h.append(cz_head, cz)
         h.append(utf8_head, utf8)
@@ -1512,7 +1512,7 @@
         s = '=?ISO-8859-1?Q?Andr=E9?= Pirard <pirard at dom.ain>'
         dh = decode_header(s)
         eq(dh, [('Andr\xe9', 'iso-8859-1'), ('Pirard <pirard at dom.ain>', None)])
-        hu = unicode(make_header(dh)).encode('latin-1')
+        hu = str(make_header(dh)).encode('latin-1')
         eq(hu, 'Andr\xe9 Pirard <pirard at dom.ain>')
 
     def test_whitespace_eater_unicode_2(self):
@@ -1522,7 +1522,7 @@
         eq(dh, [('The', None), ('quick brown fox', 'iso-8859-1'),
                 ('jumped over the', None), ('lazy dog', 'iso-8859-1')])
         hu = make_header(dh).__unicode__()
-        eq(hu, u'The quick brown fox jumped over the lazy dog')
+        eq(hu, 'The quick brown fox jumped over the lazy dog')
 
     def test_rfc2047_missing_whitespace(self):
         s = 'Sm=?ISO-8859-1?B?9g==?=rg=?ISO-8859-1?B?5Q==?=sbord'
@@ -2769,7 +2769,7 @@
         eq('hello w\xf6rld', c.body_encode('hello w\xf6rld'))
 
     def test_unicode_charset_name(self):
-        charset = Charset(u'us-ascii')
+        charset = Charset('us-ascii')
         self.assertEqual(str(charset), 'us-ascii')
         self.assertRaises(errors.CharsetError, Charset, 'asc\xffii')
 
@@ -2808,7 +2808,7 @@
         utf8 = Charset("utf-8")
         g_head = "Die Mieter treten hier ein werden mit einem Foerderband komfortabel den Korridor entlang, an s\xfcdl\xfcndischen Wandgem\xe4lden vorbei, gegen die rotierenden Klingen bef\xf6rdert. "
         cz_head = "Finan\xe8ni metropole se hroutily pod tlakem jejich d\xf9vtipu.. "
-        utf8_head = u"\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
+        utf8_head = "\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das Nunstuck git und Slotermeyer? Ja! Beiherhund das Oder die Flipperwaldt gersput.\u300d\u3068\u8a00\u3063\u3066\u3044\u307e\u3059\u3002".encode("utf-8")
         h = Header(g_head, g)
         h.append(cz_head, cz)
         h.append(utf8_head, utf8)
@@ -2828,7 +2828,7 @@
         eq(decode_header(enc),
            [(g_head, "iso-8859-1"), (cz_head, "iso-8859-2"),
             (utf8_head, "utf-8")])
-        ustr = unicode(h)
+        ustr = str(h)
         eq(ustr.encode('utf-8'),
            'Die Mieter treten hier ein werden mit einem Foerderband '
            'komfortabel den Korridor entlang, an s\xc3\xbcdl\xc3\xbcndischen '
@@ -2896,9 +2896,9 @@
 
     def test_utf8_shortest(self):
         eq = self.assertEqual
-        h = Header(u'p\xf6stal', 'utf-8')
+        h = Header('p\xf6stal', 'utf-8')
         eq(h.encode(), '=?utf-8?q?p=C3=B6stal?=')
-        h = Header(u'\u83ca\u5730\u6642\u592b', 'utf-8')
+        h = Header('\u83ca\u5730\u6642\u592b', 'utf-8')
         eq(h.encode(), '=?utf-8?b?6I+K5Zyw5pmC5aSr?=')
 
     def test_bad_8bit_header(self):
@@ -3151,7 +3151,7 @@
 '''
         msg = email.message_from_string(m)
         self.assertEqual(msg.get_filename(),
-                         u'This is even more ***fun*** is it not.pdf\ufffd')
+                         'This is even more ***fun*** is it not.pdf\ufffd')
 
     def test_rfc2231_unknown_encoding(self):
         m = """\

Modified: python/branches/py3k-struni/Lib/email/utils.py
==============================================================================
--- python/branches/py3k-struni/Lib/email/utils.py	(original)
+++ python/branches/py3k-struni/Lib/email/utils.py	Wed May  2 21:09:54 2007
@@ -44,7 +44,7 @@
 
 COMMASPACE = ', '
 EMPTYSTRING = ''
-UEMPTYSTRING = u''
+UEMPTYSTRING = ''
 CRLF = '\r\n'
 TICK = "'"
 
@@ -315,9 +315,9 @@
         rawval = unquote(value[2])
         charset = value[0] or 'us-ascii'
         try:
-            return unicode(rawval, charset, errors)
+            return str(rawval, charset, errors)
         except LookupError:
             # XXX charset is unknown to Python.
-            return unicode(rawval, fallback_charset, errors)
+            return str(rawval, fallback_charset, errors)
     else:
         return unquote(value)

Modified: python/branches/py3k-struni/Lib/encodings/__init__.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/__init__.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/__init__.py	Wed May  2 21:09:54 2007
@@ -60,7 +60,7 @@
     """
     # Make sure we have an 8-bit string, because .translate() works
     # differently for Unicode strings.
-    if isinstance(encoding, unicode):
+    if isinstance(encoding, str):
         # Note that .encode('latin-1') does *not* use the codec
         # registry, so this call doesn't recurse. (See unicodeobject.c
         # PyUnicode_AsEncodedString() for details)

Modified: python/branches/py3k-struni/Lib/encodings/cp037.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp037.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp037.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x9c'     #  0x04 -> CONTROL
-    u'\t'       #  0x05 -> HORIZONTAL TABULATION
-    u'\x86'     #  0x06 -> CONTROL
-    u'\x7f'     #  0x07 -> DELETE
-    u'\x97'     #  0x08 -> CONTROL
-    u'\x8d'     #  0x09 -> CONTROL
-    u'\x8e'     #  0x0A -> CONTROL
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x9d'     #  0x14 -> CONTROL
-    u'\x85'     #  0x15 -> CONTROL
-    u'\x08'     #  0x16 -> BACKSPACE
-    u'\x87'     #  0x17 -> CONTROL
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x92'     #  0x1A -> CONTROL
-    u'\x8f'     #  0x1B -> CONTROL
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u'\x80'     #  0x20 -> CONTROL
-    u'\x81'     #  0x21 -> CONTROL
-    u'\x82'     #  0x22 -> CONTROL
-    u'\x83'     #  0x23 -> CONTROL
-    u'\x84'     #  0x24 -> CONTROL
-    u'\n'       #  0x25 -> LINE FEED
-    u'\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
-    u'\x1b'     #  0x27 -> ESCAPE
-    u'\x88'     #  0x28 -> CONTROL
-    u'\x89'     #  0x29 -> CONTROL
-    u'\x8a'     #  0x2A -> CONTROL
-    u'\x8b'     #  0x2B -> CONTROL
-    u'\x8c'     #  0x2C -> CONTROL
-    u'\x05'     #  0x2D -> ENQUIRY
-    u'\x06'     #  0x2E -> ACKNOWLEDGE
-    u'\x07'     #  0x2F -> BELL
-    u'\x90'     #  0x30 -> CONTROL
-    u'\x91'     #  0x31 -> CONTROL
-    u'\x16'     #  0x32 -> SYNCHRONOUS IDLE
-    u'\x93'     #  0x33 -> CONTROL
-    u'\x94'     #  0x34 -> CONTROL
-    u'\x95'     #  0x35 -> CONTROL
-    u'\x96'     #  0x36 -> CONTROL
-    u'\x04'     #  0x37 -> END OF TRANSMISSION
-    u'\x98'     #  0x38 -> CONTROL
-    u'\x99'     #  0x39 -> CONTROL
-    u'\x9a'     #  0x3A -> CONTROL
-    u'\x9b'     #  0x3B -> CONTROL
-    u'\x14'     #  0x3C -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
-    u'\x9e'     #  0x3E -> CONTROL
-    u'\x1a'     #  0x3F -> SUBSTITUTE
-    u' '        #  0x40 -> SPACE
-    u'\xa0'     #  0x41 -> NO-BREAK SPACE
-    u'\xe2'     #  0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe0'     #  0x44 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0x45 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe3'     #  0x46 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe5'     #  0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x48 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xf1'     #  0x49 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xa2'     #  0x4A -> CENT SIGN
-    u'.'        #  0x4B -> FULL STOP
-    u'<'        #  0x4C -> LESS-THAN SIGN
-    u'('        #  0x4D -> LEFT PARENTHESIS
-    u'+'        #  0x4E -> PLUS SIGN
-    u'|'        #  0x4F -> VERTICAL LINE
-    u'&'        #  0x50 -> AMPERSAND
-    u'\xe9'     #  0x51 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x54 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xed'     #  0x55 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xec'     #  0x58 -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xdf'     #  0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
-    u'!'        #  0x5A -> EXCLAMATION MARK
-    u'$'        #  0x5B -> DOLLAR SIGN
-    u'*'        #  0x5C -> ASTERISK
-    u')'        #  0x5D -> RIGHT PARENTHESIS
-    u';'        #  0x5E -> SEMICOLON
-    u'\xac'     #  0x5F -> NOT SIGN
-    u'-'        #  0x60 -> HYPHEN-MINUS
-    u'/'        #  0x61 -> SOLIDUS
-    u'\xc2'     #  0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc4'     #  0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc0'     #  0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc3'     #  0x66 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc5'     #  0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc7'     #  0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xd1'     #  0x69 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xa6'     #  0x6A -> BROKEN BAR
-    u','        #  0x6B -> COMMA
-    u'%'        #  0x6C -> PERCENT SIGN
-    u'_'        #  0x6D -> LOW LINE
-    u'>'        #  0x6E -> GREATER-THAN SIGN
-    u'?'        #  0x6F -> QUESTION MARK
-    u'\xf8'     #  0x70 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xc9'     #  0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xcd'     #  0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xcc'     #  0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'`'        #  0x79 -> GRAVE ACCENT
-    u':'        #  0x7A -> COLON
-    u'#'        #  0x7B -> NUMBER SIGN
-    u'@'        #  0x7C -> COMMERCIAL AT
-    u"'"        #  0x7D -> APOSTROPHE
-    u'='        #  0x7E -> EQUALS SIGN
-    u'"'        #  0x7F -> QUOTATION MARK
-    u'\xd8'     #  0x80 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'a'        #  0x81 -> LATIN SMALL LETTER A
-    u'b'        #  0x82 -> LATIN SMALL LETTER B
-    u'c'        #  0x83 -> LATIN SMALL LETTER C
-    u'd'        #  0x84 -> LATIN SMALL LETTER D
-    u'e'        #  0x85 -> LATIN SMALL LETTER E
-    u'f'        #  0x86 -> LATIN SMALL LETTER F
-    u'g'        #  0x87 -> LATIN SMALL LETTER G
-    u'h'        #  0x88 -> LATIN SMALL LETTER H
-    u'i'        #  0x89 -> LATIN SMALL LETTER I
-    u'\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xf0'     #  0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
-    u'\xfd'     #  0x8D -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xfe'     #  0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
-    u'\xb1'     #  0x8F -> PLUS-MINUS SIGN
-    u'\xb0'     #  0x90 -> DEGREE SIGN
-    u'j'        #  0x91 -> LATIN SMALL LETTER J
-    u'k'        #  0x92 -> LATIN SMALL LETTER K
-    u'l'        #  0x93 -> LATIN SMALL LETTER L
-    u'm'        #  0x94 -> LATIN SMALL LETTER M
-    u'n'        #  0x95 -> LATIN SMALL LETTER N
-    u'o'        #  0x96 -> LATIN SMALL LETTER O
-    u'p'        #  0x97 -> LATIN SMALL LETTER P
-    u'q'        #  0x98 -> LATIN SMALL LETTER Q
-    u'r'        #  0x99 -> LATIN SMALL LETTER R
-    u'\xaa'     #  0x9A -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0x9B -> MASCULINE ORDINAL INDICATOR
-    u'\xe6'     #  0x9C -> LATIN SMALL LIGATURE AE
-    u'\xb8'     #  0x9D -> CEDILLA
-    u'\xc6'     #  0x9E -> LATIN CAPITAL LIGATURE AE
-    u'\xa4'     #  0x9F -> CURRENCY SIGN
-    u'\xb5'     #  0xA0 -> MICRO SIGN
-    u'~'        #  0xA1 -> TILDE
-    u's'        #  0xA2 -> LATIN SMALL LETTER S
-    u't'        #  0xA3 -> LATIN SMALL LETTER T
-    u'u'        #  0xA4 -> LATIN SMALL LETTER U
-    u'v'        #  0xA5 -> LATIN SMALL LETTER V
-    u'w'        #  0xA6 -> LATIN SMALL LETTER W
-    u'x'        #  0xA7 -> LATIN SMALL LETTER X
-    u'y'        #  0xA8 -> LATIN SMALL LETTER Y
-    u'z'        #  0xA9 -> LATIN SMALL LETTER Z
-    u'\xa1'     #  0xAA -> INVERTED EXCLAMATION MARK
-    u'\xbf'     #  0xAB -> INVERTED QUESTION MARK
-    u'\xd0'     #  0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
-    u'\xdd'     #  0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xde'     #  0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
-    u'\xae'     #  0xAF -> REGISTERED SIGN
-    u'^'        #  0xB0 -> CIRCUMFLEX ACCENT
-    u'\xa3'     #  0xB1 -> POUND SIGN
-    u'\xa5'     #  0xB2 -> YEN SIGN
-    u'\xb7'     #  0xB3 -> MIDDLE DOT
-    u'\xa9'     #  0xB4 -> COPYRIGHT SIGN
-    u'\xa7'     #  0xB5 -> SECTION SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
-    u'['        #  0xBA -> LEFT SQUARE BRACKET
-    u']'        #  0xBB -> RIGHT SQUARE BRACKET
-    u'\xaf'     #  0xBC -> MACRON
-    u'\xa8'     #  0xBD -> DIAERESIS
-    u'\xb4'     #  0xBE -> ACUTE ACCENT
-    u'\xd7'     #  0xBF -> MULTIPLICATION SIGN
-    u'{'        #  0xC0 -> LEFT CURLY BRACKET
-    u'A'        #  0xC1 -> LATIN CAPITAL LETTER A
-    u'B'        #  0xC2 -> LATIN CAPITAL LETTER B
-    u'C'        #  0xC3 -> LATIN CAPITAL LETTER C
-    u'D'        #  0xC4 -> LATIN CAPITAL LETTER D
-    u'E'        #  0xC5 -> LATIN CAPITAL LETTER E
-    u'F'        #  0xC6 -> LATIN CAPITAL LETTER F
-    u'G'        #  0xC7 -> LATIN CAPITAL LETTER G
-    u'H'        #  0xC8 -> LATIN CAPITAL LETTER H
-    u'I'        #  0xC9 -> LATIN CAPITAL LETTER I
-    u'\xad'     #  0xCA -> SOFT HYPHEN
-    u'\xf4'     #  0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf2'     #  0xCD -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xCE -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf5'     #  0xCF -> LATIN SMALL LETTER O WITH TILDE
-    u'}'        #  0xD0 -> RIGHT CURLY BRACKET
-    u'J'        #  0xD1 -> LATIN CAPITAL LETTER J
-    u'K'        #  0xD2 -> LATIN CAPITAL LETTER K
-    u'L'        #  0xD3 -> LATIN CAPITAL LETTER L
-    u'M'        #  0xD4 -> LATIN CAPITAL LETTER M
-    u'N'        #  0xD5 -> LATIN CAPITAL LETTER N
-    u'O'        #  0xD6 -> LATIN CAPITAL LETTER O
-    u'P'        #  0xD7 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0xD9 -> LATIN CAPITAL LETTER R
-    u'\xb9'     #  0xDA -> SUPERSCRIPT ONE
-    u'\xfb'     #  0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xf9'     #  0xDD -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xDE -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xff'     #  0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\\'       #  0xE0 -> REVERSE SOLIDUS
-    u'\xf7'     #  0xE1 -> DIVISION SIGN
-    u'S'        #  0xE2 -> LATIN CAPITAL LETTER S
-    u'T'        #  0xE3 -> LATIN CAPITAL LETTER T
-    u'U'        #  0xE4 -> LATIN CAPITAL LETTER U
-    u'V'        #  0xE5 -> LATIN CAPITAL LETTER V
-    u'W'        #  0xE6 -> LATIN CAPITAL LETTER W
-    u'X'        #  0xE7 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
-    u'\xb2'     #  0xEA -> SUPERSCRIPT TWO
-    u'\xd4'     #  0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd6'     #  0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd2'     #  0xED -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd5'     #  0xEF -> LATIN CAPITAL LETTER O WITH TILDE
-    u'0'        #  0xF0 -> DIGIT ZERO
-    u'1'        #  0xF1 -> DIGIT ONE
-    u'2'        #  0xF2 -> DIGIT TWO
-    u'3'        #  0xF3 -> DIGIT THREE
-    u'4'        #  0xF4 -> DIGIT FOUR
-    u'5'        #  0xF5 -> DIGIT FIVE
-    u'6'        #  0xF6 -> DIGIT SIX
-    u'7'        #  0xF7 -> DIGIT SEVEN
-    u'8'        #  0xF8 -> DIGIT EIGHT
-    u'9'        #  0xF9 -> DIGIT NINE
-    u'\xb3'     #  0xFA -> SUPERSCRIPT THREE
-    u'\xdb'     #  0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\x9f'     #  0xFF -> CONTROL
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x9c'     #  0x04 -> CONTROL
+    '\t'       #  0x05 -> HORIZONTAL TABULATION
+    '\x86'     #  0x06 -> CONTROL
+    '\x7f'     #  0x07 -> DELETE
+    '\x97'     #  0x08 -> CONTROL
+    '\x8d'     #  0x09 -> CONTROL
+    '\x8e'     #  0x0A -> CONTROL
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x9d'     #  0x14 -> CONTROL
+    '\x85'     #  0x15 -> CONTROL
+    '\x08'     #  0x16 -> BACKSPACE
+    '\x87'     #  0x17 -> CONTROL
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x92'     #  0x1A -> CONTROL
+    '\x8f'     #  0x1B -> CONTROL
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    '\x80'     #  0x20 -> CONTROL
+    '\x81'     #  0x21 -> CONTROL
+    '\x82'     #  0x22 -> CONTROL
+    '\x83'     #  0x23 -> CONTROL
+    '\x84'     #  0x24 -> CONTROL
+    '\n'       #  0x25 -> LINE FEED
+    '\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
+    '\x1b'     #  0x27 -> ESCAPE
+    '\x88'     #  0x28 -> CONTROL
+    '\x89'     #  0x29 -> CONTROL
+    '\x8a'     #  0x2A -> CONTROL
+    '\x8b'     #  0x2B -> CONTROL
+    '\x8c'     #  0x2C -> CONTROL
+    '\x05'     #  0x2D -> ENQUIRY
+    '\x06'     #  0x2E -> ACKNOWLEDGE
+    '\x07'     #  0x2F -> BELL
+    '\x90'     #  0x30 -> CONTROL
+    '\x91'     #  0x31 -> CONTROL
+    '\x16'     #  0x32 -> SYNCHRONOUS IDLE
+    '\x93'     #  0x33 -> CONTROL
+    '\x94'     #  0x34 -> CONTROL
+    '\x95'     #  0x35 -> CONTROL
+    '\x96'     #  0x36 -> CONTROL
+    '\x04'     #  0x37 -> END OF TRANSMISSION
+    '\x98'     #  0x38 -> CONTROL
+    '\x99'     #  0x39 -> CONTROL
+    '\x9a'     #  0x3A -> CONTROL
+    '\x9b'     #  0x3B -> CONTROL
+    '\x14'     #  0x3C -> DEVICE CONTROL FOUR
+    '\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
+    '\x9e'     #  0x3E -> CONTROL
+    '\x1a'     #  0x3F -> SUBSTITUTE
+    ' '        #  0x40 -> SPACE
+    '\xa0'     #  0x41 -> NO-BREAK SPACE
+    '\xe2'     #  0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe0'     #  0x44 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0x45 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe3'     #  0x46 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe5'     #  0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xf1'     #  0x49 -> LATIN SMALL LETTER N WITH TILDE
+    '\xa2'     #  0x4A -> CENT SIGN
+    '.'        #  0x4B -> FULL STOP
+    '<'        #  0x4C -> LESS-THAN SIGN
+    '('        #  0x4D -> LEFT PARENTHESIS
+    '+'        #  0x4E -> PLUS SIGN
+    '|'        #  0x4F -> VERTICAL LINE
+    '&'        #  0x50 -> AMPERSAND
+    '\xe9'     #  0x51 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x54 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xed'     #  0x55 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xec'     #  0x58 -> LATIN SMALL LETTER I WITH GRAVE
+    '\xdf'     #  0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+    '!'        #  0x5A -> EXCLAMATION MARK
+    '$'        #  0x5B -> DOLLAR SIGN
+    '*'        #  0x5C -> ASTERISK
+    ')'        #  0x5D -> RIGHT PARENTHESIS
+    ';'        #  0x5E -> SEMICOLON
+    '\xac'     #  0x5F -> NOT SIGN
+    '-'        #  0x60 -> HYPHEN-MINUS
+    '/'        #  0x61 -> SOLIDUS
+    '\xc2'     #  0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc4'     #  0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc0'     #  0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc3'     #  0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc5'     #  0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc7'     #  0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xd1'     #  0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xa6'     #  0x6A -> BROKEN BAR
+    ','        #  0x6B -> COMMA
+    '%'        #  0x6C -> PERCENT SIGN
+    '_'        #  0x6D -> LOW LINE
+    '>'        #  0x6E -> GREATER-THAN SIGN
+    '?'        #  0x6F -> QUESTION MARK
+    '\xf8'     #  0x70 -> LATIN SMALL LETTER O WITH STROKE
+    '\xc9'     #  0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xcd'     #  0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xcc'     #  0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+    '`'        #  0x79 -> GRAVE ACCENT
+    ':'        #  0x7A -> COLON
+    '#'        #  0x7B -> NUMBER SIGN
+    '@'        #  0x7C -> COMMERCIAL AT
+    "'"        #  0x7D -> APOSTROPHE
+    '='        #  0x7E -> EQUALS SIGN
+    '"'        #  0x7F -> QUOTATION MARK
+    '\xd8'     #  0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+    'a'        #  0x81 -> LATIN SMALL LETTER A
+    'b'        #  0x82 -> LATIN SMALL LETTER B
+    'c'        #  0x83 -> LATIN SMALL LETTER C
+    'd'        #  0x84 -> LATIN SMALL LETTER D
+    'e'        #  0x85 -> LATIN SMALL LETTER E
+    'f'        #  0x86 -> LATIN SMALL LETTER F
+    'g'        #  0x87 -> LATIN SMALL LETTER G
+    'h'        #  0x88 -> LATIN SMALL LETTER H
+    'i'        #  0x89 -> LATIN SMALL LETTER I
+    '\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xf0'     #  0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
+    '\xfd'     #  0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xfe'     #  0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
+    '\xb1'     #  0x8F -> PLUS-MINUS SIGN
+    '\xb0'     #  0x90 -> DEGREE SIGN
+    'j'        #  0x91 -> LATIN SMALL LETTER J
+    'k'        #  0x92 -> LATIN SMALL LETTER K
+    'l'        #  0x93 -> LATIN SMALL LETTER L
+    'm'        #  0x94 -> LATIN SMALL LETTER M
+    'n'        #  0x95 -> LATIN SMALL LETTER N
+    'o'        #  0x96 -> LATIN SMALL LETTER O
+    'p'        #  0x97 -> LATIN SMALL LETTER P
+    'q'        #  0x98 -> LATIN SMALL LETTER Q
+    'r'        #  0x99 -> LATIN SMALL LETTER R
+    '\xaa'     #  0x9A -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0x9B -> MASCULINE ORDINAL INDICATOR
+    '\xe6'     #  0x9C -> LATIN SMALL LIGATURE AE
+    '\xb8'     #  0x9D -> CEDILLA
+    '\xc6'     #  0x9E -> LATIN CAPITAL LIGATURE AE
+    '\xa4'     #  0x9F -> CURRENCY SIGN
+    '\xb5'     #  0xA0 -> MICRO SIGN
+    '~'        #  0xA1 -> TILDE
+    's'        #  0xA2 -> LATIN SMALL LETTER S
+    't'        #  0xA3 -> LATIN SMALL LETTER T
+    'u'        #  0xA4 -> LATIN SMALL LETTER U
+    'v'        #  0xA5 -> LATIN SMALL LETTER V
+    'w'        #  0xA6 -> LATIN SMALL LETTER W
+    'x'        #  0xA7 -> LATIN SMALL LETTER X
+    'y'        #  0xA8 -> LATIN SMALL LETTER Y
+    'z'        #  0xA9 -> LATIN SMALL LETTER Z
+    '\xa1'     #  0xAA -> INVERTED EXCLAMATION MARK
+    '\xbf'     #  0xAB -> INVERTED QUESTION MARK
+    '\xd0'     #  0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+    '\xdd'     #  0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xde'     #  0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+    '\xae'     #  0xAF -> REGISTERED SIGN
+    '^'        #  0xB0 -> CIRCUMFLEX ACCENT
+    '\xa3'     #  0xB1 -> POUND SIGN
+    '\xa5'     #  0xB2 -> YEN SIGN
+    '\xb7'     #  0xB3 -> MIDDLE DOT
+    '\xa9'     #  0xB4 -> COPYRIGHT SIGN
+    '\xa7'     #  0xB5 -> SECTION SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
+    '['        #  0xBA -> LEFT SQUARE BRACKET
+    ']'        #  0xBB -> RIGHT SQUARE BRACKET
+    '\xaf'     #  0xBC -> MACRON
+    '\xa8'     #  0xBD -> DIAERESIS
+    '\xb4'     #  0xBE -> ACUTE ACCENT
+    '\xd7'     #  0xBF -> MULTIPLICATION SIGN
+    '{'        #  0xC0 -> LEFT CURLY BRACKET
+    'A'        #  0xC1 -> LATIN CAPITAL LETTER A
+    'B'        #  0xC2 -> LATIN CAPITAL LETTER B
+    'C'        #  0xC3 -> LATIN CAPITAL LETTER C
+    'D'        #  0xC4 -> LATIN CAPITAL LETTER D
+    'E'        #  0xC5 -> LATIN CAPITAL LETTER E
+    'F'        #  0xC6 -> LATIN CAPITAL LETTER F
+    'G'        #  0xC7 -> LATIN CAPITAL LETTER G
+    'H'        #  0xC8 -> LATIN CAPITAL LETTER H
+    'I'        #  0xC9 -> LATIN CAPITAL LETTER I
+    '\xad'     #  0xCA -> SOFT HYPHEN
+    '\xf4'     #  0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf2'     #  0xCD -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xCE -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf5'     #  0xCF -> LATIN SMALL LETTER O WITH TILDE
+    '}'        #  0xD0 -> RIGHT CURLY BRACKET
+    'J'        #  0xD1 -> LATIN CAPITAL LETTER J
+    'K'        #  0xD2 -> LATIN CAPITAL LETTER K
+    'L'        #  0xD3 -> LATIN CAPITAL LETTER L
+    'M'        #  0xD4 -> LATIN CAPITAL LETTER M
+    'N'        #  0xD5 -> LATIN CAPITAL LETTER N
+    'O'        #  0xD6 -> LATIN CAPITAL LETTER O
+    'P'        #  0xD7 -> LATIN CAPITAL LETTER P
+    'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
+    'R'        #  0xD9 -> LATIN CAPITAL LETTER R
+    '\xb9'     #  0xDA -> SUPERSCRIPT ONE
+    '\xfb'     #  0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xf9'     #  0xDD -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xDE -> LATIN SMALL LETTER U WITH ACUTE
+    '\xff'     #  0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\\'       #  0xE0 -> REVERSE SOLIDUS
+    '\xf7'     #  0xE1 -> DIVISION SIGN
+    'S'        #  0xE2 -> LATIN CAPITAL LETTER S
+    'T'        #  0xE3 -> LATIN CAPITAL LETTER T
+    'U'        #  0xE4 -> LATIN CAPITAL LETTER U
+    'V'        #  0xE5 -> LATIN CAPITAL LETTER V
+    'W'        #  0xE6 -> LATIN CAPITAL LETTER W
+    'X'        #  0xE7 -> LATIN CAPITAL LETTER X
+    'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
+    '\xb2'     #  0xEA -> SUPERSCRIPT TWO
+    '\xd4'     #  0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd6'     #  0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd2'     #  0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd5'     #  0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+    '0'        #  0xF0 -> DIGIT ZERO
+    '1'        #  0xF1 -> DIGIT ONE
+    '2'        #  0xF2 -> DIGIT TWO
+    '3'        #  0xF3 -> DIGIT THREE
+    '4'        #  0xF4 -> DIGIT FOUR
+    '5'        #  0xF5 -> DIGIT FIVE
+    '6'        #  0xF6 -> DIGIT SIX
+    '7'        #  0xF7 -> DIGIT SEVEN
+    '8'        #  0xF8 -> DIGIT EIGHT
+    '9'        #  0xF9 -> DIGIT NINE
+    '\xb3'     #  0xFA -> SUPERSCRIPT THREE
+    '\xdb'     #  0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\x9f'     #  0xFF -> CONTROL
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1006.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1006.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1006.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u06f0'   #  0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO
-    u'\u06f1'   #  0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE
-    u'\u06f2'   #  0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO
-    u'\u06f3'   #  0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE
-    u'\u06f4'   #  0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR
-    u'\u06f5'   #  0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE
-    u'\u06f6'   #  0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX
-    u'\u06f7'   #  0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN
-    u'\u06f8'   #  0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT
-    u'\u06f9'   #  0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE
-    u'\u060c'   #  0xAB -> ARABIC COMMA
-    u'\u061b'   #  0xAC -> ARABIC SEMICOLON
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\u061f'   #  0xAE -> ARABIC QUESTION MARK
-    u'\ufe81'   #  0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
-    u'\ufe8d'   #  0xB0 -> ARABIC LETTER ALEF ISOLATED FORM
-    u'\ufe8e'   #  0xB1 -> ARABIC LETTER ALEF FINAL FORM
-    u'\ufe8e'   #  0xB2 -> ARABIC LETTER ALEF FINAL FORM
-    u'\ufe8f'   #  0xB3 -> ARABIC LETTER BEH ISOLATED FORM
-    u'\ufe91'   #  0xB4 -> ARABIC LETTER BEH INITIAL FORM
-    u'\ufb56'   #  0xB5 -> ARABIC LETTER PEH ISOLATED FORM
-    u'\ufb58'   #  0xB6 -> ARABIC LETTER PEH INITIAL FORM
-    u'\ufe93'   #  0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
-    u'\ufe95'   #  0xB8 -> ARABIC LETTER TEH ISOLATED FORM
-    u'\ufe97'   #  0xB9 -> ARABIC LETTER TEH INITIAL FORM
-    u'\ufb66'   #  0xBA -> ARABIC LETTER TTEH ISOLATED FORM
-    u'\ufb68'   #  0xBB -> ARABIC LETTER TTEH INITIAL FORM
-    u'\ufe99'   #  0xBC -> ARABIC LETTER THEH ISOLATED FORM
-    u'\ufe9b'   #  0xBD -> ARABIC LETTER THEH INITIAL FORM
-    u'\ufe9d'   #  0xBE -> ARABIC LETTER JEEM ISOLATED FORM
-    u'\ufe9f'   #  0xBF -> ARABIC LETTER JEEM INITIAL FORM
-    u'\ufb7a'   #  0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM
-    u'\ufb7c'   #  0xC1 -> ARABIC LETTER TCHEH INITIAL FORM
-    u'\ufea1'   #  0xC2 -> ARABIC LETTER HAH ISOLATED FORM
-    u'\ufea3'   #  0xC3 -> ARABIC LETTER HAH INITIAL FORM
-    u'\ufea5'   #  0xC4 -> ARABIC LETTER KHAH ISOLATED FORM
-    u'\ufea7'   #  0xC5 -> ARABIC LETTER KHAH INITIAL FORM
-    u'\ufea9'   #  0xC6 -> ARABIC LETTER DAL ISOLATED FORM
-    u'\ufb84'   #  0xC7 -> ARABIC LETTER DAHAL ISOLATED FORMN
-    u'\ufeab'   #  0xC8 -> ARABIC LETTER THAL ISOLATED FORM
-    u'\ufead'   #  0xC9 -> ARABIC LETTER REH ISOLATED FORM
-    u'\ufb8c'   #  0xCA -> ARABIC LETTER RREH ISOLATED FORM
-    u'\ufeaf'   #  0xCB -> ARABIC LETTER ZAIN ISOLATED FORM
-    u'\ufb8a'   #  0xCC -> ARABIC LETTER JEH ISOLATED FORM
-    u'\ufeb1'   #  0xCD -> ARABIC LETTER SEEN ISOLATED FORM
-    u'\ufeb3'   #  0xCE -> ARABIC LETTER SEEN INITIAL FORM
-    u'\ufeb5'   #  0xCF -> ARABIC LETTER SHEEN ISOLATED FORM
-    u'\ufeb7'   #  0xD0 -> ARABIC LETTER SHEEN INITIAL FORM
-    u'\ufeb9'   #  0xD1 -> ARABIC LETTER SAD ISOLATED FORM
-    u'\ufebb'   #  0xD2 -> ARABIC LETTER SAD INITIAL FORM
-    u'\ufebd'   #  0xD3 -> ARABIC LETTER DAD ISOLATED FORM
-    u'\ufebf'   #  0xD4 -> ARABIC LETTER DAD INITIAL FORM
-    u'\ufec1'   #  0xD5 -> ARABIC LETTER TAH ISOLATED FORM
-    u'\ufec5'   #  0xD6 -> ARABIC LETTER ZAH ISOLATED FORM
-    u'\ufec9'   #  0xD7 -> ARABIC LETTER AIN ISOLATED FORM
-    u'\ufeca'   #  0xD8 -> ARABIC LETTER AIN FINAL FORM
-    u'\ufecb'   #  0xD9 -> ARABIC LETTER AIN INITIAL FORM
-    u'\ufecc'   #  0xDA -> ARABIC LETTER AIN MEDIAL FORM
-    u'\ufecd'   #  0xDB -> ARABIC LETTER GHAIN ISOLATED FORM
-    u'\ufece'   #  0xDC -> ARABIC LETTER GHAIN FINAL FORM
-    u'\ufecf'   #  0xDD -> ARABIC LETTER GHAIN INITIAL FORM
-    u'\ufed0'   #  0xDE -> ARABIC LETTER GHAIN MEDIAL FORM
-    u'\ufed1'   #  0xDF -> ARABIC LETTER FEH ISOLATED FORM
-    u'\ufed3'   #  0xE0 -> ARABIC LETTER FEH INITIAL FORM
-    u'\ufed5'   #  0xE1 -> ARABIC LETTER QAF ISOLATED FORM
-    u'\ufed7'   #  0xE2 -> ARABIC LETTER QAF INITIAL FORM
-    u'\ufed9'   #  0xE3 -> ARABIC LETTER KAF ISOLATED FORM
-    u'\ufedb'   #  0xE4 -> ARABIC LETTER KAF INITIAL FORM
-    u'\ufb92'   #  0xE5 -> ARABIC LETTER GAF ISOLATED FORM
-    u'\ufb94'   #  0xE6 -> ARABIC LETTER GAF INITIAL FORM
-    u'\ufedd'   #  0xE7 -> ARABIC LETTER LAM ISOLATED FORM
-    u'\ufedf'   #  0xE8 -> ARABIC LETTER LAM INITIAL FORM
-    u'\ufee0'   #  0xE9 -> ARABIC LETTER LAM MEDIAL FORM
-    u'\ufee1'   #  0xEA -> ARABIC LETTER MEEM ISOLATED FORM
-    u'\ufee3'   #  0xEB -> ARABIC LETTER MEEM INITIAL FORM
-    u'\ufb9e'   #  0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM
-    u'\ufee5'   #  0xED -> ARABIC LETTER NOON ISOLATED FORM
-    u'\ufee7'   #  0xEE -> ARABIC LETTER NOON INITIAL FORM
-    u'\ufe85'   #  0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
-    u'\ufeed'   #  0xF0 -> ARABIC LETTER WAW ISOLATED FORM
-    u'\ufba6'   #  0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM
-    u'\ufba8'   #  0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM
-    u'\ufba9'   #  0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM
-    u'\ufbaa'   #  0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM
-    u'\ufe80'   #  0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM
-    u'\ufe89'   #  0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM
-    u'\ufe8a'   #  0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM
-    u'\ufe8b'   #  0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
-    u'\ufef1'   #  0xF9 -> ARABIC LETTER YEH ISOLATED FORM
-    u'\ufef2'   #  0xFA -> ARABIC LETTER YEH FINAL FORM
-    u'\ufef3'   #  0xFB -> ARABIC LETTER YEH INITIAL FORM
-    u'\ufbb0'   #  0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM
-    u'\ufbae'   #  0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM
-    u'\ufe7c'   #  0xFE -> ARABIC SHADDA ISOLATED FORM
-    u'\ufe7d'   #  0xFF -> ARABIC SHADDA MEDIAL FORM
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u06f0'   #  0xA1 -> EXTENDED ARABIC-INDIC DIGIT ZERO
+    '\u06f1'   #  0xA2 -> EXTENDED ARABIC-INDIC DIGIT ONE
+    '\u06f2'   #  0xA3 -> EXTENDED ARABIC-INDIC DIGIT TWO
+    '\u06f3'   #  0xA4 -> EXTENDED ARABIC-INDIC DIGIT THREE
+    '\u06f4'   #  0xA5 -> EXTENDED ARABIC-INDIC DIGIT FOUR
+    '\u06f5'   #  0xA6 -> EXTENDED ARABIC-INDIC DIGIT FIVE
+    '\u06f6'   #  0xA7 -> EXTENDED ARABIC-INDIC DIGIT SIX
+    '\u06f7'   #  0xA8 -> EXTENDED ARABIC-INDIC DIGIT SEVEN
+    '\u06f8'   #  0xA9 -> EXTENDED ARABIC-INDIC DIGIT EIGHT
+    '\u06f9'   #  0xAA -> EXTENDED ARABIC-INDIC DIGIT NINE
+    '\u060c'   #  0xAB -> ARABIC COMMA
+    '\u061b'   #  0xAC -> ARABIC SEMICOLON
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\u061f'   #  0xAE -> ARABIC QUESTION MARK
+    '\ufe81'   #  0xAF -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+    '\ufe8d'   #  0xB0 -> ARABIC LETTER ALEF ISOLATED FORM
+    '\ufe8e'   #  0xB1 -> ARABIC LETTER ALEF FINAL FORM
+    '\ufe8e'   #  0xB2 -> ARABIC LETTER ALEF FINAL FORM
+    '\ufe8f'   #  0xB3 -> ARABIC LETTER BEH ISOLATED FORM
+    '\ufe91'   #  0xB4 -> ARABIC LETTER BEH INITIAL FORM
+    '\ufb56'   #  0xB5 -> ARABIC LETTER PEH ISOLATED FORM
+    '\ufb58'   #  0xB6 -> ARABIC LETTER PEH INITIAL FORM
+    '\ufe93'   #  0xB7 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
+    '\ufe95'   #  0xB8 -> ARABIC LETTER TEH ISOLATED FORM
+    '\ufe97'   #  0xB9 -> ARABIC LETTER TEH INITIAL FORM
+    '\ufb66'   #  0xBA -> ARABIC LETTER TTEH ISOLATED FORM
+    '\ufb68'   #  0xBB -> ARABIC LETTER TTEH INITIAL FORM
+    '\ufe99'   #  0xBC -> ARABIC LETTER THEH ISOLATED FORM
+    '\ufe9b'   #  0xBD -> ARABIC LETTER THEH INITIAL FORM
+    '\ufe9d'   #  0xBE -> ARABIC LETTER JEEM ISOLATED FORM
+    '\ufe9f'   #  0xBF -> ARABIC LETTER JEEM INITIAL FORM
+    '\ufb7a'   #  0xC0 -> ARABIC LETTER TCHEH ISOLATED FORM
+    '\ufb7c'   #  0xC1 -> ARABIC LETTER TCHEH INITIAL FORM
+    '\ufea1'   #  0xC2 -> ARABIC LETTER HAH ISOLATED FORM
+    '\ufea3'   #  0xC3 -> ARABIC LETTER HAH INITIAL FORM
+    '\ufea5'   #  0xC4 -> ARABIC LETTER KHAH ISOLATED FORM
+    '\ufea7'   #  0xC5 -> ARABIC LETTER KHAH INITIAL FORM
+    '\ufea9'   #  0xC6 -> ARABIC LETTER DAL ISOLATED FORM
+    '\ufb84'   #  0xC7 -> ARABIC LETTER DAHAL ISOLATED FORM
+    '\ufeab'   #  0xC8 -> ARABIC LETTER THAL ISOLATED FORM
+    '\ufead'   #  0xC9 -> ARABIC LETTER REH ISOLATED FORM
+    '\ufb8c'   #  0xCA -> ARABIC LETTER RREH ISOLATED FORM
+    '\ufeaf'   #  0xCB -> ARABIC LETTER ZAIN ISOLATED FORM
+    '\ufb8a'   #  0xCC -> ARABIC LETTER JEH ISOLATED FORM
+    '\ufeb1'   #  0xCD -> ARABIC LETTER SEEN ISOLATED FORM
+    '\ufeb3'   #  0xCE -> ARABIC LETTER SEEN INITIAL FORM
+    '\ufeb5'   #  0xCF -> ARABIC LETTER SHEEN ISOLATED FORM
+    '\ufeb7'   #  0xD0 -> ARABIC LETTER SHEEN INITIAL FORM
+    '\ufeb9'   #  0xD1 -> ARABIC LETTER SAD ISOLATED FORM
+    '\ufebb'   #  0xD2 -> ARABIC LETTER SAD INITIAL FORM
+    '\ufebd'   #  0xD3 -> ARABIC LETTER DAD ISOLATED FORM
+    '\ufebf'   #  0xD4 -> ARABIC LETTER DAD INITIAL FORM
+    '\ufec1'   #  0xD5 -> ARABIC LETTER TAH ISOLATED FORM
+    '\ufec5'   #  0xD6 -> ARABIC LETTER ZAH ISOLATED FORM
+    '\ufec9'   #  0xD7 -> ARABIC LETTER AIN ISOLATED FORM
+    '\ufeca'   #  0xD8 -> ARABIC LETTER AIN FINAL FORM
+    '\ufecb'   #  0xD9 -> ARABIC LETTER AIN INITIAL FORM
+    '\ufecc'   #  0xDA -> ARABIC LETTER AIN MEDIAL FORM
+    '\ufecd'   #  0xDB -> ARABIC LETTER GHAIN ISOLATED FORM
+    '\ufece'   #  0xDC -> ARABIC LETTER GHAIN FINAL FORM
+    '\ufecf'   #  0xDD -> ARABIC LETTER GHAIN INITIAL FORM
+    '\ufed0'   #  0xDE -> ARABIC LETTER GHAIN MEDIAL FORM
+    '\ufed1'   #  0xDF -> ARABIC LETTER FEH ISOLATED FORM
+    '\ufed3'   #  0xE0 -> ARABIC LETTER FEH INITIAL FORM
+    '\ufed5'   #  0xE1 -> ARABIC LETTER QAF ISOLATED FORM
+    '\ufed7'   #  0xE2 -> ARABIC LETTER QAF INITIAL FORM
+    '\ufed9'   #  0xE3 -> ARABIC LETTER KAF ISOLATED FORM
+    '\ufedb'   #  0xE4 -> ARABIC LETTER KAF INITIAL FORM
+    '\ufb92'   #  0xE5 -> ARABIC LETTER GAF ISOLATED FORM
+    '\ufb94'   #  0xE6 -> ARABIC LETTER GAF INITIAL FORM
+    '\ufedd'   #  0xE7 -> ARABIC LETTER LAM ISOLATED FORM
+    '\ufedf'   #  0xE8 -> ARABIC LETTER LAM INITIAL FORM
+    '\ufee0'   #  0xE9 -> ARABIC LETTER LAM MEDIAL FORM
+    '\ufee1'   #  0xEA -> ARABIC LETTER MEEM ISOLATED FORM
+    '\ufee3'   #  0xEB -> ARABIC LETTER MEEM INITIAL FORM
+    '\ufb9e'   #  0xEC -> ARABIC LETTER NOON GHUNNA ISOLATED FORM
+    '\ufee5'   #  0xED -> ARABIC LETTER NOON ISOLATED FORM
+    '\ufee7'   #  0xEE -> ARABIC LETTER NOON INITIAL FORM
+    '\ufe85'   #  0xEF -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+    '\ufeed'   #  0xF0 -> ARABIC LETTER WAW ISOLATED FORM
+    '\ufba6'   #  0xF1 -> ARABIC LETTER HEH GOAL ISOLATED FORM
+    '\ufba8'   #  0xF2 -> ARABIC LETTER HEH GOAL INITIAL FORM
+    '\ufba9'   #  0xF3 -> ARABIC LETTER HEH GOAL MEDIAL FORM
+    '\ufbaa'   #  0xF4 -> ARABIC LETTER HEH DOACHASHMEE ISOLATED FORM
+    '\ufe80'   #  0xF5 -> ARABIC LETTER HAMZA ISOLATED FORM
+    '\ufe89'   #  0xF6 -> ARABIC LETTER YEH WITH HAMZA ABOVE ISOLATED FORM
+    '\ufe8a'   #  0xF7 -> ARABIC LETTER YEH WITH HAMZA ABOVE FINAL FORM
+    '\ufe8b'   #  0xF8 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+    '\ufef1'   #  0xF9 -> ARABIC LETTER YEH ISOLATED FORM
+    '\ufef2'   #  0xFA -> ARABIC LETTER YEH FINAL FORM
+    '\ufef3'   #  0xFB -> ARABIC LETTER YEH INITIAL FORM
+    '\ufbb0'   #  0xFC -> ARABIC LETTER YEH BARREE WITH HAMZA ABOVE ISOLATED FORM
+    '\ufbae'   #  0xFD -> ARABIC LETTER YEH BARREE ISOLATED FORM
+    '\ufe7c'   #  0xFE -> ARABIC SHADDA ISOLATED FORM
+    '\ufe7d'   #  0xFF -> ARABIC SHADDA MEDIAL FORM
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1026.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1026.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1026.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x9c'     #  0x04 -> CONTROL
-    u'\t'       #  0x05 -> HORIZONTAL TABULATION
-    u'\x86'     #  0x06 -> CONTROL
-    u'\x7f'     #  0x07 -> DELETE
-    u'\x97'     #  0x08 -> CONTROL
-    u'\x8d'     #  0x09 -> CONTROL
-    u'\x8e'     #  0x0A -> CONTROL
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x9d'     #  0x14 -> CONTROL
-    u'\x85'     #  0x15 -> CONTROL
-    u'\x08'     #  0x16 -> BACKSPACE
-    u'\x87'     #  0x17 -> CONTROL
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x92'     #  0x1A -> CONTROL
-    u'\x8f'     #  0x1B -> CONTROL
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u'\x80'     #  0x20 -> CONTROL
-    u'\x81'     #  0x21 -> CONTROL
-    u'\x82'     #  0x22 -> CONTROL
-    u'\x83'     #  0x23 -> CONTROL
-    u'\x84'     #  0x24 -> CONTROL
-    u'\n'       #  0x25 -> LINE FEED
-    u'\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
-    u'\x1b'     #  0x27 -> ESCAPE
-    u'\x88'     #  0x28 -> CONTROL
-    u'\x89'     #  0x29 -> CONTROL
-    u'\x8a'     #  0x2A -> CONTROL
-    u'\x8b'     #  0x2B -> CONTROL
-    u'\x8c'     #  0x2C -> CONTROL
-    u'\x05'     #  0x2D -> ENQUIRY
-    u'\x06'     #  0x2E -> ACKNOWLEDGE
-    u'\x07'     #  0x2F -> BELL
-    u'\x90'     #  0x30 -> CONTROL
-    u'\x91'     #  0x31 -> CONTROL
-    u'\x16'     #  0x32 -> SYNCHRONOUS IDLE
-    u'\x93'     #  0x33 -> CONTROL
-    u'\x94'     #  0x34 -> CONTROL
-    u'\x95'     #  0x35 -> CONTROL
-    u'\x96'     #  0x36 -> CONTROL
-    u'\x04'     #  0x37 -> END OF TRANSMISSION
-    u'\x98'     #  0x38 -> CONTROL
-    u'\x99'     #  0x39 -> CONTROL
-    u'\x9a'     #  0x3A -> CONTROL
-    u'\x9b'     #  0x3B -> CONTROL
-    u'\x14'     #  0x3C -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
-    u'\x9e'     #  0x3E -> CONTROL
-    u'\x1a'     #  0x3F -> SUBSTITUTE
-    u' '        #  0x40 -> SPACE
-    u'\xa0'     #  0x41 -> NO-BREAK SPACE
-    u'\xe2'     #  0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe0'     #  0x44 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0x45 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe3'     #  0x46 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe5'     #  0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'{'        #  0x48 -> LEFT CURLY BRACKET
-    u'\xf1'     #  0x49 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xc7'     #  0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'.'        #  0x4B -> FULL STOP
-    u'<'        #  0x4C -> LESS-THAN SIGN
-    u'('        #  0x4D -> LEFT PARENTHESIS
-    u'+'        #  0x4E -> PLUS SIGN
-    u'!'        #  0x4F -> EXCLAMATION MARK
-    u'&'        #  0x50 -> AMPERSAND
-    u'\xe9'     #  0x51 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x54 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xed'     #  0x55 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xec'     #  0x58 -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xdf'     #  0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
-    u'\u011e'   #  0x5A -> LATIN CAPITAL LETTER G WITH BREVE
-    u'\u0130'   #  0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE
-    u'*'        #  0x5C -> ASTERISK
-    u')'        #  0x5D -> RIGHT PARENTHESIS
-    u';'        #  0x5E -> SEMICOLON
-    u'^'        #  0x5F -> CIRCUMFLEX ACCENT
-    u'-'        #  0x60 -> HYPHEN-MINUS
-    u'/'        #  0x61 -> SOLIDUS
-    u'\xc2'     #  0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc4'     #  0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc0'     #  0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc3'     #  0x66 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc5'     #  0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'['        #  0x68 -> LEFT SQUARE BRACKET
-    u'\xd1'     #  0x69 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\u015f'   #  0x6A -> LATIN SMALL LETTER S WITH CEDILLA
-    u','        #  0x6B -> COMMA
-    u'%'        #  0x6C -> PERCENT SIGN
-    u'_'        #  0x6D -> LOW LINE
-    u'>'        #  0x6E -> GREATER-THAN SIGN
-    u'?'        #  0x6F -> QUESTION MARK
-    u'\xf8'     #  0x70 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xc9'     #  0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xcd'     #  0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xcc'     #  0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\u0131'   #  0x79 -> LATIN SMALL LETTER DOTLESS I
-    u':'        #  0x7A -> COLON
-    u'\xd6'     #  0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\u015e'   #  0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA
-    u"'"        #  0x7D -> APOSTROPHE
-    u'='        #  0x7E -> EQUALS SIGN
-    u'\xdc'     #  0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xd8'     #  0x80 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'a'        #  0x81 -> LATIN SMALL LETTER A
-    u'b'        #  0x82 -> LATIN SMALL LETTER B
-    u'c'        #  0x83 -> LATIN SMALL LETTER C
-    u'd'        #  0x84 -> LATIN SMALL LETTER D
-    u'e'        #  0x85 -> LATIN SMALL LETTER E
-    u'f'        #  0x86 -> LATIN SMALL LETTER F
-    u'g'        #  0x87 -> LATIN SMALL LETTER G
-    u'h'        #  0x88 -> LATIN SMALL LETTER H
-    u'i'        #  0x89 -> LATIN SMALL LETTER I
-    u'\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'}'        #  0x8C -> RIGHT CURLY BRACKET
-    u'`'        #  0x8D -> GRAVE ACCENT
-    u'\xa6'     #  0x8E -> BROKEN BAR
-    u'\xb1'     #  0x8F -> PLUS-MINUS SIGN
-    u'\xb0'     #  0x90 -> DEGREE SIGN
-    u'j'        #  0x91 -> LATIN SMALL LETTER J
-    u'k'        #  0x92 -> LATIN SMALL LETTER K
-    u'l'        #  0x93 -> LATIN SMALL LETTER L
-    u'm'        #  0x94 -> LATIN SMALL LETTER M
-    u'n'        #  0x95 -> LATIN SMALL LETTER N
-    u'o'        #  0x96 -> LATIN SMALL LETTER O
-    u'p'        #  0x97 -> LATIN SMALL LETTER P
-    u'q'        #  0x98 -> LATIN SMALL LETTER Q
-    u'r'        #  0x99 -> LATIN SMALL LETTER R
-    u'\xaa'     #  0x9A -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0x9B -> MASCULINE ORDINAL INDICATOR
-    u'\xe6'     #  0x9C -> LATIN SMALL LIGATURE AE
-    u'\xb8'     #  0x9D -> CEDILLA
-    u'\xc6'     #  0x9E -> LATIN CAPITAL LIGATURE AE
-    u'\xa4'     #  0x9F -> CURRENCY SIGN
-    u'\xb5'     #  0xA0 -> MICRO SIGN
-    u'\xf6'     #  0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u's'        #  0xA2 -> LATIN SMALL LETTER S
-    u't'        #  0xA3 -> LATIN SMALL LETTER T
-    u'u'        #  0xA4 -> LATIN SMALL LETTER U
-    u'v'        #  0xA5 -> LATIN SMALL LETTER V
-    u'w'        #  0xA6 -> LATIN SMALL LETTER W
-    u'x'        #  0xA7 -> LATIN SMALL LETTER X
-    u'y'        #  0xA8 -> LATIN SMALL LETTER Y
-    u'z'        #  0xA9 -> LATIN SMALL LETTER Z
-    u'\xa1'     #  0xAA -> INVERTED EXCLAMATION MARK
-    u'\xbf'     #  0xAB -> INVERTED QUESTION MARK
-    u']'        #  0xAC -> RIGHT SQUARE BRACKET
-    u'$'        #  0xAD -> DOLLAR SIGN
-    u'@'        #  0xAE -> COMMERCIAL AT
-    u'\xae'     #  0xAF -> REGISTERED SIGN
-    u'\xa2'     #  0xB0 -> CENT SIGN
-    u'\xa3'     #  0xB1 -> POUND SIGN
-    u'\xa5'     #  0xB2 -> YEN SIGN
-    u'\xb7'     #  0xB3 -> MIDDLE DOT
-    u'\xa9'     #  0xB4 -> COPYRIGHT SIGN
-    u'\xa7'     #  0xB5 -> SECTION SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
-    u'\xac'     #  0xBA -> NOT SIGN
-    u'|'        #  0xBB -> VERTICAL LINE
-    u'\xaf'     #  0xBC -> MACRON
-    u'\xa8'     #  0xBD -> DIAERESIS
-    u'\xb4'     #  0xBE -> ACUTE ACCENT
-    u'\xd7'     #  0xBF -> MULTIPLICATION SIGN
-    u'\xe7'     #  0xC0 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'A'        #  0xC1 -> LATIN CAPITAL LETTER A
-    u'B'        #  0xC2 -> LATIN CAPITAL LETTER B
-    u'C'        #  0xC3 -> LATIN CAPITAL LETTER C
-    u'D'        #  0xC4 -> LATIN CAPITAL LETTER D
-    u'E'        #  0xC5 -> LATIN CAPITAL LETTER E
-    u'F'        #  0xC6 -> LATIN CAPITAL LETTER F
-    u'G'        #  0xC7 -> LATIN CAPITAL LETTER G
-    u'H'        #  0xC8 -> LATIN CAPITAL LETTER H
-    u'I'        #  0xC9 -> LATIN CAPITAL LETTER I
-    u'\xad'     #  0xCA -> SOFT HYPHEN
-    u'\xf4'     #  0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'~'        #  0xCC -> TILDE
-    u'\xf2'     #  0xCD -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xCE -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf5'     #  0xCF -> LATIN SMALL LETTER O WITH TILDE
-    u'\u011f'   #  0xD0 -> LATIN SMALL LETTER G WITH BREVE
-    u'J'        #  0xD1 -> LATIN CAPITAL LETTER J
-    u'K'        #  0xD2 -> LATIN CAPITAL LETTER K
-    u'L'        #  0xD3 -> LATIN CAPITAL LETTER L
-    u'M'        #  0xD4 -> LATIN CAPITAL LETTER M
-    u'N'        #  0xD5 -> LATIN CAPITAL LETTER N
-    u'O'        #  0xD6 -> LATIN CAPITAL LETTER O
-    u'P'        #  0xD7 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0xD9 -> LATIN CAPITAL LETTER R
-    u'\xb9'     #  0xDA -> SUPERSCRIPT ONE
-    u'\xfb'     #  0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\\'       #  0xDC -> REVERSE SOLIDUS
-    u'\xf9'     #  0xDD -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xDE -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xff'     #  0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\xfc'     #  0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xf7'     #  0xE1 -> DIVISION SIGN
-    u'S'        #  0xE2 -> LATIN CAPITAL LETTER S
-    u'T'        #  0xE3 -> LATIN CAPITAL LETTER T
-    u'U'        #  0xE4 -> LATIN CAPITAL LETTER U
-    u'V'        #  0xE5 -> LATIN CAPITAL LETTER V
-    u'W'        #  0xE6 -> LATIN CAPITAL LETTER W
-    u'X'        #  0xE7 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
-    u'\xb2'     #  0xEA -> SUPERSCRIPT TWO
-    u'\xd4'     #  0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'#'        #  0xEC -> NUMBER SIGN
-    u'\xd2'     #  0xED -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd5'     #  0xEF -> LATIN CAPITAL LETTER O WITH TILDE
-    u'0'        #  0xF0 -> DIGIT ZERO
-    u'1'        #  0xF1 -> DIGIT ONE
-    u'2'        #  0xF2 -> DIGIT TWO
-    u'3'        #  0xF3 -> DIGIT THREE
-    u'4'        #  0xF4 -> DIGIT FOUR
-    u'5'        #  0xF5 -> DIGIT FIVE
-    u'6'        #  0xF6 -> DIGIT SIX
-    u'7'        #  0xF7 -> DIGIT SEVEN
-    u'8'        #  0xF8 -> DIGIT EIGHT
-    u'9'        #  0xF9 -> DIGIT NINE
-    u'\xb3'     #  0xFA -> SUPERSCRIPT THREE
-    u'\xdb'     #  0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'"'        #  0xFC -> QUOTATION MARK
-    u'\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\x9f'     #  0xFF -> CONTROL
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x9c'     #  0x04 -> CONTROL
+    '\t'       #  0x05 -> HORIZONTAL TABULATION
+    '\x86'     #  0x06 -> CONTROL
+    '\x7f'     #  0x07 -> DELETE
+    '\x97'     #  0x08 -> CONTROL
+    '\x8d'     #  0x09 -> CONTROL
+    '\x8e'     #  0x0A -> CONTROL
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x9d'     #  0x14 -> CONTROL
+    '\x85'     #  0x15 -> CONTROL
+    '\x08'     #  0x16 -> BACKSPACE
+    '\x87'     #  0x17 -> CONTROL
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x92'     #  0x1A -> CONTROL
+    '\x8f'     #  0x1B -> CONTROL
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    '\x80'     #  0x20 -> CONTROL
+    '\x81'     #  0x21 -> CONTROL
+    '\x82'     #  0x22 -> CONTROL
+    '\x83'     #  0x23 -> CONTROL
+    '\x84'     #  0x24 -> CONTROL
+    '\n'       #  0x25 -> LINE FEED
+    '\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
+    '\x1b'     #  0x27 -> ESCAPE
+    '\x88'     #  0x28 -> CONTROL
+    '\x89'     #  0x29 -> CONTROL
+    '\x8a'     #  0x2A -> CONTROL
+    '\x8b'     #  0x2B -> CONTROL
+    '\x8c'     #  0x2C -> CONTROL
+    '\x05'     #  0x2D -> ENQUIRY
+    '\x06'     #  0x2E -> ACKNOWLEDGE
+    '\x07'     #  0x2F -> BELL
+    '\x90'     #  0x30 -> CONTROL
+    '\x91'     #  0x31 -> CONTROL
+    '\x16'     #  0x32 -> SYNCHRONOUS IDLE
+    '\x93'     #  0x33 -> CONTROL
+    '\x94'     #  0x34 -> CONTROL
+    '\x95'     #  0x35 -> CONTROL
+    '\x96'     #  0x36 -> CONTROL
+    '\x04'     #  0x37 -> END OF TRANSMISSION
+    '\x98'     #  0x38 -> CONTROL
+    '\x99'     #  0x39 -> CONTROL
+    '\x9a'     #  0x3A -> CONTROL
+    '\x9b'     #  0x3B -> CONTROL
+    '\x14'     #  0x3C -> DEVICE CONTROL FOUR
+    '\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
+    '\x9e'     #  0x3E -> CONTROL
+    '\x1a'     #  0x3F -> SUBSTITUTE
+    ' '        #  0x40 -> SPACE
+    '\xa0'     #  0x41 -> NO-BREAK SPACE
+    '\xe2'     #  0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe0'     #  0x44 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0x45 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe3'     #  0x46 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe5'     #  0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '{'        #  0x48 -> LEFT CURLY BRACKET
+    '\xf1'     #  0x49 -> LATIN SMALL LETTER N WITH TILDE
+    '\xc7'     #  0x4A -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '.'        #  0x4B -> FULL STOP
+    '<'        #  0x4C -> LESS-THAN SIGN
+    '('        #  0x4D -> LEFT PARENTHESIS
+    '+'        #  0x4E -> PLUS SIGN
+    '!'        #  0x4F -> EXCLAMATION MARK
+    '&'        #  0x50 -> AMPERSAND
+    '\xe9'     #  0x51 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x54 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xed'     #  0x55 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xec'     #  0x58 -> LATIN SMALL LETTER I WITH GRAVE
+    '\xdf'     #  0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+    '\u011e'   #  0x5A -> LATIN CAPITAL LETTER G WITH BREVE
+    '\u0130'   #  0x5B -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+    '*'        #  0x5C -> ASTERISK
+    ')'        #  0x5D -> RIGHT PARENTHESIS
+    ';'        #  0x5E -> SEMICOLON
+    '^'        #  0x5F -> CIRCUMFLEX ACCENT
+    '-'        #  0x60 -> HYPHEN-MINUS
+    '/'        #  0x61 -> SOLIDUS
+    '\xc2'     #  0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc4'     #  0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc0'     #  0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc3'     #  0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc5'     #  0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '['        #  0x68 -> LEFT SQUARE BRACKET
+    '\xd1'     #  0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\u015f'   #  0x6A -> LATIN SMALL LETTER S WITH CEDILLA
+    ','        #  0x6B -> COMMA
+    '%'        #  0x6C -> PERCENT SIGN
+    '_'        #  0x6D -> LOW LINE
+    '>'        #  0x6E -> GREATER-THAN SIGN
+    '?'        #  0x6F -> QUESTION MARK
+    '\xf8'     #  0x70 -> LATIN SMALL LETTER O WITH STROKE
+    '\xc9'     #  0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xcd'     #  0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xcc'     #  0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\u0131'   #  0x79 -> LATIN SMALL LETTER DOTLESS I
+    ':'        #  0x7A -> COLON
+    '\xd6'     #  0x7B -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\u015e'   #  0x7C -> LATIN CAPITAL LETTER S WITH CEDILLA
+    "'"        #  0x7D -> APOSTROPHE
+    '='        #  0x7E -> EQUALS SIGN
+    '\xdc'     #  0x7F -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xd8'     #  0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+    'a'        #  0x81 -> LATIN SMALL LETTER A
+    'b'        #  0x82 -> LATIN SMALL LETTER B
+    'c'        #  0x83 -> LATIN SMALL LETTER C
+    'd'        #  0x84 -> LATIN SMALL LETTER D
+    'e'        #  0x85 -> LATIN SMALL LETTER E
+    'f'        #  0x86 -> LATIN SMALL LETTER F
+    'g'        #  0x87 -> LATIN SMALL LETTER G
+    'h'        #  0x88 -> LATIN SMALL LETTER H
+    'i'        #  0x89 -> LATIN SMALL LETTER I
+    '\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '}'        #  0x8C -> RIGHT CURLY BRACKET
+    '`'        #  0x8D -> GRAVE ACCENT
+    '\xa6'     #  0x8E -> BROKEN BAR
+    '\xb1'     #  0x8F -> PLUS-MINUS SIGN
+    '\xb0'     #  0x90 -> DEGREE SIGN
+    'j'        #  0x91 -> LATIN SMALL LETTER J
+    'k'        #  0x92 -> LATIN SMALL LETTER K
+    'l'        #  0x93 -> LATIN SMALL LETTER L
+    'm'        #  0x94 -> LATIN SMALL LETTER M
+    'n'        #  0x95 -> LATIN SMALL LETTER N
+    'o'        #  0x96 -> LATIN SMALL LETTER O
+    'p'        #  0x97 -> LATIN SMALL LETTER P
+    'q'        #  0x98 -> LATIN SMALL LETTER Q
+    'r'        #  0x99 -> LATIN SMALL LETTER R
+    '\xaa'     #  0x9A -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0x9B -> MASCULINE ORDINAL INDICATOR
+    '\xe6'     #  0x9C -> LATIN SMALL LIGATURE AE
+    '\xb8'     #  0x9D -> CEDILLA
+    '\xc6'     #  0x9E -> LATIN CAPITAL LIGATURE AE
+    '\xa4'     #  0x9F -> CURRENCY SIGN
+    '\xb5'     #  0xA0 -> MICRO SIGN
+    '\xf6'     #  0xA1 -> LATIN SMALL LETTER O WITH DIAERESIS
+    's'        #  0xA2 -> LATIN SMALL LETTER S
+    't'        #  0xA3 -> LATIN SMALL LETTER T
+    'u'        #  0xA4 -> LATIN SMALL LETTER U
+    'v'        #  0xA5 -> LATIN SMALL LETTER V
+    'w'        #  0xA6 -> LATIN SMALL LETTER W
+    'x'        #  0xA7 -> LATIN SMALL LETTER X
+    'y'        #  0xA8 -> LATIN SMALL LETTER Y
+    'z'        #  0xA9 -> LATIN SMALL LETTER Z
+    '\xa1'     #  0xAA -> INVERTED EXCLAMATION MARK
+    '\xbf'     #  0xAB -> INVERTED QUESTION MARK
+    ']'        #  0xAC -> RIGHT SQUARE BRACKET
+    '$'        #  0xAD -> DOLLAR SIGN
+    '@'        #  0xAE -> COMMERCIAL AT
+    '\xae'     #  0xAF -> REGISTERED SIGN
+    '\xa2'     #  0xB0 -> CENT SIGN
+    '\xa3'     #  0xB1 -> POUND SIGN
+    '\xa5'     #  0xB2 -> YEN SIGN
+    '\xb7'     #  0xB3 -> MIDDLE DOT
+    '\xa9'     #  0xB4 -> COPYRIGHT SIGN
+    '\xa7'     #  0xB5 -> SECTION SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
+    '\xac'     #  0xBA -> NOT SIGN
+    '|'        #  0xBB -> VERTICAL LINE
+    '\xaf'     #  0xBC -> MACRON
+    '\xa8'     #  0xBD -> DIAERESIS
+    '\xb4'     #  0xBE -> ACUTE ACCENT
+    '\xd7'     #  0xBF -> MULTIPLICATION SIGN
+    '\xe7'     #  0xC0 -> LATIN SMALL LETTER C WITH CEDILLA
+    'A'        #  0xC1 -> LATIN CAPITAL LETTER A
+    'B'        #  0xC2 -> LATIN CAPITAL LETTER B
+    'C'        #  0xC3 -> LATIN CAPITAL LETTER C
+    'D'        #  0xC4 -> LATIN CAPITAL LETTER D
+    'E'        #  0xC5 -> LATIN CAPITAL LETTER E
+    'F'        #  0xC6 -> LATIN CAPITAL LETTER F
+    'G'        #  0xC7 -> LATIN CAPITAL LETTER G
+    'H'        #  0xC8 -> LATIN CAPITAL LETTER H
+    'I'        #  0xC9 -> LATIN CAPITAL LETTER I
+    '\xad'     #  0xCA -> SOFT HYPHEN
+    '\xf4'     #  0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '~'        #  0xCC -> TILDE
+    '\xf2'     #  0xCD -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xCE -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf5'     #  0xCF -> LATIN SMALL LETTER O WITH TILDE
+    '\u011f'   #  0xD0 -> LATIN SMALL LETTER G WITH BREVE
+    'J'        #  0xD1 -> LATIN CAPITAL LETTER J
+    'K'        #  0xD2 -> LATIN CAPITAL LETTER K
+    'L'        #  0xD3 -> LATIN CAPITAL LETTER L
+    'M'        #  0xD4 -> LATIN CAPITAL LETTER M
+    'N'        #  0xD5 -> LATIN CAPITAL LETTER N
+    'O'        #  0xD6 -> LATIN CAPITAL LETTER O
+    'P'        #  0xD7 -> LATIN CAPITAL LETTER P
+    'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
+    'R'        #  0xD9 -> LATIN CAPITAL LETTER R
+    '\xb9'     #  0xDA -> SUPERSCRIPT ONE
+    '\xfb'     #  0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\\'       #  0xDC -> REVERSE SOLIDUS
+    '\xf9'     #  0xDD -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xDE -> LATIN SMALL LETTER U WITH ACUTE
+    '\xff'     #  0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\xfc'     #  0xE0 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xf7'     #  0xE1 -> DIVISION SIGN
+    'S'        #  0xE2 -> LATIN CAPITAL LETTER S
+    'T'        #  0xE3 -> LATIN CAPITAL LETTER T
+    'U'        #  0xE4 -> LATIN CAPITAL LETTER U
+    'V'        #  0xE5 -> LATIN CAPITAL LETTER V
+    'W'        #  0xE6 -> LATIN CAPITAL LETTER W
+    'X'        #  0xE7 -> LATIN CAPITAL LETTER X
+    'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
+    '\xb2'     #  0xEA -> SUPERSCRIPT TWO
+    '\xd4'     #  0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '#'        #  0xEC -> NUMBER SIGN
+    '\xd2'     #  0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd5'     #  0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+    '0'        #  0xF0 -> DIGIT ZERO
+    '1'        #  0xF1 -> DIGIT ONE
+    '2'        #  0xF2 -> DIGIT TWO
+    '3'        #  0xF3 -> DIGIT THREE
+    '4'        #  0xF4 -> DIGIT FOUR
+    '5'        #  0xF5 -> DIGIT FIVE
+    '6'        #  0xF6 -> DIGIT SIX
+    '7'        #  0xF7 -> DIGIT SEVEN
+    '8'        #  0xF8 -> DIGIT EIGHT
+    '9'        #  0xF9 -> DIGIT NINE
+    '\xb3'     #  0xFA -> SUPERSCRIPT THREE
+    '\xdb'     #  0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '"'        #  0xFC -> QUOTATION MARK
+    '\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\x9f'     #  0xFF -> CONTROL
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1140.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1140.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1140.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x9c'     #  0x04 -> CONTROL
-    u'\t'       #  0x05 -> HORIZONTAL TABULATION
-    u'\x86'     #  0x06 -> CONTROL
-    u'\x7f'     #  0x07 -> DELETE
-    u'\x97'     #  0x08 -> CONTROL
-    u'\x8d'     #  0x09 -> CONTROL
-    u'\x8e'     #  0x0A -> CONTROL
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x9d'     #  0x14 -> CONTROL
-    u'\x85'     #  0x15 -> CONTROL
-    u'\x08'     #  0x16 -> BACKSPACE
-    u'\x87'     #  0x17 -> CONTROL
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x92'     #  0x1A -> CONTROL
-    u'\x8f'     #  0x1B -> CONTROL
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u'\x80'     #  0x20 -> CONTROL
-    u'\x81'     #  0x21 -> CONTROL
-    u'\x82'     #  0x22 -> CONTROL
-    u'\x83'     #  0x23 -> CONTROL
-    u'\x84'     #  0x24 -> CONTROL
-    u'\n'       #  0x25 -> LINE FEED
-    u'\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
-    u'\x1b'     #  0x27 -> ESCAPE
-    u'\x88'     #  0x28 -> CONTROL
-    u'\x89'     #  0x29 -> CONTROL
-    u'\x8a'     #  0x2A -> CONTROL
-    u'\x8b'     #  0x2B -> CONTROL
-    u'\x8c'     #  0x2C -> CONTROL
-    u'\x05'     #  0x2D -> ENQUIRY
-    u'\x06'     #  0x2E -> ACKNOWLEDGE
-    u'\x07'     #  0x2F -> BELL
-    u'\x90'     #  0x30 -> CONTROL
-    u'\x91'     #  0x31 -> CONTROL
-    u'\x16'     #  0x32 -> SYNCHRONOUS IDLE
-    u'\x93'     #  0x33 -> CONTROL
-    u'\x94'     #  0x34 -> CONTROL
-    u'\x95'     #  0x35 -> CONTROL
-    u'\x96'     #  0x36 -> CONTROL
-    u'\x04'     #  0x37 -> END OF TRANSMISSION
-    u'\x98'     #  0x38 -> CONTROL
-    u'\x99'     #  0x39 -> CONTROL
-    u'\x9a'     #  0x3A -> CONTROL
-    u'\x9b'     #  0x3B -> CONTROL
-    u'\x14'     #  0x3C -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
-    u'\x9e'     #  0x3E -> CONTROL
-    u'\x1a'     #  0x3F -> SUBSTITUTE
-    u' '        #  0x40 -> SPACE
-    u'\xa0'     #  0x41 -> NO-BREAK SPACE
-    u'\xe2'     #  0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe0'     #  0x44 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0x45 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe3'     #  0x46 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe5'     #  0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x48 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xf1'     #  0x49 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xa2'     #  0x4A -> CENT SIGN
-    u'.'        #  0x4B -> FULL STOP
-    u'<'        #  0x4C -> LESS-THAN SIGN
-    u'('        #  0x4D -> LEFT PARENTHESIS
-    u'+'        #  0x4E -> PLUS SIGN
-    u'|'        #  0x4F -> VERTICAL LINE
-    u'&'        #  0x50 -> AMPERSAND
-    u'\xe9'     #  0x51 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x54 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xed'     #  0x55 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xec'     #  0x58 -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xdf'     #  0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
-    u'!'        #  0x5A -> EXCLAMATION MARK
-    u'$'        #  0x5B -> DOLLAR SIGN
-    u'*'        #  0x5C -> ASTERISK
-    u')'        #  0x5D -> RIGHT PARENTHESIS
-    u';'        #  0x5E -> SEMICOLON
-    u'\xac'     #  0x5F -> NOT SIGN
-    u'-'        #  0x60 -> HYPHEN-MINUS
-    u'/'        #  0x61 -> SOLIDUS
-    u'\xc2'     #  0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc4'     #  0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc0'     #  0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc3'     #  0x66 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc5'     #  0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc7'     #  0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xd1'     #  0x69 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xa6'     #  0x6A -> BROKEN BAR
-    u','        #  0x6B -> COMMA
-    u'%'        #  0x6C -> PERCENT SIGN
-    u'_'        #  0x6D -> LOW LINE
-    u'>'        #  0x6E -> GREATER-THAN SIGN
-    u'?'        #  0x6F -> QUESTION MARK
-    u'\xf8'     #  0x70 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xc9'     #  0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xcd'     #  0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xcc'     #  0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'`'        #  0x79 -> GRAVE ACCENT
-    u':'        #  0x7A -> COLON
-    u'#'        #  0x7B -> NUMBER SIGN
-    u'@'        #  0x7C -> COMMERCIAL AT
-    u"'"        #  0x7D -> APOSTROPHE
-    u'='        #  0x7E -> EQUALS SIGN
-    u'"'        #  0x7F -> QUOTATION MARK
-    u'\xd8'     #  0x80 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'a'        #  0x81 -> LATIN SMALL LETTER A
-    u'b'        #  0x82 -> LATIN SMALL LETTER B
-    u'c'        #  0x83 -> LATIN SMALL LETTER C
-    u'd'        #  0x84 -> LATIN SMALL LETTER D
-    u'e'        #  0x85 -> LATIN SMALL LETTER E
-    u'f'        #  0x86 -> LATIN SMALL LETTER F
-    u'g'        #  0x87 -> LATIN SMALL LETTER G
-    u'h'        #  0x88 -> LATIN SMALL LETTER H
-    u'i'        #  0x89 -> LATIN SMALL LETTER I
-    u'\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xf0'     #  0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
-    u'\xfd'     #  0x8D -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xfe'     #  0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
-    u'\xb1'     #  0x8F -> PLUS-MINUS SIGN
-    u'\xb0'     #  0x90 -> DEGREE SIGN
-    u'j'        #  0x91 -> LATIN SMALL LETTER J
-    u'k'        #  0x92 -> LATIN SMALL LETTER K
-    u'l'        #  0x93 -> LATIN SMALL LETTER L
-    u'm'        #  0x94 -> LATIN SMALL LETTER M
-    u'n'        #  0x95 -> LATIN SMALL LETTER N
-    u'o'        #  0x96 -> LATIN SMALL LETTER O
-    u'p'        #  0x97 -> LATIN SMALL LETTER P
-    u'q'        #  0x98 -> LATIN SMALL LETTER Q
-    u'r'        #  0x99 -> LATIN SMALL LETTER R
-    u'\xaa'     #  0x9A -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0x9B -> MASCULINE ORDINAL INDICATOR
-    u'\xe6'     #  0x9C -> LATIN SMALL LIGATURE AE
-    u'\xb8'     #  0x9D -> CEDILLA
-    u'\xc6'     #  0x9E -> LATIN CAPITAL LIGATURE AE
-    u'\u20ac'   #  0x9F -> EURO SIGN
-    u'\xb5'     #  0xA0 -> MICRO SIGN
-    u'~'        #  0xA1 -> TILDE
-    u's'        #  0xA2 -> LATIN SMALL LETTER S
-    u't'        #  0xA3 -> LATIN SMALL LETTER T
-    u'u'        #  0xA4 -> LATIN SMALL LETTER U
-    u'v'        #  0xA5 -> LATIN SMALL LETTER V
-    u'w'        #  0xA6 -> LATIN SMALL LETTER W
-    u'x'        #  0xA7 -> LATIN SMALL LETTER X
-    u'y'        #  0xA8 -> LATIN SMALL LETTER Y
-    u'z'        #  0xA9 -> LATIN SMALL LETTER Z
-    u'\xa1'     #  0xAA -> INVERTED EXCLAMATION MARK
-    u'\xbf'     #  0xAB -> INVERTED QUESTION MARK
-    u'\xd0'     #  0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
-    u'\xdd'     #  0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xde'     #  0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
-    u'\xae'     #  0xAF -> REGISTERED SIGN
-    u'^'        #  0xB0 -> CIRCUMFLEX ACCENT
-    u'\xa3'     #  0xB1 -> POUND SIGN
-    u'\xa5'     #  0xB2 -> YEN SIGN
-    u'\xb7'     #  0xB3 -> MIDDLE DOT
-    u'\xa9'     #  0xB4 -> COPYRIGHT SIGN
-    u'\xa7'     #  0xB5 -> SECTION SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
-    u'['        #  0xBA -> LEFT SQUARE BRACKET
-    u']'        #  0xBB -> RIGHT SQUARE BRACKET
-    u'\xaf'     #  0xBC -> MACRON
-    u'\xa8'     #  0xBD -> DIAERESIS
-    u'\xb4'     #  0xBE -> ACUTE ACCENT
-    u'\xd7'     #  0xBF -> MULTIPLICATION SIGN
-    u'{'        #  0xC0 -> LEFT CURLY BRACKET
-    u'A'        #  0xC1 -> LATIN CAPITAL LETTER A
-    u'B'        #  0xC2 -> LATIN CAPITAL LETTER B
-    u'C'        #  0xC3 -> LATIN CAPITAL LETTER C
-    u'D'        #  0xC4 -> LATIN CAPITAL LETTER D
-    u'E'        #  0xC5 -> LATIN CAPITAL LETTER E
-    u'F'        #  0xC6 -> LATIN CAPITAL LETTER F
-    u'G'        #  0xC7 -> LATIN CAPITAL LETTER G
-    u'H'        #  0xC8 -> LATIN CAPITAL LETTER H
-    u'I'        #  0xC9 -> LATIN CAPITAL LETTER I
-    u'\xad'     #  0xCA -> SOFT HYPHEN
-    u'\xf4'     #  0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf2'     #  0xCD -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xCE -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf5'     #  0xCF -> LATIN SMALL LETTER O WITH TILDE
-    u'}'        #  0xD0 -> RIGHT CURLY BRACKET
-    u'J'        #  0xD1 -> LATIN CAPITAL LETTER J
-    u'K'        #  0xD2 -> LATIN CAPITAL LETTER K
-    u'L'        #  0xD3 -> LATIN CAPITAL LETTER L
-    u'M'        #  0xD4 -> LATIN CAPITAL LETTER M
-    u'N'        #  0xD5 -> LATIN CAPITAL LETTER N
-    u'O'        #  0xD6 -> LATIN CAPITAL LETTER O
-    u'P'        #  0xD7 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0xD9 -> LATIN CAPITAL LETTER R
-    u'\xb9'     #  0xDA -> SUPERSCRIPT ONE
-    u'\xfb'     #  0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xf9'     #  0xDD -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xDE -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xff'     #  0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\\'       #  0xE0 -> REVERSE SOLIDUS
-    u'\xf7'     #  0xE1 -> DIVISION SIGN
-    u'S'        #  0xE2 -> LATIN CAPITAL LETTER S
-    u'T'        #  0xE3 -> LATIN CAPITAL LETTER T
-    u'U'        #  0xE4 -> LATIN CAPITAL LETTER U
-    u'V'        #  0xE5 -> LATIN CAPITAL LETTER V
-    u'W'        #  0xE6 -> LATIN CAPITAL LETTER W
-    u'X'        #  0xE7 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
-    u'\xb2'     #  0xEA -> SUPERSCRIPT TWO
-    u'\xd4'     #  0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd6'     #  0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd2'     #  0xED -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd5'     #  0xEF -> LATIN CAPITAL LETTER O WITH TILDE
-    u'0'        #  0xF0 -> DIGIT ZERO
-    u'1'        #  0xF1 -> DIGIT ONE
-    u'2'        #  0xF2 -> DIGIT TWO
-    u'3'        #  0xF3 -> DIGIT THREE
-    u'4'        #  0xF4 -> DIGIT FOUR
-    u'5'        #  0xF5 -> DIGIT FIVE
-    u'6'        #  0xF6 -> DIGIT SIX
-    u'7'        #  0xF7 -> DIGIT SEVEN
-    u'8'        #  0xF8 -> DIGIT EIGHT
-    u'9'        #  0xF9 -> DIGIT NINE
-    u'\xb3'     #  0xFA -> SUPERSCRIPT THREE
-    u'\xdb'     #  0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\x9f'     #  0xFF -> CONTROL
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x9c'     #  0x04 -> CONTROL
+    '\t'       #  0x05 -> HORIZONTAL TABULATION
+    '\x86'     #  0x06 -> CONTROL
+    '\x7f'     #  0x07 -> DELETE
+    '\x97'     #  0x08 -> CONTROL
+    '\x8d'     #  0x09 -> CONTROL
+    '\x8e'     #  0x0A -> CONTROL
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x9d'     #  0x14 -> CONTROL
+    '\x85'     #  0x15 -> CONTROL
+    '\x08'     #  0x16 -> BACKSPACE
+    '\x87'     #  0x17 -> CONTROL
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x92'     #  0x1A -> CONTROL
+    '\x8f'     #  0x1B -> CONTROL
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    '\x80'     #  0x20 -> CONTROL
+    '\x81'     #  0x21 -> CONTROL
+    '\x82'     #  0x22 -> CONTROL
+    '\x83'     #  0x23 -> CONTROL
+    '\x84'     #  0x24 -> CONTROL
+    '\n'       #  0x25 -> LINE FEED
+    '\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
+    '\x1b'     #  0x27 -> ESCAPE
+    '\x88'     #  0x28 -> CONTROL
+    '\x89'     #  0x29 -> CONTROL
+    '\x8a'     #  0x2A -> CONTROL
+    '\x8b'     #  0x2B -> CONTROL
+    '\x8c'     #  0x2C -> CONTROL
+    '\x05'     #  0x2D -> ENQUIRY
+    '\x06'     #  0x2E -> ACKNOWLEDGE
+    '\x07'     #  0x2F -> BELL
+    '\x90'     #  0x30 -> CONTROL
+    '\x91'     #  0x31 -> CONTROL
+    '\x16'     #  0x32 -> SYNCHRONOUS IDLE
+    '\x93'     #  0x33 -> CONTROL
+    '\x94'     #  0x34 -> CONTROL
+    '\x95'     #  0x35 -> CONTROL
+    '\x96'     #  0x36 -> CONTROL
+    '\x04'     #  0x37 -> END OF TRANSMISSION
+    '\x98'     #  0x38 -> CONTROL
+    '\x99'     #  0x39 -> CONTROL
+    '\x9a'     #  0x3A -> CONTROL
+    '\x9b'     #  0x3B -> CONTROL
+    '\x14'     #  0x3C -> DEVICE CONTROL FOUR
+    '\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
+    '\x9e'     #  0x3E -> CONTROL
+    '\x1a'     #  0x3F -> SUBSTITUTE
+    ' '        #  0x40 -> SPACE
+    '\xa0'     #  0x41 -> NO-BREAK SPACE
+    '\xe2'     #  0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe0'     #  0x44 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0x45 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe3'     #  0x46 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe5'     #  0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xf1'     #  0x49 -> LATIN SMALL LETTER N WITH TILDE
+    '\xa2'     #  0x4A -> CENT SIGN
+    '.'        #  0x4B -> FULL STOP
+    '<'        #  0x4C -> LESS-THAN SIGN
+    '('        #  0x4D -> LEFT PARENTHESIS
+    '+'        #  0x4E -> PLUS SIGN
+    '|'        #  0x4F -> VERTICAL LINE
+    '&'        #  0x50 -> AMPERSAND
+    '\xe9'     #  0x51 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x54 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xed'     #  0x55 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xec'     #  0x58 -> LATIN SMALL LETTER I WITH GRAVE
+    '\xdf'     #  0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+    '!'        #  0x5A -> EXCLAMATION MARK
+    '$'        #  0x5B -> DOLLAR SIGN
+    '*'        #  0x5C -> ASTERISK
+    ')'        #  0x5D -> RIGHT PARENTHESIS
+    ';'        #  0x5E -> SEMICOLON
+    '\xac'     #  0x5F -> NOT SIGN
+    '-'        #  0x60 -> HYPHEN-MINUS
+    '/'        #  0x61 -> SOLIDUS
+    '\xc2'     #  0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc4'     #  0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc0'     #  0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc3'     #  0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc5'     #  0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc7'     #  0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xd1'     #  0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xa6'     #  0x6A -> BROKEN BAR
+    ','        #  0x6B -> COMMA
+    '%'        #  0x6C -> PERCENT SIGN
+    '_'        #  0x6D -> LOW LINE
+    '>'        #  0x6E -> GREATER-THAN SIGN
+    '?'        #  0x6F -> QUESTION MARK
+    '\xf8'     #  0x70 -> LATIN SMALL LETTER O WITH STROKE
+    '\xc9'     #  0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xcd'     #  0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xcc'     #  0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+    '`'        #  0x79 -> GRAVE ACCENT
+    ':'        #  0x7A -> COLON
+    '#'        #  0x7B -> NUMBER SIGN
+    '@'        #  0x7C -> COMMERCIAL AT
+    "'"        #  0x7D -> APOSTROPHE
+    '='        #  0x7E -> EQUALS SIGN
+    '"'        #  0x7F -> QUOTATION MARK
+    '\xd8'     #  0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+    'a'        #  0x81 -> LATIN SMALL LETTER A
+    'b'        #  0x82 -> LATIN SMALL LETTER B
+    'c'        #  0x83 -> LATIN SMALL LETTER C
+    'd'        #  0x84 -> LATIN SMALL LETTER D
+    'e'        #  0x85 -> LATIN SMALL LETTER E
+    'f'        #  0x86 -> LATIN SMALL LETTER F
+    'g'        #  0x87 -> LATIN SMALL LETTER G
+    'h'        #  0x88 -> LATIN SMALL LETTER H
+    'i'        #  0x89 -> LATIN SMALL LETTER I
+    '\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xf0'     #  0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
+    '\xfd'     #  0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xfe'     #  0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
+    '\xb1'     #  0x8F -> PLUS-MINUS SIGN
+    '\xb0'     #  0x90 -> DEGREE SIGN
+    'j'        #  0x91 -> LATIN SMALL LETTER J
+    'k'        #  0x92 -> LATIN SMALL LETTER K
+    'l'        #  0x93 -> LATIN SMALL LETTER L
+    'm'        #  0x94 -> LATIN SMALL LETTER M
+    'n'        #  0x95 -> LATIN SMALL LETTER N
+    'o'        #  0x96 -> LATIN SMALL LETTER O
+    'p'        #  0x97 -> LATIN SMALL LETTER P
+    'q'        #  0x98 -> LATIN SMALL LETTER Q
+    'r'        #  0x99 -> LATIN SMALL LETTER R
+    '\xaa'     #  0x9A -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0x9B -> MASCULINE ORDINAL INDICATOR
+    '\xe6'     #  0x9C -> LATIN SMALL LIGATURE AE
+    '\xb8'     #  0x9D -> CEDILLA
+    '\xc6'     #  0x9E -> LATIN CAPITAL LIGATURE AE
+    '\u20ac'   #  0x9F -> EURO SIGN
+    '\xb5'     #  0xA0 -> MICRO SIGN
+    '~'        #  0xA1 -> TILDE
+    's'        #  0xA2 -> LATIN SMALL LETTER S
+    't'        #  0xA3 -> LATIN SMALL LETTER T
+    'u'        #  0xA4 -> LATIN SMALL LETTER U
+    'v'        #  0xA5 -> LATIN SMALL LETTER V
+    'w'        #  0xA6 -> LATIN SMALL LETTER W
+    'x'        #  0xA7 -> LATIN SMALL LETTER X
+    'y'        #  0xA8 -> LATIN SMALL LETTER Y
+    'z'        #  0xA9 -> LATIN SMALL LETTER Z
+    '\xa1'     #  0xAA -> INVERTED EXCLAMATION MARK
+    '\xbf'     #  0xAB -> INVERTED QUESTION MARK
+    '\xd0'     #  0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+    '\xdd'     #  0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xde'     #  0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+    '\xae'     #  0xAF -> REGISTERED SIGN
+    '^'        #  0xB0 -> CIRCUMFLEX ACCENT
+    '\xa3'     #  0xB1 -> POUND SIGN
+    '\xa5'     #  0xB2 -> YEN SIGN
+    '\xb7'     #  0xB3 -> MIDDLE DOT
+    '\xa9'     #  0xB4 -> COPYRIGHT SIGN
+    '\xa7'     #  0xB5 -> SECTION SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
+    '['        #  0xBA -> LEFT SQUARE BRACKET
+    ']'        #  0xBB -> RIGHT SQUARE BRACKET
+    '\xaf'     #  0xBC -> MACRON
+    '\xa8'     #  0xBD -> DIAERESIS
+    '\xb4'     #  0xBE -> ACUTE ACCENT
+    '\xd7'     #  0xBF -> MULTIPLICATION SIGN
+    '{'        #  0xC0 -> LEFT CURLY BRACKET
+    'A'        #  0xC1 -> LATIN CAPITAL LETTER A
+    'B'        #  0xC2 -> LATIN CAPITAL LETTER B
+    'C'        #  0xC3 -> LATIN CAPITAL LETTER C
+    'D'        #  0xC4 -> LATIN CAPITAL LETTER D
+    'E'        #  0xC5 -> LATIN CAPITAL LETTER E
+    'F'        #  0xC6 -> LATIN CAPITAL LETTER F
+    'G'        #  0xC7 -> LATIN CAPITAL LETTER G
+    'H'        #  0xC8 -> LATIN CAPITAL LETTER H
+    'I'        #  0xC9 -> LATIN CAPITAL LETTER I
+    '\xad'     #  0xCA -> SOFT HYPHEN
+    '\xf4'     #  0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf2'     #  0xCD -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xCE -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf5'     #  0xCF -> LATIN SMALL LETTER O WITH TILDE
+    '}'        #  0xD0 -> RIGHT CURLY BRACKET
+    'J'        #  0xD1 -> LATIN CAPITAL LETTER J
+    'K'        #  0xD2 -> LATIN CAPITAL LETTER K
+    'L'        #  0xD3 -> LATIN CAPITAL LETTER L
+    'M'        #  0xD4 -> LATIN CAPITAL LETTER M
+    'N'        #  0xD5 -> LATIN CAPITAL LETTER N
+    'O'        #  0xD6 -> LATIN CAPITAL LETTER O
+    'P'        #  0xD7 -> LATIN CAPITAL LETTER P
+    'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
+    'R'        #  0xD9 -> LATIN CAPITAL LETTER R
+    '\xb9'     #  0xDA -> SUPERSCRIPT ONE
+    '\xfb'     #  0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xf9'     #  0xDD -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xDE -> LATIN SMALL LETTER U WITH ACUTE
+    '\xff'     #  0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\\'       #  0xE0 -> REVERSE SOLIDUS
+    '\xf7'     #  0xE1 -> DIVISION SIGN
+    'S'        #  0xE2 -> LATIN CAPITAL LETTER S
+    'T'        #  0xE3 -> LATIN CAPITAL LETTER T
+    'U'        #  0xE4 -> LATIN CAPITAL LETTER U
+    'V'        #  0xE5 -> LATIN CAPITAL LETTER V
+    'W'        #  0xE6 -> LATIN CAPITAL LETTER W
+    'X'        #  0xE7 -> LATIN CAPITAL LETTER X
+    'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
+    '\xb2'     #  0xEA -> SUPERSCRIPT TWO
+    '\xd4'     #  0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd6'     #  0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd2'     #  0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd5'     #  0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+    '0'        #  0xF0 -> DIGIT ZERO
+    '1'        #  0xF1 -> DIGIT ONE
+    '2'        #  0xF2 -> DIGIT TWO
+    '3'        #  0xF3 -> DIGIT THREE
+    '4'        #  0xF4 -> DIGIT FOUR
+    '5'        #  0xF5 -> DIGIT FIVE
+    '6'        #  0xF6 -> DIGIT SIX
+    '7'        #  0xF7 -> DIGIT SEVEN
+    '8'        #  0xF8 -> DIGIT EIGHT
+    '9'        #  0xF9 -> DIGIT NINE
+    '\xb3'     #  0xFA -> SUPERSCRIPT THREE
+    '\xdb'     #  0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\x9f'     #  0xFF -> CONTROL
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1250.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1250.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1250.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u20ac'   #  0x80 -> EURO SIGN
-    u'\ufffe'   #  0x81 -> UNDEFINED
-    u'\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
-    u'\ufffe'   #  0x83 -> UNDEFINED
-    u'\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\u2020'   #  0x86 -> DAGGER
-    u'\u2021'   #  0x87 -> DOUBLE DAGGER
-    u'\ufffe'   #  0x88 -> UNDEFINED
-    u'\u2030'   #  0x89 -> PER MILLE SIGN
-    u'\u0160'   #  0x8A -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u015a'   #  0x8C -> LATIN CAPITAL LETTER S WITH ACUTE
-    u'\u0164'   #  0x8D -> LATIN CAPITAL LETTER T WITH CARON
-    u'\u017d'   #  0x8E -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\u0179'   #  0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE
-    u'\ufffe'   #  0x90 -> UNDEFINED
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\ufffe'   #  0x98 -> UNDEFINED
-    u'\u2122'   #  0x99 -> TRADE MARK SIGN
-    u'\u0161'   #  0x9A -> LATIN SMALL LETTER S WITH CARON
-    u'\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\u015b'   #  0x9C -> LATIN SMALL LETTER S WITH ACUTE
-    u'\u0165'   #  0x9D -> LATIN SMALL LETTER T WITH CARON
-    u'\u017e'   #  0x9E -> LATIN SMALL LETTER Z WITH CARON
-    u'\u017a'   #  0x9F -> LATIN SMALL LETTER Z WITH ACUTE
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u02c7'   #  0xA1 -> CARON
-    u'\u02d8'   #  0xA2 -> BREVE
-    u'\u0141'   #  0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\u0104'   #  0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u015e'   #  0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\u017b'   #  0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u02db'   #  0xB2 -> OGONEK
-    u'\u0142'   #  0xB3 -> LATIN SMALL LETTER L WITH STROKE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\u0105'   #  0xB9 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u015f'   #  0xBA -> LATIN SMALL LETTER S WITH CEDILLA
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u013d'   #  0xBC -> LATIN CAPITAL LETTER L WITH CARON
-    u'\u02dd'   #  0xBD -> DOUBLE ACUTE ACCENT
-    u'\u013e'   #  0xBE -> LATIN SMALL LETTER L WITH CARON
-    u'\u017c'   #  0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
-    u'\u0154'   #  0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\u0102'   #  0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\u0139'   #  0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE
-    u'\u0106'   #  0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\u0118'   #  0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\u011a'   #  0xCC -> LATIN CAPITAL LETTER E WITH CARON
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\u010e'   #  0xCF -> LATIN CAPITAL LETTER D WITH CARON
-    u'\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
-    u'\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
-    u'\u0147'   #  0xD2 -> LATIN CAPITAL LETTER N WITH CARON
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\u0150'   #  0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\u0158'   #  0xD8 -> LATIN CAPITAL LETTER R WITH CARON
-    u'\u016e'   #  0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\u0170'   #  0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\u0162'   #  0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\u0155'   #  0xE0 -> LATIN SMALL LETTER R WITH ACUTE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\u0103'   #  0xE3 -> LATIN SMALL LETTER A WITH BREVE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u013a'   #  0xE5 -> LATIN SMALL LETTER L WITH ACUTE
-    u'\u0107'   #  0xE6 -> LATIN SMALL LETTER C WITH ACUTE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\u0119'   #  0xEA -> LATIN SMALL LETTER E WITH OGONEK
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\u011b'   #  0xEC -> LATIN SMALL LETTER E WITH CARON
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\u010f'   #  0xEF -> LATIN SMALL LETTER D WITH CARON
-    u'\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
-    u'\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
-    u'\u0148'   #  0xF2 -> LATIN SMALL LETTER N WITH CARON
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\u0151'   #  0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\u0159'   #  0xF8 -> LATIN SMALL LETTER R WITH CARON
-    u'\u016f'   #  0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\u0171'   #  0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\u0163'   #  0xFE -> LATIN SMALL LETTER T WITH CEDILLA
-    u'\u02d9'   #  0xFF -> DOT ABOVE
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u20ac'   #  0x80 -> EURO SIGN
+    '\ufffe'   #  0x81 -> UNDEFINED
+    '\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
+    '\ufffe'   #  0x83 -> UNDEFINED
+    '\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\u2020'   #  0x86 -> DAGGER
+    '\u2021'   #  0x87 -> DOUBLE DAGGER
+    '\ufffe'   #  0x88 -> UNDEFINED
+    '\u2030'   #  0x89 -> PER MILLE SIGN
+    '\u0160'   #  0x8A -> LATIN CAPITAL LETTER S WITH CARON
+    '\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u015a'   #  0x8C -> LATIN CAPITAL LETTER S WITH ACUTE
+    '\u0164'   #  0x8D -> LATIN CAPITAL LETTER T WITH CARON
+    '\u017d'   #  0x8E -> LATIN CAPITAL LETTER Z WITH CARON
+    '\u0179'   #  0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE
+    '\ufffe'   #  0x90 -> UNDEFINED
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\ufffe'   #  0x98 -> UNDEFINED
+    '\u2122'   #  0x99 -> TRADE MARK SIGN
+    '\u0161'   #  0x9A -> LATIN SMALL LETTER S WITH CARON
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\u015b'   #  0x9C -> LATIN SMALL LETTER S WITH ACUTE
+    '\u0165'   #  0x9D -> LATIN SMALL LETTER T WITH CARON
+    '\u017e'   #  0x9E -> LATIN SMALL LETTER Z WITH CARON
+    '\u017a'   #  0x9F -> LATIN SMALL LETTER Z WITH ACUTE
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u02c7'   #  0xA1 -> CARON
+    '\u02d8'   #  0xA2 -> BREVE
+    '\u0141'   #  0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\u0104'   #  0xA5 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u015e'   #  0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\u017b'   #  0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u02db'   #  0xB2 -> OGONEK
+    '\u0142'   #  0xB3 -> LATIN SMALL LETTER L WITH STROKE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\u0105'   #  0xB9 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u015f'   #  0xBA -> LATIN SMALL LETTER S WITH CEDILLA
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u013d'   #  0xBC -> LATIN CAPITAL LETTER L WITH CARON
+    '\u02dd'   #  0xBD -> DOUBLE ACUTE ACCENT
+    '\u013e'   #  0xBE -> LATIN SMALL LETTER L WITH CARON
+    '\u017c'   #  0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
+    '\u0154'   #  0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\u0102'   #  0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\u0139'   #  0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE
+    '\u0106'   #  0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\u0118'   #  0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\u011a'   #  0xCC -> LATIN CAPITAL LETTER E WITH CARON
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\u010e'   #  0xCF -> LATIN CAPITAL LETTER D WITH CARON
+    '\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+    '\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
+    '\u0147'   #  0xD2 -> LATIN CAPITAL LETTER N WITH CARON
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\u0150'   #  0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\u0158'   #  0xD8 -> LATIN CAPITAL LETTER R WITH CARON
+    '\u016e'   #  0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\u0170'   #  0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\u0162'   #  0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\u0155'   #  0xE0 -> LATIN SMALL LETTER R WITH ACUTE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\u0103'   #  0xE3 -> LATIN SMALL LETTER A WITH BREVE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u013a'   #  0xE5 -> LATIN SMALL LETTER L WITH ACUTE
+    '\u0107'   #  0xE6 -> LATIN SMALL LETTER C WITH ACUTE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\u0119'   #  0xEA -> LATIN SMALL LETTER E WITH OGONEK
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\u011b'   #  0xEC -> LATIN SMALL LETTER E WITH CARON
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\u010f'   #  0xEF -> LATIN SMALL LETTER D WITH CARON
+    '\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
+    '\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
+    '\u0148'   #  0xF2 -> LATIN SMALL LETTER N WITH CARON
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\u0151'   #  0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\u0159'   #  0xF8 -> LATIN SMALL LETTER R WITH CARON
+    '\u016f'   #  0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\u0171'   #  0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+    '\u0163'   #  0xFE -> LATIN SMALL LETTER T WITH CEDILLA
+    '\u02d9'   #  0xFF -> DOT ABOVE
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1251.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1251.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1251.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u0402'   #  0x80 -> CYRILLIC CAPITAL LETTER DJE
-    u'\u0403'   #  0x81 -> CYRILLIC CAPITAL LETTER GJE
-    u'\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u0453'   #  0x83 -> CYRILLIC SMALL LETTER GJE
-    u'\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\u2020'   #  0x86 -> DAGGER
-    u'\u2021'   #  0x87 -> DOUBLE DAGGER
-    u'\u20ac'   #  0x88 -> EURO SIGN
-    u'\u2030'   #  0x89 -> PER MILLE SIGN
-    u'\u0409'   #  0x8A -> CYRILLIC CAPITAL LETTER LJE
-    u'\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u040a'   #  0x8C -> CYRILLIC CAPITAL LETTER NJE
-    u'\u040c'   #  0x8D -> CYRILLIC CAPITAL LETTER KJE
-    u'\u040b'   #  0x8E -> CYRILLIC CAPITAL LETTER TSHE
-    u'\u040f'   #  0x8F -> CYRILLIC CAPITAL LETTER DZHE
-    u'\u0452'   #  0x90 -> CYRILLIC SMALL LETTER DJE
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\ufffe'   #  0x98 -> UNDEFINED
-    u'\u2122'   #  0x99 -> TRADE MARK SIGN
-    u'\u0459'   #  0x9A -> CYRILLIC SMALL LETTER LJE
-    u'\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\u045a'   #  0x9C -> CYRILLIC SMALL LETTER NJE
-    u'\u045c'   #  0x9D -> CYRILLIC SMALL LETTER KJE
-    u'\u045b'   #  0x9E -> CYRILLIC SMALL LETTER TSHE
-    u'\u045f'   #  0x9F -> CYRILLIC SMALL LETTER DZHE
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u040e'   #  0xA1 -> CYRILLIC CAPITAL LETTER SHORT U
-    u'\u045e'   #  0xA2 -> CYRILLIC SMALL LETTER SHORT U
-    u'\u0408'   #  0xA3 -> CYRILLIC CAPITAL LETTER JE
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\u0490'   #  0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\u0401'   #  0xA8 -> CYRILLIC CAPITAL LETTER IO
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u0404'   #  0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\u0407'   #  0xAF -> CYRILLIC CAPITAL LETTER YI
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u0406'   #  0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\u0456'   #  0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\u0491'   #  0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\u0451'   #  0xB8 -> CYRILLIC SMALL LETTER IO
-    u'\u2116'   #  0xB9 -> NUMERO SIGN
-    u'\u0454'   #  0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u0458'   #  0xBC -> CYRILLIC SMALL LETTER JE
-    u'\u0405'   #  0xBD -> CYRILLIC CAPITAL LETTER DZE
-    u'\u0455'   #  0xBE -> CYRILLIC SMALL LETTER DZE
-    u'\u0457'   #  0xBF -> CYRILLIC SMALL LETTER YI
-    u'\u0410'   #  0xC0 -> CYRILLIC CAPITAL LETTER A
-    u'\u0411'   #  0xC1 -> CYRILLIC CAPITAL LETTER BE
-    u'\u0412'   #  0xC2 -> CYRILLIC CAPITAL LETTER VE
-    u'\u0413'   #  0xC3 -> CYRILLIC CAPITAL LETTER GHE
-    u'\u0414'   #  0xC4 -> CYRILLIC CAPITAL LETTER DE
-    u'\u0415'   #  0xC5 -> CYRILLIC CAPITAL LETTER IE
-    u'\u0416'   #  0xC6 -> CYRILLIC CAPITAL LETTER ZHE
-    u'\u0417'   #  0xC7 -> CYRILLIC CAPITAL LETTER ZE
-    u'\u0418'   #  0xC8 -> CYRILLIC CAPITAL LETTER I
-    u'\u0419'   #  0xC9 -> CYRILLIC CAPITAL LETTER SHORT I
-    u'\u041a'   #  0xCA -> CYRILLIC CAPITAL LETTER KA
-    u'\u041b'   #  0xCB -> CYRILLIC CAPITAL LETTER EL
-    u'\u041c'   #  0xCC -> CYRILLIC CAPITAL LETTER EM
-    u'\u041d'   #  0xCD -> CYRILLIC CAPITAL LETTER EN
-    u'\u041e'   #  0xCE -> CYRILLIC CAPITAL LETTER O
-    u'\u041f'   #  0xCF -> CYRILLIC CAPITAL LETTER PE
-    u'\u0420'   #  0xD0 -> CYRILLIC CAPITAL LETTER ER
-    u'\u0421'   #  0xD1 -> CYRILLIC CAPITAL LETTER ES
-    u'\u0422'   #  0xD2 -> CYRILLIC CAPITAL LETTER TE
-    u'\u0423'   #  0xD3 -> CYRILLIC CAPITAL LETTER U
-    u'\u0424'   #  0xD4 -> CYRILLIC CAPITAL LETTER EF
-    u'\u0425'   #  0xD5 -> CYRILLIC CAPITAL LETTER HA
-    u'\u0426'   #  0xD6 -> CYRILLIC CAPITAL LETTER TSE
-    u'\u0427'   #  0xD7 -> CYRILLIC CAPITAL LETTER CHE
-    u'\u0428'   #  0xD8 -> CYRILLIC CAPITAL LETTER SHA
-    u'\u0429'   #  0xD9 -> CYRILLIC CAPITAL LETTER SHCHA
-    u'\u042a'   #  0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN
-    u'\u042b'   #  0xDB -> CYRILLIC CAPITAL LETTER YERU
-    u'\u042c'   #  0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN
-    u'\u042d'   #  0xDD -> CYRILLIC CAPITAL LETTER E
-    u'\u042e'   #  0xDE -> CYRILLIC CAPITAL LETTER YU
-    u'\u042f'   #  0xDF -> CYRILLIC CAPITAL LETTER YA
-    u'\u0430'   #  0xE0 -> CYRILLIC SMALL LETTER A
-    u'\u0431'   #  0xE1 -> CYRILLIC SMALL LETTER BE
-    u'\u0432'   #  0xE2 -> CYRILLIC SMALL LETTER VE
-    u'\u0433'   #  0xE3 -> CYRILLIC SMALL LETTER GHE
-    u'\u0434'   #  0xE4 -> CYRILLIC SMALL LETTER DE
-    u'\u0435'   #  0xE5 -> CYRILLIC SMALL LETTER IE
-    u'\u0436'   #  0xE6 -> CYRILLIC SMALL LETTER ZHE
-    u'\u0437'   #  0xE7 -> CYRILLIC SMALL LETTER ZE
-    u'\u0438'   #  0xE8 -> CYRILLIC SMALL LETTER I
-    u'\u0439'   #  0xE9 -> CYRILLIC SMALL LETTER SHORT I
-    u'\u043a'   #  0xEA -> CYRILLIC SMALL LETTER KA
-    u'\u043b'   #  0xEB -> CYRILLIC SMALL LETTER EL
-    u'\u043c'   #  0xEC -> CYRILLIC SMALL LETTER EM
-    u'\u043d'   #  0xED -> CYRILLIC SMALL LETTER EN
-    u'\u043e'   #  0xEE -> CYRILLIC SMALL LETTER O
-    u'\u043f'   #  0xEF -> CYRILLIC SMALL LETTER PE
-    u'\u0440'   #  0xF0 -> CYRILLIC SMALL LETTER ER
-    u'\u0441'   #  0xF1 -> CYRILLIC SMALL LETTER ES
-    u'\u0442'   #  0xF2 -> CYRILLIC SMALL LETTER TE
-    u'\u0443'   #  0xF3 -> CYRILLIC SMALL LETTER U
-    u'\u0444'   #  0xF4 -> CYRILLIC SMALL LETTER EF
-    u'\u0445'   #  0xF5 -> CYRILLIC SMALL LETTER HA
-    u'\u0446'   #  0xF6 -> CYRILLIC SMALL LETTER TSE
-    u'\u0447'   #  0xF7 -> CYRILLIC SMALL LETTER CHE
-    u'\u0448'   #  0xF8 -> CYRILLIC SMALL LETTER SHA
-    u'\u0449'   #  0xF9 -> CYRILLIC SMALL LETTER SHCHA
-    u'\u044a'   #  0xFA -> CYRILLIC SMALL LETTER HARD SIGN
-    u'\u044b'   #  0xFB -> CYRILLIC SMALL LETTER YERU
-    u'\u044c'   #  0xFC -> CYRILLIC SMALL LETTER SOFT SIGN
-    u'\u044d'   #  0xFD -> CYRILLIC SMALL LETTER E
-    u'\u044e'   #  0xFE -> CYRILLIC SMALL LETTER YU
-    u'\u044f'   #  0xFF -> CYRILLIC SMALL LETTER YA
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u0402'   #  0x80 -> CYRILLIC CAPITAL LETTER DJE
+    '\u0403'   #  0x81 -> CYRILLIC CAPITAL LETTER GJE
+    '\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
+    '\u0453'   #  0x83 -> CYRILLIC SMALL LETTER GJE
+    '\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\u2020'   #  0x86 -> DAGGER
+    '\u2021'   #  0x87 -> DOUBLE DAGGER
+    '\u20ac'   #  0x88 -> EURO SIGN
+    '\u2030'   #  0x89 -> PER MILLE SIGN
+    '\u0409'   #  0x8A -> CYRILLIC CAPITAL LETTER LJE
+    '\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u040a'   #  0x8C -> CYRILLIC CAPITAL LETTER NJE
+    '\u040c'   #  0x8D -> CYRILLIC CAPITAL LETTER KJE
+    '\u040b'   #  0x8E -> CYRILLIC CAPITAL LETTER TSHE
+    '\u040f'   #  0x8F -> CYRILLIC CAPITAL LETTER DZHE
+    '\u0452'   #  0x90 -> CYRILLIC SMALL LETTER DJE
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\ufffe'   #  0x98 -> UNDEFINED
+    '\u2122'   #  0x99 -> TRADE MARK SIGN
+    '\u0459'   #  0x9A -> CYRILLIC SMALL LETTER LJE
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\u045a'   #  0x9C -> CYRILLIC SMALL LETTER NJE
+    '\u045c'   #  0x9D -> CYRILLIC SMALL LETTER KJE
+    '\u045b'   #  0x9E -> CYRILLIC SMALL LETTER TSHE
+    '\u045f'   #  0x9F -> CYRILLIC SMALL LETTER DZHE
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u040e'   #  0xA1 -> CYRILLIC CAPITAL LETTER SHORT U
+    '\u045e'   #  0xA2 -> CYRILLIC SMALL LETTER SHORT U
+    '\u0408'   #  0xA3 -> CYRILLIC CAPITAL LETTER JE
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\u0490'   #  0xA5 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\u0401'   #  0xA8 -> CYRILLIC CAPITAL LETTER IO
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u0404'   #  0xAA -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\u0407'   #  0xAF -> CYRILLIC CAPITAL LETTER YI
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u0406'   #  0xB2 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\u0456'   #  0xB3 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\u0491'   #  0xB4 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\u0451'   #  0xB8 -> CYRILLIC SMALL LETTER IO
+    '\u2116'   #  0xB9 -> NUMERO SIGN
+    '\u0454'   #  0xBA -> CYRILLIC SMALL LETTER UKRAINIAN IE
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u0458'   #  0xBC -> CYRILLIC SMALL LETTER JE
+    '\u0405'   #  0xBD -> CYRILLIC CAPITAL LETTER DZE
+    '\u0455'   #  0xBE -> CYRILLIC SMALL LETTER DZE
+    '\u0457'   #  0xBF -> CYRILLIC SMALL LETTER YI
+    '\u0410'   #  0xC0 -> CYRILLIC CAPITAL LETTER A
+    '\u0411'   #  0xC1 -> CYRILLIC CAPITAL LETTER BE
+    '\u0412'   #  0xC2 -> CYRILLIC CAPITAL LETTER VE
+    '\u0413'   #  0xC3 -> CYRILLIC CAPITAL LETTER GHE
+    '\u0414'   #  0xC4 -> CYRILLIC CAPITAL LETTER DE
+    '\u0415'   #  0xC5 -> CYRILLIC CAPITAL LETTER IE
+    '\u0416'   #  0xC6 -> CYRILLIC CAPITAL LETTER ZHE
+    '\u0417'   #  0xC7 -> CYRILLIC CAPITAL LETTER ZE
+    '\u0418'   #  0xC8 -> CYRILLIC CAPITAL LETTER I
+    '\u0419'   #  0xC9 -> CYRILLIC CAPITAL LETTER SHORT I
+    '\u041a'   #  0xCA -> CYRILLIC CAPITAL LETTER KA
+    '\u041b'   #  0xCB -> CYRILLIC CAPITAL LETTER EL
+    '\u041c'   #  0xCC -> CYRILLIC CAPITAL LETTER EM
+    '\u041d'   #  0xCD -> CYRILLIC CAPITAL LETTER EN
+    '\u041e'   #  0xCE -> CYRILLIC CAPITAL LETTER O
+    '\u041f'   #  0xCF -> CYRILLIC CAPITAL LETTER PE
+    '\u0420'   #  0xD0 -> CYRILLIC CAPITAL LETTER ER
+    '\u0421'   #  0xD1 -> CYRILLIC CAPITAL LETTER ES
+    '\u0422'   #  0xD2 -> CYRILLIC CAPITAL LETTER TE
+    '\u0423'   #  0xD3 -> CYRILLIC CAPITAL LETTER U
+    '\u0424'   #  0xD4 -> CYRILLIC CAPITAL LETTER EF
+    '\u0425'   #  0xD5 -> CYRILLIC CAPITAL LETTER HA
+    '\u0426'   #  0xD6 -> CYRILLIC CAPITAL LETTER TSE
+    '\u0427'   #  0xD7 -> CYRILLIC CAPITAL LETTER CHE
+    '\u0428'   #  0xD8 -> CYRILLIC CAPITAL LETTER SHA
+    '\u0429'   #  0xD9 -> CYRILLIC CAPITAL LETTER SHCHA
+    '\u042a'   #  0xDA -> CYRILLIC CAPITAL LETTER HARD SIGN
+    '\u042b'   #  0xDB -> CYRILLIC CAPITAL LETTER YERU
+    '\u042c'   #  0xDC -> CYRILLIC CAPITAL LETTER SOFT SIGN
+    '\u042d'   #  0xDD -> CYRILLIC CAPITAL LETTER E
+    '\u042e'   #  0xDE -> CYRILLIC CAPITAL LETTER YU
+    '\u042f'   #  0xDF -> CYRILLIC CAPITAL LETTER YA
+    '\u0430'   #  0xE0 -> CYRILLIC SMALL LETTER A
+    '\u0431'   #  0xE1 -> CYRILLIC SMALL LETTER BE
+    '\u0432'   #  0xE2 -> CYRILLIC SMALL LETTER VE
+    '\u0433'   #  0xE3 -> CYRILLIC SMALL LETTER GHE
+    '\u0434'   #  0xE4 -> CYRILLIC SMALL LETTER DE
+    '\u0435'   #  0xE5 -> CYRILLIC SMALL LETTER IE
+    '\u0436'   #  0xE6 -> CYRILLIC SMALL LETTER ZHE
+    '\u0437'   #  0xE7 -> CYRILLIC SMALL LETTER ZE
+    '\u0438'   #  0xE8 -> CYRILLIC SMALL LETTER I
+    '\u0439'   #  0xE9 -> CYRILLIC SMALL LETTER SHORT I
+    '\u043a'   #  0xEA -> CYRILLIC SMALL LETTER KA
+    '\u043b'   #  0xEB -> CYRILLIC SMALL LETTER EL
+    '\u043c'   #  0xEC -> CYRILLIC SMALL LETTER EM
+    '\u043d'   #  0xED -> CYRILLIC SMALL LETTER EN
+    '\u043e'   #  0xEE -> CYRILLIC SMALL LETTER O
+    '\u043f'   #  0xEF -> CYRILLIC SMALL LETTER PE
+    '\u0440'   #  0xF0 -> CYRILLIC SMALL LETTER ER
+    '\u0441'   #  0xF1 -> CYRILLIC SMALL LETTER ES
+    '\u0442'   #  0xF2 -> CYRILLIC SMALL LETTER TE
+    '\u0443'   #  0xF3 -> CYRILLIC SMALL LETTER U
+    '\u0444'   #  0xF4 -> CYRILLIC SMALL LETTER EF
+    '\u0445'   #  0xF5 -> CYRILLIC SMALL LETTER HA
+    '\u0446'   #  0xF6 -> CYRILLIC SMALL LETTER TSE
+    '\u0447'   #  0xF7 -> CYRILLIC SMALL LETTER CHE
+    '\u0448'   #  0xF8 -> CYRILLIC SMALL LETTER SHA
+    '\u0449'   #  0xF9 -> CYRILLIC SMALL LETTER SHCHA
+    '\u044a'   #  0xFA -> CYRILLIC SMALL LETTER HARD SIGN
+    '\u044b'   #  0xFB -> CYRILLIC SMALL LETTER YERU
+    '\u044c'   #  0xFC -> CYRILLIC SMALL LETTER SOFT SIGN
+    '\u044d'   #  0xFD -> CYRILLIC SMALL LETTER E
+    '\u044e'   #  0xFE -> CYRILLIC SMALL LETTER YU
+    '\u044f'   #  0xFF -> CYRILLIC SMALL LETTER YA
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1252.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1252.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1252.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u20ac'   #  0x80 -> EURO SIGN
-    u'\ufffe'   #  0x81 -> UNDEFINED
-    u'\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\u2020'   #  0x86 -> DAGGER
-    u'\u2021'   #  0x87 -> DOUBLE DAGGER
-    u'\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u2030'   #  0x89 -> PER MILLE SIGN
-    u'\u0160'   #  0x8A -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u0152'   #  0x8C -> LATIN CAPITAL LIGATURE OE
-    u'\ufffe'   #  0x8D -> UNDEFINED
-    u'\u017d'   #  0x8E -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\ufffe'   #  0x8F -> UNDEFINED
-    u'\ufffe'   #  0x90 -> UNDEFINED
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\u02dc'   #  0x98 -> SMALL TILDE
-    u'\u2122'   #  0x99 -> TRADE MARK SIGN
-    u'\u0161'   #  0x9A -> LATIN SMALL LETTER S WITH CARON
-    u'\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\u0153'   #  0x9C -> LATIN SMALL LIGATURE OE
-    u'\ufffe'   #  0x9D -> UNDEFINED
-    u'\u017e'   #  0x9E -> LATIN SMALL LETTER Z WITH CARON
-    u'\u0178'   #  0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\xbf'     #  0xBF -> INVERTED QUESTION MARK
-    u'\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xd0'     #  0xD0 -> LATIN CAPITAL LETTER ETH
-    u'\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf0'     #  0xF0 -> LATIN SMALL LETTER ETH
-    u'\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xfe'     #  0xFE -> LATIN SMALL LETTER THORN
-    u'\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u20ac'   #  0x80 -> EURO SIGN
+    '\ufffe'   #  0x81 -> UNDEFINED
+    '\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
+    '\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
+    '\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\u2020'   #  0x86 -> DAGGER
+    '\u2021'   #  0x87 -> DOUBLE DAGGER
+    '\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u2030'   #  0x89 -> PER MILLE SIGN
+    '\u0160'   #  0x8A -> LATIN CAPITAL LETTER S WITH CARON
+    '\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u0152'   #  0x8C -> LATIN CAPITAL LIGATURE OE
+    '\ufffe'   #  0x8D -> UNDEFINED
+    '\u017d'   #  0x8E -> LATIN CAPITAL LETTER Z WITH CARON
+    '\ufffe'   #  0x8F -> UNDEFINED
+    '\ufffe'   #  0x90 -> UNDEFINED
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\u02dc'   #  0x98 -> SMALL TILDE
+    '\u2122'   #  0x99 -> TRADE MARK SIGN
+    '\u0161'   #  0x9A -> LATIN SMALL LETTER S WITH CARON
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\u0153'   #  0x9C -> LATIN SMALL LIGATURE OE
+    '\ufffe'   #  0x9D -> UNDEFINED
+    '\u017e'   #  0x9E -> LATIN SMALL LETTER Z WITH CARON
+    '\u0178'   #  0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\xbf'     #  0xBF -> INVERTED QUESTION MARK
+    '\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xd0'     #  0xD0 -> LATIN CAPITAL LETTER ETH
+    '\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf0'     #  0xF0 -> LATIN SMALL LETTER ETH
+    '\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xfe'     #  0xFE -> LATIN SMALL LETTER THORN
+    '\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1253.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1253.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1253.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u20ac'   #  0x80 -> EURO SIGN
-    u'\ufffe'   #  0x81 -> UNDEFINED
-    u'\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\u2020'   #  0x86 -> DAGGER
-    u'\u2021'   #  0x87 -> DOUBLE DAGGER
-    u'\ufffe'   #  0x88 -> UNDEFINED
-    u'\u2030'   #  0x89 -> PER MILLE SIGN
-    u'\ufffe'   #  0x8A -> UNDEFINED
-    u'\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\ufffe'   #  0x8C -> UNDEFINED
-    u'\ufffe'   #  0x8D -> UNDEFINED
-    u'\ufffe'   #  0x8E -> UNDEFINED
-    u'\ufffe'   #  0x8F -> UNDEFINED
-    u'\ufffe'   #  0x90 -> UNDEFINED
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\ufffe'   #  0x98 -> UNDEFINED
-    u'\u2122'   #  0x99 -> TRADE MARK SIGN
-    u'\ufffe'   #  0x9A -> UNDEFINED
-    u'\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\ufffe'   #  0x9C -> UNDEFINED
-    u'\ufffe'   #  0x9D -> UNDEFINED
-    u'\ufffe'   #  0x9E -> UNDEFINED
-    u'\ufffe'   #  0x9F -> UNDEFINED
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u0385'   #  0xA1 -> GREEK DIALYTIKA TONOS
-    u'\u0386'   #  0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\ufffe'   #  0xAA -> UNDEFINED
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\u2015'   #  0xAF -> HORIZONTAL BAR
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\u0384'   #  0xB4 -> GREEK TONOS
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\u0388'   #  0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
-    u'\u0389'   #  0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS
-    u'\u038a'   #  0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u038c'   #  0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\u038e'   #  0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS
-    u'\u038f'   #  0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS
-    u'\u0390'   #  0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
-    u'\u0391'   #  0xC1 -> GREEK CAPITAL LETTER ALPHA
-    u'\u0392'   #  0xC2 -> GREEK CAPITAL LETTER BETA
-    u'\u0393'   #  0xC3 -> GREEK CAPITAL LETTER GAMMA
-    u'\u0394'   #  0xC4 -> GREEK CAPITAL LETTER DELTA
-    u'\u0395'   #  0xC5 -> GREEK CAPITAL LETTER EPSILON
-    u'\u0396'   #  0xC6 -> GREEK CAPITAL LETTER ZETA
-    u'\u0397'   #  0xC7 -> GREEK CAPITAL LETTER ETA
-    u'\u0398'   #  0xC8 -> GREEK CAPITAL LETTER THETA
-    u'\u0399'   #  0xC9 -> GREEK CAPITAL LETTER IOTA
-    u'\u039a'   #  0xCA -> GREEK CAPITAL LETTER KAPPA
-    u'\u039b'   #  0xCB -> GREEK CAPITAL LETTER LAMDA
-    u'\u039c'   #  0xCC -> GREEK CAPITAL LETTER MU
-    u'\u039d'   #  0xCD -> GREEK CAPITAL LETTER NU
-    u'\u039e'   #  0xCE -> GREEK CAPITAL LETTER XI
-    u'\u039f'   #  0xCF -> GREEK CAPITAL LETTER OMICRON
-    u'\u03a0'   #  0xD0 -> GREEK CAPITAL LETTER PI
-    u'\u03a1'   #  0xD1 -> GREEK CAPITAL LETTER RHO
-    u'\ufffe'   #  0xD2 -> UNDEFINED
-    u'\u03a3'   #  0xD3 -> GREEK CAPITAL LETTER SIGMA
-    u'\u03a4'   #  0xD4 -> GREEK CAPITAL LETTER TAU
-    u'\u03a5'   #  0xD5 -> GREEK CAPITAL LETTER UPSILON
-    u'\u03a6'   #  0xD6 -> GREEK CAPITAL LETTER PHI
-    u'\u03a7'   #  0xD7 -> GREEK CAPITAL LETTER CHI
-    u'\u03a8'   #  0xD8 -> GREEK CAPITAL LETTER PSI
-    u'\u03a9'   #  0xD9 -> GREEK CAPITAL LETTER OMEGA
-    u'\u03aa'   #  0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
-    u'\u03ab'   #  0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
-    u'\u03ac'   #  0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS
-    u'\u03ad'   #  0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS
-    u'\u03ae'   #  0xDE -> GREEK SMALL LETTER ETA WITH TONOS
-    u'\u03af'   #  0xDF -> GREEK SMALL LETTER IOTA WITH TONOS
-    u'\u03b0'   #  0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
-    u'\u03b1'   #  0xE1 -> GREEK SMALL LETTER ALPHA
-    u'\u03b2'   #  0xE2 -> GREEK SMALL LETTER BETA
-    u'\u03b3'   #  0xE3 -> GREEK SMALL LETTER GAMMA
-    u'\u03b4'   #  0xE4 -> GREEK SMALL LETTER DELTA
-    u'\u03b5'   #  0xE5 -> GREEK SMALL LETTER EPSILON
-    u'\u03b6'   #  0xE6 -> GREEK SMALL LETTER ZETA
-    u'\u03b7'   #  0xE7 -> GREEK SMALL LETTER ETA
-    u'\u03b8'   #  0xE8 -> GREEK SMALL LETTER THETA
-    u'\u03b9'   #  0xE9 -> GREEK SMALL LETTER IOTA
-    u'\u03ba'   #  0xEA -> GREEK SMALL LETTER KAPPA
-    u'\u03bb'   #  0xEB -> GREEK SMALL LETTER LAMDA
-    u'\u03bc'   #  0xEC -> GREEK SMALL LETTER MU
-    u'\u03bd'   #  0xED -> GREEK SMALL LETTER NU
-    u'\u03be'   #  0xEE -> GREEK SMALL LETTER XI
-    u'\u03bf'   #  0xEF -> GREEK SMALL LETTER OMICRON
-    u'\u03c0'   #  0xF0 -> GREEK SMALL LETTER PI
-    u'\u03c1'   #  0xF1 -> GREEK SMALL LETTER RHO
-    u'\u03c2'   #  0xF2 -> GREEK SMALL LETTER FINAL SIGMA
-    u'\u03c3'   #  0xF3 -> GREEK SMALL LETTER SIGMA
-    u'\u03c4'   #  0xF4 -> GREEK SMALL LETTER TAU
-    u'\u03c5'   #  0xF5 -> GREEK SMALL LETTER UPSILON
-    u'\u03c6'   #  0xF6 -> GREEK SMALL LETTER PHI
-    u'\u03c7'   #  0xF7 -> GREEK SMALL LETTER CHI
-    u'\u03c8'   #  0xF8 -> GREEK SMALL LETTER PSI
-    u'\u03c9'   #  0xF9 -> GREEK SMALL LETTER OMEGA
-    u'\u03ca'   #  0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
-    u'\u03cb'   #  0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
-    u'\u03cc'   #  0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS
-    u'\u03cd'   #  0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS
-    u'\u03ce'   #  0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS
-    u'\ufffe'   #  0xFF -> UNDEFINED
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u20ac'   #  0x80 -> EURO SIGN
+    '\ufffe'   #  0x81 -> UNDEFINED
+    '\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
+    '\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
+    '\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\u2020'   #  0x86 -> DAGGER
+    '\u2021'   #  0x87 -> DOUBLE DAGGER
+    '\ufffe'   #  0x88 -> UNDEFINED
+    '\u2030'   #  0x89 -> PER MILLE SIGN
+    '\ufffe'   #  0x8A -> UNDEFINED
+    '\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\ufffe'   #  0x8C -> UNDEFINED
+    '\ufffe'   #  0x8D -> UNDEFINED
+    '\ufffe'   #  0x8E -> UNDEFINED
+    '\ufffe'   #  0x8F -> UNDEFINED
+    '\ufffe'   #  0x90 -> UNDEFINED
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\ufffe'   #  0x98 -> UNDEFINED
+    '\u2122'   #  0x99 -> TRADE MARK SIGN
+    '\ufffe'   #  0x9A -> UNDEFINED
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\ufffe'   #  0x9C -> UNDEFINED
+    '\ufffe'   #  0x9D -> UNDEFINED
+    '\ufffe'   #  0x9E -> UNDEFINED
+    '\ufffe'   #  0x9F -> UNDEFINED
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u0385'   #  0xA1 -> GREEK DIALYTIKA TONOS
+    '\u0386'   #  0xA2 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\ufffe'   #  0xAA -> UNDEFINED
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\u2015'   #  0xAF -> HORIZONTAL BAR
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\u0384'   #  0xB4 -> GREEK TONOS
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\u0388'   #  0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+    '\u0389'   #  0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS
+    '\u038a'   #  0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u038c'   #  0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\u038e'   #  0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+    '\u038f'   #  0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+    '\u0390'   #  0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+    '\u0391'   #  0xC1 -> GREEK CAPITAL LETTER ALPHA
+    '\u0392'   #  0xC2 -> GREEK CAPITAL LETTER BETA
+    '\u0393'   #  0xC3 -> GREEK CAPITAL LETTER GAMMA
+    '\u0394'   #  0xC4 -> GREEK CAPITAL LETTER DELTA
+    '\u0395'   #  0xC5 -> GREEK CAPITAL LETTER EPSILON
+    '\u0396'   #  0xC6 -> GREEK CAPITAL LETTER ZETA
+    '\u0397'   #  0xC7 -> GREEK CAPITAL LETTER ETA
+    '\u0398'   #  0xC8 -> GREEK CAPITAL LETTER THETA
+    '\u0399'   #  0xC9 -> GREEK CAPITAL LETTER IOTA
+    '\u039a'   #  0xCA -> GREEK CAPITAL LETTER KAPPA
+    '\u039b'   #  0xCB -> GREEK CAPITAL LETTER LAMDA
+    '\u039c'   #  0xCC -> GREEK CAPITAL LETTER MU
+    '\u039d'   #  0xCD -> GREEK CAPITAL LETTER NU
+    '\u039e'   #  0xCE -> GREEK CAPITAL LETTER XI
+    '\u039f'   #  0xCF -> GREEK CAPITAL LETTER OMICRON
+    '\u03a0'   #  0xD0 -> GREEK CAPITAL LETTER PI
+    '\u03a1'   #  0xD1 -> GREEK CAPITAL LETTER RHO
+    '\ufffe'   #  0xD2 -> UNDEFINED
+    '\u03a3'   #  0xD3 -> GREEK CAPITAL LETTER SIGMA
+    '\u03a4'   #  0xD4 -> GREEK CAPITAL LETTER TAU
+    '\u03a5'   #  0xD5 -> GREEK CAPITAL LETTER UPSILON
+    '\u03a6'   #  0xD6 -> GREEK CAPITAL LETTER PHI
+    '\u03a7'   #  0xD7 -> GREEK CAPITAL LETTER CHI
+    '\u03a8'   #  0xD8 -> GREEK CAPITAL LETTER PSI
+    '\u03a9'   #  0xD9 -> GREEK CAPITAL LETTER OMEGA
+    '\u03aa'   #  0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+    '\u03ab'   #  0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+    '\u03ac'   #  0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS
+    '\u03ad'   #  0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS
+    '\u03ae'   #  0xDE -> GREEK SMALL LETTER ETA WITH TONOS
+    '\u03af'   #  0xDF -> GREEK SMALL LETTER IOTA WITH TONOS
+    '\u03b0'   #  0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+    '\u03b1'   #  0xE1 -> GREEK SMALL LETTER ALPHA
+    '\u03b2'   #  0xE2 -> GREEK SMALL LETTER BETA
+    '\u03b3'   #  0xE3 -> GREEK SMALL LETTER GAMMA
+    '\u03b4'   #  0xE4 -> GREEK SMALL LETTER DELTA
+    '\u03b5'   #  0xE5 -> GREEK SMALL LETTER EPSILON
+    '\u03b6'   #  0xE6 -> GREEK SMALL LETTER ZETA
+    '\u03b7'   #  0xE7 -> GREEK SMALL LETTER ETA
+    '\u03b8'   #  0xE8 -> GREEK SMALL LETTER THETA
+    '\u03b9'   #  0xE9 -> GREEK SMALL LETTER IOTA
+    '\u03ba'   #  0xEA -> GREEK SMALL LETTER KAPPA
+    '\u03bb'   #  0xEB -> GREEK SMALL LETTER LAMDA
+    '\u03bc'   #  0xEC -> GREEK SMALL LETTER MU
+    '\u03bd'   #  0xED -> GREEK SMALL LETTER NU
+    '\u03be'   #  0xEE -> GREEK SMALL LETTER XI
+    '\u03bf'   #  0xEF -> GREEK SMALL LETTER OMICRON
+    '\u03c0'   #  0xF0 -> GREEK SMALL LETTER PI
+    '\u03c1'   #  0xF1 -> GREEK SMALL LETTER RHO
+    '\u03c2'   #  0xF2 -> GREEK SMALL LETTER FINAL SIGMA
+    '\u03c3'   #  0xF3 -> GREEK SMALL LETTER SIGMA
+    '\u03c4'   #  0xF4 -> GREEK SMALL LETTER TAU
+    '\u03c5'   #  0xF5 -> GREEK SMALL LETTER UPSILON
+    '\u03c6'   #  0xF6 -> GREEK SMALL LETTER PHI
+    '\u03c7'   #  0xF7 -> GREEK SMALL LETTER CHI
+    '\u03c8'   #  0xF8 -> GREEK SMALL LETTER PSI
+    '\u03c9'   #  0xF9 -> GREEK SMALL LETTER OMEGA
+    '\u03ca'   #  0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+    '\u03cb'   #  0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+    '\u03cc'   #  0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS
+    '\u03cd'   #  0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS
+    '\u03ce'   #  0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS
+    '\ufffe'   #  0xFF -> UNDEFINED
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1254.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1254.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1254.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u20ac'   #  0x80 -> EURO SIGN
-    u'\ufffe'   #  0x81 -> UNDEFINED
-    u'\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\u2020'   #  0x86 -> DAGGER
-    u'\u2021'   #  0x87 -> DOUBLE DAGGER
-    u'\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u2030'   #  0x89 -> PER MILLE SIGN
-    u'\u0160'   #  0x8A -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u0152'   #  0x8C -> LATIN CAPITAL LIGATURE OE
-    u'\ufffe'   #  0x8D -> UNDEFINED
-    u'\ufffe'   #  0x8E -> UNDEFINED
-    u'\ufffe'   #  0x8F -> UNDEFINED
-    u'\ufffe'   #  0x90 -> UNDEFINED
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\u02dc'   #  0x98 -> SMALL TILDE
-    u'\u2122'   #  0x99 -> TRADE MARK SIGN
-    u'\u0161'   #  0x9A -> LATIN SMALL LETTER S WITH CARON
-    u'\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\u0153'   #  0x9C -> LATIN SMALL LIGATURE OE
-    u'\ufffe'   #  0x9D -> UNDEFINED
-    u'\ufffe'   #  0x9E -> UNDEFINED
-    u'\u0178'   #  0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\xbf'     #  0xBF -> INVERTED QUESTION MARK
-    u'\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\u011e'   #  0xD0 -> LATIN CAPITAL LETTER G WITH BREVE
-    u'\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u0130'   #  0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE
-    u'\u015e'   #  0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\u011f'   #  0xF0 -> LATIN SMALL LETTER G WITH BREVE
-    u'\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u0131'   #  0xFD -> LATIN SMALL LETTER DOTLESS I
-    u'\u015f'   #  0xFE -> LATIN SMALL LETTER S WITH CEDILLA
-    u'\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u20ac'   #  0x80 -> EURO SIGN
+    '\ufffe'   #  0x81 -> UNDEFINED
+    '\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
+    '\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
+    '\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\u2020'   #  0x86 -> DAGGER
+    '\u2021'   #  0x87 -> DOUBLE DAGGER
+    '\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u2030'   #  0x89 -> PER MILLE SIGN
+    '\u0160'   #  0x8A -> LATIN CAPITAL LETTER S WITH CARON
+    '\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u0152'   #  0x8C -> LATIN CAPITAL LIGATURE OE
+    '\ufffe'   #  0x8D -> UNDEFINED
+    '\ufffe'   #  0x8E -> UNDEFINED
+    '\ufffe'   #  0x8F -> UNDEFINED
+    '\ufffe'   #  0x90 -> UNDEFINED
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\u02dc'   #  0x98 -> SMALL TILDE
+    '\u2122'   #  0x99 -> TRADE MARK SIGN
+    '\u0161'   #  0x9A -> LATIN SMALL LETTER S WITH CARON
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\u0153'   #  0x9C -> LATIN SMALL LIGATURE OE
+    '\ufffe'   #  0x9D -> UNDEFINED
+    '\ufffe'   #  0x9E -> UNDEFINED
+    '\u0178'   #  0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\xbf'     #  0xBF -> INVERTED QUESTION MARK
+    '\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\u011e'   #  0xD0 -> LATIN CAPITAL LETTER G WITH BREVE
+    '\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u0130'   #  0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+    '\u015e'   #  0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\u011f'   #  0xF0 -> LATIN SMALL LETTER G WITH BREVE
+    '\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u0131'   #  0xFD -> LATIN SMALL LETTER DOTLESS I
+    '\u015f'   #  0xFE -> LATIN SMALL LETTER S WITH CEDILLA
+    '\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1255.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1255.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1255.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u20ac'   #  0x80 -> EURO SIGN
-    u'\ufffe'   #  0x81 -> UNDEFINED
-    u'\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\u2020'   #  0x86 -> DAGGER
-    u'\u2021'   #  0x87 -> DOUBLE DAGGER
-    u'\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u2030'   #  0x89 -> PER MILLE SIGN
-    u'\ufffe'   #  0x8A -> UNDEFINED
-    u'\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\ufffe'   #  0x8C -> UNDEFINED
-    u'\ufffe'   #  0x8D -> UNDEFINED
-    u'\ufffe'   #  0x8E -> UNDEFINED
-    u'\ufffe'   #  0x8F -> UNDEFINED
-    u'\ufffe'   #  0x90 -> UNDEFINED
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\u02dc'   #  0x98 -> SMALL TILDE
-    u'\u2122'   #  0x99 -> TRADE MARK SIGN
-    u'\ufffe'   #  0x9A -> UNDEFINED
-    u'\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\ufffe'   #  0x9C -> UNDEFINED
-    u'\ufffe'   #  0x9D -> UNDEFINED
-    u'\ufffe'   #  0x9E -> UNDEFINED
-    u'\ufffe'   #  0x9F -> UNDEFINED
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\u20aa'   #  0xA4 -> NEW SHEQEL SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\xd7'     #  0xAA -> MULTIPLICATION SIGN
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\xf7'     #  0xBA -> DIVISION SIGN
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\xbf'     #  0xBF -> INVERTED QUESTION MARK
-    u'\u05b0'   #  0xC0 -> HEBREW POINT SHEVA
-    u'\u05b1'   #  0xC1 -> HEBREW POINT HATAF SEGOL
-    u'\u05b2'   #  0xC2 -> HEBREW POINT HATAF PATAH
-    u'\u05b3'   #  0xC3 -> HEBREW POINT HATAF QAMATS
-    u'\u05b4'   #  0xC4 -> HEBREW POINT HIRIQ
-    u'\u05b5'   #  0xC5 -> HEBREW POINT TSERE
-    u'\u05b6'   #  0xC6 -> HEBREW POINT SEGOL
-    u'\u05b7'   #  0xC7 -> HEBREW POINT PATAH
-    u'\u05b8'   #  0xC8 -> HEBREW POINT QAMATS
-    u'\u05b9'   #  0xC9 -> HEBREW POINT HOLAM
-    u'\ufffe'   #  0xCA -> UNDEFINED
-    u'\u05bb'   #  0xCB -> HEBREW POINT QUBUTS
-    u'\u05bc'   #  0xCC -> HEBREW POINT DAGESH OR MAPIQ
-    u'\u05bd'   #  0xCD -> HEBREW POINT METEG
-    u'\u05be'   #  0xCE -> HEBREW PUNCTUATION MAQAF
-    u'\u05bf'   #  0xCF -> HEBREW POINT RAFE
-    u'\u05c0'   #  0xD0 -> HEBREW PUNCTUATION PASEQ
-    u'\u05c1'   #  0xD1 -> HEBREW POINT SHIN DOT
-    u'\u05c2'   #  0xD2 -> HEBREW POINT SIN DOT
-    u'\u05c3'   #  0xD3 -> HEBREW PUNCTUATION SOF PASUQ
-    u'\u05f0'   #  0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV
-    u'\u05f1'   #  0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD
-    u'\u05f2'   #  0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD
-    u'\u05f3'   #  0xD7 -> HEBREW PUNCTUATION GERESH
-    u'\u05f4'   #  0xD8 -> HEBREW PUNCTUATION GERSHAYIM
-    u'\ufffe'   #  0xD9 -> UNDEFINED
-    u'\ufffe'   #  0xDA -> UNDEFINED
-    u'\ufffe'   #  0xDB -> UNDEFINED
-    u'\ufffe'   #  0xDC -> UNDEFINED
-    u'\ufffe'   #  0xDD -> UNDEFINED
-    u'\ufffe'   #  0xDE -> UNDEFINED
-    u'\ufffe'   #  0xDF -> UNDEFINED
-    u'\u05d0'   #  0xE0 -> HEBREW LETTER ALEF
-    u'\u05d1'   #  0xE1 -> HEBREW LETTER BET
-    u'\u05d2'   #  0xE2 -> HEBREW LETTER GIMEL
-    u'\u05d3'   #  0xE3 -> HEBREW LETTER DALET
-    u'\u05d4'   #  0xE4 -> HEBREW LETTER HE
-    u'\u05d5'   #  0xE5 -> HEBREW LETTER VAV
-    u'\u05d6'   #  0xE6 -> HEBREW LETTER ZAYIN
-    u'\u05d7'   #  0xE7 -> HEBREW LETTER HET
-    u'\u05d8'   #  0xE8 -> HEBREW LETTER TET
-    u'\u05d9'   #  0xE9 -> HEBREW LETTER YOD
-    u'\u05da'   #  0xEA -> HEBREW LETTER FINAL KAF
-    u'\u05db'   #  0xEB -> HEBREW LETTER KAF
-    u'\u05dc'   #  0xEC -> HEBREW LETTER LAMED
-    u'\u05dd'   #  0xED -> HEBREW LETTER FINAL MEM
-    u'\u05de'   #  0xEE -> HEBREW LETTER MEM
-    u'\u05df'   #  0xEF -> HEBREW LETTER FINAL NUN
-    u'\u05e0'   #  0xF0 -> HEBREW LETTER NUN
-    u'\u05e1'   #  0xF1 -> HEBREW LETTER SAMEKH
-    u'\u05e2'   #  0xF2 -> HEBREW LETTER AYIN
-    u'\u05e3'   #  0xF3 -> HEBREW LETTER FINAL PE
-    u'\u05e4'   #  0xF4 -> HEBREW LETTER PE
-    u'\u05e5'   #  0xF5 -> HEBREW LETTER FINAL TSADI
-    u'\u05e6'   #  0xF6 -> HEBREW LETTER TSADI
-    u'\u05e7'   #  0xF7 -> HEBREW LETTER QOF
-    u'\u05e8'   #  0xF8 -> HEBREW LETTER RESH
-    u'\u05e9'   #  0xF9 -> HEBREW LETTER SHIN
-    u'\u05ea'   #  0xFA -> HEBREW LETTER TAV
-    u'\ufffe'   #  0xFB -> UNDEFINED
-    u'\ufffe'   #  0xFC -> UNDEFINED
-    u'\u200e'   #  0xFD -> LEFT-TO-RIGHT MARK
-    u'\u200f'   #  0xFE -> RIGHT-TO-LEFT MARK
-    u'\ufffe'   #  0xFF -> UNDEFINED
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u20ac'   #  0x80 -> EURO SIGN
+    '\ufffe'   #  0x81 -> UNDEFINED
+    '\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
+    '\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
+    '\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\u2020'   #  0x86 -> DAGGER
+    '\u2021'   #  0x87 -> DOUBLE DAGGER
+    '\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u2030'   #  0x89 -> PER MILLE SIGN
+    '\ufffe'   #  0x8A -> UNDEFINED
+    '\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\ufffe'   #  0x8C -> UNDEFINED
+    '\ufffe'   #  0x8D -> UNDEFINED
+    '\ufffe'   #  0x8E -> UNDEFINED
+    '\ufffe'   #  0x8F -> UNDEFINED
+    '\ufffe'   #  0x90 -> UNDEFINED
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\u02dc'   #  0x98 -> SMALL TILDE
+    '\u2122'   #  0x99 -> TRADE MARK SIGN
+    '\ufffe'   #  0x9A -> UNDEFINED
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\ufffe'   #  0x9C -> UNDEFINED
+    '\ufffe'   #  0x9D -> UNDEFINED
+    '\ufffe'   #  0x9E -> UNDEFINED
+    '\ufffe'   #  0x9F -> UNDEFINED
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\u20aa'   #  0xA4 -> NEW SHEQEL SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\xd7'     #  0xAA -> MULTIPLICATION SIGN
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\xf7'     #  0xBA -> DIVISION SIGN
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\xbf'     #  0xBF -> INVERTED QUESTION MARK
+    '\u05b0'   #  0xC0 -> HEBREW POINT SHEVA
+    '\u05b1'   #  0xC1 -> HEBREW POINT HATAF SEGOL
+    '\u05b2'   #  0xC2 -> HEBREW POINT HATAF PATAH
+    '\u05b3'   #  0xC3 -> HEBREW POINT HATAF QAMATS
+    '\u05b4'   #  0xC4 -> HEBREW POINT HIRIQ
+    '\u05b5'   #  0xC5 -> HEBREW POINT TSERE
+    '\u05b6'   #  0xC6 -> HEBREW POINT SEGOL
+    '\u05b7'   #  0xC7 -> HEBREW POINT PATAH
+    '\u05b8'   #  0xC8 -> HEBREW POINT QAMATS
+    '\u05b9'   #  0xC9 -> HEBREW POINT HOLAM
+    '\ufffe'   #  0xCA -> UNDEFINED
+    '\u05bb'   #  0xCB -> HEBREW POINT QUBUTS
+    '\u05bc'   #  0xCC -> HEBREW POINT DAGESH OR MAPIQ
+    '\u05bd'   #  0xCD -> HEBREW POINT METEG
+    '\u05be'   #  0xCE -> HEBREW PUNCTUATION MAQAF
+    '\u05bf'   #  0xCF -> HEBREW POINT RAFE
+    '\u05c0'   #  0xD0 -> HEBREW PUNCTUATION PASEQ
+    '\u05c1'   #  0xD1 -> HEBREW POINT SHIN DOT
+    '\u05c2'   #  0xD2 -> HEBREW POINT SIN DOT
+    '\u05c3'   #  0xD3 -> HEBREW PUNCTUATION SOF PASUQ
+    '\u05f0'   #  0xD4 -> HEBREW LIGATURE YIDDISH DOUBLE VAV
+    '\u05f1'   #  0xD5 -> HEBREW LIGATURE YIDDISH VAV YOD
+    '\u05f2'   #  0xD6 -> HEBREW LIGATURE YIDDISH DOUBLE YOD
+    '\u05f3'   #  0xD7 -> HEBREW PUNCTUATION GERESH
+    '\u05f4'   #  0xD8 -> HEBREW PUNCTUATION GERSHAYIM
+    '\ufffe'   #  0xD9 -> UNDEFINED
+    '\ufffe'   #  0xDA -> UNDEFINED
+    '\ufffe'   #  0xDB -> UNDEFINED
+    '\ufffe'   #  0xDC -> UNDEFINED
+    '\ufffe'   #  0xDD -> UNDEFINED
+    '\ufffe'   #  0xDE -> UNDEFINED
+    '\ufffe'   #  0xDF -> UNDEFINED
+    '\u05d0'   #  0xE0 -> HEBREW LETTER ALEF
+    '\u05d1'   #  0xE1 -> HEBREW LETTER BET
+    '\u05d2'   #  0xE2 -> HEBREW LETTER GIMEL
+    '\u05d3'   #  0xE3 -> HEBREW LETTER DALET
+    '\u05d4'   #  0xE4 -> HEBREW LETTER HE
+    '\u05d5'   #  0xE5 -> HEBREW LETTER VAV
+    '\u05d6'   #  0xE6 -> HEBREW LETTER ZAYIN
+    '\u05d7'   #  0xE7 -> HEBREW LETTER HET
+    '\u05d8'   #  0xE8 -> HEBREW LETTER TET
+    '\u05d9'   #  0xE9 -> HEBREW LETTER YOD
+    '\u05da'   #  0xEA -> HEBREW LETTER FINAL KAF
+    '\u05db'   #  0xEB -> HEBREW LETTER KAF
+    '\u05dc'   #  0xEC -> HEBREW LETTER LAMED
+    '\u05dd'   #  0xED -> HEBREW LETTER FINAL MEM
+    '\u05de'   #  0xEE -> HEBREW LETTER MEM
+    '\u05df'   #  0xEF -> HEBREW LETTER FINAL NUN
+    '\u05e0'   #  0xF0 -> HEBREW LETTER NUN
+    '\u05e1'   #  0xF1 -> HEBREW LETTER SAMEKH
+    '\u05e2'   #  0xF2 -> HEBREW LETTER AYIN
+    '\u05e3'   #  0xF3 -> HEBREW LETTER FINAL PE
+    '\u05e4'   #  0xF4 -> HEBREW LETTER PE
+    '\u05e5'   #  0xF5 -> HEBREW LETTER FINAL TSADI
+    '\u05e6'   #  0xF6 -> HEBREW LETTER TSADI
+    '\u05e7'   #  0xF7 -> HEBREW LETTER QOF
+    '\u05e8'   #  0xF8 -> HEBREW LETTER RESH
+    '\u05e9'   #  0xF9 -> HEBREW LETTER SHIN
+    '\u05ea'   #  0xFA -> HEBREW LETTER TAV
+    '\ufffe'   #  0xFB -> UNDEFINED
+    '\ufffe'   #  0xFC -> UNDEFINED
+    '\u200e'   #  0xFD -> LEFT-TO-RIGHT MARK
+    '\u200f'   #  0xFE -> RIGHT-TO-LEFT MARK
+    '\ufffe'   #  0xFF -> UNDEFINED
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1256.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1256.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1256.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u20ac'   #  0x80 -> EURO SIGN
-    u'\u067e'   #  0x81 -> ARABIC LETTER PEH
-    u'\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\u2020'   #  0x86 -> DAGGER
-    u'\u2021'   #  0x87 -> DOUBLE DAGGER
-    u'\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u2030'   #  0x89 -> PER MILLE SIGN
-    u'\u0679'   #  0x8A -> ARABIC LETTER TTEH
-    u'\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u0152'   #  0x8C -> LATIN CAPITAL LIGATURE OE
-    u'\u0686'   #  0x8D -> ARABIC LETTER TCHEH
-    u'\u0698'   #  0x8E -> ARABIC LETTER JEH
-    u'\u0688'   #  0x8F -> ARABIC LETTER DDAL
-    u'\u06af'   #  0x90 -> ARABIC LETTER GAF
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\u06a9'   #  0x98 -> ARABIC LETTER KEHEH
-    u'\u2122'   #  0x99 -> TRADE MARK SIGN
-    u'\u0691'   #  0x9A -> ARABIC LETTER RREH
-    u'\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\u0153'   #  0x9C -> LATIN SMALL LIGATURE OE
-    u'\u200c'   #  0x9D -> ZERO WIDTH NON-JOINER
-    u'\u200d'   #  0x9E -> ZERO WIDTH JOINER
-    u'\u06ba'   #  0x9F -> ARABIC LETTER NOON GHUNNA
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u060c'   #  0xA1 -> ARABIC COMMA
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u06be'   #  0xAA -> ARABIC LETTER HEH DOACHASHMEE
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\u061b'   #  0xBA -> ARABIC SEMICOLON
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\u061f'   #  0xBF -> ARABIC QUESTION MARK
-    u'\u06c1'   #  0xC0 -> ARABIC LETTER HEH GOAL
-    u'\u0621'   #  0xC1 -> ARABIC LETTER HAMZA
-    u'\u0622'   #  0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
-    u'\u0623'   #  0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
-    u'\u0624'   #  0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
-    u'\u0625'   #  0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
-    u'\u0626'   #  0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
-    u'\u0627'   #  0xC7 -> ARABIC LETTER ALEF
-    u'\u0628'   #  0xC8 -> ARABIC LETTER BEH
-    u'\u0629'   #  0xC9 -> ARABIC LETTER TEH MARBUTA
-    u'\u062a'   #  0xCA -> ARABIC LETTER TEH
-    u'\u062b'   #  0xCB -> ARABIC LETTER THEH
-    u'\u062c'   #  0xCC -> ARABIC LETTER JEEM
-    u'\u062d'   #  0xCD -> ARABIC LETTER HAH
-    u'\u062e'   #  0xCE -> ARABIC LETTER KHAH
-    u'\u062f'   #  0xCF -> ARABIC LETTER DAL
-    u'\u0630'   #  0xD0 -> ARABIC LETTER THAL
-    u'\u0631'   #  0xD1 -> ARABIC LETTER REH
-    u'\u0632'   #  0xD2 -> ARABIC LETTER ZAIN
-    u'\u0633'   #  0xD3 -> ARABIC LETTER SEEN
-    u'\u0634'   #  0xD4 -> ARABIC LETTER SHEEN
-    u'\u0635'   #  0xD5 -> ARABIC LETTER SAD
-    u'\u0636'   #  0xD6 -> ARABIC LETTER DAD
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\u0637'   #  0xD8 -> ARABIC LETTER TAH
-    u'\u0638'   #  0xD9 -> ARABIC LETTER ZAH
-    u'\u0639'   #  0xDA -> ARABIC LETTER AIN
-    u'\u063a'   #  0xDB -> ARABIC LETTER GHAIN
-    u'\u0640'   #  0xDC -> ARABIC TATWEEL
-    u'\u0641'   #  0xDD -> ARABIC LETTER FEH
-    u'\u0642'   #  0xDE -> ARABIC LETTER QAF
-    u'\u0643'   #  0xDF -> ARABIC LETTER KAF
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\u0644'   #  0xE1 -> ARABIC LETTER LAM
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\u0645'   #  0xE3 -> ARABIC LETTER MEEM
-    u'\u0646'   #  0xE4 -> ARABIC LETTER NOON
-    u'\u0647'   #  0xE5 -> ARABIC LETTER HEH
-    u'\u0648'   #  0xE6 -> ARABIC LETTER WAW
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\u0649'   #  0xEC -> ARABIC LETTER ALEF MAKSURA
-    u'\u064a'   #  0xED -> ARABIC LETTER YEH
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\u064b'   #  0xF0 -> ARABIC FATHATAN
-    u'\u064c'   #  0xF1 -> ARABIC DAMMATAN
-    u'\u064d'   #  0xF2 -> ARABIC KASRATAN
-    u'\u064e'   #  0xF3 -> ARABIC FATHA
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\u064f'   #  0xF5 -> ARABIC DAMMA
-    u'\u0650'   #  0xF6 -> ARABIC KASRA
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\u0651'   #  0xF8 -> ARABIC SHADDA
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\u0652'   #  0xFA -> ARABIC SUKUN
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u200e'   #  0xFD -> LEFT-TO-RIGHT MARK
-    u'\u200f'   #  0xFE -> RIGHT-TO-LEFT MARK
-    u'\u06d2'   #  0xFF -> ARABIC LETTER YEH BARREE
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u20ac'   #  0x80 -> EURO SIGN
+    '\u067e'   #  0x81 -> ARABIC LETTER PEH
+    '\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
+    '\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
+    '\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\u2020'   #  0x86 -> DAGGER
+    '\u2021'   #  0x87 -> DOUBLE DAGGER
+    '\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u2030'   #  0x89 -> PER MILLE SIGN
+    '\u0679'   #  0x8A -> ARABIC LETTER TTEH
+    '\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u0152'   #  0x8C -> LATIN CAPITAL LIGATURE OE
+    '\u0686'   #  0x8D -> ARABIC LETTER TCHEH
+    '\u0698'   #  0x8E -> ARABIC LETTER JEH
+    '\u0688'   #  0x8F -> ARABIC LETTER DDAL
+    '\u06af'   #  0x90 -> ARABIC LETTER GAF
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\u06a9'   #  0x98 -> ARABIC LETTER KEHEH
+    '\u2122'   #  0x99 -> TRADE MARK SIGN
+    '\u0691'   #  0x9A -> ARABIC LETTER RREH
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\u0153'   #  0x9C -> LATIN SMALL LIGATURE OE
+    '\u200c'   #  0x9D -> ZERO WIDTH NON-JOINER
+    '\u200d'   #  0x9E -> ZERO WIDTH JOINER
+    '\u06ba'   #  0x9F -> ARABIC LETTER NOON GHUNNA
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u060c'   #  0xA1 -> ARABIC COMMA
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u06be'   #  0xAA -> ARABIC LETTER HEH DOACHASHMEE
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\u061b'   #  0xBA -> ARABIC SEMICOLON
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\u061f'   #  0xBF -> ARABIC QUESTION MARK
+    '\u06c1'   #  0xC0 -> ARABIC LETTER HEH GOAL
+    '\u0621'   #  0xC1 -> ARABIC LETTER HAMZA
+    '\u0622'   #  0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
+    '\u0623'   #  0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
+    '\u0624'   #  0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
+    '\u0625'   #  0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
+    '\u0626'   #  0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
+    '\u0627'   #  0xC7 -> ARABIC LETTER ALEF
+    '\u0628'   #  0xC8 -> ARABIC LETTER BEH
+    '\u0629'   #  0xC9 -> ARABIC LETTER TEH MARBUTA
+    '\u062a'   #  0xCA -> ARABIC LETTER TEH
+    '\u062b'   #  0xCB -> ARABIC LETTER THEH
+    '\u062c'   #  0xCC -> ARABIC LETTER JEEM
+    '\u062d'   #  0xCD -> ARABIC LETTER HAH
+    '\u062e'   #  0xCE -> ARABIC LETTER KHAH
+    '\u062f'   #  0xCF -> ARABIC LETTER DAL
+    '\u0630'   #  0xD0 -> ARABIC LETTER THAL
+    '\u0631'   #  0xD1 -> ARABIC LETTER REH
+    '\u0632'   #  0xD2 -> ARABIC LETTER ZAIN
+    '\u0633'   #  0xD3 -> ARABIC LETTER SEEN
+    '\u0634'   #  0xD4 -> ARABIC LETTER SHEEN
+    '\u0635'   #  0xD5 -> ARABIC LETTER SAD
+    '\u0636'   #  0xD6 -> ARABIC LETTER DAD
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\u0637'   #  0xD8 -> ARABIC LETTER TAH
+    '\u0638'   #  0xD9 -> ARABIC LETTER ZAH
+    '\u0639'   #  0xDA -> ARABIC LETTER AIN
+    '\u063a'   #  0xDB -> ARABIC LETTER GHAIN
+    '\u0640'   #  0xDC -> ARABIC TATWEEL
+    '\u0641'   #  0xDD -> ARABIC LETTER FEH
+    '\u0642'   #  0xDE -> ARABIC LETTER QAF
+    '\u0643'   #  0xDF -> ARABIC LETTER KAF
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\u0644'   #  0xE1 -> ARABIC LETTER LAM
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\u0645'   #  0xE3 -> ARABIC LETTER MEEM
+    '\u0646'   #  0xE4 -> ARABIC LETTER NOON
+    '\u0647'   #  0xE5 -> ARABIC LETTER HEH
+    '\u0648'   #  0xE6 -> ARABIC LETTER WAW
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\u0649'   #  0xEC -> ARABIC LETTER ALEF MAKSURA
+    '\u064a'   #  0xED -> ARABIC LETTER YEH
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\u064b'   #  0xF0 -> ARABIC FATHATAN
+    '\u064c'   #  0xF1 -> ARABIC DAMMATAN
+    '\u064d'   #  0xF2 -> ARABIC KASRATAN
+    '\u064e'   #  0xF3 -> ARABIC FATHA
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\u064f'   #  0xF5 -> ARABIC DAMMA
+    '\u0650'   #  0xF6 -> ARABIC KASRA
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\u0651'   #  0xF8 -> ARABIC SHADDA
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\u0652'   #  0xFA -> ARABIC SUKUN
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u200e'   #  0xFD -> LEFT-TO-RIGHT MARK
+    '\u200f'   #  0xFE -> RIGHT-TO-LEFT MARK
+    '\u06d2'   #  0xFF -> ARABIC LETTER YEH BARREE
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1257.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1257.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1257.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u20ac'   #  0x80 -> EURO SIGN
-    u'\ufffe'   #  0x81 -> UNDEFINED
-    u'\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
-    u'\ufffe'   #  0x83 -> UNDEFINED
-    u'\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\u2020'   #  0x86 -> DAGGER
-    u'\u2021'   #  0x87 -> DOUBLE DAGGER
-    u'\ufffe'   #  0x88 -> UNDEFINED
-    u'\u2030'   #  0x89 -> PER MILLE SIGN
-    u'\ufffe'   #  0x8A -> UNDEFINED
-    u'\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\ufffe'   #  0x8C -> UNDEFINED
-    u'\xa8'     #  0x8D -> DIAERESIS
-    u'\u02c7'   #  0x8E -> CARON
-    u'\xb8'     #  0x8F -> CEDILLA
-    u'\ufffe'   #  0x90 -> UNDEFINED
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\ufffe'   #  0x98 -> UNDEFINED
-    u'\u2122'   #  0x99 -> TRADE MARK SIGN
-    u'\ufffe'   #  0x9A -> UNDEFINED
-    u'\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\ufffe'   #  0x9C -> UNDEFINED
-    u'\xaf'     #  0x9D -> MACRON
-    u'\u02db'   #  0x9E -> OGONEK
-    u'\ufffe'   #  0x9F -> UNDEFINED
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\ufffe'   #  0xA1 -> UNDEFINED
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\ufffe'   #  0xA5 -> UNDEFINED
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xd8'     #  0xA8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u0156'   #  0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xc6'     #  0xAF -> LATIN CAPITAL LETTER AE
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xf8'     #  0xB8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\u0157'   #  0xBA -> LATIN SMALL LETTER R WITH CEDILLA
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\xe6'     #  0xBF -> LATIN SMALL LETTER AE
-    u'\u0104'   #  0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\u012e'   #  0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK
-    u'\u0100'   #  0xC2 -> LATIN CAPITAL LETTER A WITH MACRON
-    u'\u0106'   #  0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\u0118'   #  0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\u0112'   #  0xC7 -> LATIN CAPITAL LETTER E WITH MACRON
-    u'\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\u0179'   #  0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE
-    u'\u0116'   #  0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE
-    u'\u0122'   #  0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA
-    u'\u0136'   #  0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA
-    u'\u012a'   #  0xCE -> LATIN CAPITAL LETTER I WITH MACRON
-    u'\u013b'   #  0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA
-    u'\u0160'   #  0xD0 -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
-    u'\u0145'   #  0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\u014c'   #  0xD4 -> LATIN CAPITAL LETTER O WITH MACRON
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\u0172'   #  0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK
-    u'\u0141'   #  0xD9 -> LATIN CAPITAL LETTER L WITH STROKE
-    u'\u015a'   #  0xDA -> LATIN CAPITAL LETTER S WITH ACUTE
-    u'\u016a'   #  0xDB -> LATIN CAPITAL LETTER U WITH MACRON
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u017b'   #  0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
-    u'\u017d'   #  0xDE -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\u0105'   #  0xE0 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u012f'   #  0xE1 -> LATIN SMALL LETTER I WITH OGONEK
-    u'\u0101'   #  0xE2 -> LATIN SMALL LETTER A WITH MACRON
-    u'\u0107'   #  0xE3 -> LATIN SMALL LETTER C WITH ACUTE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\u0119'   #  0xE6 -> LATIN SMALL LETTER E WITH OGONEK
-    u'\u0113'   #  0xE7 -> LATIN SMALL LETTER E WITH MACRON
-    u'\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\u017a'   #  0xEA -> LATIN SMALL LETTER Z WITH ACUTE
-    u'\u0117'   #  0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE
-    u'\u0123'   #  0xEC -> LATIN SMALL LETTER G WITH CEDILLA
-    u'\u0137'   #  0xED -> LATIN SMALL LETTER K WITH CEDILLA
-    u'\u012b'   #  0xEE -> LATIN SMALL LETTER I WITH MACRON
-    u'\u013c'   #  0xEF -> LATIN SMALL LETTER L WITH CEDILLA
-    u'\u0161'   #  0xF0 -> LATIN SMALL LETTER S WITH CARON
-    u'\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
-    u'\u0146'   #  0xF2 -> LATIN SMALL LETTER N WITH CEDILLA
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\u014d'   #  0xF4 -> LATIN SMALL LETTER O WITH MACRON
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\u0173'   #  0xF8 -> LATIN SMALL LETTER U WITH OGONEK
-    u'\u0142'   #  0xF9 -> LATIN SMALL LETTER L WITH STROKE
-    u'\u015b'   #  0xFA -> LATIN SMALL LETTER S WITH ACUTE
-    u'\u016b'   #  0xFB -> LATIN SMALL LETTER U WITH MACRON
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u017c'   #  0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
-    u'\u017e'   #  0xFE -> LATIN SMALL LETTER Z WITH CARON
-    u'\u02d9'   #  0xFF -> DOT ABOVE
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u20ac'   #  0x80 -> EURO SIGN
+    '\ufffe'   #  0x81 -> UNDEFINED
+    '\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
+    '\ufffe'   #  0x83 -> UNDEFINED
+    '\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\u2020'   #  0x86 -> DAGGER
+    '\u2021'   #  0x87 -> DOUBLE DAGGER
+    '\ufffe'   #  0x88 -> UNDEFINED
+    '\u2030'   #  0x89 -> PER MILLE SIGN
+    '\ufffe'   #  0x8A -> UNDEFINED
+    '\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\ufffe'   #  0x8C -> UNDEFINED
+    '\xa8'     #  0x8D -> DIAERESIS
+    '\u02c7'   #  0x8E -> CARON
+    '\xb8'     #  0x8F -> CEDILLA
+    '\ufffe'   #  0x90 -> UNDEFINED
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\ufffe'   #  0x98 -> UNDEFINED
+    '\u2122'   #  0x99 -> TRADE MARK SIGN
+    '\ufffe'   #  0x9A -> UNDEFINED
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\ufffe'   #  0x9C -> UNDEFINED
+    '\xaf'     #  0x9D -> MACRON
+    '\u02db'   #  0x9E -> OGONEK
+    '\ufffe'   #  0x9F -> UNDEFINED
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\ufffe'   #  0xA1 -> UNDEFINED
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\ufffe'   #  0xA5 -> UNDEFINED
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xd8'     #  0xA8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u0156'   #  0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xc6'     #  0xAF -> LATIN CAPITAL LETTER AE
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xf8'     #  0xB8 -> LATIN SMALL LETTER O WITH STROKE
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\u0157'   #  0xBA -> LATIN SMALL LETTER R WITH CEDILLA
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\xe6'     #  0xBF -> LATIN SMALL LETTER AE
+    '\u0104'   #  0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\u012e'   #  0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK
+    '\u0100'   #  0xC2 -> LATIN CAPITAL LETTER A WITH MACRON
+    '\u0106'   #  0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\u0118'   #  0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\u0112'   #  0xC7 -> LATIN CAPITAL LETTER E WITH MACRON
+    '\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\u0179'   #  0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE
+    '\u0116'   #  0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+    '\u0122'   #  0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA
+    '\u0136'   #  0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA
+    '\u012a'   #  0xCE -> LATIN CAPITAL LETTER I WITH MACRON
+    '\u013b'   #  0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA
+    '\u0160'   #  0xD0 -> LATIN CAPITAL LETTER S WITH CARON
+    '\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
+    '\u0145'   #  0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\u014c'   #  0xD4 -> LATIN CAPITAL LETTER O WITH MACRON
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\u0172'   #  0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK
+    '\u0141'   #  0xD9 -> LATIN CAPITAL LETTER L WITH STROKE
+    '\u015a'   #  0xDA -> LATIN CAPITAL LETTER S WITH ACUTE
+    '\u016a'   #  0xDB -> LATIN CAPITAL LETTER U WITH MACRON
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u017b'   #  0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    '\u017d'   #  0xDE -> LATIN CAPITAL LETTER Z WITH CARON
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\u0105'   #  0xE0 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u012f'   #  0xE1 -> LATIN SMALL LETTER I WITH OGONEK
+    '\u0101'   #  0xE2 -> LATIN SMALL LETTER A WITH MACRON
+    '\u0107'   #  0xE3 -> LATIN SMALL LETTER C WITH ACUTE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\u0119'   #  0xE6 -> LATIN SMALL LETTER E WITH OGONEK
+    '\u0113'   #  0xE7 -> LATIN SMALL LETTER E WITH MACRON
+    '\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\u017a'   #  0xEA -> LATIN SMALL LETTER Z WITH ACUTE
+    '\u0117'   #  0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE
+    '\u0123'   #  0xEC -> LATIN SMALL LETTER G WITH CEDILLA
+    '\u0137'   #  0xED -> LATIN SMALL LETTER K WITH CEDILLA
+    '\u012b'   #  0xEE -> LATIN SMALL LETTER I WITH MACRON
+    '\u013c'   #  0xEF -> LATIN SMALL LETTER L WITH CEDILLA
+    '\u0161'   #  0xF0 -> LATIN SMALL LETTER S WITH CARON
+    '\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
+    '\u0146'   #  0xF2 -> LATIN SMALL LETTER N WITH CEDILLA
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\u014d'   #  0xF4 -> LATIN SMALL LETTER O WITH MACRON
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\u0173'   #  0xF8 -> LATIN SMALL LETTER U WITH OGONEK
+    '\u0142'   #  0xF9 -> LATIN SMALL LETTER L WITH STROKE
+    '\u015b'   #  0xFA -> LATIN SMALL LETTER S WITH ACUTE
+    '\u016b'   #  0xFB -> LATIN SMALL LETTER U WITH MACRON
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u017c'   #  0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
+    '\u017e'   #  0xFE -> LATIN SMALL LETTER Z WITH CARON
+    '\u02d9'   #  0xFF -> DOT ABOVE
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp1258.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp1258.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp1258.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u20ac'   #  0x80 -> EURO SIGN
-    u'\ufffe'   #  0x81 -> UNDEFINED
-    u'\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\u2020'   #  0x86 -> DAGGER
-    u'\u2021'   #  0x87 -> DOUBLE DAGGER
-    u'\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u2030'   #  0x89 -> PER MILLE SIGN
-    u'\ufffe'   #  0x8A -> UNDEFINED
-    u'\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u0152'   #  0x8C -> LATIN CAPITAL LIGATURE OE
-    u'\ufffe'   #  0x8D -> UNDEFINED
-    u'\ufffe'   #  0x8E -> UNDEFINED
-    u'\ufffe'   #  0x8F -> UNDEFINED
-    u'\ufffe'   #  0x90 -> UNDEFINED
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\u02dc'   #  0x98 -> SMALL TILDE
-    u'\u2122'   #  0x99 -> TRADE MARK SIGN
-    u'\ufffe'   #  0x9A -> UNDEFINED
-    u'\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\u0153'   #  0x9C -> LATIN SMALL LIGATURE OE
-    u'\ufffe'   #  0x9D -> UNDEFINED
-    u'\ufffe'   #  0x9E -> UNDEFINED
-    u'\u0178'   #  0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\xbf'     #  0xBF -> INVERTED QUESTION MARK
-    u'\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\u0102'   #  0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\u0300'   #  0xCC -> COMBINING GRAVE ACCENT
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
-    u'\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\u0309'   #  0xD2 -> COMBINING HOOK ABOVE
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\u01a0'   #  0xD5 -> LATIN CAPITAL LETTER O WITH HORN
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u01af'   #  0xDD -> LATIN CAPITAL LETTER U WITH HORN
-    u'\u0303'   #  0xDE -> COMBINING TILDE
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\u0103'   #  0xE3 -> LATIN SMALL LETTER A WITH BREVE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\u0301'   #  0xEC -> COMBINING ACUTE ACCENT
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
-    u'\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
-    u'\u0323'   #  0xF2 -> COMBINING DOT BELOW
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\u01a1'   #  0xF5 -> LATIN SMALL LETTER O WITH HORN
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u01b0'   #  0xFD -> LATIN SMALL LETTER U WITH HORN
-    u'\u20ab'   #  0xFE -> DONG SIGN
-    u'\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u20ac'   #  0x80 -> EURO SIGN
+    '\ufffe'   #  0x81 -> UNDEFINED
+    '\u201a'   #  0x82 -> SINGLE LOW-9 QUOTATION MARK
+    '\u0192'   #  0x83 -> LATIN SMALL LETTER F WITH HOOK
+    '\u201e'   #  0x84 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\u2020'   #  0x86 -> DAGGER
+    '\u2021'   #  0x87 -> DOUBLE DAGGER
+    '\u02c6'   #  0x88 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u2030'   #  0x89 -> PER MILLE SIGN
+    '\ufffe'   #  0x8A -> UNDEFINED
+    '\u2039'   #  0x8B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u0152'   #  0x8C -> LATIN CAPITAL LIGATURE OE
+    '\ufffe'   #  0x8D -> UNDEFINED
+    '\ufffe'   #  0x8E -> UNDEFINED
+    '\ufffe'   #  0x8F -> UNDEFINED
+    '\ufffe'   #  0x90 -> UNDEFINED
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\u02dc'   #  0x98 -> SMALL TILDE
+    '\u2122'   #  0x99 -> TRADE MARK SIGN
+    '\ufffe'   #  0x9A -> UNDEFINED
+    '\u203a'   #  0x9B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\u0153'   #  0x9C -> LATIN SMALL LIGATURE OE
+    '\ufffe'   #  0x9D -> UNDEFINED
+    '\ufffe'   #  0x9E -> UNDEFINED
+    '\u0178'   #  0x9F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\xbf'     #  0xBF -> INVERTED QUESTION MARK
+    '\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\u0102'   #  0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\u0300'   #  0xCC -> COMBINING GRAVE ACCENT
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+    '\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\u0309'   #  0xD2 -> COMBINING HOOK ABOVE
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\u01a0'   #  0xD5 -> LATIN CAPITAL LETTER O WITH HORN
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u01af'   #  0xDD -> LATIN CAPITAL LETTER U WITH HORN
+    '\u0303'   #  0xDE -> COMBINING TILDE
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\u0103'   #  0xE3 -> LATIN SMALL LETTER A WITH BREVE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\u0301'   #  0xEC -> COMBINING ACUTE ACCENT
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
+    '\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
+    '\u0323'   #  0xF2 -> COMBINING DOT BELOW
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\u01a1'   #  0xF5 -> LATIN SMALL LETTER O WITH HORN
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u01b0'   #  0xFD -> LATIN SMALL LETTER U WITH HORN
+    '\u20ab'   #  0xFE -> DONG SIGN
+    '\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp424.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp424.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp424.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x9c'     #  0x04 -> SELECT
-    u'\t'       #  0x05 -> HORIZONTAL TABULATION
-    u'\x86'     #  0x06 -> REQUIRED NEW LINE
-    u'\x7f'     #  0x07 -> DELETE
-    u'\x97'     #  0x08 -> GRAPHIC ESCAPE
-    u'\x8d'     #  0x09 -> SUPERSCRIPT
-    u'\x8e'     #  0x0A -> REPEAT
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x9d'     #  0x14 -> RESTORE/ENABLE PRESENTATION
-    u'\x85'     #  0x15 -> NEW LINE
-    u'\x08'     #  0x16 -> BACKSPACE
-    u'\x87'     #  0x17 -> PROGRAM OPERATOR COMMUNICATION
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x92'     #  0x1A -> UNIT BACK SPACE
-    u'\x8f'     #  0x1B -> CUSTOMER USE ONE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u'\x80'     #  0x20 -> DIGIT SELECT
-    u'\x81'     #  0x21 -> START OF SIGNIFICANCE
-    u'\x82'     #  0x22 -> FIELD SEPARATOR
-    u'\x83'     #  0x23 -> WORD UNDERSCORE
-    u'\x84'     #  0x24 -> BYPASS OR INHIBIT PRESENTATION
-    u'\n'       #  0x25 -> LINE FEED
-    u'\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
-    u'\x1b'     #  0x27 -> ESCAPE
-    u'\x88'     #  0x28 -> SET ATTRIBUTE
-    u'\x89'     #  0x29 -> START FIELD EXTENDED
-    u'\x8a'     #  0x2A -> SET MODE OR SWITCH
-    u'\x8b'     #  0x2B -> CONTROL SEQUENCE PREFIX
-    u'\x8c'     #  0x2C -> MODIFY FIELD ATTRIBUTE
-    u'\x05'     #  0x2D -> ENQUIRY
-    u'\x06'     #  0x2E -> ACKNOWLEDGE
-    u'\x07'     #  0x2F -> BELL
-    u'\x90'     #  0x30 -> <reserved>
-    u'\x91'     #  0x31 -> <reserved>
-    u'\x16'     #  0x32 -> SYNCHRONOUS IDLE
-    u'\x93'     #  0x33 -> INDEX RETURN
-    u'\x94'     #  0x34 -> PRESENTATION POSITION
-    u'\x95'     #  0x35 -> TRANSPARENT
-    u'\x96'     #  0x36 -> NUMERIC BACKSPACE
-    u'\x04'     #  0x37 -> END OF TRANSMISSION
-    u'\x98'     #  0x38 -> SUBSCRIPT
-    u'\x99'     #  0x39 -> INDENT TABULATION
-    u'\x9a'     #  0x3A -> REVERSE FORM FEED
-    u'\x9b'     #  0x3B -> CUSTOMER USE THREE
-    u'\x14'     #  0x3C -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
-    u'\x9e'     #  0x3E -> <reserved>
-    u'\x1a'     #  0x3F -> SUBSTITUTE
-    u' '        #  0x40 -> SPACE
-    u'\u05d0'   #  0x41 -> HEBREW LETTER ALEF
-    u'\u05d1'   #  0x42 -> HEBREW LETTER BET
-    u'\u05d2'   #  0x43 -> HEBREW LETTER GIMEL
-    u'\u05d3'   #  0x44 -> HEBREW LETTER DALET
-    u'\u05d4'   #  0x45 -> HEBREW LETTER HE
-    u'\u05d5'   #  0x46 -> HEBREW LETTER VAV
-    u'\u05d6'   #  0x47 -> HEBREW LETTER ZAYIN
-    u'\u05d7'   #  0x48 -> HEBREW LETTER HET
-    u'\u05d8'   #  0x49 -> HEBREW LETTER TET
-    u'\xa2'     #  0x4A -> CENT SIGN
-    u'.'        #  0x4B -> FULL STOP
-    u'<'        #  0x4C -> LESS-THAN SIGN
-    u'('        #  0x4D -> LEFT PARENTHESIS
-    u'+'        #  0x4E -> PLUS SIGN
-    u'|'        #  0x4F -> VERTICAL LINE
-    u'&'        #  0x50 -> AMPERSAND
-    u'\u05d9'   #  0x51 -> HEBREW LETTER YOD
-    u'\u05da'   #  0x52 -> HEBREW LETTER FINAL KAF
-    u'\u05db'   #  0x53 -> HEBREW LETTER KAF
-    u'\u05dc'   #  0x54 -> HEBREW LETTER LAMED
-    u'\u05dd'   #  0x55 -> HEBREW LETTER FINAL MEM
-    u'\u05de'   #  0x56 -> HEBREW LETTER MEM
-    u'\u05df'   #  0x57 -> HEBREW LETTER FINAL NUN
-    u'\u05e0'   #  0x58 -> HEBREW LETTER NUN
-    u'\u05e1'   #  0x59 -> HEBREW LETTER SAMEKH
-    u'!'        #  0x5A -> EXCLAMATION MARK
-    u'$'        #  0x5B -> DOLLAR SIGN
-    u'*'        #  0x5C -> ASTERISK
-    u')'        #  0x5D -> RIGHT PARENTHESIS
-    u';'        #  0x5E -> SEMICOLON
-    u'\xac'     #  0x5F -> NOT SIGN
-    u'-'        #  0x60 -> HYPHEN-MINUS
-    u'/'        #  0x61 -> SOLIDUS
-    u'\u05e2'   #  0x62 -> HEBREW LETTER AYIN
-    u'\u05e3'   #  0x63 -> HEBREW LETTER FINAL PE
-    u'\u05e4'   #  0x64 -> HEBREW LETTER PE
-    u'\u05e5'   #  0x65 -> HEBREW LETTER FINAL TSADI
-    u'\u05e6'   #  0x66 -> HEBREW LETTER TSADI
-    u'\u05e7'   #  0x67 -> HEBREW LETTER QOF
-    u'\u05e8'   #  0x68 -> HEBREW LETTER RESH
-    u'\u05e9'   #  0x69 -> HEBREW LETTER SHIN
-    u'\xa6'     #  0x6A -> BROKEN BAR
-    u','        #  0x6B -> COMMA
-    u'%'        #  0x6C -> PERCENT SIGN
-    u'_'        #  0x6D -> LOW LINE
-    u'>'        #  0x6E -> GREATER-THAN SIGN
-    u'?'        #  0x6F -> QUESTION MARK
-    u'\ufffe'   #  0x70 -> UNDEFINED
-    u'\u05ea'   #  0x71 -> HEBREW LETTER TAV
-    u'\ufffe'   #  0x72 -> UNDEFINED
-    u'\ufffe'   #  0x73 -> UNDEFINED
-    u'\xa0'     #  0x74 -> NO-BREAK SPACE
-    u'\ufffe'   #  0x75 -> UNDEFINED
-    u'\ufffe'   #  0x76 -> UNDEFINED
-    u'\ufffe'   #  0x77 -> UNDEFINED
-    u'\u2017'   #  0x78 -> DOUBLE LOW LINE
-    u'`'        #  0x79 -> GRAVE ACCENT
-    u':'        #  0x7A -> COLON
-    u'#'        #  0x7B -> NUMBER SIGN
-    u'@'        #  0x7C -> COMMERCIAL AT
-    u"'"        #  0x7D -> APOSTROPHE
-    u'='        #  0x7E -> EQUALS SIGN
-    u'"'        #  0x7F -> QUOTATION MARK
-    u'\ufffe'   #  0x80 -> UNDEFINED
-    u'a'        #  0x81 -> LATIN SMALL LETTER A
-    u'b'        #  0x82 -> LATIN SMALL LETTER B
-    u'c'        #  0x83 -> LATIN SMALL LETTER C
-    u'd'        #  0x84 -> LATIN SMALL LETTER D
-    u'e'        #  0x85 -> LATIN SMALL LETTER E
-    u'f'        #  0x86 -> LATIN SMALL LETTER F
-    u'g'        #  0x87 -> LATIN SMALL LETTER G
-    u'h'        #  0x88 -> LATIN SMALL LETTER H
-    u'i'        #  0x89 -> LATIN SMALL LETTER I
-    u'\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\ufffe'   #  0x8C -> UNDEFINED
-    u'\ufffe'   #  0x8D -> UNDEFINED
-    u'\ufffe'   #  0x8E -> UNDEFINED
-    u'\xb1'     #  0x8F -> PLUS-MINUS SIGN
-    u'\xb0'     #  0x90 -> DEGREE SIGN
-    u'j'        #  0x91 -> LATIN SMALL LETTER J
-    u'k'        #  0x92 -> LATIN SMALL LETTER K
-    u'l'        #  0x93 -> LATIN SMALL LETTER L
-    u'm'        #  0x94 -> LATIN SMALL LETTER M
-    u'n'        #  0x95 -> LATIN SMALL LETTER N
-    u'o'        #  0x96 -> LATIN SMALL LETTER O
-    u'p'        #  0x97 -> LATIN SMALL LETTER P
-    u'q'        #  0x98 -> LATIN SMALL LETTER Q
-    u'r'        #  0x99 -> LATIN SMALL LETTER R
-    u'\ufffe'   #  0x9A -> UNDEFINED
-    u'\ufffe'   #  0x9B -> UNDEFINED
-    u'\ufffe'   #  0x9C -> UNDEFINED
-    u'\xb8'     #  0x9D -> CEDILLA
-    u'\ufffe'   #  0x9E -> UNDEFINED
-    u'\xa4'     #  0x9F -> CURRENCY SIGN
-    u'\xb5'     #  0xA0 -> MICRO SIGN
-    u'~'        #  0xA1 -> TILDE
-    u's'        #  0xA2 -> LATIN SMALL LETTER S
-    u't'        #  0xA3 -> LATIN SMALL LETTER T
-    u'u'        #  0xA4 -> LATIN SMALL LETTER U
-    u'v'        #  0xA5 -> LATIN SMALL LETTER V
-    u'w'        #  0xA6 -> LATIN SMALL LETTER W
-    u'x'        #  0xA7 -> LATIN SMALL LETTER X
-    u'y'        #  0xA8 -> LATIN SMALL LETTER Y
-    u'z'        #  0xA9 -> LATIN SMALL LETTER Z
-    u'\ufffe'   #  0xAA -> UNDEFINED
-    u'\ufffe'   #  0xAB -> UNDEFINED
-    u'\ufffe'   #  0xAC -> UNDEFINED
-    u'\ufffe'   #  0xAD -> UNDEFINED
-    u'\ufffe'   #  0xAE -> UNDEFINED
-    u'\xae'     #  0xAF -> REGISTERED SIGN
-    u'^'        #  0xB0 -> CIRCUMFLEX ACCENT
-    u'\xa3'     #  0xB1 -> POUND SIGN
-    u'\xa5'     #  0xB2 -> YEN SIGN
-    u'\xb7'     #  0xB3 -> MIDDLE DOT
-    u'\xa9'     #  0xB4 -> COPYRIGHT SIGN
-    u'\xa7'     #  0xB5 -> SECTION SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
-    u'['        #  0xBA -> LEFT SQUARE BRACKET
-    u']'        #  0xBB -> RIGHT SQUARE BRACKET
-    u'\xaf'     #  0xBC -> MACRON
-    u'\xa8'     #  0xBD -> DIAERESIS
-    u'\xb4'     #  0xBE -> ACUTE ACCENT
-    u'\xd7'     #  0xBF -> MULTIPLICATION SIGN
-    u'{'        #  0xC0 -> LEFT CURLY BRACKET
-    u'A'        #  0xC1 -> LATIN CAPITAL LETTER A
-    u'B'        #  0xC2 -> LATIN CAPITAL LETTER B
-    u'C'        #  0xC3 -> LATIN CAPITAL LETTER C
-    u'D'        #  0xC4 -> LATIN CAPITAL LETTER D
-    u'E'        #  0xC5 -> LATIN CAPITAL LETTER E
-    u'F'        #  0xC6 -> LATIN CAPITAL LETTER F
-    u'G'        #  0xC7 -> LATIN CAPITAL LETTER G
-    u'H'        #  0xC8 -> LATIN CAPITAL LETTER H
-    u'I'        #  0xC9 -> LATIN CAPITAL LETTER I
-    u'\xad'     #  0xCA -> SOFT HYPHEN
-    u'\ufffe'   #  0xCB -> UNDEFINED
-    u'\ufffe'   #  0xCC -> UNDEFINED
-    u'\ufffe'   #  0xCD -> UNDEFINED
-    u'\ufffe'   #  0xCE -> UNDEFINED
-    u'\ufffe'   #  0xCF -> UNDEFINED
-    u'}'        #  0xD0 -> RIGHT CURLY BRACKET
-    u'J'        #  0xD1 -> LATIN CAPITAL LETTER J
-    u'K'        #  0xD2 -> LATIN CAPITAL LETTER K
-    u'L'        #  0xD3 -> LATIN CAPITAL LETTER L
-    u'M'        #  0xD4 -> LATIN CAPITAL LETTER M
-    u'N'        #  0xD5 -> LATIN CAPITAL LETTER N
-    u'O'        #  0xD6 -> LATIN CAPITAL LETTER O
-    u'P'        #  0xD7 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0xD9 -> LATIN CAPITAL LETTER R
-    u'\xb9'     #  0xDA -> SUPERSCRIPT ONE
-    u'\ufffe'   #  0xDB -> UNDEFINED
-    u'\ufffe'   #  0xDC -> UNDEFINED
-    u'\ufffe'   #  0xDD -> UNDEFINED
-    u'\ufffe'   #  0xDE -> UNDEFINED
-    u'\ufffe'   #  0xDF -> UNDEFINED
-    u'\\'       #  0xE0 -> REVERSE SOLIDUS
-    u'\xf7'     #  0xE1 -> DIVISION SIGN
-    u'S'        #  0xE2 -> LATIN CAPITAL LETTER S
-    u'T'        #  0xE3 -> LATIN CAPITAL LETTER T
-    u'U'        #  0xE4 -> LATIN CAPITAL LETTER U
-    u'V'        #  0xE5 -> LATIN CAPITAL LETTER V
-    u'W'        #  0xE6 -> LATIN CAPITAL LETTER W
-    u'X'        #  0xE7 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
-    u'\xb2'     #  0xEA -> SUPERSCRIPT TWO
-    u'\ufffe'   #  0xEB -> UNDEFINED
-    u'\ufffe'   #  0xEC -> UNDEFINED
-    u'\ufffe'   #  0xED -> UNDEFINED
-    u'\ufffe'   #  0xEE -> UNDEFINED
-    u'\ufffe'   #  0xEF -> UNDEFINED
-    u'0'        #  0xF0 -> DIGIT ZERO
-    u'1'        #  0xF1 -> DIGIT ONE
-    u'2'        #  0xF2 -> DIGIT TWO
-    u'3'        #  0xF3 -> DIGIT THREE
-    u'4'        #  0xF4 -> DIGIT FOUR
-    u'5'        #  0xF5 -> DIGIT FIVE
-    u'6'        #  0xF6 -> DIGIT SIX
-    u'7'        #  0xF7 -> DIGIT SEVEN
-    u'8'        #  0xF8 -> DIGIT EIGHT
-    u'9'        #  0xF9 -> DIGIT NINE
-    u'\xb3'     #  0xFA -> SUPERSCRIPT THREE
-    u'\ufffe'   #  0xFB -> UNDEFINED
-    u'\ufffe'   #  0xFC -> UNDEFINED
-    u'\ufffe'   #  0xFD -> UNDEFINED
-    u'\ufffe'   #  0xFE -> UNDEFINED
-    u'\x9f'     #  0xFF -> EIGHT ONES
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x9c'     #  0x04 -> SELECT
+    '\t'       #  0x05 -> HORIZONTAL TABULATION
+    '\x86'     #  0x06 -> REQUIRED NEW LINE
+    '\x7f'     #  0x07 -> DELETE
+    '\x97'     #  0x08 -> GRAPHIC ESCAPE
+    '\x8d'     #  0x09 -> SUPERSCRIPT
+    '\x8e'     #  0x0A -> REPEAT
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x9d'     #  0x14 -> RESTORE/ENABLE PRESENTATION
+    '\x85'     #  0x15 -> NEW LINE
+    '\x08'     #  0x16 -> BACKSPACE
+    '\x87'     #  0x17 -> PROGRAM OPERATOR COMMUNICATION
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x92'     #  0x1A -> UNIT BACK SPACE
+    '\x8f'     #  0x1B -> CUSTOMER USE ONE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    '\x80'     #  0x20 -> DIGIT SELECT
+    '\x81'     #  0x21 -> START OF SIGNIFICANCE
+    '\x82'     #  0x22 -> FIELD SEPARATOR
+    '\x83'     #  0x23 -> WORD UNDERSCORE
+    '\x84'     #  0x24 -> BYPASS OR INHIBIT PRESENTATION
+    '\n'       #  0x25 -> LINE FEED
+    '\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
+    '\x1b'     #  0x27 -> ESCAPE
+    '\x88'     #  0x28 -> SET ATTRIBUTE
+    '\x89'     #  0x29 -> START FIELD EXTENDED
+    '\x8a'     #  0x2A -> SET MODE OR SWITCH
+    '\x8b'     #  0x2B -> CONTROL SEQUENCE PREFIX
+    '\x8c'     #  0x2C -> MODIFY FIELD ATTRIBUTE
+    '\x05'     #  0x2D -> ENQUIRY
+    '\x06'     #  0x2E -> ACKNOWLEDGE
+    '\x07'     #  0x2F -> BELL
+    '\x90'     #  0x30 -> <reserved>
+    '\x91'     #  0x31 -> <reserved>
+    '\x16'     #  0x32 -> SYNCHRONOUS IDLE
+    '\x93'     #  0x33 -> INDEX RETURN
+    '\x94'     #  0x34 -> PRESENTATION POSITION
+    '\x95'     #  0x35 -> TRANSPARENT
+    '\x96'     #  0x36 -> NUMERIC BACKSPACE
+    '\x04'     #  0x37 -> END OF TRANSMISSION
+    '\x98'     #  0x38 -> SUBSCRIPT
+    '\x99'     #  0x39 -> INDENT TABULATION
+    '\x9a'     #  0x3A -> REVERSE FORM FEED
+    '\x9b'     #  0x3B -> CUSTOMER USE THREE
+    '\x14'     #  0x3C -> DEVICE CONTROL FOUR
+    '\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
+    '\x9e'     #  0x3E -> <reserved>
+    '\x1a'     #  0x3F -> SUBSTITUTE
+    ' '        #  0x40 -> SPACE
+    '\u05d0'   #  0x41 -> HEBREW LETTER ALEF
+    '\u05d1'   #  0x42 -> HEBREW LETTER BET
+    '\u05d2'   #  0x43 -> HEBREW LETTER GIMEL
+    '\u05d3'   #  0x44 -> HEBREW LETTER DALET
+    '\u05d4'   #  0x45 -> HEBREW LETTER HE
+    '\u05d5'   #  0x46 -> HEBREW LETTER VAV
+    '\u05d6'   #  0x47 -> HEBREW LETTER ZAYIN
+    '\u05d7'   #  0x48 -> HEBREW LETTER HET
+    '\u05d8'   #  0x49 -> HEBREW LETTER TET
+    '\xa2'     #  0x4A -> CENT SIGN
+    '.'        #  0x4B -> FULL STOP
+    '<'        #  0x4C -> LESS-THAN SIGN
+    '('        #  0x4D -> LEFT PARENTHESIS
+    '+'        #  0x4E -> PLUS SIGN
+    '|'        #  0x4F -> VERTICAL LINE
+    '&'        #  0x50 -> AMPERSAND
+    '\u05d9'   #  0x51 -> HEBREW LETTER YOD
+    '\u05da'   #  0x52 -> HEBREW LETTER FINAL KAF
+    '\u05db'   #  0x53 -> HEBREW LETTER KAF
+    '\u05dc'   #  0x54 -> HEBREW LETTER LAMED
+    '\u05dd'   #  0x55 -> HEBREW LETTER FINAL MEM
+    '\u05de'   #  0x56 -> HEBREW LETTER MEM
+    '\u05df'   #  0x57 -> HEBREW LETTER FINAL NUN
+    '\u05e0'   #  0x58 -> HEBREW LETTER NUN
+    '\u05e1'   #  0x59 -> HEBREW LETTER SAMEKH
+    '!'        #  0x5A -> EXCLAMATION MARK
+    '$'        #  0x5B -> DOLLAR SIGN
+    '*'        #  0x5C -> ASTERISK
+    ')'        #  0x5D -> RIGHT PARENTHESIS
+    ';'        #  0x5E -> SEMICOLON
+    '\xac'     #  0x5F -> NOT SIGN
+    '-'        #  0x60 -> HYPHEN-MINUS
+    '/'        #  0x61 -> SOLIDUS
+    '\u05e2'   #  0x62 -> HEBREW LETTER AYIN
+    '\u05e3'   #  0x63 -> HEBREW LETTER FINAL PE
+    '\u05e4'   #  0x64 -> HEBREW LETTER PE
+    '\u05e5'   #  0x65 -> HEBREW LETTER FINAL TSADI
+    '\u05e6'   #  0x66 -> HEBREW LETTER TSADI
+    '\u05e7'   #  0x67 -> HEBREW LETTER QOF
+    '\u05e8'   #  0x68 -> HEBREW LETTER RESH
+    '\u05e9'   #  0x69 -> HEBREW LETTER SHIN
+    '\xa6'     #  0x6A -> BROKEN BAR
+    ','        #  0x6B -> COMMA
+    '%'        #  0x6C -> PERCENT SIGN
+    '_'        #  0x6D -> LOW LINE
+    '>'        #  0x6E -> GREATER-THAN SIGN
+    '?'        #  0x6F -> QUESTION MARK
+    '\ufffe'   #  0x70 -> UNDEFINED
+    '\u05ea'   #  0x71 -> HEBREW LETTER TAV
+    '\ufffe'   #  0x72 -> UNDEFINED
+    '\ufffe'   #  0x73 -> UNDEFINED
+    '\xa0'     #  0x74 -> NO-BREAK SPACE
+    '\ufffe'   #  0x75 -> UNDEFINED
+    '\ufffe'   #  0x76 -> UNDEFINED
+    '\ufffe'   #  0x77 -> UNDEFINED
+    '\u2017'   #  0x78 -> DOUBLE LOW LINE
+    '`'        #  0x79 -> GRAVE ACCENT
+    ':'        #  0x7A -> COLON
+    '#'        #  0x7B -> NUMBER SIGN
+    '@'        #  0x7C -> COMMERCIAL AT
+    "'"        #  0x7D -> APOSTROPHE
+    '='        #  0x7E -> EQUALS SIGN
+    '"'        #  0x7F -> QUOTATION MARK
+    '\ufffe'   #  0x80 -> UNDEFINED
+    'a'        #  0x81 -> LATIN SMALL LETTER A
+    'b'        #  0x82 -> LATIN SMALL LETTER B
+    'c'        #  0x83 -> LATIN SMALL LETTER C
+    'd'        #  0x84 -> LATIN SMALL LETTER D
+    'e'        #  0x85 -> LATIN SMALL LETTER E
+    'f'        #  0x86 -> LATIN SMALL LETTER F
+    'g'        #  0x87 -> LATIN SMALL LETTER G
+    'h'        #  0x88 -> LATIN SMALL LETTER H
+    'i'        #  0x89 -> LATIN SMALL LETTER I
+    '\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\ufffe'   #  0x8C -> UNDEFINED
+    '\ufffe'   #  0x8D -> UNDEFINED
+    '\ufffe'   #  0x8E -> UNDEFINED
+    '\xb1'     #  0x8F -> PLUS-MINUS SIGN
+    '\xb0'     #  0x90 -> DEGREE SIGN
+    'j'        #  0x91 -> LATIN SMALL LETTER J
+    'k'        #  0x92 -> LATIN SMALL LETTER K
+    'l'        #  0x93 -> LATIN SMALL LETTER L
+    'm'        #  0x94 -> LATIN SMALL LETTER M
+    'n'        #  0x95 -> LATIN SMALL LETTER N
+    'o'        #  0x96 -> LATIN SMALL LETTER O
+    'p'        #  0x97 -> LATIN SMALL LETTER P
+    'q'        #  0x98 -> LATIN SMALL LETTER Q
+    'r'        #  0x99 -> LATIN SMALL LETTER R
+    '\ufffe'   #  0x9A -> UNDEFINED
+    '\ufffe'   #  0x9B -> UNDEFINED
+    '\ufffe'   #  0x9C -> UNDEFINED
+    '\xb8'     #  0x9D -> CEDILLA
+    '\ufffe'   #  0x9E -> UNDEFINED
+    '\xa4'     #  0x9F -> CURRENCY SIGN
+    '\xb5'     #  0xA0 -> MICRO SIGN
+    '~'        #  0xA1 -> TILDE
+    's'        #  0xA2 -> LATIN SMALL LETTER S
+    't'        #  0xA3 -> LATIN SMALL LETTER T
+    'u'        #  0xA4 -> LATIN SMALL LETTER U
+    'v'        #  0xA5 -> LATIN SMALL LETTER V
+    'w'        #  0xA6 -> LATIN SMALL LETTER W
+    'x'        #  0xA7 -> LATIN SMALL LETTER X
+    'y'        #  0xA8 -> LATIN SMALL LETTER Y
+    'z'        #  0xA9 -> LATIN SMALL LETTER Z
+    '\ufffe'   #  0xAA -> UNDEFINED
+    '\ufffe'   #  0xAB -> UNDEFINED
+    '\ufffe'   #  0xAC -> UNDEFINED
+    '\ufffe'   #  0xAD -> UNDEFINED
+    '\ufffe'   #  0xAE -> UNDEFINED
+    '\xae'     #  0xAF -> REGISTERED SIGN
+    '^'        #  0xB0 -> CIRCUMFLEX ACCENT
+    '\xa3'     #  0xB1 -> POUND SIGN
+    '\xa5'     #  0xB2 -> YEN SIGN
+    '\xb7'     #  0xB3 -> MIDDLE DOT
+    '\xa9'     #  0xB4 -> COPYRIGHT SIGN
+    '\xa7'     #  0xB5 -> SECTION SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
+    '['        #  0xBA -> LEFT SQUARE BRACKET
+    ']'        #  0xBB -> RIGHT SQUARE BRACKET
+    '\xaf'     #  0xBC -> MACRON
+    '\xa8'     #  0xBD -> DIAERESIS
+    '\xb4'     #  0xBE -> ACUTE ACCENT
+    '\xd7'     #  0xBF -> MULTIPLICATION SIGN
+    '{'        #  0xC0 -> LEFT CURLY BRACKET
+    'A'        #  0xC1 -> LATIN CAPITAL LETTER A
+    'B'        #  0xC2 -> LATIN CAPITAL LETTER B
+    'C'        #  0xC3 -> LATIN CAPITAL LETTER C
+    'D'        #  0xC4 -> LATIN CAPITAL LETTER D
+    'E'        #  0xC5 -> LATIN CAPITAL LETTER E
+    'F'        #  0xC6 -> LATIN CAPITAL LETTER F
+    'G'        #  0xC7 -> LATIN CAPITAL LETTER G
+    'H'        #  0xC8 -> LATIN CAPITAL LETTER H
+    'I'        #  0xC9 -> LATIN CAPITAL LETTER I
+    '\xad'     #  0xCA -> SOFT HYPHEN
+    '\ufffe'   #  0xCB -> UNDEFINED
+    '\ufffe'   #  0xCC -> UNDEFINED
+    '\ufffe'   #  0xCD -> UNDEFINED
+    '\ufffe'   #  0xCE -> UNDEFINED
+    '\ufffe'   #  0xCF -> UNDEFINED
+    '}'        #  0xD0 -> RIGHT CURLY BRACKET
+    'J'        #  0xD1 -> LATIN CAPITAL LETTER J
+    'K'        #  0xD2 -> LATIN CAPITAL LETTER K
+    'L'        #  0xD3 -> LATIN CAPITAL LETTER L
+    'M'        #  0xD4 -> LATIN CAPITAL LETTER M
+    'N'        #  0xD5 -> LATIN CAPITAL LETTER N
+    'O'        #  0xD6 -> LATIN CAPITAL LETTER O
+    'P'        #  0xD7 -> LATIN CAPITAL LETTER P
+    'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
+    'R'        #  0xD9 -> LATIN CAPITAL LETTER R
+    '\xb9'     #  0xDA -> SUPERSCRIPT ONE
+    '\ufffe'   #  0xDB -> UNDEFINED
+    '\ufffe'   #  0xDC -> UNDEFINED
+    '\ufffe'   #  0xDD -> UNDEFINED
+    '\ufffe'   #  0xDE -> UNDEFINED
+    '\ufffe'   #  0xDF -> UNDEFINED
+    '\\'       #  0xE0 -> REVERSE SOLIDUS
+    '\xf7'     #  0xE1 -> DIVISION SIGN
+    'S'        #  0xE2 -> LATIN CAPITAL LETTER S
+    'T'        #  0xE3 -> LATIN CAPITAL LETTER T
+    'U'        #  0xE4 -> LATIN CAPITAL LETTER U
+    'V'        #  0xE5 -> LATIN CAPITAL LETTER V
+    'W'        #  0xE6 -> LATIN CAPITAL LETTER W
+    'X'        #  0xE7 -> LATIN CAPITAL LETTER X
+    'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
+    '\xb2'     #  0xEA -> SUPERSCRIPT TWO
+    '\ufffe'   #  0xEB -> UNDEFINED
+    '\ufffe'   #  0xEC -> UNDEFINED
+    '\ufffe'   #  0xED -> UNDEFINED
+    '\ufffe'   #  0xEE -> UNDEFINED
+    '\ufffe'   #  0xEF -> UNDEFINED
+    '0'        #  0xF0 -> DIGIT ZERO
+    '1'        #  0xF1 -> DIGIT ONE
+    '2'        #  0xF2 -> DIGIT TWO
+    '3'        #  0xF3 -> DIGIT THREE
+    '4'        #  0xF4 -> DIGIT FOUR
+    '5'        #  0xF5 -> DIGIT FIVE
+    '6'        #  0xF6 -> DIGIT SIX
+    '7'        #  0xF7 -> DIGIT SEVEN
+    '8'        #  0xF8 -> DIGIT EIGHT
+    '9'        #  0xF9 -> DIGIT NINE
+    '\xb3'     #  0xFA -> SUPERSCRIPT THREE
+    '\ufffe'   #  0xFB -> UNDEFINED
+    '\ufffe'   #  0xFC -> UNDEFINED
+    '\ufffe'   #  0xFD -> UNDEFINED
+    '\ufffe'   #  0xFE -> UNDEFINED
+    '\x9f'     #  0xFF -> EIGHT ONES
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp437.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp437.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp437.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xec'     #  0x008d -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
-    u'\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
-    u'\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xff'     #  0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xa2'     #  0x009b -> CENT SIGN
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\xa5'     #  0x009d -> YEN SIGN
-    u'\u20a7'   #  0x009e -> PESETA SIGN
-    u'\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
-    u'\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
-    u'\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
-    u'\u2310'   #  0x00a9 -> REVERSED NOT SIGN
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
-    u'\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
-    u'\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
-    u'\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
-    u'\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
-    u'\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
-    u'\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
-    u'\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
-    u'\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u258c'   #  0x00dd -> LEFT HALF BLOCK
-    u'\u2590'   #  0x00de -> RIGHT HALF BLOCK
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
-    u'\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
-    u'\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
-    u'\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
-    u'\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
-    u'\xb5'     #  0x00e6 -> MICRO SIGN
-    u'\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
-    u'\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
-    u'\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
-    u'\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
-    u'\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
-    u'\u221e'   #  0x00ec -> INFINITY
-    u'\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
-    u'\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
-    u'\u2229'   #  0x00ef -> INTERSECTION
-    u'\u2261'   #  0x00f0 -> IDENTICAL TO
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
-    u'\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
-    u'\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
-    u'\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\u2248'   #  0x00f7 -> ALMOST EQUAL TO
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\u2219'   #  0x00f9 -> BULLET OPERATOR
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\u221a'   #  0x00fb -> SQUARE ROOT
-    u'\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
+    '\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xec'     #  0x008d -> LATIN SMALL LETTER I WITH GRAVE
+    '\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
+    '\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
+    '\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xff'     #  0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xa2'     #  0x009b -> CENT SIGN
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\xa5'     #  0x009d -> YEN SIGN
+    '\u20a7'   #  0x009e -> PESETA SIGN
+    '\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
+    '\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+    '\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
+    '\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
+    '\u2310'   #  0x00a9 -> REVERSED NOT SIGN
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
+    '\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+    '\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+    '\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+    '\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+    '\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+    '\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+    '\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+    '\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u258c'   #  0x00dd -> LEFT HALF BLOCK
+    '\u2590'   #  0x00de -> RIGHT HALF BLOCK
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
+    '\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
+    '\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
+    '\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
+    '\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
+    '\xb5'     #  0x00e6 -> MICRO SIGN
+    '\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
+    '\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
+    '\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
+    '\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
+    '\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
+    '\u221e'   #  0x00ec -> INFINITY
+    '\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
+    '\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
+    '\u2229'   #  0x00ef -> INTERSECTION
+    '\u2261'   #  0x00f0 -> IDENTICAL TO
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
+    '\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
+    '\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
+    '\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\u2248'   #  0x00f7 -> ALMOST EQUAL TO
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\u2219'   #  0x00f9 -> BULLET OPERATOR
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\u221a'   #  0x00fb -> SQUARE ROOT
+    '\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp500.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp500.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp500.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x9c'     #  0x04 -> CONTROL
-    u'\t'       #  0x05 -> HORIZONTAL TABULATION
-    u'\x86'     #  0x06 -> CONTROL
-    u'\x7f'     #  0x07 -> DELETE
-    u'\x97'     #  0x08 -> CONTROL
-    u'\x8d'     #  0x09 -> CONTROL
-    u'\x8e'     #  0x0A -> CONTROL
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x9d'     #  0x14 -> CONTROL
-    u'\x85'     #  0x15 -> CONTROL
-    u'\x08'     #  0x16 -> BACKSPACE
-    u'\x87'     #  0x17 -> CONTROL
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x92'     #  0x1A -> CONTROL
-    u'\x8f'     #  0x1B -> CONTROL
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u'\x80'     #  0x20 -> CONTROL
-    u'\x81'     #  0x21 -> CONTROL
-    u'\x82'     #  0x22 -> CONTROL
-    u'\x83'     #  0x23 -> CONTROL
-    u'\x84'     #  0x24 -> CONTROL
-    u'\n'       #  0x25 -> LINE FEED
-    u'\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
-    u'\x1b'     #  0x27 -> ESCAPE
-    u'\x88'     #  0x28 -> CONTROL
-    u'\x89'     #  0x29 -> CONTROL
-    u'\x8a'     #  0x2A -> CONTROL
-    u'\x8b'     #  0x2B -> CONTROL
-    u'\x8c'     #  0x2C -> CONTROL
-    u'\x05'     #  0x2D -> ENQUIRY
-    u'\x06'     #  0x2E -> ACKNOWLEDGE
-    u'\x07'     #  0x2F -> BELL
-    u'\x90'     #  0x30 -> CONTROL
-    u'\x91'     #  0x31 -> CONTROL
-    u'\x16'     #  0x32 -> SYNCHRONOUS IDLE
-    u'\x93'     #  0x33 -> CONTROL
-    u'\x94'     #  0x34 -> CONTROL
-    u'\x95'     #  0x35 -> CONTROL
-    u'\x96'     #  0x36 -> CONTROL
-    u'\x04'     #  0x37 -> END OF TRANSMISSION
-    u'\x98'     #  0x38 -> CONTROL
-    u'\x99'     #  0x39 -> CONTROL
-    u'\x9a'     #  0x3A -> CONTROL
-    u'\x9b'     #  0x3B -> CONTROL
-    u'\x14'     #  0x3C -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
-    u'\x9e'     #  0x3E -> CONTROL
-    u'\x1a'     #  0x3F -> SUBSTITUTE
-    u' '        #  0x40 -> SPACE
-    u'\xa0'     #  0x41 -> NO-BREAK SPACE
-    u'\xe2'     #  0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe0'     #  0x44 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0x45 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe3'     #  0x46 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe5'     #  0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x48 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xf1'     #  0x49 -> LATIN SMALL LETTER N WITH TILDE
-    u'['        #  0x4A -> LEFT SQUARE BRACKET
-    u'.'        #  0x4B -> FULL STOP
-    u'<'        #  0x4C -> LESS-THAN SIGN
-    u'('        #  0x4D -> LEFT PARENTHESIS
-    u'+'        #  0x4E -> PLUS SIGN
-    u'!'        #  0x4F -> EXCLAMATION MARK
-    u'&'        #  0x50 -> AMPERSAND
-    u'\xe9'     #  0x51 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x54 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xed'     #  0x55 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xec'     #  0x58 -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xdf'     #  0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
-    u']'        #  0x5A -> RIGHT SQUARE BRACKET
-    u'$'        #  0x5B -> DOLLAR SIGN
-    u'*'        #  0x5C -> ASTERISK
-    u')'        #  0x5D -> RIGHT PARENTHESIS
-    u';'        #  0x5E -> SEMICOLON
-    u'^'        #  0x5F -> CIRCUMFLEX ACCENT
-    u'-'        #  0x60 -> HYPHEN-MINUS
-    u'/'        #  0x61 -> SOLIDUS
-    u'\xc2'     #  0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc4'     #  0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc0'     #  0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc3'     #  0x66 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc5'     #  0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc7'     #  0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xd1'     #  0x69 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xa6'     #  0x6A -> BROKEN BAR
-    u','        #  0x6B -> COMMA
-    u'%'        #  0x6C -> PERCENT SIGN
-    u'_'        #  0x6D -> LOW LINE
-    u'>'        #  0x6E -> GREATER-THAN SIGN
-    u'?'        #  0x6F -> QUESTION MARK
-    u'\xf8'     #  0x70 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xc9'     #  0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xcd'     #  0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xcc'     #  0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'`'        #  0x79 -> GRAVE ACCENT
-    u':'        #  0x7A -> COLON
-    u'#'        #  0x7B -> NUMBER SIGN
-    u'@'        #  0x7C -> COMMERCIAL AT
-    u"'"        #  0x7D -> APOSTROPHE
-    u'='        #  0x7E -> EQUALS SIGN
-    u'"'        #  0x7F -> QUOTATION MARK
-    u'\xd8'     #  0x80 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'a'        #  0x81 -> LATIN SMALL LETTER A
-    u'b'        #  0x82 -> LATIN SMALL LETTER B
-    u'c'        #  0x83 -> LATIN SMALL LETTER C
-    u'd'        #  0x84 -> LATIN SMALL LETTER D
-    u'e'        #  0x85 -> LATIN SMALL LETTER E
-    u'f'        #  0x86 -> LATIN SMALL LETTER F
-    u'g'        #  0x87 -> LATIN SMALL LETTER G
-    u'h'        #  0x88 -> LATIN SMALL LETTER H
-    u'i'        #  0x89 -> LATIN SMALL LETTER I
-    u'\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xf0'     #  0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
-    u'\xfd'     #  0x8D -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xfe'     #  0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
-    u'\xb1'     #  0x8F -> PLUS-MINUS SIGN
-    u'\xb0'     #  0x90 -> DEGREE SIGN
-    u'j'        #  0x91 -> LATIN SMALL LETTER J
-    u'k'        #  0x92 -> LATIN SMALL LETTER K
-    u'l'        #  0x93 -> LATIN SMALL LETTER L
-    u'm'        #  0x94 -> LATIN SMALL LETTER M
-    u'n'        #  0x95 -> LATIN SMALL LETTER N
-    u'o'        #  0x96 -> LATIN SMALL LETTER O
-    u'p'        #  0x97 -> LATIN SMALL LETTER P
-    u'q'        #  0x98 -> LATIN SMALL LETTER Q
-    u'r'        #  0x99 -> LATIN SMALL LETTER R
-    u'\xaa'     #  0x9A -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0x9B -> MASCULINE ORDINAL INDICATOR
-    u'\xe6'     #  0x9C -> LATIN SMALL LIGATURE AE
-    u'\xb8'     #  0x9D -> CEDILLA
-    u'\xc6'     #  0x9E -> LATIN CAPITAL LIGATURE AE
-    u'\xa4'     #  0x9F -> CURRENCY SIGN
-    u'\xb5'     #  0xA0 -> MICRO SIGN
-    u'~'        #  0xA1 -> TILDE
-    u's'        #  0xA2 -> LATIN SMALL LETTER S
-    u't'        #  0xA3 -> LATIN SMALL LETTER T
-    u'u'        #  0xA4 -> LATIN SMALL LETTER U
-    u'v'        #  0xA5 -> LATIN SMALL LETTER V
-    u'w'        #  0xA6 -> LATIN SMALL LETTER W
-    u'x'        #  0xA7 -> LATIN SMALL LETTER X
-    u'y'        #  0xA8 -> LATIN SMALL LETTER Y
-    u'z'        #  0xA9 -> LATIN SMALL LETTER Z
-    u'\xa1'     #  0xAA -> INVERTED EXCLAMATION MARK
-    u'\xbf'     #  0xAB -> INVERTED QUESTION MARK
-    u'\xd0'     #  0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
-    u'\xdd'     #  0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xde'     #  0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
-    u'\xae'     #  0xAF -> REGISTERED SIGN
-    u'\xa2'     #  0xB0 -> CENT SIGN
-    u'\xa3'     #  0xB1 -> POUND SIGN
-    u'\xa5'     #  0xB2 -> YEN SIGN
-    u'\xb7'     #  0xB3 -> MIDDLE DOT
-    u'\xa9'     #  0xB4 -> COPYRIGHT SIGN
-    u'\xa7'     #  0xB5 -> SECTION SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
-    u'\xac'     #  0xBA -> NOT SIGN
-    u'|'        #  0xBB -> VERTICAL LINE
-    u'\xaf'     #  0xBC -> MACRON
-    u'\xa8'     #  0xBD -> DIAERESIS
-    u'\xb4'     #  0xBE -> ACUTE ACCENT
-    u'\xd7'     #  0xBF -> MULTIPLICATION SIGN
-    u'{'        #  0xC0 -> LEFT CURLY BRACKET
-    u'A'        #  0xC1 -> LATIN CAPITAL LETTER A
-    u'B'        #  0xC2 -> LATIN CAPITAL LETTER B
-    u'C'        #  0xC3 -> LATIN CAPITAL LETTER C
-    u'D'        #  0xC4 -> LATIN CAPITAL LETTER D
-    u'E'        #  0xC5 -> LATIN CAPITAL LETTER E
-    u'F'        #  0xC6 -> LATIN CAPITAL LETTER F
-    u'G'        #  0xC7 -> LATIN CAPITAL LETTER G
-    u'H'        #  0xC8 -> LATIN CAPITAL LETTER H
-    u'I'        #  0xC9 -> LATIN CAPITAL LETTER I
-    u'\xad'     #  0xCA -> SOFT HYPHEN
-    u'\xf4'     #  0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf2'     #  0xCD -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xCE -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf5'     #  0xCF -> LATIN SMALL LETTER O WITH TILDE
-    u'}'        #  0xD0 -> RIGHT CURLY BRACKET
-    u'J'        #  0xD1 -> LATIN CAPITAL LETTER J
-    u'K'        #  0xD2 -> LATIN CAPITAL LETTER K
-    u'L'        #  0xD3 -> LATIN CAPITAL LETTER L
-    u'M'        #  0xD4 -> LATIN CAPITAL LETTER M
-    u'N'        #  0xD5 -> LATIN CAPITAL LETTER N
-    u'O'        #  0xD6 -> LATIN CAPITAL LETTER O
-    u'P'        #  0xD7 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0xD9 -> LATIN CAPITAL LETTER R
-    u'\xb9'     #  0xDA -> SUPERSCRIPT ONE
-    u'\xfb'     #  0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xf9'     #  0xDD -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xDE -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xff'     #  0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\\'       #  0xE0 -> REVERSE SOLIDUS
-    u'\xf7'     #  0xE1 -> DIVISION SIGN
-    u'S'        #  0xE2 -> LATIN CAPITAL LETTER S
-    u'T'        #  0xE3 -> LATIN CAPITAL LETTER T
-    u'U'        #  0xE4 -> LATIN CAPITAL LETTER U
-    u'V'        #  0xE5 -> LATIN CAPITAL LETTER V
-    u'W'        #  0xE6 -> LATIN CAPITAL LETTER W
-    u'X'        #  0xE7 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
-    u'\xb2'     #  0xEA -> SUPERSCRIPT TWO
-    u'\xd4'     #  0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd6'     #  0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd2'     #  0xED -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd5'     #  0xEF -> LATIN CAPITAL LETTER O WITH TILDE
-    u'0'        #  0xF0 -> DIGIT ZERO
-    u'1'        #  0xF1 -> DIGIT ONE
-    u'2'        #  0xF2 -> DIGIT TWO
-    u'3'        #  0xF3 -> DIGIT THREE
-    u'4'        #  0xF4 -> DIGIT FOUR
-    u'5'        #  0xF5 -> DIGIT FIVE
-    u'6'        #  0xF6 -> DIGIT SIX
-    u'7'        #  0xF7 -> DIGIT SEVEN
-    u'8'        #  0xF8 -> DIGIT EIGHT
-    u'9'        #  0xF9 -> DIGIT NINE
-    u'\xb3'     #  0xFA -> SUPERSCRIPT THREE
-    u'\xdb'     #  0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\x9f'     #  0xFF -> CONTROL
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x9c'     #  0x04 -> CONTROL
+    '\t'       #  0x05 -> HORIZONTAL TABULATION
+    '\x86'     #  0x06 -> CONTROL
+    '\x7f'     #  0x07 -> DELETE
+    '\x97'     #  0x08 -> CONTROL
+    '\x8d'     #  0x09 -> CONTROL
+    '\x8e'     #  0x0A -> CONTROL
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x9d'     #  0x14 -> CONTROL
+    '\x85'     #  0x15 -> CONTROL
+    '\x08'     #  0x16 -> BACKSPACE
+    '\x87'     #  0x17 -> CONTROL
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x92'     #  0x1A -> CONTROL
+    '\x8f'     #  0x1B -> CONTROL
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    '\x80'     #  0x20 -> CONTROL
+    '\x81'     #  0x21 -> CONTROL
+    '\x82'     #  0x22 -> CONTROL
+    '\x83'     #  0x23 -> CONTROL
+    '\x84'     #  0x24 -> CONTROL
+    '\n'       #  0x25 -> LINE FEED
+    '\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
+    '\x1b'     #  0x27 -> ESCAPE
+    '\x88'     #  0x28 -> CONTROL
+    '\x89'     #  0x29 -> CONTROL
+    '\x8a'     #  0x2A -> CONTROL
+    '\x8b'     #  0x2B -> CONTROL
+    '\x8c'     #  0x2C -> CONTROL
+    '\x05'     #  0x2D -> ENQUIRY
+    '\x06'     #  0x2E -> ACKNOWLEDGE
+    '\x07'     #  0x2F -> BELL
+    '\x90'     #  0x30 -> CONTROL
+    '\x91'     #  0x31 -> CONTROL
+    '\x16'     #  0x32 -> SYNCHRONOUS IDLE
+    '\x93'     #  0x33 -> CONTROL
+    '\x94'     #  0x34 -> CONTROL
+    '\x95'     #  0x35 -> CONTROL
+    '\x96'     #  0x36 -> CONTROL
+    '\x04'     #  0x37 -> END OF TRANSMISSION
+    '\x98'     #  0x38 -> CONTROL
+    '\x99'     #  0x39 -> CONTROL
+    '\x9a'     #  0x3A -> CONTROL
+    '\x9b'     #  0x3B -> CONTROL
+    '\x14'     #  0x3C -> DEVICE CONTROL FOUR
+    '\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
+    '\x9e'     #  0x3E -> CONTROL
+    '\x1a'     #  0x3F -> SUBSTITUTE
+    ' '        #  0x40 -> SPACE
+    '\xa0'     #  0x41 -> NO-BREAK SPACE
+    '\xe2'     #  0x42 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x43 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe0'     #  0x44 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0x45 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe3'     #  0x46 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe5'     #  0x47 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x48 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xf1'     #  0x49 -> LATIN SMALL LETTER N WITH TILDE
+    '['        #  0x4A -> LEFT SQUARE BRACKET
+    '.'        #  0x4B -> FULL STOP
+    '<'        #  0x4C -> LESS-THAN SIGN
+    '('        #  0x4D -> LEFT PARENTHESIS
+    '+'        #  0x4E -> PLUS SIGN
+    '!'        #  0x4F -> EXCLAMATION MARK
+    '&'        #  0x50 -> AMPERSAND
+    '\xe9'     #  0x51 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0x52 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x53 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x54 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xed'     #  0x55 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0x56 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x57 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xec'     #  0x58 -> LATIN SMALL LETTER I WITH GRAVE
+    '\xdf'     #  0x59 -> LATIN SMALL LETTER SHARP S (GERMAN)
+    ']'        #  0x5A -> RIGHT SQUARE BRACKET
+    '$'        #  0x5B -> DOLLAR SIGN
+    '*'        #  0x5C -> ASTERISK
+    ')'        #  0x5D -> RIGHT PARENTHESIS
+    ';'        #  0x5E -> SEMICOLON
+    '^'        #  0x5F -> CIRCUMFLEX ACCENT
+    '-'        #  0x60 -> HYPHEN-MINUS
+    '/'        #  0x61 -> SOLIDUS
+    '\xc2'     #  0x62 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc4'     #  0x63 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc0'     #  0x64 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0x65 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc3'     #  0x66 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc5'     #  0x67 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc7'     #  0x68 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xd1'     #  0x69 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xa6'     #  0x6A -> BROKEN BAR
+    ','        #  0x6B -> COMMA
+    '%'        #  0x6C -> PERCENT SIGN
+    '_'        #  0x6D -> LOW LINE
+    '>'        #  0x6E -> GREATER-THAN SIGN
+    '?'        #  0x6F -> QUESTION MARK
+    '\xf8'     #  0x70 -> LATIN SMALL LETTER O WITH STROKE
+    '\xc9'     #  0x71 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0x72 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0x73 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0x74 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xcd'     #  0x75 -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0x76 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0x77 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xcc'     #  0x78 -> LATIN CAPITAL LETTER I WITH GRAVE
+    '`'        #  0x79 -> GRAVE ACCENT
+    ':'        #  0x7A -> COLON
+    '#'        #  0x7B -> NUMBER SIGN
+    '@'        #  0x7C -> COMMERCIAL AT
+    "'"        #  0x7D -> APOSTROPHE
+    '='        #  0x7E -> EQUALS SIGN
+    '"'        #  0x7F -> QUOTATION MARK
+    '\xd8'     #  0x80 -> LATIN CAPITAL LETTER O WITH STROKE
+    'a'        #  0x81 -> LATIN SMALL LETTER A
+    'b'        #  0x82 -> LATIN SMALL LETTER B
+    'c'        #  0x83 -> LATIN SMALL LETTER C
+    'd'        #  0x84 -> LATIN SMALL LETTER D
+    'e'        #  0x85 -> LATIN SMALL LETTER E
+    'f'        #  0x86 -> LATIN SMALL LETTER F
+    'g'        #  0x87 -> LATIN SMALL LETTER G
+    'h'        #  0x88 -> LATIN SMALL LETTER H
+    'i'        #  0x89 -> LATIN SMALL LETTER I
+    '\xab'     #  0x8A -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x8B -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xf0'     #  0x8C -> LATIN SMALL LETTER ETH (ICELANDIC)
+    '\xfd'     #  0x8D -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xfe'     #  0x8E -> LATIN SMALL LETTER THORN (ICELANDIC)
+    '\xb1'     #  0x8F -> PLUS-MINUS SIGN
+    '\xb0'     #  0x90 -> DEGREE SIGN
+    'j'        #  0x91 -> LATIN SMALL LETTER J
+    'k'        #  0x92 -> LATIN SMALL LETTER K
+    'l'        #  0x93 -> LATIN SMALL LETTER L
+    'm'        #  0x94 -> LATIN SMALL LETTER M
+    'n'        #  0x95 -> LATIN SMALL LETTER N
+    'o'        #  0x96 -> LATIN SMALL LETTER O
+    'p'        #  0x97 -> LATIN SMALL LETTER P
+    'q'        #  0x98 -> LATIN SMALL LETTER Q
+    'r'        #  0x99 -> LATIN SMALL LETTER R
+    '\xaa'     #  0x9A -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0x9B -> MASCULINE ORDINAL INDICATOR
+    '\xe6'     #  0x9C -> LATIN SMALL LIGATURE AE
+    '\xb8'     #  0x9D -> CEDILLA
+    '\xc6'     #  0x9E -> LATIN CAPITAL LIGATURE AE
+    '\xa4'     #  0x9F -> CURRENCY SIGN
+    '\xb5'     #  0xA0 -> MICRO SIGN
+    '~'        #  0xA1 -> TILDE
+    's'        #  0xA2 -> LATIN SMALL LETTER S
+    't'        #  0xA3 -> LATIN SMALL LETTER T
+    'u'        #  0xA4 -> LATIN SMALL LETTER U
+    'v'        #  0xA5 -> LATIN SMALL LETTER V
+    'w'        #  0xA6 -> LATIN SMALL LETTER W
+    'x'        #  0xA7 -> LATIN SMALL LETTER X
+    'y'        #  0xA8 -> LATIN SMALL LETTER Y
+    'z'        #  0xA9 -> LATIN SMALL LETTER Z
+    '\xa1'     #  0xAA -> INVERTED EXCLAMATION MARK
+    '\xbf'     #  0xAB -> INVERTED QUESTION MARK
+    '\xd0'     #  0xAC -> LATIN CAPITAL LETTER ETH (ICELANDIC)
+    '\xdd'     #  0xAD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xde'     #  0xAE -> LATIN CAPITAL LETTER THORN (ICELANDIC)
+    '\xae'     #  0xAF -> REGISTERED SIGN
+    '\xa2'     #  0xB0 -> CENT SIGN
+    '\xa3'     #  0xB1 -> POUND SIGN
+    '\xa5'     #  0xB2 -> YEN SIGN
+    '\xb7'     #  0xB3 -> MIDDLE DOT
+    '\xa9'     #  0xB4 -> COPYRIGHT SIGN
+    '\xa7'     #  0xB5 -> SECTION SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xbc'     #  0xB7 -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xB8 -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xB9 -> VULGAR FRACTION THREE QUARTERS
+    '\xac'     #  0xBA -> NOT SIGN
+    '|'        #  0xBB -> VERTICAL LINE
+    '\xaf'     #  0xBC -> MACRON
+    '\xa8'     #  0xBD -> DIAERESIS
+    '\xb4'     #  0xBE -> ACUTE ACCENT
+    '\xd7'     #  0xBF -> MULTIPLICATION SIGN
+    '{'        #  0xC0 -> LEFT CURLY BRACKET
+    'A'        #  0xC1 -> LATIN CAPITAL LETTER A
+    'B'        #  0xC2 -> LATIN CAPITAL LETTER B
+    'C'        #  0xC3 -> LATIN CAPITAL LETTER C
+    'D'        #  0xC4 -> LATIN CAPITAL LETTER D
+    'E'        #  0xC5 -> LATIN CAPITAL LETTER E
+    'F'        #  0xC6 -> LATIN CAPITAL LETTER F
+    'G'        #  0xC7 -> LATIN CAPITAL LETTER G
+    'H'        #  0xC8 -> LATIN CAPITAL LETTER H
+    'I'        #  0xC9 -> LATIN CAPITAL LETTER I
+    '\xad'     #  0xCA -> SOFT HYPHEN
+    '\xf4'     #  0xCB -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0xCC -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf2'     #  0xCD -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xCE -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf5'     #  0xCF -> LATIN SMALL LETTER O WITH TILDE
+    '}'        #  0xD0 -> RIGHT CURLY BRACKET
+    'J'        #  0xD1 -> LATIN CAPITAL LETTER J
+    'K'        #  0xD2 -> LATIN CAPITAL LETTER K
+    'L'        #  0xD3 -> LATIN CAPITAL LETTER L
+    'M'        #  0xD4 -> LATIN CAPITAL LETTER M
+    'N'        #  0xD5 -> LATIN CAPITAL LETTER N
+    'O'        #  0xD6 -> LATIN CAPITAL LETTER O
+    'P'        #  0xD7 -> LATIN CAPITAL LETTER P
+    'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
+    'R'        #  0xD9 -> LATIN CAPITAL LETTER R
+    '\xb9'     #  0xDA -> SUPERSCRIPT ONE
+    '\xfb'     #  0xDB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xDC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xf9'     #  0xDD -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xDE -> LATIN SMALL LETTER U WITH ACUTE
+    '\xff'     #  0xDF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\\'       #  0xE0 -> REVERSE SOLIDUS
+    '\xf7'     #  0xE1 -> DIVISION SIGN
+    'S'        #  0xE2 -> LATIN CAPITAL LETTER S
+    'T'        #  0xE3 -> LATIN CAPITAL LETTER T
+    'U'        #  0xE4 -> LATIN CAPITAL LETTER U
+    'V'        #  0xE5 -> LATIN CAPITAL LETTER V
+    'W'        #  0xE6 -> LATIN CAPITAL LETTER W
+    'X'        #  0xE7 -> LATIN CAPITAL LETTER X
+    'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
+    '\xb2'     #  0xEA -> SUPERSCRIPT TWO
+    '\xd4'     #  0xEB -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd6'     #  0xEC -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd2'     #  0xED -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd5'     #  0xEF -> LATIN CAPITAL LETTER O WITH TILDE
+    '0'        #  0xF0 -> DIGIT ZERO
+    '1'        #  0xF1 -> DIGIT ONE
+    '2'        #  0xF2 -> DIGIT TWO
+    '3'        #  0xF3 -> DIGIT THREE
+    '4'        #  0xF4 -> DIGIT FOUR
+    '5'        #  0xF5 -> DIGIT FIVE
+    '6'        #  0xF6 -> DIGIT SIX
+    '7'        #  0xF7 -> DIGIT SEVEN
+    '8'        #  0xF8 -> DIGIT EIGHT
+    '9'        #  0xF9 -> DIGIT NINE
+    '\xb3'     #  0xFA -> SUPERSCRIPT THREE
+    '\xdb'     #  0xFB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xFC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xd9'     #  0xFD -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xFE -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\x9f'     #  0xFF -> CONTROL
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp737.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp737.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp737.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\u0391'   #  0x0080 -> GREEK CAPITAL LETTER ALPHA
-    u'\u0392'   #  0x0081 -> GREEK CAPITAL LETTER BETA
-    u'\u0393'   #  0x0082 -> GREEK CAPITAL LETTER GAMMA
-    u'\u0394'   #  0x0083 -> GREEK CAPITAL LETTER DELTA
-    u'\u0395'   #  0x0084 -> GREEK CAPITAL LETTER EPSILON
-    u'\u0396'   #  0x0085 -> GREEK CAPITAL LETTER ZETA
-    u'\u0397'   #  0x0086 -> GREEK CAPITAL LETTER ETA
-    u'\u0398'   #  0x0087 -> GREEK CAPITAL LETTER THETA
-    u'\u0399'   #  0x0088 -> GREEK CAPITAL LETTER IOTA
-    u'\u039a'   #  0x0089 -> GREEK CAPITAL LETTER KAPPA
-    u'\u039b'   #  0x008a -> GREEK CAPITAL LETTER LAMDA
-    u'\u039c'   #  0x008b -> GREEK CAPITAL LETTER MU
-    u'\u039d'   #  0x008c -> GREEK CAPITAL LETTER NU
-    u'\u039e'   #  0x008d -> GREEK CAPITAL LETTER XI
-    u'\u039f'   #  0x008e -> GREEK CAPITAL LETTER OMICRON
-    u'\u03a0'   #  0x008f -> GREEK CAPITAL LETTER PI
-    u'\u03a1'   #  0x0090 -> GREEK CAPITAL LETTER RHO
-    u'\u03a3'   #  0x0091 -> GREEK CAPITAL LETTER SIGMA
-    u'\u03a4'   #  0x0092 -> GREEK CAPITAL LETTER TAU
-    u'\u03a5'   #  0x0093 -> GREEK CAPITAL LETTER UPSILON
-    u'\u03a6'   #  0x0094 -> GREEK CAPITAL LETTER PHI
-    u'\u03a7'   #  0x0095 -> GREEK CAPITAL LETTER CHI
-    u'\u03a8'   #  0x0096 -> GREEK CAPITAL LETTER PSI
-    u'\u03a9'   #  0x0097 -> GREEK CAPITAL LETTER OMEGA
-    u'\u03b1'   #  0x0098 -> GREEK SMALL LETTER ALPHA
-    u'\u03b2'   #  0x0099 -> GREEK SMALL LETTER BETA
-    u'\u03b3'   #  0x009a -> GREEK SMALL LETTER GAMMA
-    u'\u03b4'   #  0x009b -> GREEK SMALL LETTER DELTA
-    u'\u03b5'   #  0x009c -> GREEK SMALL LETTER EPSILON
-    u'\u03b6'   #  0x009d -> GREEK SMALL LETTER ZETA
-    u'\u03b7'   #  0x009e -> GREEK SMALL LETTER ETA
-    u'\u03b8'   #  0x009f -> GREEK SMALL LETTER THETA
-    u'\u03b9'   #  0x00a0 -> GREEK SMALL LETTER IOTA
-    u'\u03ba'   #  0x00a1 -> GREEK SMALL LETTER KAPPA
-    u'\u03bb'   #  0x00a2 -> GREEK SMALL LETTER LAMDA
-    u'\u03bc'   #  0x00a3 -> GREEK SMALL LETTER MU
-    u'\u03bd'   #  0x00a4 -> GREEK SMALL LETTER NU
-    u'\u03be'   #  0x00a5 -> GREEK SMALL LETTER XI
-    u'\u03bf'   #  0x00a6 -> GREEK SMALL LETTER OMICRON
-    u'\u03c0'   #  0x00a7 -> GREEK SMALL LETTER PI
-    u'\u03c1'   #  0x00a8 -> GREEK SMALL LETTER RHO
-    u'\u03c3'   #  0x00a9 -> GREEK SMALL LETTER SIGMA
-    u'\u03c2'   #  0x00aa -> GREEK SMALL LETTER FINAL SIGMA
-    u'\u03c4'   #  0x00ab -> GREEK SMALL LETTER TAU
-    u'\u03c5'   #  0x00ac -> GREEK SMALL LETTER UPSILON
-    u'\u03c6'   #  0x00ad -> GREEK SMALL LETTER PHI
-    u'\u03c7'   #  0x00ae -> GREEK SMALL LETTER CHI
-    u'\u03c8'   #  0x00af -> GREEK SMALL LETTER PSI
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
-    u'\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
-    u'\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
-    u'\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
-    u'\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
-    u'\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
-    u'\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
-    u'\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u258c'   #  0x00dd -> LEFT HALF BLOCK
-    u'\u2590'   #  0x00de -> RIGHT HALF BLOCK
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u03c9'   #  0x00e0 -> GREEK SMALL LETTER OMEGA
-    u'\u03ac'   #  0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS
-    u'\u03ad'   #  0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS
-    u'\u03ae'   #  0x00e3 -> GREEK SMALL LETTER ETA WITH TONOS
-    u'\u03ca'   #  0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
-    u'\u03af'   #  0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS
-    u'\u03cc'   #  0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS
-    u'\u03cd'   #  0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS
-    u'\u03cb'   #  0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
-    u'\u03ce'   #  0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS
-    u'\u0386'   #  0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS
-    u'\u0388'   #  0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS
-    u'\u0389'   #  0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS
-    u'\u038a'   #  0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS
-    u'\u038c'   #  0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS
-    u'\u038e'   #  0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS
-    u'\u038f'   #  0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
-    u'\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
-    u'\u03aa'   #  0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
-    u'\u03ab'   #  0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\u2248'   #  0x00f7 -> ALMOST EQUAL TO
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\u2219'   #  0x00f9 -> BULLET OPERATOR
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\u221a'   #  0x00fb -> SQUARE ROOT
-    u'\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\u0391'   #  0x0080 -> GREEK CAPITAL LETTER ALPHA
+    '\u0392'   #  0x0081 -> GREEK CAPITAL LETTER BETA
+    '\u0393'   #  0x0082 -> GREEK CAPITAL LETTER GAMMA
+    '\u0394'   #  0x0083 -> GREEK CAPITAL LETTER DELTA
+    '\u0395'   #  0x0084 -> GREEK CAPITAL LETTER EPSILON
+    '\u0396'   #  0x0085 -> GREEK CAPITAL LETTER ZETA
+    '\u0397'   #  0x0086 -> GREEK CAPITAL LETTER ETA
+    '\u0398'   #  0x0087 -> GREEK CAPITAL LETTER THETA
+    '\u0399'   #  0x0088 -> GREEK CAPITAL LETTER IOTA
+    '\u039a'   #  0x0089 -> GREEK CAPITAL LETTER KAPPA
+    '\u039b'   #  0x008a -> GREEK CAPITAL LETTER LAMDA
+    '\u039c'   #  0x008b -> GREEK CAPITAL LETTER MU
+    '\u039d'   #  0x008c -> GREEK CAPITAL LETTER NU
+    '\u039e'   #  0x008d -> GREEK CAPITAL LETTER XI
+    '\u039f'   #  0x008e -> GREEK CAPITAL LETTER OMICRON
+    '\u03a0'   #  0x008f -> GREEK CAPITAL LETTER PI
+    '\u03a1'   #  0x0090 -> GREEK CAPITAL LETTER RHO
+    '\u03a3'   #  0x0091 -> GREEK CAPITAL LETTER SIGMA
+    '\u03a4'   #  0x0092 -> GREEK CAPITAL LETTER TAU
+    '\u03a5'   #  0x0093 -> GREEK CAPITAL LETTER UPSILON
+    '\u03a6'   #  0x0094 -> GREEK CAPITAL LETTER PHI
+    '\u03a7'   #  0x0095 -> GREEK CAPITAL LETTER CHI
+    '\u03a8'   #  0x0096 -> GREEK CAPITAL LETTER PSI
+    '\u03a9'   #  0x0097 -> GREEK CAPITAL LETTER OMEGA
+    '\u03b1'   #  0x0098 -> GREEK SMALL LETTER ALPHA
+    '\u03b2'   #  0x0099 -> GREEK SMALL LETTER BETA
+    '\u03b3'   #  0x009a -> GREEK SMALL LETTER GAMMA
+    '\u03b4'   #  0x009b -> GREEK SMALL LETTER DELTA
+    '\u03b5'   #  0x009c -> GREEK SMALL LETTER EPSILON
+    '\u03b6'   #  0x009d -> GREEK SMALL LETTER ZETA
+    '\u03b7'   #  0x009e -> GREEK SMALL LETTER ETA
+    '\u03b8'   #  0x009f -> GREEK SMALL LETTER THETA
+    '\u03b9'   #  0x00a0 -> GREEK SMALL LETTER IOTA
+    '\u03ba'   #  0x00a1 -> GREEK SMALL LETTER KAPPA
+    '\u03bb'   #  0x00a2 -> GREEK SMALL LETTER LAMDA
+    '\u03bc'   #  0x00a3 -> GREEK SMALL LETTER MU
+    '\u03bd'   #  0x00a4 -> GREEK SMALL LETTER NU
+    '\u03be'   #  0x00a5 -> GREEK SMALL LETTER XI
+    '\u03bf'   #  0x00a6 -> GREEK SMALL LETTER OMICRON
+    '\u03c0'   #  0x00a7 -> GREEK SMALL LETTER PI
+    '\u03c1'   #  0x00a8 -> GREEK SMALL LETTER RHO
+    '\u03c3'   #  0x00a9 -> GREEK SMALL LETTER SIGMA
+    '\u03c2'   #  0x00aa -> GREEK SMALL LETTER FINAL SIGMA
+    '\u03c4'   #  0x00ab -> GREEK SMALL LETTER TAU
+    '\u03c5'   #  0x00ac -> GREEK SMALL LETTER UPSILON
+    '\u03c6'   #  0x00ad -> GREEK SMALL LETTER PHI
+    '\u03c7'   #  0x00ae -> GREEK SMALL LETTER CHI
+    '\u03c8'   #  0x00af -> GREEK SMALL LETTER PSI
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+    '\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+    '\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+    '\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+    '\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+    '\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+    '\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+    '\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u258c'   #  0x00dd -> LEFT HALF BLOCK
+    '\u2590'   #  0x00de -> RIGHT HALF BLOCK
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u03c9'   #  0x00e0 -> GREEK SMALL LETTER OMEGA
+    '\u03ac'   #  0x00e1 -> GREEK SMALL LETTER ALPHA WITH TONOS
+    '\u03ad'   #  0x00e2 -> GREEK SMALL LETTER EPSILON WITH TONOS
+    '\u03ae'   #  0x00e3 -> GREEK SMALL LETTER ETA WITH TONOS
+    '\u03ca'   #  0x00e4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+    '\u03af'   #  0x00e5 -> GREEK SMALL LETTER IOTA WITH TONOS
+    '\u03cc'   #  0x00e6 -> GREEK SMALL LETTER OMICRON WITH TONOS
+    '\u03cd'   #  0x00e7 -> GREEK SMALL LETTER UPSILON WITH TONOS
+    '\u03cb'   #  0x00e8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+    '\u03ce'   #  0x00e9 -> GREEK SMALL LETTER OMEGA WITH TONOS
+    '\u0386'   #  0x00ea -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+    '\u0388'   #  0x00eb -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+    '\u0389'   #  0x00ec -> GREEK CAPITAL LETTER ETA WITH TONOS
+    '\u038a'   #  0x00ed -> GREEK CAPITAL LETTER IOTA WITH TONOS
+    '\u038c'   #  0x00ee -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+    '\u038e'   #  0x00ef -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+    '\u038f'   #  0x00f0 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
+    '\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
+    '\u03aa'   #  0x00f4 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+    '\u03ab'   #  0x00f5 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\u2248'   #  0x00f7 -> ALMOST EQUAL TO
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\u2219'   #  0x00f9 -> BULLET OPERATOR
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\u221a'   #  0x00fb -> SQUARE ROOT
+    '\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp775.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp775.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp775.py	Wed May  2 21:09:54 2007
@@ -177,262 +177,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\u0106'   #  0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE
-    u'\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\u0101'   #  0x0083 -> LATIN SMALL LETTER A WITH MACRON
-    u'\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u0123'   #  0x0085 -> LATIN SMALL LETTER G WITH CEDILLA
-    u'\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\u0107'   #  0x0087 -> LATIN SMALL LETTER C WITH ACUTE
-    u'\u0142'   #  0x0088 -> LATIN SMALL LETTER L WITH STROKE
-    u'\u0113'   #  0x0089 -> LATIN SMALL LETTER E WITH MACRON
-    u'\u0156'   #  0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA
-    u'\u0157'   #  0x008b -> LATIN SMALL LETTER R WITH CEDILLA
-    u'\u012b'   #  0x008c -> LATIN SMALL LETTER I WITH MACRON
-    u'\u0179'   #  0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
-    u'\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
-    u'\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
-    u'\u014d'   #  0x0093 -> LATIN SMALL LETTER O WITH MACRON
-    u'\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\u0122'   #  0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA
-    u'\xa2'     #  0x0096 -> CENT SIGN
-    u'\u015a'   #  0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
-    u'\u015b'   #  0x0098 -> LATIN SMALL LETTER S WITH ACUTE
-    u'\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xd7'     #  0x009e -> MULTIPLICATION SIGN
-    u'\xa4'     #  0x009f -> CURRENCY SIGN
-    u'\u0100'   #  0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON
-    u'\u012a'   #  0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\u017b'   #  0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
-    u'\u017c'   #  0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE
-    u'\u017a'   #  0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE
-    u'\u201d'   #  0x00a6 -> RIGHT DOUBLE QUOTATION MARK
-    u'\xa6'     #  0x00a7 -> BROKEN BAR
-    u'\xa9'     #  0x00a8 -> COPYRIGHT SIGN
-    u'\xae'     #  0x00a9 -> REGISTERED SIGN
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
-    u'\u0141'   #  0x00ad -> LATIN CAPITAL LETTER L WITH STROKE
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u0104'   #  0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\u010c'   #  0x00b6 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\u0118'   #  0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\u0116'   #  0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u012e'   #  0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK
-    u'\u0160'   #  0x00be -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u0172'   #  0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK
-    u'\u016a'   #  0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u017d'   #  0x00cf -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\u0105'   #  0x00d0 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u010d'   #  0x00d1 -> LATIN SMALL LETTER C WITH CARON
-    u'\u0119'   #  0x00d2 -> LATIN SMALL LETTER E WITH OGONEK
-    u'\u0117'   #  0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE
-    u'\u012f'   #  0x00d4 -> LATIN SMALL LETTER I WITH OGONEK
-    u'\u0161'   #  0x00d5 -> LATIN SMALL LETTER S WITH CARON
-    u'\u0173'   #  0x00d6 -> LATIN SMALL LETTER U WITH OGONEK
-    u'\u016b'   #  0x00d7 -> LATIN SMALL LETTER U WITH MACRON
-    u'\u017e'   #  0x00d8 -> LATIN SMALL LETTER Z WITH CARON
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u258c'   #  0x00dd -> LEFT HALF BLOCK
-    u'\u2590'   #  0x00de -> RIGHT HALF BLOCK
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\xd3'     #  0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
-    u'\u014c'   #  0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON
-    u'\u0143'   #  0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
-    u'\xf5'     #  0x00e4 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xd5'     #  0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xb5'     #  0x00e6 -> MICRO SIGN
-    u'\u0144'   #  0x00e7 -> LATIN SMALL LETTER N WITH ACUTE
-    u'\u0136'   #  0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA
-    u'\u0137'   #  0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA
-    u'\u013b'   #  0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA
-    u'\u013c'   #  0x00eb -> LATIN SMALL LETTER L WITH CEDILLA
-    u'\u0146'   #  0x00ec -> LATIN SMALL LETTER N WITH CEDILLA
-    u'\u0112'   #  0x00ed -> LATIN CAPITAL LETTER E WITH MACRON
-    u'\u0145'   #  0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA
-    u'\u2019'   #  0x00ef -> RIGHT SINGLE QUOTATION MARK
-    u'\xad'     #  0x00f0 -> SOFT HYPHEN
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u201c'   #  0x00f2 -> LEFT DOUBLE QUOTATION MARK
-    u'\xbe'     #  0x00f3 -> VULGAR FRACTION THREE QUARTERS
-    u'\xb6'     #  0x00f4 -> PILCROW SIGN
-    u'\xa7'     #  0x00f5 -> SECTION SIGN
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\u201e'   #  0x00f7 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\u2219'   #  0x00f9 -> BULLET OPERATOR
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\xb9'     #  0x00fb -> SUPERSCRIPT ONE
-    u'\xb3'     #  0x00fc -> SUPERSCRIPT THREE
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\u0106'   #  0x0080 -> LATIN CAPITAL LETTER C WITH ACUTE
+    '\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+    '\u0101'   #  0x0083 -> LATIN SMALL LETTER A WITH MACRON
+    '\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u0123'   #  0x0085 -> LATIN SMALL LETTER G WITH CEDILLA
+    '\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\u0107'   #  0x0087 -> LATIN SMALL LETTER C WITH ACUTE
+    '\u0142'   #  0x0088 -> LATIN SMALL LETTER L WITH STROKE
+    '\u0113'   #  0x0089 -> LATIN SMALL LETTER E WITH MACRON
+    '\u0156'   #  0x008a -> LATIN CAPITAL LETTER R WITH CEDILLA
+    '\u0157'   #  0x008b -> LATIN SMALL LETTER R WITH CEDILLA
+    '\u012b'   #  0x008c -> LATIN SMALL LETTER I WITH MACRON
+    '\u0179'   #  0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
+    '\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
+    '\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
+    '\u014d'   #  0x0093 -> LATIN SMALL LETTER O WITH MACRON
+    '\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\u0122'   #  0x0095 -> LATIN CAPITAL LETTER G WITH CEDILLA
+    '\xa2'     #  0x0096 -> CENT SIGN
+    '\u015a'   #  0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
+    '\u015b'   #  0x0098 -> LATIN SMALL LETTER S WITH ACUTE
+    '\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xd7'     #  0x009e -> MULTIPLICATION SIGN
+    '\xa4'     #  0x009f -> CURRENCY SIGN
+    '\u0100'   #  0x00a0 -> LATIN CAPITAL LETTER A WITH MACRON
+    '\u012a'   #  0x00a1 -> LATIN CAPITAL LETTER I WITH MACRON
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\u017b'   #  0x00a3 -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    '\u017c'   #  0x00a4 -> LATIN SMALL LETTER Z WITH DOT ABOVE
+    '\u017a'   #  0x00a5 -> LATIN SMALL LETTER Z WITH ACUTE
+    '\u201d'   #  0x00a6 -> RIGHT DOUBLE QUOTATION MARK
+    '\xa6'     #  0x00a7 -> BROKEN BAR
+    '\xa9'     #  0x00a8 -> COPYRIGHT SIGN
+    '\xae'     #  0x00a9 -> REGISTERED SIGN
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
+    '\u0141'   #  0x00ad -> LATIN CAPITAL LETTER L WITH STROKE
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u0104'   #  0x00b5 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\u010c'   #  0x00b6 -> LATIN CAPITAL LETTER C WITH CARON
+    '\u0118'   #  0x00b7 -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\u0116'   #  0x00b8 -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u012e'   #  0x00bd -> LATIN CAPITAL LETTER I WITH OGONEK
+    '\u0160'   #  0x00be -> LATIN CAPITAL LETTER S WITH CARON
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u0172'   #  0x00c6 -> LATIN CAPITAL LETTER U WITH OGONEK
+    '\u016a'   #  0x00c7 -> LATIN CAPITAL LETTER U WITH MACRON
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u017d'   #  0x00cf -> LATIN CAPITAL LETTER Z WITH CARON
+    '\u0105'   #  0x00d0 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u010d'   #  0x00d1 -> LATIN SMALL LETTER C WITH CARON
+    '\u0119'   #  0x00d2 -> LATIN SMALL LETTER E WITH OGONEK
+    '\u0117'   #  0x00d3 -> LATIN SMALL LETTER E WITH DOT ABOVE
+    '\u012f'   #  0x00d4 -> LATIN SMALL LETTER I WITH OGONEK
+    '\u0161'   #  0x00d5 -> LATIN SMALL LETTER S WITH CARON
+    '\u0173'   #  0x00d6 -> LATIN SMALL LETTER U WITH OGONEK
+    '\u016b'   #  0x00d7 -> LATIN SMALL LETTER U WITH MACRON
+    '\u017e'   #  0x00d8 -> LATIN SMALL LETTER Z WITH CARON
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u258c'   #  0x00dd -> LEFT HALF BLOCK
+    '\u2590'   #  0x00de -> RIGHT HALF BLOCK
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\xd3'     #  0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
+    '\u014c'   #  0x00e2 -> LATIN CAPITAL LETTER O WITH MACRON
+    '\u0143'   #  0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
+    '\xf5'     #  0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+    '\xd5'     #  0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xb5'     #  0x00e6 -> MICRO SIGN
+    '\u0144'   #  0x00e7 -> LATIN SMALL LETTER N WITH ACUTE
+    '\u0136'   #  0x00e8 -> LATIN CAPITAL LETTER K WITH CEDILLA
+    '\u0137'   #  0x00e9 -> LATIN SMALL LETTER K WITH CEDILLA
+    '\u013b'   #  0x00ea -> LATIN CAPITAL LETTER L WITH CEDILLA
+    '\u013c'   #  0x00eb -> LATIN SMALL LETTER L WITH CEDILLA
+    '\u0146'   #  0x00ec -> LATIN SMALL LETTER N WITH CEDILLA
+    '\u0112'   #  0x00ed -> LATIN CAPITAL LETTER E WITH MACRON
+    '\u0145'   #  0x00ee -> LATIN CAPITAL LETTER N WITH CEDILLA
+    '\u2019'   #  0x00ef -> RIGHT SINGLE QUOTATION MARK
+    '\xad'     #  0x00f0 -> SOFT HYPHEN
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u201c'   #  0x00f2 -> LEFT DOUBLE QUOTATION MARK
+    '\xbe'     #  0x00f3 -> VULGAR FRACTION THREE QUARTERS
+    '\xb6'     #  0x00f4 -> PILCROW SIGN
+    '\xa7'     #  0x00f5 -> SECTION SIGN
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\u201e'   #  0x00f7 -> DOUBLE LOW-9 QUOTATION MARK
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\u2219'   #  0x00f9 -> BULLET OPERATOR
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\xb9'     #  0x00fb -> SUPERSCRIPT ONE
+    '\xb3'     #  0x00fc -> SUPERSCRIPT THREE
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp850.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp850.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp850.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xec'     #  0x008d -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
-    u'\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
-    u'\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xff'     #  0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xd7'     #  0x009e -> MULTIPLICATION SIGN
-    u'\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
-    u'\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
-    u'\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
-    u'\xae'     #  0x00a9 -> REGISTERED SIGN
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
-    u'\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\xc1'     #  0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc0'     #  0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xa9'     #  0x00b8 -> COPYRIGHT SIGN
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\xa2'     #  0x00bd -> CENT SIGN
-    u'\xa5'     #  0x00be -> YEN SIGN
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\xe3'     #  0x00c6 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xc3'     #  0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\xa4'     #  0x00cf -> CURRENCY SIGN
-    u'\xf0'     #  0x00d0 -> LATIN SMALL LETTER ETH
-    u'\xd0'     #  0x00d1 -> LATIN CAPITAL LETTER ETH
-    u'\xca'     #  0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\u0131'   #  0x00d5 -> LATIN SMALL LETTER DOTLESS I
-    u'\xcd'     #  0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\xa6'     #  0x00dd -> BROKEN BAR
-    u'\xcc'     #  0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\xd3'     #  0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
-    u'\xd4'     #  0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd2'     #  0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xf5'     #  0x00e4 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xd5'     #  0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xb5'     #  0x00e6 -> MICRO SIGN
-    u'\xfe'     #  0x00e7 -> LATIN SMALL LETTER THORN
-    u'\xde'     #  0x00e8 -> LATIN CAPITAL LETTER THORN
-    u'\xda'     #  0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xd9'     #  0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xfd'     #  0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xdd'     #  0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xaf'     #  0x00ee -> MACRON
-    u'\xb4'     #  0x00ef -> ACUTE ACCENT
-    u'\xad'     #  0x00f0 -> SOFT HYPHEN
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u2017'   #  0x00f2 -> DOUBLE LOW LINE
-    u'\xbe'     #  0x00f3 -> VULGAR FRACTION THREE QUARTERS
-    u'\xb6'     #  0x00f4 -> PILCROW SIGN
-    u'\xa7'     #  0x00f5 -> SECTION SIGN
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\xb8'     #  0x00f7 -> CEDILLA
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\xa8'     #  0x00f9 -> DIAERESIS
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\xb9'     #  0x00fb -> SUPERSCRIPT ONE
-    u'\xb3'     #  0x00fc -> SUPERSCRIPT THREE
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
+    '\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xec'     #  0x008d -> LATIN SMALL LETTER I WITH GRAVE
+    '\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
+    '\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
+    '\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xff'     #  0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xd7'     #  0x009e -> MULTIPLICATION SIGN
+    '\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
+    '\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+    '\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
+    '\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
+    '\xae'     #  0x00a9 -> REGISTERED SIGN
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
+    '\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\xc1'     #  0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc0'     #  0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xa9'     #  0x00b8 -> COPYRIGHT SIGN
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\xa2'     #  0x00bd -> CENT SIGN
+    '\xa5'     #  0x00be -> YEN SIGN
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\xe3'     #  0x00c6 -> LATIN SMALL LETTER A WITH TILDE
+    '\xc3'     #  0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\xa4'     #  0x00cf -> CURRENCY SIGN
+    '\xf0'     #  0x00d0 -> LATIN SMALL LETTER ETH
+    '\xd0'     #  0x00d1 -> LATIN CAPITAL LETTER ETH
+    '\xca'     #  0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\u0131'   #  0x00d5 -> LATIN SMALL LETTER DOTLESS I
+    '\xcd'     #  0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\xa6'     #  0x00dd -> BROKEN BAR
+    '\xcc'     #  0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\xd3'     #  0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
+    '\xd4'     #  0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd2'     #  0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xf5'     #  0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+    '\xd5'     #  0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xb5'     #  0x00e6 -> MICRO SIGN
+    '\xfe'     #  0x00e7 -> LATIN SMALL LETTER THORN
+    '\xde'     #  0x00e8 -> LATIN CAPITAL LETTER THORN
+    '\xda'     #  0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xd9'     #  0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xfd'     #  0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xdd'     #  0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xaf'     #  0x00ee -> MACRON
+    '\xb4'     #  0x00ef -> ACUTE ACCENT
+    '\xad'     #  0x00f0 -> SOFT HYPHEN
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u2017'   #  0x00f2 -> DOUBLE LOW LINE
+    '\xbe'     #  0x00f3 -> VULGAR FRACTION THREE QUARTERS
+    '\xb6'     #  0x00f4 -> PILCROW SIGN
+    '\xa7'     #  0x00f5 -> SECTION SIGN
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\xb8'     #  0x00f7 -> CEDILLA
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\xa8'     #  0x00f9 -> DIAERESIS
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\xb9'     #  0x00fb -> SUPERSCRIPT ONE
+    '\xb3'     #  0x00fc -> SUPERSCRIPT THREE
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp852.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp852.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp852.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u016f'   #  0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE
-    u'\u0107'   #  0x0086 -> LATIN SMALL LETTER C WITH ACUTE
-    u'\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\u0142'   #  0x0088 -> LATIN SMALL LETTER L WITH STROKE
-    u'\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\u0150'   #  0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
-    u'\u0151'   #  0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
-    u'\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\u0179'   #  0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
-    u'\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\u0106'   #  0x008f -> LATIN CAPITAL LETTER C WITH ACUTE
-    u'\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\u0139'   #  0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE
-    u'\u013a'   #  0x0092 -> LATIN SMALL LETTER L WITH ACUTE
-    u'\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\u013d'   #  0x0095 -> LATIN CAPITAL LETTER L WITH CARON
-    u'\u013e'   #  0x0096 -> LATIN SMALL LETTER L WITH CARON
-    u'\u015a'   #  0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
-    u'\u015b'   #  0x0098 -> LATIN SMALL LETTER S WITH ACUTE
-    u'\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u0164'   #  0x009b -> LATIN CAPITAL LETTER T WITH CARON
-    u'\u0165'   #  0x009c -> LATIN SMALL LETTER T WITH CARON
-    u'\u0141'   #  0x009d -> LATIN CAPITAL LETTER L WITH STROKE
-    u'\xd7'     #  0x009e -> MULTIPLICATION SIGN
-    u'\u010d'   #  0x009f -> LATIN SMALL LETTER C WITH CARON
-    u'\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
-    u'\u0104'   #  0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\u0105'   #  0x00a5 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u017d'   #  0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\u017e'   #  0x00a7 -> LATIN SMALL LETTER Z WITH CARON
-    u'\u0118'   #  0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\u0119'   #  0x00a9 -> LATIN SMALL LETTER E WITH OGONEK
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\u017a'   #  0x00ab -> LATIN SMALL LETTER Z WITH ACUTE
-    u'\u010c'   #  0x00ac -> LATIN CAPITAL LETTER C WITH CARON
-    u'\u015f'   #  0x00ad -> LATIN SMALL LETTER S WITH CEDILLA
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\xc1'     #  0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\u011a'   #  0x00b7 -> LATIN CAPITAL LETTER E WITH CARON
-    u'\u015e'   #  0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u017b'   #  0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
-    u'\u017c'   #  0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u0102'   #  0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE
-    u'\u0103'   #  0x00c7 -> LATIN SMALL LETTER A WITH BREVE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\xa4'     #  0x00cf -> CURRENCY SIGN
-    u'\u0111'   #  0x00d0 -> LATIN SMALL LETTER D WITH STROKE
-    u'\u0110'   #  0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE
-    u'\u010e'   #  0x00d2 -> LATIN CAPITAL LETTER D WITH CARON
-    u'\xcb'     #  0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\u010f'   #  0x00d4 -> LATIN SMALL LETTER D WITH CARON
-    u'\u0147'   #  0x00d5 -> LATIN CAPITAL LETTER N WITH CARON
-    u'\xcd'     #  0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\u011b'   #  0x00d8 -> LATIN SMALL LETTER E WITH CARON
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u0162'   #  0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA
-    u'\u016e'   #  0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\xd3'     #  0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
-    u'\xd4'     #  0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\u0143'   #  0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
-    u'\u0144'   #  0x00e4 -> LATIN SMALL LETTER N WITH ACUTE
-    u'\u0148'   #  0x00e5 -> LATIN SMALL LETTER N WITH CARON
-    u'\u0160'   #  0x00e6 -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u0161'   #  0x00e7 -> LATIN SMALL LETTER S WITH CARON
-    u'\u0154'   #  0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE
-    u'\xda'     #  0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\u0155'   #  0x00ea -> LATIN SMALL LETTER R WITH ACUTE
-    u'\u0170'   #  0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
-    u'\xfd'     #  0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xdd'     #  0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\u0163'   #  0x00ee -> LATIN SMALL LETTER T WITH CEDILLA
-    u'\xb4'     #  0x00ef -> ACUTE ACCENT
-    u'\xad'     #  0x00f0 -> SOFT HYPHEN
-    u'\u02dd'   #  0x00f1 -> DOUBLE ACUTE ACCENT
-    u'\u02db'   #  0x00f2 -> OGONEK
-    u'\u02c7'   #  0x00f3 -> CARON
-    u'\u02d8'   #  0x00f4 -> BREVE
-    u'\xa7'     #  0x00f5 -> SECTION SIGN
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\xb8'     #  0x00f7 -> CEDILLA
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\xa8'     #  0x00f9 -> DIAERESIS
-    u'\u02d9'   #  0x00fa -> DOT ABOVE
-    u'\u0171'   #  0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
-    u'\u0158'   #  0x00fc -> LATIN CAPITAL LETTER R WITH CARON
-    u'\u0159'   #  0x00fd -> LATIN SMALL LETTER R WITH CARON
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u016f'   #  0x0085 -> LATIN SMALL LETTER U WITH RING ABOVE
+    '\u0107'   #  0x0086 -> LATIN SMALL LETTER C WITH ACUTE
+    '\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\u0142'   #  0x0088 -> LATIN SMALL LETTER L WITH STROKE
+    '\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\u0150'   #  0x008a -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+    '\u0151'   #  0x008b -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+    '\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\u0179'   #  0x008d -> LATIN CAPITAL LETTER Z WITH ACUTE
+    '\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\u0106'   #  0x008f -> LATIN CAPITAL LETTER C WITH ACUTE
+    '\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\u0139'   #  0x0091 -> LATIN CAPITAL LETTER L WITH ACUTE
+    '\u013a'   #  0x0092 -> LATIN SMALL LETTER L WITH ACUTE
+    '\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\u013d'   #  0x0095 -> LATIN CAPITAL LETTER L WITH CARON
+    '\u013e'   #  0x0096 -> LATIN SMALL LETTER L WITH CARON
+    '\u015a'   #  0x0097 -> LATIN CAPITAL LETTER S WITH ACUTE
+    '\u015b'   #  0x0098 -> LATIN SMALL LETTER S WITH ACUTE
+    '\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u0164'   #  0x009b -> LATIN CAPITAL LETTER T WITH CARON
+    '\u0165'   #  0x009c -> LATIN SMALL LETTER T WITH CARON
+    '\u0141'   #  0x009d -> LATIN CAPITAL LETTER L WITH STROKE
+    '\xd7'     #  0x009e -> MULTIPLICATION SIGN
+    '\u010d'   #  0x009f -> LATIN SMALL LETTER C WITH CARON
+    '\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+    '\u0104'   #  0x00a4 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\u0105'   #  0x00a5 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u017d'   #  0x00a6 -> LATIN CAPITAL LETTER Z WITH CARON
+    '\u017e'   #  0x00a7 -> LATIN SMALL LETTER Z WITH CARON
+    '\u0118'   #  0x00a8 -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\u0119'   #  0x00a9 -> LATIN SMALL LETTER E WITH OGONEK
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\u017a'   #  0x00ab -> LATIN SMALL LETTER Z WITH ACUTE
+    '\u010c'   #  0x00ac -> LATIN CAPITAL LETTER C WITH CARON
+    '\u015f'   #  0x00ad -> LATIN SMALL LETTER S WITH CEDILLA
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\xc1'     #  0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\u011a'   #  0x00b7 -> LATIN CAPITAL LETTER E WITH CARON
+    '\u015e'   #  0x00b8 -> LATIN CAPITAL LETTER S WITH CEDILLA
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u017b'   #  0x00bd -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    '\u017c'   #  0x00be -> LATIN SMALL LETTER Z WITH DOT ABOVE
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u0102'   #  0x00c6 -> LATIN CAPITAL LETTER A WITH BREVE
+    '\u0103'   #  0x00c7 -> LATIN SMALL LETTER A WITH BREVE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\xa4'     #  0x00cf -> CURRENCY SIGN
+    '\u0111'   #  0x00d0 -> LATIN SMALL LETTER D WITH STROKE
+    '\u0110'   #  0x00d1 -> LATIN CAPITAL LETTER D WITH STROKE
+    '\u010e'   #  0x00d2 -> LATIN CAPITAL LETTER D WITH CARON
+    '\xcb'     #  0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\u010f'   #  0x00d4 -> LATIN SMALL LETTER D WITH CARON
+    '\u0147'   #  0x00d5 -> LATIN CAPITAL LETTER N WITH CARON
+    '\xcd'     #  0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\u011b'   #  0x00d8 -> LATIN SMALL LETTER E WITH CARON
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u0162'   #  0x00dd -> LATIN CAPITAL LETTER T WITH CEDILLA
+    '\u016e'   #  0x00de -> LATIN CAPITAL LETTER U WITH RING ABOVE
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\xd3'     #  0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
+    '\xd4'     #  0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\u0143'   #  0x00e3 -> LATIN CAPITAL LETTER N WITH ACUTE
+    '\u0144'   #  0x00e4 -> LATIN SMALL LETTER N WITH ACUTE
+    '\u0148'   #  0x00e5 -> LATIN SMALL LETTER N WITH CARON
+    '\u0160'   #  0x00e6 -> LATIN CAPITAL LETTER S WITH CARON
+    '\u0161'   #  0x00e7 -> LATIN SMALL LETTER S WITH CARON
+    '\u0154'   #  0x00e8 -> LATIN CAPITAL LETTER R WITH ACUTE
+    '\xda'     #  0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\u0155'   #  0x00ea -> LATIN SMALL LETTER R WITH ACUTE
+    '\u0170'   #  0x00eb -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+    '\xfd'     #  0x00ec -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xdd'     #  0x00ed -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\u0163'   #  0x00ee -> LATIN SMALL LETTER T WITH CEDILLA
+    '\xb4'     #  0x00ef -> ACUTE ACCENT
+    '\xad'     #  0x00f0 -> SOFT HYPHEN
+    '\u02dd'   #  0x00f1 -> DOUBLE ACUTE ACCENT
+    '\u02db'   #  0x00f2 -> OGONEK
+    '\u02c7'   #  0x00f3 -> CARON
+    '\u02d8'   #  0x00f4 -> BREVE
+    '\xa7'     #  0x00f5 -> SECTION SIGN
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\xb8'     #  0x00f7 -> CEDILLA
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\xa8'     #  0x00f9 -> DIAERESIS
+    '\u02d9'   #  0x00fa -> DOT ABOVE
+    '\u0171'   #  0x00fb -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+    '\u0158'   #  0x00fc -> LATIN CAPITAL LETTER R WITH CARON
+    '\u0159'   #  0x00fd -> LATIN SMALL LETTER R WITH CARON
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp855.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp855.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp855.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\u0452'   #  0x0080 -> CYRILLIC SMALL LETTER DJE
-    u'\u0402'   #  0x0081 -> CYRILLIC CAPITAL LETTER DJE
-    u'\u0453'   #  0x0082 -> CYRILLIC SMALL LETTER GJE
-    u'\u0403'   #  0x0083 -> CYRILLIC CAPITAL LETTER GJE
-    u'\u0451'   #  0x0084 -> CYRILLIC SMALL LETTER IO
-    u'\u0401'   #  0x0085 -> CYRILLIC CAPITAL LETTER IO
-    u'\u0454'   #  0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE
-    u'\u0404'   #  0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
-    u'\u0455'   #  0x0088 -> CYRILLIC SMALL LETTER DZE
-    u'\u0405'   #  0x0089 -> CYRILLIC CAPITAL LETTER DZE
-    u'\u0456'   #  0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\u0406'   #  0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\u0457'   #  0x008c -> CYRILLIC SMALL LETTER YI
-    u'\u0407'   #  0x008d -> CYRILLIC CAPITAL LETTER YI
-    u'\u0458'   #  0x008e -> CYRILLIC SMALL LETTER JE
-    u'\u0408'   #  0x008f -> CYRILLIC CAPITAL LETTER JE
-    u'\u0459'   #  0x0090 -> CYRILLIC SMALL LETTER LJE
-    u'\u0409'   #  0x0091 -> CYRILLIC CAPITAL LETTER LJE
-    u'\u045a'   #  0x0092 -> CYRILLIC SMALL LETTER NJE
-    u'\u040a'   #  0x0093 -> CYRILLIC CAPITAL LETTER NJE
-    u'\u045b'   #  0x0094 -> CYRILLIC SMALL LETTER TSHE
-    u'\u040b'   #  0x0095 -> CYRILLIC CAPITAL LETTER TSHE
-    u'\u045c'   #  0x0096 -> CYRILLIC SMALL LETTER KJE
-    u'\u040c'   #  0x0097 -> CYRILLIC CAPITAL LETTER KJE
-    u'\u045e'   #  0x0098 -> CYRILLIC SMALL LETTER SHORT U
-    u'\u040e'   #  0x0099 -> CYRILLIC CAPITAL LETTER SHORT U
-    u'\u045f'   #  0x009a -> CYRILLIC SMALL LETTER DZHE
-    u'\u040f'   #  0x009b -> CYRILLIC CAPITAL LETTER DZHE
-    u'\u044e'   #  0x009c -> CYRILLIC SMALL LETTER YU
-    u'\u042e'   #  0x009d -> CYRILLIC CAPITAL LETTER YU
-    u'\u044a'   #  0x009e -> CYRILLIC SMALL LETTER HARD SIGN
-    u'\u042a'   #  0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN
-    u'\u0430'   #  0x00a0 -> CYRILLIC SMALL LETTER A
-    u'\u0410'   #  0x00a1 -> CYRILLIC CAPITAL LETTER A
-    u'\u0431'   #  0x00a2 -> CYRILLIC SMALL LETTER BE
-    u'\u0411'   #  0x00a3 -> CYRILLIC CAPITAL LETTER BE
-    u'\u0446'   #  0x00a4 -> CYRILLIC SMALL LETTER TSE
-    u'\u0426'   #  0x00a5 -> CYRILLIC CAPITAL LETTER TSE
-    u'\u0434'   #  0x00a6 -> CYRILLIC SMALL LETTER DE
-    u'\u0414'   #  0x00a7 -> CYRILLIC CAPITAL LETTER DE
-    u'\u0435'   #  0x00a8 -> CYRILLIC SMALL LETTER IE
-    u'\u0415'   #  0x00a9 -> CYRILLIC CAPITAL LETTER IE
-    u'\u0444'   #  0x00aa -> CYRILLIC SMALL LETTER EF
-    u'\u0424'   #  0x00ab -> CYRILLIC CAPITAL LETTER EF
-    u'\u0433'   #  0x00ac -> CYRILLIC SMALL LETTER GHE
-    u'\u0413'   #  0x00ad -> CYRILLIC CAPITAL LETTER GHE
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u0445'   #  0x00b5 -> CYRILLIC SMALL LETTER HA
-    u'\u0425'   #  0x00b6 -> CYRILLIC CAPITAL LETTER HA
-    u'\u0438'   #  0x00b7 -> CYRILLIC SMALL LETTER I
-    u'\u0418'   #  0x00b8 -> CYRILLIC CAPITAL LETTER I
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u0439'   #  0x00bd -> CYRILLIC SMALL LETTER SHORT I
-    u'\u0419'   #  0x00be -> CYRILLIC CAPITAL LETTER SHORT I
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u043a'   #  0x00c6 -> CYRILLIC SMALL LETTER KA
-    u'\u041a'   #  0x00c7 -> CYRILLIC CAPITAL LETTER KA
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\xa4'     #  0x00cf -> CURRENCY SIGN
-    u'\u043b'   #  0x00d0 -> CYRILLIC SMALL LETTER EL
-    u'\u041b'   #  0x00d1 -> CYRILLIC CAPITAL LETTER EL
-    u'\u043c'   #  0x00d2 -> CYRILLIC SMALL LETTER EM
-    u'\u041c'   #  0x00d3 -> CYRILLIC CAPITAL LETTER EM
-    u'\u043d'   #  0x00d4 -> CYRILLIC SMALL LETTER EN
-    u'\u041d'   #  0x00d5 -> CYRILLIC CAPITAL LETTER EN
-    u'\u043e'   #  0x00d6 -> CYRILLIC SMALL LETTER O
-    u'\u041e'   #  0x00d7 -> CYRILLIC CAPITAL LETTER O
-    u'\u043f'   #  0x00d8 -> CYRILLIC SMALL LETTER PE
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u041f'   #  0x00dd -> CYRILLIC CAPITAL LETTER PE
-    u'\u044f'   #  0x00de -> CYRILLIC SMALL LETTER YA
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u042f'   #  0x00e0 -> CYRILLIC CAPITAL LETTER YA
-    u'\u0440'   #  0x00e1 -> CYRILLIC SMALL LETTER ER
-    u'\u0420'   #  0x00e2 -> CYRILLIC CAPITAL LETTER ER
-    u'\u0441'   #  0x00e3 -> CYRILLIC SMALL LETTER ES
-    u'\u0421'   #  0x00e4 -> CYRILLIC CAPITAL LETTER ES
-    u'\u0442'   #  0x00e5 -> CYRILLIC SMALL LETTER TE
-    u'\u0422'   #  0x00e6 -> CYRILLIC CAPITAL LETTER TE
-    u'\u0443'   #  0x00e7 -> CYRILLIC SMALL LETTER U
-    u'\u0423'   #  0x00e8 -> CYRILLIC CAPITAL LETTER U
-    u'\u0436'   #  0x00e9 -> CYRILLIC SMALL LETTER ZHE
-    u'\u0416'   #  0x00ea -> CYRILLIC CAPITAL LETTER ZHE
-    u'\u0432'   #  0x00eb -> CYRILLIC SMALL LETTER VE
-    u'\u0412'   #  0x00ec -> CYRILLIC CAPITAL LETTER VE
-    u'\u044c'   #  0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN
-    u'\u042c'   #  0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN
-    u'\u2116'   #  0x00ef -> NUMERO SIGN
-    u'\xad'     #  0x00f0 -> SOFT HYPHEN
-    u'\u044b'   #  0x00f1 -> CYRILLIC SMALL LETTER YERU
-    u'\u042b'   #  0x00f2 -> CYRILLIC CAPITAL LETTER YERU
-    u'\u0437'   #  0x00f3 -> CYRILLIC SMALL LETTER ZE
-    u'\u0417'   #  0x00f4 -> CYRILLIC CAPITAL LETTER ZE
-    u'\u0448'   #  0x00f5 -> CYRILLIC SMALL LETTER SHA
-    u'\u0428'   #  0x00f6 -> CYRILLIC CAPITAL LETTER SHA
-    u'\u044d'   #  0x00f7 -> CYRILLIC SMALL LETTER E
-    u'\u042d'   #  0x00f8 -> CYRILLIC CAPITAL LETTER E
-    u'\u0449'   #  0x00f9 -> CYRILLIC SMALL LETTER SHCHA
-    u'\u0429'   #  0x00fa -> CYRILLIC CAPITAL LETTER SHCHA
-    u'\u0447'   #  0x00fb -> CYRILLIC SMALL LETTER CHE
-    u'\u0427'   #  0x00fc -> CYRILLIC CAPITAL LETTER CHE
-    u'\xa7'     #  0x00fd -> SECTION SIGN
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\u0452'   #  0x0080 -> CYRILLIC SMALL LETTER DJE
+    '\u0402'   #  0x0081 -> CYRILLIC CAPITAL LETTER DJE
+    '\u0453'   #  0x0082 -> CYRILLIC SMALL LETTER GJE
+    '\u0403'   #  0x0083 -> CYRILLIC CAPITAL LETTER GJE
+    '\u0451'   #  0x0084 -> CYRILLIC SMALL LETTER IO
+    '\u0401'   #  0x0085 -> CYRILLIC CAPITAL LETTER IO
+    '\u0454'   #  0x0086 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+    '\u0404'   #  0x0087 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+    '\u0455'   #  0x0088 -> CYRILLIC SMALL LETTER DZE
+    '\u0405'   #  0x0089 -> CYRILLIC CAPITAL LETTER DZE
+    '\u0456'   #  0x008a -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\u0406'   #  0x008b -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\u0457'   #  0x008c -> CYRILLIC SMALL LETTER YI
+    '\u0407'   #  0x008d -> CYRILLIC CAPITAL LETTER YI
+    '\u0458'   #  0x008e -> CYRILLIC SMALL LETTER JE
+    '\u0408'   #  0x008f -> CYRILLIC CAPITAL LETTER JE
+    '\u0459'   #  0x0090 -> CYRILLIC SMALL LETTER LJE
+    '\u0409'   #  0x0091 -> CYRILLIC CAPITAL LETTER LJE
+    '\u045a'   #  0x0092 -> CYRILLIC SMALL LETTER NJE
+    '\u040a'   #  0x0093 -> CYRILLIC CAPITAL LETTER NJE
+    '\u045b'   #  0x0094 -> CYRILLIC SMALL LETTER TSHE
+    '\u040b'   #  0x0095 -> CYRILLIC CAPITAL LETTER TSHE
+    '\u045c'   #  0x0096 -> CYRILLIC SMALL LETTER KJE
+    '\u040c'   #  0x0097 -> CYRILLIC CAPITAL LETTER KJE
+    '\u045e'   #  0x0098 -> CYRILLIC SMALL LETTER SHORT U
+    '\u040e'   #  0x0099 -> CYRILLIC CAPITAL LETTER SHORT U
+    '\u045f'   #  0x009a -> CYRILLIC SMALL LETTER DZHE
+    '\u040f'   #  0x009b -> CYRILLIC CAPITAL LETTER DZHE
+    '\u044e'   #  0x009c -> CYRILLIC SMALL LETTER YU
+    '\u042e'   #  0x009d -> CYRILLIC CAPITAL LETTER YU
+    '\u044a'   #  0x009e -> CYRILLIC SMALL LETTER HARD SIGN
+    '\u042a'   #  0x009f -> CYRILLIC CAPITAL LETTER HARD SIGN
+    '\u0430'   #  0x00a0 -> CYRILLIC SMALL LETTER A
+    '\u0410'   #  0x00a1 -> CYRILLIC CAPITAL LETTER A
+    '\u0431'   #  0x00a2 -> CYRILLIC SMALL LETTER BE
+    '\u0411'   #  0x00a3 -> CYRILLIC CAPITAL LETTER BE
+    '\u0446'   #  0x00a4 -> CYRILLIC SMALL LETTER TSE
+    '\u0426'   #  0x00a5 -> CYRILLIC CAPITAL LETTER TSE
+    '\u0434'   #  0x00a6 -> CYRILLIC SMALL LETTER DE
+    '\u0414'   #  0x00a7 -> CYRILLIC CAPITAL LETTER DE
+    '\u0435'   #  0x00a8 -> CYRILLIC SMALL LETTER IE
+    '\u0415'   #  0x00a9 -> CYRILLIC CAPITAL LETTER IE
+    '\u0444'   #  0x00aa -> CYRILLIC SMALL LETTER EF
+    '\u0424'   #  0x00ab -> CYRILLIC CAPITAL LETTER EF
+    '\u0433'   #  0x00ac -> CYRILLIC SMALL LETTER GHE
+    '\u0413'   #  0x00ad -> CYRILLIC CAPITAL LETTER GHE
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u0445'   #  0x00b5 -> CYRILLIC SMALL LETTER HA
+    '\u0425'   #  0x00b6 -> CYRILLIC CAPITAL LETTER HA
+    '\u0438'   #  0x00b7 -> CYRILLIC SMALL LETTER I
+    '\u0418'   #  0x00b8 -> CYRILLIC CAPITAL LETTER I
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u0439'   #  0x00bd -> CYRILLIC SMALL LETTER SHORT I
+    '\u0419'   #  0x00be -> CYRILLIC CAPITAL LETTER SHORT I
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u043a'   #  0x00c6 -> CYRILLIC SMALL LETTER KA
+    '\u041a'   #  0x00c7 -> CYRILLIC CAPITAL LETTER KA
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\xa4'     #  0x00cf -> CURRENCY SIGN
+    '\u043b'   #  0x00d0 -> CYRILLIC SMALL LETTER EL
+    '\u041b'   #  0x00d1 -> CYRILLIC CAPITAL LETTER EL
+    '\u043c'   #  0x00d2 -> CYRILLIC SMALL LETTER EM
+    '\u041c'   #  0x00d3 -> CYRILLIC CAPITAL LETTER EM
+    '\u043d'   #  0x00d4 -> CYRILLIC SMALL LETTER EN
+    '\u041d'   #  0x00d5 -> CYRILLIC CAPITAL LETTER EN
+    '\u043e'   #  0x00d6 -> CYRILLIC SMALL LETTER O
+    '\u041e'   #  0x00d7 -> CYRILLIC CAPITAL LETTER O
+    '\u043f'   #  0x00d8 -> CYRILLIC SMALL LETTER PE
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u041f'   #  0x00dd -> CYRILLIC CAPITAL LETTER PE
+    '\u044f'   #  0x00de -> CYRILLIC SMALL LETTER YA
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u042f'   #  0x00e0 -> CYRILLIC CAPITAL LETTER YA
+    '\u0440'   #  0x00e1 -> CYRILLIC SMALL LETTER ER
+    '\u0420'   #  0x00e2 -> CYRILLIC CAPITAL LETTER ER
+    '\u0441'   #  0x00e3 -> CYRILLIC SMALL LETTER ES
+    '\u0421'   #  0x00e4 -> CYRILLIC CAPITAL LETTER ES
+    '\u0442'   #  0x00e5 -> CYRILLIC SMALL LETTER TE
+    '\u0422'   #  0x00e6 -> CYRILLIC CAPITAL LETTER TE
+    '\u0443'   #  0x00e7 -> CYRILLIC SMALL LETTER U
+    '\u0423'   #  0x00e8 -> CYRILLIC CAPITAL LETTER U
+    '\u0436'   #  0x00e9 -> CYRILLIC SMALL LETTER ZHE
+    '\u0416'   #  0x00ea -> CYRILLIC CAPITAL LETTER ZHE
+    '\u0432'   #  0x00eb -> CYRILLIC SMALL LETTER VE
+    '\u0412'   #  0x00ec -> CYRILLIC CAPITAL LETTER VE
+    '\u044c'   #  0x00ed -> CYRILLIC SMALL LETTER SOFT SIGN
+    '\u042c'   #  0x00ee -> CYRILLIC CAPITAL LETTER SOFT SIGN
+    '\u2116'   #  0x00ef -> NUMERO SIGN
+    '\xad'     #  0x00f0 -> SOFT HYPHEN
+    '\u044b'   #  0x00f1 -> CYRILLIC SMALL LETTER YERU
+    '\u042b'   #  0x00f2 -> CYRILLIC CAPITAL LETTER YERU
+    '\u0437'   #  0x00f3 -> CYRILLIC SMALL LETTER ZE
+    '\u0417'   #  0x00f4 -> CYRILLIC CAPITAL LETTER ZE
+    '\u0448'   #  0x00f5 -> CYRILLIC SMALL LETTER SHA
+    '\u0428'   #  0x00f6 -> CYRILLIC CAPITAL LETTER SHA
+    '\u044d'   #  0x00f7 -> CYRILLIC SMALL LETTER E
+    '\u042d'   #  0x00f8 -> CYRILLIC CAPITAL LETTER E
+    '\u0449'   #  0x00f9 -> CYRILLIC SMALL LETTER SHCHA
+    '\u0429'   #  0x00fa -> CYRILLIC CAPITAL LETTER SHCHA
+    '\u0447'   #  0x00fb -> CYRILLIC SMALL LETTER CHE
+    '\u0427'   #  0x00fc -> CYRILLIC CAPITAL LETTER CHE
+    '\xa7'     #  0x00fd -> SECTION SIGN
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp856.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp856.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp856.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u05d0'   #  0x80 -> HEBREW LETTER ALEF
-    u'\u05d1'   #  0x81 -> HEBREW LETTER BET
-    u'\u05d2'   #  0x82 -> HEBREW LETTER GIMEL
-    u'\u05d3'   #  0x83 -> HEBREW LETTER DALET
-    u'\u05d4'   #  0x84 -> HEBREW LETTER HE
-    u'\u05d5'   #  0x85 -> HEBREW LETTER VAV
-    u'\u05d6'   #  0x86 -> HEBREW LETTER ZAYIN
-    u'\u05d7'   #  0x87 -> HEBREW LETTER HET
-    u'\u05d8'   #  0x88 -> HEBREW LETTER TET
-    u'\u05d9'   #  0x89 -> HEBREW LETTER YOD
-    u'\u05da'   #  0x8A -> HEBREW LETTER FINAL KAF
-    u'\u05db'   #  0x8B -> HEBREW LETTER KAF
-    u'\u05dc'   #  0x8C -> HEBREW LETTER LAMED
-    u'\u05dd'   #  0x8D -> HEBREW LETTER FINAL MEM
-    u'\u05de'   #  0x8E -> HEBREW LETTER MEM
-    u'\u05df'   #  0x8F -> HEBREW LETTER FINAL NUN
-    u'\u05e0'   #  0x90 -> HEBREW LETTER NUN
-    u'\u05e1'   #  0x91 -> HEBREW LETTER SAMEKH
-    u'\u05e2'   #  0x92 -> HEBREW LETTER AYIN
-    u'\u05e3'   #  0x93 -> HEBREW LETTER FINAL PE
-    u'\u05e4'   #  0x94 -> HEBREW LETTER PE
-    u'\u05e5'   #  0x95 -> HEBREW LETTER FINAL TSADI
-    u'\u05e6'   #  0x96 -> HEBREW LETTER TSADI
-    u'\u05e7'   #  0x97 -> HEBREW LETTER QOF
-    u'\u05e8'   #  0x98 -> HEBREW LETTER RESH
-    u'\u05e9'   #  0x99 -> HEBREW LETTER SHIN
-    u'\u05ea'   #  0x9A -> HEBREW LETTER TAV
-    u'\ufffe'   #  0x9B -> UNDEFINED
-    u'\xa3'     #  0x9C -> POUND SIGN
-    u'\ufffe'   #  0x9D -> UNDEFINED
-    u'\xd7'     #  0x9E -> MULTIPLICATION SIGN
-    u'\ufffe'   #  0x9F -> UNDEFINED
-    u'\ufffe'   #  0xA0 -> UNDEFINED
-    u'\ufffe'   #  0xA1 -> UNDEFINED
-    u'\ufffe'   #  0xA2 -> UNDEFINED
-    u'\ufffe'   #  0xA3 -> UNDEFINED
-    u'\ufffe'   #  0xA4 -> UNDEFINED
-    u'\ufffe'   #  0xA5 -> UNDEFINED
-    u'\ufffe'   #  0xA6 -> UNDEFINED
-    u'\ufffe'   #  0xA7 -> UNDEFINED
-    u'\ufffe'   #  0xA8 -> UNDEFINED
-    u'\xae'     #  0xA9 -> REGISTERED SIGN
-    u'\xac'     #  0xAA -> NOT SIGN
-    u'\xbd'     #  0xAB -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0xAC -> VULGAR FRACTION ONE QUARTER
-    u'\ufffe'   #  0xAD -> UNDEFINED
-    u'\xab'     #  0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0xB0 -> LIGHT SHADE
-    u'\u2592'   #  0xB1 -> MEDIUM SHADE
-    u'\u2593'   #  0xB2 -> DARK SHADE
-    u'\u2502'   #  0xB3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\ufffe'   #  0xB5 -> UNDEFINED
-    u'\ufffe'   #  0xB6 -> UNDEFINED
-    u'\ufffe'   #  0xB7 -> UNDEFINED
-    u'\xa9'     #  0xB8 -> COPYRIGHT SIGN
-    u'\u2563'   #  0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0xBA -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\xa2'     #  0xBD -> CENT SIGN
-    u'\xa5'     #  0xBE -> YEN SIGN
-    u'\u2510'   #  0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\ufffe'   #  0xC6 -> UNDEFINED
-    u'\ufffe'   #  0xC7 -> UNDEFINED
-    u'\u255a'   #  0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\xa4'     #  0xCF -> CURRENCY SIGN
-    u'\ufffe'   #  0xD0 -> UNDEFINED
-    u'\ufffe'   #  0xD1 -> UNDEFINED
-    u'\ufffe'   #  0xD2 -> UNDEFINED
-    u'\ufffe'   #  0xD3 -> UNDEFINEDS
-    u'\ufffe'   #  0xD4 -> UNDEFINED
-    u'\ufffe'   #  0xD5 -> UNDEFINED
-    u'\ufffe'   #  0xD6 -> UNDEFINEDE
-    u'\ufffe'   #  0xD7 -> UNDEFINED
-    u'\ufffe'   #  0xD8 -> UNDEFINED
-    u'\u2518'   #  0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0xDB -> FULL BLOCK
-    u'\u2584'   #  0xDC -> LOWER HALF BLOCK
-    u'\xa6'     #  0xDD -> BROKEN BAR
-    u'\ufffe'   #  0xDE -> UNDEFINED
-    u'\u2580'   #  0xDF -> UPPER HALF BLOCK
-    u'\ufffe'   #  0xE0 -> UNDEFINED
-    u'\ufffe'   #  0xE1 -> UNDEFINED
-    u'\ufffe'   #  0xE2 -> UNDEFINED
-    u'\ufffe'   #  0xE3 -> UNDEFINED
-    u'\ufffe'   #  0xE4 -> UNDEFINED
-    u'\ufffe'   #  0xE5 -> UNDEFINED
-    u'\xb5'     #  0xE6 -> MICRO SIGN
-    u'\ufffe'   #  0xE7 -> UNDEFINED
-    u'\ufffe'   #  0xE8 -> UNDEFINED
-    u'\ufffe'   #  0xE9 -> UNDEFINED
-    u'\ufffe'   #  0xEA -> UNDEFINED
-    u'\ufffe'   #  0xEB -> UNDEFINED
-    u'\ufffe'   #  0xEC -> UNDEFINED
-    u'\ufffe'   #  0xED -> UNDEFINED
-    u'\xaf'     #  0xEE -> MACRON
-    u'\xb4'     #  0xEF -> ACUTE ACCENT
-    u'\xad'     #  0xF0 -> SOFT HYPHEN
-    u'\xb1'     #  0xF1 -> PLUS-MINUS SIGN
-    u'\u2017'   #  0xF2 -> DOUBLE LOW LINE
-    u'\xbe'     #  0xF3 -> VULGAR FRACTION THREE QUARTERS
-    u'\xb6'     #  0xF4 -> PILCROW SIGN
-    u'\xa7'     #  0xF5 -> SECTION SIGN
-    u'\xf7'     #  0xF6 -> DIVISION SIGN
-    u'\xb8'     #  0xF7 -> CEDILLA
-    u'\xb0'     #  0xF8 -> DEGREE SIGN
-    u'\xa8'     #  0xF9 -> DIAERESIS
-    u'\xb7'     #  0xFA -> MIDDLE DOT
-    u'\xb9'     #  0xFB -> SUPERSCRIPT ONE
-    u'\xb3'     #  0xFC -> SUPERSCRIPT THREE
-    u'\xb2'     #  0xFD -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0xFE -> BLACK SQUARE
-    u'\xa0'     #  0xFF -> NO-BREAK SPACE
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u05d0'   #  0x80 -> HEBREW LETTER ALEF
+    '\u05d1'   #  0x81 -> HEBREW LETTER BET
+    '\u05d2'   #  0x82 -> HEBREW LETTER GIMEL
+    '\u05d3'   #  0x83 -> HEBREW LETTER DALET
+    '\u05d4'   #  0x84 -> HEBREW LETTER HE
+    '\u05d5'   #  0x85 -> HEBREW LETTER VAV
+    '\u05d6'   #  0x86 -> HEBREW LETTER ZAYIN
+    '\u05d7'   #  0x87 -> HEBREW LETTER HET
+    '\u05d8'   #  0x88 -> HEBREW LETTER TET
+    '\u05d9'   #  0x89 -> HEBREW LETTER YOD
+    '\u05da'   #  0x8A -> HEBREW LETTER FINAL KAF
+    '\u05db'   #  0x8B -> HEBREW LETTER KAF
+    '\u05dc'   #  0x8C -> HEBREW LETTER LAMED
+    '\u05dd'   #  0x8D -> HEBREW LETTER FINAL MEM
+    '\u05de'   #  0x8E -> HEBREW LETTER MEM
+    '\u05df'   #  0x8F -> HEBREW LETTER FINAL NUN
+    '\u05e0'   #  0x90 -> HEBREW LETTER NUN
+    '\u05e1'   #  0x91 -> HEBREW LETTER SAMEKH
+    '\u05e2'   #  0x92 -> HEBREW LETTER AYIN
+    '\u05e3'   #  0x93 -> HEBREW LETTER FINAL PE
+    '\u05e4'   #  0x94 -> HEBREW LETTER PE
+    '\u05e5'   #  0x95 -> HEBREW LETTER FINAL TSADI
+    '\u05e6'   #  0x96 -> HEBREW LETTER TSADI
+    '\u05e7'   #  0x97 -> HEBREW LETTER QOF
+    '\u05e8'   #  0x98 -> HEBREW LETTER RESH
+    '\u05e9'   #  0x99 -> HEBREW LETTER SHIN
+    '\u05ea'   #  0x9A -> HEBREW LETTER TAV
+    '\ufffe'   #  0x9B -> UNDEFINED
+    '\xa3'     #  0x9C -> POUND SIGN
+    '\ufffe'   #  0x9D -> UNDEFINED
+    '\xd7'     #  0x9E -> MULTIPLICATION SIGN
+    '\ufffe'   #  0x9F -> UNDEFINED
+    '\ufffe'   #  0xA0 -> UNDEFINED
+    '\ufffe'   #  0xA1 -> UNDEFINED
+    '\ufffe'   #  0xA2 -> UNDEFINED
+    '\ufffe'   #  0xA3 -> UNDEFINED
+    '\ufffe'   #  0xA4 -> UNDEFINED
+    '\ufffe'   #  0xA5 -> UNDEFINED
+    '\ufffe'   #  0xA6 -> UNDEFINED
+    '\ufffe'   #  0xA7 -> UNDEFINED
+    '\ufffe'   #  0xA8 -> UNDEFINED
+    '\xae'     #  0xA9 -> REGISTERED SIGN
+    '\xac'     #  0xAA -> NOT SIGN
+    '\xbd'     #  0xAB -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0xAC -> VULGAR FRACTION ONE QUARTER
+    '\ufffe'   #  0xAD -> UNDEFINED
+    '\xab'     #  0xAE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0xAF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0xB0 -> LIGHT SHADE
+    '\u2592'   #  0xB1 -> MEDIUM SHADE
+    '\u2593'   #  0xB2 -> DARK SHADE
+    '\u2502'   #  0xB3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0xB4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\ufffe'   #  0xB5 -> UNDEFINED
+    '\ufffe'   #  0xB6 -> UNDEFINED
+    '\ufffe'   #  0xB7 -> UNDEFINED
+    '\xa9'     #  0xB8 -> COPYRIGHT SIGN
+    '\u2563'   #  0xB9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0xBA -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0xBB -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0xBC -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\xa2'     #  0xBD -> CENT SIGN
+    '\xa5'     #  0xBE -> YEN SIGN
+    '\u2510'   #  0xBF -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0xC0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0xC1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0xC2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0xC3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0xC4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0xC5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\ufffe'   #  0xC6 -> UNDEFINED
+    '\ufffe'   #  0xC7 -> UNDEFINED
+    '\u255a'   #  0xC8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0xC9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0xCA -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0xCB -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0xCC -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0xCD -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0xCE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\xa4'     #  0xCF -> CURRENCY SIGN
+    '\ufffe'   #  0xD0 -> UNDEFINED
+    '\ufffe'   #  0xD1 -> UNDEFINED
+    '\ufffe'   #  0xD2 -> UNDEFINED
+    '\ufffe'   #  0xD3 -> UNDEFINEDS
+    '\ufffe'   #  0xD4 -> UNDEFINED
+    '\ufffe'   #  0xD5 -> UNDEFINED
+    '\ufffe'   #  0xD6 -> UNDEFINEDE
+    '\ufffe'   #  0xD7 -> UNDEFINED
+    '\ufffe'   #  0xD8 -> UNDEFINED
+    '\u2518'   #  0xD9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0xDA -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0xDB -> FULL BLOCK
+    '\u2584'   #  0xDC -> LOWER HALF BLOCK
+    '\xa6'     #  0xDD -> BROKEN BAR
+    '\ufffe'   #  0xDE -> UNDEFINED
+    '\u2580'   #  0xDF -> UPPER HALF BLOCK
+    '\ufffe'   #  0xE0 -> UNDEFINED
+    '\ufffe'   #  0xE1 -> UNDEFINED
+    '\ufffe'   #  0xE2 -> UNDEFINED
+    '\ufffe'   #  0xE3 -> UNDEFINED
+    '\ufffe'   #  0xE4 -> UNDEFINED
+    '\ufffe'   #  0xE5 -> UNDEFINED
+    '\xb5'     #  0xE6 -> MICRO SIGN
+    '\ufffe'   #  0xE7 -> UNDEFINED
+    '\ufffe'   #  0xE8 -> UNDEFINED
+    '\ufffe'   #  0xE9 -> UNDEFINED
+    '\ufffe'   #  0xEA -> UNDEFINED
+    '\ufffe'   #  0xEB -> UNDEFINED
+    '\ufffe'   #  0xEC -> UNDEFINED
+    '\ufffe'   #  0xED -> UNDEFINED
+    '\xaf'     #  0xEE -> MACRON
+    '\xb4'     #  0xEF -> ACUTE ACCENT
+    '\xad'     #  0xF0 -> SOFT HYPHEN
+    '\xb1'     #  0xF1 -> PLUS-MINUS SIGN
+    '\u2017'   #  0xF2 -> DOUBLE LOW LINE
+    '\xbe'     #  0xF3 -> VULGAR FRACTION THREE QUARTERS
+    '\xb6'     #  0xF4 -> PILCROW SIGN
+    '\xa7'     #  0xF5 -> SECTION SIGN
+    '\xf7'     #  0xF6 -> DIVISION SIGN
+    '\xb8'     #  0xF7 -> CEDILLA
+    '\xb0'     #  0xF8 -> DEGREE SIGN
+    '\xa8'     #  0xF9 -> DIAERESIS
+    '\xb7'     #  0xFA -> MIDDLE DOT
+    '\xb9'     #  0xFB -> SUPERSCRIPT ONE
+    '\xb3'     #  0xFC -> SUPERSCRIPT THREE
+    '\xb2'     #  0xFD -> SUPERSCRIPT TWO
+    '\u25a0'   #  0xFE -> BLACK SQUARE
+    '\xa0'     #  0xFF -> NO-BREAK SPACE
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp857.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp857.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp857.py	Wed May  2 21:09:54 2007
@@ -177,262 +177,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\u0131'   #  0x008d -> LATIN SMALL LETTER DOTLESS I
-    u'\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
-    u'\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
-    u'\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\u0130'   #  0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE
-    u'\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\u015e'   #  0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA
-    u'\u015f'   #  0x009f -> LATIN SMALL LETTER S WITH CEDILLA
-    u'\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\u011e'   #  0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE
-    u'\u011f'   #  0x00a7 -> LATIN SMALL LETTER G WITH BREVE
-    u'\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
-    u'\xae'     #  0x00a9 -> REGISTERED SIGN
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
-    u'\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\xc1'     #  0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc0'     #  0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xa9'     #  0x00b8 -> COPYRIGHT SIGN
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\xa2'     #  0x00bd -> CENT SIGN
-    u'\xa5'     #  0x00be -> YEN SIGN
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\xe3'     #  0x00c6 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xc3'     #  0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\xa4'     #  0x00cf -> CURRENCY SIGN
-    u'\xba'     #  0x00d0 -> MASCULINE ORDINAL INDICATOR
-    u'\xaa'     #  0x00d1 -> FEMININE ORDINAL INDICATOR
-    u'\xca'     #  0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\ufffe'   #  0x00d5 -> UNDEFINED
-    u'\xcd'     #  0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\xa6'     #  0x00dd -> BROKEN BAR
-    u'\xcc'     #  0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\xd3'     #  0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
-    u'\xd4'     #  0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd2'     #  0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xf5'     #  0x00e4 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xd5'     #  0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xb5'     #  0x00e6 -> MICRO SIGN
-    u'\ufffe'   #  0x00e7 -> UNDEFINED
-    u'\xd7'     #  0x00e8 -> MULTIPLICATION SIGN
-    u'\xda'     #  0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xd9'     #  0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xec'     #  0x00ec -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xff'     #  0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\xaf'     #  0x00ee -> MACRON
-    u'\xb4'     #  0x00ef -> ACUTE ACCENT
-    u'\xad'     #  0x00f0 -> SOFT HYPHEN
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\ufffe'   #  0x00f2 -> UNDEFINED
-    u'\xbe'     #  0x00f3 -> VULGAR FRACTION THREE QUARTERS
-    u'\xb6'     #  0x00f4 -> PILCROW SIGN
-    u'\xa7'     #  0x00f5 -> SECTION SIGN
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\xb8'     #  0x00f7 -> CEDILLA
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\xa8'     #  0x00f9 -> DIAERESIS
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\xb9'     #  0x00fb -> SUPERSCRIPT ONE
-    u'\xb3'     #  0x00fc -> SUPERSCRIPT THREE
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
+    '\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\u0131'   #  0x008d -> LATIN SMALL LETTER DOTLESS I
+    '\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
+    '\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
+    '\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+    '\u0130'   #  0x0098 -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+    '\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+    '\u015e'   #  0x009e -> LATIN CAPITAL LETTER S WITH CEDILLA
+    '\u015f'   #  0x009f -> LATIN SMALL LETTER S WITH CEDILLA
+    '\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+    '\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\u011e'   #  0x00a6 -> LATIN CAPITAL LETTER G WITH BREVE
+    '\u011f'   #  0x00a7 -> LATIN SMALL LETTER G WITH BREVE
+    '\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
+    '\xae'     #  0x00a9 -> REGISTERED SIGN
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
+    '\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\xc1'     #  0x00b5 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0x00b6 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc0'     #  0x00b7 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xa9'     #  0x00b8 -> COPYRIGHT SIGN
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\xa2'     #  0x00bd -> CENT SIGN
+    '\xa5'     #  0x00be -> YEN SIGN
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\xe3'     #  0x00c6 -> LATIN SMALL LETTER A WITH TILDE
+    '\xc3'     #  0x00c7 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\xa4'     #  0x00cf -> CURRENCY SIGN
+    '\xba'     #  0x00d0 -> MASCULINE ORDINAL INDICATOR
+    '\xaa'     #  0x00d1 -> FEMININE ORDINAL INDICATOR
+    '\xca'     #  0x00d2 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0x00d3 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0x00d4 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\ufffe'   #  0x00d5 -> UNDEFINED
+    '\xcd'     #  0x00d6 -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0x00d7 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0x00d8 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\xa6'     #  0x00dd -> BROKEN BAR
+    '\xcc'     #  0x00de -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\xd3'     #  0x00e0 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
+    '\xd4'     #  0x00e2 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd2'     #  0x00e3 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xf5'     #  0x00e4 -> LATIN SMALL LETTER O WITH TILDE
+    '\xd5'     #  0x00e5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xb5'     #  0x00e6 -> MICRO SIGN
+    '\ufffe'   #  0x00e7 -> UNDEFINED
+    '\xd7'     #  0x00e8 -> MULTIPLICATION SIGN
+    '\xda'     #  0x00e9 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0x00ea -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xd9'     #  0x00eb -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xec'     #  0x00ec -> LATIN SMALL LETTER I WITH GRAVE
+    '\xff'     #  0x00ed -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\xaf'     #  0x00ee -> MACRON
+    '\xb4'     #  0x00ef -> ACUTE ACCENT
+    '\xad'     #  0x00f0 -> SOFT HYPHEN
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\ufffe'   #  0x00f2 -> UNDEFINED
+    '\xbe'     #  0x00f3 -> VULGAR FRACTION THREE QUARTERS
+    '\xb6'     #  0x00f4 -> PILCROW SIGN
+    '\xa7'     #  0x00f5 -> SECTION SIGN
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\xb8'     #  0x00f7 -> CEDILLA
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\xa8'     #  0x00f9 -> DIAERESIS
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\xb9'     #  0x00fb -> SUPERSCRIPT ONE
+    '\xb3'     #  0x00fc -> SUPERSCRIPT THREE
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp860.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp860.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp860.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe3'     #  0x0084 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xc1'     #  0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xca'     #  0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xcd'     #  0x008b -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xd4'     #  0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xec'     #  0x008d -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xc3'     #  0x008e -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc2'     #  0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xc0'     #  0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc8'     #  0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf5'     #  0x0094 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xda'     #  0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xcc'     #  0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xd5'     #  0x0099 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xa2'     #  0x009b -> CENT SIGN
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\xd9'     #  0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\u20a7'   #  0x009e -> PESETA SIGN
-    u'\xd3'     #  0x009f -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
-    u'\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
-    u'\xd2'     #  0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
-    u'\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
-    u'\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
-    u'\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
-    u'\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
-    u'\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
-    u'\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
-    u'\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
-    u'\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u258c'   #  0x00dd -> LEFT HALF BLOCK
-    u'\u2590'   #  0x00de -> RIGHT HALF BLOCK
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
-    u'\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
-    u'\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
-    u'\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
-    u'\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
-    u'\xb5'     #  0x00e6 -> MICRO SIGN
-    u'\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
-    u'\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
-    u'\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
-    u'\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
-    u'\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
-    u'\u221e'   #  0x00ec -> INFINITY
-    u'\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
-    u'\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
-    u'\u2229'   #  0x00ef -> INTERSECTION
-    u'\u2261'   #  0x00f0 -> IDENTICAL TO
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
-    u'\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
-    u'\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
-    u'\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\u2248'   #  0x00f7 -> ALMOST EQUAL TO
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\u2219'   #  0x00f9 -> BULLET OPERATOR
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\u221a'   #  0x00fb -> SQUARE ROOT
-    u'\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe3'     #  0x0084 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xc1'     #  0x0086 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xca'     #  0x0089 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
+    '\xcd'     #  0x008b -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xd4'     #  0x008c -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xec'     #  0x008d -> LATIN SMALL LETTER I WITH GRAVE
+    '\xc3'     #  0x008e -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc2'     #  0x008f -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xc0'     #  0x0091 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc8'     #  0x0092 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf5'     #  0x0094 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xda'     #  0x0096 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xcc'     #  0x0098 -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xd5'     #  0x0099 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xa2'     #  0x009b -> CENT SIGN
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\xd9'     #  0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\u20a7'   #  0x009e -> PESETA SIGN
+    '\xd3'     #  0x009f -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+    '\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
+    '\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
+    '\xd2'     #  0x00a9 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
+    '\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+    '\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+    '\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+    '\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+    '\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+    '\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+    '\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+    '\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u258c'   #  0x00dd -> LEFT HALF BLOCK
+    '\u2590'   #  0x00de -> RIGHT HALF BLOCK
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
+    '\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
+    '\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
+    '\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
+    '\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
+    '\xb5'     #  0x00e6 -> MICRO SIGN
+    '\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
+    '\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
+    '\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
+    '\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
+    '\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
+    '\u221e'   #  0x00ec -> INFINITY
+    '\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
+    '\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
+    '\u2229'   #  0x00ef -> INTERSECTION
+    '\u2261'   #  0x00f0 -> IDENTICAL TO
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
+    '\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
+    '\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
+    '\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\u2248'   #  0x00f7 -> ALMOST EQUAL TO
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\u2219'   #  0x00f9 -> BULLET OPERATOR
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\u221a'   #  0x00fb -> SQUARE ROOT
+    '\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp861.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp861.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp861.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xd0'     #  0x008b -> LATIN CAPITAL LETTER ETH
-    u'\xf0'     #  0x008c -> LATIN SMALL LETTER ETH
-    u'\xde'     #  0x008d -> LATIN CAPITAL LETTER THORN
-    u'\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
-    u'\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
-    u'\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xfe'     #  0x0095 -> LATIN SMALL LETTER THORN
-    u'\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xdd'     #  0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xfd'     #  0x0098 -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\u20a7'   #  0x009e -> PESETA SIGN
-    u'\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
-    u'\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xc1'     #  0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xcd'     #  0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xd3'     #  0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xda'     #  0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
-    u'\u2310'   #  0x00a9 -> REVERSED NOT SIGN
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
-    u'\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
-    u'\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
-    u'\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
-    u'\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
-    u'\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
-    u'\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
-    u'\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
-    u'\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u258c'   #  0x00dd -> LEFT HALF BLOCK
-    u'\u2590'   #  0x00de -> RIGHT HALF BLOCK
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
-    u'\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
-    u'\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
-    u'\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
-    u'\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
-    u'\xb5'     #  0x00e6 -> MICRO SIGN
-    u'\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
-    u'\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
-    u'\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
-    u'\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
-    u'\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
-    u'\u221e'   #  0x00ec -> INFINITY
-    u'\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
-    u'\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
-    u'\u2229'   #  0x00ef -> INTERSECTION
-    u'\u2261'   #  0x00f0 -> IDENTICAL TO
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
-    u'\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
-    u'\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
-    u'\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\u2248'   #  0x00f7 -> ALMOST EQUAL TO
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\u2219'   #  0x00f9 -> BULLET OPERATOR
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\u221a'   #  0x00fb -> SQUARE ROOT
-    u'\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
+    '\xd0'     #  0x008b -> LATIN CAPITAL LETTER ETH
+    '\xf0'     #  0x008c -> LATIN SMALL LETTER ETH
+    '\xde'     #  0x008d -> LATIN CAPITAL LETTER THORN
+    '\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
+    '\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
+    '\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xfe'     #  0x0095 -> LATIN SMALL LETTER THORN
+    '\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xdd'     #  0x0097 -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xfd'     #  0x0098 -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+    '\u20a7'   #  0x009e -> PESETA SIGN
+    '\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
+    '\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+    '\xc1'     #  0x00a4 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xcd'     #  0x00a5 -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xd3'     #  0x00a6 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xda'     #  0x00a7 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
+    '\u2310'   #  0x00a9 -> REVERSED NOT SIGN
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
+    '\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+    '\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+    '\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+    '\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+    '\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+    '\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+    '\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+    '\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u258c'   #  0x00dd -> LEFT HALF BLOCK
+    '\u2590'   #  0x00de -> RIGHT HALF BLOCK
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
+    '\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
+    '\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
+    '\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
+    '\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
+    '\xb5'     #  0x00e6 -> MICRO SIGN
+    '\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
+    '\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
+    '\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
+    '\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
+    '\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
+    '\u221e'   #  0x00ec -> INFINITY
+    '\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
+    '\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
+    '\u2229'   #  0x00ef -> INTERSECTION
+    '\u2261'   #  0x00f0 -> IDENTICAL TO
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
+    '\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
+    '\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
+    '\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\u2248'   #  0x00f7 -> ALMOST EQUAL TO
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\u2219'   #  0x00f9 -> BULLET OPERATOR
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\u221a'   #  0x00fb -> SQUARE ROOT
+    '\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp862.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp862.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp862.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\u05d0'   #  0x0080 -> HEBREW LETTER ALEF
-    u'\u05d1'   #  0x0081 -> HEBREW LETTER BET
-    u'\u05d2'   #  0x0082 -> HEBREW LETTER GIMEL
-    u'\u05d3'   #  0x0083 -> HEBREW LETTER DALET
-    u'\u05d4'   #  0x0084 -> HEBREW LETTER HE
-    u'\u05d5'   #  0x0085 -> HEBREW LETTER VAV
-    u'\u05d6'   #  0x0086 -> HEBREW LETTER ZAYIN
-    u'\u05d7'   #  0x0087 -> HEBREW LETTER HET
-    u'\u05d8'   #  0x0088 -> HEBREW LETTER TET
-    u'\u05d9'   #  0x0089 -> HEBREW LETTER YOD
-    u'\u05da'   #  0x008a -> HEBREW LETTER FINAL KAF
-    u'\u05db'   #  0x008b -> HEBREW LETTER KAF
-    u'\u05dc'   #  0x008c -> HEBREW LETTER LAMED
-    u'\u05dd'   #  0x008d -> HEBREW LETTER FINAL MEM
-    u'\u05de'   #  0x008e -> HEBREW LETTER MEM
-    u'\u05df'   #  0x008f -> HEBREW LETTER FINAL NUN
-    u'\u05e0'   #  0x0090 -> HEBREW LETTER NUN
-    u'\u05e1'   #  0x0091 -> HEBREW LETTER SAMEKH
-    u'\u05e2'   #  0x0092 -> HEBREW LETTER AYIN
-    u'\u05e3'   #  0x0093 -> HEBREW LETTER FINAL PE
-    u'\u05e4'   #  0x0094 -> HEBREW LETTER PE
-    u'\u05e5'   #  0x0095 -> HEBREW LETTER FINAL TSADI
-    u'\u05e6'   #  0x0096 -> HEBREW LETTER TSADI
-    u'\u05e7'   #  0x0097 -> HEBREW LETTER QOF
-    u'\u05e8'   #  0x0098 -> HEBREW LETTER RESH
-    u'\u05e9'   #  0x0099 -> HEBREW LETTER SHIN
-    u'\u05ea'   #  0x009a -> HEBREW LETTER TAV
-    u'\xa2'     #  0x009b -> CENT SIGN
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\xa5'     #  0x009d -> YEN SIGN
-    u'\u20a7'   #  0x009e -> PESETA SIGN
-    u'\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
-    u'\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
-    u'\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
-    u'\u2310'   #  0x00a9 -> REVERSED NOT SIGN
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
-    u'\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
-    u'\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
-    u'\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
-    u'\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
-    u'\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
-    u'\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
-    u'\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
-    u'\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u258c'   #  0x00dd -> LEFT HALF BLOCK
-    u'\u2590'   #  0x00de -> RIGHT HALF BLOCK
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
-    u'\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
-    u'\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
-    u'\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
-    u'\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
-    u'\xb5'     #  0x00e6 -> MICRO SIGN
-    u'\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
-    u'\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
-    u'\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
-    u'\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
-    u'\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
-    u'\u221e'   #  0x00ec -> INFINITY
-    u'\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
-    u'\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
-    u'\u2229'   #  0x00ef -> INTERSECTION
-    u'\u2261'   #  0x00f0 -> IDENTICAL TO
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
-    u'\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
-    u'\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
-    u'\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\u2248'   #  0x00f7 -> ALMOST EQUAL TO
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\u2219'   #  0x00f9 -> BULLET OPERATOR
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\u221a'   #  0x00fb -> SQUARE ROOT
-    u'\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\u05d0'   #  0x0080 -> HEBREW LETTER ALEF
+    '\u05d1'   #  0x0081 -> HEBREW LETTER BET
+    '\u05d2'   #  0x0082 -> HEBREW LETTER GIMEL
+    '\u05d3'   #  0x0083 -> HEBREW LETTER DALET
+    '\u05d4'   #  0x0084 -> HEBREW LETTER HE
+    '\u05d5'   #  0x0085 -> HEBREW LETTER VAV
+    '\u05d6'   #  0x0086 -> HEBREW LETTER ZAYIN
+    '\u05d7'   #  0x0087 -> HEBREW LETTER HET
+    '\u05d8'   #  0x0088 -> HEBREW LETTER TET
+    '\u05d9'   #  0x0089 -> HEBREW LETTER YOD
+    '\u05da'   #  0x008a -> HEBREW LETTER FINAL KAF
+    '\u05db'   #  0x008b -> HEBREW LETTER KAF
+    '\u05dc'   #  0x008c -> HEBREW LETTER LAMED
+    '\u05dd'   #  0x008d -> HEBREW LETTER FINAL MEM
+    '\u05de'   #  0x008e -> HEBREW LETTER MEM
+    '\u05df'   #  0x008f -> HEBREW LETTER FINAL NUN
+    '\u05e0'   #  0x0090 -> HEBREW LETTER NUN
+    '\u05e1'   #  0x0091 -> HEBREW LETTER SAMEKH
+    '\u05e2'   #  0x0092 -> HEBREW LETTER AYIN
+    '\u05e3'   #  0x0093 -> HEBREW LETTER FINAL PE
+    '\u05e4'   #  0x0094 -> HEBREW LETTER PE
+    '\u05e5'   #  0x0095 -> HEBREW LETTER FINAL TSADI
+    '\u05e6'   #  0x0096 -> HEBREW LETTER TSADI
+    '\u05e7'   #  0x0097 -> HEBREW LETTER QOF
+    '\u05e8'   #  0x0098 -> HEBREW LETTER RESH
+    '\u05e9'   #  0x0099 -> HEBREW LETTER SHIN
+    '\u05ea'   #  0x009a -> HEBREW LETTER TAV
+    '\xa2'     #  0x009b -> CENT SIGN
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\xa5'     #  0x009d -> YEN SIGN
+    '\u20a7'   #  0x009e -> PESETA SIGN
+    '\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
+    '\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+    '\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
+    '\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
+    '\u2310'   #  0x00a9 -> REVERSED NOT SIGN
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
+    '\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+    '\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+    '\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+    '\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+    '\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+    '\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+    '\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+    '\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u258c'   #  0x00dd -> LEFT HALF BLOCK
+    '\u2590'   #  0x00de -> RIGHT HALF BLOCK
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S (GERMAN)
+    '\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
+    '\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
+    '\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
+    '\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
+    '\xb5'     #  0x00e6 -> MICRO SIGN
+    '\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
+    '\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
+    '\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
+    '\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
+    '\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
+    '\u221e'   #  0x00ec -> INFINITY
+    '\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
+    '\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
+    '\u2229'   #  0x00ef -> INTERSECTION
+    '\u2261'   #  0x00f0 -> IDENTICAL TO
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
+    '\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
+    '\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
+    '\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\u2248'   #  0x00f7 -> ALMOST EQUAL TO
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\u2219'   #  0x00f9 -> BULLET OPERATOR
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\u221a'   #  0x00fb -> SQUARE ROOT
+    '\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp863.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp863.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp863.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xc2'     #  0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xb6'     #  0x0086 -> PILCROW SIGN
-    u'\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\u2017'   #  0x008d -> DOUBLE LOW LINE
-    u'\xc0'     #  0x008e -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xa7'     #  0x008f -> SECTION SIGN
-    u'\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xc8'     #  0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xca'     #  0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xcb'     #  0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xcf'     #  0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xa4'     #  0x0098 -> CURRENCY SIGN
-    u'\xd4'     #  0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xa2'     #  0x009b -> CENT SIGN
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\xd9'     #  0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xdb'     #  0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
-    u'\xa6'     #  0x00a0 -> BROKEN BAR
-    u'\xb4'     #  0x00a1 -> ACUTE ACCENT
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xa8'     #  0x00a4 -> DIAERESIS
-    u'\xb8'     #  0x00a5 -> CEDILLA
-    u'\xb3'     #  0x00a6 -> SUPERSCRIPT THREE
-    u'\xaf'     #  0x00a7 -> MACRON
-    u'\xce'     #  0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\u2310'   #  0x00a9 -> REVERSED NOT SIGN
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
-    u'\xbe'     #  0x00ad -> VULGAR FRACTION THREE QUARTERS
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
-    u'\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
-    u'\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
-    u'\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
-    u'\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
-    u'\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
-    u'\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
-    u'\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u258c'   #  0x00dd -> LEFT HALF BLOCK
-    u'\u2590'   #  0x00de -> RIGHT HALF BLOCK
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
-    u'\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
-    u'\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
-    u'\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
-    u'\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
-    u'\xb5'     #  0x00e6 -> MICRO SIGN
-    u'\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
-    u'\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
-    u'\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
-    u'\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
-    u'\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
-    u'\u221e'   #  0x00ec -> INFINITY
-    u'\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
-    u'\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
-    u'\u2229'   #  0x00ef -> INTERSECTION
-    u'\u2261'   #  0x00f0 -> IDENTICAL TO
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
-    u'\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
-    u'\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
-    u'\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\u2248'   #  0x00f7 -> ALMOST EQUAL TO
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\u2219'   #  0x00f9 -> BULLET OPERATOR
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\u221a'   #  0x00fb -> SQUARE ROOT
-    u'\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xc2'     #  0x0084 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xb6'     #  0x0086 -> PILCROW SIGN
+    '\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
+    '\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\u2017'   #  0x008d -> DOUBLE LOW LINE
+    '\xc0'     #  0x008e -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xa7'     #  0x008f -> SECTION SIGN
+    '\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xc8'     #  0x0091 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xca'     #  0x0092 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xcb'     #  0x0094 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xcf'     #  0x0095 -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xa4'     #  0x0098 -> CURRENCY SIGN
+    '\xd4'     #  0x0099 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xa2'     #  0x009b -> CENT SIGN
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\xd9'     #  0x009d -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xdb'     #  0x009e -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
+    '\xa6'     #  0x00a0 -> BROKEN BAR
+    '\xb4'     #  0x00a1 -> ACUTE ACCENT
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+    '\xa8'     #  0x00a4 -> DIAERESIS
+    '\xb8'     #  0x00a5 -> CEDILLA
+    '\xb3'     #  0x00a6 -> SUPERSCRIPT THREE
+    '\xaf'     #  0x00a7 -> MACRON
+    '\xce'     #  0x00a8 -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\u2310'   #  0x00a9 -> REVERSED NOT SIGN
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
+    '\xbe'     #  0x00ad -> VULGAR FRACTION THREE QUARTERS
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+    '\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+    '\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+    '\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+    '\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+    '\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+    '\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+    '\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u258c'   #  0x00dd -> LEFT HALF BLOCK
+    '\u2590'   #  0x00de -> RIGHT HALF BLOCK
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
+    '\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
+    '\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
+    '\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
+    '\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
+    '\xb5'     #  0x00e6 -> MICRO SIGN
+    '\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
+    '\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
+    '\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
+    '\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
+    '\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
+    '\u221e'   #  0x00ec -> INFINITY
+    '\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
+    '\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
+    '\u2229'   #  0x00ef -> INTERSECTION
+    '\u2261'   #  0x00f0 -> IDENTICAL TO
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
+    '\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
+    '\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
+    '\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\u2248'   #  0x00f7 -> ALMOST EQUAL TO
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\u2219'   #  0x00f9 -> BULLET OPERATOR
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\u221a'   #  0x00fb -> SQUARE ROOT
+    '\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp864.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp864.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp864.py	Wed May  2 21:09:54 2007
@@ -176,262 +176,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'\u066a'   #  0x0025 -> ARABIC PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\xb0'     #  0x0080 -> DEGREE SIGN
-    u'\xb7'     #  0x0081 -> MIDDLE DOT
-    u'\u2219'   #  0x0082 -> BULLET OPERATOR
-    u'\u221a'   #  0x0083 -> SQUARE ROOT
-    u'\u2592'   #  0x0084 -> MEDIUM SHADE
-    u'\u2500'   #  0x0085 -> FORMS LIGHT HORIZONTAL
-    u'\u2502'   #  0x0086 -> FORMS LIGHT VERTICAL
-    u'\u253c'   #  0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL
-    u'\u2524'   #  0x0088 -> FORMS LIGHT VERTICAL AND LEFT
-    u'\u252c'   #  0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x008a -> FORMS LIGHT VERTICAL AND RIGHT
-    u'\u2534'   #  0x008b -> FORMS LIGHT UP AND HORIZONTAL
-    u'\u2510'   #  0x008c -> FORMS LIGHT DOWN AND LEFT
-    u'\u250c'   #  0x008d -> FORMS LIGHT DOWN AND RIGHT
-    u'\u2514'   #  0x008e -> FORMS LIGHT UP AND RIGHT
-    u'\u2518'   #  0x008f -> FORMS LIGHT UP AND LEFT
-    u'\u03b2'   #  0x0090 -> GREEK SMALL BETA
-    u'\u221e'   #  0x0091 -> INFINITY
-    u'\u03c6'   #  0x0092 -> GREEK SMALL PHI
-    u'\xb1'     #  0x0093 -> PLUS-OR-MINUS SIGN
-    u'\xbd'     #  0x0094 -> FRACTION 1/2
-    u'\xbc'     #  0x0095 -> FRACTION 1/4
-    u'\u2248'   #  0x0096 -> ALMOST EQUAL TO
-    u'\xab'     #  0x0097 -> LEFT POINTING GUILLEMET
-    u'\xbb'     #  0x0098 -> RIGHT POINTING GUILLEMET
-    u'\ufef7'   #  0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
-    u'\ufef8'   #  0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
-    u'\ufffe'   #  0x009b -> UNDEFINED
-    u'\ufffe'   #  0x009c -> UNDEFINED
-    u'\ufefb'   #  0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
-    u'\ufefc'   #  0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM
-    u'\ufffe'   #  0x009f -> UNDEFINED
-    u'\xa0'     #  0x00a0 -> NON-BREAKING SPACE
-    u'\xad'     #  0x00a1 -> SOFT HYPHEN
-    u'\ufe82'   #  0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
-    u'\xa3'     #  0x00a3 -> POUND SIGN
-    u'\xa4'     #  0x00a4 -> CURRENCY SIGN
-    u'\ufe84'   #  0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
-    u'\ufffe'   #  0x00a6 -> UNDEFINED
-    u'\ufffe'   #  0x00a7 -> UNDEFINED
-    u'\ufe8e'   #  0x00a8 -> ARABIC LETTER ALEF FINAL FORM
-    u'\ufe8f'   #  0x00a9 -> ARABIC LETTER BEH ISOLATED FORM
-    u'\ufe95'   #  0x00aa -> ARABIC LETTER TEH ISOLATED FORM
-    u'\ufe99'   #  0x00ab -> ARABIC LETTER THEH ISOLATED FORM
-    u'\u060c'   #  0x00ac -> ARABIC COMMA
-    u'\ufe9d'   #  0x00ad -> ARABIC LETTER JEEM ISOLATED FORM
-    u'\ufea1'   #  0x00ae -> ARABIC LETTER HAH ISOLATED FORM
-    u'\ufea5'   #  0x00af -> ARABIC LETTER KHAH ISOLATED FORM
-    u'\u0660'   #  0x00b0 -> ARABIC-INDIC DIGIT ZERO
-    u'\u0661'   #  0x00b1 -> ARABIC-INDIC DIGIT ONE
-    u'\u0662'   #  0x00b2 -> ARABIC-INDIC DIGIT TWO
-    u'\u0663'   #  0x00b3 -> ARABIC-INDIC DIGIT THREE
-    u'\u0664'   #  0x00b4 -> ARABIC-INDIC DIGIT FOUR
-    u'\u0665'   #  0x00b5 -> ARABIC-INDIC DIGIT FIVE
-    u'\u0666'   #  0x00b6 -> ARABIC-INDIC DIGIT SIX
-    u'\u0667'   #  0x00b7 -> ARABIC-INDIC DIGIT SEVEN
-    u'\u0668'   #  0x00b8 -> ARABIC-INDIC DIGIT EIGHT
-    u'\u0669'   #  0x00b9 -> ARABIC-INDIC DIGIT NINE
-    u'\ufed1'   #  0x00ba -> ARABIC LETTER FEH ISOLATED FORM
-    u'\u061b'   #  0x00bb -> ARABIC SEMICOLON
-    u'\ufeb1'   #  0x00bc -> ARABIC LETTER SEEN ISOLATED FORM
-    u'\ufeb5'   #  0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM
-    u'\ufeb9'   #  0x00be -> ARABIC LETTER SAD ISOLATED FORM
-    u'\u061f'   #  0x00bf -> ARABIC QUESTION MARK
-    u'\xa2'     #  0x00c0 -> CENT SIGN
-    u'\ufe80'   #  0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM
-    u'\ufe81'   #  0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
-    u'\ufe83'   #  0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
-    u'\ufe85'   #  0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
-    u'\ufeca'   #  0x00c5 -> ARABIC LETTER AIN FINAL FORM
-    u'\ufe8b'   #  0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
-    u'\ufe8d'   #  0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM
-    u'\ufe91'   #  0x00c8 -> ARABIC LETTER BEH INITIAL FORM
-    u'\ufe93'   #  0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
-    u'\ufe97'   #  0x00ca -> ARABIC LETTER TEH INITIAL FORM
-    u'\ufe9b'   #  0x00cb -> ARABIC LETTER THEH INITIAL FORM
-    u'\ufe9f'   #  0x00cc -> ARABIC LETTER JEEM INITIAL FORM
-    u'\ufea3'   #  0x00cd -> ARABIC LETTER HAH INITIAL FORM
-    u'\ufea7'   #  0x00ce -> ARABIC LETTER KHAH INITIAL FORM
-    u'\ufea9'   #  0x00cf -> ARABIC LETTER DAL ISOLATED FORM
-    u'\ufeab'   #  0x00d0 -> ARABIC LETTER THAL ISOLATED FORM
-    u'\ufead'   #  0x00d1 -> ARABIC LETTER REH ISOLATED FORM
-    u'\ufeaf'   #  0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM
-    u'\ufeb3'   #  0x00d3 -> ARABIC LETTER SEEN INITIAL FORM
-    u'\ufeb7'   #  0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM
-    u'\ufebb'   #  0x00d5 -> ARABIC LETTER SAD INITIAL FORM
-    u'\ufebf'   #  0x00d6 -> ARABIC LETTER DAD INITIAL FORM
-    u'\ufec1'   #  0x00d7 -> ARABIC LETTER TAH ISOLATED FORM
-    u'\ufec5'   #  0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM
-    u'\ufecb'   #  0x00d9 -> ARABIC LETTER AIN INITIAL FORM
-    u'\ufecf'   #  0x00da -> ARABIC LETTER GHAIN INITIAL FORM
-    u'\xa6'     #  0x00db -> BROKEN VERTICAL BAR
-    u'\xac'     #  0x00dc -> NOT SIGN
-    u'\xf7'     #  0x00dd -> DIVISION SIGN
-    u'\xd7'     #  0x00de -> MULTIPLICATION SIGN
-    u'\ufec9'   #  0x00df -> ARABIC LETTER AIN ISOLATED FORM
-    u'\u0640'   #  0x00e0 -> ARABIC TATWEEL
-    u'\ufed3'   #  0x00e1 -> ARABIC LETTER FEH INITIAL FORM
-    u'\ufed7'   #  0x00e2 -> ARABIC LETTER QAF INITIAL FORM
-    u'\ufedb'   #  0x00e3 -> ARABIC LETTER KAF INITIAL FORM
-    u'\ufedf'   #  0x00e4 -> ARABIC LETTER LAM INITIAL FORM
-    u'\ufee3'   #  0x00e5 -> ARABIC LETTER MEEM INITIAL FORM
-    u'\ufee7'   #  0x00e6 -> ARABIC LETTER NOON INITIAL FORM
-    u'\ufeeb'   #  0x00e7 -> ARABIC LETTER HEH INITIAL FORM
-    u'\ufeed'   #  0x00e8 -> ARABIC LETTER WAW ISOLATED FORM
-    u'\ufeef'   #  0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM
-    u'\ufef3'   #  0x00ea -> ARABIC LETTER YEH INITIAL FORM
-    u'\ufebd'   #  0x00eb -> ARABIC LETTER DAD ISOLATED FORM
-    u'\ufecc'   #  0x00ec -> ARABIC LETTER AIN MEDIAL FORM
-    u'\ufece'   #  0x00ed -> ARABIC LETTER GHAIN FINAL FORM
-    u'\ufecd'   #  0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM
-    u'\ufee1'   #  0x00ef -> ARABIC LETTER MEEM ISOLATED FORM
-    u'\ufe7d'   #  0x00f0 -> ARABIC SHADDA MEDIAL FORM
-    u'\u0651'   #  0x00f1 -> ARABIC SHADDAH
-    u'\ufee5'   #  0x00f2 -> ARABIC LETTER NOON ISOLATED FORM
-    u'\ufee9'   #  0x00f3 -> ARABIC LETTER HEH ISOLATED FORM
-    u'\ufeec'   #  0x00f4 -> ARABIC LETTER HEH MEDIAL FORM
-    u'\ufef0'   #  0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM
-    u'\ufef2'   #  0x00f6 -> ARABIC LETTER YEH FINAL FORM
-    u'\ufed0'   #  0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM
-    u'\ufed5'   #  0x00f8 -> ARABIC LETTER QAF ISOLATED FORM
-    u'\ufef5'   #  0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
-    u'\ufef6'   #  0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
-    u'\ufedd'   #  0x00fb -> ARABIC LETTER LAM ISOLATED FORM
-    u'\ufed9'   #  0x00fc -> ARABIC LETTER KAF ISOLATED FORM
-    u'\ufef1'   #  0x00fd -> ARABIC LETTER YEH ISOLATED FORM
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\ufffe'   #  0x00ff -> UNDEFINED
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '\u066a'   #  0x0025 -> ARABIC PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\xb0'     #  0x0080 -> DEGREE SIGN
+    '\xb7'     #  0x0081 -> MIDDLE DOT
+    '\u2219'   #  0x0082 -> BULLET OPERATOR
+    '\u221a'   #  0x0083 -> SQUARE ROOT
+    '\u2592'   #  0x0084 -> MEDIUM SHADE
+    '\u2500'   #  0x0085 -> FORMS LIGHT HORIZONTAL
+    '\u2502'   #  0x0086 -> FORMS LIGHT VERTICAL
+    '\u253c'   #  0x0087 -> FORMS LIGHT VERTICAL AND HORIZONTAL
+    '\u2524'   #  0x0088 -> FORMS LIGHT VERTICAL AND LEFT
+    '\u252c'   #  0x0089 -> FORMS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x008a -> FORMS LIGHT VERTICAL AND RIGHT
+    '\u2534'   #  0x008b -> FORMS LIGHT UP AND HORIZONTAL
+    '\u2510'   #  0x008c -> FORMS LIGHT DOWN AND LEFT
+    '\u250c'   #  0x008d -> FORMS LIGHT DOWN AND RIGHT
+    '\u2514'   #  0x008e -> FORMS LIGHT UP AND RIGHT
+    '\u2518'   #  0x008f -> FORMS LIGHT UP AND LEFT
+    '\u03b2'   #  0x0090 -> GREEK SMALL BETA
+    '\u221e'   #  0x0091 -> INFINITY
+    '\u03c6'   #  0x0092 -> GREEK SMALL PHI
+    '\xb1'     #  0x0093 -> PLUS-OR-MINUS SIGN
+    '\xbd'     #  0x0094 -> FRACTION 1/2
+    '\xbc'     #  0x0095 -> FRACTION 1/4
+    '\u2248'   #  0x0096 -> ALMOST EQUAL TO
+    '\xab'     #  0x0097 -> LEFT POINTING GUILLEMET
+    '\xbb'     #  0x0098 -> RIGHT POINTING GUILLEMET
+    '\ufef7'   #  0x0099 -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE ISOLATED FORM
+    '\ufef8'   #  0x009a -> ARABIC LIGATURE LAM WITH ALEF WITH HAMZA ABOVE FINAL FORM
+    '\ufffe'   #  0x009b -> UNDEFINED
+    '\ufffe'   #  0x009c -> UNDEFINED
+    '\ufefb'   #  0x009d -> ARABIC LIGATURE LAM WITH ALEF ISOLATED FORM
+    '\ufefc'   #  0x009e -> ARABIC LIGATURE LAM WITH ALEF FINAL FORM
+    '\ufffe'   #  0x009f -> UNDEFINED
+    '\xa0'     #  0x00a0 -> NON-BREAKING SPACE
+    '\xad'     #  0x00a1 -> SOFT HYPHEN
+    '\ufe82'   #  0x00a2 -> ARABIC LETTER ALEF WITH MADDA ABOVE FINAL FORM
+    '\xa3'     #  0x00a3 -> POUND SIGN
+    '\xa4'     #  0x00a4 -> CURRENCY SIGN
+    '\ufe84'   #  0x00a5 -> ARABIC LETTER ALEF WITH HAMZA ABOVE FINAL FORM
+    '\ufffe'   #  0x00a6 -> UNDEFINED
+    '\ufffe'   #  0x00a7 -> UNDEFINED
+    '\ufe8e'   #  0x00a8 -> ARABIC LETTER ALEF FINAL FORM
+    '\ufe8f'   #  0x00a9 -> ARABIC LETTER BEH ISOLATED FORM
+    '\ufe95'   #  0x00aa -> ARABIC LETTER TEH ISOLATED FORM
+    '\ufe99'   #  0x00ab -> ARABIC LETTER THEH ISOLATED FORM
+    '\u060c'   #  0x00ac -> ARABIC COMMA
+    '\ufe9d'   #  0x00ad -> ARABIC LETTER JEEM ISOLATED FORM
+    '\ufea1'   #  0x00ae -> ARABIC LETTER HAH ISOLATED FORM
+    '\ufea5'   #  0x00af -> ARABIC LETTER KHAH ISOLATED FORM
+    '\u0660'   #  0x00b0 -> ARABIC-INDIC DIGIT ZERO
+    '\u0661'   #  0x00b1 -> ARABIC-INDIC DIGIT ONE
+    '\u0662'   #  0x00b2 -> ARABIC-INDIC DIGIT TWO
+    '\u0663'   #  0x00b3 -> ARABIC-INDIC DIGIT THREE
+    '\u0664'   #  0x00b4 -> ARABIC-INDIC DIGIT FOUR
+    '\u0665'   #  0x00b5 -> ARABIC-INDIC DIGIT FIVE
+    '\u0666'   #  0x00b6 -> ARABIC-INDIC DIGIT SIX
+    '\u0667'   #  0x00b7 -> ARABIC-INDIC DIGIT SEVEN
+    '\u0668'   #  0x00b8 -> ARABIC-INDIC DIGIT EIGHT
+    '\u0669'   #  0x00b9 -> ARABIC-INDIC DIGIT NINE
+    '\ufed1'   #  0x00ba -> ARABIC LETTER FEH ISOLATED FORM
+    '\u061b'   #  0x00bb -> ARABIC SEMICOLON
+    '\ufeb1'   #  0x00bc -> ARABIC LETTER SEEN ISOLATED FORM
+    '\ufeb5'   #  0x00bd -> ARABIC LETTER SHEEN ISOLATED FORM
+    '\ufeb9'   #  0x00be -> ARABIC LETTER SAD ISOLATED FORM
+    '\u061f'   #  0x00bf -> ARABIC QUESTION MARK
+    '\xa2'     #  0x00c0 -> CENT SIGN
+    '\ufe80'   #  0x00c1 -> ARABIC LETTER HAMZA ISOLATED FORM
+    '\ufe81'   #  0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE ISOLATED FORM
+    '\ufe83'   #  0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE ISOLATED FORM
+    '\ufe85'   #  0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE ISOLATED FORM
+    '\ufeca'   #  0x00c5 -> ARABIC LETTER AIN FINAL FORM
+    '\ufe8b'   #  0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE INITIAL FORM
+    '\ufe8d'   #  0x00c7 -> ARABIC LETTER ALEF ISOLATED FORM
+    '\ufe91'   #  0x00c8 -> ARABIC LETTER BEH INITIAL FORM
+    '\ufe93'   #  0x00c9 -> ARABIC LETTER TEH MARBUTA ISOLATED FORM
+    '\ufe97'   #  0x00ca -> ARABIC LETTER TEH INITIAL FORM
+    '\ufe9b'   #  0x00cb -> ARABIC LETTER THEH INITIAL FORM
+    '\ufe9f'   #  0x00cc -> ARABIC LETTER JEEM INITIAL FORM
+    '\ufea3'   #  0x00cd -> ARABIC LETTER HAH INITIAL FORM
+    '\ufea7'   #  0x00ce -> ARABIC LETTER KHAH INITIAL FORM
+    '\ufea9'   #  0x00cf -> ARABIC LETTER DAL ISOLATED FORM
+    '\ufeab'   #  0x00d0 -> ARABIC LETTER THAL ISOLATED FORM
+    '\ufead'   #  0x00d1 -> ARABIC LETTER REH ISOLATED FORM
+    '\ufeaf'   #  0x00d2 -> ARABIC LETTER ZAIN ISOLATED FORM
+    '\ufeb3'   #  0x00d3 -> ARABIC LETTER SEEN INITIAL FORM
+    '\ufeb7'   #  0x00d4 -> ARABIC LETTER SHEEN INITIAL FORM
+    '\ufebb'   #  0x00d5 -> ARABIC LETTER SAD INITIAL FORM
+    '\ufebf'   #  0x00d6 -> ARABIC LETTER DAD INITIAL FORM
+    '\ufec1'   #  0x00d7 -> ARABIC LETTER TAH ISOLATED FORM
+    '\ufec5'   #  0x00d8 -> ARABIC LETTER ZAH ISOLATED FORM
+    '\ufecb'   #  0x00d9 -> ARABIC LETTER AIN INITIAL FORM
+    '\ufecf'   #  0x00da -> ARABIC LETTER GHAIN INITIAL FORM
+    '\xa6'     #  0x00db -> BROKEN VERTICAL BAR
+    '\xac'     #  0x00dc -> NOT SIGN
+    '\xf7'     #  0x00dd -> DIVISION SIGN
+    '\xd7'     #  0x00de -> MULTIPLICATION SIGN
+    '\ufec9'   #  0x00df -> ARABIC LETTER AIN ISOLATED FORM
+    '\u0640'   #  0x00e0 -> ARABIC TATWEEL
+    '\ufed3'   #  0x00e1 -> ARABIC LETTER FEH INITIAL FORM
+    '\ufed7'   #  0x00e2 -> ARABIC LETTER QAF INITIAL FORM
+    '\ufedb'   #  0x00e3 -> ARABIC LETTER KAF INITIAL FORM
+    '\ufedf'   #  0x00e4 -> ARABIC LETTER LAM INITIAL FORM
+    '\ufee3'   #  0x00e5 -> ARABIC LETTER MEEM INITIAL FORM
+    '\ufee7'   #  0x00e6 -> ARABIC LETTER NOON INITIAL FORM
+    '\ufeeb'   #  0x00e7 -> ARABIC LETTER HEH INITIAL FORM
+    '\ufeed'   #  0x00e8 -> ARABIC LETTER WAW ISOLATED FORM
+    '\ufeef'   #  0x00e9 -> ARABIC LETTER ALEF MAKSURA ISOLATED FORM
+    '\ufef3'   #  0x00ea -> ARABIC LETTER YEH INITIAL FORM
+    '\ufebd'   #  0x00eb -> ARABIC LETTER DAD ISOLATED FORM
+    '\ufecc'   #  0x00ec -> ARABIC LETTER AIN MEDIAL FORM
+    '\ufece'   #  0x00ed -> ARABIC LETTER GHAIN FINAL FORM
+    '\ufecd'   #  0x00ee -> ARABIC LETTER GHAIN ISOLATED FORM
+    '\ufee1'   #  0x00ef -> ARABIC LETTER MEEM ISOLATED FORM
+    '\ufe7d'   #  0x00f0 -> ARABIC SHADDA MEDIAL FORM
+    '\u0651'   #  0x00f1 -> ARABIC SHADDAH
+    '\ufee5'   #  0x00f2 -> ARABIC LETTER NOON ISOLATED FORM
+    '\ufee9'   #  0x00f3 -> ARABIC LETTER HEH ISOLATED FORM
+    '\ufeec'   #  0x00f4 -> ARABIC LETTER HEH MEDIAL FORM
+    '\ufef0'   #  0x00f5 -> ARABIC LETTER ALEF MAKSURA FINAL FORM
+    '\ufef2'   #  0x00f6 -> ARABIC LETTER YEH FINAL FORM
+    '\ufed0'   #  0x00f7 -> ARABIC LETTER GHAIN MEDIAL FORM
+    '\ufed5'   #  0x00f8 -> ARABIC LETTER QAF ISOLATED FORM
+    '\ufef5'   #  0x00f9 -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE ISOLATED FORM
+    '\ufef6'   #  0x00fa -> ARABIC LIGATURE LAM WITH ALEF WITH MADDA ABOVE FINAL FORM
+    '\ufedd'   #  0x00fb -> ARABIC LETTER LAM ISOLATED FORM
+    '\ufed9'   #  0x00fc -> ARABIC LETTER KAF ISOLATED FORM
+    '\ufef1'   #  0x00fd -> ARABIC LETTER YEH ISOLATED FORM
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\ufffe'   #  0x00ff -> UNDEFINED
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp865.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp865.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp865.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xec'     #  0x008d -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
-    u'\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
-    u'\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xff'     #  0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\u20a7'   #  0x009e -> PESETA SIGN
-    u'\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
-    u'\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
-    u'\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
-    u'\u2310'   #  0x00a9 -> REVERSED NOT SIGN
-    u'\xac'     #  0x00aa -> NOT SIGN
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
-    u'\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xa4'     #  0x00af -> CURRENCY SIGN
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
-    u'\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
-    u'\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
-    u'\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
-    u'\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
-    u'\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
-    u'\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
-    u'\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u258c'   #  0x00dd -> LEFT HALF BLOCK
-    u'\u2590'   #  0x00de -> RIGHT HALF BLOCK
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
-    u'\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
-    u'\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
-    u'\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
-    u'\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
-    u'\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
-    u'\xb5'     #  0x00e6 -> MICRO SIGN
-    u'\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
-    u'\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
-    u'\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
-    u'\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
-    u'\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
-    u'\u221e'   #  0x00ec -> INFINITY
-    u'\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
-    u'\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
-    u'\u2229'   #  0x00ef -> INTERSECTION
-    u'\u2261'   #  0x00f0 -> IDENTICAL TO
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
-    u'\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
-    u'\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
-    u'\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
-    u'\xf7'     #  0x00f6 -> DIVISION SIGN
-    u'\u2248'   #  0x00f7 -> ALMOST EQUAL TO
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\u2219'   #  0x00f9 -> BULLET OPERATOR
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\u221a'   #  0x00fb -> SQUARE ROOT
-    u'\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
-    u'\xb2'     #  0x00fd -> SUPERSCRIPT TWO
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\xc7'     #  0x0080 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xfc'     #  0x0081 -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xe9'     #  0x0082 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe2'     #  0x0083 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x0084 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe0'     #  0x0085 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe5'     #  0x0086 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x0087 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xea'     #  0x0088 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x0089 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xe8'     #  0x008a -> LATIN SMALL LETTER E WITH GRAVE
+    '\xef'     #  0x008b -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xee'     #  0x008c -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xec'     #  0x008d -> LATIN SMALL LETTER I WITH GRAVE
+    '\xc4'     #  0x008e -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x008f -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc9'     #  0x0090 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xe6'     #  0x0091 -> LATIN SMALL LIGATURE AE
+    '\xc6'     #  0x0092 -> LATIN CAPITAL LIGATURE AE
+    '\xf4'     #  0x0093 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x0094 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf2'     #  0x0095 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xfb'     #  0x0096 -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xf9'     #  0x0097 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xff'     #  0x0098 -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\xd6'     #  0x0099 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x009a -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xf8'     #  0x009b -> LATIN SMALL LETTER O WITH STROKE
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\xd8'     #  0x009d -> LATIN CAPITAL LETTER O WITH STROKE
+    '\u20a7'   #  0x009e -> PESETA SIGN
+    '\u0192'   #  0x009f -> LATIN SMALL LETTER F WITH HOOK
+    '\xe1'     #  0x00a0 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xed'     #  0x00a1 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xf3'     #  0x00a2 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xfa'     #  0x00a3 -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf1'     #  0x00a4 -> LATIN SMALL LETTER N WITH TILDE
+    '\xd1'     #  0x00a5 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xaa'     #  0x00a6 -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0x00a7 -> MASCULINE ORDINAL INDICATOR
+    '\xbf'     #  0x00a8 -> INVERTED QUESTION MARK
+    '\u2310'   #  0x00a9 -> REVERSED NOT SIGN
+    '\xac'     #  0x00aa -> NOT SIGN
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\xbc'     #  0x00ac -> VULGAR FRACTION ONE QUARTER
+    '\xa1'     #  0x00ad -> INVERTED EXCLAMATION MARK
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xa4'     #  0x00af -> CURRENCY SIGN
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+    '\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+    '\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+    '\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+    '\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+    '\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+    '\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+    '\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u258c'   #  0x00dd -> LEFT HALF BLOCK
+    '\u2590'   #  0x00de -> RIGHT HALF BLOCK
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u03b1'   #  0x00e0 -> GREEK SMALL LETTER ALPHA
+    '\xdf'     #  0x00e1 -> LATIN SMALL LETTER SHARP S
+    '\u0393'   #  0x00e2 -> GREEK CAPITAL LETTER GAMMA
+    '\u03c0'   #  0x00e3 -> GREEK SMALL LETTER PI
+    '\u03a3'   #  0x00e4 -> GREEK CAPITAL LETTER SIGMA
+    '\u03c3'   #  0x00e5 -> GREEK SMALL LETTER SIGMA
+    '\xb5'     #  0x00e6 -> MICRO SIGN
+    '\u03c4'   #  0x00e7 -> GREEK SMALL LETTER TAU
+    '\u03a6'   #  0x00e8 -> GREEK CAPITAL LETTER PHI
+    '\u0398'   #  0x00e9 -> GREEK CAPITAL LETTER THETA
+    '\u03a9'   #  0x00ea -> GREEK CAPITAL LETTER OMEGA
+    '\u03b4'   #  0x00eb -> GREEK SMALL LETTER DELTA
+    '\u221e'   #  0x00ec -> INFINITY
+    '\u03c6'   #  0x00ed -> GREEK SMALL LETTER PHI
+    '\u03b5'   #  0x00ee -> GREEK SMALL LETTER EPSILON
+    '\u2229'   #  0x00ef -> INTERSECTION
+    '\u2261'   #  0x00f0 -> IDENTICAL TO
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u2265'   #  0x00f2 -> GREATER-THAN OR EQUAL TO
+    '\u2264'   #  0x00f3 -> LESS-THAN OR EQUAL TO
+    '\u2320'   #  0x00f4 -> TOP HALF INTEGRAL
+    '\u2321'   #  0x00f5 -> BOTTOM HALF INTEGRAL
+    '\xf7'     #  0x00f6 -> DIVISION SIGN
+    '\u2248'   #  0x00f7 -> ALMOST EQUAL TO
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\u2219'   #  0x00f9 -> BULLET OPERATOR
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\u221a'   #  0x00fb -> SQUARE ROOT
+    '\u207f'   #  0x00fc -> SUPERSCRIPT LATIN SMALL LETTER N
+    '\xb2'     #  0x00fd -> SUPERSCRIPT TWO
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp866.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp866.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp866.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\u0410'   #  0x0080 -> CYRILLIC CAPITAL LETTER A
-    u'\u0411'   #  0x0081 -> CYRILLIC CAPITAL LETTER BE
-    u'\u0412'   #  0x0082 -> CYRILLIC CAPITAL LETTER VE
-    u'\u0413'   #  0x0083 -> CYRILLIC CAPITAL LETTER GHE
-    u'\u0414'   #  0x0084 -> CYRILLIC CAPITAL LETTER DE
-    u'\u0415'   #  0x0085 -> CYRILLIC CAPITAL LETTER IE
-    u'\u0416'   #  0x0086 -> CYRILLIC CAPITAL LETTER ZHE
-    u'\u0417'   #  0x0087 -> CYRILLIC CAPITAL LETTER ZE
-    u'\u0418'   #  0x0088 -> CYRILLIC CAPITAL LETTER I
-    u'\u0419'   #  0x0089 -> CYRILLIC CAPITAL LETTER SHORT I
-    u'\u041a'   #  0x008a -> CYRILLIC CAPITAL LETTER KA
-    u'\u041b'   #  0x008b -> CYRILLIC CAPITAL LETTER EL
-    u'\u041c'   #  0x008c -> CYRILLIC CAPITAL LETTER EM
-    u'\u041d'   #  0x008d -> CYRILLIC CAPITAL LETTER EN
-    u'\u041e'   #  0x008e -> CYRILLIC CAPITAL LETTER O
-    u'\u041f'   #  0x008f -> CYRILLIC CAPITAL LETTER PE
-    u'\u0420'   #  0x0090 -> CYRILLIC CAPITAL LETTER ER
-    u'\u0421'   #  0x0091 -> CYRILLIC CAPITAL LETTER ES
-    u'\u0422'   #  0x0092 -> CYRILLIC CAPITAL LETTER TE
-    u'\u0423'   #  0x0093 -> CYRILLIC CAPITAL LETTER U
-    u'\u0424'   #  0x0094 -> CYRILLIC CAPITAL LETTER EF
-    u'\u0425'   #  0x0095 -> CYRILLIC CAPITAL LETTER HA
-    u'\u0426'   #  0x0096 -> CYRILLIC CAPITAL LETTER TSE
-    u'\u0427'   #  0x0097 -> CYRILLIC CAPITAL LETTER CHE
-    u'\u0428'   #  0x0098 -> CYRILLIC CAPITAL LETTER SHA
-    u'\u0429'   #  0x0099 -> CYRILLIC CAPITAL LETTER SHCHA
-    u'\u042a'   #  0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN
-    u'\u042b'   #  0x009b -> CYRILLIC CAPITAL LETTER YERU
-    u'\u042c'   #  0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN
-    u'\u042d'   #  0x009d -> CYRILLIC CAPITAL LETTER E
-    u'\u042e'   #  0x009e -> CYRILLIC CAPITAL LETTER YU
-    u'\u042f'   #  0x009f -> CYRILLIC CAPITAL LETTER YA
-    u'\u0430'   #  0x00a0 -> CYRILLIC SMALL LETTER A
-    u'\u0431'   #  0x00a1 -> CYRILLIC SMALL LETTER BE
-    u'\u0432'   #  0x00a2 -> CYRILLIC SMALL LETTER VE
-    u'\u0433'   #  0x00a3 -> CYRILLIC SMALL LETTER GHE
-    u'\u0434'   #  0x00a4 -> CYRILLIC SMALL LETTER DE
-    u'\u0435'   #  0x00a5 -> CYRILLIC SMALL LETTER IE
-    u'\u0436'   #  0x00a6 -> CYRILLIC SMALL LETTER ZHE
-    u'\u0437'   #  0x00a7 -> CYRILLIC SMALL LETTER ZE
-    u'\u0438'   #  0x00a8 -> CYRILLIC SMALL LETTER I
-    u'\u0439'   #  0x00a9 -> CYRILLIC SMALL LETTER SHORT I
-    u'\u043a'   #  0x00aa -> CYRILLIC SMALL LETTER KA
-    u'\u043b'   #  0x00ab -> CYRILLIC SMALL LETTER EL
-    u'\u043c'   #  0x00ac -> CYRILLIC SMALL LETTER EM
-    u'\u043d'   #  0x00ad -> CYRILLIC SMALL LETTER EN
-    u'\u043e'   #  0x00ae -> CYRILLIC SMALL LETTER O
-    u'\u043f'   #  0x00af -> CYRILLIC SMALL LETTER PE
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
-    u'\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
-    u'\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
-    u'\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
-    u'\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
-    u'\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
-    u'\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
-    u'\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u258c'   #  0x00dd -> LEFT HALF BLOCK
-    u'\u2590'   #  0x00de -> RIGHT HALF BLOCK
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u0440'   #  0x00e0 -> CYRILLIC SMALL LETTER ER
-    u'\u0441'   #  0x00e1 -> CYRILLIC SMALL LETTER ES
-    u'\u0442'   #  0x00e2 -> CYRILLIC SMALL LETTER TE
-    u'\u0443'   #  0x00e3 -> CYRILLIC SMALL LETTER U
-    u'\u0444'   #  0x00e4 -> CYRILLIC SMALL LETTER EF
-    u'\u0445'   #  0x00e5 -> CYRILLIC SMALL LETTER HA
-    u'\u0446'   #  0x00e6 -> CYRILLIC SMALL LETTER TSE
-    u'\u0447'   #  0x00e7 -> CYRILLIC SMALL LETTER CHE
-    u'\u0448'   #  0x00e8 -> CYRILLIC SMALL LETTER SHA
-    u'\u0449'   #  0x00e9 -> CYRILLIC SMALL LETTER SHCHA
-    u'\u044a'   #  0x00ea -> CYRILLIC SMALL LETTER HARD SIGN
-    u'\u044b'   #  0x00eb -> CYRILLIC SMALL LETTER YERU
-    u'\u044c'   #  0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN
-    u'\u044d'   #  0x00ed -> CYRILLIC SMALL LETTER E
-    u'\u044e'   #  0x00ee -> CYRILLIC SMALL LETTER YU
-    u'\u044f'   #  0x00ef -> CYRILLIC SMALL LETTER YA
-    u'\u0401'   #  0x00f0 -> CYRILLIC CAPITAL LETTER IO
-    u'\u0451'   #  0x00f1 -> CYRILLIC SMALL LETTER IO
-    u'\u0404'   #  0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
-    u'\u0454'   #  0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE
-    u'\u0407'   #  0x00f4 -> CYRILLIC CAPITAL LETTER YI
-    u'\u0457'   #  0x00f5 -> CYRILLIC SMALL LETTER YI
-    u'\u040e'   #  0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U
-    u'\u045e'   #  0x00f7 -> CYRILLIC SMALL LETTER SHORT U
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\u2219'   #  0x00f9 -> BULLET OPERATOR
-    u'\xb7'     #  0x00fa -> MIDDLE DOT
-    u'\u221a'   #  0x00fb -> SQUARE ROOT
-    u'\u2116'   #  0x00fc -> NUMERO SIGN
-    u'\xa4'     #  0x00fd -> CURRENCY SIGN
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\u0410'   #  0x0080 -> CYRILLIC CAPITAL LETTER A
+    '\u0411'   #  0x0081 -> CYRILLIC CAPITAL LETTER BE
+    '\u0412'   #  0x0082 -> CYRILLIC CAPITAL LETTER VE
+    '\u0413'   #  0x0083 -> CYRILLIC CAPITAL LETTER GHE
+    '\u0414'   #  0x0084 -> CYRILLIC CAPITAL LETTER DE
+    '\u0415'   #  0x0085 -> CYRILLIC CAPITAL LETTER IE
+    '\u0416'   #  0x0086 -> CYRILLIC CAPITAL LETTER ZHE
+    '\u0417'   #  0x0087 -> CYRILLIC CAPITAL LETTER ZE
+    '\u0418'   #  0x0088 -> CYRILLIC CAPITAL LETTER I
+    '\u0419'   #  0x0089 -> CYRILLIC CAPITAL LETTER SHORT I
+    '\u041a'   #  0x008a -> CYRILLIC CAPITAL LETTER KA
+    '\u041b'   #  0x008b -> CYRILLIC CAPITAL LETTER EL
+    '\u041c'   #  0x008c -> CYRILLIC CAPITAL LETTER EM
+    '\u041d'   #  0x008d -> CYRILLIC CAPITAL LETTER EN
+    '\u041e'   #  0x008e -> CYRILLIC CAPITAL LETTER O
+    '\u041f'   #  0x008f -> CYRILLIC CAPITAL LETTER PE
+    '\u0420'   #  0x0090 -> CYRILLIC CAPITAL LETTER ER
+    '\u0421'   #  0x0091 -> CYRILLIC CAPITAL LETTER ES
+    '\u0422'   #  0x0092 -> CYRILLIC CAPITAL LETTER TE
+    '\u0423'   #  0x0093 -> CYRILLIC CAPITAL LETTER U
+    '\u0424'   #  0x0094 -> CYRILLIC CAPITAL LETTER EF
+    '\u0425'   #  0x0095 -> CYRILLIC CAPITAL LETTER HA
+    '\u0426'   #  0x0096 -> CYRILLIC CAPITAL LETTER TSE
+    '\u0427'   #  0x0097 -> CYRILLIC CAPITAL LETTER CHE
+    '\u0428'   #  0x0098 -> CYRILLIC CAPITAL LETTER SHA
+    '\u0429'   #  0x0099 -> CYRILLIC CAPITAL LETTER SHCHA
+    '\u042a'   #  0x009a -> CYRILLIC CAPITAL LETTER HARD SIGN
+    '\u042b'   #  0x009b -> CYRILLIC CAPITAL LETTER YERU
+    '\u042c'   #  0x009c -> CYRILLIC CAPITAL LETTER SOFT SIGN
+    '\u042d'   #  0x009d -> CYRILLIC CAPITAL LETTER E
+    '\u042e'   #  0x009e -> CYRILLIC CAPITAL LETTER YU
+    '\u042f'   #  0x009f -> CYRILLIC CAPITAL LETTER YA
+    '\u0430'   #  0x00a0 -> CYRILLIC SMALL LETTER A
+    '\u0431'   #  0x00a1 -> CYRILLIC SMALL LETTER BE
+    '\u0432'   #  0x00a2 -> CYRILLIC SMALL LETTER VE
+    '\u0433'   #  0x00a3 -> CYRILLIC SMALL LETTER GHE
+    '\u0434'   #  0x00a4 -> CYRILLIC SMALL LETTER DE
+    '\u0435'   #  0x00a5 -> CYRILLIC SMALL LETTER IE
+    '\u0436'   #  0x00a6 -> CYRILLIC SMALL LETTER ZHE
+    '\u0437'   #  0x00a7 -> CYRILLIC SMALL LETTER ZE
+    '\u0438'   #  0x00a8 -> CYRILLIC SMALL LETTER I
+    '\u0439'   #  0x00a9 -> CYRILLIC SMALL LETTER SHORT I
+    '\u043a'   #  0x00aa -> CYRILLIC SMALL LETTER KA
+    '\u043b'   #  0x00ab -> CYRILLIC SMALL LETTER EL
+    '\u043c'   #  0x00ac -> CYRILLIC SMALL LETTER EM
+    '\u043d'   #  0x00ad -> CYRILLIC SMALL LETTER EN
+    '\u043e'   #  0x00ae -> CYRILLIC SMALL LETTER O
+    '\u043f'   #  0x00af -> CYRILLIC SMALL LETTER PE
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u2561'   #  0x00b5 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u2562'   #  0x00b6 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+    '\u2556'   #  0x00b7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+    '\u2555'   #  0x00b8 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255c'   #  0x00bd -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+    '\u255b'   #  0x00be -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u255e'   #  0x00c6 -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0x00c7 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u2567'   #  0x00cf -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0x00d0 -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2564'   #  0x00d1 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+    '\u2565'   #  0x00d2 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+    '\u2559'   #  0x00d3 -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u2558'   #  0x00d4 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2552'   #  0x00d5 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u2553'   #  0x00d6 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+    '\u256b'   #  0x00d7 -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+    '\u256a'   #  0x00d8 -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u258c'   #  0x00dd -> LEFT HALF BLOCK
+    '\u2590'   #  0x00de -> RIGHT HALF BLOCK
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u0440'   #  0x00e0 -> CYRILLIC SMALL LETTER ER
+    '\u0441'   #  0x00e1 -> CYRILLIC SMALL LETTER ES
+    '\u0442'   #  0x00e2 -> CYRILLIC SMALL LETTER TE
+    '\u0443'   #  0x00e3 -> CYRILLIC SMALL LETTER U
+    '\u0444'   #  0x00e4 -> CYRILLIC SMALL LETTER EF
+    '\u0445'   #  0x00e5 -> CYRILLIC SMALL LETTER HA
+    '\u0446'   #  0x00e6 -> CYRILLIC SMALL LETTER TSE
+    '\u0447'   #  0x00e7 -> CYRILLIC SMALL LETTER CHE
+    '\u0448'   #  0x00e8 -> CYRILLIC SMALL LETTER SHA
+    '\u0449'   #  0x00e9 -> CYRILLIC SMALL LETTER SHCHA
+    '\u044a'   #  0x00ea -> CYRILLIC SMALL LETTER HARD SIGN
+    '\u044b'   #  0x00eb -> CYRILLIC SMALL LETTER YERU
+    '\u044c'   #  0x00ec -> CYRILLIC SMALL LETTER SOFT SIGN
+    '\u044d'   #  0x00ed -> CYRILLIC SMALL LETTER E
+    '\u044e'   #  0x00ee -> CYRILLIC SMALL LETTER YU
+    '\u044f'   #  0x00ef -> CYRILLIC SMALL LETTER YA
+    '\u0401'   #  0x00f0 -> CYRILLIC CAPITAL LETTER IO
+    '\u0451'   #  0x00f1 -> CYRILLIC SMALL LETTER IO
+    '\u0404'   #  0x00f2 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+    '\u0454'   #  0x00f3 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+    '\u0407'   #  0x00f4 -> CYRILLIC CAPITAL LETTER YI
+    '\u0457'   #  0x00f5 -> CYRILLIC SMALL LETTER YI
+    '\u040e'   #  0x00f6 -> CYRILLIC CAPITAL LETTER SHORT U
+    '\u045e'   #  0x00f7 -> CYRILLIC SMALL LETTER SHORT U
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\u2219'   #  0x00f9 -> BULLET OPERATOR
+    '\xb7'     #  0x00fa -> MIDDLE DOT
+    '\u221a'   #  0x00fb -> SQUARE ROOT
+    '\u2116'   #  0x00fc -> NUMERO SIGN
+    '\xa4'     #  0x00fd -> CURRENCY SIGN
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp869.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp869.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp869.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> NULL
-    u'\x01'     #  0x0001 -> START OF HEADING
-    u'\x02'     #  0x0002 -> START OF TEXT
-    u'\x03'     #  0x0003 -> END OF TEXT
-    u'\x04'     #  0x0004 -> END OF TRANSMISSION
-    u'\x05'     #  0x0005 -> ENQUIRY
-    u'\x06'     #  0x0006 -> ACKNOWLEDGE
-    u'\x07'     #  0x0007 -> BELL
-    u'\x08'     #  0x0008 -> BACKSPACE
-    u'\t'       #  0x0009 -> HORIZONTAL TABULATION
-    u'\n'       #  0x000a -> LINE FEED
-    u'\x0b'     #  0x000b -> VERTICAL TABULATION
-    u'\x0c'     #  0x000c -> FORM FEED
-    u'\r'       #  0x000d -> CARRIAGE RETURN
-    u'\x0e'     #  0x000e -> SHIFT OUT
-    u'\x0f'     #  0x000f -> SHIFT IN
-    u'\x10'     #  0x0010 -> DATA LINK ESCAPE
-    u'\x11'     #  0x0011 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x0012 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x0013 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x0014 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x0016 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x0018 -> CANCEL
-    u'\x19'     #  0x0019 -> END OF MEDIUM
-    u'\x1a'     #  0x001a -> SUBSTITUTE
-    u'\x1b'     #  0x001b -> ESCAPE
-    u'\x1c'     #  0x001c -> FILE SEPARATOR
-    u'\x1d'     #  0x001d -> GROUP SEPARATOR
-    u'\x1e'     #  0x001e -> RECORD SEPARATOR
-    u'\x1f'     #  0x001f -> UNIT SEPARATOR
-    u' '        #  0x0020 -> SPACE
-    u'!'        #  0x0021 -> EXCLAMATION MARK
-    u'"'        #  0x0022 -> QUOTATION MARK
-    u'#'        #  0x0023 -> NUMBER SIGN
-    u'$'        #  0x0024 -> DOLLAR SIGN
-    u'%'        #  0x0025 -> PERCENT SIGN
-    u'&'        #  0x0026 -> AMPERSAND
-    u"'"        #  0x0027 -> APOSTROPHE
-    u'('        #  0x0028 -> LEFT PARENTHESIS
-    u')'        #  0x0029 -> RIGHT PARENTHESIS
-    u'*'        #  0x002a -> ASTERISK
-    u'+'        #  0x002b -> PLUS SIGN
-    u','        #  0x002c -> COMMA
-    u'-'        #  0x002d -> HYPHEN-MINUS
-    u'.'        #  0x002e -> FULL STOP
-    u'/'        #  0x002f -> SOLIDUS
-    u'0'        #  0x0030 -> DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE
-    u':'        #  0x003a -> COLON
-    u';'        #  0x003b -> SEMICOLON
-    u'<'        #  0x003c -> LESS-THAN SIGN
-    u'='        #  0x003d -> EQUALS SIGN
-    u'>'        #  0x003e -> GREATER-THAN SIGN
-    u'?'        #  0x003f -> QUESTION MARK
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET
-    u'\\'       #  0x005c -> REVERSE SOLIDUS
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT
-    u'_'        #  0x005f -> LOW LINE
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET
-    u'|'        #  0x007c -> VERTICAL LINE
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> DELETE
-    u'\ufffe'   #  0x0080 -> UNDEFINED
-    u'\ufffe'   #  0x0081 -> UNDEFINED
-    u'\ufffe'   #  0x0082 -> UNDEFINED
-    u'\ufffe'   #  0x0083 -> UNDEFINED
-    u'\ufffe'   #  0x0084 -> UNDEFINED
-    u'\ufffe'   #  0x0085 -> UNDEFINED
-    u'\u0386'   #  0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
-    u'\ufffe'   #  0x0087 -> UNDEFINED
-    u'\xb7'     #  0x0088 -> MIDDLE DOT
-    u'\xac'     #  0x0089 -> NOT SIGN
-    u'\xa6'     #  0x008a -> BROKEN BAR
-    u'\u2018'   #  0x008b -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x008c -> RIGHT SINGLE QUOTATION MARK
-    u'\u0388'   #  0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS
-    u'\u2015'   #  0x008e -> HORIZONTAL BAR
-    u'\u0389'   #  0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS
-    u'\u038a'   #  0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS
-    u'\u03aa'   #  0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
-    u'\u038c'   #  0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
-    u'\ufffe'   #  0x0093 -> UNDEFINED
-    u'\ufffe'   #  0x0094 -> UNDEFINED
-    u'\u038e'   #  0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
-    u'\u03ab'   #  0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
-    u'\xa9'     #  0x0097 -> COPYRIGHT SIGN
-    u'\u038f'   #  0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
-    u'\xb2'     #  0x0099 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0x009a -> SUPERSCRIPT THREE
-    u'\u03ac'   #  0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS
-    u'\xa3'     #  0x009c -> POUND SIGN
-    u'\u03ad'   #  0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS
-    u'\u03ae'   #  0x009e -> GREEK SMALL LETTER ETA WITH TONOS
-    u'\u03af'   #  0x009f -> GREEK SMALL LETTER IOTA WITH TONOS
-    u'\u03ca'   #  0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
-    u'\u0390'   #  0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
-    u'\u03cc'   #  0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS
-    u'\u03cd'   #  0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS
-    u'\u0391'   #  0x00a4 -> GREEK CAPITAL LETTER ALPHA
-    u'\u0392'   #  0x00a5 -> GREEK CAPITAL LETTER BETA
-    u'\u0393'   #  0x00a6 -> GREEK CAPITAL LETTER GAMMA
-    u'\u0394'   #  0x00a7 -> GREEK CAPITAL LETTER DELTA
-    u'\u0395'   #  0x00a8 -> GREEK CAPITAL LETTER EPSILON
-    u'\u0396'   #  0x00a9 -> GREEK CAPITAL LETTER ZETA
-    u'\u0397'   #  0x00aa -> GREEK CAPITAL LETTER ETA
-    u'\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
-    u'\u0398'   #  0x00ac -> GREEK CAPITAL LETTER THETA
-    u'\u0399'   #  0x00ad -> GREEK CAPITAL LETTER IOTA
-    u'\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2591'   #  0x00b0 -> LIGHT SHADE
-    u'\u2592'   #  0x00b1 -> MEDIUM SHADE
-    u'\u2593'   #  0x00b2 -> DARK SHADE
-    u'\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u039a'   #  0x00b5 -> GREEK CAPITAL LETTER KAPPA
-    u'\u039b'   #  0x00b6 -> GREEK CAPITAL LETTER LAMDA
-    u'\u039c'   #  0x00b7 -> GREEK CAPITAL LETTER MU
-    u'\u039d'   #  0x00b8 -> GREEK CAPITAL LETTER NU
-    u'\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u039e'   #  0x00bd -> GREEK CAPITAL LETTER XI
-    u'\u039f'   #  0x00be -> GREEK CAPITAL LETTER OMICRON
-    u'\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u03a0'   #  0x00c6 -> GREEK CAPITAL LETTER PI
-    u'\u03a1'   #  0x00c7 -> GREEK CAPITAL LETTER RHO
-    u'\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\u03a3'   #  0x00cf -> GREEK CAPITAL LETTER SIGMA
-    u'\u03a4'   #  0x00d0 -> GREEK CAPITAL LETTER TAU
-    u'\u03a5'   #  0x00d1 -> GREEK CAPITAL LETTER UPSILON
-    u'\u03a6'   #  0x00d2 -> GREEK CAPITAL LETTER PHI
-    u'\u03a7'   #  0x00d3 -> GREEK CAPITAL LETTER CHI
-    u'\u03a8'   #  0x00d4 -> GREEK CAPITAL LETTER PSI
-    u'\u03a9'   #  0x00d5 -> GREEK CAPITAL LETTER OMEGA
-    u'\u03b1'   #  0x00d6 -> GREEK SMALL LETTER ALPHA
-    u'\u03b2'   #  0x00d7 -> GREEK SMALL LETTER BETA
-    u'\u03b3'   #  0x00d8 -> GREEK SMALL LETTER GAMMA
-    u'\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2588'   #  0x00db -> FULL BLOCK
-    u'\u2584'   #  0x00dc -> LOWER HALF BLOCK
-    u'\u03b4'   #  0x00dd -> GREEK SMALL LETTER DELTA
-    u'\u03b5'   #  0x00de -> GREEK SMALL LETTER EPSILON
-    u'\u2580'   #  0x00df -> UPPER HALF BLOCK
-    u'\u03b6'   #  0x00e0 -> GREEK SMALL LETTER ZETA
-    u'\u03b7'   #  0x00e1 -> GREEK SMALL LETTER ETA
-    u'\u03b8'   #  0x00e2 -> GREEK SMALL LETTER THETA
-    u'\u03b9'   #  0x00e3 -> GREEK SMALL LETTER IOTA
-    u'\u03ba'   #  0x00e4 -> GREEK SMALL LETTER KAPPA
-    u'\u03bb'   #  0x00e5 -> GREEK SMALL LETTER LAMDA
-    u'\u03bc'   #  0x00e6 -> GREEK SMALL LETTER MU
-    u'\u03bd'   #  0x00e7 -> GREEK SMALL LETTER NU
-    u'\u03be'   #  0x00e8 -> GREEK SMALL LETTER XI
-    u'\u03bf'   #  0x00e9 -> GREEK SMALL LETTER OMICRON
-    u'\u03c0'   #  0x00ea -> GREEK SMALL LETTER PI
-    u'\u03c1'   #  0x00eb -> GREEK SMALL LETTER RHO
-    u'\u03c3'   #  0x00ec -> GREEK SMALL LETTER SIGMA
-    u'\u03c2'   #  0x00ed -> GREEK SMALL LETTER FINAL SIGMA
-    u'\u03c4'   #  0x00ee -> GREEK SMALL LETTER TAU
-    u'\u0384'   #  0x00ef -> GREEK TONOS
-    u'\xad'     #  0x00f0 -> SOFT HYPHEN
-    u'\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
-    u'\u03c5'   #  0x00f2 -> GREEK SMALL LETTER UPSILON
-    u'\u03c6'   #  0x00f3 -> GREEK SMALL LETTER PHI
-    u'\u03c7'   #  0x00f4 -> GREEK SMALL LETTER CHI
-    u'\xa7'     #  0x00f5 -> SECTION SIGN
-    u'\u03c8'   #  0x00f6 -> GREEK SMALL LETTER PSI
-    u'\u0385'   #  0x00f7 -> GREEK DIALYTIKA TONOS
-    u'\xb0'     #  0x00f8 -> DEGREE SIGN
-    u'\xa8'     #  0x00f9 -> DIAERESIS
-    u'\u03c9'   #  0x00fa -> GREEK SMALL LETTER OMEGA
-    u'\u03cb'   #  0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
-    u'\u03b0'   #  0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
-    u'\u03ce'   #  0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS
-    u'\u25a0'   #  0x00fe -> BLACK SQUARE
-    u'\xa0'     #  0x00ff -> NO-BREAK SPACE
+    '\x00'     #  0x0000 -> NULL
+    '\x01'     #  0x0001 -> START OF HEADING
+    '\x02'     #  0x0002 -> START OF TEXT
+    '\x03'     #  0x0003 -> END OF TEXT
+    '\x04'     #  0x0004 -> END OF TRANSMISSION
+    '\x05'     #  0x0005 -> ENQUIRY
+    '\x06'     #  0x0006 -> ACKNOWLEDGE
+    '\x07'     #  0x0007 -> BELL
+    '\x08'     #  0x0008 -> BACKSPACE
+    '\t'       #  0x0009 -> HORIZONTAL TABULATION
+    '\n'       #  0x000a -> LINE FEED
+    '\x0b'     #  0x000b -> VERTICAL TABULATION
+    '\x0c'     #  0x000c -> FORM FEED
+    '\r'       #  0x000d -> CARRIAGE RETURN
+    '\x0e'     #  0x000e -> SHIFT OUT
+    '\x0f'     #  0x000f -> SHIFT IN
+    '\x10'     #  0x0010 -> DATA LINK ESCAPE
+    '\x11'     #  0x0011 -> DEVICE CONTROL ONE
+    '\x12'     #  0x0012 -> DEVICE CONTROL TWO
+    '\x13'     #  0x0013 -> DEVICE CONTROL THREE
+    '\x14'     #  0x0014 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x0015 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x0016 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x0017 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x0018 -> CANCEL
+    '\x19'     #  0x0019 -> END OF MEDIUM
+    '\x1a'     #  0x001a -> SUBSTITUTE
+    '\x1b'     #  0x001b -> ESCAPE
+    '\x1c'     #  0x001c -> FILE SEPARATOR
+    '\x1d'     #  0x001d -> GROUP SEPARATOR
+    '\x1e'     #  0x001e -> RECORD SEPARATOR
+    '\x1f'     #  0x001f -> UNIT SEPARATOR
+    ' '        #  0x0020 -> SPACE
+    '!'        #  0x0021 -> EXCLAMATION MARK
+    '"'        #  0x0022 -> QUOTATION MARK
+    '#'        #  0x0023 -> NUMBER SIGN
+    '$'        #  0x0024 -> DOLLAR SIGN
+    '%'        #  0x0025 -> PERCENT SIGN
+    '&'        #  0x0026 -> AMPERSAND
+    "'"        #  0x0027 -> APOSTROPHE
+    '('        #  0x0028 -> LEFT PARENTHESIS
+    ')'        #  0x0029 -> RIGHT PARENTHESIS
+    '*'        #  0x002a -> ASTERISK
+    '+'        #  0x002b -> PLUS SIGN
+    ','        #  0x002c -> COMMA
+    '-'        #  0x002d -> HYPHEN-MINUS
+    '.'        #  0x002e -> FULL STOP
+    '/'        #  0x002f -> SOLIDUS
+    '0'        #  0x0030 -> DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE
+    ':'        #  0x003a -> COLON
+    ';'        #  0x003b -> SEMICOLON
+    '<'        #  0x003c -> LESS-THAN SIGN
+    '='        #  0x003d -> EQUALS SIGN
+    '>'        #  0x003e -> GREATER-THAN SIGN
+    '?'        #  0x003f -> QUESTION MARK
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET
+    '\\'       #  0x005c -> REVERSE SOLIDUS
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT
+    '_'        #  0x005f -> LOW LINE
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET
+    '|'        #  0x007c -> VERTICAL LINE
+    '}'        #  0x007d -> RIGHT CURLY BRACKET
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> DELETE
+    '\ufffe'   #  0x0080 -> UNDEFINED
+    '\ufffe'   #  0x0081 -> UNDEFINED
+    '\ufffe'   #  0x0082 -> UNDEFINED
+    '\ufffe'   #  0x0083 -> UNDEFINED
+    '\ufffe'   #  0x0084 -> UNDEFINED
+    '\ufffe'   #  0x0085 -> UNDEFINED
+    '\u0386'   #  0x0086 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+    '\ufffe'   #  0x0087 -> UNDEFINED
+    '\xb7'     #  0x0088 -> MIDDLE DOT
+    '\xac'     #  0x0089 -> NOT SIGN
+    '\xa6'     #  0x008a -> BROKEN BAR
+    '\u2018'   #  0x008b -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x008c -> RIGHT SINGLE QUOTATION MARK
+    '\u0388'   #  0x008d -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+    '\u2015'   #  0x008e -> HORIZONTAL BAR
+    '\u0389'   #  0x008f -> GREEK CAPITAL LETTER ETA WITH TONOS
+    '\u038a'   #  0x0090 -> GREEK CAPITAL LETTER IOTA WITH TONOS
+    '\u03aa'   #  0x0091 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+    '\u038c'   #  0x0092 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+    '\ufffe'   #  0x0093 -> UNDEFINED
+    '\ufffe'   #  0x0094 -> UNDEFINED
+    '\u038e'   #  0x0095 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+    '\u03ab'   #  0x0096 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+    '\xa9'     #  0x0097 -> COPYRIGHT SIGN
+    '\u038f'   #  0x0098 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+    '\xb2'     #  0x0099 -> SUPERSCRIPT TWO
+    '\xb3'     #  0x009a -> SUPERSCRIPT THREE
+    '\u03ac'   #  0x009b -> GREEK SMALL LETTER ALPHA WITH TONOS
+    '\xa3'     #  0x009c -> POUND SIGN
+    '\u03ad'   #  0x009d -> GREEK SMALL LETTER EPSILON WITH TONOS
+    '\u03ae'   #  0x009e -> GREEK SMALL LETTER ETA WITH TONOS
+    '\u03af'   #  0x009f -> GREEK SMALL LETTER IOTA WITH TONOS
+    '\u03ca'   #  0x00a0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+    '\u0390'   #  0x00a1 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+    '\u03cc'   #  0x00a2 -> GREEK SMALL LETTER OMICRON WITH TONOS
+    '\u03cd'   #  0x00a3 -> GREEK SMALL LETTER UPSILON WITH TONOS
+    '\u0391'   #  0x00a4 -> GREEK CAPITAL LETTER ALPHA
+    '\u0392'   #  0x00a5 -> GREEK CAPITAL LETTER BETA
+    '\u0393'   #  0x00a6 -> GREEK CAPITAL LETTER GAMMA
+    '\u0394'   #  0x00a7 -> GREEK CAPITAL LETTER DELTA
+    '\u0395'   #  0x00a8 -> GREEK CAPITAL LETTER EPSILON
+    '\u0396'   #  0x00a9 -> GREEK CAPITAL LETTER ZETA
+    '\u0397'   #  0x00aa -> GREEK CAPITAL LETTER ETA
+    '\xbd'     #  0x00ab -> VULGAR FRACTION ONE HALF
+    '\u0398'   #  0x00ac -> GREEK CAPITAL LETTER THETA
+    '\u0399'   #  0x00ad -> GREEK CAPITAL LETTER IOTA
+    '\xab'     #  0x00ae -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0x00af -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2591'   #  0x00b0 -> LIGHT SHADE
+    '\u2592'   #  0x00b1 -> MEDIUM SHADE
+    '\u2593'   #  0x00b2 -> DARK SHADE
+    '\u2502'   #  0x00b3 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u2524'   #  0x00b4 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u039a'   #  0x00b5 -> GREEK CAPITAL LETTER KAPPA
+    '\u039b'   #  0x00b6 -> GREEK CAPITAL LETTER LAMDA
+    '\u039c'   #  0x00b7 -> GREEK CAPITAL LETTER MU
+    '\u039d'   #  0x00b8 -> GREEK CAPITAL LETTER NU
+    '\u2563'   #  0x00b9 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2551'   #  0x00ba -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2557'   #  0x00bb -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u255d'   #  0x00bc -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u039e'   #  0x00bd -> GREEK CAPITAL LETTER XI
+    '\u039f'   #  0x00be -> GREEK CAPITAL LETTER OMICRON
+    '\u2510'   #  0x00bf -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x00c0 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2534'   #  0x00c1 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u252c'   #  0x00c2 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u251c'   #  0x00c3 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2500'   #  0x00c4 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u253c'   #  0x00c5 -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u03a0'   #  0x00c6 -> GREEK CAPITAL LETTER PI
+    '\u03a1'   #  0x00c7 -> GREEK CAPITAL LETTER RHO
+    '\u255a'   #  0x00c8 -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u2554'   #  0x00c9 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2569'   #  0x00ca -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u2566'   #  0x00cb -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2560'   #  0x00cc -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2550'   #  0x00cd -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u256c'   #  0x00ce -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\u03a3'   #  0x00cf -> GREEK CAPITAL LETTER SIGMA
+    '\u03a4'   #  0x00d0 -> GREEK CAPITAL LETTER TAU
+    '\u03a5'   #  0x00d1 -> GREEK CAPITAL LETTER UPSILON
+    '\u03a6'   #  0x00d2 -> GREEK CAPITAL LETTER PHI
+    '\u03a7'   #  0x00d3 -> GREEK CAPITAL LETTER CHI
+    '\u03a8'   #  0x00d4 -> GREEK CAPITAL LETTER PSI
+    '\u03a9'   #  0x00d5 -> GREEK CAPITAL LETTER OMEGA
+    '\u03b1'   #  0x00d6 -> GREEK SMALL LETTER ALPHA
+    '\u03b2'   #  0x00d7 -> GREEK SMALL LETTER BETA
+    '\u03b3'   #  0x00d8 -> GREEK SMALL LETTER GAMMA
+    '\u2518'   #  0x00d9 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u250c'   #  0x00da -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2588'   #  0x00db -> FULL BLOCK
+    '\u2584'   #  0x00dc -> LOWER HALF BLOCK
+    '\u03b4'   #  0x00dd -> GREEK SMALL LETTER DELTA
+    '\u03b5'   #  0x00de -> GREEK SMALL LETTER EPSILON
+    '\u2580'   #  0x00df -> UPPER HALF BLOCK
+    '\u03b6'   #  0x00e0 -> GREEK SMALL LETTER ZETA
+    '\u03b7'   #  0x00e1 -> GREEK SMALL LETTER ETA
+    '\u03b8'   #  0x00e2 -> GREEK SMALL LETTER THETA
+    '\u03b9'   #  0x00e3 -> GREEK SMALL LETTER IOTA
+    '\u03ba'   #  0x00e4 -> GREEK SMALL LETTER KAPPA
+    '\u03bb'   #  0x00e5 -> GREEK SMALL LETTER LAMDA
+    '\u03bc'   #  0x00e6 -> GREEK SMALL LETTER MU
+    '\u03bd'   #  0x00e7 -> GREEK SMALL LETTER NU
+    '\u03be'   #  0x00e8 -> GREEK SMALL LETTER XI
+    '\u03bf'   #  0x00e9 -> GREEK SMALL LETTER OMICRON
+    '\u03c0'   #  0x00ea -> GREEK SMALL LETTER PI
+    '\u03c1'   #  0x00eb -> GREEK SMALL LETTER RHO
+    '\u03c3'   #  0x00ec -> GREEK SMALL LETTER SIGMA
+    '\u03c2'   #  0x00ed -> GREEK SMALL LETTER FINAL SIGMA
+    '\u03c4'   #  0x00ee -> GREEK SMALL LETTER TAU
+    '\u0384'   #  0x00ef -> GREEK TONOS
+    '\xad'     #  0x00f0 -> SOFT HYPHEN
+    '\xb1'     #  0x00f1 -> PLUS-MINUS SIGN
+    '\u03c5'   #  0x00f2 -> GREEK SMALL LETTER UPSILON
+    '\u03c6'   #  0x00f3 -> GREEK SMALL LETTER PHI
+    '\u03c7'   #  0x00f4 -> GREEK SMALL LETTER CHI
+    '\xa7'     #  0x00f5 -> SECTION SIGN
+    '\u03c8'   #  0x00f6 -> GREEK SMALL LETTER PSI
+    '\u0385'   #  0x00f7 -> GREEK DIALYTIKA TONOS
+    '\xb0'     #  0x00f8 -> DEGREE SIGN
+    '\xa8'     #  0x00f9 -> DIAERESIS
+    '\u03c9'   #  0x00fa -> GREEK SMALL LETTER OMEGA
+    '\u03cb'   #  0x00fb -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+    '\u03b0'   #  0x00fc -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+    '\u03ce'   #  0x00fd -> GREEK SMALL LETTER OMEGA WITH TONOS
+    '\u25a0'   #  0x00fe -> BLACK SQUARE
+    '\xa0'     #  0x00ff -> NO-BREAK SPACE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/cp874.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp874.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp874.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u20ac'   #  0x80 -> EURO SIGN
-    u'\ufffe'   #  0x81 -> UNDEFINED
-    u'\ufffe'   #  0x82 -> UNDEFINED
-    u'\ufffe'   #  0x83 -> UNDEFINED
-    u'\ufffe'   #  0x84 -> UNDEFINED
-    u'\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
-    u'\ufffe'   #  0x86 -> UNDEFINED
-    u'\ufffe'   #  0x87 -> UNDEFINED
-    u'\ufffe'   #  0x88 -> UNDEFINED
-    u'\ufffe'   #  0x89 -> UNDEFINED
-    u'\ufffe'   #  0x8A -> UNDEFINED
-    u'\ufffe'   #  0x8B -> UNDEFINED
-    u'\ufffe'   #  0x8C -> UNDEFINED
-    u'\ufffe'   #  0x8D -> UNDEFINED
-    u'\ufffe'   #  0x8E -> UNDEFINED
-    u'\ufffe'   #  0x8F -> UNDEFINED
-    u'\ufffe'   #  0x90 -> UNDEFINED
-    u'\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
-    u'\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2022'   #  0x95 -> BULLET
-    u'\u2013'   #  0x96 -> EN DASH
-    u'\u2014'   #  0x97 -> EM DASH
-    u'\ufffe'   #  0x98 -> UNDEFINED
-    u'\ufffe'   #  0x99 -> UNDEFINED
-    u'\ufffe'   #  0x9A -> UNDEFINED
-    u'\ufffe'   #  0x9B -> UNDEFINED
-    u'\ufffe'   #  0x9C -> UNDEFINED
-    u'\ufffe'   #  0x9D -> UNDEFINED
-    u'\ufffe'   #  0x9E -> UNDEFINED
-    u'\ufffe'   #  0x9F -> UNDEFINED
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u0e01'   #  0xA1 -> THAI CHARACTER KO KAI
-    u'\u0e02'   #  0xA2 -> THAI CHARACTER KHO KHAI
-    u'\u0e03'   #  0xA3 -> THAI CHARACTER KHO KHUAT
-    u'\u0e04'   #  0xA4 -> THAI CHARACTER KHO KHWAI
-    u'\u0e05'   #  0xA5 -> THAI CHARACTER KHO KHON
-    u'\u0e06'   #  0xA6 -> THAI CHARACTER KHO RAKHANG
-    u'\u0e07'   #  0xA7 -> THAI CHARACTER NGO NGU
-    u'\u0e08'   #  0xA8 -> THAI CHARACTER CHO CHAN
-    u'\u0e09'   #  0xA9 -> THAI CHARACTER CHO CHING
-    u'\u0e0a'   #  0xAA -> THAI CHARACTER CHO CHANG
-    u'\u0e0b'   #  0xAB -> THAI CHARACTER SO SO
-    u'\u0e0c'   #  0xAC -> THAI CHARACTER CHO CHOE
-    u'\u0e0d'   #  0xAD -> THAI CHARACTER YO YING
-    u'\u0e0e'   #  0xAE -> THAI CHARACTER DO CHADA
-    u'\u0e0f'   #  0xAF -> THAI CHARACTER TO PATAK
-    u'\u0e10'   #  0xB0 -> THAI CHARACTER THO THAN
-    u'\u0e11'   #  0xB1 -> THAI CHARACTER THO NANGMONTHO
-    u'\u0e12'   #  0xB2 -> THAI CHARACTER THO PHUTHAO
-    u'\u0e13'   #  0xB3 -> THAI CHARACTER NO NEN
-    u'\u0e14'   #  0xB4 -> THAI CHARACTER DO DEK
-    u'\u0e15'   #  0xB5 -> THAI CHARACTER TO TAO
-    u'\u0e16'   #  0xB6 -> THAI CHARACTER THO THUNG
-    u'\u0e17'   #  0xB7 -> THAI CHARACTER THO THAHAN
-    u'\u0e18'   #  0xB8 -> THAI CHARACTER THO THONG
-    u'\u0e19'   #  0xB9 -> THAI CHARACTER NO NU
-    u'\u0e1a'   #  0xBA -> THAI CHARACTER BO BAIMAI
-    u'\u0e1b'   #  0xBB -> THAI CHARACTER PO PLA
-    u'\u0e1c'   #  0xBC -> THAI CHARACTER PHO PHUNG
-    u'\u0e1d'   #  0xBD -> THAI CHARACTER FO FA
-    u'\u0e1e'   #  0xBE -> THAI CHARACTER PHO PHAN
-    u'\u0e1f'   #  0xBF -> THAI CHARACTER FO FAN
-    u'\u0e20'   #  0xC0 -> THAI CHARACTER PHO SAMPHAO
-    u'\u0e21'   #  0xC1 -> THAI CHARACTER MO MA
-    u'\u0e22'   #  0xC2 -> THAI CHARACTER YO YAK
-    u'\u0e23'   #  0xC3 -> THAI CHARACTER RO RUA
-    u'\u0e24'   #  0xC4 -> THAI CHARACTER RU
-    u'\u0e25'   #  0xC5 -> THAI CHARACTER LO LING
-    u'\u0e26'   #  0xC6 -> THAI CHARACTER LU
-    u'\u0e27'   #  0xC7 -> THAI CHARACTER WO WAEN
-    u'\u0e28'   #  0xC8 -> THAI CHARACTER SO SALA
-    u'\u0e29'   #  0xC9 -> THAI CHARACTER SO RUSI
-    u'\u0e2a'   #  0xCA -> THAI CHARACTER SO SUA
-    u'\u0e2b'   #  0xCB -> THAI CHARACTER HO HIP
-    u'\u0e2c'   #  0xCC -> THAI CHARACTER LO CHULA
-    u'\u0e2d'   #  0xCD -> THAI CHARACTER O ANG
-    u'\u0e2e'   #  0xCE -> THAI CHARACTER HO NOKHUK
-    u'\u0e2f'   #  0xCF -> THAI CHARACTER PAIYANNOI
-    u'\u0e30'   #  0xD0 -> THAI CHARACTER SARA A
-    u'\u0e31'   #  0xD1 -> THAI CHARACTER MAI HAN-AKAT
-    u'\u0e32'   #  0xD2 -> THAI CHARACTER SARA AA
-    u'\u0e33'   #  0xD3 -> THAI CHARACTER SARA AM
-    u'\u0e34'   #  0xD4 -> THAI CHARACTER SARA I
-    u'\u0e35'   #  0xD5 -> THAI CHARACTER SARA II
-    u'\u0e36'   #  0xD6 -> THAI CHARACTER SARA UE
-    u'\u0e37'   #  0xD7 -> THAI CHARACTER SARA UEE
-    u'\u0e38'   #  0xD8 -> THAI CHARACTER SARA U
-    u'\u0e39'   #  0xD9 -> THAI CHARACTER SARA UU
-    u'\u0e3a'   #  0xDA -> THAI CHARACTER PHINTHU
-    u'\ufffe'   #  0xDB -> UNDEFINED
-    u'\ufffe'   #  0xDC -> UNDEFINED
-    u'\ufffe'   #  0xDD -> UNDEFINED
-    u'\ufffe'   #  0xDE -> UNDEFINED
-    u'\u0e3f'   #  0xDF -> THAI CURRENCY SYMBOL BAHT
-    u'\u0e40'   #  0xE0 -> THAI CHARACTER SARA E
-    u'\u0e41'   #  0xE1 -> THAI CHARACTER SARA AE
-    u'\u0e42'   #  0xE2 -> THAI CHARACTER SARA O
-    u'\u0e43'   #  0xE3 -> THAI CHARACTER SARA AI MAIMUAN
-    u'\u0e44'   #  0xE4 -> THAI CHARACTER SARA AI MAIMALAI
-    u'\u0e45'   #  0xE5 -> THAI CHARACTER LAKKHANGYAO
-    u'\u0e46'   #  0xE6 -> THAI CHARACTER MAIYAMOK
-    u'\u0e47'   #  0xE7 -> THAI CHARACTER MAITAIKHU
-    u'\u0e48'   #  0xE8 -> THAI CHARACTER MAI EK
-    u'\u0e49'   #  0xE9 -> THAI CHARACTER MAI THO
-    u'\u0e4a'   #  0xEA -> THAI CHARACTER MAI TRI
-    u'\u0e4b'   #  0xEB -> THAI CHARACTER MAI CHATTAWA
-    u'\u0e4c'   #  0xEC -> THAI CHARACTER THANTHAKHAT
-    u'\u0e4d'   #  0xED -> THAI CHARACTER NIKHAHIT
-    u'\u0e4e'   #  0xEE -> THAI CHARACTER YAMAKKAN
-    u'\u0e4f'   #  0xEF -> THAI CHARACTER FONGMAN
-    u'\u0e50'   #  0xF0 -> THAI DIGIT ZERO
-    u'\u0e51'   #  0xF1 -> THAI DIGIT ONE
-    u'\u0e52'   #  0xF2 -> THAI DIGIT TWO
-    u'\u0e53'   #  0xF3 -> THAI DIGIT THREE
-    u'\u0e54'   #  0xF4 -> THAI DIGIT FOUR
-    u'\u0e55'   #  0xF5 -> THAI DIGIT FIVE
-    u'\u0e56'   #  0xF6 -> THAI DIGIT SIX
-    u'\u0e57'   #  0xF7 -> THAI DIGIT SEVEN
-    u'\u0e58'   #  0xF8 -> THAI DIGIT EIGHT
-    u'\u0e59'   #  0xF9 -> THAI DIGIT NINE
-    u'\u0e5a'   #  0xFA -> THAI CHARACTER ANGKHANKHU
-    u'\u0e5b'   #  0xFB -> THAI CHARACTER KHOMUT
-    u'\ufffe'   #  0xFC -> UNDEFINED
-    u'\ufffe'   #  0xFD -> UNDEFINED
-    u'\ufffe'   #  0xFE -> UNDEFINED
-    u'\ufffe'   #  0xFF -> UNDEFINED
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u20ac'   #  0x80 -> EURO SIGN
+    '\ufffe'   #  0x81 -> UNDEFINED
+    '\ufffe'   #  0x82 -> UNDEFINED
+    '\ufffe'   #  0x83 -> UNDEFINED
+    '\ufffe'   #  0x84 -> UNDEFINED
+    '\u2026'   #  0x85 -> HORIZONTAL ELLIPSIS
+    '\ufffe'   #  0x86 -> UNDEFINED
+    '\ufffe'   #  0x87 -> UNDEFINED
+    '\ufffe'   #  0x88 -> UNDEFINED
+    '\ufffe'   #  0x89 -> UNDEFINED
+    '\ufffe'   #  0x8A -> UNDEFINED
+    '\ufffe'   #  0x8B -> UNDEFINED
+    '\ufffe'   #  0x8C -> UNDEFINED
+    '\ufffe'   #  0x8D -> UNDEFINED
+    '\ufffe'   #  0x8E -> UNDEFINED
+    '\ufffe'   #  0x8F -> UNDEFINED
+    '\ufffe'   #  0x90 -> UNDEFINED
+    '\u2018'   #  0x91 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0x92 -> RIGHT SINGLE QUOTATION MARK
+    '\u201c'   #  0x93 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0x94 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2022'   #  0x95 -> BULLET
+    '\u2013'   #  0x96 -> EN DASH
+    '\u2014'   #  0x97 -> EM DASH
+    '\ufffe'   #  0x98 -> UNDEFINED
+    '\ufffe'   #  0x99 -> UNDEFINED
+    '\ufffe'   #  0x9A -> UNDEFINED
+    '\ufffe'   #  0x9B -> UNDEFINED
+    '\ufffe'   #  0x9C -> UNDEFINED
+    '\ufffe'   #  0x9D -> UNDEFINED
+    '\ufffe'   #  0x9E -> UNDEFINED
+    '\ufffe'   #  0x9F -> UNDEFINED
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u0e01'   #  0xA1 -> THAI CHARACTER KO KAI
+    '\u0e02'   #  0xA2 -> THAI CHARACTER KHO KHAI
+    '\u0e03'   #  0xA3 -> THAI CHARACTER KHO KHUAT
+    '\u0e04'   #  0xA4 -> THAI CHARACTER KHO KHWAI
+    '\u0e05'   #  0xA5 -> THAI CHARACTER KHO KHON
+    '\u0e06'   #  0xA6 -> THAI CHARACTER KHO RAKHANG
+    '\u0e07'   #  0xA7 -> THAI CHARACTER NGO NGU
+    '\u0e08'   #  0xA8 -> THAI CHARACTER CHO CHAN
+    '\u0e09'   #  0xA9 -> THAI CHARACTER CHO CHING
+    '\u0e0a'   #  0xAA -> THAI CHARACTER CHO CHANG
+    '\u0e0b'   #  0xAB -> THAI CHARACTER SO SO
+    '\u0e0c'   #  0xAC -> THAI CHARACTER CHO CHOE
+    '\u0e0d'   #  0xAD -> THAI CHARACTER YO YING
+    '\u0e0e'   #  0xAE -> THAI CHARACTER DO CHADA
+    '\u0e0f'   #  0xAF -> THAI CHARACTER TO PATAK
+    '\u0e10'   #  0xB0 -> THAI CHARACTER THO THAN
+    '\u0e11'   #  0xB1 -> THAI CHARACTER THO NANGMONTHO
+    '\u0e12'   #  0xB2 -> THAI CHARACTER THO PHUTHAO
+    '\u0e13'   #  0xB3 -> THAI CHARACTER NO NEN
+    '\u0e14'   #  0xB4 -> THAI CHARACTER DO DEK
+    '\u0e15'   #  0xB5 -> THAI CHARACTER TO TAO
+    '\u0e16'   #  0xB6 -> THAI CHARACTER THO THUNG
+    '\u0e17'   #  0xB7 -> THAI CHARACTER THO THAHAN
+    '\u0e18'   #  0xB8 -> THAI CHARACTER THO THONG
+    '\u0e19'   #  0xB9 -> THAI CHARACTER NO NU
+    '\u0e1a'   #  0xBA -> THAI CHARACTER BO BAIMAI
+    '\u0e1b'   #  0xBB -> THAI CHARACTER PO PLA
+    '\u0e1c'   #  0xBC -> THAI CHARACTER PHO PHUNG
+    '\u0e1d'   #  0xBD -> THAI CHARACTER FO FA
+    '\u0e1e'   #  0xBE -> THAI CHARACTER PHO PHAN
+    '\u0e1f'   #  0xBF -> THAI CHARACTER FO FAN
+    '\u0e20'   #  0xC0 -> THAI CHARACTER PHO SAMPHAO
+    '\u0e21'   #  0xC1 -> THAI CHARACTER MO MA
+    '\u0e22'   #  0xC2 -> THAI CHARACTER YO YAK
+    '\u0e23'   #  0xC3 -> THAI CHARACTER RO RUA
+    '\u0e24'   #  0xC4 -> THAI CHARACTER RU
+    '\u0e25'   #  0xC5 -> THAI CHARACTER LO LING
+    '\u0e26'   #  0xC6 -> THAI CHARACTER LU
+    '\u0e27'   #  0xC7 -> THAI CHARACTER WO WAEN
+    '\u0e28'   #  0xC8 -> THAI CHARACTER SO SALA
+    '\u0e29'   #  0xC9 -> THAI CHARACTER SO RUSI
+    '\u0e2a'   #  0xCA -> THAI CHARACTER SO SUA
+    '\u0e2b'   #  0xCB -> THAI CHARACTER HO HIP
+    '\u0e2c'   #  0xCC -> THAI CHARACTER LO CHULA
+    '\u0e2d'   #  0xCD -> THAI CHARACTER O ANG
+    '\u0e2e'   #  0xCE -> THAI CHARACTER HO NOKHUK
+    '\u0e2f'   #  0xCF -> THAI CHARACTER PAIYANNOI
+    '\u0e30'   #  0xD0 -> THAI CHARACTER SARA A
+    '\u0e31'   #  0xD1 -> THAI CHARACTER MAI HAN-AKAT
+    '\u0e32'   #  0xD2 -> THAI CHARACTER SARA AA
+    '\u0e33'   #  0xD3 -> THAI CHARACTER SARA AM
+    '\u0e34'   #  0xD4 -> THAI CHARACTER SARA I
+    '\u0e35'   #  0xD5 -> THAI CHARACTER SARA II
+    '\u0e36'   #  0xD6 -> THAI CHARACTER SARA UE
+    '\u0e37'   #  0xD7 -> THAI CHARACTER SARA UEE
+    '\u0e38'   #  0xD8 -> THAI CHARACTER SARA U
+    '\u0e39'   #  0xD9 -> THAI CHARACTER SARA UU
+    '\u0e3a'   #  0xDA -> THAI CHARACTER PHINTHU
+    '\ufffe'   #  0xDB -> UNDEFINED
+    '\ufffe'   #  0xDC -> UNDEFINED
+    '\ufffe'   #  0xDD -> UNDEFINED
+    '\ufffe'   #  0xDE -> UNDEFINED
+    '\u0e3f'   #  0xDF -> THAI CURRENCY SYMBOL BAHT
+    '\u0e40'   #  0xE0 -> THAI CHARACTER SARA E
+    '\u0e41'   #  0xE1 -> THAI CHARACTER SARA AE
+    '\u0e42'   #  0xE2 -> THAI CHARACTER SARA O
+    '\u0e43'   #  0xE3 -> THAI CHARACTER SARA AI MAIMUAN
+    '\u0e44'   #  0xE4 -> THAI CHARACTER SARA AI MAIMALAI
+    '\u0e45'   #  0xE5 -> THAI CHARACTER LAKKHANGYAO
+    '\u0e46'   #  0xE6 -> THAI CHARACTER MAIYAMOK
+    '\u0e47'   #  0xE7 -> THAI CHARACTER MAITAIKHU
+    '\u0e48'   #  0xE8 -> THAI CHARACTER MAI EK
+    '\u0e49'   #  0xE9 -> THAI CHARACTER MAI THO
+    '\u0e4a'   #  0xEA -> THAI CHARACTER MAI TRI
+    '\u0e4b'   #  0xEB -> THAI CHARACTER MAI CHATTAWA
+    '\u0e4c'   #  0xEC -> THAI CHARACTER THANTHAKHAT
+    '\u0e4d'   #  0xED -> THAI CHARACTER NIKHAHIT
+    '\u0e4e'   #  0xEE -> THAI CHARACTER YAMAKKAN
+    '\u0e4f'   #  0xEF -> THAI CHARACTER FONGMAN
+    '\u0e50'   #  0xF0 -> THAI DIGIT ZERO
+    '\u0e51'   #  0xF1 -> THAI DIGIT ONE
+    '\u0e52'   #  0xF2 -> THAI DIGIT TWO
+    '\u0e53'   #  0xF3 -> THAI DIGIT THREE
+    '\u0e54'   #  0xF4 -> THAI DIGIT FOUR
+    '\u0e55'   #  0xF5 -> THAI DIGIT FIVE
+    '\u0e56'   #  0xF6 -> THAI DIGIT SIX
+    '\u0e57'   #  0xF7 -> THAI DIGIT SEVEN
+    '\u0e58'   #  0xF8 -> THAI DIGIT EIGHT
+    '\u0e59'   #  0xF9 -> THAI DIGIT NINE
+    '\u0e5a'   #  0xFA -> THAI CHARACTER ANGKHANKHU
+    '\u0e5b'   #  0xFB -> THAI CHARACTER KHOMUT
+    '\ufffe'   #  0xFC -> UNDEFINED
+    '\ufffe'   #  0xFD -> UNDEFINED
+    '\ufffe'   #  0xFE -> UNDEFINED
+    '\ufffe'   #  0xFF -> UNDEFINED
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/cp875.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/cp875.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/cp875.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x9c'     #  0x04 -> CONTROL
-    u'\t'       #  0x05 -> HORIZONTAL TABULATION
-    u'\x86'     #  0x06 -> CONTROL
-    u'\x7f'     #  0x07 -> DELETE
-    u'\x97'     #  0x08 -> CONTROL
-    u'\x8d'     #  0x09 -> CONTROL
-    u'\x8e'     #  0x0A -> CONTROL
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x9d'     #  0x14 -> CONTROL
-    u'\x85'     #  0x15 -> CONTROL
-    u'\x08'     #  0x16 -> BACKSPACE
-    u'\x87'     #  0x17 -> CONTROL
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x92'     #  0x1A -> CONTROL
-    u'\x8f'     #  0x1B -> CONTROL
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u'\x80'     #  0x20 -> CONTROL
-    u'\x81'     #  0x21 -> CONTROL
-    u'\x82'     #  0x22 -> CONTROL
-    u'\x83'     #  0x23 -> CONTROL
-    u'\x84'     #  0x24 -> CONTROL
-    u'\n'       #  0x25 -> LINE FEED
-    u'\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
-    u'\x1b'     #  0x27 -> ESCAPE
-    u'\x88'     #  0x28 -> CONTROL
-    u'\x89'     #  0x29 -> CONTROL
-    u'\x8a'     #  0x2A -> CONTROL
-    u'\x8b'     #  0x2B -> CONTROL
-    u'\x8c'     #  0x2C -> CONTROL
-    u'\x05'     #  0x2D -> ENQUIRY
-    u'\x06'     #  0x2E -> ACKNOWLEDGE
-    u'\x07'     #  0x2F -> BELL
-    u'\x90'     #  0x30 -> CONTROL
-    u'\x91'     #  0x31 -> CONTROL
-    u'\x16'     #  0x32 -> SYNCHRONOUS IDLE
-    u'\x93'     #  0x33 -> CONTROL
-    u'\x94'     #  0x34 -> CONTROL
-    u'\x95'     #  0x35 -> CONTROL
-    u'\x96'     #  0x36 -> CONTROL
-    u'\x04'     #  0x37 -> END OF TRANSMISSION
-    u'\x98'     #  0x38 -> CONTROL
-    u'\x99'     #  0x39 -> CONTROL
-    u'\x9a'     #  0x3A -> CONTROL
-    u'\x9b'     #  0x3B -> CONTROL
-    u'\x14'     #  0x3C -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
-    u'\x9e'     #  0x3E -> CONTROL
-    u'\x1a'     #  0x3F -> SUBSTITUTE
-    u' '        #  0x40 -> SPACE
-    u'\u0391'   #  0x41 -> GREEK CAPITAL LETTER ALPHA
-    u'\u0392'   #  0x42 -> GREEK CAPITAL LETTER BETA
-    u'\u0393'   #  0x43 -> GREEK CAPITAL LETTER GAMMA
-    u'\u0394'   #  0x44 -> GREEK CAPITAL LETTER DELTA
-    u'\u0395'   #  0x45 -> GREEK CAPITAL LETTER EPSILON
-    u'\u0396'   #  0x46 -> GREEK CAPITAL LETTER ZETA
-    u'\u0397'   #  0x47 -> GREEK CAPITAL LETTER ETA
-    u'\u0398'   #  0x48 -> GREEK CAPITAL LETTER THETA
-    u'\u0399'   #  0x49 -> GREEK CAPITAL LETTER IOTA
-    u'['        #  0x4A -> LEFT SQUARE BRACKET
-    u'.'        #  0x4B -> FULL STOP
-    u'<'        #  0x4C -> LESS-THAN SIGN
-    u'('        #  0x4D -> LEFT PARENTHESIS
-    u'+'        #  0x4E -> PLUS SIGN
-    u'!'        #  0x4F -> EXCLAMATION MARK
-    u'&'        #  0x50 -> AMPERSAND
-    u'\u039a'   #  0x51 -> GREEK CAPITAL LETTER KAPPA
-    u'\u039b'   #  0x52 -> GREEK CAPITAL LETTER LAMDA
-    u'\u039c'   #  0x53 -> GREEK CAPITAL LETTER MU
-    u'\u039d'   #  0x54 -> GREEK CAPITAL LETTER NU
-    u'\u039e'   #  0x55 -> GREEK CAPITAL LETTER XI
-    u'\u039f'   #  0x56 -> GREEK CAPITAL LETTER OMICRON
-    u'\u03a0'   #  0x57 -> GREEK CAPITAL LETTER PI
-    u'\u03a1'   #  0x58 -> GREEK CAPITAL LETTER RHO
-    u'\u03a3'   #  0x59 -> GREEK CAPITAL LETTER SIGMA
-    u']'        #  0x5A -> RIGHT SQUARE BRACKET
-    u'$'        #  0x5B -> DOLLAR SIGN
-    u'*'        #  0x5C -> ASTERISK
-    u')'        #  0x5D -> RIGHT PARENTHESIS
-    u';'        #  0x5E -> SEMICOLON
-    u'^'        #  0x5F -> CIRCUMFLEX ACCENT
-    u'-'        #  0x60 -> HYPHEN-MINUS
-    u'/'        #  0x61 -> SOLIDUS
-    u'\u03a4'   #  0x62 -> GREEK CAPITAL LETTER TAU
-    u'\u03a5'   #  0x63 -> GREEK CAPITAL LETTER UPSILON
-    u'\u03a6'   #  0x64 -> GREEK CAPITAL LETTER PHI
-    u'\u03a7'   #  0x65 -> GREEK CAPITAL LETTER CHI
-    u'\u03a8'   #  0x66 -> GREEK CAPITAL LETTER PSI
-    u'\u03a9'   #  0x67 -> GREEK CAPITAL LETTER OMEGA
-    u'\u03aa'   #  0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
-    u'\u03ab'   #  0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
-    u'|'        #  0x6A -> VERTICAL LINE
-    u','        #  0x6B -> COMMA
-    u'%'        #  0x6C -> PERCENT SIGN
-    u'_'        #  0x6D -> LOW LINE
-    u'>'        #  0x6E -> GREATER-THAN SIGN
-    u'?'        #  0x6F -> QUESTION MARK
-    u'\xa8'     #  0x70 -> DIAERESIS
-    u'\u0386'   #  0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
-    u'\u0388'   #  0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
-    u'\u0389'   #  0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS
-    u'\xa0'     #  0x74 -> NO-BREAK SPACE
-    u'\u038a'   #  0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS
-    u'\u038c'   #  0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
-    u'\u038e'   #  0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
-    u'\u038f'   #  0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
-    u'`'        #  0x79 -> GRAVE ACCENT
-    u':'        #  0x7A -> COLON
-    u'#'        #  0x7B -> NUMBER SIGN
-    u'@'        #  0x7C -> COMMERCIAL AT
-    u"'"        #  0x7D -> APOSTROPHE
-    u'='        #  0x7E -> EQUALS SIGN
-    u'"'        #  0x7F -> QUOTATION MARK
-    u'\u0385'   #  0x80 -> GREEK DIALYTIKA TONOS
-    u'a'        #  0x81 -> LATIN SMALL LETTER A
-    u'b'        #  0x82 -> LATIN SMALL LETTER B
-    u'c'        #  0x83 -> LATIN SMALL LETTER C
-    u'd'        #  0x84 -> LATIN SMALL LETTER D
-    u'e'        #  0x85 -> LATIN SMALL LETTER E
-    u'f'        #  0x86 -> LATIN SMALL LETTER F
-    u'g'        #  0x87 -> LATIN SMALL LETTER G
-    u'h'        #  0x88 -> LATIN SMALL LETTER H
-    u'i'        #  0x89 -> LATIN SMALL LETTER I
-    u'\u03b1'   #  0x8A -> GREEK SMALL LETTER ALPHA
-    u'\u03b2'   #  0x8B -> GREEK SMALL LETTER BETA
-    u'\u03b3'   #  0x8C -> GREEK SMALL LETTER GAMMA
-    u'\u03b4'   #  0x8D -> GREEK SMALL LETTER DELTA
-    u'\u03b5'   #  0x8E -> GREEK SMALL LETTER EPSILON
-    u'\u03b6'   #  0x8F -> GREEK SMALL LETTER ZETA
-    u'\xb0'     #  0x90 -> DEGREE SIGN
-    u'j'        #  0x91 -> LATIN SMALL LETTER J
-    u'k'        #  0x92 -> LATIN SMALL LETTER K
-    u'l'        #  0x93 -> LATIN SMALL LETTER L
-    u'm'        #  0x94 -> LATIN SMALL LETTER M
-    u'n'        #  0x95 -> LATIN SMALL LETTER N
-    u'o'        #  0x96 -> LATIN SMALL LETTER O
-    u'p'        #  0x97 -> LATIN SMALL LETTER P
-    u'q'        #  0x98 -> LATIN SMALL LETTER Q
-    u'r'        #  0x99 -> LATIN SMALL LETTER R
-    u'\u03b7'   #  0x9A -> GREEK SMALL LETTER ETA
-    u'\u03b8'   #  0x9B -> GREEK SMALL LETTER THETA
-    u'\u03b9'   #  0x9C -> GREEK SMALL LETTER IOTA
-    u'\u03ba'   #  0x9D -> GREEK SMALL LETTER KAPPA
-    u'\u03bb'   #  0x9E -> GREEK SMALL LETTER LAMDA
-    u'\u03bc'   #  0x9F -> GREEK SMALL LETTER MU
-    u'\xb4'     #  0xA0 -> ACUTE ACCENT
-    u'~'        #  0xA1 -> TILDE
-    u's'        #  0xA2 -> LATIN SMALL LETTER S
-    u't'        #  0xA3 -> LATIN SMALL LETTER T
-    u'u'        #  0xA4 -> LATIN SMALL LETTER U
-    u'v'        #  0xA5 -> LATIN SMALL LETTER V
-    u'w'        #  0xA6 -> LATIN SMALL LETTER W
-    u'x'        #  0xA7 -> LATIN SMALL LETTER X
-    u'y'        #  0xA8 -> LATIN SMALL LETTER Y
-    u'z'        #  0xA9 -> LATIN SMALL LETTER Z
-    u'\u03bd'   #  0xAA -> GREEK SMALL LETTER NU
-    u'\u03be'   #  0xAB -> GREEK SMALL LETTER XI
-    u'\u03bf'   #  0xAC -> GREEK SMALL LETTER OMICRON
-    u'\u03c0'   #  0xAD -> GREEK SMALL LETTER PI
-    u'\u03c1'   #  0xAE -> GREEK SMALL LETTER RHO
-    u'\u03c3'   #  0xAF -> GREEK SMALL LETTER SIGMA
-    u'\xa3'     #  0xB0 -> POUND SIGN
-    u'\u03ac'   #  0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS
-    u'\u03ad'   #  0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS
-    u'\u03ae'   #  0xB3 -> GREEK SMALL LETTER ETA WITH TONOS
-    u'\u03ca'   #  0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
-    u'\u03af'   #  0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS
-    u'\u03cc'   #  0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS
-    u'\u03cd'   #  0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS
-    u'\u03cb'   #  0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
-    u'\u03ce'   #  0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS
-    u'\u03c2'   #  0xBA -> GREEK SMALL LETTER FINAL SIGMA
-    u'\u03c4'   #  0xBB -> GREEK SMALL LETTER TAU
-    u'\u03c5'   #  0xBC -> GREEK SMALL LETTER UPSILON
-    u'\u03c6'   #  0xBD -> GREEK SMALL LETTER PHI
-    u'\u03c7'   #  0xBE -> GREEK SMALL LETTER CHI
-    u'\u03c8'   #  0xBF -> GREEK SMALL LETTER PSI
-    u'{'        #  0xC0 -> LEFT CURLY BRACKET
-    u'A'        #  0xC1 -> LATIN CAPITAL LETTER A
-    u'B'        #  0xC2 -> LATIN CAPITAL LETTER B
-    u'C'        #  0xC3 -> LATIN CAPITAL LETTER C
-    u'D'        #  0xC4 -> LATIN CAPITAL LETTER D
-    u'E'        #  0xC5 -> LATIN CAPITAL LETTER E
-    u'F'        #  0xC6 -> LATIN CAPITAL LETTER F
-    u'G'        #  0xC7 -> LATIN CAPITAL LETTER G
-    u'H'        #  0xC8 -> LATIN CAPITAL LETTER H
-    u'I'        #  0xC9 -> LATIN CAPITAL LETTER I
-    u'\xad'     #  0xCA -> SOFT HYPHEN
-    u'\u03c9'   #  0xCB -> GREEK SMALL LETTER OMEGA
-    u'\u0390'   #  0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
-    u'\u03b0'   #  0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
-    u'\u2018'   #  0xCE -> LEFT SINGLE QUOTATION MARK
-    u'\u2015'   #  0xCF -> HORIZONTAL BAR
-    u'}'        #  0xD0 -> RIGHT CURLY BRACKET
-    u'J'        #  0xD1 -> LATIN CAPITAL LETTER J
-    u'K'        #  0xD2 -> LATIN CAPITAL LETTER K
-    u'L'        #  0xD3 -> LATIN CAPITAL LETTER L
-    u'M'        #  0xD4 -> LATIN CAPITAL LETTER M
-    u'N'        #  0xD5 -> LATIN CAPITAL LETTER N
-    u'O'        #  0xD6 -> LATIN CAPITAL LETTER O
-    u'P'        #  0xD7 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0xD9 -> LATIN CAPITAL LETTER R
-    u'\xb1'     #  0xDA -> PLUS-MINUS SIGN
-    u'\xbd'     #  0xDB -> VULGAR FRACTION ONE HALF
-    u'\x1a'     #  0xDC -> SUBSTITUTE
-    u'\u0387'   #  0xDD -> GREEK ANO TELEIA
-    u'\u2019'   #  0xDE -> RIGHT SINGLE QUOTATION MARK
-    u'\xa6'     #  0xDF -> BROKEN BAR
-    u'\\'       #  0xE0 -> REVERSE SOLIDUS
-    u'\x1a'     #  0xE1 -> SUBSTITUTE
-    u'S'        #  0xE2 -> LATIN CAPITAL LETTER S
-    u'T'        #  0xE3 -> LATIN CAPITAL LETTER T
-    u'U'        #  0xE4 -> LATIN CAPITAL LETTER U
-    u'V'        #  0xE5 -> LATIN CAPITAL LETTER V
-    u'W'        #  0xE6 -> LATIN CAPITAL LETTER W
-    u'X'        #  0xE7 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
-    u'\xb2'     #  0xEA -> SUPERSCRIPT TWO
-    u'\xa7'     #  0xEB -> SECTION SIGN
-    u'\x1a'     #  0xEC -> SUBSTITUTE
-    u'\x1a'     #  0xED -> SUBSTITUTE
-    u'\xab'     #  0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xEF -> NOT SIGN
-    u'0'        #  0xF0 -> DIGIT ZERO
-    u'1'        #  0xF1 -> DIGIT ONE
-    u'2'        #  0xF2 -> DIGIT TWO
-    u'3'        #  0xF3 -> DIGIT THREE
-    u'4'        #  0xF4 -> DIGIT FOUR
-    u'5'        #  0xF5 -> DIGIT FIVE
-    u'6'        #  0xF6 -> DIGIT SIX
-    u'7'        #  0xF7 -> DIGIT SEVEN
-    u'8'        #  0xF8 -> DIGIT EIGHT
-    u'9'        #  0xF9 -> DIGIT NINE
-    u'\xb3'     #  0xFA -> SUPERSCRIPT THREE
-    u'\xa9'     #  0xFB -> COPYRIGHT SIGN
-    u'\x1a'     #  0xFC -> SUBSTITUTE
-    u'\x1a'     #  0xFD -> SUBSTITUTE
-    u'\xbb'     #  0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\x9f'     #  0xFF -> CONTROL
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x9c'     #  0x04 -> CONTROL
+    '\t'       #  0x05 -> HORIZONTAL TABULATION
+    '\x86'     #  0x06 -> CONTROL
+    '\x7f'     #  0x07 -> DELETE
+    '\x97'     #  0x08 -> CONTROL
+    '\x8d'     #  0x09 -> CONTROL
+    '\x8e'     #  0x0A -> CONTROL
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x9d'     #  0x14 -> CONTROL
+    '\x85'     #  0x15 -> CONTROL
+    '\x08'     #  0x16 -> BACKSPACE
+    '\x87'     #  0x17 -> CONTROL
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x92'     #  0x1A -> CONTROL
+    '\x8f'     #  0x1B -> CONTROL
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    '\x80'     #  0x20 -> CONTROL
+    '\x81'     #  0x21 -> CONTROL
+    '\x82'     #  0x22 -> CONTROL
+    '\x83'     #  0x23 -> CONTROL
+    '\x84'     #  0x24 -> CONTROL
+    '\n'       #  0x25 -> LINE FEED
+    '\x17'     #  0x26 -> END OF TRANSMISSION BLOCK
+    '\x1b'     #  0x27 -> ESCAPE
+    '\x88'     #  0x28 -> CONTROL
+    '\x89'     #  0x29 -> CONTROL
+    '\x8a'     #  0x2A -> CONTROL
+    '\x8b'     #  0x2B -> CONTROL
+    '\x8c'     #  0x2C -> CONTROL
+    '\x05'     #  0x2D -> ENQUIRY
+    '\x06'     #  0x2E -> ACKNOWLEDGE
+    '\x07'     #  0x2F -> BELL
+    '\x90'     #  0x30 -> CONTROL
+    '\x91'     #  0x31 -> CONTROL
+    '\x16'     #  0x32 -> SYNCHRONOUS IDLE
+    '\x93'     #  0x33 -> CONTROL
+    '\x94'     #  0x34 -> CONTROL
+    '\x95'     #  0x35 -> CONTROL
+    '\x96'     #  0x36 -> CONTROL
+    '\x04'     #  0x37 -> END OF TRANSMISSION
+    '\x98'     #  0x38 -> CONTROL
+    '\x99'     #  0x39 -> CONTROL
+    '\x9a'     #  0x3A -> CONTROL
+    '\x9b'     #  0x3B -> CONTROL
+    '\x14'     #  0x3C -> DEVICE CONTROL FOUR
+    '\x15'     #  0x3D -> NEGATIVE ACKNOWLEDGE
+    '\x9e'     #  0x3E -> CONTROL
+    '\x1a'     #  0x3F -> SUBSTITUTE
+    ' '        #  0x40 -> SPACE
+    '\u0391'   #  0x41 -> GREEK CAPITAL LETTER ALPHA
+    '\u0392'   #  0x42 -> GREEK CAPITAL LETTER BETA
+    '\u0393'   #  0x43 -> GREEK CAPITAL LETTER GAMMA
+    '\u0394'   #  0x44 -> GREEK CAPITAL LETTER DELTA
+    '\u0395'   #  0x45 -> GREEK CAPITAL LETTER EPSILON
+    '\u0396'   #  0x46 -> GREEK CAPITAL LETTER ZETA
+    '\u0397'   #  0x47 -> GREEK CAPITAL LETTER ETA
+    '\u0398'   #  0x48 -> GREEK CAPITAL LETTER THETA
+    '\u0399'   #  0x49 -> GREEK CAPITAL LETTER IOTA
+    '['        #  0x4A -> LEFT SQUARE BRACKET
+    '.'        #  0x4B -> FULL STOP
+    '<'        #  0x4C -> LESS-THAN SIGN
+    '('        #  0x4D -> LEFT PARENTHESIS
+    '+'        #  0x4E -> PLUS SIGN
+    '!'        #  0x4F -> EXCLAMATION MARK
+    '&'        #  0x50 -> AMPERSAND
+    '\u039a'   #  0x51 -> GREEK CAPITAL LETTER KAPPA
+    '\u039b'   #  0x52 -> GREEK CAPITAL LETTER LAMDA
+    '\u039c'   #  0x53 -> GREEK CAPITAL LETTER MU
+    '\u039d'   #  0x54 -> GREEK CAPITAL LETTER NU
+    '\u039e'   #  0x55 -> GREEK CAPITAL LETTER XI
+    '\u039f'   #  0x56 -> GREEK CAPITAL LETTER OMICRON
+    '\u03a0'   #  0x57 -> GREEK CAPITAL LETTER PI
+    '\u03a1'   #  0x58 -> GREEK CAPITAL LETTER RHO
+    '\u03a3'   #  0x59 -> GREEK CAPITAL LETTER SIGMA
+    ']'        #  0x5A -> RIGHT SQUARE BRACKET
+    '$'        #  0x5B -> DOLLAR SIGN
+    '*'        #  0x5C -> ASTERISK
+    ')'        #  0x5D -> RIGHT PARENTHESIS
+    ';'        #  0x5E -> SEMICOLON
+    '^'        #  0x5F -> CIRCUMFLEX ACCENT
+    '-'        #  0x60 -> HYPHEN-MINUS
+    '/'        #  0x61 -> SOLIDUS
+    '\u03a4'   #  0x62 -> GREEK CAPITAL LETTER TAU
+    '\u03a5'   #  0x63 -> GREEK CAPITAL LETTER UPSILON
+    '\u03a6'   #  0x64 -> GREEK CAPITAL LETTER PHI
+    '\u03a7'   #  0x65 -> GREEK CAPITAL LETTER CHI
+    '\u03a8'   #  0x66 -> GREEK CAPITAL LETTER PSI
+    '\u03a9'   #  0x67 -> GREEK CAPITAL LETTER OMEGA
+    '\u03aa'   #  0x68 -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+    '\u03ab'   #  0x69 -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+    '|'        #  0x6A -> VERTICAL LINE
+    ','        #  0x6B -> COMMA
+    '%'        #  0x6C -> PERCENT SIGN
+    '_'        #  0x6D -> LOW LINE
+    '>'        #  0x6E -> GREATER-THAN SIGN
+    '?'        #  0x6F -> QUESTION MARK
+    '\xa8'     #  0x70 -> DIAERESIS
+    '\u0386'   #  0x71 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+    '\u0388'   #  0x72 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+    '\u0389'   #  0x73 -> GREEK CAPITAL LETTER ETA WITH TONOS
+    '\xa0'     #  0x74 -> NO-BREAK SPACE
+    '\u038a'   #  0x75 -> GREEK CAPITAL LETTER IOTA WITH TONOS
+    '\u038c'   #  0x76 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+    '\u038e'   #  0x77 -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+    '\u038f'   #  0x78 -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+    '`'        #  0x79 -> GRAVE ACCENT
+    ':'        #  0x7A -> COLON
+    '#'        #  0x7B -> NUMBER SIGN
+    '@'        #  0x7C -> COMMERCIAL AT
+    "'"        #  0x7D -> APOSTROPHE
+    '='        #  0x7E -> EQUALS SIGN
+    '"'        #  0x7F -> QUOTATION MARK
+    '\u0385'   #  0x80 -> GREEK DIALYTIKA TONOS
+    'a'        #  0x81 -> LATIN SMALL LETTER A
+    'b'        #  0x82 -> LATIN SMALL LETTER B
+    'c'        #  0x83 -> LATIN SMALL LETTER C
+    'd'        #  0x84 -> LATIN SMALL LETTER D
+    'e'        #  0x85 -> LATIN SMALL LETTER E
+    'f'        #  0x86 -> LATIN SMALL LETTER F
+    'g'        #  0x87 -> LATIN SMALL LETTER G
+    'h'        #  0x88 -> LATIN SMALL LETTER H
+    'i'        #  0x89 -> LATIN SMALL LETTER I
+    '\u03b1'   #  0x8A -> GREEK SMALL LETTER ALPHA
+    '\u03b2'   #  0x8B -> GREEK SMALL LETTER BETA
+    '\u03b3'   #  0x8C -> GREEK SMALL LETTER GAMMA
+    '\u03b4'   #  0x8D -> GREEK SMALL LETTER DELTA
+    '\u03b5'   #  0x8E -> GREEK SMALL LETTER EPSILON
+    '\u03b6'   #  0x8F -> GREEK SMALL LETTER ZETA
+    '\xb0'     #  0x90 -> DEGREE SIGN
+    'j'        #  0x91 -> LATIN SMALL LETTER J
+    'k'        #  0x92 -> LATIN SMALL LETTER K
+    'l'        #  0x93 -> LATIN SMALL LETTER L
+    'm'        #  0x94 -> LATIN SMALL LETTER M
+    'n'        #  0x95 -> LATIN SMALL LETTER N
+    'o'        #  0x96 -> LATIN SMALL LETTER O
+    'p'        #  0x97 -> LATIN SMALL LETTER P
+    'q'        #  0x98 -> LATIN SMALL LETTER Q
+    'r'        #  0x99 -> LATIN SMALL LETTER R
+    '\u03b7'   #  0x9A -> GREEK SMALL LETTER ETA
+    '\u03b8'   #  0x9B -> GREEK SMALL LETTER THETA
+    '\u03b9'   #  0x9C -> GREEK SMALL LETTER IOTA
+    '\u03ba'   #  0x9D -> GREEK SMALL LETTER KAPPA
+    '\u03bb'   #  0x9E -> GREEK SMALL LETTER LAMDA
+    '\u03bc'   #  0x9F -> GREEK SMALL LETTER MU
+    '\xb4'     #  0xA0 -> ACUTE ACCENT
+    '~'        #  0xA1 -> TILDE
+    's'        #  0xA2 -> LATIN SMALL LETTER S
+    't'        #  0xA3 -> LATIN SMALL LETTER T
+    'u'        #  0xA4 -> LATIN SMALL LETTER U
+    'v'        #  0xA5 -> LATIN SMALL LETTER V
+    'w'        #  0xA6 -> LATIN SMALL LETTER W
+    'x'        #  0xA7 -> LATIN SMALL LETTER X
+    'y'        #  0xA8 -> LATIN SMALL LETTER Y
+    'z'        #  0xA9 -> LATIN SMALL LETTER Z
+    '\u03bd'   #  0xAA -> GREEK SMALL LETTER NU
+    '\u03be'   #  0xAB -> GREEK SMALL LETTER XI
+    '\u03bf'   #  0xAC -> GREEK SMALL LETTER OMICRON
+    '\u03c0'   #  0xAD -> GREEK SMALL LETTER PI
+    '\u03c1'   #  0xAE -> GREEK SMALL LETTER RHO
+    '\u03c3'   #  0xAF -> GREEK SMALL LETTER SIGMA
+    '\xa3'     #  0xB0 -> POUND SIGN
+    '\u03ac'   #  0xB1 -> GREEK SMALL LETTER ALPHA WITH TONOS
+    '\u03ad'   #  0xB2 -> GREEK SMALL LETTER EPSILON WITH TONOS
+    '\u03ae'   #  0xB3 -> GREEK SMALL LETTER ETA WITH TONOS
+    '\u03ca'   #  0xB4 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+    '\u03af'   #  0xB5 -> GREEK SMALL LETTER IOTA WITH TONOS
+    '\u03cc'   #  0xB6 -> GREEK SMALL LETTER OMICRON WITH TONOS
+    '\u03cd'   #  0xB7 -> GREEK SMALL LETTER UPSILON WITH TONOS
+    '\u03cb'   #  0xB8 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+    '\u03ce'   #  0xB9 -> GREEK SMALL LETTER OMEGA WITH TONOS
+    '\u03c2'   #  0xBA -> GREEK SMALL LETTER FINAL SIGMA
+    '\u03c4'   #  0xBB -> GREEK SMALL LETTER TAU
+    '\u03c5'   #  0xBC -> GREEK SMALL LETTER UPSILON
+    '\u03c6'   #  0xBD -> GREEK SMALL LETTER PHI
+    '\u03c7'   #  0xBE -> GREEK SMALL LETTER CHI
+    '\u03c8'   #  0xBF -> GREEK SMALL LETTER PSI
+    '{'        #  0xC0 -> LEFT CURLY BRACKET
+    'A'        #  0xC1 -> LATIN CAPITAL LETTER A
+    'B'        #  0xC2 -> LATIN CAPITAL LETTER B
+    'C'        #  0xC3 -> LATIN CAPITAL LETTER C
+    'D'        #  0xC4 -> LATIN CAPITAL LETTER D
+    'E'        #  0xC5 -> LATIN CAPITAL LETTER E
+    'F'        #  0xC6 -> LATIN CAPITAL LETTER F
+    'G'        #  0xC7 -> LATIN CAPITAL LETTER G
+    'H'        #  0xC8 -> LATIN CAPITAL LETTER H
+    'I'        #  0xC9 -> LATIN CAPITAL LETTER I
+    '\xad'     #  0xCA -> SOFT HYPHEN
+    '\u03c9'   #  0xCB -> GREEK SMALL LETTER OMEGA
+    '\u0390'   #  0xCC -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+    '\u03b0'   #  0xCD -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+    '\u2018'   #  0xCE -> LEFT SINGLE QUOTATION MARK
+    '\u2015'   #  0xCF -> HORIZONTAL BAR
+    '}'        #  0xD0 -> RIGHT CURLY BRACKET
+    'J'        #  0xD1 -> LATIN CAPITAL LETTER J
+    'K'        #  0xD2 -> LATIN CAPITAL LETTER K
+    'L'        #  0xD3 -> LATIN CAPITAL LETTER L
+    'M'        #  0xD4 -> LATIN CAPITAL LETTER M
+    'N'        #  0xD5 -> LATIN CAPITAL LETTER N
+    'O'        #  0xD6 -> LATIN CAPITAL LETTER O
+    'P'        #  0xD7 -> LATIN CAPITAL LETTER P
+    'Q'        #  0xD8 -> LATIN CAPITAL LETTER Q
+    'R'        #  0xD9 -> LATIN CAPITAL LETTER R
+    '\xb1'     #  0xDA -> PLUS-MINUS SIGN
+    '\xbd'     #  0xDB -> VULGAR FRACTION ONE HALF
+    '\x1a'     #  0xDC -> SUBSTITUTE
+    '\u0387'   #  0xDD -> GREEK ANO TELEIA
+    '\u2019'   #  0xDE -> RIGHT SINGLE QUOTATION MARK
+    '\xa6'     #  0xDF -> BROKEN BAR
+    '\\'       #  0xE0 -> REVERSE SOLIDUS
+    '\x1a'     #  0xE1 -> SUBSTITUTE
+    'S'        #  0xE2 -> LATIN CAPITAL LETTER S
+    'T'        #  0xE3 -> LATIN CAPITAL LETTER T
+    'U'        #  0xE4 -> LATIN CAPITAL LETTER U
+    'V'        #  0xE5 -> LATIN CAPITAL LETTER V
+    'W'        #  0xE6 -> LATIN CAPITAL LETTER W
+    'X'        #  0xE7 -> LATIN CAPITAL LETTER X
+    'Y'        #  0xE8 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0xE9 -> LATIN CAPITAL LETTER Z
+    '\xb2'     #  0xEA -> SUPERSCRIPT TWO
+    '\xa7'     #  0xEB -> SECTION SIGN
+    '\x1a'     #  0xEC -> SUBSTITUTE
+    '\x1a'     #  0xED -> SUBSTITUTE
+    '\xab'     #  0xEE -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xEF -> NOT SIGN
+    '0'        #  0xF0 -> DIGIT ZERO
+    '1'        #  0xF1 -> DIGIT ONE
+    '2'        #  0xF2 -> DIGIT TWO
+    '3'        #  0xF3 -> DIGIT THREE
+    '4'        #  0xF4 -> DIGIT FOUR
+    '5'        #  0xF5 -> DIGIT FIVE
+    '6'        #  0xF6 -> DIGIT SIX
+    '7'        #  0xF7 -> DIGIT SEVEN
+    '8'        #  0xF8 -> DIGIT EIGHT
+    '9'        #  0xF9 -> DIGIT NINE
+    '\xb3'     #  0xFA -> SUPERSCRIPT THREE
+    '\xa9'     #  0xFB -> COPYRIGHT SIGN
+    '\x1a'     #  0xFC -> SUBSTITUTE
+    '\x1a'     #  0xFD -> SUBSTITUTE
+    '\xbb'     #  0xFE -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\x9f'     #  0xFF -> CONTROL
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/idna.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/idna.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/idna.py	Wed May  2 21:09:54 2007
@@ -4,11 +4,11 @@
 from unicodedata import ucd_3_2_0 as unicodedata
 
 # IDNA section 3.1
-dots = re.compile(u"[\u002E\u3002\uFF0E\uFF61]")
+dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")
 
 # IDNA section 5
 ace_prefix = "xn--"
-uace_prefix = unicode(ace_prefix, "ascii")
+uace_prefix = str(ace_prefix, "ascii")
 
 # This assumes query strings, so AllowUnassigned is true
 def nameprep(label):
@@ -19,7 +19,7 @@
             # Map to nothing
             continue
         newlabel.append(stringprep.map_table_b2(c))
-    label = u"".join(newlabel)
+    label = "".join(newlabel)
 
     # Normalize
     label = unicodedata.normalize("NFKC", label)
@@ -122,7 +122,7 @@
             raise UnicodeError("Invalid character in IDN label")
     # Step 3: Check for ACE prefix
     if not label.startswith(ace_prefix):
-        return unicode(label, "ascii")
+        return str(label, "ascii")
 
     # Step 4: Remove ACE prefix
     label1 = label[len(ace_prefix):]
@@ -171,28 +171,28 @@
             raise UnicodeError("Unsupported error handling "+errors)
 
         if not input:
-            return u"", 0
+            return "", 0
 
         # IDNA allows decoding to operate on Unicode strings, too.
-        if isinstance(input, unicode):
+        if isinstance(input, str):
             labels = dots.split(input)
         else:
             # Must be ASCII string
             input = str(input)
-            unicode(input, "ascii")
+            str(input, "ascii")
             labels = input.split(".")
 
         if labels and len(labels[-1]) == 0:
-            trailing_dot = u'.'
+            trailing_dot = '.'
             del labels[-1]
         else:
-            trailing_dot = u''
+            trailing_dot = ''
 
         result = []
         for label in labels:
             result.append(ToUnicode(label))
 
-        return u".".join(result)+trailing_dot, len(input)
+        return ".".join(result)+trailing_dot, len(input)
 
 class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
     def _buffer_encode(self, input, errors, final):
@@ -204,7 +204,7 @@
             return ("", 0)
 
         labels = dots.split(input)
-        trailing_dot = u''
+        trailing_dot = ''
         if labels:
             if not labels[-1]:
                 trailing_dot = '.'
@@ -234,27 +234,27 @@
             raise UnicodeError("Unsupported error handling "+errors)
 
         if not input:
-            return (u"", 0)
+            return ("", 0)
 
         # IDNA allows decoding to operate on Unicode strings, too.
-        if isinstance(input, unicode):
+        if isinstance(input, str):
             labels = dots.split(input)
         else:
             # Must be ASCII string
             input = str(input)
-            unicode(input, "ascii")
+            str(input, "ascii")
             labels = input.split(".")
 
-        trailing_dot = u''
+        trailing_dot = ''
         if labels:
             if not labels[-1]:
-                trailing_dot = u'.'
+                trailing_dot = '.'
                 del labels[-1]
             elif not final:
                 # Keep potentially unfinished label until the next call
                 del labels[-1]
                 if labels:
-                    trailing_dot = u'.'
+                    trailing_dot = '.'
 
         result = []
         size = 0
@@ -264,7 +264,7 @@
                 size += 1
             size += len(label)
 
-        result = u".".join(result) + trailing_dot
+        result = ".".join(result) + trailing_dot
         size += len(trailing_dot)
         return (result, size)
 

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_1.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_1.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_1.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\xbf'     #  0xBF -> INVERTED QUESTION MARK
-    u'\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xd0'     #  0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic)
-    u'\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN (Icelandic)
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S (German)
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf0'     #  0xF0 -> LATIN SMALL LETTER ETH (Icelandic)
-    u'\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xfe'     #  0xFE -> LATIN SMALL LETTER THORN (Icelandic)
-    u'\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\xbf'     #  0xBF -> INVERTED QUESTION MARK
+    '\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xd0'     #  0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic)
+    '\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN (Icelandic)
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S (German)
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf0'     #  0xF0 -> LATIN SMALL LETTER ETH (Icelandic)
+    '\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xfe'     #  0xFE -> LATIN SMALL LETTER THORN (Icelandic)
+    '\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_10.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_10.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_10.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u0104'   #  0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\u0112'   #  0xA2 -> LATIN CAPITAL LETTER E WITH MACRON
-    u'\u0122'   #  0xA3 -> LATIN CAPITAL LETTER G WITH CEDILLA
-    u'\u012a'   #  0xA4 -> LATIN CAPITAL LETTER I WITH MACRON
-    u'\u0128'   #  0xA5 -> LATIN CAPITAL LETTER I WITH TILDE
-    u'\u0136'   #  0xA6 -> LATIN CAPITAL LETTER K WITH CEDILLA
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\u013b'   #  0xA8 -> LATIN CAPITAL LETTER L WITH CEDILLA
-    u'\u0110'   #  0xA9 -> LATIN CAPITAL LETTER D WITH STROKE
-    u'\u0160'   #  0xAA -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u0166'   #  0xAB -> LATIN CAPITAL LETTER T WITH STROKE
-    u'\u017d'   #  0xAC -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\u016a'   #  0xAE -> LATIN CAPITAL LETTER U WITH MACRON
-    u'\u014a'   #  0xAF -> LATIN CAPITAL LETTER ENG
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\u0105'   #  0xB1 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u0113'   #  0xB2 -> LATIN SMALL LETTER E WITH MACRON
-    u'\u0123'   #  0xB3 -> LATIN SMALL LETTER G WITH CEDILLA
-    u'\u012b'   #  0xB4 -> LATIN SMALL LETTER I WITH MACRON
-    u'\u0129'   #  0xB5 -> LATIN SMALL LETTER I WITH TILDE
-    u'\u0137'   #  0xB6 -> LATIN SMALL LETTER K WITH CEDILLA
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\u013c'   #  0xB8 -> LATIN SMALL LETTER L WITH CEDILLA
-    u'\u0111'   #  0xB9 -> LATIN SMALL LETTER D WITH STROKE
-    u'\u0161'   #  0xBA -> LATIN SMALL LETTER S WITH CARON
-    u'\u0167'   #  0xBB -> LATIN SMALL LETTER T WITH STROKE
-    u'\u017e'   #  0xBC -> LATIN SMALL LETTER Z WITH CARON
-    u'\u2015'   #  0xBD -> HORIZONTAL BAR
-    u'\u016b'   #  0xBE -> LATIN SMALL LETTER U WITH MACRON
-    u'\u014b'   #  0xBF -> LATIN SMALL LETTER ENG
-    u'\u0100'   #  0xC0 -> LATIN CAPITAL LETTER A WITH MACRON
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\u012e'   #  0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK
-    u'\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\u0118'   #  0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\u0116'   #  0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xd0'     #  0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic)
-    u'\u0145'   #  0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA
-    u'\u014c'   #  0xD2 -> LATIN CAPITAL LETTER O WITH MACRON
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\u0168'   #  0xD7 -> LATIN CAPITAL LETTER U WITH TILDE
-    u'\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\u0172'   #  0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN (Icelandic)
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S (German)
-    u'\u0101'   #  0xE0 -> LATIN SMALL LETTER A WITH MACRON
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\u012f'   #  0xE7 -> LATIN SMALL LETTER I WITH OGONEK
-    u'\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\u0119'   #  0xEA -> LATIN SMALL LETTER E WITH OGONEK
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\u0117'   #  0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf0'     #  0xF0 -> LATIN SMALL LETTER ETH (Icelandic)
-    u'\u0146'   #  0xF1 -> LATIN SMALL LETTER N WITH CEDILLA
-    u'\u014d'   #  0xF2 -> LATIN SMALL LETTER O WITH MACRON
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\u0169'   #  0xF7 -> LATIN SMALL LETTER U WITH TILDE
-    u'\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\u0173'   #  0xF9 -> LATIN SMALL LETTER U WITH OGONEK
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xfe'     #  0xFE -> LATIN SMALL LETTER THORN (Icelandic)
-    u'\u0138'   #  0xFF -> LATIN SMALL LETTER KRA
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u0104'   #  0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\u0112'   #  0xA2 -> LATIN CAPITAL LETTER E WITH MACRON
+    '\u0122'   #  0xA3 -> LATIN CAPITAL LETTER G WITH CEDILLA
+    '\u012a'   #  0xA4 -> LATIN CAPITAL LETTER I WITH MACRON
+    '\u0128'   #  0xA5 -> LATIN CAPITAL LETTER I WITH TILDE
+    '\u0136'   #  0xA6 -> LATIN CAPITAL LETTER K WITH CEDILLA
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\u013b'   #  0xA8 -> LATIN CAPITAL LETTER L WITH CEDILLA
+    '\u0110'   #  0xA9 -> LATIN CAPITAL LETTER D WITH STROKE
+    '\u0160'   #  0xAA -> LATIN CAPITAL LETTER S WITH CARON
+    '\u0166'   #  0xAB -> LATIN CAPITAL LETTER T WITH STROKE
+    '\u017d'   #  0xAC -> LATIN CAPITAL LETTER Z WITH CARON
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\u016a'   #  0xAE -> LATIN CAPITAL LETTER U WITH MACRON
+    '\u014a'   #  0xAF -> LATIN CAPITAL LETTER ENG
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\u0105'   #  0xB1 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u0113'   #  0xB2 -> LATIN SMALL LETTER E WITH MACRON
+    '\u0123'   #  0xB3 -> LATIN SMALL LETTER G WITH CEDILLA
+    '\u012b'   #  0xB4 -> LATIN SMALL LETTER I WITH MACRON
+    '\u0129'   #  0xB5 -> LATIN SMALL LETTER I WITH TILDE
+    '\u0137'   #  0xB6 -> LATIN SMALL LETTER K WITH CEDILLA
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\u013c'   #  0xB8 -> LATIN SMALL LETTER L WITH CEDILLA
+    '\u0111'   #  0xB9 -> LATIN SMALL LETTER D WITH STROKE
+    '\u0161'   #  0xBA -> LATIN SMALL LETTER S WITH CARON
+    '\u0167'   #  0xBB -> LATIN SMALL LETTER T WITH STROKE
+    '\u017e'   #  0xBC -> LATIN SMALL LETTER Z WITH CARON
+    '\u2015'   #  0xBD -> HORIZONTAL BAR
+    '\u016b'   #  0xBE -> LATIN SMALL LETTER U WITH MACRON
+    '\u014b'   #  0xBF -> LATIN SMALL LETTER ENG
+    '\u0100'   #  0xC0 -> LATIN CAPITAL LETTER A WITH MACRON
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\u012e'   #  0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK
+    '\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\u0118'   #  0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\u0116'   #  0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xd0'     #  0xD0 -> LATIN CAPITAL LETTER ETH (Icelandic)
+    '\u0145'   #  0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA
+    '\u014c'   #  0xD2 -> LATIN CAPITAL LETTER O WITH MACRON
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\u0168'   #  0xD7 -> LATIN CAPITAL LETTER U WITH TILDE
+    '\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\u0172'   #  0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN (Icelandic)
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S (German)
+    '\u0101'   #  0xE0 -> LATIN SMALL LETTER A WITH MACRON
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\u012f'   #  0xE7 -> LATIN SMALL LETTER I WITH OGONEK
+    '\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\u0119'   #  0xEA -> LATIN SMALL LETTER E WITH OGONEK
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\u0117'   #  0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf0'     #  0xF0 -> LATIN SMALL LETTER ETH (Icelandic)
+    '\u0146'   #  0xF1 -> LATIN SMALL LETTER N WITH CEDILLA
+    '\u014d'   #  0xF2 -> LATIN SMALL LETTER O WITH MACRON
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\u0169'   #  0xF7 -> LATIN SMALL LETTER U WITH TILDE
+    '\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
+    '\u0173'   #  0xF9 -> LATIN SMALL LETTER U WITH OGONEK
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xfe'     #  0xFE -> LATIN SMALL LETTER THORN (Icelandic)
+    '\u0138'   #  0xFF -> LATIN SMALL LETTER KRA
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_11.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_11.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_11.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u0e01'   #  0xA1 -> THAI CHARACTER KO KAI
-    u'\u0e02'   #  0xA2 -> THAI CHARACTER KHO KHAI
-    u'\u0e03'   #  0xA3 -> THAI CHARACTER KHO KHUAT
-    u'\u0e04'   #  0xA4 -> THAI CHARACTER KHO KHWAI
-    u'\u0e05'   #  0xA5 -> THAI CHARACTER KHO KHON
-    u'\u0e06'   #  0xA6 -> THAI CHARACTER KHO RAKHANG
-    u'\u0e07'   #  0xA7 -> THAI CHARACTER NGO NGU
-    u'\u0e08'   #  0xA8 -> THAI CHARACTER CHO CHAN
-    u'\u0e09'   #  0xA9 -> THAI CHARACTER CHO CHING
-    u'\u0e0a'   #  0xAA -> THAI CHARACTER CHO CHANG
-    u'\u0e0b'   #  0xAB -> THAI CHARACTER SO SO
-    u'\u0e0c'   #  0xAC -> THAI CHARACTER CHO CHOE
-    u'\u0e0d'   #  0xAD -> THAI CHARACTER YO YING
-    u'\u0e0e'   #  0xAE -> THAI CHARACTER DO CHADA
-    u'\u0e0f'   #  0xAF -> THAI CHARACTER TO PATAK
-    u'\u0e10'   #  0xB0 -> THAI CHARACTER THO THAN
-    u'\u0e11'   #  0xB1 -> THAI CHARACTER THO NANGMONTHO
-    u'\u0e12'   #  0xB2 -> THAI CHARACTER THO PHUTHAO
-    u'\u0e13'   #  0xB3 -> THAI CHARACTER NO NEN
-    u'\u0e14'   #  0xB4 -> THAI CHARACTER DO DEK
-    u'\u0e15'   #  0xB5 -> THAI CHARACTER TO TAO
-    u'\u0e16'   #  0xB6 -> THAI CHARACTER THO THUNG
-    u'\u0e17'   #  0xB7 -> THAI CHARACTER THO THAHAN
-    u'\u0e18'   #  0xB8 -> THAI CHARACTER THO THONG
-    u'\u0e19'   #  0xB9 -> THAI CHARACTER NO NU
-    u'\u0e1a'   #  0xBA -> THAI CHARACTER BO BAIMAI
-    u'\u0e1b'   #  0xBB -> THAI CHARACTER PO PLA
-    u'\u0e1c'   #  0xBC -> THAI CHARACTER PHO PHUNG
-    u'\u0e1d'   #  0xBD -> THAI CHARACTER FO FA
-    u'\u0e1e'   #  0xBE -> THAI CHARACTER PHO PHAN
-    u'\u0e1f'   #  0xBF -> THAI CHARACTER FO FAN
-    u'\u0e20'   #  0xC0 -> THAI CHARACTER PHO SAMPHAO
-    u'\u0e21'   #  0xC1 -> THAI CHARACTER MO MA
-    u'\u0e22'   #  0xC2 -> THAI CHARACTER YO YAK
-    u'\u0e23'   #  0xC3 -> THAI CHARACTER RO RUA
-    u'\u0e24'   #  0xC4 -> THAI CHARACTER RU
-    u'\u0e25'   #  0xC5 -> THAI CHARACTER LO LING
-    u'\u0e26'   #  0xC6 -> THAI CHARACTER LU
-    u'\u0e27'   #  0xC7 -> THAI CHARACTER WO WAEN
-    u'\u0e28'   #  0xC8 -> THAI CHARACTER SO SALA
-    u'\u0e29'   #  0xC9 -> THAI CHARACTER SO RUSI
-    u'\u0e2a'   #  0xCA -> THAI CHARACTER SO SUA
-    u'\u0e2b'   #  0xCB -> THAI CHARACTER HO HIP
-    u'\u0e2c'   #  0xCC -> THAI CHARACTER LO CHULA
-    u'\u0e2d'   #  0xCD -> THAI CHARACTER O ANG
-    u'\u0e2e'   #  0xCE -> THAI CHARACTER HO NOKHUK
-    u'\u0e2f'   #  0xCF -> THAI CHARACTER PAIYANNOI
-    u'\u0e30'   #  0xD0 -> THAI CHARACTER SARA A
-    u'\u0e31'   #  0xD1 -> THAI CHARACTER MAI HAN-AKAT
-    u'\u0e32'   #  0xD2 -> THAI CHARACTER SARA AA
-    u'\u0e33'   #  0xD3 -> THAI CHARACTER SARA AM
-    u'\u0e34'   #  0xD4 -> THAI CHARACTER SARA I
-    u'\u0e35'   #  0xD5 -> THAI CHARACTER SARA II
-    u'\u0e36'   #  0xD6 -> THAI CHARACTER SARA UE
-    u'\u0e37'   #  0xD7 -> THAI CHARACTER SARA UEE
-    u'\u0e38'   #  0xD8 -> THAI CHARACTER SARA U
-    u'\u0e39'   #  0xD9 -> THAI CHARACTER SARA UU
-    u'\u0e3a'   #  0xDA -> THAI CHARACTER PHINTHU
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\u0e3f'   #  0xDF -> THAI CURRENCY SYMBOL BAHT
-    u'\u0e40'   #  0xE0 -> THAI CHARACTER SARA E
-    u'\u0e41'   #  0xE1 -> THAI CHARACTER SARA AE
-    u'\u0e42'   #  0xE2 -> THAI CHARACTER SARA O
-    u'\u0e43'   #  0xE3 -> THAI CHARACTER SARA AI MAIMUAN
-    u'\u0e44'   #  0xE4 -> THAI CHARACTER SARA AI MAIMALAI
-    u'\u0e45'   #  0xE5 -> THAI CHARACTER LAKKHANGYAO
-    u'\u0e46'   #  0xE6 -> THAI CHARACTER MAIYAMOK
-    u'\u0e47'   #  0xE7 -> THAI CHARACTER MAITAIKHU
-    u'\u0e48'   #  0xE8 -> THAI CHARACTER MAI EK
-    u'\u0e49'   #  0xE9 -> THAI CHARACTER MAI THO
-    u'\u0e4a'   #  0xEA -> THAI CHARACTER MAI TRI
-    u'\u0e4b'   #  0xEB -> THAI CHARACTER MAI CHATTAWA
-    u'\u0e4c'   #  0xEC -> THAI CHARACTER THANTHAKHAT
-    u'\u0e4d'   #  0xED -> THAI CHARACTER NIKHAHIT
-    u'\u0e4e'   #  0xEE -> THAI CHARACTER YAMAKKAN
-    u'\u0e4f'   #  0xEF -> THAI CHARACTER FONGMAN
-    u'\u0e50'   #  0xF0 -> THAI DIGIT ZERO
-    u'\u0e51'   #  0xF1 -> THAI DIGIT ONE
-    u'\u0e52'   #  0xF2 -> THAI DIGIT TWO
-    u'\u0e53'   #  0xF3 -> THAI DIGIT THREE
-    u'\u0e54'   #  0xF4 -> THAI DIGIT FOUR
-    u'\u0e55'   #  0xF5 -> THAI DIGIT FIVE
-    u'\u0e56'   #  0xF6 -> THAI DIGIT SIX
-    u'\u0e57'   #  0xF7 -> THAI DIGIT SEVEN
-    u'\u0e58'   #  0xF8 -> THAI DIGIT EIGHT
-    u'\u0e59'   #  0xF9 -> THAI DIGIT NINE
-    u'\u0e5a'   #  0xFA -> THAI CHARACTER ANGKHANKHU
-    u'\u0e5b'   #  0xFB -> THAI CHARACTER KHOMUT
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u0e01'   #  0xA1 -> THAI CHARACTER KO KAI
+    '\u0e02'   #  0xA2 -> THAI CHARACTER KHO KHAI
+    '\u0e03'   #  0xA3 -> THAI CHARACTER KHO KHUAT
+    '\u0e04'   #  0xA4 -> THAI CHARACTER KHO KHWAI
+    '\u0e05'   #  0xA5 -> THAI CHARACTER KHO KHON
+    '\u0e06'   #  0xA6 -> THAI CHARACTER KHO RAKHANG
+    '\u0e07'   #  0xA7 -> THAI CHARACTER NGO NGU
+    '\u0e08'   #  0xA8 -> THAI CHARACTER CHO CHAN
+    '\u0e09'   #  0xA9 -> THAI CHARACTER CHO CHING
+    '\u0e0a'   #  0xAA -> THAI CHARACTER CHO CHANG
+    '\u0e0b'   #  0xAB -> THAI CHARACTER SO SO
+    '\u0e0c'   #  0xAC -> THAI CHARACTER CHO CHOE
+    '\u0e0d'   #  0xAD -> THAI CHARACTER YO YING
+    '\u0e0e'   #  0xAE -> THAI CHARACTER DO CHADA
+    '\u0e0f'   #  0xAF -> THAI CHARACTER TO PATAK
+    '\u0e10'   #  0xB0 -> THAI CHARACTER THO THAN
+    '\u0e11'   #  0xB1 -> THAI CHARACTER THO NANGMONTHO
+    '\u0e12'   #  0xB2 -> THAI CHARACTER THO PHUTHAO
+    '\u0e13'   #  0xB3 -> THAI CHARACTER NO NEN
+    '\u0e14'   #  0xB4 -> THAI CHARACTER DO DEK
+    '\u0e15'   #  0xB5 -> THAI CHARACTER TO TAO
+    '\u0e16'   #  0xB6 -> THAI CHARACTER THO THUNG
+    '\u0e17'   #  0xB7 -> THAI CHARACTER THO THAHAN
+    '\u0e18'   #  0xB8 -> THAI CHARACTER THO THONG
+    '\u0e19'   #  0xB9 -> THAI CHARACTER NO NU
+    '\u0e1a'   #  0xBA -> THAI CHARACTER BO BAIMAI
+    '\u0e1b'   #  0xBB -> THAI CHARACTER PO PLA
+    '\u0e1c'   #  0xBC -> THAI CHARACTER PHO PHUNG
+    '\u0e1d'   #  0xBD -> THAI CHARACTER FO FA
+    '\u0e1e'   #  0xBE -> THAI CHARACTER PHO PHAN
+    '\u0e1f'   #  0xBF -> THAI CHARACTER FO FAN
+    '\u0e20'   #  0xC0 -> THAI CHARACTER PHO SAMPHAO
+    '\u0e21'   #  0xC1 -> THAI CHARACTER MO MA
+    '\u0e22'   #  0xC2 -> THAI CHARACTER YO YAK
+    '\u0e23'   #  0xC3 -> THAI CHARACTER RO RUA
+    '\u0e24'   #  0xC4 -> THAI CHARACTER RU
+    '\u0e25'   #  0xC5 -> THAI CHARACTER LO LING
+    '\u0e26'   #  0xC6 -> THAI CHARACTER LU
+    '\u0e27'   #  0xC7 -> THAI CHARACTER WO WAEN
+    '\u0e28'   #  0xC8 -> THAI CHARACTER SO SALA
+    '\u0e29'   #  0xC9 -> THAI CHARACTER SO RUSI
+    '\u0e2a'   #  0xCA -> THAI CHARACTER SO SUA
+    '\u0e2b'   #  0xCB -> THAI CHARACTER HO HIP
+    '\u0e2c'   #  0xCC -> THAI CHARACTER LO CHULA
+    '\u0e2d'   #  0xCD -> THAI CHARACTER O ANG
+    '\u0e2e'   #  0xCE -> THAI CHARACTER HO NOKHUK
+    '\u0e2f'   #  0xCF -> THAI CHARACTER PAIYANNOI
+    '\u0e30'   #  0xD0 -> THAI CHARACTER SARA A
+    '\u0e31'   #  0xD1 -> THAI CHARACTER MAI HAN-AKAT
+    '\u0e32'   #  0xD2 -> THAI CHARACTER SARA AA
+    '\u0e33'   #  0xD3 -> THAI CHARACTER SARA AM
+    '\u0e34'   #  0xD4 -> THAI CHARACTER SARA I
+    '\u0e35'   #  0xD5 -> THAI CHARACTER SARA II
+    '\u0e36'   #  0xD6 -> THAI CHARACTER SARA UE
+    '\u0e37'   #  0xD7 -> THAI CHARACTER SARA UEE
+    '\u0e38'   #  0xD8 -> THAI CHARACTER SARA U
+    '\u0e39'   #  0xD9 -> THAI CHARACTER SARA UU
+    '\u0e3a'   #  0xDA -> THAI CHARACTER PHINTHU
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\u0e3f'   #  0xDF -> THAI CURRENCY SYMBOL BAHT
+    '\u0e40'   #  0xE0 -> THAI CHARACTER SARA E
+    '\u0e41'   #  0xE1 -> THAI CHARACTER SARA AE
+    '\u0e42'   #  0xE2 -> THAI CHARACTER SARA O
+    '\u0e43'   #  0xE3 -> THAI CHARACTER SARA AI MAIMUAN
+    '\u0e44'   #  0xE4 -> THAI CHARACTER SARA AI MAIMALAI
+    '\u0e45'   #  0xE5 -> THAI CHARACTER LAKKHANGYAO
+    '\u0e46'   #  0xE6 -> THAI CHARACTER MAIYAMOK
+    '\u0e47'   #  0xE7 -> THAI CHARACTER MAITAIKHU
+    '\u0e48'   #  0xE8 -> THAI CHARACTER MAI EK
+    '\u0e49'   #  0xE9 -> THAI CHARACTER MAI THO
+    '\u0e4a'   #  0xEA -> THAI CHARACTER MAI TRI
+    '\u0e4b'   #  0xEB -> THAI CHARACTER MAI CHATTAWA
+    '\u0e4c'   #  0xEC -> THAI CHARACTER THANTHAKHAT
+    '\u0e4d'   #  0xED -> THAI CHARACTER NIKHAHIT
+    '\u0e4e'   #  0xEE -> THAI CHARACTER YAMAKKAN
+    '\u0e4f'   #  0xEF -> THAI CHARACTER FONGMAN
+    '\u0e50'   #  0xF0 -> THAI DIGIT ZERO
+    '\u0e51'   #  0xF1 -> THAI DIGIT ONE
+    '\u0e52'   #  0xF2 -> THAI DIGIT TWO
+    '\u0e53'   #  0xF3 -> THAI DIGIT THREE
+    '\u0e54'   #  0xF4 -> THAI DIGIT FOUR
+    '\u0e55'   #  0xF5 -> THAI DIGIT FIVE
+    '\u0e56'   #  0xF6 -> THAI DIGIT SIX
+    '\u0e57'   #  0xF7 -> THAI DIGIT SEVEN
+    '\u0e58'   #  0xF8 -> THAI DIGIT EIGHT
+    '\u0e59'   #  0xF9 -> THAI DIGIT NINE
+    '\u0e5a'   #  0xFA -> THAI CHARACTER ANGKHANKHU
+    '\u0e5b'   #  0xFB -> THAI CHARACTER KHOMUT
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_13.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_13.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_13.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u201d'   #  0xA1 -> RIGHT DOUBLE QUOTATION MARK
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\u201e'   #  0xA5 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xd8'     #  0xA8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u0156'   #  0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xc6'     #  0xAF -> LATIN CAPITAL LETTER AE
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\u201c'   #  0xB4 -> LEFT DOUBLE QUOTATION MARK
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xf8'     #  0xB8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\u0157'   #  0xBA -> LATIN SMALL LETTER R WITH CEDILLA
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\xe6'     #  0xBF -> LATIN SMALL LETTER AE
-    u'\u0104'   #  0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\u012e'   #  0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK
-    u'\u0100'   #  0xC2 -> LATIN CAPITAL LETTER A WITH MACRON
-    u'\u0106'   #  0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\u0118'   #  0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\u0112'   #  0xC7 -> LATIN CAPITAL LETTER E WITH MACRON
-    u'\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\u0179'   #  0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE
-    u'\u0116'   #  0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE
-    u'\u0122'   #  0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA
-    u'\u0136'   #  0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA
-    u'\u012a'   #  0xCE -> LATIN CAPITAL LETTER I WITH MACRON
-    u'\u013b'   #  0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA
-    u'\u0160'   #  0xD0 -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
-    u'\u0145'   #  0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\u014c'   #  0xD4 -> LATIN CAPITAL LETTER O WITH MACRON
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\u0172'   #  0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK
-    u'\u0141'   #  0xD9 -> LATIN CAPITAL LETTER L WITH STROKE
-    u'\u015a'   #  0xDA -> LATIN CAPITAL LETTER S WITH ACUTE
-    u'\u016a'   #  0xDB -> LATIN CAPITAL LETTER U WITH MACRON
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u017b'   #  0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
-    u'\u017d'   #  0xDE -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S (German)
-    u'\u0105'   #  0xE0 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u012f'   #  0xE1 -> LATIN SMALL LETTER I WITH OGONEK
-    u'\u0101'   #  0xE2 -> LATIN SMALL LETTER A WITH MACRON
-    u'\u0107'   #  0xE3 -> LATIN SMALL LETTER C WITH ACUTE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\u0119'   #  0xE6 -> LATIN SMALL LETTER E WITH OGONEK
-    u'\u0113'   #  0xE7 -> LATIN SMALL LETTER E WITH MACRON
-    u'\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\u017a'   #  0xEA -> LATIN SMALL LETTER Z WITH ACUTE
-    u'\u0117'   #  0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE
-    u'\u0123'   #  0xEC -> LATIN SMALL LETTER G WITH CEDILLA
-    u'\u0137'   #  0xED -> LATIN SMALL LETTER K WITH CEDILLA
-    u'\u012b'   #  0xEE -> LATIN SMALL LETTER I WITH MACRON
-    u'\u013c'   #  0xEF -> LATIN SMALL LETTER L WITH CEDILLA
-    u'\u0161'   #  0xF0 -> LATIN SMALL LETTER S WITH CARON
-    u'\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
-    u'\u0146'   #  0xF2 -> LATIN SMALL LETTER N WITH CEDILLA
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\u014d'   #  0xF4 -> LATIN SMALL LETTER O WITH MACRON
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\u0173'   #  0xF8 -> LATIN SMALL LETTER U WITH OGONEK
-    u'\u0142'   #  0xF9 -> LATIN SMALL LETTER L WITH STROKE
-    u'\u015b'   #  0xFA -> LATIN SMALL LETTER S WITH ACUTE
-    u'\u016b'   #  0xFB -> LATIN SMALL LETTER U WITH MACRON
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u017c'   #  0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
-    u'\u017e'   #  0xFE -> LATIN SMALL LETTER Z WITH CARON
-    u'\u2019'   #  0xFF -> RIGHT SINGLE QUOTATION MARK
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u201d'   #  0xA1 -> RIGHT DOUBLE QUOTATION MARK
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\u201e'   #  0xA5 -> DOUBLE LOW-9 QUOTATION MARK
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xd8'     #  0xA8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u0156'   #  0xAA -> LATIN CAPITAL LETTER R WITH CEDILLA
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xc6'     #  0xAF -> LATIN CAPITAL LETTER AE
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\u201c'   #  0xB4 -> LEFT DOUBLE QUOTATION MARK
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xf8'     #  0xB8 -> LATIN SMALL LETTER O WITH STROKE
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\u0157'   #  0xBA -> LATIN SMALL LETTER R WITH CEDILLA
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\xe6'     #  0xBF -> LATIN SMALL LETTER AE
+    '\u0104'   #  0xC0 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\u012e'   #  0xC1 -> LATIN CAPITAL LETTER I WITH OGONEK
+    '\u0100'   #  0xC2 -> LATIN CAPITAL LETTER A WITH MACRON
+    '\u0106'   #  0xC3 -> LATIN CAPITAL LETTER C WITH ACUTE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\u0118'   #  0xC6 -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\u0112'   #  0xC7 -> LATIN CAPITAL LETTER E WITH MACRON
+    '\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\u0179'   #  0xCA -> LATIN CAPITAL LETTER Z WITH ACUTE
+    '\u0116'   #  0xCB -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+    '\u0122'   #  0xCC -> LATIN CAPITAL LETTER G WITH CEDILLA
+    '\u0136'   #  0xCD -> LATIN CAPITAL LETTER K WITH CEDILLA
+    '\u012a'   #  0xCE -> LATIN CAPITAL LETTER I WITH MACRON
+    '\u013b'   #  0xCF -> LATIN CAPITAL LETTER L WITH CEDILLA
+    '\u0160'   #  0xD0 -> LATIN CAPITAL LETTER S WITH CARON
+    '\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
+    '\u0145'   #  0xD2 -> LATIN CAPITAL LETTER N WITH CEDILLA
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\u014c'   #  0xD4 -> LATIN CAPITAL LETTER O WITH MACRON
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\u0172'   #  0xD8 -> LATIN CAPITAL LETTER U WITH OGONEK
+    '\u0141'   #  0xD9 -> LATIN CAPITAL LETTER L WITH STROKE
+    '\u015a'   #  0xDA -> LATIN CAPITAL LETTER S WITH ACUTE
+    '\u016a'   #  0xDB -> LATIN CAPITAL LETTER U WITH MACRON
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u017b'   #  0xDD -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    '\u017d'   #  0xDE -> LATIN CAPITAL LETTER Z WITH CARON
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S (German)
+    '\u0105'   #  0xE0 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u012f'   #  0xE1 -> LATIN SMALL LETTER I WITH OGONEK
+    '\u0101'   #  0xE2 -> LATIN SMALL LETTER A WITH MACRON
+    '\u0107'   #  0xE3 -> LATIN SMALL LETTER C WITH ACUTE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\u0119'   #  0xE6 -> LATIN SMALL LETTER E WITH OGONEK
+    '\u0113'   #  0xE7 -> LATIN SMALL LETTER E WITH MACRON
+    '\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\u017a'   #  0xEA -> LATIN SMALL LETTER Z WITH ACUTE
+    '\u0117'   #  0xEB -> LATIN SMALL LETTER E WITH DOT ABOVE
+    '\u0123'   #  0xEC -> LATIN SMALL LETTER G WITH CEDILLA
+    '\u0137'   #  0xED -> LATIN SMALL LETTER K WITH CEDILLA
+    '\u012b'   #  0xEE -> LATIN SMALL LETTER I WITH MACRON
+    '\u013c'   #  0xEF -> LATIN SMALL LETTER L WITH CEDILLA
+    '\u0161'   #  0xF0 -> LATIN SMALL LETTER S WITH CARON
+    '\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
+    '\u0146'   #  0xF2 -> LATIN SMALL LETTER N WITH CEDILLA
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\u014d'   #  0xF4 -> LATIN SMALL LETTER O WITH MACRON
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\u0173'   #  0xF8 -> LATIN SMALL LETTER U WITH OGONEK
+    '\u0142'   #  0xF9 -> LATIN SMALL LETTER L WITH STROKE
+    '\u015b'   #  0xFA -> LATIN SMALL LETTER S WITH ACUTE
+    '\u016b'   #  0xFB -> LATIN SMALL LETTER U WITH MACRON
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u017c'   #  0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
+    '\u017e'   #  0xFE -> LATIN SMALL LETTER Z WITH CARON
+    '\u2019'   #  0xFF -> RIGHT SINGLE QUOTATION MARK
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_14.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_14.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_14.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u1e02'   #  0xA1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE
-    u'\u1e03'   #  0xA2 -> LATIN SMALL LETTER B WITH DOT ABOVE
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\u010a'   #  0xA4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE
-    u'\u010b'   #  0xA5 -> LATIN SMALL LETTER C WITH DOT ABOVE
-    u'\u1e0a'   #  0xA6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\u1e80'   #  0xA8 -> LATIN CAPITAL LETTER W WITH GRAVE
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u1e82'   #  0xAA -> LATIN CAPITAL LETTER W WITH ACUTE
-    u'\u1e0b'   #  0xAB -> LATIN SMALL LETTER D WITH DOT ABOVE
-    u'\u1ef2'   #  0xAC -> LATIN CAPITAL LETTER Y WITH GRAVE
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\u0178'   #  0xAF -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\u1e1e'   #  0xB0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE
-    u'\u1e1f'   #  0xB1 -> LATIN SMALL LETTER F WITH DOT ABOVE
-    u'\u0120'   #  0xB2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE
-    u'\u0121'   #  0xB3 -> LATIN SMALL LETTER G WITH DOT ABOVE
-    u'\u1e40'   #  0xB4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE
-    u'\u1e41'   #  0xB5 -> LATIN SMALL LETTER M WITH DOT ABOVE
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\u1e56'   #  0xB7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE
-    u'\u1e81'   #  0xB8 -> LATIN SMALL LETTER W WITH GRAVE
-    u'\u1e57'   #  0xB9 -> LATIN SMALL LETTER P WITH DOT ABOVE
-    u'\u1e83'   #  0xBA -> LATIN SMALL LETTER W WITH ACUTE
-    u'\u1e60'   #  0xBB -> LATIN CAPITAL LETTER S WITH DOT ABOVE
-    u'\u1ef3'   #  0xBC -> LATIN SMALL LETTER Y WITH GRAVE
-    u'\u1e84'   #  0xBD -> LATIN CAPITAL LETTER W WITH DIAERESIS
-    u'\u1e85'   #  0xBE -> LATIN SMALL LETTER W WITH DIAERESIS
-    u'\u1e61'   #  0xBF -> LATIN SMALL LETTER S WITH DOT ABOVE
-    u'\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\u0174'   #  0xD0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX
-    u'\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\u1e6a'   #  0xD7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE
-    u'\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\u0176'   #  0xDE -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\u0175'   #  0xF0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX
-    u'\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\u1e6b'   #  0xF7 -> LATIN SMALL LETTER T WITH DOT ABOVE
-    u'\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\u0177'   #  0xFE -> LATIN SMALL LETTER Y WITH CIRCUMFLEX
-    u'\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u1e02'   #  0xA1 -> LATIN CAPITAL LETTER B WITH DOT ABOVE
+    '\u1e03'   #  0xA2 -> LATIN SMALL LETTER B WITH DOT ABOVE
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\u010a'   #  0xA4 -> LATIN CAPITAL LETTER C WITH DOT ABOVE
+    '\u010b'   #  0xA5 -> LATIN SMALL LETTER C WITH DOT ABOVE
+    '\u1e0a'   #  0xA6 -> LATIN CAPITAL LETTER D WITH DOT ABOVE
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\u1e80'   #  0xA8 -> LATIN CAPITAL LETTER W WITH GRAVE
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u1e82'   #  0xAA -> LATIN CAPITAL LETTER W WITH ACUTE
+    '\u1e0b'   #  0xAB -> LATIN SMALL LETTER D WITH DOT ABOVE
+    '\u1ef2'   #  0xAC -> LATIN CAPITAL LETTER Y WITH GRAVE
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\u0178'   #  0xAF -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\u1e1e'   #  0xB0 -> LATIN CAPITAL LETTER F WITH DOT ABOVE
+    '\u1e1f'   #  0xB1 -> LATIN SMALL LETTER F WITH DOT ABOVE
+    '\u0120'   #  0xB2 -> LATIN CAPITAL LETTER G WITH DOT ABOVE
+    '\u0121'   #  0xB3 -> LATIN SMALL LETTER G WITH DOT ABOVE
+    '\u1e40'   #  0xB4 -> LATIN CAPITAL LETTER M WITH DOT ABOVE
+    '\u1e41'   #  0xB5 -> LATIN SMALL LETTER M WITH DOT ABOVE
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\u1e56'   #  0xB7 -> LATIN CAPITAL LETTER P WITH DOT ABOVE
+    '\u1e81'   #  0xB8 -> LATIN SMALL LETTER W WITH GRAVE
+    '\u1e57'   #  0xB9 -> LATIN SMALL LETTER P WITH DOT ABOVE
+    '\u1e83'   #  0xBA -> LATIN SMALL LETTER W WITH ACUTE
+    '\u1e60'   #  0xBB -> LATIN CAPITAL LETTER S WITH DOT ABOVE
+    '\u1ef3'   #  0xBC -> LATIN SMALL LETTER Y WITH GRAVE
+    '\u1e84'   #  0xBD -> LATIN CAPITAL LETTER W WITH DIAERESIS
+    '\u1e85'   #  0xBE -> LATIN SMALL LETTER W WITH DIAERESIS
+    '\u1e61'   #  0xBF -> LATIN SMALL LETTER S WITH DOT ABOVE
+    '\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\u0174'   #  0xD0 -> LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+    '\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\u1e6a'   #  0xD7 -> LATIN CAPITAL LETTER T WITH DOT ABOVE
+    '\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\u0176'   #  0xDE -> LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\u0175'   #  0xF0 -> LATIN SMALL LETTER W WITH CIRCUMFLEX
+    '\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\u1e6b'   #  0xF7 -> LATIN SMALL LETTER T WITH DOT ABOVE
+    '\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+    '\u0177'   #  0xFE -> LATIN SMALL LETTER Y WITH CIRCUMFLEX
+    '\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_15.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_15.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_15.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\u20ac'   #  0xA4 -> EURO SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\u0160'   #  0xA6 -> LATIN CAPITAL LETTER S WITH CARON
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\u0161'   #  0xA8 -> LATIN SMALL LETTER S WITH CARON
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\u017d'   #  0xB4 -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\u017e'   #  0xB8 -> LATIN SMALL LETTER Z WITH CARON
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u0152'   #  0xBC -> LATIN CAPITAL LIGATURE OE
-    u'\u0153'   #  0xBD -> LATIN SMALL LIGATURE OE
-    u'\u0178'   #  0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\xbf'     #  0xBF -> INVERTED QUESTION MARK
-    u'\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xd0'     #  0xD0 -> LATIN CAPITAL LETTER ETH
-    u'\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf0'     #  0xF0 -> LATIN SMALL LETTER ETH
-    u'\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xfe'     #  0xFE -> LATIN SMALL LETTER THORN
-    u'\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\u20ac'   #  0xA4 -> EURO SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\u0160'   #  0xA6 -> LATIN CAPITAL LETTER S WITH CARON
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\u0161'   #  0xA8 -> LATIN SMALL LETTER S WITH CARON
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\u017d'   #  0xB4 -> LATIN CAPITAL LETTER Z WITH CARON
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\u017e'   #  0xB8 -> LATIN SMALL LETTER Z WITH CARON
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u0152'   #  0xBC -> LATIN CAPITAL LIGATURE OE
+    '\u0153'   #  0xBD -> LATIN SMALL LIGATURE OE
+    '\u0178'   #  0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\xbf'     #  0xBF -> INVERTED QUESTION MARK
+    '\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xd0'     #  0xD0 -> LATIN CAPITAL LETTER ETH
+    '\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf0'     #  0xF0 -> LATIN SMALL LETTER ETH
+    '\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xfe'     #  0xFE -> LATIN SMALL LETTER THORN
+    '\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_16.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_16.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_16.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u0104'   #  0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\u0105'   #  0xA2 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u0141'   #  0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
-    u'\u20ac'   #  0xA4 -> EURO SIGN
-    u'\u201e'   #  0xA5 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u0160'   #  0xA6 -> LATIN CAPITAL LETTER S WITH CARON
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\u0161'   #  0xA8 -> LATIN SMALL LETTER S WITH CARON
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u0218'   #  0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u0179'   #  0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\u017a'   #  0xAE -> LATIN SMALL LETTER Z WITH ACUTE
-    u'\u017b'   #  0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u010c'   #  0xB2 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\u0142'   #  0xB3 -> LATIN SMALL LETTER L WITH STROKE
-    u'\u017d'   #  0xB4 -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\u201d'   #  0xB5 -> RIGHT DOUBLE QUOTATION MARK
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\u017e'   #  0xB8 -> LATIN SMALL LETTER Z WITH CARON
-    u'\u010d'   #  0xB9 -> LATIN SMALL LETTER C WITH CARON
-    u'\u0219'   #  0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u0152'   #  0xBC -> LATIN CAPITAL LIGATURE OE
-    u'\u0153'   #  0xBD -> LATIN SMALL LIGATURE OE
-    u'\u0178'   #  0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\u017c'   #  0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
-    u'\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\u0102'   #  0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\u0106'   #  0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
-    u'\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
-    u'\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\u0150'   #  0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\u015a'   #  0xD7 -> LATIN CAPITAL LETTER S WITH ACUTE
-    u'\u0170'   #  0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
-    u'\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u0118'   #  0xDD -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\u021a'   #  0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\u0103'   #  0xE3 -> LATIN SMALL LETTER A WITH BREVE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u0107'   #  0xE5 -> LATIN SMALL LETTER C WITH ACUTE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
-    u'\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
-    u'\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\u0151'   #  0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\u015b'   #  0xF7 -> LATIN SMALL LETTER S WITH ACUTE
-    u'\u0171'   #  0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u0119'   #  0xFD -> LATIN SMALL LETTER E WITH OGONEK
-    u'\u021b'   #  0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW
-    u'\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u0104'   #  0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\u0105'   #  0xA2 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u0141'   #  0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
+    '\u20ac'   #  0xA4 -> EURO SIGN
+    '\u201e'   #  0xA5 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u0160'   #  0xA6 -> LATIN CAPITAL LETTER S WITH CARON
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\u0161'   #  0xA8 -> LATIN SMALL LETTER S WITH CARON
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u0218'   #  0xAA -> LATIN CAPITAL LETTER S WITH COMMA BELOW
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u0179'   #  0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\u017a'   #  0xAE -> LATIN SMALL LETTER Z WITH ACUTE
+    '\u017b'   #  0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u010c'   #  0xB2 -> LATIN CAPITAL LETTER C WITH CARON
+    '\u0142'   #  0xB3 -> LATIN SMALL LETTER L WITH STROKE
+    '\u017d'   #  0xB4 -> LATIN CAPITAL LETTER Z WITH CARON
+    '\u201d'   #  0xB5 -> RIGHT DOUBLE QUOTATION MARK
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\u017e'   #  0xB8 -> LATIN SMALL LETTER Z WITH CARON
+    '\u010d'   #  0xB9 -> LATIN SMALL LETTER C WITH CARON
+    '\u0219'   #  0xBA -> LATIN SMALL LETTER S WITH COMMA BELOW
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u0152'   #  0xBC -> LATIN CAPITAL LIGATURE OE
+    '\u0153'   #  0xBD -> LATIN SMALL LIGATURE OE
+    '\u0178'   #  0xBE -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\u017c'   #  0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
+    '\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\u0102'   #  0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\u0106'   #  0xC5 -> LATIN CAPITAL LETTER C WITH ACUTE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+    '\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
+    '\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\u0150'   #  0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\u015a'   #  0xD7 -> LATIN CAPITAL LETTER S WITH ACUTE
+    '\u0170'   #  0xD8 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+    '\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u0118'   #  0xDD -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\u021a'   #  0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\u0103'   #  0xE3 -> LATIN SMALL LETTER A WITH BREVE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u0107'   #  0xE5 -> LATIN SMALL LETTER C WITH ACUTE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
+    '\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
+    '\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\u0151'   #  0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\u015b'   #  0xF7 -> LATIN SMALL LETTER S WITH ACUTE
+    '\u0171'   #  0xF8 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u0119'   #  0xFD -> LATIN SMALL LETTER E WITH OGONEK
+    '\u021b'   #  0xFE -> LATIN SMALL LETTER T WITH COMMA BELOW
+    '\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_2.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_2.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_2.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u0104'   #  0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\u02d8'   #  0xA2 -> BREVE
-    u'\u0141'   #  0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\u013d'   #  0xA5 -> LATIN CAPITAL LETTER L WITH CARON
-    u'\u015a'   #  0xA6 -> LATIN CAPITAL LETTER S WITH ACUTE
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\u0160'   #  0xA9 -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u015e'   #  0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
-    u'\u0164'   #  0xAB -> LATIN CAPITAL LETTER T WITH CARON
-    u'\u0179'   #  0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\u017d'   #  0xAE -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\u017b'   #  0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\u0105'   #  0xB1 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u02db'   #  0xB2 -> OGONEK
-    u'\u0142'   #  0xB3 -> LATIN SMALL LETTER L WITH STROKE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\u013e'   #  0xB5 -> LATIN SMALL LETTER L WITH CARON
-    u'\u015b'   #  0xB6 -> LATIN SMALL LETTER S WITH ACUTE
-    u'\u02c7'   #  0xB7 -> CARON
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\u0161'   #  0xB9 -> LATIN SMALL LETTER S WITH CARON
-    u'\u015f'   #  0xBA -> LATIN SMALL LETTER S WITH CEDILLA
-    u'\u0165'   #  0xBB -> LATIN SMALL LETTER T WITH CARON
-    u'\u017a'   #  0xBC -> LATIN SMALL LETTER Z WITH ACUTE
-    u'\u02dd'   #  0xBD -> DOUBLE ACUTE ACCENT
-    u'\u017e'   #  0xBE -> LATIN SMALL LETTER Z WITH CARON
-    u'\u017c'   #  0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
-    u'\u0154'   #  0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\u0102'   #  0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\u0139'   #  0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE
-    u'\u0106'   #  0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\u0118'   #  0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\u011a'   #  0xCC -> LATIN CAPITAL LETTER E WITH CARON
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\u010e'   #  0xCF -> LATIN CAPITAL LETTER D WITH CARON
-    u'\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
-    u'\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
-    u'\u0147'   #  0xD2 -> LATIN CAPITAL LETTER N WITH CARON
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\u0150'   #  0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\u0158'   #  0xD8 -> LATIN CAPITAL LETTER R WITH CARON
-    u'\u016e'   #  0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\u0170'   #  0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\u0162'   #  0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\u0155'   #  0xE0 -> LATIN SMALL LETTER R WITH ACUTE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\u0103'   #  0xE3 -> LATIN SMALL LETTER A WITH BREVE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u013a'   #  0xE5 -> LATIN SMALL LETTER L WITH ACUTE
-    u'\u0107'   #  0xE6 -> LATIN SMALL LETTER C WITH ACUTE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\u0119'   #  0xEA -> LATIN SMALL LETTER E WITH OGONEK
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\u011b'   #  0xEC -> LATIN SMALL LETTER E WITH CARON
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\u010f'   #  0xEF -> LATIN SMALL LETTER D WITH CARON
-    u'\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
-    u'\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
-    u'\u0148'   #  0xF2 -> LATIN SMALL LETTER N WITH CARON
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\u0151'   #  0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\u0159'   #  0xF8 -> LATIN SMALL LETTER R WITH CARON
-    u'\u016f'   #  0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\u0171'   #  0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\u0163'   #  0xFE -> LATIN SMALL LETTER T WITH CEDILLA
-    u'\u02d9'   #  0xFF -> DOT ABOVE
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u0104'   #  0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\u02d8'   #  0xA2 -> BREVE
+    '\u0141'   #  0xA3 -> LATIN CAPITAL LETTER L WITH STROKE
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\u013d'   #  0xA5 -> LATIN CAPITAL LETTER L WITH CARON
+    '\u015a'   #  0xA6 -> LATIN CAPITAL LETTER S WITH ACUTE
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\u0160'   #  0xA9 -> LATIN CAPITAL LETTER S WITH CARON
+    '\u015e'   #  0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
+    '\u0164'   #  0xAB -> LATIN CAPITAL LETTER T WITH CARON
+    '\u0179'   #  0xAC -> LATIN CAPITAL LETTER Z WITH ACUTE
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\u017d'   #  0xAE -> LATIN CAPITAL LETTER Z WITH CARON
+    '\u017b'   #  0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\u0105'   #  0xB1 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u02db'   #  0xB2 -> OGONEK
+    '\u0142'   #  0xB3 -> LATIN SMALL LETTER L WITH STROKE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\u013e'   #  0xB5 -> LATIN SMALL LETTER L WITH CARON
+    '\u015b'   #  0xB6 -> LATIN SMALL LETTER S WITH ACUTE
+    '\u02c7'   #  0xB7 -> CARON
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\u0161'   #  0xB9 -> LATIN SMALL LETTER S WITH CARON
+    '\u015f'   #  0xBA -> LATIN SMALL LETTER S WITH CEDILLA
+    '\u0165'   #  0xBB -> LATIN SMALL LETTER T WITH CARON
+    '\u017a'   #  0xBC -> LATIN SMALL LETTER Z WITH ACUTE
+    '\u02dd'   #  0xBD -> DOUBLE ACUTE ACCENT
+    '\u017e'   #  0xBE -> LATIN SMALL LETTER Z WITH CARON
+    '\u017c'   #  0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
+    '\u0154'   #  0xC0 -> LATIN CAPITAL LETTER R WITH ACUTE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\u0102'   #  0xC3 -> LATIN CAPITAL LETTER A WITH BREVE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\u0139'   #  0xC5 -> LATIN CAPITAL LETTER L WITH ACUTE
+    '\u0106'   #  0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\u0118'   #  0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\u011a'   #  0xCC -> LATIN CAPITAL LETTER E WITH CARON
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\u010e'   #  0xCF -> LATIN CAPITAL LETTER D WITH CARON
+    '\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+    '\u0143'   #  0xD1 -> LATIN CAPITAL LETTER N WITH ACUTE
+    '\u0147'   #  0xD2 -> LATIN CAPITAL LETTER N WITH CARON
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\u0150'   #  0xD5 -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\u0158'   #  0xD8 -> LATIN CAPITAL LETTER R WITH CARON
+    '\u016e'   #  0xD9 -> LATIN CAPITAL LETTER U WITH RING ABOVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\u0170'   #  0xDB -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xdd'     #  0xDD -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\u0162'   #  0xDE -> LATIN CAPITAL LETTER T WITH CEDILLA
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\u0155'   #  0xE0 -> LATIN SMALL LETTER R WITH ACUTE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\u0103'   #  0xE3 -> LATIN SMALL LETTER A WITH BREVE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u013a'   #  0xE5 -> LATIN SMALL LETTER L WITH ACUTE
+    '\u0107'   #  0xE6 -> LATIN SMALL LETTER C WITH ACUTE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\u0119'   #  0xEA -> LATIN SMALL LETTER E WITH OGONEK
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\u011b'   #  0xEC -> LATIN SMALL LETTER E WITH CARON
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\u010f'   #  0xEF -> LATIN SMALL LETTER D WITH CARON
+    '\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
+    '\u0144'   #  0xF1 -> LATIN SMALL LETTER N WITH ACUTE
+    '\u0148'   #  0xF2 -> LATIN SMALL LETTER N WITH CARON
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\u0151'   #  0xF5 -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\u0159'   #  0xF8 -> LATIN SMALL LETTER R WITH CARON
+    '\u016f'   #  0xF9 -> LATIN SMALL LETTER U WITH RING ABOVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\u0171'   #  0xFB -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xfd'     #  0xFD -> LATIN SMALL LETTER Y WITH ACUTE
+    '\u0163'   #  0xFE -> LATIN SMALL LETTER T WITH CEDILLA
+    '\u02d9'   #  0xFF -> DOT ABOVE
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_3.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_3.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_3.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u0126'   #  0xA1 -> LATIN CAPITAL LETTER H WITH STROKE
-    u'\u02d8'   #  0xA2 -> BREVE
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\ufffe'
-    u'\u0124'   #  0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\u0130'   #  0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE
-    u'\u015e'   #  0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
-    u'\u011e'   #  0xAB -> LATIN CAPITAL LETTER G WITH BREVE
-    u'\u0134'   #  0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\ufffe'
-    u'\u017b'   #  0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\u0127'   #  0xB1 -> LATIN SMALL LETTER H WITH STROKE
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\u0125'   #  0xB6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\u0131'   #  0xB9 -> LATIN SMALL LETTER DOTLESS I
-    u'\u015f'   #  0xBA -> LATIN SMALL LETTER S WITH CEDILLA
-    u'\u011f'   #  0xBB -> LATIN SMALL LETTER G WITH BREVE
-    u'\u0135'   #  0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\ufffe'
-    u'\u017c'   #  0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
-    u'\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\ufffe'
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\u010a'   #  0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE
-    u'\u0108'   #  0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\ufffe'
-    u'\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\u0120'   #  0xD5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\u011c'   #  0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX
-    u'\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u016c'   #  0xDD -> LATIN CAPITAL LETTER U WITH BREVE
-    u'\u015c'   #  0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\ufffe'
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u010b'   #  0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE
-    u'\u0109'   #  0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\ufffe'
-    u'\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\u0121'   #  0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\u011d'   #  0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u016d'   #  0xFD -> LATIN SMALL LETTER U WITH BREVE
-    u'\u015d'   #  0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX
-    u'\u02d9'   #  0xFF -> DOT ABOVE
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u0126'   #  0xA1 -> LATIN CAPITAL LETTER H WITH STROKE
+    '\u02d8'   #  0xA2 -> BREVE
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\ufffe'
+    '\u0124'   #  0xA6 -> LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\u0130'   #  0xA9 -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+    '\u015e'   #  0xAA -> LATIN CAPITAL LETTER S WITH CEDILLA
+    '\u011e'   #  0xAB -> LATIN CAPITAL LETTER G WITH BREVE
+    '\u0134'   #  0xAC -> LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\ufffe'
+    '\u017b'   #  0xAF -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\u0127'   #  0xB1 -> LATIN SMALL LETTER H WITH STROKE
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\u0125'   #  0xB6 -> LATIN SMALL LETTER H WITH CIRCUMFLEX
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\u0131'   #  0xB9 -> LATIN SMALL LETTER DOTLESS I
+    '\u015f'   #  0xBA -> LATIN SMALL LETTER S WITH CEDILLA
+    '\u011f'   #  0xBB -> LATIN SMALL LETTER G WITH BREVE
+    '\u0135'   #  0xBC -> LATIN SMALL LETTER J WITH CIRCUMFLEX
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\ufffe'
+    '\u017c'   #  0xBF -> LATIN SMALL LETTER Z WITH DOT ABOVE
+    '\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\ufffe'
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\u010a'   #  0xC5 -> LATIN CAPITAL LETTER C WITH DOT ABOVE
+    '\u0108'   #  0xC6 -> LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\ufffe'
+    '\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\u0120'   #  0xD5 -> LATIN CAPITAL LETTER G WITH DOT ABOVE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\u011c'   #  0xD8 -> LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+    '\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u016c'   #  0xDD -> LATIN CAPITAL LETTER U WITH BREVE
+    '\u015c'   #  0xDE -> LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\ufffe'
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u010b'   #  0xE5 -> LATIN SMALL LETTER C WITH DOT ABOVE
+    '\u0109'   #  0xE6 -> LATIN SMALL LETTER C WITH CIRCUMFLEX
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\ufffe'
+    '\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\u0121'   #  0xF5 -> LATIN SMALL LETTER G WITH DOT ABOVE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\u011d'   #  0xF8 -> LATIN SMALL LETTER G WITH CIRCUMFLEX
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u016d'   #  0xFD -> LATIN SMALL LETTER U WITH BREVE
+    '\u015d'   #  0xFE -> LATIN SMALL LETTER S WITH CIRCUMFLEX
+    '\u02d9'   #  0xFF -> DOT ABOVE
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_4.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_4.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_4.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u0104'   #  0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\u0138'   #  0xA2 -> LATIN SMALL LETTER KRA
-    u'\u0156'   #  0xA3 -> LATIN CAPITAL LETTER R WITH CEDILLA
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\u0128'   #  0xA5 -> LATIN CAPITAL LETTER I WITH TILDE
-    u'\u013b'   #  0xA6 -> LATIN CAPITAL LETTER L WITH CEDILLA
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\u0160'   #  0xA9 -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u0112'   #  0xAA -> LATIN CAPITAL LETTER E WITH MACRON
-    u'\u0122'   #  0xAB -> LATIN CAPITAL LETTER G WITH CEDILLA
-    u'\u0166'   #  0xAC -> LATIN CAPITAL LETTER T WITH STROKE
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\u017d'   #  0xAE -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\u0105'   #  0xB1 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u02db'   #  0xB2 -> OGONEK
-    u'\u0157'   #  0xB3 -> LATIN SMALL LETTER R WITH CEDILLA
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\u0129'   #  0xB5 -> LATIN SMALL LETTER I WITH TILDE
-    u'\u013c'   #  0xB6 -> LATIN SMALL LETTER L WITH CEDILLA
-    u'\u02c7'   #  0xB7 -> CARON
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\u0161'   #  0xB9 -> LATIN SMALL LETTER S WITH CARON
-    u'\u0113'   #  0xBA -> LATIN SMALL LETTER E WITH MACRON
-    u'\u0123'   #  0xBB -> LATIN SMALL LETTER G WITH CEDILLA
-    u'\u0167'   #  0xBC -> LATIN SMALL LETTER T WITH STROKE
-    u'\u014a'   #  0xBD -> LATIN CAPITAL LETTER ENG
-    u'\u017e'   #  0xBE -> LATIN SMALL LETTER Z WITH CARON
-    u'\u014b'   #  0xBF -> LATIN SMALL LETTER ENG
-    u'\u0100'   #  0xC0 -> LATIN CAPITAL LETTER A WITH MACRON
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\u012e'   #  0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK
-    u'\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\u0118'   #  0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\u0116'   #  0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\u012a'   #  0xCF -> LATIN CAPITAL LETTER I WITH MACRON
-    u'\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
-    u'\u0145'   #  0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA
-    u'\u014c'   #  0xD2 -> LATIN CAPITAL LETTER O WITH MACRON
-    u'\u0136'   #  0xD3 -> LATIN CAPITAL LETTER K WITH CEDILLA
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\u0172'   #  0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u0168'   #  0xDD -> LATIN CAPITAL LETTER U WITH TILDE
-    u'\u016a'   #  0xDE -> LATIN CAPITAL LETTER U WITH MACRON
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\u0101'   #  0xE0 -> LATIN SMALL LETTER A WITH MACRON
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\u012f'   #  0xE7 -> LATIN SMALL LETTER I WITH OGONEK
-    u'\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\u0119'   #  0xEA -> LATIN SMALL LETTER E WITH OGONEK
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\u0117'   #  0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\u012b'   #  0xEF -> LATIN SMALL LETTER I WITH MACRON
-    u'\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
-    u'\u0146'   #  0xF1 -> LATIN SMALL LETTER N WITH CEDILLA
-    u'\u014d'   #  0xF2 -> LATIN SMALL LETTER O WITH MACRON
-    u'\u0137'   #  0xF3 -> LATIN SMALL LETTER K WITH CEDILLA
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\u0173'   #  0xF9 -> LATIN SMALL LETTER U WITH OGONEK
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u0169'   #  0xFD -> LATIN SMALL LETTER U WITH TILDE
-    u'\u016b'   #  0xFE -> LATIN SMALL LETTER U WITH MACRON
-    u'\u02d9'   #  0xFF -> DOT ABOVE
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u0104'   #  0xA1 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\u0138'   #  0xA2 -> LATIN SMALL LETTER KRA
+    '\u0156'   #  0xA3 -> LATIN CAPITAL LETTER R WITH CEDILLA
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\u0128'   #  0xA5 -> LATIN CAPITAL LETTER I WITH TILDE
+    '\u013b'   #  0xA6 -> LATIN CAPITAL LETTER L WITH CEDILLA
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\u0160'   #  0xA9 -> LATIN CAPITAL LETTER S WITH CARON
+    '\u0112'   #  0xAA -> LATIN CAPITAL LETTER E WITH MACRON
+    '\u0122'   #  0xAB -> LATIN CAPITAL LETTER G WITH CEDILLA
+    '\u0166'   #  0xAC -> LATIN CAPITAL LETTER T WITH STROKE
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\u017d'   #  0xAE -> LATIN CAPITAL LETTER Z WITH CARON
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\u0105'   #  0xB1 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u02db'   #  0xB2 -> OGONEK
+    '\u0157'   #  0xB3 -> LATIN SMALL LETTER R WITH CEDILLA
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\u0129'   #  0xB5 -> LATIN SMALL LETTER I WITH TILDE
+    '\u013c'   #  0xB6 -> LATIN SMALL LETTER L WITH CEDILLA
+    '\u02c7'   #  0xB7 -> CARON
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\u0161'   #  0xB9 -> LATIN SMALL LETTER S WITH CARON
+    '\u0113'   #  0xBA -> LATIN SMALL LETTER E WITH MACRON
+    '\u0123'   #  0xBB -> LATIN SMALL LETTER G WITH CEDILLA
+    '\u0167'   #  0xBC -> LATIN SMALL LETTER T WITH STROKE
+    '\u014a'   #  0xBD -> LATIN CAPITAL LETTER ENG
+    '\u017e'   #  0xBE -> LATIN SMALL LETTER Z WITH CARON
+    '\u014b'   #  0xBF -> LATIN SMALL LETTER ENG
+    '\u0100'   #  0xC0 -> LATIN CAPITAL LETTER A WITH MACRON
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\u012e'   #  0xC7 -> LATIN CAPITAL LETTER I WITH OGONEK
+    '\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\u0118'   #  0xCA -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\u0116'   #  0xCC -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\u012a'   #  0xCF -> LATIN CAPITAL LETTER I WITH MACRON
+    '\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+    '\u0145'   #  0xD1 -> LATIN CAPITAL LETTER N WITH CEDILLA
+    '\u014c'   #  0xD2 -> LATIN CAPITAL LETTER O WITH MACRON
+    '\u0136'   #  0xD3 -> LATIN CAPITAL LETTER K WITH CEDILLA
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\u0172'   #  0xD9 -> LATIN CAPITAL LETTER U WITH OGONEK
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u0168'   #  0xDD -> LATIN CAPITAL LETTER U WITH TILDE
+    '\u016a'   #  0xDE -> LATIN CAPITAL LETTER U WITH MACRON
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\u0101'   #  0xE0 -> LATIN SMALL LETTER A WITH MACRON
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\u012f'   #  0xE7 -> LATIN SMALL LETTER I WITH OGONEK
+    '\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\u0119'   #  0xEA -> LATIN SMALL LETTER E WITH OGONEK
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\u0117'   #  0xEC -> LATIN SMALL LETTER E WITH DOT ABOVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\u012b'   #  0xEF -> LATIN SMALL LETTER I WITH MACRON
+    '\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
+    '\u0146'   #  0xF1 -> LATIN SMALL LETTER N WITH CEDILLA
+    '\u014d'   #  0xF2 -> LATIN SMALL LETTER O WITH MACRON
+    '\u0137'   #  0xF3 -> LATIN SMALL LETTER K WITH CEDILLA
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
+    '\u0173'   #  0xF9 -> LATIN SMALL LETTER U WITH OGONEK
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u0169'   #  0xFD -> LATIN SMALL LETTER U WITH TILDE
+    '\u016b'   #  0xFE -> LATIN SMALL LETTER U WITH MACRON
+    '\u02d9'   #  0xFF -> DOT ABOVE
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_5.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_5.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_5.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u0401'   #  0xA1 -> CYRILLIC CAPITAL LETTER IO
-    u'\u0402'   #  0xA2 -> CYRILLIC CAPITAL LETTER DJE
-    u'\u0403'   #  0xA3 -> CYRILLIC CAPITAL LETTER GJE
-    u'\u0404'   #  0xA4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
-    u'\u0405'   #  0xA5 -> CYRILLIC CAPITAL LETTER DZE
-    u'\u0406'   #  0xA6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\u0407'   #  0xA7 -> CYRILLIC CAPITAL LETTER YI
-    u'\u0408'   #  0xA8 -> CYRILLIC CAPITAL LETTER JE
-    u'\u0409'   #  0xA9 -> CYRILLIC CAPITAL LETTER LJE
-    u'\u040a'   #  0xAA -> CYRILLIC CAPITAL LETTER NJE
-    u'\u040b'   #  0xAB -> CYRILLIC CAPITAL LETTER TSHE
-    u'\u040c'   #  0xAC -> CYRILLIC CAPITAL LETTER KJE
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\u040e'   #  0xAE -> CYRILLIC CAPITAL LETTER SHORT U
-    u'\u040f'   #  0xAF -> CYRILLIC CAPITAL LETTER DZHE
-    u'\u0410'   #  0xB0 -> CYRILLIC CAPITAL LETTER A
-    u'\u0411'   #  0xB1 -> CYRILLIC CAPITAL LETTER BE
-    u'\u0412'   #  0xB2 -> CYRILLIC CAPITAL LETTER VE
-    u'\u0413'   #  0xB3 -> CYRILLIC CAPITAL LETTER GHE
-    u'\u0414'   #  0xB4 -> CYRILLIC CAPITAL LETTER DE
-    u'\u0415'   #  0xB5 -> CYRILLIC CAPITAL LETTER IE
-    u'\u0416'   #  0xB6 -> CYRILLIC CAPITAL LETTER ZHE
-    u'\u0417'   #  0xB7 -> CYRILLIC CAPITAL LETTER ZE
-    u'\u0418'   #  0xB8 -> CYRILLIC CAPITAL LETTER I
-    u'\u0419'   #  0xB9 -> CYRILLIC CAPITAL LETTER SHORT I
-    u'\u041a'   #  0xBA -> CYRILLIC CAPITAL LETTER KA
-    u'\u041b'   #  0xBB -> CYRILLIC CAPITAL LETTER EL
-    u'\u041c'   #  0xBC -> CYRILLIC CAPITAL LETTER EM
-    u'\u041d'   #  0xBD -> CYRILLIC CAPITAL LETTER EN
-    u'\u041e'   #  0xBE -> CYRILLIC CAPITAL LETTER O
-    u'\u041f'   #  0xBF -> CYRILLIC CAPITAL LETTER PE
-    u'\u0420'   #  0xC0 -> CYRILLIC CAPITAL LETTER ER
-    u'\u0421'   #  0xC1 -> CYRILLIC CAPITAL LETTER ES
-    u'\u0422'   #  0xC2 -> CYRILLIC CAPITAL LETTER TE
-    u'\u0423'   #  0xC3 -> CYRILLIC CAPITAL LETTER U
-    u'\u0424'   #  0xC4 -> CYRILLIC CAPITAL LETTER EF
-    u'\u0425'   #  0xC5 -> CYRILLIC CAPITAL LETTER HA
-    u'\u0426'   #  0xC6 -> CYRILLIC CAPITAL LETTER TSE
-    u'\u0427'   #  0xC7 -> CYRILLIC CAPITAL LETTER CHE
-    u'\u0428'   #  0xC8 -> CYRILLIC CAPITAL LETTER SHA
-    u'\u0429'   #  0xC9 -> CYRILLIC CAPITAL LETTER SHCHA
-    u'\u042a'   #  0xCA -> CYRILLIC CAPITAL LETTER HARD SIGN
-    u'\u042b'   #  0xCB -> CYRILLIC CAPITAL LETTER YERU
-    u'\u042c'   #  0xCC -> CYRILLIC CAPITAL LETTER SOFT SIGN
-    u'\u042d'   #  0xCD -> CYRILLIC CAPITAL LETTER E
-    u'\u042e'   #  0xCE -> CYRILLIC CAPITAL LETTER YU
-    u'\u042f'   #  0xCF -> CYRILLIC CAPITAL LETTER YA
-    u'\u0430'   #  0xD0 -> CYRILLIC SMALL LETTER A
-    u'\u0431'   #  0xD1 -> CYRILLIC SMALL LETTER BE
-    u'\u0432'   #  0xD2 -> CYRILLIC SMALL LETTER VE
-    u'\u0433'   #  0xD3 -> CYRILLIC SMALL LETTER GHE
-    u'\u0434'   #  0xD4 -> CYRILLIC SMALL LETTER DE
-    u'\u0435'   #  0xD5 -> CYRILLIC SMALL LETTER IE
-    u'\u0436'   #  0xD6 -> CYRILLIC SMALL LETTER ZHE
-    u'\u0437'   #  0xD7 -> CYRILLIC SMALL LETTER ZE
-    u'\u0438'   #  0xD8 -> CYRILLIC SMALL LETTER I
-    u'\u0439'   #  0xD9 -> CYRILLIC SMALL LETTER SHORT I
-    u'\u043a'   #  0xDA -> CYRILLIC SMALL LETTER KA
-    u'\u043b'   #  0xDB -> CYRILLIC SMALL LETTER EL
-    u'\u043c'   #  0xDC -> CYRILLIC SMALL LETTER EM
-    u'\u043d'   #  0xDD -> CYRILLIC SMALL LETTER EN
-    u'\u043e'   #  0xDE -> CYRILLIC SMALL LETTER O
-    u'\u043f'   #  0xDF -> CYRILLIC SMALL LETTER PE
-    u'\u0440'   #  0xE0 -> CYRILLIC SMALL LETTER ER
-    u'\u0441'   #  0xE1 -> CYRILLIC SMALL LETTER ES
-    u'\u0442'   #  0xE2 -> CYRILLIC SMALL LETTER TE
-    u'\u0443'   #  0xE3 -> CYRILLIC SMALL LETTER U
-    u'\u0444'   #  0xE4 -> CYRILLIC SMALL LETTER EF
-    u'\u0445'   #  0xE5 -> CYRILLIC SMALL LETTER HA
-    u'\u0446'   #  0xE6 -> CYRILLIC SMALL LETTER TSE
-    u'\u0447'   #  0xE7 -> CYRILLIC SMALL LETTER CHE
-    u'\u0448'   #  0xE8 -> CYRILLIC SMALL LETTER SHA
-    u'\u0449'   #  0xE9 -> CYRILLIC SMALL LETTER SHCHA
-    u'\u044a'   #  0xEA -> CYRILLIC SMALL LETTER HARD SIGN
-    u'\u044b'   #  0xEB -> CYRILLIC SMALL LETTER YERU
-    u'\u044c'   #  0xEC -> CYRILLIC SMALL LETTER SOFT SIGN
-    u'\u044d'   #  0xED -> CYRILLIC SMALL LETTER E
-    u'\u044e'   #  0xEE -> CYRILLIC SMALL LETTER YU
-    u'\u044f'   #  0xEF -> CYRILLIC SMALL LETTER YA
-    u'\u2116'   #  0xF0 -> NUMERO SIGN
-    u'\u0451'   #  0xF1 -> CYRILLIC SMALL LETTER IO
-    u'\u0452'   #  0xF2 -> CYRILLIC SMALL LETTER DJE
-    u'\u0453'   #  0xF3 -> CYRILLIC SMALL LETTER GJE
-    u'\u0454'   #  0xF4 -> CYRILLIC SMALL LETTER UKRAINIAN IE
-    u'\u0455'   #  0xF5 -> CYRILLIC SMALL LETTER DZE
-    u'\u0456'   #  0xF6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\u0457'   #  0xF7 -> CYRILLIC SMALL LETTER YI
-    u'\u0458'   #  0xF8 -> CYRILLIC SMALL LETTER JE
-    u'\u0459'   #  0xF9 -> CYRILLIC SMALL LETTER LJE
-    u'\u045a'   #  0xFA -> CYRILLIC SMALL LETTER NJE
-    u'\u045b'   #  0xFB -> CYRILLIC SMALL LETTER TSHE
-    u'\u045c'   #  0xFC -> CYRILLIC SMALL LETTER KJE
-    u'\xa7'     #  0xFD -> SECTION SIGN
-    u'\u045e'   #  0xFE -> CYRILLIC SMALL LETTER SHORT U
-    u'\u045f'   #  0xFF -> CYRILLIC SMALL LETTER DZHE
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u0401'   #  0xA1 -> CYRILLIC CAPITAL LETTER IO
+    '\u0402'   #  0xA2 -> CYRILLIC CAPITAL LETTER DJE
+    '\u0403'   #  0xA3 -> CYRILLIC CAPITAL LETTER GJE
+    '\u0404'   #  0xA4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+    '\u0405'   #  0xA5 -> CYRILLIC CAPITAL LETTER DZE
+    '\u0406'   #  0xA6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\u0407'   #  0xA7 -> CYRILLIC CAPITAL LETTER YI
+    '\u0408'   #  0xA8 -> CYRILLIC CAPITAL LETTER JE
+    '\u0409'   #  0xA9 -> CYRILLIC CAPITAL LETTER LJE
+    '\u040a'   #  0xAA -> CYRILLIC CAPITAL LETTER NJE
+    '\u040b'   #  0xAB -> CYRILLIC CAPITAL LETTER TSHE
+    '\u040c'   #  0xAC -> CYRILLIC CAPITAL LETTER KJE
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\u040e'   #  0xAE -> CYRILLIC CAPITAL LETTER SHORT U
+    '\u040f'   #  0xAF -> CYRILLIC CAPITAL LETTER DZHE
+    '\u0410'   #  0xB0 -> CYRILLIC CAPITAL LETTER A
+    '\u0411'   #  0xB1 -> CYRILLIC CAPITAL LETTER BE
+    '\u0412'   #  0xB2 -> CYRILLIC CAPITAL LETTER VE
+    '\u0413'   #  0xB3 -> CYRILLIC CAPITAL LETTER GHE
+    '\u0414'   #  0xB4 -> CYRILLIC CAPITAL LETTER DE
+    '\u0415'   #  0xB5 -> CYRILLIC CAPITAL LETTER IE
+    '\u0416'   #  0xB6 -> CYRILLIC CAPITAL LETTER ZHE
+    '\u0417'   #  0xB7 -> CYRILLIC CAPITAL LETTER ZE
+    '\u0418'   #  0xB8 -> CYRILLIC CAPITAL LETTER I
+    '\u0419'   #  0xB9 -> CYRILLIC CAPITAL LETTER SHORT I
+    '\u041a'   #  0xBA -> CYRILLIC CAPITAL LETTER KA
+    '\u041b'   #  0xBB -> CYRILLIC CAPITAL LETTER EL
+    '\u041c'   #  0xBC -> CYRILLIC CAPITAL LETTER EM
+    '\u041d'   #  0xBD -> CYRILLIC CAPITAL LETTER EN
+    '\u041e'   #  0xBE -> CYRILLIC CAPITAL LETTER O
+    '\u041f'   #  0xBF -> CYRILLIC CAPITAL LETTER PE
+    '\u0420'   #  0xC0 -> CYRILLIC CAPITAL LETTER ER
+    '\u0421'   #  0xC1 -> CYRILLIC CAPITAL LETTER ES
+    '\u0422'   #  0xC2 -> CYRILLIC CAPITAL LETTER TE
+    '\u0423'   #  0xC3 -> CYRILLIC CAPITAL LETTER U
+    '\u0424'   #  0xC4 -> CYRILLIC CAPITAL LETTER EF
+    '\u0425'   #  0xC5 -> CYRILLIC CAPITAL LETTER HA
+    '\u0426'   #  0xC6 -> CYRILLIC CAPITAL LETTER TSE
+    '\u0427'   #  0xC7 -> CYRILLIC CAPITAL LETTER CHE
+    '\u0428'   #  0xC8 -> CYRILLIC CAPITAL LETTER SHA
+    '\u0429'   #  0xC9 -> CYRILLIC CAPITAL LETTER SHCHA
+    '\u042a'   #  0xCA -> CYRILLIC CAPITAL LETTER HARD SIGN
+    '\u042b'   #  0xCB -> CYRILLIC CAPITAL LETTER YERU
+    '\u042c'   #  0xCC -> CYRILLIC CAPITAL LETTER SOFT SIGN
+    '\u042d'   #  0xCD -> CYRILLIC CAPITAL LETTER E
+    '\u042e'   #  0xCE -> CYRILLIC CAPITAL LETTER YU
+    '\u042f'   #  0xCF -> CYRILLIC CAPITAL LETTER YA
+    '\u0430'   #  0xD0 -> CYRILLIC SMALL LETTER A
+    '\u0431'   #  0xD1 -> CYRILLIC SMALL LETTER BE
+    '\u0432'   #  0xD2 -> CYRILLIC SMALL LETTER VE
+    '\u0433'   #  0xD3 -> CYRILLIC SMALL LETTER GHE
+    '\u0434'   #  0xD4 -> CYRILLIC SMALL LETTER DE
+    '\u0435'   #  0xD5 -> CYRILLIC SMALL LETTER IE
+    '\u0436'   #  0xD6 -> CYRILLIC SMALL LETTER ZHE
+    '\u0437'   #  0xD7 -> CYRILLIC SMALL LETTER ZE
+    '\u0438'   #  0xD8 -> CYRILLIC SMALL LETTER I
+    '\u0439'   #  0xD9 -> CYRILLIC SMALL LETTER SHORT I
+    '\u043a'   #  0xDA -> CYRILLIC SMALL LETTER KA
+    '\u043b'   #  0xDB -> CYRILLIC SMALL LETTER EL
+    '\u043c'   #  0xDC -> CYRILLIC SMALL LETTER EM
+    '\u043d'   #  0xDD -> CYRILLIC SMALL LETTER EN
+    '\u043e'   #  0xDE -> CYRILLIC SMALL LETTER O
+    '\u043f'   #  0xDF -> CYRILLIC SMALL LETTER PE
+    '\u0440'   #  0xE0 -> CYRILLIC SMALL LETTER ER
+    '\u0441'   #  0xE1 -> CYRILLIC SMALL LETTER ES
+    '\u0442'   #  0xE2 -> CYRILLIC SMALL LETTER TE
+    '\u0443'   #  0xE3 -> CYRILLIC SMALL LETTER U
+    '\u0444'   #  0xE4 -> CYRILLIC SMALL LETTER EF
+    '\u0445'   #  0xE5 -> CYRILLIC SMALL LETTER HA
+    '\u0446'   #  0xE6 -> CYRILLIC SMALL LETTER TSE
+    '\u0447'   #  0xE7 -> CYRILLIC SMALL LETTER CHE
+    '\u0448'   #  0xE8 -> CYRILLIC SMALL LETTER SHA
+    '\u0449'   #  0xE9 -> CYRILLIC SMALL LETTER SHCHA
+    '\u044a'   #  0xEA -> CYRILLIC SMALL LETTER HARD SIGN
+    '\u044b'   #  0xEB -> CYRILLIC SMALL LETTER YERU
+    '\u044c'   #  0xEC -> CYRILLIC SMALL LETTER SOFT SIGN
+    '\u044d'   #  0xED -> CYRILLIC SMALL LETTER E
+    '\u044e'   #  0xEE -> CYRILLIC SMALL LETTER YU
+    '\u044f'   #  0xEF -> CYRILLIC SMALL LETTER YA
+    '\u2116'   #  0xF0 -> NUMERO SIGN
+    '\u0451'   #  0xF1 -> CYRILLIC SMALL LETTER IO
+    '\u0452'   #  0xF2 -> CYRILLIC SMALL LETTER DJE
+    '\u0453'   #  0xF3 -> CYRILLIC SMALL LETTER GJE
+    '\u0454'   #  0xF4 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+    '\u0455'   #  0xF5 -> CYRILLIC SMALL LETTER DZE
+    '\u0456'   #  0xF6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\u0457'   #  0xF7 -> CYRILLIC SMALL LETTER YI
+    '\u0458'   #  0xF8 -> CYRILLIC SMALL LETTER JE
+    '\u0459'   #  0xF9 -> CYRILLIC SMALL LETTER LJE
+    '\u045a'   #  0xFA -> CYRILLIC SMALL LETTER NJE
+    '\u045b'   #  0xFB -> CYRILLIC SMALL LETTER TSHE
+    '\u045c'   #  0xFC -> CYRILLIC SMALL LETTER KJE
+    '\xa7'     #  0xFD -> SECTION SIGN
+    '\u045e'   #  0xFE -> CYRILLIC SMALL LETTER SHORT U
+    '\u045f'   #  0xFF -> CYRILLIC SMALL LETTER DZHE
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_6.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_6.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_6.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\u060c'   #  0xAC -> ARABIC COMMA
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\u061b'   #  0xBB -> ARABIC SEMICOLON
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\u061f'   #  0xBF -> ARABIC QUESTION MARK
-    u'\ufffe'
-    u'\u0621'   #  0xC1 -> ARABIC LETTER HAMZA
-    u'\u0622'   #  0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
-    u'\u0623'   #  0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
-    u'\u0624'   #  0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
-    u'\u0625'   #  0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
-    u'\u0626'   #  0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
-    u'\u0627'   #  0xC7 -> ARABIC LETTER ALEF
-    u'\u0628'   #  0xC8 -> ARABIC LETTER BEH
-    u'\u0629'   #  0xC9 -> ARABIC LETTER TEH MARBUTA
-    u'\u062a'   #  0xCA -> ARABIC LETTER TEH
-    u'\u062b'   #  0xCB -> ARABIC LETTER THEH
-    u'\u062c'   #  0xCC -> ARABIC LETTER JEEM
-    u'\u062d'   #  0xCD -> ARABIC LETTER HAH
-    u'\u062e'   #  0xCE -> ARABIC LETTER KHAH
-    u'\u062f'   #  0xCF -> ARABIC LETTER DAL
-    u'\u0630'   #  0xD0 -> ARABIC LETTER THAL
-    u'\u0631'   #  0xD1 -> ARABIC LETTER REH
-    u'\u0632'   #  0xD2 -> ARABIC LETTER ZAIN
-    u'\u0633'   #  0xD3 -> ARABIC LETTER SEEN
-    u'\u0634'   #  0xD4 -> ARABIC LETTER SHEEN
-    u'\u0635'   #  0xD5 -> ARABIC LETTER SAD
-    u'\u0636'   #  0xD6 -> ARABIC LETTER DAD
-    u'\u0637'   #  0xD7 -> ARABIC LETTER TAH
-    u'\u0638'   #  0xD8 -> ARABIC LETTER ZAH
-    u'\u0639'   #  0xD9 -> ARABIC LETTER AIN
-    u'\u063a'   #  0xDA -> ARABIC LETTER GHAIN
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\u0640'   #  0xE0 -> ARABIC TATWEEL
-    u'\u0641'   #  0xE1 -> ARABIC LETTER FEH
-    u'\u0642'   #  0xE2 -> ARABIC LETTER QAF
-    u'\u0643'   #  0xE3 -> ARABIC LETTER KAF
-    u'\u0644'   #  0xE4 -> ARABIC LETTER LAM
-    u'\u0645'   #  0xE5 -> ARABIC LETTER MEEM
-    u'\u0646'   #  0xE6 -> ARABIC LETTER NOON
-    u'\u0647'   #  0xE7 -> ARABIC LETTER HEH
-    u'\u0648'   #  0xE8 -> ARABIC LETTER WAW
-    u'\u0649'   #  0xE9 -> ARABIC LETTER ALEF MAKSURA
-    u'\u064a'   #  0xEA -> ARABIC LETTER YEH
-    u'\u064b'   #  0xEB -> ARABIC FATHATAN
-    u'\u064c'   #  0xEC -> ARABIC DAMMATAN
-    u'\u064d'   #  0xED -> ARABIC KASRATAN
-    u'\u064e'   #  0xEE -> ARABIC FATHA
-    u'\u064f'   #  0xEF -> ARABIC DAMMA
-    u'\u0650'   #  0xF0 -> ARABIC KASRA
-    u'\u0651'   #  0xF1 -> ARABIC SHADDA
-    u'\u0652'   #  0xF2 -> ARABIC SUKUN
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\u060c'   #  0xAC -> ARABIC COMMA
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\u061b'   #  0xBB -> ARABIC SEMICOLON
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\u061f'   #  0xBF -> ARABIC QUESTION MARK
+    '\ufffe'
+    '\u0621'   #  0xC1 -> ARABIC LETTER HAMZA
+    '\u0622'   #  0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
+    '\u0623'   #  0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
+    '\u0624'   #  0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
+    '\u0625'   #  0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
+    '\u0626'   #  0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
+    '\u0627'   #  0xC7 -> ARABIC LETTER ALEF
+    '\u0628'   #  0xC8 -> ARABIC LETTER BEH
+    '\u0629'   #  0xC9 -> ARABIC LETTER TEH MARBUTA
+    '\u062a'   #  0xCA -> ARABIC LETTER TEH
+    '\u062b'   #  0xCB -> ARABIC LETTER THEH
+    '\u062c'   #  0xCC -> ARABIC LETTER JEEM
+    '\u062d'   #  0xCD -> ARABIC LETTER HAH
+    '\u062e'   #  0xCE -> ARABIC LETTER KHAH
+    '\u062f'   #  0xCF -> ARABIC LETTER DAL
+    '\u0630'   #  0xD0 -> ARABIC LETTER THAL
+    '\u0631'   #  0xD1 -> ARABIC LETTER REH
+    '\u0632'   #  0xD2 -> ARABIC LETTER ZAIN
+    '\u0633'   #  0xD3 -> ARABIC LETTER SEEN
+    '\u0634'   #  0xD4 -> ARABIC LETTER SHEEN
+    '\u0635'   #  0xD5 -> ARABIC LETTER SAD
+    '\u0636'   #  0xD6 -> ARABIC LETTER DAD
+    '\u0637'   #  0xD7 -> ARABIC LETTER TAH
+    '\u0638'   #  0xD8 -> ARABIC LETTER ZAH
+    '\u0639'   #  0xD9 -> ARABIC LETTER AIN
+    '\u063a'   #  0xDA -> ARABIC LETTER GHAIN
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\u0640'   #  0xE0 -> ARABIC TATWEEL
+    '\u0641'   #  0xE1 -> ARABIC LETTER FEH
+    '\u0642'   #  0xE2 -> ARABIC LETTER QAF
+    '\u0643'   #  0xE3 -> ARABIC LETTER KAF
+    '\u0644'   #  0xE4 -> ARABIC LETTER LAM
+    '\u0645'   #  0xE5 -> ARABIC LETTER MEEM
+    '\u0646'   #  0xE6 -> ARABIC LETTER NOON
+    '\u0647'   #  0xE7 -> ARABIC LETTER HEH
+    '\u0648'   #  0xE8 -> ARABIC LETTER WAW
+    '\u0649'   #  0xE9 -> ARABIC LETTER ALEF MAKSURA
+    '\u064a'   #  0xEA -> ARABIC LETTER YEH
+    '\u064b'   #  0xEB -> ARABIC FATHATAN
+    '\u064c'   #  0xEC -> ARABIC DAMMATAN
+    '\u064d'   #  0xED -> ARABIC KASRATAN
+    '\u064e'   #  0xEE -> ARABIC FATHA
+    '\u064f'   #  0xEF -> ARABIC DAMMA
+    '\u0650'   #  0xF0 -> ARABIC KASRA
+    '\u0651'   #  0xF1 -> ARABIC SHADDA
+    '\u0652'   #  0xF2 -> ARABIC SUKUN
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_7.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_7.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_7.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\u2018'   #  0xA1 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0xA2 -> RIGHT SINGLE QUOTATION MARK
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\u20ac'   #  0xA4 -> EURO SIGN
-    u'\u20af'   #  0xA5 -> DRACHMA SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u037a'   #  0xAA -> GREEK YPOGEGRAMMENI
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\ufffe'
-    u'\u2015'   #  0xAF -> HORIZONTAL BAR
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\u0384'   #  0xB4 -> GREEK TONOS
-    u'\u0385'   #  0xB5 -> GREEK DIALYTIKA TONOS
-    u'\u0386'   #  0xB6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\u0388'   #  0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
-    u'\u0389'   #  0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS
-    u'\u038a'   #  0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u038c'   #  0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\u038e'   #  0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS
-    u'\u038f'   #  0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS
-    u'\u0390'   #  0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
-    u'\u0391'   #  0xC1 -> GREEK CAPITAL LETTER ALPHA
-    u'\u0392'   #  0xC2 -> GREEK CAPITAL LETTER BETA
-    u'\u0393'   #  0xC3 -> GREEK CAPITAL LETTER GAMMA
-    u'\u0394'   #  0xC4 -> GREEK CAPITAL LETTER DELTA
-    u'\u0395'   #  0xC5 -> GREEK CAPITAL LETTER EPSILON
-    u'\u0396'   #  0xC6 -> GREEK CAPITAL LETTER ZETA
-    u'\u0397'   #  0xC7 -> GREEK CAPITAL LETTER ETA
-    u'\u0398'   #  0xC8 -> GREEK CAPITAL LETTER THETA
-    u'\u0399'   #  0xC9 -> GREEK CAPITAL LETTER IOTA
-    u'\u039a'   #  0xCA -> GREEK CAPITAL LETTER KAPPA
-    u'\u039b'   #  0xCB -> GREEK CAPITAL LETTER LAMDA
-    u'\u039c'   #  0xCC -> GREEK CAPITAL LETTER MU
-    u'\u039d'   #  0xCD -> GREEK CAPITAL LETTER NU
-    u'\u039e'   #  0xCE -> GREEK CAPITAL LETTER XI
-    u'\u039f'   #  0xCF -> GREEK CAPITAL LETTER OMICRON
-    u'\u03a0'   #  0xD0 -> GREEK CAPITAL LETTER PI
-    u'\u03a1'   #  0xD1 -> GREEK CAPITAL LETTER RHO
-    u'\ufffe'
-    u'\u03a3'   #  0xD3 -> GREEK CAPITAL LETTER SIGMA
-    u'\u03a4'   #  0xD4 -> GREEK CAPITAL LETTER TAU
-    u'\u03a5'   #  0xD5 -> GREEK CAPITAL LETTER UPSILON
-    u'\u03a6'   #  0xD6 -> GREEK CAPITAL LETTER PHI
-    u'\u03a7'   #  0xD7 -> GREEK CAPITAL LETTER CHI
-    u'\u03a8'   #  0xD8 -> GREEK CAPITAL LETTER PSI
-    u'\u03a9'   #  0xD9 -> GREEK CAPITAL LETTER OMEGA
-    u'\u03aa'   #  0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
-    u'\u03ab'   #  0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
-    u'\u03ac'   #  0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS
-    u'\u03ad'   #  0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS
-    u'\u03ae'   #  0xDE -> GREEK SMALL LETTER ETA WITH TONOS
-    u'\u03af'   #  0xDF -> GREEK SMALL LETTER IOTA WITH TONOS
-    u'\u03b0'   #  0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
-    u'\u03b1'   #  0xE1 -> GREEK SMALL LETTER ALPHA
-    u'\u03b2'   #  0xE2 -> GREEK SMALL LETTER BETA
-    u'\u03b3'   #  0xE3 -> GREEK SMALL LETTER GAMMA
-    u'\u03b4'   #  0xE4 -> GREEK SMALL LETTER DELTA
-    u'\u03b5'   #  0xE5 -> GREEK SMALL LETTER EPSILON
-    u'\u03b6'   #  0xE6 -> GREEK SMALL LETTER ZETA
-    u'\u03b7'   #  0xE7 -> GREEK SMALL LETTER ETA
-    u'\u03b8'   #  0xE8 -> GREEK SMALL LETTER THETA
-    u'\u03b9'   #  0xE9 -> GREEK SMALL LETTER IOTA
-    u'\u03ba'   #  0xEA -> GREEK SMALL LETTER KAPPA
-    u'\u03bb'   #  0xEB -> GREEK SMALL LETTER LAMDA
-    u'\u03bc'   #  0xEC -> GREEK SMALL LETTER MU
-    u'\u03bd'   #  0xED -> GREEK SMALL LETTER NU
-    u'\u03be'   #  0xEE -> GREEK SMALL LETTER XI
-    u'\u03bf'   #  0xEF -> GREEK SMALL LETTER OMICRON
-    u'\u03c0'   #  0xF0 -> GREEK SMALL LETTER PI
-    u'\u03c1'   #  0xF1 -> GREEK SMALL LETTER RHO
-    u'\u03c2'   #  0xF2 -> GREEK SMALL LETTER FINAL SIGMA
-    u'\u03c3'   #  0xF3 -> GREEK SMALL LETTER SIGMA
-    u'\u03c4'   #  0xF4 -> GREEK SMALL LETTER TAU
-    u'\u03c5'   #  0xF5 -> GREEK SMALL LETTER UPSILON
-    u'\u03c6'   #  0xF6 -> GREEK SMALL LETTER PHI
-    u'\u03c7'   #  0xF7 -> GREEK SMALL LETTER CHI
-    u'\u03c8'   #  0xF8 -> GREEK SMALL LETTER PSI
-    u'\u03c9'   #  0xF9 -> GREEK SMALL LETTER OMEGA
-    u'\u03ca'   #  0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
-    u'\u03cb'   #  0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
-    u'\u03cc'   #  0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS
-    u'\u03cd'   #  0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS
-    u'\u03ce'   #  0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS
-    u'\ufffe'
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\u2018'   #  0xA1 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0xA2 -> RIGHT SINGLE QUOTATION MARK
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\u20ac'   #  0xA4 -> EURO SIGN
+    '\u20af'   #  0xA5 -> DRACHMA SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u037a'   #  0xAA -> GREEK YPOGEGRAMMENI
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\ufffe'
+    '\u2015'   #  0xAF -> HORIZONTAL BAR
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\u0384'   #  0xB4 -> GREEK TONOS
+    '\u0385'   #  0xB5 -> GREEK DIALYTIKA TONOS
+    '\u0386'   #  0xB6 -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\u0388'   #  0xB8 -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+    '\u0389'   #  0xB9 -> GREEK CAPITAL LETTER ETA WITH TONOS
+    '\u038a'   #  0xBA -> GREEK CAPITAL LETTER IOTA WITH TONOS
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u038c'   #  0xBC -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\u038e'   #  0xBE -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+    '\u038f'   #  0xBF -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+    '\u0390'   #  0xC0 -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+    '\u0391'   #  0xC1 -> GREEK CAPITAL LETTER ALPHA
+    '\u0392'   #  0xC2 -> GREEK CAPITAL LETTER BETA
+    '\u0393'   #  0xC3 -> GREEK CAPITAL LETTER GAMMA
+    '\u0394'   #  0xC4 -> GREEK CAPITAL LETTER DELTA
+    '\u0395'   #  0xC5 -> GREEK CAPITAL LETTER EPSILON
+    '\u0396'   #  0xC6 -> GREEK CAPITAL LETTER ZETA
+    '\u0397'   #  0xC7 -> GREEK CAPITAL LETTER ETA
+    '\u0398'   #  0xC8 -> GREEK CAPITAL LETTER THETA
+    '\u0399'   #  0xC9 -> GREEK CAPITAL LETTER IOTA
+    '\u039a'   #  0xCA -> GREEK CAPITAL LETTER KAPPA
+    '\u039b'   #  0xCB -> GREEK CAPITAL LETTER LAMDA
+    '\u039c'   #  0xCC -> GREEK CAPITAL LETTER MU
+    '\u039d'   #  0xCD -> GREEK CAPITAL LETTER NU
+    '\u039e'   #  0xCE -> GREEK CAPITAL LETTER XI
+    '\u039f'   #  0xCF -> GREEK CAPITAL LETTER OMICRON
+    '\u03a0'   #  0xD0 -> GREEK CAPITAL LETTER PI
+    '\u03a1'   #  0xD1 -> GREEK CAPITAL LETTER RHO
+    '\ufffe'
+    '\u03a3'   #  0xD3 -> GREEK CAPITAL LETTER SIGMA
+    '\u03a4'   #  0xD4 -> GREEK CAPITAL LETTER TAU
+    '\u03a5'   #  0xD5 -> GREEK CAPITAL LETTER UPSILON
+    '\u03a6'   #  0xD6 -> GREEK CAPITAL LETTER PHI
+    '\u03a7'   #  0xD7 -> GREEK CAPITAL LETTER CHI
+    '\u03a8'   #  0xD8 -> GREEK CAPITAL LETTER PSI
+    '\u03a9'   #  0xD9 -> GREEK CAPITAL LETTER OMEGA
+    '\u03aa'   #  0xDA -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+    '\u03ab'   #  0xDB -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+    '\u03ac'   #  0xDC -> GREEK SMALL LETTER ALPHA WITH TONOS
+    '\u03ad'   #  0xDD -> GREEK SMALL LETTER EPSILON WITH TONOS
+    '\u03ae'   #  0xDE -> GREEK SMALL LETTER ETA WITH TONOS
+    '\u03af'   #  0xDF -> GREEK SMALL LETTER IOTA WITH TONOS
+    '\u03b0'   #  0xE0 -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+    '\u03b1'   #  0xE1 -> GREEK SMALL LETTER ALPHA
+    '\u03b2'   #  0xE2 -> GREEK SMALL LETTER BETA
+    '\u03b3'   #  0xE3 -> GREEK SMALL LETTER GAMMA
+    '\u03b4'   #  0xE4 -> GREEK SMALL LETTER DELTA
+    '\u03b5'   #  0xE5 -> GREEK SMALL LETTER EPSILON
+    '\u03b6'   #  0xE6 -> GREEK SMALL LETTER ZETA
+    '\u03b7'   #  0xE7 -> GREEK SMALL LETTER ETA
+    '\u03b8'   #  0xE8 -> GREEK SMALL LETTER THETA
+    '\u03b9'   #  0xE9 -> GREEK SMALL LETTER IOTA
+    '\u03ba'   #  0xEA -> GREEK SMALL LETTER KAPPA
+    '\u03bb'   #  0xEB -> GREEK SMALL LETTER LAMDA
+    '\u03bc'   #  0xEC -> GREEK SMALL LETTER MU
+    '\u03bd'   #  0xED -> GREEK SMALL LETTER NU
+    '\u03be'   #  0xEE -> GREEK SMALL LETTER XI
+    '\u03bf'   #  0xEF -> GREEK SMALL LETTER OMICRON
+    '\u03c0'   #  0xF0 -> GREEK SMALL LETTER PI
+    '\u03c1'   #  0xF1 -> GREEK SMALL LETTER RHO
+    '\u03c2'   #  0xF2 -> GREEK SMALL LETTER FINAL SIGMA
+    '\u03c3'   #  0xF3 -> GREEK SMALL LETTER SIGMA
+    '\u03c4'   #  0xF4 -> GREEK SMALL LETTER TAU
+    '\u03c5'   #  0xF5 -> GREEK SMALL LETTER UPSILON
+    '\u03c6'   #  0xF6 -> GREEK SMALL LETTER PHI
+    '\u03c7'   #  0xF7 -> GREEK SMALL LETTER CHI
+    '\u03c8'   #  0xF8 -> GREEK SMALL LETTER PSI
+    '\u03c9'   #  0xF9 -> GREEK SMALL LETTER OMEGA
+    '\u03ca'   #  0xFA -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+    '\u03cb'   #  0xFB -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+    '\u03cc'   #  0xFC -> GREEK SMALL LETTER OMICRON WITH TONOS
+    '\u03cd'   #  0xFD -> GREEK SMALL LETTER UPSILON WITH TONOS
+    '\u03ce'   #  0xFE -> GREEK SMALL LETTER OMEGA WITH TONOS
+    '\ufffe'
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_8.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_8.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_8.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\ufffe'
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\xd7'     #  0xAA -> MULTIPLICATION SIGN
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\xf7'     #  0xBA -> DIVISION SIGN
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\u2017'   #  0xDF -> DOUBLE LOW LINE
-    u'\u05d0'   #  0xE0 -> HEBREW LETTER ALEF
-    u'\u05d1'   #  0xE1 -> HEBREW LETTER BET
-    u'\u05d2'   #  0xE2 -> HEBREW LETTER GIMEL
-    u'\u05d3'   #  0xE3 -> HEBREW LETTER DALET
-    u'\u05d4'   #  0xE4 -> HEBREW LETTER HE
-    u'\u05d5'   #  0xE5 -> HEBREW LETTER VAV
-    u'\u05d6'   #  0xE6 -> HEBREW LETTER ZAYIN
-    u'\u05d7'   #  0xE7 -> HEBREW LETTER HET
-    u'\u05d8'   #  0xE8 -> HEBREW LETTER TET
-    u'\u05d9'   #  0xE9 -> HEBREW LETTER YOD
-    u'\u05da'   #  0xEA -> HEBREW LETTER FINAL KAF
-    u'\u05db'   #  0xEB -> HEBREW LETTER KAF
-    u'\u05dc'   #  0xEC -> HEBREW LETTER LAMED
-    u'\u05dd'   #  0xED -> HEBREW LETTER FINAL MEM
-    u'\u05de'   #  0xEE -> HEBREW LETTER MEM
-    u'\u05df'   #  0xEF -> HEBREW LETTER FINAL NUN
-    u'\u05e0'   #  0xF0 -> HEBREW LETTER NUN
-    u'\u05e1'   #  0xF1 -> HEBREW LETTER SAMEKH
-    u'\u05e2'   #  0xF2 -> HEBREW LETTER AYIN
-    u'\u05e3'   #  0xF3 -> HEBREW LETTER FINAL PE
-    u'\u05e4'   #  0xF4 -> HEBREW LETTER PE
-    u'\u05e5'   #  0xF5 -> HEBREW LETTER FINAL TSADI
-    u'\u05e6'   #  0xF6 -> HEBREW LETTER TSADI
-    u'\u05e7'   #  0xF7 -> HEBREW LETTER QOF
-    u'\u05e8'   #  0xF8 -> HEBREW LETTER RESH
-    u'\u05e9'   #  0xF9 -> HEBREW LETTER SHIN
-    u'\u05ea'   #  0xFA -> HEBREW LETTER TAV
-    u'\ufffe'
-    u'\ufffe'
-    u'\u200e'   #  0xFD -> LEFT-TO-RIGHT MARK
-    u'\u200f'   #  0xFE -> RIGHT-TO-LEFT MARK
-    u'\ufffe'
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\ufffe'
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\xd7'     #  0xAA -> MULTIPLICATION SIGN
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\xf7'     #  0xBA -> DIVISION SIGN
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\u2017'   #  0xDF -> DOUBLE LOW LINE
+    '\u05d0'   #  0xE0 -> HEBREW LETTER ALEF
+    '\u05d1'   #  0xE1 -> HEBREW LETTER BET
+    '\u05d2'   #  0xE2 -> HEBREW LETTER GIMEL
+    '\u05d3'   #  0xE3 -> HEBREW LETTER DALET
+    '\u05d4'   #  0xE4 -> HEBREW LETTER HE
+    '\u05d5'   #  0xE5 -> HEBREW LETTER VAV
+    '\u05d6'   #  0xE6 -> HEBREW LETTER ZAYIN
+    '\u05d7'   #  0xE7 -> HEBREW LETTER HET
+    '\u05d8'   #  0xE8 -> HEBREW LETTER TET
+    '\u05d9'   #  0xE9 -> HEBREW LETTER YOD
+    '\u05da'   #  0xEA -> HEBREW LETTER FINAL KAF
+    '\u05db'   #  0xEB -> HEBREW LETTER KAF
+    '\u05dc'   #  0xEC -> HEBREW LETTER LAMED
+    '\u05dd'   #  0xED -> HEBREW LETTER FINAL MEM
+    '\u05de'   #  0xEE -> HEBREW LETTER MEM
+    '\u05df'   #  0xEF -> HEBREW LETTER FINAL NUN
+    '\u05e0'   #  0xF0 -> HEBREW LETTER NUN
+    '\u05e1'   #  0xF1 -> HEBREW LETTER SAMEKH
+    '\u05e2'   #  0xF2 -> HEBREW LETTER AYIN
+    '\u05e3'   #  0xF3 -> HEBREW LETTER FINAL PE
+    '\u05e4'   #  0xF4 -> HEBREW LETTER PE
+    '\u05e5'   #  0xF5 -> HEBREW LETTER FINAL TSADI
+    '\u05e6'   #  0xF6 -> HEBREW LETTER TSADI
+    '\u05e7'   #  0xF7 -> HEBREW LETTER QOF
+    '\u05e8'   #  0xF8 -> HEBREW LETTER RESH
+    '\u05e9'   #  0xF9 -> HEBREW LETTER SHIN
+    '\u05ea'   #  0xFA -> HEBREW LETTER TAV
+    '\ufffe'
+    '\ufffe'
+    '\u200e'   #  0xFD -> LEFT-TO-RIGHT MARK
+    '\u200f'   #  0xFE -> RIGHT-TO-LEFT MARK
+    '\ufffe'
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/iso8859_9.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/iso8859_9.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/iso8859_9.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\xa0'     #  0xA0 -> NO-BREAK SPACE
-    u'\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa4'     #  0xA4 -> CURRENCY SIGN
-    u'\xa5'     #  0xA5 -> YEN SIGN
-    u'\xa6'     #  0xA6 -> BROKEN BAR
-    u'\xa7'     #  0xA7 -> SECTION SIGN
-    u'\xa8'     #  0xA8 -> DIAERESIS
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
-    u'\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xac'     #  0xAC -> NOT SIGN
-    u'\xad'     #  0xAD -> SOFT HYPHEN
-    u'\xae'     #  0xAE -> REGISTERED SIGN
-    u'\xaf'     #  0xAF -> MACRON
-    u'\xb0'     #  0xB0 -> DEGREE SIGN
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\xb2'     #  0xB2 -> SUPERSCRIPT TWO
-    u'\xb3'     #  0xB3 -> SUPERSCRIPT THREE
-    u'\xb4'     #  0xB4 -> ACUTE ACCENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\xb6'     #  0xB6 -> PILCROW SIGN
-    u'\xb7'     #  0xB7 -> MIDDLE DOT
-    u'\xb8'     #  0xB8 -> CEDILLA
-    u'\xb9'     #  0xB9 -> SUPERSCRIPT ONE
-    u'\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
-    u'\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
-    u'\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
-    u'\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
-    u'\xbf'     #  0xBF -> INVERTED QUESTION MARK
-    u'\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
-    u'\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\u011e'   #  0xD0 -> LATIN CAPITAL LETTER G WITH BREVE
-    u'\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xd7'     #  0xD7 -> MULTIPLICATION SIGN
-    u'\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u0130'   #  0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE
-    u'\u015e'   #  0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA
-    u'\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
-    u'\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
-    u'\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\u011f'   #  0xF0 -> LATIN SMALL LETTER G WITH BREVE
-    u'\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
-    u'\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0xF7 -> DIVISION SIGN
-    u'\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
-    u'\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u0131'   #  0xFD -> LATIN SMALL LETTER DOTLESS I
-    u'\u015f'   #  0xFE -> LATIN SMALL LETTER S WITH CEDILLA
-    u'\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\xa0'     #  0xA0 -> NO-BREAK SPACE
+    '\xa1'     #  0xA1 -> INVERTED EXCLAMATION MARK
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa4'     #  0xA4 -> CURRENCY SIGN
+    '\xa5'     #  0xA5 -> YEN SIGN
+    '\xa6'     #  0xA6 -> BROKEN BAR
+    '\xa7'     #  0xA7 -> SECTION SIGN
+    '\xa8'     #  0xA8 -> DIAERESIS
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\xaa'     #  0xAA -> FEMININE ORDINAL INDICATOR
+    '\xab'     #  0xAB -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xac'     #  0xAC -> NOT SIGN
+    '\xad'     #  0xAD -> SOFT HYPHEN
+    '\xae'     #  0xAE -> REGISTERED SIGN
+    '\xaf'     #  0xAF -> MACRON
+    '\xb0'     #  0xB0 -> DEGREE SIGN
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\xb2'     #  0xB2 -> SUPERSCRIPT TWO
+    '\xb3'     #  0xB3 -> SUPERSCRIPT THREE
+    '\xb4'     #  0xB4 -> ACUTE ACCENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\xb6'     #  0xB6 -> PILCROW SIGN
+    '\xb7'     #  0xB7 -> MIDDLE DOT
+    '\xb8'     #  0xB8 -> CEDILLA
+    '\xb9'     #  0xB9 -> SUPERSCRIPT ONE
+    '\xba'     #  0xBA -> MASCULINE ORDINAL INDICATOR
+    '\xbb'     #  0xBB -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbc'     #  0xBC -> VULGAR FRACTION ONE QUARTER
+    '\xbd'     #  0xBD -> VULGAR FRACTION ONE HALF
+    '\xbe'     #  0xBE -> VULGAR FRACTION THREE QUARTERS
+    '\xbf'     #  0xBF -> INVERTED QUESTION MARK
+    '\xc0'     #  0xC0 -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc1'     #  0xC1 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xc2'     #  0xC2 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xc3'     #  0xC3 -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xc4'     #  0xC4 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0xC5 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc6'     #  0xC6 -> LATIN CAPITAL LETTER AE
+    '\xc7'     #  0xC7 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc8'     #  0xC8 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xc9'     #  0xC9 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xca'     #  0xCA -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xcb'     #  0xCB -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xcc'     #  0xCC -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xcd'     #  0xCD -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xCE -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xCF -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\u011e'   #  0xD0 -> LATIN CAPITAL LETTER G WITH BREVE
+    '\xd1'     #  0xD1 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd2'     #  0xD2 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xd3'     #  0xD3 -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xD4 -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\xd5'     #  0xD5 -> LATIN CAPITAL LETTER O WITH TILDE
+    '\xd6'     #  0xD6 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xd7'     #  0xD7 -> MULTIPLICATION SIGN
+    '\xd8'     #  0xD8 -> LATIN CAPITAL LETTER O WITH STROKE
+    '\xd9'     #  0xD9 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\xda'     #  0xDA -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xDB -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xdc'     #  0xDC -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u0130'   #  0xDD -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+    '\u015e'   #  0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA
+    '\xdf'     #  0xDF -> LATIN SMALL LETTER SHARP S
+    '\xe0'     #  0xE0 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe1'     #  0xE1 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe2'     #  0xE2 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe3'     #  0xE3 -> LATIN SMALL LETTER A WITH TILDE
+    '\xe4'     #  0xE4 -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe5'     #  0xE5 -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe6'     #  0xE6 -> LATIN SMALL LETTER AE
+    '\xe7'     #  0xE7 -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe8'     #  0xE8 -> LATIN SMALL LETTER E WITH GRAVE
+    '\xe9'     #  0xE9 -> LATIN SMALL LETTER E WITH ACUTE
+    '\xea'     #  0xEA -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0xEB -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xec'     #  0xEC -> LATIN SMALL LETTER I WITH GRAVE
+    '\xed'     #  0xED -> LATIN SMALL LETTER I WITH ACUTE
+    '\xee'     #  0xEE -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0xEF -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\u011f'   #  0xF0 -> LATIN SMALL LETTER G WITH BREVE
+    '\xf1'     #  0xF1 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf2'     #  0xF2 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf3'     #  0xF3 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf4'     #  0xF4 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf5'     #  0xF5 -> LATIN SMALL LETTER O WITH TILDE
+    '\xf6'     #  0xF6 -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0xF7 -> DIVISION SIGN
+    '\xf8'     #  0xF8 -> LATIN SMALL LETTER O WITH STROKE
+    '\xf9'     #  0xF9 -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfa'     #  0xFA -> LATIN SMALL LETTER U WITH ACUTE
+    '\xfb'     #  0xFB -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0xFC -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u0131'   #  0xFD -> LATIN SMALL LETTER DOTLESS I
+    '\u015f'   #  0xFE -> LATIN SMALL LETTER S WITH CEDILLA
+    '\xff'     #  0xFF -> LATIN SMALL LETTER Y WITH DIAERESIS
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/koi8_r.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/koi8_r.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/koi8_r.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u2500'   #  0x80 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u2502'   #  0x81 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u250c'   #  0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2510'   #  0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2518'   #  0x85 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u251c'   #  0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2524'   #  0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u252c'   #  0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u2534'   #  0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u253c'   #  0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u2580'   #  0x8B -> UPPER HALF BLOCK
-    u'\u2584'   #  0x8C -> LOWER HALF BLOCK
-    u'\u2588'   #  0x8D -> FULL BLOCK
-    u'\u258c'   #  0x8E -> LEFT HALF BLOCK
-    u'\u2590'   #  0x8F -> RIGHT HALF BLOCK
-    u'\u2591'   #  0x90 -> LIGHT SHADE
-    u'\u2592'   #  0x91 -> MEDIUM SHADE
-    u'\u2593'   #  0x92 -> DARK SHADE
-    u'\u2320'   #  0x93 -> TOP HALF INTEGRAL
-    u'\u25a0'   #  0x94 -> BLACK SQUARE
-    u'\u2219'   #  0x95 -> BULLET OPERATOR
-    u'\u221a'   #  0x96 -> SQUARE ROOT
-    u'\u2248'   #  0x97 -> ALMOST EQUAL TO
-    u'\u2264'   #  0x98 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0x99 -> GREATER-THAN OR EQUAL TO
-    u'\xa0'     #  0x9A -> NO-BREAK SPACE
-    u'\u2321'   #  0x9B -> BOTTOM HALF INTEGRAL
-    u'\xb0'     #  0x9C -> DEGREE SIGN
-    u'\xb2'     #  0x9D -> SUPERSCRIPT TWO
-    u'\xb7'     #  0x9E -> MIDDLE DOT
-    u'\xf7'     #  0x9F -> DIVISION SIGN
-    u'\u2550'   #  0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u2551'   #  0xA1 -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2552'   #  0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u0451'   #  0xA3 -> CYRILLIC SMALL LETTER IO
-    u'\u2553'   #  0xA4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
-    u'\u2554'   #  0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u2555'   #  0xA6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
-    u'\u2556'   #  0xA7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
-    u'\u2557'   #  0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u2558'   #  0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2559'   #  0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u255b'   #  0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u255c'   #  0xAD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
-    u'\u255d'   #  0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255e'   #  0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u2560'   #  0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2561'   #  0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u0401'   #  0xB3 -> CYRILLIC CAPITAL LETTER IO
-    u'\u2562'   #  0xB4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
-    u'\u2563'   #  0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u2564'   #  0xB6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
-    u'\u2565'   #  0xB7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
-    u'\u2566'   #  0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2567'   #  0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2569'   #  0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u256a'   #  0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u256b'   #  0xBD -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
-    u'\u256c'   #  0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\xa9'     #  0xBF -> COPYRIGHT SIGN
-    u'\u044e'   #  0xC0 -> CYRILLIC SMALL LETTER YU
-    u'\u0430'   #  0xC1 -> CYRILLIC SMALL LETTER A
-    u'\u0431'   #  0xC2 -> CYRILLIC SMALL LETTER BE
-    u'\u0446'   #  0xC3 -> CYRILLIC SMALL LETTER TSE
-    u'\u0434'   #  0xC4 -> CYRILLIC SMALL LETTER DE
-    u'\u0435'   #  0xC5 -> CYRILLIC SMALL LETTER IE
-    u'\u0444'   #  0xC6 -> CYRILLIC SMALL LETTER EF
-    u'\u0433'   #  0xC7 -> CYRILLIC SMALL LETTER GHE
-    u'\u0445'   #  0xC8 -> CYRILLIC SMALL LETTER HA
-    u'\u0438'   #  0xC9 -> CYRILLIC SMALL LETTER I
-    u'\u0439'   #  0xCA -> CYRILLIC SMALL LETTER SHORT I
-    u'\u043a'   #  0xCB -> CYRILLIC SMALL LETTER KA
-    u'\u043b'   #  0xCC -> CYRILLIC SMALL LETTER EL
-    u'\u043c'   #  0xCD -> CYRILLIC SMALL LETTER EM
-    u'\u043d'   #  0xCE -> CYRILLIC SMALL LETTER EN
-    u'\u043e'   #  0xCF -> CYRILLIC SMALL LETTER O
-    u'\u043f'   #  0xD0 -> CYRILLIC SMALL LETTER PE
-    u'\u044f'   #  0xD1 -> CYRILLIC SMALL LETTER YA
-    u'\u0440'   #  0xD2 -> CYRILLIC SMALL LETTER ER
-    u'\u0441'   #  0xD3 -> CYRILLIC SMALL LETTER ES
-    u'\u0442'   #  0xD4 -> CYRILLIC SMALL LETTER TE
-    u'\u0443'   #  0xD5 -> CYRILLIC SMALL LETTER U
-    u'\u0436'   #  0xD6 -> CYRILLIC SMALL LETTER ZHE
-    u'\u0432'   #  0xD7 -> CYRILLIC SMALL LETTER VE
-    u'\u044c'   #  0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN
-    u'\u044b'   #  0xD9 -> CYRILLIC SMALL LETTER YERU
-    u'\u0437'   #  0xDA -> CYRILLIC SMALL LETTER ZE
-    u'\u0448'   #  0xDB -> CYRILLIC SMALL LETTER SHA
-    u'\u044d'   #  0xDC -> CYRILLIC SMALL LETTER E
-    u'\u0449'   #  0xDD -> CYRILLIC SMALL LETTER SHCHA
-    u'\u0447'   #  0xDE -> CYRILLIC SMALL LETTER CHE
-    u'\u044a'   #  0xDF -> CYRILLIC SMALL LETTER HARD SIGN
-    u'\u042e'   #  0xE0 -> CYRILLIC CAPITAL LETTER YU
-    u'\u0410'   #  0xE1 -> CYRILLIC CAPITAL LETTER A
-    u'\u0411'   #  0xE2 -> CYRILLIC CAPITAL LETTER BE
-    u'\u0426'   #  0xE3 -> CYRILLIC CAPITAL LETTER TSE
-    u'\u0414'   #  0xE4 -> CYRILLIC CAPITAL LETTER DE
-    u'\u0415'   #  0xE5 -> CYRILLIC CAPITAL LETTER IE
-    u'\u0424'   #  0xE6 -> CYRILLIC CAPITAL LETTER EF
-    u'\u0413'   #  0xE7 -> CYRILLIC CAPITAL LETTER GHE
-    u'\u0425'   #  0xE8 -> CYRILLIC CAPITAL LETTER HA
-    u'\u0418'   #  0xE9 -> CYRILLIC CAPITAL LETTER I
-    u'\u0419'   #  0xEA -> CYRILLIC CAPITAL LETTER SHORT I
-    u'\u041a'   #  0xEB -> CYRILLIC CAPITAL LETTER KA
-    u'\u041b'   #  0xEC -> CYRILLIC CAPITAL LETTER EL
-    u'\u041c'   #  0xED -> CYRILLIC CAPITAL LETTER EM
-    u'\u041d'   #  0xEE -> CYRILLIC CAPITAL LETTER EN
-    u'\u041e'   #  0xEF -> CYRILLIC CAPITAL LETTER O
-    u'\u041f'   #  0xF0 -> CYRILLIC CAPITAL LETTER PE
-    u'\u042f'   #  0xF1 -> CYRILLIC CAPITAL LETTER YA
-    u'\u0420'   #  0xF2 -> CYRILLIC CAPITAL LETTER ER
-    u'\u0421'   #  0xF3 -> CYRILLIC CAPITAL LETTER ES
-    u'\u0422'   #  0xF4 -> CYRILLIC CAPITAL LETTER TE
-    u'\u0423'   #  0xF5 -> CYRILLIC CAPITAL LETTER U
-    u'\u0416'   #  0xF6 -> CYRILLIC CAPITAL LETTER ZHE
-    u'\u0412'   #  0xF7 -> CYRILLIC CAPITAL LETTER VE
-    u'\u042c'   #  0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN
-    u'\u042b'   #  0xF9 -> CYRILLIC CAPITAL LETTER YERU
-    u'\u0417'   #  0xFA -> CYRILLIC CAPITAL LETTER ZE
-    u'\u0428'   #  0xFB -> CYRILLIC CAPITAL LETTER SHA
-    u'\u042d'   #  0xFC -> CYRILLIC CAPITAL LETTER E
-    u'\u0429'   #  0xFD -> CYRILLIC CAPITAL LETTER SHCHA
-    u'\u0427'   #  0xFE -> CYRILLIC CAPITAL LETTER CHE
-    u'\u042a'   #  0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u2500'   #  0x80 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u2502'   #  0x81 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u250c'   #  0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2510'   #  0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2518'   #  0x85 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u251c'   #  0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2524'   #  0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u252c'   #  0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u2534'   #  0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u253c'   #  0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u2580'   #  0x8B -> UPPER HALF BLOCK
+    '\u2584'   #  0x8C -> LOWER HALF BLOCK
+    '\u2588'   #  0x8D -> FULL BLOCK
+    '\u258c'   #  0x8E -> LEFT HALF BLOCK
+    '\u2590'   #  0x8F -> RIGHT HALF BLOCK
+    '\u2591'   #  0x90 -> LIGHT SHADE
+    '\u2592'   #  0x91 -> MEDIUM SHADE
+    '\u2593'   #  0x92 -> DARK SHADE
+    '\u2320'   #  0x93 -> TOP HALF INTEGRAL
+    '\u25a0'   #  0x94 -> BLACK SQUARE
+    '\u2219'   #  0x95 -> BULLET OPERATOR
+    '\u221a'   #  0x96 -> SQUARE ROOT
+    '\u2248'   #  0x97 -> ALMOST EQUAL TO
+    '\u2264'   #  0x98 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0x99 -> GREATER-THAN OR EQUAL TO
+    '\xa0'     #  0x9A -> NO-BREAK SPACE
+    '\u2321'   #  0x9B -> BOTTOM HALF INTEGRAL
+    '\xb0'     #  0x9C -> DEGREE SIGN
+    '\xb2'     #  0x9D -> SUPERSCRIPT TWO
+    '\xb7'     #  0x9E -> MIDDLE DOT
+    '\xf7'     #  0x9F -> DIVISION SIGN
+    '\u2550'   #  0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u2551'   #  0xA1 -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2552'   #  0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u0451'   #  0xA3 -> CYRILLIC SMALL LETTER IO
+    '\u2553'   #  0xA4 -> BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+    '\u2554'   #  0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u2555'   #  0xA6 -> BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+    '\u2556'   #  0xA7 -> BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+    '\u2557'   #  0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u2558'   #  0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2559'   #  0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u255b'   #  0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u255c'   #  0xAD -> BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+    '\u255d'   #  0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255e'   #  0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u2560'   #  0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2561'   #  0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u0401'   #  0xB3 -> CYRILLIC CAPITAL LETTER IO
+    '\u2562'   #  0xB4 -> BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+    '\u2563'   #  0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u2564'   #  0xB6 -> BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+    '\u2565'   #  0xB7 -> BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+    '\u2566'   #  0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2567'   #  0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2569'   #  0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u256a'   #  0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u256b'   #  0xBD -> BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+    '\u256c'   #  0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\xa9'     #  0xBF -> COPYRIGHT SIGN
+    '\u044e'   #  0xC0 -> CYRILLIC SMALL LETTER YU
+    '\u0430'   #  0xC1 -> CYRILLIC SMALL LETTER A
+    '\u0431'   #  0xC2 -> CYRILLIC SMALL LETTER BE
+    '\u0446'   #  0xC3 -> CYRILLIC SMALL LETTER TSE
+    '\u0434'   #  0xC4 -> CYRILLIC SMALL LETTER DE
+    '\u0435'   #  0xC5 -> CYRILLIC SMALL LETTER IE
+    '\u0444'   #  0xC6 -> CYRILLIC SMALL LETTER EF
+    '\u0433'   #  0xC7 -> CYRILLIC SMALL LETTER GHE
+    '\u0445'   #  0xC8 -> CYRILLIC SMALL LETTER HA
+    '\u0438'   #  0xC9 -> CYRILLIC SMALL LETTER I
+    '\u0439'   #  0xCA -> CYRILLIC SMALL LETTER SHORT I
+    '\u043a'   #  0xCB -> CYRILLIC SMALL LETTER KA
+    '\u043b'   #  0xCC -> CYRILLIC SMALL LETTER EL
+    '\u043c'   #  0xCD -> CYRILLIC SMALL LETTER EM
+    '\u043d'   #  0xCE -> CYRILLIC SMALL LETTER EN
+    '\u043e'   #  0xCF -> CYRILLIC SMALL LETTER O
+    '\u043f'   #  0xD0 -> CYRILLIC SMALL LETTER PE
+    '\u044f'   #  0xD1 -> CYRILLIC SMALL LETTER YA
+    '\u0440'   #  0xD2 -> CYRILLIC SMALL LETTER ER
+    '\u0441'   #  0xD3 -> CYRILLIC SMALL LETTER ES
+    '\u0442'   #  0xD4 -> CYRILLIC SMALL LETTER TE
+    '\u0443'   #  0xD5 -> CYRILLIC SMALL LETTER U
+    '\u0436'   #  0xD6 -> CYRILLIC SMALL LETTER ZHE
+    '\u0432'   #  0xD7 -> CYRILLIC SMALL LETTER VE
+    '\u044c'   #  0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN
+    '\u044b'   #  0xD9 -> CYRILLIC SMALL LETTER YERU
+    '\u0437'   #  0xDA -> CYRILLIC SMALL LETTER ZE
+    '\u0448'   #  0xDB -> CYRILLIC SMALL LETTER SHA
+    '\u044d'   #  0xDC -> CYRILLIC SMALL LETTER E
+    '\u0449'   #  0xDD -> CYRILLIC SMALL LETTER SHCHA
+    '\u0447'   #  0xDE -> CYRILLIC SMALL LETTER CHE
+    '\u044a'   #  0xDF -> CYRILLIC SMALL LETTER HARD SIGN
+    '\u042e'   #  0xE0 -> CYRILLIC CAPITAL LETTER YU
+    '\u0410'   #  0xE1 -> CYRILLIC CAPITAL LETTER A
+    '\u0411'   #  0xE2 -> CYRILLIC CAPITAL LETTER BE
+    '\u0426'   #  0xE3 -> CYRILLIC CAPITAL LETTER TSE
+    '\u0414'   #  0xE4 -> CYRILLIC CAPITAL LETTER DE
+    '\u0415'   #  0xE5 -> CYRILLIC CAPITAL LETTER IE
+    '\u0424'   #  0xE6 -> CYRILLIC CAPITAL LETTER EF
+    '\u0413'   #  0xE7 -> CYRILLIC CAPITAL LETTER GHE
+    '\u0425'   #  0xE8 -> CYRILLIC CAPITAL LETTER HA
+    '\u0418'   #  0xE9 -> CYRILLIC CAPITAL LETTER I
+    '\u0419'   #  0xEA -> CYRILLIC CAPITAL LETTER SHORT I
+    '\u041a'   #  0xEB -> CYRILLIC CAPITAL LETTER KA
+    '\u041b'   #  0xEC -> CYRILLIC CAPITAL LETTER EL
+    '\u041c'   #  0xED -> CYRILLIC CAPITAL LETTER EM
+    '\u041d'   #  0xEE -> CYRILLIC CAPITAL LETTER EN
+    '\u041e'   #  0xEF -> CYRILLIC CAPITAL LETTER O
+    '\u041f'   #  0xF0 -> CYRILLIC CAPITAL LETTER PE
+    '\u042f'   #  0xF1 -> CYRILLIC CAPITAL LETTER YA
+    '\u0420'   #  0xF2 -> CYRILLIC CAPITAL LETTER ER
+    '\u0421'   #  0xF3 -> CYRILLIC CAPITAL LETTER ES
+    '\u0422'   #  0xF4 -> CYRILLIC CAPITAL LETTER TE
+    '\u0423'   #  0xF5 -> CYRILLIC CAPITAL LETTER U
+    '\u0416'   #  0xF6 -> CYRILLIC CAPITAL LETTER ZHE
+    '\u0412'   #  0xF7 -> CYRILLIC CAPITAL LETTER VE
+    '\u042c'   #  0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN
+    '\u042b'   #  0xF9 -> CYRILLIC CAPITAL LETTER YERU
+    '\u0417'   #  0xFA -> CYRILLIC CAPITAL LETTER ZE
+    '\u0428'   #  0xFB -> CYRILLIC CAPITAL LETTER SHA
+    '\u042d'   #  0xFC -> CYRILLIC CAPITAL LETTER E
+    '\u0429'   #  0xFD -> CYRILLIC CAPITAL LETTER SHCHA
+    '\u0427'   #  0xFE -> CYRILLIC CAPITAL LETTER CHE
+    '\u042a'   #  0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/koi8_u.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/koi8_u.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/koi8_u.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\u2500'   #  0x80 -> BOX DRAWINGS LIGHT HORIZONTAL
-    u'\u2502'   #  0x81 -> BOX DRAWINGS LIGHT VERTICAL
-    u'\u250c'   #  0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT
-    u'\u2510'   #  0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT
-    u'\u2514'   #  0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT
-    u'\u2518'   #  0x85 -> BOX DRAWINGS LIGHT UP AND LEFT
-    u'\u251c'   #  0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
-    u'\u2524'   #  0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
-    u'\u252c'   #  0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
-    u'\u2534'   #  0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
-    u'\u253c'   #  0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
-    u'\u2580'   #  0x8B -> UPPER HALF BLOCK
-    u'\u2584'   #  0x8C -> LOWER HALF BLOCK
-    u'\u2588'   #  0x8D -> FULL BLOCK
-    u'\u258c'   #  0x8E -> LEFT HALF BLOCK
-    u'\u2590'   #  0x8F -> RIGHT HALF BLOCK
-    u'\u2591'   #  0x90 -> LIGHT SHADE
-    u'\u2592'   #  0x91 -> MEDIUM SHADE
-    u'\u2593'   #  0x92 -> DARK SHADE
-    u'\u2320'   #  0x93 -> TOP HALF INTEGRAL
-    u'\u25a0'   #  0x94 -> BLACK SQUARE
-    u'\u2219'   #  0x95 -> BULLET OPERATOR
-    u'\u221a'   #  0x96 -> SQUARE ROOT
-    u'\u2248'   #  0x97 -> ALMOST EQUAL TO
-    u'\u2264'   #  0x98 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0x99 -> GREATER-THAN OR EQUAL TO
-    u'\xa0'     #  0x9A -> NO-BREAK SPACE
-    u'\u2321'   #  0x9B -> BOTTOM HALF INTEGRAL
-    u'\xb0'     #  0x9C -> DEGREE SIGN
-    u'\xb2'     #  0x9D -> SUPERSCRIPT TWO
-    u'\xb7'     #  0x9E -> MIDDLE DOT
-    u'\xf7'     #  0x9F -> DIVISION SIGN
-    u'\u2550'   #  0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL
-    u'\u2551'   #  0xA1 -> BOX DRAWINGS DOUBLE VERTICAL
-    u'\u2552'   #  0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
-    u'\u0451'   #  0xA3 -> CYRILLIC SMALL LETTER IO
-    u'\u0454'   #  0xA4 -> CYRILLIC SMALL LETTER UKRAINIAN IE
-    u'\u2554'   #  0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
-    u'\u0456'   #  0xA6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\u0457'   #  0xA7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN)
-    u'\u2557'   #  0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT
-    u'\u2558'   #  0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
-    u'\u2559'   #  0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
-    u'\u255a'   #  0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT
-    u'\u255b'   #  0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
-    u'\u0491'   #  0xAD -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
-    u'\u255d'   #  0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT
-    u'\u255e'   #  0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
-    u'\u255f'   #  0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
-    u'\u2560'   #  0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
-    u'\u2561'   #  0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
-    u'\u0401'   #  0xB3 -> CYRILLIC CAPITAL LETTER IO
-    u'\u0404'   #  0xB4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
-    u'\u2563'   #  0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
-    u'\u0406'   #  0xB6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\u0407'   #  0xB7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
-    u'\u2566'   #  0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
-    u'\u2567'   #  0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
-    u'\u2568'   #  0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
-    u'\u2569'   #  0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
-    u'\u256a'   #  0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
-    u'\u0490'   #  0xBD -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
-    u'\u256c'   #  0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
-    u'\xa9'     #  0xBF -> COPYRIGHT SIGN
-    u'\u044e'   #  0xC0 -> CYRILLIC SMALL LETTER YU
-    u'\u0430'   #  0xC1 -> CYRILLIC SMALL LETTER A
-    u'\u0431'   #  0xC2 -> CYRILLIC SMALL LETTER BE
-    u'\u0446'   #  0xC3 -> CYRILLIC SMALL LETTER TSE
-    u'\u0434'   #  0xC4 -> CYRILLIC SMALL LETTER DE
-    u'\u0435'   #  0xC5 -> CYRILLIC SMALL LETTER IE
-    u'\u0444'   #  0xC6 -> CYRILLIC SMALL LETTER EF
-    u'\u0433'   #  0xC7 -> CYRILLIC SMALL LETTER GHE
-    u'\u0445'   #  0xC8 -> CYRILLIC SMALL LETTER HA
-    u'\u0438'   #  0xC9 -> CYRILLIC SMALL LETTER I
-    u'\u0439'   #  0xCA -> CYRILLIC SMALL LETTER SHORT I
-    u'\u043a'   #  0xCB -> CYRILLIC SMALL LETTER KA
-    u'\u043b'   #  0xCC -> CYRILLIC SMALL LETTER EL
-    u'\u043c'   #  0xCD -> CYRILLIC SMALL LETTER EM
-    u'\u043d'   #  0xCE -> CYRILLIC SMALL LETTER EN
-    u'\u043e'   #  0xCF -> CYRILLIC SMALL LETTER O
-    u'\u043f'   #  0xD0 -> CYRILLIC SMALL LETTER PE
-    u'\u044f'   #  0xD1 -> CYRILLIC SMALL LETTER YA
-    u'\u0440'   #  0xD2 -> CYRILLIC SMALL LETTER ER
-    u'\u0441'   #  0xD3 -> CYRILLIC SMALL LETTER ES
-    u'\u0442'   #  0xD4 -> CYRILLIC SMALL LETTER TE
-    u'\u0443'   #  0xD5 -> CYRILLIC SMALL LETTER U
-    u'\u0436'   #  0xD6 -> CYRILLIC SMALL LETTER ZHE
-    u'\u0432'   #  0xD7 -> CYRILLIC SMALL LETTER VE
-    u'\u044c'   #  0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN
-    u'\u044b'   #  0xD9 -> CYRILLIC SMALL LETTER YERU
-    u'\u0437'   #  0xDA -> CYRILLIC SMALL LETTER ZE
-    u'\u0448'   #  0xDB -> CYRILLIC SMALL LETTER SHA
-    u'\u044d'   #  0xDC -> CYRILLIC SMALL LETTER E
-    u'\u0449'   #  0xDD -> CYRILLIC SMALL LETTER SHCHA
-    u'\u0447'   #  0xDE -> CYRILLIC SMALL LETTER CHE
-    u'\u044a'   #  0xDF -> CYRILLIC SMALL LETTER HARD SIGN
-    u'\u042e'   #  0xE0 -> CYRILLIC CAPITAL LETTER YU
-    u'\u0410'   #  0xE1 -> CYRILLIC CAPITAL LETTER A
-    u'\u0411'   #  0xE2 -> CYRILLIC CAPITAL LETTER BE
-    u'\u0426'   #  0xE3 -> CYRILLIC CAPITAL LETTER TSE
-    u'\u0414'   #  0xE4 -> CYRILLIC CAPITAL LETTER DE
-    u'\u0415'   #  0xE5 -> CYRILLIC CAPITAL LETTER IE
-    u'\u0424'   #  0xE6 -> CYRILLIC CAPITAL LETTER EF
-    u'\u0413'   #  0xE7 -> CYRILLIC CAPITAL LETTER GHE
-    u'\u0425'   #  0xE8 -> CYRILLIC CAPITAL LETTER HA
-    u'\u0418'   #  0xE9 -> CYRILLIC CAPITAL LETTER I
-    u'\u0419'   #  0xEA -> CYRILLIC CAPITAL LETTER SHORT I
-    u'\u041a'   #  0xEB -> CYRILLIC CAPITAL LETTER KA
-    u'\u041b'   #  0xEC -> CYRILLIC CAPITAL LETTER EL
-    u'\u041c'   #  0xED -> CYRILLIC CAPITAL LETTER EM
-    u'\u041d'   #  0xEE -> CYRILLIC CAPITAL LETTER EN
-    u'\u041e'   #  0xEF -> CYRILLIC CAPITAL LETTER O
-    u'\u041f'   #  0xF0 -> CYRILLIC CAPITAL LETTER PE
-    u'\u042f'   #  0xF1 -> CYRILLIC CAPITAL LETTER YA
-    u'\u0420'   #  0xF2 -> CYRILLIC CAPITAL LETTER ER
-    u'\u0421'   #  0xF3 -> CYRILLIC CAPITAL LETTER ES
-    u'\u0422'   #  0xF4 -> CYRILLIC CAPITAL LETTER TE
-    u'\u0423'   #  0xF5 -> CYRILLIC CAPITAL LETTER U
-    u'\u0416'   #  0xF6 -> CYRILLIC CAPITAL LETTER ZHE
-    u'\u0412'   #  0xF7 -> CYRILLIC CAPITAL LETTER VE
-    u'\u042c'   #  0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN
-    u'\u042b'   #  0xF9 -> CYRILLIC CAPITAL LETTER YERU
-    u'\u0417'   #  0xFA -> CYRILLIC CAPITAL LETTER ZE
-    u'\u0428'   #  0xFB -> CYRILLIC CAPITAL LETTER SHA
-    u'\u042d'   #  0xFC -> CYRILLIC CAPITAL LETTER E
-    u'\u0429'   #  0xFD -> CYRILLIC CAPITAL LETTER SHCHA
-    u'\u0427'   #  0xFE -> CYRILLIC CAPITAL LETTER CHE
-    u'\u042a'   #  0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\u2500'   #  0x80 -> BOX DRAWINGS LIGHT HORIZONTAL
+    '\u2502'   #  0x81 -> BOX DRAWINGS LIGHT VERTICAL
+    '\u250c'   #  0x82 -> BOX DRAWINGS LIGHT DOWN AND RIGHT
+    '\u2510'   #  0x83 -> BOX DRAWINGS LIGHT DOWN AND LEFT
+    '\u2514'   #  0x84 -> BOX DRAWINGS LIGHT UP AND RIGHT
+    '\u2518'   #  0x85 -> BOX DRAWINGS LIGHT UP AND LEFT
+    '\u251c'   #  0x86 -> BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+    '\u2524'   #  0x87 -> BOX DRAWINGS LIGHT VERTICAL AND LEFT
+    '\u252c'   #  0x88 -> BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+    '\u2534'   #  0x89 -> BOX DRAWINGS LIGHT UP AND HORIZONTAL
+    '\u253c'   #  0x8A -> BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+    '\u2580'   #  0x8B -> UPPER HALF BLOCK
+    '\u2584'   #  0x8C -> LOWER HALF BLOCK
+    '\u2588'   #  0x8D -> FULL BLOCK
+    '\u258c'   #  0x8E -> LEFT HALF BLOCK
+    '\u2590'   #  0x8F -> RIGHT HALF BLOCK
+    '\u2591'   #  0x90 -> LIGHT SHADE
+    '\u2592'   #  0x91 -> MEDIUM SHADE
+    '\u2593'   #  0x92 -> DARK SHADE
+    '\u2320'   #  0x93 -> TOP HALF INTEGRAL
+    '\u25a0'   #  0x94 -> BLACK SQUARE
+    '\u2219'   #  0x95 -> BULLET OPERATOR
+    '\u221a'   #  0x96 -> SQUARE ROOT
+    '\u2248'   #  0x97 -> ALMOST EQUAL TO
+    '\u2264'   #  0x98 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0x99 -> GREATER-THAN OR EQUAL TO
+    '\xa0'     #  0x9A -> NO-BREAK SPACE
+    '\u2321'   #  0x9B -> BOTTOM HALF INTEGRAL
+    '\xb0'     #  0x9C -> DEGREE SIGN
+    '\xb2'     #  0x9D -> SUPERSCRIPT TWO
+    '\xb7'     #  0x9E -> MIDDLE DOT
+    '\xf7'     #  0x9F -> DIVISION SIGN
+    '\u2550'   #  0xA0 -> BOX DRAWINGS DOUBLE HORIZONTAL
+    '\u2551'   #  0xA1 -> BOX DRAWINGS DOUBLE VERTICAL
+    '\u2552'   #  0xA2 -> BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+    '\u0451'   #  0xA3 -> CYRILLIC SMALL LETTER IO
+    '\u0454'   #  0xA4 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+    '\u2554'   #  0xA5 -> BOX DRAWINGS DOUBLE DOWN AND RIGHT
+    '\u0456'   #  0xA6 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\u0457'   #  0xA7 -> CYRILLIC SMALL LETTER YI (UKRAINIAN)
+    '\u2557'   #  0xA8 -> BOX DRAWINGS DOUBLE DOWN AND LEFT
+    '\u2558'   #  0xA9 -> BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+    '\u2559'   #  0xAA -> BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+    '\u255a'   #  0xAB -> BOX DRAWINGS DOUBLE UP AND RIGHT
+    '\u255b'   #  0xAC -> BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+    '\u0491'   #  0xAD -> CYRILLIC SMALL LETTER UKRAINIAN GHE WITH UPTURN
+    '\u255d'   #  0xAE -> BOX DRAWINGS DOUBLE UP AND LEFT
+    '\u255e'   #  0xAF -> BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+    '\u255f'   #  0xB0 -> BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+    '\u2560'   #  0xB1 -> BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+    '\u2561'   #  0xB2 -> BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+    '\u0401'   #  0xB3 -> CYRILLIC CAPITAL LETTER IO
+    '\u0404'   #  0xB4 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+    '\u2563'   #  0xB5 -> BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+    '\u0406'   #  0xB6 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\u0407'   #  0xB7 -> CYRILLIC CAPITAL LETTER YI (UKRAINIAN)
+    '\u2566'   #  0xB8 -> BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+    '\u2567'   #  0xB9 -> BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+    '\u2568'   #  0xBA -> BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+    '\u2569'   #  0xBB -> BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+    '\u256a'   #  0xBC -> BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+    '\u0490'   #  0xBD -> CYRILLIC CAPITAL LETTER UKRAINIAN GHE WITH UPTURN
+    '\u256c'   #  0xBE -> BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+    '\xa9'     #  0xBF -> COPYRIGHT SIGN
+    '\u044e'   #  0xC0 -> CYRILLIC SMALL LETTER YU
+    '\u0430'   #  0xC1 -> CYRILLIC SMALL LETTER A
+    '\u0431'   #  0xC2 -> CYRILLIC SMALL LETTER BE
+    '\u0446'   #  0xC3 -> CYRILLIC SMALL LETTER TSE
+    '\u0434'   #  0xC4 -> CYRILLIC SMALL LETTER DE
+    '\u0435'   #  0xC5 -> CYRILLIC SMALL LETTER IE
+    '\u0444'   #  0xC6 -> CYRILLIC SMALL LETTER EF
+    '\u0433'   #  0xC7 -> CYRILLIC SMALL LETTER GHE
+    '\u0445'   #  0xC8 -> CYRILLIC SMALL LETTER HA
+    '\u0438'   #  0xC9 -> CYRILLIC SMALL LETTER I
+    '\u0439'   #  0xCA -> CYRILLIC SMALL LETTER SHORT I
+    '\u043a'   #  0xCB -> CYRILLIC SMALL LETTER KA
+    '\u043b'   #  0xCC -> CYRILLIC SMALL LETTER EL
+    '\u043c'   #  0xCD -> CYRILLIC SMALL LETTER EM
+    '\u043d'   #  0xCE -> CYRILLIC SMALL LETTER EN
+    '\u043e'   #  0xCF -> CYRILLIC SMALL LETTER O
+    '\u043f'   #  0xD0 -> CYRILLIC SMALL LETTER PE
+    '\u044f'   #  0xD1 -> CYRILLIC SMALL LETTER YA
+    '\u0440'   #  0xD2 -> CYRILLIC SMALL LETTER ER
+    '\u0441'   #  0xD3 -> CYRILLIC SMALL LETTER ES
+    '\u0442'   #  0xD4 -> CYRILLIC SMALL LETTER TE
+    '\u0443'   #  0xD5 -> CYRILLIC SMALL LETTER U
+    '\u0436'   #  0xD6 -> CYRILLIC SMALL LETTER ZHE
+    '\u0432'   #  0xD7 -> CYRILLIC SMALL LETTER VE
+    '\u044c'   #  0xD8 -> CYRILLIC SMALL LETTER SOFT SIGN
+    '\u044b'   #  0xD9 -> CYRILLIC SMALL LETTER YERU
+    '\u0437'   #  0xDA -> CYRILLIC SMALL LETTER ZE
+    '\u0448'   #  0xDB -> CYRILLIC SMALL LETTER SHA
+    '\u044d'   #  0xDC -> CYRILLIC SMALL LETTER E
+    '\u0449'   #  0xDD -> CYRILLIC SMALL LETTER SHCHA
+    '\u0447'   #  0xDE -> CYRILLIC SMALL LETTER CHE
+    '\u044a'   #  0xDF -> CYRILLIC SMALL LETTER HARD SIGN
+    '\u042e'   #  0xE0 -> CYRILLIC CAPITAL LETTER YU
+    '\u0410'   #  0xE1 -> CYRILLIC CAPITAL LETTER A
+    '\u0411'   #  0xE2 -> CYRILLIC CAPITAL LETTER BE
+    '\u0426'   #  0xE3 -> CYRILLIC CAPITAL LETTER TSE
+    '\u0414'   #  0xE4 -> CYRILLIC CAPITAL LETTER DE
+    '\u0415'   #  0xE5 -> CYRILLIC CAPITAL LETTER IE
+    '\u0424'   #  0xE6 -> CYRILLIC CAPITAL LETTER EF
+    '\u0413'   #  0xE7 -> CYRILLIC CAPITAL LETTER GHE
+    '\u0425'   #  0xE8 -> CYRILLIC CAPITAL LETTER HA
+    '\u0418'   #  0xE9 -> CYRILLIC CAPITAL LETTER I
+    '\u0419'   #  0xEA -> CYRILLIC CAPITAL LETTER SHORT I
+    '\u041a'   #  0xEB -> CYRILLIC CAPITAL LETTER KA
+    '\u041b'   #  0xEC -> CYRILLIC CAPITAL LETTER EL
+    '\u041c'   #  0xED -> CYRILLIC CAPITAL LETTER EM
+    '\u041d'   #  0xEE -> CYRILLIC CAPITAL LETTER EN
+    '\u041e'   #  0xEF -> CYRILLIC CAPITAL LETTER O
+    '\u041f'   #  0xF0 -> CYRILLIC CAPITAL LETTER PE
+    '\u042f'   #  0xF1 -> CYRILLIC CAPITAL LETTER YA
+    '\u0420'   #  0xF2 -> CYRILLIC CAPITAL LETTER ER
+    '\u0421'   #  0xF3 -> CYRILLIC CAPITAL LETTER ES
+    '\u0422'   #  0xF4 -> CYRILLIC CAPITAL LETTER TE
+    '\u0423'   #  0xF5 -> CYRILLIC CAPITAL LETTER U
+    '\u0416'   #  0xF6 -> CYRILLIC CAPITAL LETTER ZHE
+    '\u0412'   #  0xF7 -> CYRILLIC CAPITAL LETTER VE
+    '\u042c'   #  0xF8 -> CYRILLIC CAPITAL LETTER SOFT SIGN
+    '\u042b'   #  0xF9 -> CYRILLIC CAPITAL LETTER YERU
+    '\u0417'   #  0xFA -> CYRILLIC CAPITAL LETTER ZE
+    '\u0428'   #  0xFB -> CYRILLIC CAPITAL LETTER SHA
+    '\u042d'   #  0xFC -> CYRILLIC CAPITAL LETTER E
+    '\u0429'   #  0xFD -> CYRILLIC CAPITAL LETTER SHCHA
+    '\u0427'   #  0xFE -> CYRILLIC CAPITAL LETTER CHE
+    '\u042a'   #  0xFF -> CYRILLIC CAPITAL LETTER HARD SIGN
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/mac_arabic.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_arabic.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_arabic.py	Wed May  2 21:09:54 2007
@@ -178,262 +178,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x0000 -> CONTROL CHARACTER
-    u'\x01'     #  0x0001 -> CONTROL CHARACTER
-    u'\x02'     #  0x0002 -> CONTROL CHARACTER
-    u'\x03'     #  0x0003 -> CONTROL CHARACTER
-    u'\x04'     #  0x0004 -> CONTROL CHARACTER
-    u'\x05'     #  0x0005 -> CONTROL CHARACTER
-    u'\x06'     #  0x0006 -> CONTROL CHARACTER
-    u'\x07'     #  0x0007 -> CONTROL CHARACTER
-    u'\x08'     #  0x0008 -> CONTROL CHARACTER
-    u'\t'       #  0x0009 -> CONTROL CHARACTER
-    u'\n'       #  0x000a -> CONTROL CHARACTER
-    u'\x0b'     #  0x000b -> CONTROL CHARACTER
-    u'\x0c'     #  0x000c -> CONTROL CHARACTER
-    u'\r'       #  0x000d -> CONTROL CHARACTER
-    u'\x0e'     #  0x000e -> CONTROL CHARACTER
-    u'\x0f'     #  0x000f -> CONTROL CHARACTER
-    u'\x10'     #  0x0010 -> CONTROL CHARACTER
-    u'\x11'     #  0x0011 -> CONTROL CHARACTER
-    u'\x12'     #  0x0012 -> CONTROL CHARACTER
-    u'\x13'     #  0x0013 -> CONTROL CHARACTER
-    u'\x14'     #  0x0014 -> CONTROL CHARACTER
-    u'\x15'     #  0x0015 -> CONTROL CHARACTER
-    u'\x16'     #  0x0016 -> CONTROL CHARACTER
-    u'\x17'     #  0x0017 -> CONTROL CHARACTER
-    u'\x18'     #  0x0018 -> CONTROL CHARACTER
-    u'\x19'     #  0x0019 -> CONTROL CHARACTER
-    u'\x1a'     #  0x001a -> CONTROL CHARACTER
-    u'\x1b'     #  0x001b -> CONTROL CHARACTER
-    u'\x1c'     #  0x001c -> CONTROL CHARACTER
-    u'\x1d'     #  0x001d -> CONTROL CHARACTER
-    u'\x1e'     #  0x001e -> CONTROL CHARACTER
-    u'\x1f'     #  0x001f -> CONTROL CHARACTER
-    u' '        #  0x0020 -> SPACE, left-right
-    u'!'        #  0x0021 -> EXCLAMATION MARK, left-right
-    u'"'        #  0x0022 -> QUOTATION MARK, left-right
-    u'#'        #  0x0023 -> NUMBER SIGN, left-right
-    u'$'        #  0x0024 -> DOLLAR SIGN, left-right
-    u'%'        #  0x0025 -> PERCENT SIGN, left-right
-    u'&'        #  0x0026 -> AMPERSAND, left-right
-    u"'"        #  0x0027 -> APOSTROPHE, left-right
-    u'('        #  0x0028 -> LEFT PARENTHESIS, left-right
-    u')'        #  0x0029 -> RIGHT PARENTHESIS, left-right
-    u'*'        #  0x002a -> ASTERISK, left-right
-    u'+'        #  0x002b -> PLUS SIGN, left-right
-    u','        #  0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR
-    u'-'        #  0x002d -> HYPHEN-MINUS, left-right
-    u'.'        #  0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR
-    u'/'        #  0x002f -> SOLIDUS, left-right
-    u'0'        #  0x0030 -> DIGIT ZERO;  in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO
-    u'1'        #  0x0031 -> DIGIT ONE;   in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE
-    u'2'        #  0x0032 -> DIGIT TWO;   in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO
-    u'3'        #  0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE
-    u'4'        #  0x0034 -> DIGIT FOUR;  in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR
-    u'5'        #  0x0035 -> DIGIT FIVE;  in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE
-    u'6'        #  0x0036 -> DIGIT SIX;   in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX
-    u'7'        #  0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN
-    u'8'        #  0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT
-    u'9'        #  0x0039 -> DIGIT NINE;  in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE
-    u':'        #  0x003a -> COLON, left-right
-    u';'        #  0x003b -> SEMICOLON, left-right
-    u'<'        #  0x003c -> LESS-THAN SIGN, left-right
-    u'='        #  0x003d -> EQUALS SIGN, left-right
-    u'>'        #  0x003e -> GREATER-THAN SIGN, left-right
-    u'?'        #  0x003f -> QUESTION MARK, left-right
-    u'@'        #  0x0040 -> COMMERCIAL AT
-    u'A'        #  0x0041 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x0042 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x0043 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x0044 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x0045 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x0046 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x0047 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x0048 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x0049 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x004a -> LATIN CAPITAL LETTER J
-    u'K'        #  0x004b -> LATIN CAPITAL LETTER K
-    u'L'        #  0x004c -> LATIN CAPITAL LETTER L
-    u'M'        #  0x004d -> LATIN CAPITAL LETTER M
-    u'N'        #  0x004e -> LATIN CAPITAL LETTER N
-    u'O'        #  0x004f -> LATIN CAPITAL LETTER O
-    u'P'        #  0x0050 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x0052 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x0053 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x0054 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x0055 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x0056 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x0057 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x0058 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
-    u'['        #  0x005b -> LEFT SQUARE BRACKET, left-right
-    u'\\'       #  0x005c -> REVERSE SOLIDUS, left-right
-    u']'        #  0x005d -> RIGHT SQUARE BRACKET, left-right
-    u'^'        #  0x005e -> CIRCUMFLEX ACCENT, left-right
-    u'_'        #  0x005f -> LOW LINE, left-right
-    u'`'        #  0x0060 -> GRAVE ACCENT
-    u'a'        #  0x0061 -> LATIN SMALL LETTER A
-    u'b'        #  0x0062 -> LATIN SMALL LETTER B
-    u'c'        #  0x0063 -> LATIN SMALL LETTER C
-    u'd'        #  0x0064 -> LATIN SMALL LETTER D
-    u'e'        #  0x0065 -> LATIN SMALL LETTER E
-    u'f'        #  0x0066 -> LATIN SMALL LETTER F
-    u'g'        #  0x0067 -> LATIN SMALL LETTER G
-    u'h'        #  0x0068 -> LATIN SMALL LETTER H
-    u'i'        #  0x0069 -> LATIN SMALL LETTER I
-    u'j'        #  0x006a -> LATIN SMALL LETTER J
-    u'k'        #  0x006b -> LATIN SMALL LETTER K
-    u'l'        #  0x006c -> LATIN SMALL LETTER L
-    u'm'        #  0x006d -> LATIN SMALL LETTER M
-    u'n'        #  0x006e -> LATIN SMALL LETTER N
-    u'o'        #  0x006f -> LATIN SMALL LETTER O
-    u'p'        #  0x0070 -> LATIN SMALL LETTER P
-    u'q'        #  0x0071 -> LATIN SMALL LETTER Q
-    u'r'        #  0x0072 -> LATIN SMALL LETTER R
-    u's'        #  0x0073 -> LATIN SMALL LETTER S
-    u't'        #  0x0074 -> LATIN SMALL LETTER T
-    u'u'        #  0x0075 -> LATIN SMALL LETTER U
-    u'v'        #  0x0076 -> LATIN SMALL LETTER V
-    u'w'        #  0x0077 -> LATIN SMALL LETTER W
-    u'x'        #  0x0078 -> LATIN SMALL LETTER X
-    u'y'        #  0x0079 -> LATIN SMALL LETTER Y
-    u'z'        #  0x007a -> LATIN SMALL LETTER Z
-    u'{'        #  0x007b -> LEFT CURLY BRACKET, left-right
-    u'|'        #  0x007c -> VERTICAL LINE, left-right
-    u'}'        #  0x007d -> RIGHT CURLY BRACKET, left-right
-    u'~'        #  0x007e -> TILDE
-    u'\x7f'     #  0x007f -> CONTROL CHARACTER
-    u'\xc4'     #  0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xa0'     #  0x0081 -> NO-BREAK SPACE, right-left
-    u'\xc7'     #  0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc9'     #  0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xd1'     #  0x0084 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd6'     #  0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xe1'     #  0x0087 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe0'     #  0x0088 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe2'     #  0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x008a -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u06ba'   #  0x008b -> ARABIC LETTER NOON GHUNNA
-    u'\xab'     #  0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
-    u'\xe7'     #  0x008d -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe9'     #  0x008e -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe8'     #  0x008f -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xea'     #  0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xed'     #  0x0092 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\u2026'   #  0x0093 -> HORIZONTAL ELLIPSIS, right-left
-    u'\xee'     #  0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf1'     #  0x0096 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf3'     #  0x0097 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xbb'     #  0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
-    u'\xf4'     #  0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x009a -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0x009b -> DIVISION SIGN, right-left
-    u'\xfa'     #  0x009c -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf9'     #  0x009d -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfb'     #  0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0x009f -> LATIN SMALL LETTER U WITH DIAERESIS
-    u' '        #  0x00a0 -> SPACE, right-left
-    u'!'        #  0x00a1 -> EXCLAMATION MARK, right-left
-    u'"'        #  0x00a2 -> QUOTATION MARK, right-left
-    u'#'        #  0x00a3 -> NUMBER SIGN, right-left
-    u'$'        #  0x00a4 -> DOLLAR SIGN, right-left
-    u'\u066a'   #  0x00a5 -> ARABIC PERCENT SIGN
-    u'&'        #  0x00a6 -> AMPERSAND, right-left
-    u"'"        #  0x00a7 -> APOSTROPHE, right-left
-    u'('        #  0x00a8 -> LEFT PARENTHESIS, right-left
-    u')'        #  0x00a9 -> RIGHT PARENTHESIS, right-left
-    u'*'        #  0x00aa -> ASTERISK, right-left
-    u'+'        #  0x00ab -> PLUS SIGN, right-left
-    u'\u060c'   #  0x00ac -> ARABIC COMMA
-    u'-'        #  0x00ad -> HYPHEN-MINUS, right-left
-    u'.'        #  0x00ae -> FULL STOP, right-left
-    u'/'        #  0x00af -> SOLIDUS, right-left
-    u'\u0660'   #  0x00b0 -> ARABIC-INDIC DIGIT ZERO, right-left (need override)
-    u'\u0661'   #  0x00b1 -> ARABIC-INDIC DIGIT ONE, right-left (need override)
-    u'\u0662'   #  0x00b2 -> ARABIC-INDIC DIGIT TWO, right-left (need override)
-    u'\u0663'   #  0x00b3 -> ARABIC-INDIC DIGIT THREE, right-left (need override)
-    u'\u0664'   #  0x00b4 -> ARABIC-INDIC DIGIT FOUR, right-left (need override)
-    u'\u0665'   #  0x00b5 -> ARABIC-INDIC DIGIT FIVE, right-left (need override)
-    u'\u0666'   #  0x00b6 -> ARABIC-INDIC DIGIT SIX, right-left (need override)
-    u'\u0667'   #  0x00b7 -> ARABIC-INDIC DIGIT SEVEN, right-left (need override)
-    u'\u0668'   #  0x00b8 -> ARABIC-INDIC DIGIT EIGHT, right-left (need override)
-    u'\u0669'   #  0x00b9 -> ARABIC-INDIC DIGIT NINE, right-left (need override)
-    u':'        #  0x00ba -> COLON, right-left
-    u'\u061b'   #  0x00bb -> ARABIC SEMICOLON
-    u'<'        #  0x00bc -> LESS-THAN SIGN, right-left
-    u'='        #  0x00bd -> EQUALS SIGN, right-left
-    u'>'        #  0x00be -> GREATER-THAN SIGN, right-left
-    u'\u061f'   #  0x00bf -> ARABIC QUESTION MARK
-    u'\u274a'   #  0x00c0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left
-    u'\u0621'   #  0x00c1 -> ARABIC LETTER HAMZA
-    u'\u0622'   #  0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
-    u'\u0623'   #  0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
-    u'\u0624'   #  0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
-    u'\u0625'   #  0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
-    u'\u0626'   #  0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
-    u'\u0627'   #  0x00c7 -> ARABIC LETTER ALEF
-    u'\u0628'   #  0x00c8 -> ARABIC LETTER BEH
-    u'\u0629'   #  0x00c9 -> ARABIC LETTER TEH MARBUTA
-    u'\u062a'   #  0x00ca -> ARABIC LETTER TEH
-    u'\u062b'   #  0x00cb -> ARABIC LETTER THEH
-    u'\u062c'   #  0x00cc -> ARABIC LETTER JEEM
-    u'\u062d'   #  0x00cd -> ARABIC LETTER HAH
-    u'\u062e'   #  0x00ce -> ARABIC LETTER KHAH
-    u'\u062f'   #  0x00cf -> ARABIC LETTER DAL
-    u'\u0630'   #  0x00d0 -> ARABIC LETTER THAL
-    u'\u0631'   #  0x00d1 -> ARABIC LETTER REH
-    u'\u0632'   #  0x00d2 -> ARABIC LETTER ZAIN
-    u'\u0633'   #  0x00d3 -> ARABIC LETTER SEEN
-    u'\u0634'   #  0x00d4 -> ARABIC LETTER SHEEN
-    u'\u0635'   #  0x00d5 -> ARABIC LETTER SAD
-    u'\u0636'   #  0x00d6 -> ARABIC LETTER DAD
-    u'\u0637'   #  0x00d7 -> ARABIC LETTER TAH
-    u'\u0638'   #  0x00d8 -> ARABIC LETTER ZAH
-    u'\u0639'   #  0x00d9 -> ARABIC LETTER AIN
-    u'\u063a'   #  0x00da -> ARABIC LETTER GHAIN
-    u'['        #  0x00db -> LEFT SQUARE BRACKET, right-left
-    u'\\'       #  0x00dc -> REVERSE SOLIDUS, right-left
-    u']'        #  0x00dd -> RIGHT SQUARE BRACKET, right-left
-    u'^'        #  0x00de -> CIRCUMFLEX ACCENT, right-left
-    u'_'        #  0x00df -> LOW LINE, right-left
-    u'\u0640'   #  0x00e0 -> ARABIC TATWEEL
-    u'\u0641'   #  0x00e1 -> ARABIC LETTER FEH
-    u'\u0642'   #  0x00e2 -> ARABIC LETTER QAF
-    u'\u0643'   #  0x00e3 -> ARABIC LETTER KAF
-    u'\u0644'   #  0x00e4 -> ARABIC LETTER LAM
-    u'\u0645'   #  0x00e5 -> ARABIC LETTER MEEM
-    u'\u0646'   #  0x00e6 -> ARABIC LETTER NOON
-    u'\u0647'   #  0x00e7 -> ARABIC LETTER HEH
-    u'\u0648'   #  0x00e8 -> ARABIC LETTER WAW
-    u'\u0649'   #  0x00e9 -> ARABIC LETTER ALEF MAKSURA
-    u'\u064a'   #  0x00ea -> ARABIC LETTER YEH
-    u'\u064b'   #  0x00eb -> ARABIC FATHATAN
-    u'\u064c'   #  0x00ec -> ARABIC DAMMATAN
-    u'\u064d'   #  0x00ed -> ARABIC KASRATAN
-    u'\u064e'   #  0x00ee -> ARABIC FATHA
-    u'\u064f'   #  0x00ef -> ARABIC DAMMA
-    u'\u0650'   #  0x00f0 -> ARABIC KASRA
-    u'\u0651'   #  0x00f1 -> ARABIC SHADDA
-    u'\u0652'   #  0x00f2 -> ARABIC SUKUN
-    u'\u067e'   #  0x00f3 -> ARABIC LETTER PEH
-    u'\u0679'   #  0x00f4 -> ARABIC LETTER TTEH
-    u'\u0686'   #  0x00f5 -> ARABIC LETTER TCHEH
-    u'\u06d5'   #  0x00f6 -> ARABIC LETTER AE
-    u'\u06a4'   #  0x00f7 -> ARABIC LETTER VEH
-    u'\u06af'   #  0x00f8 -> ARABIC LETTER GAF
-    u'\u0688'   #  0x00f9 -> ARABIC LETTER DDAL
-    u'\u0691'   #  0x00fa -> ARABIC LETTER RREH
-    u'{'        #  0x00fb -> LEFT CURLY BRACKET, right-left
-    u'|'        #  0x00fc -> VERTICAL LINE, right-left
-    u'}'        #  0x00fd -> RIGHT CURLY BRACKET, right-left
-    u'\u0698'   #  0x00fe -> ARABIC LETTER JEH
-    u'\u06d2'   #  0x00ff -> ARABIC LETTER YEH BARREE
+    '\x00'     #  0x0000 -> CONTROL CHARACTER
+    '\x01'     #  0x0001 -> CONTROL CHARACTER
+    '\x02'     #  0x0002 -> CONTROL CHARACTER
+    '\x03'     #  0x0003 -> CONTROL CHARACTER
+    '\x04'     #  0x0004 -> CONTROL CHARACTER
+    '\x05'     #  0x0005 -> CONTROL CHARACTER
+    '\x06'     #  0x0006 -> CONTROL CHARACTER
+    '\x07'     #  0x0007 -> CONTROL CHARACTER
+    '\x08'     #  0x0008 -> CONTROL CHARACTER
+    '\t'       #  0x0009 -> CONTROL CHARACTER
+    '\n'       #  0x000a -> CONTROL CHARACTER
+    '\x0b'     #  0x000b -> CONTROL CHARACTER
+    '\x0c'     #  0x000c -> CONTROL CHARACTER
+    '\r'       #  0x000d -> CONTROL CHARACTER
+    '\x0e'     #  0x000e -> CONTROL CHARACTER
+    '\x0f'     #  0x000f -> CONTROL CHARACTER
+    '\x10'     #  0x0010 -> CONTROL CHARACTER
+    '\x11'     #  0x0011 -> CONTROL CHARACTER
+    '\x12'     #  0x0012 -> CONTROL CHARACTER
+    '\x13'     #  0x0013 -> CONTROL CHARACTER
+    '\x14'     #  0x0014 -> CONTROL CHARACTER
+    '\x15'     #  0x0015 -> CONTROL CHARACTER
+    '\x16'     #  0x0016 -> CONTROL CHARACTER
+    '\x17'     #  0x0017 -> CONTROL CHARACTER
+    '\x18'     #  0x0018 -> CONTROL CHARACTER
+    '\x19'     #  0x0019 -> CONTROL CHARACTER
+    '\x1a'     #  0x001a -> CONTROL CHARACTER
+    '\x1b'     #  0x001b -> CONTROL CHARACTER
+    '\x1c'     #  0x001c -> CONTROL CHARACTER
+    '\x1d'     #  0x001d -> CONTROL CHARACTER
+    '\x1e'     #  0x001e -> CONTROL CHARACTER
+    '\x1f'     #  0x001f -> CONTROL CHARACTER
+    ' '        #  0x0020 -> SPACE, left-right
+    '!'        #  0x0021 -> EXCLAMATION MARK, left-right
+    '"'        #  0x0022 -> QUOTATION MARK, left-right
+    '#'        #  0x0023 -> NUMBER SIGN, left-right
+    '$'        #  0x0024 -> DOLLAR SIGN, left-right
+    '%'        #  0x0025 -> PERCENT SIGN, left-right
+    '&'        #  0x0026 -> AMPERSAND, left-right
+    "'"        #  0x0027 -> APOSTROPHE, left-right
+    '('        #  0x0028 -> LEFT PARENTHESIS, left-right
+    ')'        #  0x0029 -> RIGHT PARENTHESIS, left-right
+    '*'        #  0x002a -> ASTERISK, left-right
+    '+'        #  0x002b -> PLUS SIGN, left-right
+    ','        #  0x002c -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR
+    '-'        #  0x002d -> HYPHEN-MINUS, left-right
+    '.'        #  0x002e -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR
+    '/'        #  0x002f -> SOLIDUS, left-right
+    '0'        #  0x0030 -> DIGIT ZERO;  in Arabic-script context, displayed as 0x0660 ARABIC-INDIC DIGIT ZERO
+    '1'        #  0x0031 -> DIGIT ONE;   in Arabic-script context, displayed as 0x0661 ARABIC-INDIC DIGIT ONE
+    '2'        #  0x0032 -> DIGIT TWO;   in Arabic-script context, displayed as 0x0662 ARABIC-INDIC DIGIT TWO
+    '3'        #  0x0033 -> DIGIT THREE; in Arabic-script context, displayed as 0x0663 ARABIC-INDIC DIGIT THREE
+    '4'        #  0x0034 -> DIGIT FOUR;  in Arabic-script context, displayed as 0x0664 ARABIC-INDIC DIGIT FOUR
+    '5'        #  0x0035 -> DIGIT FIVE;  in Arabic-script context, displayed as 0x0665 ARABIC-INDIC DIGIT FIVE
+    '6'        #  0x0036 -> DIGIT SIX;   in Arabic-script context, displayed as 0x0666 ARABIC-INDIC DIGIT SIX
+    '7'        #  0x0037 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x0667 ARABIC-INDIC DIGIT SEVEN
+    '8'        #  0x0038 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x0668 ARABIC-INDIC DIGIT EIGHT
+    '9'        #  0x0039 -> DIGIT NINE;  in Arabic-script context, displayed as 0x0669 ARABIC-INDIC DIGIT NINE
+    ':'        #  0x003a -> COLON, left-right
+    ';'        #  0x003b -> SEMICOLON, left-right
+    '<'        #  0x003c -> LESS-THAN SIGN, left-right
+    '='        #  0x003d -> EQUALS SIGN, left-right
+    '>'        #  0x003e -> GREATER-THAN SIGN, left-right
+    '?'        #  0x003f -> QUESTION MARK, left-right
+    '@'        #  0x0040 -> COMMERCIAL AT
+    'A'        #  0x0041 -> LATIN CAPITAL LETTER A
+    'B'        #  0x0042 -> LATIN CAPITAL LETTER B
+    'C'        #  0x0043 -> LATIN CAPITAL LETTER C
+    'D'        #  0x0044 -> LATIN CAPITAL LETTER D
+    'E'        #  0x0045 -> LATIN CAPITAL LETTER E
+    'F'        #  0x0046 -> LATIN CAPITAL LETTER F
+    'G'        #  0x0047 -> LATIN CAPITAL LETTER G
+    'H'        #  0x0048 -> LATIN CAPITAL LETTER H
+    'I'        #  0x0049 -> LATIN CAPITAL LETTER I
+    'J'        #  0x004a -> LATIN CAPITAL LETTER J
+    'K'        #  0x004b -> LATIN CAPITAL LETTER K
+    'L'        #  0x004c -> LATIN CAPITAL LETTER L
+    'M'        #  0x004d -> LATIN CAPITAL LETTER M
+    'N'        #  0x004e -> LATIN CAPITAL LETTER N
+    'O'        #  0x004f -> LATIN CAPITAL LETTER O
+    'P'        #  0x0050 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x0051 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x0052 -> LATIN CAPITAL LETTER R
+    'S'        #  0x0053 -> LATIN CAPITAL LETTER S
+    'T'        #  0x0054 -> LATIN CAPITAL LETTER T
+    'U'        #  0x0055 -> LATIN CAPITAL LETTER U
+    'V'        #  0x0056 -> LATIN CAPITAL LETTER V
+    'W'        #  0x0057 -> LATIN CAPITAL LETTER W
+    'X'        #  0x0058 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x0059 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x005a -> LATIN CAPITAL LETTER Z
+    '['        #  0x005b -> LEFT SQUARE BRACKET, left-right
+    '\\'       #  0x005c -> REVERSE SOLIDUS, left-right
+    ']'        #  0x005d -> RIGHT SQUARE BRACKET, left-right
+    '^'        #  0x005e -> CIRCUMFLEX ACCENT, left-right
+    '_'        #  0x005f -> LOW LINE, left-right
+    '`'        #  0x0060 -> GRAVE ACCENT
+    'a'        #  0x0061 -> LATIN SMALL LETTER A
+    'b'        #  0x0062 -> LATIN SMALL LETTER B
+    'c'        #  0x0063 -> LATIN SMALL LETTER C
+    'd'        #  0x0064 -> LATIN SMALL LETTER D
+    'e'        #  0x0065 -> LATIN SMALL LETTER E
+    'f'        #  0x0066 -> LATIN SMALL LETTER F
+    'g'        #  0x0067 -> LATIN SMALL LETTER G
+    'h'        #  0x0068 -> LATIN SMALL LETTER H
+    'i'        #  0x0069 -> LATIN SMALL LETTER I
+    'j'        #  0x006a -> LATIN SMALL LETTER J
+    'k'        #  0x006b -> LATIN SMALL LETTER K
+    'l'        #  0x006c -> LATIN SMALL LETTER L
+    'm'        #  0x006d -> LATIN SMALL LETTER M
+    'n'        #  0x006e -> LATIN SMALL LETTER N
+    'o'        #  0x006f -> LATIN SMALL LETTER O
+    'p'        #  0x0070 -> LATIN SMALL LETTER P
+    'q'        #  0x0071 -> LATIN SMALL LETTER Q
+    'r'        #  0x0072 -> LATIN SMALL LETTER R
+    's'        #  0x0073 -> LATIN SMALL LETTER S
+    't'        #  0x0074 -> LATIN SMALL LETTER T
+    'u'        #  0x0075 -> LATIN SMALL LETTER U
+    'v'        #  0x0076 -> LATIN SMALL LETTER V
+    'w'        #  0x0077 -> LATIN SMALL LETTER W
+    'x'        #  0x0078 -> LATIN SMALL LETTER X
+    'y'        #  0x0079 -> LATIN SMALL LETTER Y
+    'z'        #  0x007a -> LATIN SMALL LETTER Z
+    '{'        #  0x007b -> LEFT CURLY BRACKET, left-right
+    '|'        #  0x007c -> VERTICAL LINE, left-right
+    '}'        #  0x007d -> RIGHT CURLY BRACKET, left-right
+    '~'        #  0x007e -> TILDE
+    '\x7f'     #  0x007f -> CONTROL CHARACTER
+    '\xc4'     #  0x0080 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xa0'     #  0x0081 -> NO-BREAK SPACE, right-left
+    '\xc7'     #  0x0082 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc9'     #  0x0083 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xd1'     #  0x0084 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd6'     #  0x0085 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x0086 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xe1'     #  0x0087 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe0'     #  0x0088 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe2'     #  0x0089 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x008a -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u06ba'   #  0x008b -> ARABIC LETTER NOON GHUNNA
+    '\xab'     #  0x008c -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
+    '\xe7'     #  0x008d -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe9'     #  0x008e -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe8'     #  0x008f -> LATIN SMALL LETTER E WITH GRAVE
+    '\xea'     #  0x0090 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x0091 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xed'     #  0x0092 -> LATIN SMALL LETTER I WITH ACUTE
+    '\u2026'   #  0x0093 -> HORIZONTAL ELLIPSIS, right-left
+    '\xee'     #  0x0094 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x0095 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf1'     #  0x0096 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf3'     #  0x0097 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xbb'     #  0x0098 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
+    '\xf4'     #  0x0099 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x009a -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0x009b -> DIVISION SIGN, right-left
+    '\xfa'     #  0x009c -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf9'     #  0x009d -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfb'     #  0x009e -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0x009f -> LATIN SMALL LETTER U WITH DIAERESIS
+    ' '        #  0x00a0 -> SPACE, right-left
+    '!'        #  0x00a1 -> EXCLAMATION MARK, right-left
+    '"'        #  0x00a2 -> QUOTATION MARK, right-left
+    '#'        #  0x00a3 -> NUMBER SIGN, right-left
+    '$'        #  0x00a4 -> DOLLAR SIGN, right-left
+    '\u066a'   #  0x00a5 -> ARABIC PERCENT SIGN
+    '&'        #  0x00a6 -> AMPERSAND, right-left
+    "'"        #  0x00a7 -> APOSTROPHE, right-left
+    '('        #  0x00a8 -> LEFT PARENTHESIS, right-left
+    ')'        #  0x00a9 -> RIGHT PARENTHESIS, right-left
+    '*'        #  0x00aa -> ASTERISK, right-left
+    '+'        #  0x00ab -> PLUS SIGN, right-left
+    '\u060c'   #  0x00ac -> ARABIC COMMA
+    '-'        #  0x00ad -> HYPHEN-MINUS, right-left
+    '.'        #  0x00ae -> FULL STOP, right-left
+    '/'        #  0x00af -> SOLIDUS, right-left
+    '\u0660'   #  0x00b0 -> ARABIC-INDIC DIGIT ZERO, right-left (need override)
+    '\u0661'   #  0x00b1 -> ARABIC-INDIC DIGIT ONE, right-left (need override)
+    '\u0662'   #  0x00b2 -> ARABIC-INDIC DIGIT TWO, right-left (need override)
+    '\u0663'   #  0x00b3 -> ARABIC-INDIC DIGIT THREE, right-left (need override)
+    '\u0664'   #  0x00b4 -> ARABIC-INDIC DIGIT FOUR, right-left (need override)
+    '\u0665'   #  0x00b5 -> ARABIC-INDIC DIGIT FIVE, right-left (need override)
+    '\u0666'   #  0x00b6 -> ARABIC-INDIC DIGIT SIX, right-left (need override)
+    '\u0667'   #  0x00b7 -> ARABIC-INDIC DIGIT SEVEN, right-left (need override)
+    '\u0668'   #  0x00b8 -> ARABIC-INDIC DIGIT EIGHT, right-left (need override)
+    '\u0669'   #  0x00b9 -> ARABIC-INDIC DIGIT NINE, right-left (need override)
+    ':'        #  0x00ba -> COLON, right-left
+    '\u061b'   #  0x00bb -> ARABIC SEMICOLON
+    '<'        #  0x00bc -> LESS-THAN SIGN, right-left
+    '='        #  0x00bd -> EQUALS SIGN, right-left
+    '>'        #  0x00be -> GREATER-THAN SIGN, right-left
+    '\u061f'   #  0x00bf -> ARABIC QUESTION MARK
+    '\u274a'   #  0x00c0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left
+    '\u0621'   #  0x00c1 -> ARABIC LETTER HAMZA
+    '\u0622'   #  0x00c2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
+    '\u0623'   #  0x00c3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
+    '\u0624'   #  0x00c4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
+    '\u0625'   #  0x00c5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
+    '\u0626'   #  0x00c6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
+    '\u0627'   #  0x00c7 -> ARABIC LETTER ALEF
+    '\u0628'   #  0x00c8 -> ARABIC LETTER BEH
+    '\u0629'   #  0x00c9 -> ARABIC LETTER TEH MARBUTA
+    '\u062a'   #  0x00ca -> ARABIC LETTER TEH
+    '\u062b'   #  0x00cb -> ARABIC LETTER THEH
+    '\u062c'   #  0x00cc -> ARABIC LETTER JEEM
+    '\u062d'   #  0x00cd -> ARABIC LETTER HAH
+    '\u062e'   #  0x00ce -> ARABIC LETTER KHAH
+    '\u062f'   #  0x00cf -> ARABIC LETTER DAL
+    '\u0630'   #  0x00d0 -> ARABIC LETTER THAL
+    '\u0631'   #  0x00d1 -> ARABIC LETTER REH
+    '\u0632'   #  0x00d2 -> ARABIC LETTER ZAIN
+    '\u0633'   #  0x00d3 -> ARABIC LETTER SEEN
+    '\u0634'   #  0x00d4 -> ARABIC LETTER SHEEN
+    '\u0635'   #  0x00d5 -> ARABIC LETTER SAD
+    '\u0636'   #  0x00d6 -> ARABIC LETTER DAD
+    '\u0637'   #  0x00d7 -> ARABIC LETTER TAH
+    '\u0638'   #  0x00d8 -> ARABIC LETTER ZAH
+    '\u0639'   #  0x00d9 -> ARABIC LETTER AIN
+    '\u063a'   #  0x00da -> ARABIC LETTER GHAIN
+    '['        #  0x00db -> LEFT SQUARE BRACKET, right-left
+    '\\'       #  0x00dc -> REVERSE SOLIDUS, right-left
+    ']'        #  0x00dd -> RIGHT SQUARE BRACKET, right-left
+    '^'        #  0x00de -> CIRCUMFLEX ACCENT, right-left
+    '_'        #  0x00df -> LOW LINE, right-left
+    '\u0640'   #  0x00e0 -> ARABIC TATWEEL
+    '\u0641'   #  0x00e1 -> ARABIC LETTER FEH
+    '\u0642'   #  0x00e2 -> ARABIC LETTER QAF
+    '\u0643'   #  0x00e3 -> ARABIC LETTER KAF
+    '\u0644'   #  0x00e4 -> ARABIC LETTER LAM
+    '\u0645'   #  0x00e5 -> ARABIC LETTER MEEM
+    '\u0646'   #  0x00e6 -> ARABIC LETTER NOON
+    '\u0647'   #  0x00e7 -> ARABIC LETTER HEH
+    '\u0648'   #  0x00e8 -> ARABIC LETTER WAW
+    '\u0649'   #  0x00e9 -> ARABIC LETTER ALEF MAKSURA
+    '\u064a'   #  0x00ea -> ARABIC LETTER YEH
+    '\u064b'   #  0x00eb -> ARABIC FATHATAN
+    '\u064c'   #  0x00ec -> ARABIC DAMMATAN
+    '\u064d'   #  0x00ed -> ARABIC KASRATAN
+    '\u064e'   #  0x00ee -> ARABIC FATHA
+    '\u064f'   #  0x00ef -> ARABIC DAMMA
+    '\u0650'   #  0x00f0 -> ARABIC KASRA
+    '\u0651'   #  0x00f1 -> ARABIC SHADDA
+    '\u0652'   #  0x00f2 -> ARABIC SUKUN
+    '\u067e'   #  0x00f3 -> ARABIC LETTER PEH
+    '\u0679'   #  0x00f4 -> ARABIC LETTER TTEH
+    '\u0686'   #  0x00f5 -> ARABIC LETTER TCHEH
+    '\u06d5'   #  0x00f6 -> ARABIC LETTER AE
+    '\u06a4'   #  0x00f7 -> ARABIC LETTER VEH
+    '\u06af'   #  0x00f8 -> ARABIC LETTER GAF
+    '\u0688'   #  0x00f9 -> ARABIC LETTER DDAL
+    '\u0691'   #  0x00fa -> ARABIC LETTER RREH
+    '{'        #  0x00fb -> LEFT CURLY BRACKET, right-left
+    '|'        #  0x00fc -> VERTICAL LINE, right-left
+    '}'        #  0x00fd -> RIGHT CURLY BRACKET, right-left
+    '\u0698'   #  0x00fe -> ARABIC LETTER JEH
+    '\u06d2'   #  0x00ff -> ARABIC LETTER YEH BARREE
 )
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/encodings/mac_centeuro.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_centeuro.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_centeuro.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> CONTROL CHARACTER
-    u'\x01'     #  0x01 -> CONTROL CHARACTER
-    u'\x02'     #  0x02 -> CONTROL CHARACTER
-    u'\x03'     #  0x03 -> CONTROL CHARACTER
-    u'\x04'     #  0x04 -> CONTROL CHARACTER
-    u'\x05'     #  0x05 -> CONTROL CHARACTER
-    u'\x06'     #  0x06 -> CONTROL CHARACTER
-    u'\x07'     #  0x07 -> CONTROL CHARACTER
-    u'\x08'     #  0x08 -> CONTROL CHARACTER
-    u'\t'       #  0x09 -> CONTROL CHARACTER
-    u'\n'       #  0x0A -> CONTROL CHARACTER
-    u'\x0b'     #  0x0B -> CONTROL CHARACTER
-    u'\x0c'     #  0x0C -> CONTROL CHARACTER
-    u'\r'       #  0x0D -> CONTROL CHARACTER
-    u'\x0e'     #  0x0E -> CONTROL CHARACTER
-    u'\x0f'     #  0x0F -> CONTROL CHARACTER
-    u'\x10'     #  0x10 -> CONTROL CHARACTER
-    u'\x11'     #  0x11 -> CONTROL CHARACTER
-    u'\x12'     #  0x12 -> CONTROL CHARACTER
-    u'\x13'     #  0x13 -> CONTROL CHARACTER
-    u'\x14'     #  0x14 -> CONTROL CHARACTER
-    u'\x15'     #  0x15 -> CONTROL CHARACTER
-    u'\x16'     #  0x16 -> CONTROL CHARACTER
-    u'\x17'     #  0x17 -> CONTROL CHARACTER
-    u'\x18'     #  0x18 -> CONTROL CHARACTER
-    u'\x19'     #  0x19 -> CONTROL CHARACTER
-    u'\x1a'     #  0x1A -> CONTROL CHARACTER
-    u'\x1b'     #  0x1B -> CONTROL CHARACTER
-    u'\x1c'     #  0x1C -> CONTROL CHARACTER
-    u'\x1d'     #  0x1D -> CONTROL CHARACTER
-    u'\x1e'     #  0x1E -> CONTROL CHARACTER
-    u'\x1f'     #  0x1F -> CONTROL CHARACTER
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> CONTROL CHARACTER
-    u'\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\u0100'   #  0x81 -> LATIN CAPITAL LETTER A WITH MACRON
-    u'\u0101'   #  0x82 -> LATIN SMALL LETTER A WITH MACRON
-    u'\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\u0104'   #  0x84 -> LATIN CAPITAL LETTER A WITH OGONEK
-    u'\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\u0105'   #  0x88 -> LATIN SMALL LETTER A WITH OGONEK
-    u'\u010c'   #  0x89 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u010d'   #  0x8B -> LATIN SMALL LETTER C WITH CARON
-    u'\u0106'   #  0x8C -> LATIN CAPITAL LETTER C WITH ACUTE
-    u'\u0107'   #  0x8D -> LATIN SMALL LETTER C WITH ACUTE
-    u'\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
-    u'\u0179'   #  0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE
-    u'\u017a'   #  0x90 -> LATIN SMALL LETTER Z WITH ACUTE
-    u'\u010e'   #  0x91 -> LATIN CAPITAL LETTER D WITH CARON
-    u'\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\u010f'   #  0x93 -> LATIN SMALL LETTER D WITH CARON
-    u'\u0112'   #  0x94 -> LATIN CAPITAL LETTER E WITH MACRON
-    u'\u0113'   #  0x95 -> LATIN SMALL LETTER E WITH MACRON
-    u'\u0116'   #  0x96 -> LATIN CAPITAL LETTER E WITH DOT ABOVE
-    u'\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\u0117'   #  0x98 -> LATIN SMALL LETTER E WITH DOT ABOVE
-    u'\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
-    u'\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
-    u'\u011a'   #  0x9D -> LATIN CAPITAL LETTER E WITH CARON
-    u'\u011b'   #  0x9E -> LATIN SMALL LETTER E WITH CARON
-    u'\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u2020'   #  0xA0 -> DAGGER
-    u'\xb0'     #  0xA1 -> DEGREE SIGN
-    u'\u0118'   #  0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa7'     #  0xA4 -> SECTION SIGN
-    u'\u2022'   #  0xA5 -> BULLET
-    u'\xb6'     #  0xA6 -> PILCROW SIGN
-    u'\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
-    u'\xae'     #  0xA8 -> REGISTERED SIGN
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u2122'   #  0xAA -> TRADE MARK SIGN
-    u'\u0119'   #  0xAB -> LATIN SMALL LETTER E WITH OGONEK
-    u'\xa8'     #  0xAC -> DIAERESIS
-    u'\u2260'   #  0xAD -> NOT EQUAL TO
-    u'\u0123'   #  0xAE -> LATIN SMALL LETTER G WITH CEDILLA
-    u'\u012e'   #  0xAF -> LATIN CAPITAL LETTER I WITH OGONEK
-    u'\u012f'   #  0xB0 -> LATIN SMALL LETTER I WITH OGONEK
-    u'\u012a'   #  0xB1 -> LATIN CAPITAL LETTER I WITH MACRON
-    u'\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
-    u'\u012b'   #  0xB4 -> LATIN SMALL LETTER I WITH MACRON
-    u'\u0136'   #  0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA
-    u'\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
-    u'\u2211'   #  0xB7 -> N-ARY SUMMATION
-    u'\u0142'   #  0xB8 -> LATIN SMALL LETTER L WITH STROKE
-    u'\u013b'   #  0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA
-    u'\u013c'   #  0xBA -> LATIN SMALL LETTER L WITH CEDILLA
-    u'\u013d'   #  0xBB -> LATIN CAPITAL LETTER L WITH CARON
-    u'\u013e'   #  0xBC -> LATIN SMALL LETTER L WITH CARON
-    u'\u0139'   #  0xBD -> LATIN CAPITAL LETTER L WITH ACUTE
-    u'\u013a'   #  0xBE -> LATIN SMALL LETTER L WITH ACUTE
-    u'\u0145'   #  0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA
-    u'\u0146'   #  0xC0 -> LATIN SMALL LETTER N WITH CEDILLA
-    u'\u0143'   #  0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE
-    u'\xac'     #  0xC2 -> NOT SIGN
-    u'\u221a'   #  0xC3 -> SQUARE ROOT
-    u'\u0144'   #  0xC4 -> LATIN SMALL LETTER N WITH ACUTE
-    u'\u0147'   #  0xC5 -> LATIN CAPITAL LETTER N WITH CARON
-    u'\u2206'   #  0xC6 -> INCREMENT
-    u'\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
-    u'\xa0'     #  0xCA -> NO-BREAK SPACE
-    u'\u0148'   #  0xCB -> LATIN SMALL LETTER N WITH CARON
-    u'\u0150'   #  0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
-    u'\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\u0151'   #  0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
-    u'\u014c'   #  0xCF -> LATIN CAPITAL LETTER O WITH MACRON
-    u'\u2013'   #  0xD0 -> EN DASH
-    u'\u2014'   #  0xD1 -> EM DASH
-    u'\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
-    u'\xf7'     #  0xD6 -> DIVISION SIGN
-    u'\u25ca'   #  0xD7 -> LOZENGE
-    u'\u014d'   #  0xD8 -> LATIN SMALL LETTER O WITH MACRON
-    u'\u0154'   #  0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE
-    u'\u0155'   #  0xDA -> LATIN SMALL LETTER R WITH ACUTE
-    u'\u0158'   #  0xDB -> LATIN CAPITAL LETTER R WITH CARON
-    u'\u2039'   #  0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u203a'   #  0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\u0159'   #  0xDE -> LATIN SMALL LETTER R WITH CARON
-    u'\u0156'   #  0xDF -> LATIN CAPITAL LETTER R WITH CEDILLA
-    u'\u0157'   #  0xE0 -> LATIN SMALL LETTER R WITH CEDILLA
-    u'\u0160'   #  0xE1 -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u0161'   #  0xE4 -> LATIN SMALL LETTER S WITH CARON
-    u'\u015a'   #  0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE
-    u'\u015b'   #  0xE6 -> LATIN SMALL LETTER S WITH ACUTE
-    u'\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\u0164'   #  0xE8 -> LATIN CAPITAL LETTER T WITH CARON
-    u'\u0165'   #  0xE9 -> LATIN SMALL LETTER T WITH CARON
-    u'\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\u017d'   #  0xEB -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\u017e'   #  0xEC -> LATIN SMALL LETTER Z WITH CARON
-    u'\u016a'   #  0xED -> LATIN CAPITAL LETTER U WITH MACRON
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\u016b'   #  0xF0 -> LATIN SMALL LETTER U WITH MACRON
-    u'\u016e'   #  0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE
-    u'\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\u016f'   #  0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE
-    u'\u0170'   #  0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
-    u'\u0171'   #  0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
-    u'\u0172'   #  0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK
-    u'\u0173'   #  0xF7 -> LATIN SMALL LETTER U WITH OGONEK
-    u'\xdd'     #  0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xfd'     #  0xF9 -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\u0137'   #  0xFA -> LATIN SMALL LETTER K WITH CEDILLA
-    u'\u017b'   #  0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
-    u'\u0141'   #  0xFC -> LATIN CAPITAL LETTER L WITH STROKE
-    u'\u017c'   #  0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
-    u'\u0122'   #  0xFE -> LATIN CAPITAL LETTER G WITH CEDILLA
-    u'\u02c7'   #  0xFF -> CARON
+    '\x00'     #  0x00 -> CONTROL CHARACTER
+    '\x01'     #  0x01 -> CONTROL CHARACTER
+    '\x02'     #  0x02 -> CONTROL CHARACTER
+    '\x03'     #  0x03 -> CONTROL CHARACTER
+    '\x04'     #  0x04 -> CONTROL CHARACTER
+    '\x05'     #  0x05 -> CONTROL CHARACTER
+    '\x06'     #  0x06 -> CONTROL CHARACTER
+    '\x07'     #  0x07 -> CONTROL CHARACTER
+    '\x08'     #  0x08 -> CONTROL CHARACTER
+    '\t'       #  0x09 -> CONTROL CHARACTER
+    '\n'       #  0x0A -> CONTROL CHARACTER
+    '\x0b'     #  0x0B -> CONTROL CHARACTER
+    '\x0c'     #  0x0C -> CONTROL CHARACTER
+    '\r'       #  0x0D -> CONTROL CHARACTER
+    '\x0e'     #  0x0E -> CONTROL CHARACTER
+    '\x0f'     #  0x0F -> CONTROL CHARACTER
+    '\x10'     #  0x10 -> CONTROL CHARACTER
+    '\x11'     #  0x11 -> CONTROL CHARACTER
+    '\x12'     #  0x12 -> CONTROL CHARACTER
+    '\x13'     #  0x13 -> CONTROL CHARACTER
+    '\x14'     #  0x14 -> CONTROL CHARACTER
+    '\x15'     #  0x15 -> CONTROL CHARACTER
+    '\x16'     #  0x16 -> CONTROL CHARACTER
+    '\x17'     #  0x17 -> CONTROL CHARACTER
+    '\x18'     #  0x18 -> CONTROL CHARACTER
+    '\x19'     #  0x19 -> CONTROL CHARACTER
+    '\x1a'     #  0x1A -> CONTROL CHARACTER
+    '\x1b'     #  0x1B -> CONTROL CHARACTER
+    '\x1c'     #  0x1C -> CONTROL CHARACTER
+    '\x1d'     #  0x1D -> CONTROL CHARACTER
+    '\x1e'     #  0x1E -> CONTROL CHARACTER
+    '\x1f'     #  0x1F -> CONTROL CHARACTER
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> CONTROL CHARACTER
+    '\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\u0100'   #  0x81 -> LATIN CAPITAL LETTER A WITH MACRON
+    '\u0101'   #  0x82 -> LATIN SMALL LETTER A WITH MACRON
+    '\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\u0104'   #  0x84 -> LATIN CAPITAL LETTER A WITH OGONEK
+    '\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
+    '\u0105'   #  0x88 -> LATIN SMALL LETTER A WITH OGONEK
+    '\u010c'   #  0x89 -> LATIN CAPITAL LETTER C WITH CARON
+    '\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u010d'   #  0x8B -> LATIN SMALL LETTER C WITH CARON
+    '\u0106'   #  0x8C -> LATIN CAPITAL LETTER C WITH ACUTE
+    '\u0107'   #  0x8D -> LATIN SMALL LETTER C WITH ACUTE
+    '\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
+    '\u0179'   #  0x8F -> LATIN CAPITAL LETTER Z WITH ACUTE
+    '\u017a'   #  0x90 -> LATIN SMALL LETTER Z WITH ACUTE
+    '\u010e'   #  0x91 -> LATIN CAPITAL LETTER D WITH CARON
+    '\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
+    '\u010f'   #  0x93 -> LATIN SMALL LETTER D WITH CARON
+    '\u0112'   #  0x94 -> LATIN CAPITAL LETTER E WITH MACRON
+    '\u0113'   #  0x95 -> LATIN SMALL LETTER E WITH MACRON
+    '\u0116'   #  0x96 -> LATIN CAPITAL LETTER E WITH DOT ABOVE
+    '\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
+    '\u0117'   #  0x98 -> LATIN SMALL LETTER E WITH DOT ABOVE
+    '\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
+    '\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
+    '\u011a'   #  0x9D -> LATIN CAPITAL LETTER E WITH CARON
+    '\u011b'   #  0x9E -> LATIN SMALL LETTER E WITH CARON
+    '\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u2020'   #  0xA0 -> DAGGER
+    '\xb0'     #  0xA1 -> DEGREE SIGN
+    '\u0118'   #  0xA2 -> LATIN CAPITAL LETTER E WITH OGONEK
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa7'     #  0xA4 -> SECTION SIGN
+    '\u2022'   #  0xA5 -> BULLET
+    '\xb6'     #  0xA6 -> PILCROW SIGN
+    '\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
+    '\xae'     #  0xA8 -> REGISTERED SIGN
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u2122'   #  0xAA -> TRADE MARK SIGN
+    '\u0119'   #  0xAB -> LATIN SMALL LETTER E WITH OGONEK
+    '\xa8'     #  0xAC -> DIAERESIS
+    '\u2260'   #  0xAD -> NOT EQUAL TO
+    '\u0123'   #  0xAE -> LATIN SMALL LETTER G WITH CEDILLA
+    '\u012e'   #  0xAF -> LATIN CAPITAL LETTER I WITH OGONEK
+    '\u012f'   #  0xB0 -> LATIN SMALL LETTER I WITH OGONEK
+    '\u012a'   #  0xB1 -> LATIN CAPITAL LETTER I WITH MACRON
+    '\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
+    '\u012b'   #  0xB4 -> LATIN SMALL LETTER I WITH MACRON
+    '\u0136'   #  0xB5 -> LATIN CAPITAL LETTER K WITH CEDILLA
+    '\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
+    '\u2211'   #  0xB7 -> N-ARY SUMMATION
+    '\u0142'   #  0xB8 -> LATIN SMALL LETTER L WITH STROKE
+    '\u013b'   #  0xB9 -> LATIN CAPITAL LETTER L WITH CEDILLA
+    '\u013c'   #  0xBA -> LATIN SMALL LETTER L WITH CEDILLA
+    '\u013d'   #  0xBB -> LATIN CAPITAL LETTER L WITH CARON
+    '\u013e'   #  0xBC -> LATIN SMALL LETTER L WITH CARON
+    '\u0139'   #  0xBD -> LATIN CAPITAL LETTER L WITH ACUTE
+    '\u013a'   #  0xBE -> LATIN SMALL LETTER L WITH ACUTE
+    '\u0145'   #  0xBF -> LATIN CAPITAL LETTER N WITH CEDILLA
+    '\u0146'   #  0xC0 -> LATIN SMALL LETTER N WITH CEDILLA
+    '\u0143'   #  0xC1 -> LATIN CAPITAL LETTER N WITH ACUTE
+    '\xac'     #  0xC2 -> NOT SIGN
+    '\u221a'   #  0xC3 -> SQUARE ROOT
+    '\u0144'   #  0xC4 -> LATIN SMALL LETTER N WITH ACUTE
+    '\u0147'   #  0xC5 -> LATIN CAPITAL LETTER N WITH CARON
+    '\u2206'   #  0xC6 -> INCREMENT
+    '\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
+    '\xa0'     #  0xCA -> NO-BREAK SPACE
+    '\u0148'   #  0xCB -> LATIN SMALL LETTER N WITH CARON
+    '\u0150'   #  0xCC -> LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+    '\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
+    '\u0151'   #  0xCE -> LATIN SMALL LETTER O WITH DOUBLE ACUTE
+    '\u014c'   #  0xCF -> LATIN CAPITAL LETTER O WITH MACRON
+    '\u2013'   #  0xD0 -> EN DASH
+    '\u2014'   #  0xD1 -> EM DASH
+    '\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
+    '\xf7'     #  0xD6 -> DIVISION SIGN
+    '\u25ca'   #  0xD7 -> LOZENGE
+    '\u014d'   #  0xD8 -> LATIN SMALL LETTER O WITH MACRON
+    '\u0154'   #  0xD9 -> LATIN CAPITAL LETTER R WITH ACUTE
+    '\u0155'   #  0xDA -> LATIN SMALL LETTER R WITH ACUTE
+    '\u0158'   #  0xDB -> LATIN CAPITAL LETTER R WITH CARON
+    '\u2039'   #  0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u203a'   #  0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\u0159'   #  0xDE -> LATIN SMALL LETTER R WITH CARON
+    '\u0156'   #  0xDF -> LATIN CAPITAL LETTER R WITH CEDILLA
+    '\u0157'   #  0xE0 -> LATIN SMALL LETTER R WITH CEDILLA
+    '\u0160'   #  0xE1 -> LATIN CAPITAL LETTER S WITH CARON
+    '\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
+    '\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u0161'   #  0xE4 -> LATIN SMALL LETTER S WITH CARON
+    '\u015a'   #  0xE5 -> LATIN CAPITAL LETTER S WITH ACUTE
+    '\u015b'   #  0xE6 -> LATIN SMALL LETTER S WITH ACUTE
+    '\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\u0164'   #  0xE8 -> LATIN CAPITAL LETTER T WITH CARON
+    '\u0165'   #  0xE9 -> LATIN SMALL LETTER T WITH CARON
+    '\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\u017d'   #  0xEB -> LATIN CAPITAL LETTER Z WITH CARON
+    '\u017e'   #  0xEC -> LATIN SMALL LETTER Z WITH CARON
+    '\u016a'   #  0xED -> LATIN CAPITAL LETTER U WITH MACRON
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\u016b'   #  0xF0 -> LATIN SMALL LETTER U WITH MACRON
+    '\u016e'   #  0xF1 -> LATIN CAPITAL LETTER U WITH RING ABOVE
+    '\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\u016f'   #  0xF3 -> LATIN SMALL LETTER U WITH RING ABOVE
+    '\u0170'   #  0xF4 -> LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+    '\u0171'   #  0xF5 -> LATIN SMALL LETTER U WITH DOUBLE ACUTE
+    '\u0172'   #  0xF6 -> LATIN CAPITAL LETTER U WITH OGONEK
+    '\u0173'   #  0xF7 -> LATIN SMALL LETTER U WITH OGONEK
+    '\xdd'     #  0xF8 -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xfd'     #  0xF9 -> LATIN SMALL LETTER Y WITH ACUTE
+    '\u0137'   #  0xFA -> LATIN SMALL LETTER K WITH CEDILLA
+    '\u017b'   #  0xFB -> LATIN CAPITAL LETTER Z WITH DOT ABOVE
+    '\u0141'   #  0xFC -> LATIN CAPITAL LETTER L WITH STROKE
+    '\u017c'   #  0xFD -> LATIN SMALL LETTER Z WITH DOT ABOVE
+    '\u0122'   #  0xFE -> LATIN CAPITAL LETTER G WITH CEDILLA
+    '\u02c7'   #  0xFF -> CARON
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/mac_croatian.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_croatian.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_croatian.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> CONTROL CHARACTER
-    u'\x01'     #  0x01 -> CONTROL CHARACTER
-    u'\x02'     #  0x02 -> CONTROL CHARACTER
-    u'\x03'     #  0x03 -> CONTROL CHARACTER
-    u'\x04'     #  0x04 -> CONTROL CHARACTER
-    u'\x05'     #  0x05 -> CONTROL CHARACTER
-    u'\x06'     #  0x06 -> CONTROL CHARACTER
-    u'\x07'     #  0x07 -> CONTROL CHARACTER
-    u'\x08'     #  0x08 -> CONTROL CHARACTER
-    u'\t'       #  0x09 -> CONTROL CHARACTER
-    u'\n'       #  0x0A -> CONTROL CHARACTER
-    u'\x0b'     #  0x0B -> CONTROL CHARACTER
-    u'\x0c'     #  0x0C -> CONTROL CHARACTER
-    u'\r'       #  0x0D -> CONTROL CHARACTER
-    u'\x0e'     #  0x0E -> CONTROL CHARACTER
-    u'\x0f'     #  0x0F -> CONTROL CHARACTER
-    u'\x10'     #  0x10 -> CONTROL CHARACTER
-    u'\x11'     #  0x11 -> CONTROL CHARACTER
-    u'\x12'     #  0x12 -> CONTROL CHARACTER
-    u'\x13'     #  0x13 -> CONTROL CHARACTER
-    u'\x14'     #  0x14 -> CONTROL CHARACTER
-    u'\x15'     #  0x15 -> CONTROL CHARACTER
-    u'\x16'     #  0x16 -> CONTROL CHARACTER
-    u'\x17'     #  0x17 -> CONTROL CHARACTER
-    u'\x18'     #  0x18 -> CONTROL CHARACTER
-    u'\x19'     #  0x19 -> CONTROL CHARACTER
-    u'\x1a'     #  0x1A -> CONTROL CHARACTER
-    u'\x1b'     #  0x1B -> CONTROL CHARACTER
-    u'\x1c'     #  0x1C -> CONTROL CHARACTER
-    u'\x1d'     #  0x1D -> CONTROL CHARACTER
-    u'\x1e'     #  0x1E -> CONTROL CHARACTER
-    u'\x1f'     #  0x1F -> CONTROL CHARACTER
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> CONTROL CHARACTER
-    u'\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
-    u'\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u2020'   #  0xA0 -> DAGGER
-    u'\xb0'     #  0xA1 -> DEGREE SIGN
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa7'     #  0xA4 -> SECTION SIGN
-    u'\u2022'   #  0xA5 -> BULLET
-    u'\xb6'     #  0xA6 -> PILCROW SIGN
-    u'\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
-    u'\xae'     #  0xA8 -> REGISTERED SIGN
-    u'\u0160'   #  0xA9 -> LATIN CAPITAL LETTER S WITH CARON
-    u'\u2122'   #  0xAA -> TRADE MARK SIGN
-    u'\xb4'     #  0xAB -> ACUTE ACCENT
-    u'\xa8'     #  0xAC -> DIAERESIS
-    u'\u2260'   #  0xAD -> NOT EQUAL TO
-    u'\u017d'   #  0xAE -> LATIN CAPITAL LETTER Z WITH CARON
-    u'\xd8'     #  0xAF -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\u221e'   #  0xB0 -> INFINITY
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
-    u'\u2206'   #  0xB4 -> INCREMENT
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
-    u'\u2211'   #  0xB7 -> N-ARY SUMMATION
-    u'\u220f'   #  0xB8 -> N-ARY PRODUCT
-    u'\u0161'   #  0xB9 -> LATIN SMALL LETTER S WITH CARON
-    u'\u222b'   #  0xBA -> INTEGRAL
-    u'\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
-    u'\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
-    u'\u017e'   #  0xBE -> LATIN SMALL LETTER Z WITH CARON
-    u'\xf8'     #  0xBF -> LATIN SMALL LETTER O WITH STROKE
-    u'\xbf'     #  0xC0 -> INVERTED QUESTION MARK
-    u'\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
-    u'\xac'     #  0xC2 -> NOT SIGN
-    u'\u221a'   #  0xC3 -> SQUARE ROOT
-    u'\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u2248'   #  0xC5 -> ALMOST EQUAL TO
-    u'\u0106'   #  0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
-    u'\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
-    u'\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
-    u'\xa0'     #  0xCA -> NO-BREAK SPACE
-    u'\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
-    u'\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
-    u'\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
-    u'\u2014'   #  0xD1 -> EM DASH
-    u'\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
-    u'\xf7'     #  0xD6 -> DIVISION SIGN
-    u'\u25ca'   #  0xD7 -> LOZENGE
-    u'\uf8ff'   #  0xD8 -> Apple logo
-    u'\xa9'     #  0xD9 -> COPYRIGHT SIGN
-    u'\u2044'   #  0xDA -> FRACTION SLASH
-    u'\u20ac'   #  0xDB -> EURO SIGN
-    u'\u2039'   #  0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u203a'   #  0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\xc6'     #  0xDE -> LATIN CAPITAL LETTER AE
-    u'\xbb'     #  0xDF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2013'   #  0xE0 -> EN DASH
-    u'\xb7'     #  0xE1 -> MIDDLE DOT
-    u'\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2030'   #  0xE4 -> PER MILLE SIGN
-    u'\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\u0107'   #  0xE6 -> LATIN SMALL LETTER C WITH ACUTE
-    u'\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
-    u'\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
-    u'\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\u0131'   #  0xF5 -> LATIN SMALL LETTER DOTLESS I
-    u'\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u02dc'   #  0xF7 -> SMALL TILDE
-    u'\xaf'     #  0xF8 -> MACRON
-    u'\u03c0'   #  0xF9 -> GREEK SMALL LETTER PI
-    u'\xcb'     #  0xFA -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\u02da'   #  0xFB -> RING ABOVE
-    u'\xb8'     #  0xFC -> CEDILLA
-    u'\xca'     #  0xFD -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xe6'     #  0xFE -> LATIN SMALL LETTER AE
-    u'\u02c7'   #  0xFF -> CARON
+    '\x00'     #  0x00 -> CONTROL CHARACTER
+    '\x01'     #  0x01 -> CONTROL CHARACTER
+    '\x02'     #  0x02 -> CONTROL CHARACTER
+    '\x03'     #  0x03 -> CONTROL CHARACTER
+    '\x04'     #  0x04 -> CONTROL CHARACTER
+    '\x05'     #  0x05 -> CONTROL CHARACTER
+    '\x06'     #  0x06 -> CONTROL CHARACTER
+    '\x07'     #  0x07 -> CONTROL CHARACTER
+    '\x08'     #  0x08 -> CONTROL CHARACTER
+    '\t'       #  0x09 -> CONTROL CHARACTER
+    '\n'       #  0x0A -> CONTROL CHARACTER
+    '\x0b'     #  0x0B -> CONTROL CHARACTER
+    '\x0c'     #  0x0C -> CONTROL CHARACTER
+    '\r'       #  0x0D -> CONTROL CHARACTER
+    '\x0e'     #  0x0E -> CONTROL CHARACTER
+    '\x0f'     #  0x0F -> CONTROL CHARACTER
+    '\x10'     #  0x10 -> CONTROL CHARACTER
+    '\x11'     #  0x11 -> CONTROL CHARACTER
+    '\x12'     #  0x12 -> CONTROL CHARACTER
+    '\x13'     #  0x13 -> CONTROL CHARACTER
+    '\x14'     #  0x14 -> CONTROL CHARACTER
+    '\x15'     #  0x15 -> CONTROL CHARACTER
+    '\x16'     #  0x16 -> CONTROL CHARACTER
+    '\x17'     #  0x17 -> CONTROL CHARACTER
+    '\x18'     #  0x18 -> CONTROL CHARACTER
+    '\x19'     #  0x19 -> CONTROL CHARACTER
+    '\x1a'     #  0x1A -> CONTROL CHARACTER
+    '\x1b'     #  0x1B -> CONTROL CHARACTER
+    '\x1c'     #  0x1C -> CONTROL CHARACTER
+    '\x1d'     #  0x1D -> CONTROL CHARACTER
+    '\x1e'     #  0x1E -> CONTROL CHARACTER
+    '\x1f'     #  0x1F -> CONTROL CHARACTER
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> CONTROL CHARACTER
+    '\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
+    '\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
+    '\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
+    '\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
+    '\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u2020'   #  0xA0 -> DAGGER
+    '\xb0'     #  0xA1 -> DEGREE SIGN
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa7'     #  0xA4 -> SECTION SIGN
+    '\u2022'   #  0xA5 -> BULLET
+    '\xb6'     #  0xA6 -> PILCROW SIGN
+    '\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
+    '\xae'     #  0xA8 -> REGISTERED SIGN
+    '\u0160'   #  0xA9 -> LATIN CAPITAL LETTER S WITH CARON
+    '\u2122'   #  0xAA -> TRADE MARK SIGN
+    '\xb4'     #  0xAB -> ACUTE ACCENT
+    '\xa8'     #  0xAC -> DIAERESIS
+    '\u2260'   #  0xAD -> NOT EQUAL TO
+    '\u017d'   #  0xAE -> LATIN CAPITAL LETTER Z WITH CARON
+    '\xd8'     #  0xAF -> LATIN CAPITAL LETTER O WITH STROKE
+    '\u221e'   #  0xB0 -> INFINITY
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
+    '\u2206'   #  0xB4 -> INCREMENT
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
+    '\u2211'   #  0xB7 -> N-ARY SUMMATION
+    '\u220f'   #  0xB8 -> N-ARY PRODUCT
+    '\u0161'   #  0xB9 -> LATIN SMALL LETTER S WITH CARON
+    '\u222b'   #  0xBA -> INTEGRAL
+    '\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
+    '\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
+    '\u017e'   #  0xBE -> LATIN SMALL LETTER Z WITH CARON
+    '\xf8'     #  0xBF -> LATIN SMALL LETTER O WITH STROKE
+    '\xbf'     #  0xC0 -> INVERTED QUESTION MARK
+    '\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
+    '\xac'     #  0xC2 -> NOT SIGN
+    '\u221a'   #  0xC3 -> SQUARE ROOT
+    '\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
+    '\u2248'   #  0xC5 -> ALMOST EQUAL TO
+    '\u0106'   #  0xC6 -> LATIN CAPITAL LETTER C WITH ACUTE
+    '\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u010c'   #  0xC8 -> LATIN CAPITAL LETTER C WITH CARON
+    '\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
+    '\xa0'     #  0xCA -> NO-BREAK SPACE
+    '\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
+    '\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
+    '\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
+    '\u0110'   #  0xD0 -> LATIN CAPITAL LETTER D WITH STROKE
+    '\u2014'   #  0xD1 -> EM DASH
+    '\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
+    '\xf7'     #  0xD6 -> DIVISION SIGN
+    '\u25ca'   #  0xD7 -> LOZENGE
+    '\uf8ff'   #  0xD8 -> Apple logo
+    '\xa9'     #  0xD9 -> COPYRIGHT SIGN
+    '\u2044'   #  0xDA -> FRACTION SLASH
+    '\u20ac'   #  0xDB -> EURO SIGN
+    '\u2039'   #  0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u203a'   #  0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\xc6'     #  0xDE -> LATIN CAPITAL LETTER AE
+    '\xbb'     #  0xDF -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2013'   #  0xE0 -> EN DASH
+    '\xb7'     #  0xE1 -> MIDDLE DOT
+    '\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
+    '\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2030'   #  0xE4 -> PER MILLE SIGN
+    '\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\u0107'   #  0xE6 -> LATIN SMALL LETTER C WITH ACUTE
+    '\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\u010d'   #  0xE8 -> LATIN SMALL LETTER C WITH CARON
+    '\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\u0111'   #  0xF0 -> LATIN SMALL LETTER D WITH STROKE
+    '\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\u0131'   #  0xF5 -> LATIN SMALL LETTER DOTLESS I
+    '\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u02dc'   #  0xF7 -> SMALL TILDE
+    '\xaf'     #  0xF8 -> MACRON
+    '\u03c0'   #  0xF9 -> GREEK SMALL LETTER PI
+    '\xcb'     #  0xFA -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\u02da'   #  0xFB -> RING ABOVE
+    '\xb8'     #  0xFC -> CEDILLA
+    '\xca'     #  0xFD -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xe6'     #  0xFE -> LATIN SMALL LETTER AE
+    '\u02c7'   #  0xFF -> CARON
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/mac_cyrillic.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_cyrillic.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_cyrillic.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> CONTROL CHARACTER
-    u'\x01'     #  0x01 -> CONTROL CHARACTER
-    u'\x02'     #  0x02 -> CONTROL CHARACTER
-    u'\x03'     #  0x03 -> CONTROL CHARACTER
-    u'\x04'     #  0x04 -> CONTROL CHARACTER
-    u'\x05'     #  0x05 -> CONTROL CHARACTER
-    u'\x06'     #  0x06 -> CONTROL CHARACTER
-    u'\x07'     #  0x07 -> CONTROL CHARACTER
-    u'\x08'     #  0x08 -> CONTROL CHARACTER
-    u'\t'       #  0x09 -> CONTROL CHARACTER
-    u'\n'       #  0x0A -> CONTROL CHARACTER
-    u'\x0b'     #  0x0B -> CONTROL CHARACTER
-    u'\x0c'     #  0x0C -> CONTROL CHARACTER
-    u'\r'       #  0x0D -> CONTROL CHARACTER
-    u'\x0e'     #  0x0E -> CONTROL CHARACTER
-    u'\x0f'     #  0x0F -> CONTROL CHARACTER
-    u'\x10'     #  0x10 -> CONTROL CHARACTER
-    u'\x11'     #  0x11 -> CONTROL CHARACTER
-    u'\x12'     #  0x12 -> CONTROL CHARACTER
-    u'\x13'     #  0x13 -> CONTROL CHARACTER
-    u'\x14'     #  0x14 -> CONTROL CHARACTER
-    u'\x15'     #  0x15 -> CONTROL CHARACTER
-    u'\x16'     #  0x16 -> CONTROL CHARACTER
-    u'\x17'     #  0x17 -> CONTROL CHARACTER
-    u'\x18'     #  0x18 -> CONTROL CHARACTER
-    u'\x19'     #  0x19 -> CONTROL CHARACTER
-    u'\x1a'     #  0x1A -> CONTROL CHARACTER
-    u'\x1b'     #  0x1B -> CONTROL CHARACTER
-    u'\x1c'     #  0x1C -> CONTROL CHARACTER
-    u'\x1d'     #  0x1D -> CONTROL CHARACTER
-    u'\x1e'     #  0x1E -> CONTROL CHARACTER
-    u'\x1f'     #  0x1F -> CONTROL CHARACTER
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> CONTROL CHARACTER
-    u'\u0410'   #  0x80 -> CYRILLIC CAPITAL LETTER A
-    u'\u0411'   #  0x81 -> CYRILLIC CAPITAL LETTER BE
-    u'\u0412'   #  0x82 -> CYRILLIC CAPITAL LETTER VE
-    u'\u0413'   #  0x83 -> CYRILLIC CAPITAL LETTER GHE
-    u'\u0414'   #  0x84 -> CYRILLIC CAPITAL LETTER DE
-    u'\u0415'   #  0x85 -> CYRILLIC CAPITAL LETTER IE
-    u'\u0416'   #  0x86 -> CYRILLIC CAPITAL LETTER ZHE
-    u'\u0417'   #  0x87 -> CYRILLIC CAPITAL LETTER ZE
-    u'\u0418'   #  0x88 -> CYRILLIC CAPITAL LETTER I
-    u'\u0419'   #  0x89 -> CYRILLIC CAPITAL LETTER SHORT I
-    u'\u041a'   #  0x8A -> CYRILLIC CAPITAL LETTER KA
-    u'\u041b'   #  0x8B -> CYRILLIC CAPITAL LETTER EL
-    u'\u041c'   #  0x8C -> CYRILLIC CAPITAL LETTER EM
-    u'\u041d'   #  0x8D -> CYRILLIC CAPITAL LETTER EN
-    u'\u041e'   #  0x8E -> CYRILLIC CAPITAL LETTER O
-    u'\u041f'   #  0x8F -> CYRILLIC CAPITAL LETTER PE
-    u'\u0420'   #  0x90 -> CYRILLIC CAPITAL LETTER ER
-    u'\u0421'   #  0x91 -> CYRILLIC CAPITAL LETTER ES
-    u'\u0422'   #  0x92 -> CYRILLIC CAPITAL LETTER TE
-    u'\u0423'   #  0x93 -> CYRILLIC CAPITAL LETTER U
-    u'\u0424'   #  0x94 -> CYRILLIC CAPITAL LETTER EF
-    u'\u0425'   #  0x95 -> CYRILLIC CAPITAL LETTER HA
-    u'\u0426'   #  0x96 -> CYRILLIC CAPITAL LETTER TSE
-    u'\u0427'   #  0x97 -> CYRILLIC CAPITAL LETTER CHE
-    u'\u0428'   #  0x98 -> CYRILLIC CAPITAL LETTER SHA
-    u'\u0429'   #  0x99 -> CYRILLIC CAPITAL LETTER SHCHA
-    u'\u042a'   #  0x9A -> CYRILLIC CAPITAL LETTER HARD SIGN
-    u'\u042b'   #  0x9B -> CYRILLIC CAPITAL LETTER YERU
-    u'\u042c'   #  0x9C -> CYRILLIC CAPITAL LETTER SOFT SIGN
-    u'\u042d'   #  0x9D -> CYRILLIC CAPITAL LETTER E
-    u'\u042e'   #  0x9E -> CYRILLIC CAPITAL LETTER YU
-    u'\u042f'   #  0x9F -> CYRILLIC CAPITAL LETTER YA
-    u'\u2020'   #  0xA0 -> DAGGER
-    u'\xb0'     #  0xA1 -> DEGREE SIGN
-    u'\u0490'   #  0xA2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa7'     #  0xA4 -> SECTION SIGN
-    u'\u2022'   #  0xA5 -> BULLET
-    u'\xb6'     #  0xA6 -> PILCROW SIGN
-    u'\u0406'   #  0xA7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\xae'     #  0xA8 -> REGISTERED SIGN
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u2122'   #  0xAA -> TRADE MARK SIGN
-    u'\u0402'   #  0xAB -> CYRILLIC CAPITAL LETTER DJE
-    u'\u0452'   #  0xAC -> CYRILLIC SMALL LETTER DJE
-    u'\u2260'   #  0xAD -> NOT EQUAL TO
-    u'\u0403'   #  0xAE -> CYRILLIC CAPITAL LETTER GJE
-    u'\u0453'   #  0xAF -> CYRILLIC SMALL LETTER GJE
-    u'\u221e'   #  0xB0 -> INFINITY
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
-    u'\u0456'   #  0xB4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\u0491'   #  0xB6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
-    u'\u0408'   #  0xB7 -> CYRILLIC CAPITAL LETTER JE
-    u'\u0404'   #  0xB8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
-    u'\u0454'   #  0xB9 -> CYRILLIC SMALL LETTER UKRAINIAN IE
-    u'\u0407'   #  0xBA -> CYRILLIC CAPITAL LETTER YI
-    u'\u0457'   #  0xBB -> CYRILLIC SMALL LETTER YI
-    u'\u0409'   #  0xBC -> CYRILLIC CAPITAL LETTER LJE
-    u'\u0459'   #  0xBD -> CYRILLIC SMALL LETTER LJE
-    u'\u040a'   #  0xBE -> CYRILLIC CAPITAL LETTER NJE
-    u'\u045a'   #  0xBF -> CYRILLIC SMALL LETTER NJE
-    u'\u0458'   #  0xC0 -> CYRILLIC SMALL LETTER JE
-    u'\u0405'   #  0xC1 -> CYRILLIC CAPITAL LETTER DZE
-    u'\xac'     #  0xC2 -> NOT SIGN
-    u'\u221a'   #  0xC3 -> SQUARE ROOT
-    u'\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u2248'   #  0xC5 -> ALMOST EQUAL TO
-    u'\u2206'   #  0xC6 -> INCREMENT
-    u'\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
-    u'\xa0'     #  0xCA -> NO-BREAK SPACE
-    u'\u040b'   #  0xCB -> CYRILLIC CAPITAL LETTER TSHE
-    u'\u045b'   #  0xCC -> CYRILLIC SMALL LETTER TSHE
-    u'\u040c'   #  0xCD -> CYRILLIC CAPITAL LETTER KJE
-    u'\u045c'   #  0xCE -> CYRILLIC SMALL LETTER KJE
-    u'\u0455'   #  0xCF -> CYRILLIC SMALL LETTER DZE
-    u'\u2013'   #  0xD0 -> EN DASH
-    u'\u2014'   #  0xD1 -> EM DASH
-    u'\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
-    u'\xf7'     #  0xD6 -> DIVISION SIGN
-    u'\u201e'   #  0xD7 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u040e'   #  0xD8 -> CYRILLIC CAPITAL LETTER SHORT U
-    u'\u045e'   #  0xD9 -> CYRILLIC SMALL LETTER SHORT U
-    u'\u040f'   #  0xDA -> CYRILLIC CAPITAL LETTER DZHE
-    u'\u045f'   #  0xDB -> CYRILLIC SMALL LETTER DZHE
-    u'\u2116'   #  0xDC -> NUMERO SIGN
-    u'\u0401'   #  0xDD -> CYRILLIC CAPITAL LETTER IO
-    u'\u0451'   #  0xDE -> CYRILLIC SMALL LETTER IO
-    u'\u044f'   #  0xDF -> CYRILLIC SMALL LETTER YA
-    u'\u0430'   #  0xE0 -> CYRILLIC SMALL LETTER A
-    u'\u0431'   #  0xE1 -> CYRILLIC SMALL LETTER BE
-    u'\u0432'   #  0xE2 -> CYRILLIC SMALL LETTER VE
-    u'\u0433'   #  0xE3 -> CYRILLIC SMALL LETTER GHE
-    u'\u0434'   #  0xE4 -> CYRILLIC SMALL LETTER DE
-    u'\u0435'   #  0xE5 -> CYRILLIC SMALL LETTER IE
-    u'\u0436'   #  0xE6 -> CYRILLIC SMALL LETTER ZHE
-    u'\u0437'   #  0xE7 -> CYRILLIC SMALL LETTER ZE
-    u'\u0438'   #  0xE8 -> CYRILLIC SMALL LETTER I
-    u'\u0439'   #  0xE9 -> CYRILLIC SMALL LETTER SHORT I
-    u'\u043a'   #  0xEA -> CYRILLIC SMALL LETTER KA
-    u'\u043b'   #  0xEB -> CYRILLIC SMALL LETTER EL
-    u'\u043c'   #  0xEC -> CYRILLIC SMALL LETTER EM
-    u'\u043d'   #  0xED -> CYRILLIC SMALL LETTER EN
-    u'\u043e'   #  0xEE -> CYRILLIC SMALL LETTER O
-    u'\u043f'   #  0xEF -> CYRILLIC SMALL LETTER PE
-    u'\u0440'   #  0xF0 -> CYRILLIC SMALL LETTER ER
-    u'\u0441'   #  0xF1 -> CYRILLIC SMALL LETTER ES
-    u'\u0442'   #  0xF2 -> CYRILLIC SMALL LETTER TE
-    u'\u0443'   #  0xF3 -> CYRILLIC SMALL LETTER U
-    u'\u0444'   #  0xF4 -> CYRILLIC SMALL LETTER EF
-    u'\u0445'   #  0xF5 -> CYRILLIC SMALL LETTER HA
-    u'\u0446'   #  0xF6 -> CYRILLIC SMALL LETTER TSE
-    u'\u0447'   #  0xF7 -> CYRILLIC SMALL LETTER CHE
-    u'\u0448'   #  0xF8 -> CYRILLIC SMALL LETTER SHA
-    u'\u0449'   #  0xF9 -> CYRILLIC SMALL LETTER SHCHA
-    u'\u044a'   #  0xFA -> CYRILLIC SMALL LETTER HARD SIGN
-    u'\u044b'   #  0xFB -> CYRILLIC SMALL LETTER YERU
-    u'\u044c'   #  0xFC -> CYRILLIC SMALL LETTER SOFT SIGN
-    u'\u044d'   #  0xFD -> CYRILLIC SMALL LETTER E
-    u'\u044e'   #  0xFE -> CYRILLIC SMALL LETTER YU
-    u'\u20ac'   #  0xFF -> EURO SIGN
+    '\x00'     #  0x00 -> CONTROL CHARACTER
+    '\x01'     #  0x01 -> CONTROL CHARACTER
+    '\x02'     #  0x02 -> CONTROL CHARACTER
+    '\x03'     #  0x03 -> CONTROL CHARACTER
+    '\x04'     #  0x04 -> CONTROL CHARACTER
+    '\x05'     #  0x05 -> CONTROL CHARACTER
+    '\x06'     #  0x06 -> CONTROL CHARACTER
+    '\x07'     #  0x07 -> CONTROL CHARACTER
+    '\x08'     #  0x08 -> CONTROL CHARACTER
+    '\t'       #  0x09 -> CONTROL CHARACTER
+    '\n'       #  0x0A -> CONTROL CHARACTER
+    '\x0b'     #  0x0B -> CONTROL CHARACTER
+    '\x0c'     #  0x0C -> CONTROL CHARACTER
+    '\r'       #  0x0D -> CONTROL CHARACTER
+    '\x0e'     #  0x0E -> CONTROL CHARACTER
+    '\x0f'     #  0x0F -> CONTROL CHARACTER
+    '\x10'     #  0x10 -> CONTROL CHARACTER
+    '\x11'     #  0x11 -> CONTROL CHARACTER
+    '\x12'     #  0x12 -> CONTROL CHARACTER
+    '\x13'     #  0x13 -> CONTROL CHARACTER
+    '\x14'     #  0x14 -> CONTROL CHARACTER
+    '\x15'     #  0x15 -> CONTROL CHARACTER
+    '\x16'     #  0x16 -> CONTROL CHARACTER
+    '\x17'     #  0x17 -> CONTROL CHARACTER
+    '\x18'     #  0x18 -> CONTROL CHARACTER
+    '\x19'     #  0x19 -> CONTROL CHARACTER
+    '\x1a'     #  0x1A -> CONTROL CHARACTER
+    '\x1b'     #  0x1B -> CONTROL CHARACTER
+    '\x1c'     #  0x1C -> CONTROL CHARACTER
+    '\x1d'     #  0x1D -> CONTROL CHARACTER
+    '\x1e'     #  0x1E -> CONTROL CHARACTER
+    '\x1f'     #  0x1F -> CONTROL CHARACTER
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> CONTROL CHARACTER
+    '\u0410'   #  0x80 -> CYRILLIC CAPITAL LETTER A
+    '\u0411'   #  0x81 -> CYRILLIC CAPITAL LETTER BE
+    '\u0412'   #  0x82 -> CYRILLIC CAPITAL LETTER VE
+    '\u0413'   #  0x83 -> CYRILLIC CAPITAL LETTER GHE
+    '\u0414'   #  0x84 -> CYRILLIC CAPITAL LETTER DE
+    '\u0415'   #  0x85 -> CYRILLIC CAPITAL LETTER IE
+    '\u0416'   #  0x86 -> CYRILLIC CAPITAL LETTER ZHE
+    '\u0417'   #  0x87 -> CYRILLIC CAPITAL LETTER ZE
+    '\u0418'   #  0x88 -> CYRILLIC CAPITAL LETTER I
+    '\u0419'   #  0x89 -> CYRILLIC CAPITAL LETTER SHORT I
+    '\u041a'   #  0x8A -> CYRILLIC CAPITAL LETTER KA
+    '\u041b'   #  0x8B -> CYRILLIC CAPITAL LETTER EL
+    '\u041c'   #  0x8C -> CYRILLIC CAPITAL LETTER EM
+    '\u041d'   #  0x8D -> CYRILLIC CAPITAL LETTER EN
+    '\u041e'   #  0x8E -> CYRILLIC CAPITAL LETTER O
+    '\u041f'   #  0x8F -> CYRILLIC CAPITAL LETTER PE
+    '\u0420'   #  0x90 -> CYRILLIC CAPITAL LETTER ER
+    '\u0421'   #  0x91 -> CYRILLIC CAPITAL LETTER ES
+    '\u0422'   #  0x92 -> CYRILLIC CAPITAL LETTER TE
+    '\u0423'   #  0x93 -> CYRILLIC CAPITAL LETTER U
+    '\u0424'   #  0x94 -> CYRILLIC CAPITAL LETTER EF
+    '\u0425'   #  0x95 -> CYRILLIC CAPITAL LETTER HA
+    '\u0426'   #  0x96 -> CYRILLIC CAPITAL LETTER TSE
+    '\u0427'   #  0x97 -> CYRILLIC CAPITAL LETTER CHE
+    '\u0428'   #  0x98 -> CYRILLIC CAPITAL LETTER SHA
+    '\u0429'   #  0x99 -> CYRILLIC CAPITAL LETTER SHCHA
+    '\u042a'   #  0x9A -> CYRILLIC CAPITAL LETTER HARD SIGN
+    '\u042b'   #  0x9B -> CYRILLIC CAPITAL LETTER YERU
+    '\u042c'   #  0x9C -> CYRILLIC CAPITAL LETTER SOFT SIGN
+    '\u042d'   #  0x9D -> CYRILLIC CAPITAL LETTER E
+    '\u042e'   #  0x9E -> CYRILLIC CAPITAL LETTER YU
+    '\u042f'   #  0x9F -> CYRILLIC CAPITAL LETTER YA
+    '\u2020'   #  0xA0 -> DAGGER
+    '\xb0'     #  0xA1 -> DEGREE SIGN
+    '\u0490'   #  0xA2 -> CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa7'     #  0xA4 -> SECTION SIGN
+    '\u2022'   #  0xA5 -> BULLET
+    '\xb6'     #  0xA6 -> PILCROW SIGN
+    '\u0406'   #  0xA7 -> CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\xae'     #  0xA8 -> REGISTERED SIGN
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u2122'   #  0xAA -> TRADE MARK SIGN
+    '\u0402'   #  0xAB -> CYRILLIC CAPITAL LETTER DJE
+    '\u0452'   #  0xAC -> CYRILLIC SMALL LETTER DJE
+    '\u2260'   #  0xAD -> NOT EQUAL TO
+    '\u0403'   #  0xAE -> CYRILLIC CAPITAL LETTER GJE
+    '\u0453'   #  0xAF -> CYRILLIC SMALL LETTER GJE
+    '\u221e'   #  0xB0 -> INFINITY
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
+    '\u0456'   #  0xB4 -> CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\u0491'   #  0xB6 -> CYRILLIC SMALL LETTER GHE WITH UPTURN
+    '\u0408'   #  0xB7 -> CYRILLIC CAPITAL LETTER JE
+    '\u0404'   #  0xB8 -> CYRILLIC CAPITAL LETTER UKRAINIAN IE
+    '\u0454'   #  0xB9 -> CYRILLIC SMALL LETTER UKRAINIAN IE
+    '\u0407'   #  0xBA -> CYRILLIC CAPITAL LETTER YI
+    '\u0457'   #  0xBB -> CYRILLIC SMALL LETTER YI
+    '\u0409'   #  0xBC -> CYRILLIC CAPITAL LETTER LJE
+    '\u0459'   #  0xBD -> CYRILLIC SMALL LETTER LJE
+    '\u040a'   #  0xBE -> CYRILLIC CAPITAL LETTER NJE
+    '\u045a'   #  0xBF -> CYRILLIC SMALL LETTER NJE
+    '\u0458'   #  0xC0 -> CYRILLIC SMALL LETTER JE
+    '\u0405'   #  0xC1 -> CYRILLIC CAPITAL LETTER DZE
+    '\xac'     #  0xC2 -> NOT SIGN
+    '\u221a'   #  0xC3 -> SQUARE ROOT
+    '\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
+    '\u2248'   #  0xC5 -> ALMOST EQUAL TO
+    '\u2206'   #  0xC6 -> INCREMENT
+    '\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
+    '\xa0'     #  0xCA -> NO-BREAK SPACE
+    '\u040b'   #  0xCB -> CYRILLIC CAPITAL LETTER TSHE
+    '\u045b'   #  0xCC -> CYRILLIC SMALL LETTER TSHE
+    '\u040c'   #  0xCD -> CYRILLIC CAPITAL LETTER KJE
+    '\u045c'   #  0xCE -> CYRILLIC SMALL LETTER KJE
+    '\u0455'   #  0xCF -> CYRILLIC SMALL LETTER DZE
+    '\u2013'   #  0xD0 -> EN DASH
+    '\u2014'   #  0xD1 -> EM DASH
+    '\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
+    '\xf7'     #  0xD6 -> DIVISION SIGN
+    '\u201e'   #  0xD7 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u040e'   #  0xD8 -> CYRILLIC CAPITAL LETTER SHORT U
+    '\u045e'   #  0xD9 -> CYRILLIC SMALL LETTER SHORT U
+    '\u040f'   #  0xDA -> CYRILLIC CAPITAL LETTER DZHE
+    '\u045f'   #  0xDB -> CYRILLIC SMALL LETTER DZHE
+    '\u2116'   #  0xDC -> NUMERO SIGN
+    '\u0401'   #  0xDD -> CYRILLIC CAPITAL LETTER IO
+    '\u0451'   #  0xDE -> CYRILLIC SMALL LETTER IO
+    '\u044f'   #  0xDF -> CYRILLIC SMALL LETTER YA
+    '\u0430'   #  0xE0 -> CYRILLIC SMALL LETTER A
+    '\u0431'   #  0xE1 -> CYRILLIC SMALL LETTER BE
+    '\u0432'   #  0xE2 -> CYRILLIC SMALL LETTER VE
+    '\u0433'   #  0xE3 -> CYRILLIC SMALL LETTER GHE
+    '\u0434'   #  0xE4 -> CYRILLIC SMALL LETTER DE
+    '\u0435'   #  0xE5 -> CYRILLIC SMALL LETTER IE
+    '\u0436'   #  0xE6 -> CYRILLIC SMALL LETTER ZHE
+    '\u0437'   #  0xE7 -> CYRILLIC SMALL LETTER ZE
+    '\u0438'   #  0xE8 -> CYRILLIC SMALL LETTER I
+    '\u0439'   #  0xE9 -> CYRILLIC SMALL LETTER SHORT I
+    '\u043a'   #  0xEA -> CYRILLIC SMALL LETTER KA
+    '\u043b'   #  0xEB -> CYRILLIC SMALL LETTER EL
+    '\u043c'   #  0xEC -> CYRILLIC SMALL LETTER EM
+    '\u043d'   #  0xED -> CYRILLIC SMALL LETTER EN
+    '\u043e'   #  0xEE -> CYRILLIC SMALL LETTER O
+    '\u043f'   #  0xEF -> CYRILLIC SMALL LETTER PE
+    '\u0440'   #  0xF0 -> CYRILLIC SMALL LETTER ER
+    '\u0441'   #  0xF1 -> CYRILLIC SMALL LETTER ES
+    '\u0442'   #  0xF2 -> CYRILLIC SMALL LETTER TE
+    '\u0443'   #  0xF3 -> CYRILLIC SMALL LETTER U
+    '\u0444'   #  0xF4 -> CYRILLIC SMALL LETTER EF
+    '\u0445'   #  0xF5 -> CYRILLIC SMALL LETTER HA
+    '\u0446'   #  0xF6 -> CYRILLIC SMALL LETTER TSE
+    '\u0447'   #  0xF7 -> CYRILLIC SMALL LETTER CHE
+    '\u0448'   #  0xF8 -> CYRILLIC SMALL LETTER SHA
+    '\u0449'   #  0xF9 -> CYRILLIC SMALL LETTER SHCHA
+    '\u044a'   #  0xFA -> CYRILLIC SMALL LETTER HARD SIGN
+    '\u044b'   #  0xFB -> CYRILLIC SMALL LETTER YERU
+    '\u044c'   #  0xFC -> CYRILLIC SMALL LETTER SOFT SIGN
+    '\u044d'   #  0xFD -> CYRILLIC SMALL LETTER E
+    '\u044e'   #  0xFE -> CYRILLIC SMALL LETTER YU
+    '\u20ac'   #  0xFF -> EURO SIGN
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/mac_farsi.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_farsi.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_farsi.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> CONTROL CHARACTER
-    u'\x01'     #  0x01 -> CONTROL CHARACTER
-    u'\x02'     #  0x02 -> CONTROL CHARACTER
-    u'\x03'     #  0x03 -> CONTROL CHARACTER
-    u'\x04'     #  0x04 -> CONTROL CHARACTER
-    u'\x05'     #  0x05 -> CONTROL CHARACTER
-    u'\x06'     #  0x06 -> CONTROL CHARACTER
-    u'\x07'     #  0x07 -> CONTROL CHARACTER
-    u'\x08'     #  0x08 -> CONTROL CHARACTER
-    u'\t'       #  0x09 -> CONTROL CHARACTER
-    u'\n'       #  0x0A -> CONTROL CHARACTER
-    u'\x0b'     #  0x0B -> CONTROL CHARACTER
-    u'\x0c'     #  0x0C -> CONTROL CHARACTER
-    u'\r'       #  0x0D -> CONTROL CHARACTER
-    u'\x0e'     #  0x0E -> CONTROL CHARACTER
-    u'\x0f'     #  0x0F -> CONTROL CHARACTER
-    u'\x10'     #  0x10 -> CONTROL CHARACTER
-    u'\x11'     #  0x11 -> CONTROL CHARACTER
-    u'\x12'     #  0x12 -> CONTROL CHARACTER
-    u'\x13'     #  0x13 -> CONTROL CHARACTER
-    u'\x14'     #  0x14 -> CONTROL CHARACTER
-    u'\x15'     #  0x15 -> CONTROL CHARACTER
-    u'\x16'     #  0x16 -> CONTROL CHARACTER
-    u'\x17'     #  0x17 -> CONTROL CHARACTER
-    u'\x18'     #  0x18 -> CONTROL CHARACTER
-    u'\x19'     #  0x19 -> CONTROL CHARACTER
-    u'\x1a'     #  0x1A -> CONTROL CHARACTER
-    u'\x1b'     #  0x1B -> CONTROL CHARACTER
-    u'\x1c'     #  0x1C -> CONTROL CHARACTER
-    u'\x1d'     #  0x1D -> CONTROL CHARACTER
-    u'\x1e'     #  0x1E -> CONTROL CHARACTER
-    u'\x1f'     #  0x1F -> CONTROL CHARACTER
-    u' '        #  0x20 -> SPACE, left-right
-    u'!'        #  0x21 -> EXCLAMATION MARK, left-right
-    u'"'        #  0x22 -> QUOTATION MARK, left-right
-    u'#'        #  0x23 -> NUMBER SIGN, left-right
-    u'$'        #  0x24 -> DOLLAR SIGN, left-right
-    u'%'        #  0x25 -> PERCENT SIGN, left-right
-    u'&'        #  0x26 -> AMPERSAND, left-right
-    u"'"        #  0x27 -> APOSTROPHE, left-right
-    u'('        #  0x28 -> LEFT PARENTHESIS, left-right
-    u')'        #  0x29 -> RIGHT PARENTHESIS, left-right
-    u'*'        #  0x2A -> ASTERISK, left-right
-    u'+'        #  0x2B -> PLUS SIGN, left-right
-    u','        #  0x2C -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR
-    u'-'        #  0x2D -> HYPHEN-MINUS, left-right
-    u'.'        #  0x2E -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR
-    u'/'        #  0x2F -> SOLIDUS, left-right
-    u'0'        #  0x30 -> DIGIT ZERO;  in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE;   in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO;   in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR;  in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE;  in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX;   in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE;  in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE
-    u':'        #  0x3A -> COLON, left-right
-    u';'        #  0x3B -> SEMICOLON, left-right
-    u'<'        #  0x3C -> LESS-THAN SIGN, left-right
-    u'='        #  0x3D -> EQUALS SIGN, left-right
-    u'>'        #  0x3E -> GREATER-THAN SIGN, left-right
-    u'?'        #  0x3F -> QUESTION MARK, left-right
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET, left-right
-    u'\\'       #  0x5C -> REVERSE SOLIDUS, left-right
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET, left-right
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT, left-right
-    u'_'        #  0x5F -> LOW LINE, left-right
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET, left-right
-    u'|'        #  0x7C -> VERTICAL LINE, left-right
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET, left-right
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> CONTROL CHARACTER
-    u'\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xa0'     #  0x81 -> NO-BREAK SPACE, right-left
-    u'\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u06ba'   #  0x8B -> ARABIC LETTER NOON GHUNNA
-    u'\xab'     #  0x8C -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
-    u'\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\u2026'   #  0x93 -> HORIZONTAL ELLIPSIS, right-left
-    u'\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xbb'     #  0x98 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
-    u'\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf7'     #  0x9B -> DIVISION SIGN, right-left
-    u'\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
-    u' '        #  0xA0 -> SPACE, right-left
-    u'!'        #  0xA1 -> EXCLAMATION MARK, right-left
-    u'"'        #  0xA2 -> QUOTATION MARK, right-left
-    u'#'        #  0xA3 -> NUMBER SIGN, right-left
-    u'$'        #  0xA4 -> DOLLAR SIGN, right-left
-    u'\u066a'   #  0xA5 -> ARABIC PERCENT SIGN
-    u'&'        #  0xA6 -> AMPERSAND, right-left
-    u"'"        #  0xA7 -> APOSTROPHE, right-left
-    u'('        #  0xA8 -> LEFT PARENTHESIS, right-left
-    u')'        #  0xA9 -> RIGHT PARENTHESIS, right-left
-    u'*'        #  0xAA -> ASTERISK, right-left
-    u'+'        #  0xAB -> PLUS SIGN, right-left
-    u'\u060c'   #  0xAC -> ARABIC COMMA
-    u'-'        #  0xAD -> HYPHEN-MINUS, right-left
-    u'.'        #  0xAE -> FULL STOP, right-left
-    u'/'        #  0xAF -> SOLIDUS, right-left
-    u'\u06f0'   #  0xB0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override)
-    u'\u06f1'   #  0xB1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override)
-    u'\u06f2'   #  0xB2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override)
-    u'\u06f3'   #  0xB3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override)
-    u'\u06f4'   #  0xB4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override)
-    u'\u06f5'   #  0xB5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override)
-    u'\u06f6'   #  0xB6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override)
-    u'\u06f7'   #  0xB7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override)
-    u'\u06f8'   #  0xB8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override)
-    u'\u06f9'   #  0xB9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override)
-    u':'        #  0xBA -> COLON, right-left
-    u'\u061b'   #  0xBB -> ARABIC SEMICOLON
-    u'<'        #  0xBC -> LESS-THAN SIGN, right-left
-    u'='        #  0xBD -> EQUALS SIGN, right-left
-    u'>'        #  0xBE -> GREATER-THAN SIGN, right-left
-    u'\u061f'   #  0xBF -> ARABIC QUESTION MARK
-    u'\u274a'   #  0xC0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left
-    u'\u0621'   #  0xC1 -> ARABIC LETTER HAMZA
-    u'\u0622'   #  0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
-    u'\u0623'   #  0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
-    u'\u0624'   #  0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
-    u'\u0625'   #  0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
-    u'\u0626'   #  0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
-    u'\u0627'   #  0xC7 -> ARABIC LETTER ALEF
-    u'\u0628'   #  0xC8 -> ARABIC LETTER BEH
-    u'\u0629'   #  0xC9 -> ARABIC LETTER TEH MARBUTA
-    u'\u062a'   #  0xCA -> ARABIC LETTER TEH
-    u'\u062b'   #  0xCB -> ARABIC LETTER THEH
-    u'\u062c'   #  0xCC -> ARABIC LETTER JEEM
-    u'\u062d'   #  0xCD -> ARABIC LETTER HAH
-    u'\u062e'   #  0xCE -> ARABIC LETTER KHAH
-    u'\u062f'   #  0xCF -> ARABIC LETTER DAL
-    u'\u0630'   #  0xD0 -> ARABIC LETTER THAL
-    u'\u0631'   #  0xD1 -> ARABIC LETTER REH
-    u'\u0632'   #  0xD2 -> ARABIC LETTER ZAIN
-    u'\u0633'   #  0xD3 -> ARABIC LETTER SEEN
-    u'\u0634'   #  0xD4 -> ARABIC LETTER SHEEN
-    u'\u0635'   #  0xD5 -> ARABIC LETTER SAD
-    u'\u0636'   #  0xD6 -> ARABIC LETTER DAD
-    u'\u0637'   #  0xD7 -> ARABIC LETTER TAH
-    u'\u0638'   #  0xD8 -> ARABIC LETTER ZAH
-    u'\u0639'   #  0xD9 -> ARABIC LETTER AIN
-    u'\u063a'   #  0xDA -> ARABIC LETTER GHAIN
-    u'['        #  0xDB -> LEFT SQUARE BRACKET, right-left
-    u'\\'       #  0xDC -> REVERSE SOLIDUS, right-left
-    u']'        #  0xDD -> RIGHT SQUARE BRACKET, right-left
-    u'^'        #  0xDE -> CIRCUMFLEX ACCENT, right-left
-    u'_'        #  0xDF -> LOW LINE, right-left
-    u'\u0640'   #  0xE0 -> ARABIC TATWEEL
-    u'\u0641'   #  0xE1 -> ARABIC LETTER FEH
-    u'\u0642'   #  0xE2 -> ARABIC LETTER QAF
-    u'\u0643'   #  0xE3 -> ARABIC LETTER KAF
-    u'\u0644'   #  0xE4 -> ARABIC LETTER LAM
-    u'\u0645'   #  0xE5 -> ARABIC LETTER MEEM
-    u'\u0646'   #  0xE6 -> ARABIC LETTER NOON
-    u'\u0647'   #  0xE7 -> ARABIC LETTER HEH
-    u'\u0648'   #  0xE8 -> ARABIC LETTER WAW
-    u'\u0649'   #  0xE9 -> ARABIC LETTER ALEF MAKSURA
-    u'\u064a'   #  0xEA -> ARABIC LETTER YEH
-    u'\u064b'   #  0xEB -> ARABIC FATHATAN
-    u'\u064c'   #  0xEC -> ARABIC DAMMATAN
-    u'\u064d'   #  0xED -> ARABIC KASRATAN
-    u'\u064e'   #  0xEE -> ARABIC FATHA
-    u'\u064f'   #  0xEF -> ARABIC DAMMA
-    u'\u0650'   #  0xF0 -> ARABIC KASRA
-    u'\u0651'   #  0xF1 -> ARABIC SHADDA
-    u'\u0652'   #  0xF2 -> ARABIC SUKUN
-    u'\u067e'   #  0xF3 -> ARABIC LETTER PEH
-    u'\u0679'   #  0xF4 -> ARABIC LETTER TTEH
-    u'\u0686'   #  0xF5 -> ARABIC LETTER TCHEH
-    u'\u06d5'   #  0xF6 -> ARABIC LETTER AE
-    u'\u06a4'   #  0xF7 -> ARABIC LETTER VEH
-    u'\u06af'   #  0xF8 -> ARABIC LETTER GAF
-    u'\u0688'   #  0xF9 -> ARABIC LETTER DDAL
-    u'\u0691'   #  0xFA -> ARABIC LETTER RREH
-    u'{'        #  0xFB -> LEFT CURLY BRACKET, right-left
-    u'|'        #  0xFC -> VERTICAL LINE, right-left
-    u'}'        #  0xFD -> RIGHT CURLY BRACKET, right-left
-    u'\u0698'   #  0xFE -> ARABIC LETTER JEH
-    u'\u06d2'   #  0xFF -> ARABIC LETTER YEH BARREE
+    '\x00'     #  0x00 -> CONTROL CHARACTER
+    '\x01'     #  0x01 -> CONTROL CHARACTER
+    '\x02'     #  0x02 -> CONTROL CHARACTER
+    '\x03'     #  0x03 -> CONTROL CHARACTER
+    '\x04'     #  0x04 -> CONTROL CHARACTER
+    '\x05'     #  0x05 -> CONTROL CHARACTER
+    '\x06'     #  0x06 -> CONTROL CHARACTER
+    '\x07'     #  0x07 -> CONTROL CHARACTER
+    '\x08'     #  0x08 -> CONTROL CHARACTER
+    '\t'       #  0x09 -> CONTROL CHARACTER
+    '\n'       #  0x0A -> CONTROL CHARACTER
+    '\x0b'     #  0x0B -> CONTROL CHARACTER
+    '\x0c'     #  0x0C -> CONTROL CHARACTER
+    '\r'       #  0x0D -> CONTROL CHARACTER
+    '\x0e'     #  0x0E -> CONTROL CHARACTER
+    '\x0f'     #  0x0F -> CONTROL CHARACTER
+    '\x10'     #  0x10 -> CONTROL CHARACTER
+    '\x11'     #  0x11 -> CONTROL CHARACTER
+    '\x12'     #  0x12 -> CONTROL CHARACTER
+    '\x13'     #  0x13 -> CONTROL CHARACTER
+    '\x14'     #  0x14 -> CONTROL CHARACTER
+    '\x15'     #  0x15 -> CONTROL CHARACTER
+    '\x16'     #  0x16 -> CONTROL CHARACTER
+    '\x17'     #  0x17 -> CONTROL CHARACTER
+    '\x18'     #  0x18 -> CONTROL CHARACTER
+    '\x19'     #  0x19 -> CONTROL CHARACTER
+    '\x1a'     #  0x1A -> CONTROL CHARACTER
+    '\x1b'     #  0x1B -> CONTROL CHARACTER
+    '\x1c'     #  0x1C -> CONTROL CHARACTER
+    '\x1d'     #  0x1D -> CONTROL CHARACTER
+    '\x1e'     #  0x1E -> CONTROL CHARACTER
+    '\x1f'     #  0x1F -> CONTROL CHARACTER
+    ' '        #  0x20 -> SPACE, left-right
+    '!'        #  0x21 -> EXCLAMATION MARK, left-right
+    '"'        #  0x22 -> QUOTATION MARK, left-right
+    '#'        #  0x23 -> NUMBER SIGN, left-right
+    '$'        #  0x24 -> DOLLAR SIGN, left-right
+    '%'        #  0x25 -> PERCENT SIGN, left-right
+    '&'        #  0x26 -> AMPERSAND, left-right
+    "'"        #  0x27 -> APOSTROPHE, left-right
+    '('        #  0x28 -> LEFT PARENTHESIS, left-right
+    ')'        #  0x29 -> RIGHT PARENTHESIS, left-right
+    '*'        #  0x2A -> ASTERISK, left-right
+    '+'        #  0x2B -> PLUS SIGN, left-right
+    ','        #  0x2C -> COMMA, left-right; in Arabic-script context, displayed as 0x066C ARABIC THOUSANDS SEPARATOR
+    '-'        #  0x2D -> HYPHEN-MINUS, left-right
+    '.'        #  0x2E -> FULL STOP, left-right; in Arabic-script context, displayed as 0x066B ARABIC DECIMAL SEPARATOR
+    '/'        #  0x2F -> SOLIDUS, left-right
+    '0'        #  0x30 -> DIGIT ZERO;  in Arabic-script context, displayed as 0x06F0 EXTENDED ARABIC-INDIC DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE;   in Arabic-script context, displayed as 0x06F1 EXTENDED ARABIC-INDIC DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO;   in Arabic-script context, displayed as 0x06F2 EXTENDED ARABIC-INDIC DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE; in Arabic-script context, displayed as 0x06F3 EXTENDED ARABIC-INDIC DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR;  in Arabic-script context, displayed as 0x06F4 EXTENDED ARABIC-INDIC DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE;  in Arabic-script context, displayed as 0x06F5 EXTENDED ARABIC-INDIC DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX;   in Arabic-script context, displayed as 0x06F6 EXTENDED ARABIC-INDIC DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN; in Arabic-script context, displayed as 0x06F7 EXTENDED ARABIC-INDIC DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT; in Arabic-script context, displayed as 0x06F8 EXTENDED ARABIC-INDIC DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE;  in Arabic-script context, displayed as 0x06F9 EXTENDED ARABIC-INDIC DIGIT NINE
+    ':'        #  0x3A -> COLON, left-right
+    ';'        #  0x3B -> SEMICOLON, left-right
+    '<'        #  0x3C -> LESS-THAN SIGN, left-right
+    '='        #  0x3D -> EQUALS SIGN, left-right
+    '>'        #  0x3E -> GREATER-THAN SIGN, left-right
+    '?'        #  0x3F -> QUESTION MARK, left-right
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET, left-right
+    '\\'       #  0x5C -> REVERSE SOLIDUS, left-right
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET, left-right
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT, left-right
+    '_'        #  0x5F -> LOW LINE, left-right
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET, left-right
+    '|'        #  0x7C -> VERTICAL LINE, left-right
+    '}'        #  0x7D -> RIGHT CURLY BRACKET, left-right
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> CONTROL CHARACTER
+    '\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xa0'     #  0x81 -> NO-BREAK SPACE, right-left
+    '\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u06ba'   #  0x8B -> ARABIC LETTER NOON GHUNNA
+    '\xab'     #  0x8C -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
+    '\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
+    '\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
+    '\u2026'   #  0x93 -> HORIZONTAL ELLIPSIS, right-left
+    '\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xbb'     #  0x98 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK, right-left
+    '\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf7'     #  0x9B -> DIVISION SIGN, right-left
+    '\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
+    ' '        #  0xA0 -> SPACE, right-left
+    '!'        #  0xA1 -> EXCLAMATION MARK, right-left
+    '"'        #  0xA2 -> QUOTATION MARK, right-left
+    '#'        #  0xA3 -> NUMBER SIGN, right-left
+    '$'        #  0xA4 -> DOLLAR SIGN, right-left
+    '\u066a'   #  0xA5 -> ARABIC PERCENT SIGN
+    '&'        #  0xA6 -> AMPERSAND, right-left
+    "'"        #  0xA7 -> APOSTROPHE, right-left
+    '('        #  0xA8 -> LEFT PARENTHESIS, right-left
+    ')'        #  0xA9 -> RIGHT PARENTHESIS, right-left
+    '*'        #  0xAA -> ASTERISK, right-left
+    '+'        #  0xAB -> PLUS SIGN, right-left
+    '\u060c'   #  0xAC -> ARABIC COMMA
+    '-'        #  0xAD -> HYPHEN-MINUS, right-left
+    '.'        #  0xAE -> FULL STOP, right-left
+    '/'        #  0xAF -> SOLIDUS, right-left
+    '\u06f0'   #  0xB0 -> EXTENDED ARABIC-INDIC DIGIT ZERO, right-left (need override)
+    '\u06f1'   #  0xB1 -> EXTENDED ARABIC-INDIC DIGIT ONE, right-left (need override)
+    '\u06f2'   #  0xB2 -> EXTENDED ARABIC-INDIC DIGIT TWO, right-left (need override)
+    '\u06f3'   #  0xB3 -> EXTENDED ARABIC-INDIC DIGIT THREE, right-left (need override)
+    '\u06f4'   #  0xB4 -> EXTENDED ARABIC-INDIC DIGIT FOUR, right-left (need override)
+    '\u06f5'   #  0xB5 -> EXTENDED ARABIC-INDIC DIGIT FIVE, right-left (need override)
+    '\u06f6'   #  0xB6 -> EXTENDED ARABIC-INDIC DIGIT SIX, right-left (need override)
+    '\u06f7'   #  0xB7 -> EXTENDED ARABIC-INDIC DIGIT SEVEN, right-left (need override)
+    '\u06f8'   #  0xB8 -> EXTENDED ARABIC-INDIC DIGIT EIGHT, right-left (need override)
+    '\u06f9'   #  0xB9 -> EXTENDED ARABIC-INDIC DIGIT NINE, right-left (need override)
+    ':'        #  0xBA -> COLON, right-left
+    '\u061b'   #  0xBB -> ARABIC SEMICOLON
+    '<'        #  0xBC -> LESS-THAN SIGN, right-left
+    '='        #  0xBD -> EQUALS SIGN, right-left
+    '>'        #  0xBE -> GREATER-THAN SIGN, right-left
+    '\u061f'   #  0xBF -> ARABIC QUESTION MARK
+    '\u274a'   #  0xC0 -> EIGHT TEARDROP-SPOKED PROPELLER ASTERISK, right-left
+    '\u0621'   #  0xC1 -> ARABIC LETTER HAMZA
+    '\u0622'   #  0xC2 -> ARABIC LETTER ALEF WITH MADDA ABOVE
+    '\u0623'   #  0xC3 -> ARABIC LETTER ALEF WITH HAMZA ABOVE
+    '\u0624'   #  0xC4 -> ARABIC LETTER WAW WITH HAMZA ABOVE
+    '\u0625'   #  0xC5 -> ARABIC LETTER ALEF WITH HAMZA BELOW
+    '\u0626'   #  0xC6 -> ARABIC LETTER YEH WITH HAMZA ABOVE
+    '\u0627'   #  0xC7 -> ARABIC LETTER ALEF
+    '\u0628'   #  0xC8 -> ARABIC LETTER BEH
+    '\u0629'   #  0xC9 -> ARABIC LETTER TEH MARBUTA
+    '\u062a'   #  0xCA -> ARABIC LETTER TEH
+    '\u062b'   #  0xCB -> ARABIC LETTER THEH
+    '\u062c'   #  0xCC -> ARABIC LETTER JEEM
+    '\u062d'   #  0xCD -> ARABIC LETTER HAH
+    '\u062e'   #  0xCE -> ARABIC LETTER KHAH
+    '\u062f'   #  0xCF -> ARABIC LETTER DAL
+    '\u0630'   #  0xD0 -> ARABIC LETTER THAL
+    '\u0631'   #  0xD1 -> ARABIC LETTER REH
+    '\u0632'   #  0xD2 -> ARABIC LETTER ZAIN
+    '\u0633'   #  0xD3 -> ARABIC LETTER SEEN
+    '\u0634'   #  0xD4 -> ARABIC LETTER SHEEN
+    '\u0635'   #  0xD5 -> ARABIC LETTER SAD
+    '\u0636'   #  0xD6 -> ARABIC LETTER DAD
+    '\u0637'   #  0xD7 -> ARABIC LETTER TAH
+    '\u0638'   #  0xD8 -> ARABIC LETTER ZAH
+    '\u0639'   #  0xD9 -> ARABIC LETTER AIN
+    '\u063a'   #  0xDA -> ARABIC LETTER GHAIN
+    '['        #  0xDB -> LEFT SQUARE BRACKET, right-left
+    '\\'       #  0xDC -> REVERSE SOLIDUS, right-left
+    ']'        #  0xDD -> RIGHT SQUARE BRACKET, right-left
+    '^'        #  0xDE -> CIRCUMFLEX ACCENT, right-left
+    '_'        #  0xDF -> LOW LINE, right-left
+    '\u0640'   #  0xE0 -> ARABIC TATWEEL
+    '\u0641'   #  0xE1 -> ARABIC LETTER FEH
+    '\u0642'   #  0xE2 -> ARABIC LETTER QAF
+    '\u0643'   #  0xE3 -> ARABIC LETTER KAF
+    '\u0644'   #  0xE4 -> ARABIC LETTER LAM
+    '\u0645'   #  0xE5 -> ARABIC LETTER MEEM
+    '\u0646'   #  0xE6 -> ARABIC LETTER NOON
+    '\u0647'   #  0xE7 -> ARABIC LETTER HEH
+    '\u0648'   #  0xE8 -> ARABIC LETTER WAW
+    '\u0649'   #  0xE9 -> ARABIC LETTER ALEF MAKSURA
+    '\u064a'   #  0xEA -> ARABIC LETTER YEH
+    '\u064b'   #  0xEB -> ARABIC FATHATAN
+    '\u064c'   #  0xEC -> ARABIC DAMMATAN
+    '\u064d'   #  0xED -> ARABIC KASRATAN
+    '\u064e'   #  0xEE -> ARABIC FATHA
+    '\u064f'   #  0xEF -> ARABIC DAMMA
+    '\u0650'   #  0xF0 -> ARABIC KASRA
+    '\u0651'   #  0xF1 -> ARABIC SHADDA
+    '\u0652'   #  0xF2 -> ARABIC SUKUN
+    '\u067e'   #  0xF3 -> ARABIC LETTER PEH
+    '\u0679'   #  0xF4 -> ARABIC LETTER TTEH
+    '\u0686'   #  0xF5 -> ARABIC LETTER TCHEH
+    '\u06d5'   #  0xF6 -> ARABIC LETTER AE
+    '\u06a4'   #  0xF7 -> ARABIC LETTER VEH
+    '\u06af'   #  0xF8 -> ARABIC LETTER GAF
+    '\u0688'   #  0xF9 -> ARABIC LETTER DDAL
+    '\u0691'   #  0xFA -> ARABIC LETTER RREH
+    '{'        #  0xFB -> LEFT CURLY BRACKET, right-left
+    '|'        #  0xFC -> VERTICAL LINE, right-left
+    '}'        #  0xFD -> RIGHT CURLY BRACKET, right-left
+    '\u0698'   #  0xFE -> ARABIC LETTER JEH
+    '\u06d2'   #  0xFF -> ARABIC LETTER YEH BARREE
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/mac_greek.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_greek.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_greek.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> CONTROL CHARACTER
-    u'\x01'     #  0x01 -> CONTROL CHARACTER
-    u'\x02'     #  0x02 -> CONTROL CHARACTER
-    u'\x03'     #  0x03 -> CONTROL CHARACTER
-    u'\x04'     #  0x04 -> CONTROL CHARACTER
-    u'\x05'     #  0x05 -> CONTROL CHARACTER
-    u'\x06'     #  0x06 -> CONTROL CHARACTER
-    u'\x07'     #  0x07 -> CONTROL CHARACTER
-    u'\x08'     #  0x08 -> CONTROL CHARACTER
-    u'\t'       #  0x09 -> CONTROL CHARACTER
-    u'\n'       #  0x0A -> CONTROL CHARACTER
-    u'\x0b'     #  0x0B -> CONTROL CHARACTER
-    u'\x0c'     #  0x0C -> CONTROL CHARACTER
-    u'\r'       #  0x0D -> CONTROL CHARACTER
-    u'\x0e'     #  0x0E -> CONTROL CHARACTER
-    u'\x0f'     #  0x0F -> CONTROL CHARACTER
-    u'\x10'     #  0x10 -> CONTROL CHARACTER
-    u'\x11'     #  0x11 -> CONTROL CHARACTER
-    u'\x12'     #  0x12 -> CONTROL CHARACTER
-    u'\x13'     #  0x13 -> CONTROL CHARACTER
-    u'\x14'     #  0x14 -> CONTROL CHARACTER
-    u'\x15'     #  0x15 -> CONTROL CHARACTER
-    u'\x16'     #  0x16 -> CONTROL CHARACTER
-    u'\x17'     #  0x17 -> CONTROL CHARACTER
-    u'\x18'     #  0x18 -> CONTROL CHARACTER
-    u'\x19'     #  0x19 -> CONTROL CHARACTER
-    u'\x1a'     #  0x1A -> CONTROL CHARACTER
-    u'\x1b'     #  0x1B -> CONTROL CHARACTER
-    u'\x1c'     #  0x1C -> CONTROL CHARACTER
-    u'\x1d'     #  0x1D -> CONTROL CHARACTER
-    u'\x1e'     #  0x1E -> CONTROL CHARACTER
-    u'\x1f'     #  0x1F -> CONTROL CHARACTER
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> CONTROL CHARACTER
-    u'\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xb9'     #  0x81 -> SUPERSCRIPT ONE
-    u'\xb2'     #  0x82 -> SUPERSCRIPT TWO
-    u'\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xb3'     #  0x84 -> SUPERSCRIPT THREE
-    u'\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\u0385'   #  0x87 -> GREEK DIALYTIKA TONOS
-    u'\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\u0384'   #  0x8B -> GREEK TONOS
-    u'\xa8'     #  0x8C -> DIAERESIS
-    u'\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xa3'     #  0x92 -> POUND SIGN
-    u'\u2122'   #  0x93 -> TRADE MARK SIGN
-    u'\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\u2022'   #  0x96 -> BULLET
-    u'\xbd'     #  0x97 -> VULGAR FRACTION ONE HALF
-    u'\u2030'   #  0x98 -> PER MILLE SIGN
-    u'\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xa6'     #  0x9B -> BROKEN BAR
-    u'\u20ac'   #  0x9C -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN
-    u'\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u2020'   #  0xA0 -> DAGGER
-    u'\u0393'   #  0xA1 -> GREEK CAPITAL LETTER GAMMA
-    u'\u0394'   #  0xA2 -> GREEK CAPITAL LETTER DELTA
-    u'\u0398'   #  0xA3 -> GREEK CAPITAL LETTER THETA
-    u'\u039b'   #  0xA4 -> GREEK CAPITAL LETTER LAMDA
-    u'\u039e'   #  0xA5 -> GREEK CAPITAL LETTER XI
-    u'\u03a0'   #  0xA6 -> GREEK CAPITAL LETTER PI
-    u'\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
-    u'\xae'     #  0xA8 -> REGISTERED SIGN
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u03a3'   #  0xAA -> GREEK CAPITAL LETTER SIGMA
-    u'\u03aa'   #  0xAB -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
-    u'\xa7'     #  0xAC -> SECTION SIGN
-    u'\u2260'   #  0xAD -> NOT EQUAL TO
-    u'\xb0'     #  0xAE -> DEGREE SIGN
-    u'\xb7'     #  0xAF -> MIDDLE DOT
-    u'\u0391'   #  0xB0 -> GREEK CAPITAL LETTER ALPHA
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
-    u'\xa5'     #  0xB4 -> YEN SIGN
-    u'\u0392'   #  0xB5 -> GREEK CAPITAL LETTER BETA
-    u'\u0395'   #  0xB6 -> GREEK CAPITAL LETTER EPSILON
-    u'\u0396'   #  0xB7 -> GREEK CAPITAL LETTER ZETA
-    u'\u0397'   #  0xB8 -> GREEK CAPITAL LETTER ETA
-    u'\u0399'   #  0xB9 -> GREEK CAPITAL LETTER IOTA
-    u'\u039a'   #  0xBA -> GREEK CAPITAL LETTER KAPPA
-    u'\u039c'   #  0xBB -> GREEK CAPITAL LETTER MU
-    u'\u03a6'   #  0xBC -> GREEK CAPITAL LETTER PHI
-    u'\u03ab'   #  0xBD -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
-    u'\u03a8'   #  0xBE -> GREEK CAPITAL LETTER PSI
-    u'\u03a9'   #  0xBF -> GREEK CAPITAL LETTER OMEGA
-    u'\u03ac'   #  0xC0 -> GREEK SMALL LETTER ALPHA WITH TONOS
-    u'\u039d'   #  0xC1 -> GREEK CAPITAL LETTER NU
-    u'\xac'     #  0xC2 -> NOT SIGN
-    u'\u039f'   #  0xC3 -> GREEK CAPITAL LETTER OMICRON
-    u'\u03a1'   #  0xC4 -> GREEK CAPITAL LETTER RHO
-    u'\u2248'   #  0xC5 -> ALMOST EQUAL TO
-    u'\u03a4'   #  0xC6 -> GREEK CAPITAL LETTER TAU
-    u'\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
-    u'\xa0'     #  0xCA -> NO-BREAK SPACE
-    u'\u03a5'   #  0xCB -> GREEK CAPITAL LETTER UPSILON
-    u'\u03a7'   #  0xCC -> GREEK CAPITAL LETTER CHI
-    u'\u0386'   #  0xCD -> GREEK CAPITAL LETTER ALPHA WITH TONOS
-    u'\u0388'   #  0xCE -> GREEK CAPITAL LETTER EPSILON WITH TONOS
-    u'\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
-    u'\u2013'   #  0xD0 -> EN DASH
-    u'\u2015'   #  0xD1 -> HORIZONTAL BAR
-    u'\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
-    u'\xf7'     #  0xD6 -> DIVISION SIGN
-    u'\u0389'   #  0xD7 -> GREEK CAPITAL LETTER ETA WITH TONOS
-    u'\u038a'   #  0xD8 -> GREEK CAPITAL LETTER IOTA WITH TONOS
-    u'\u038c'   #  0xD9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
-    u'\u038e'   #  0xDA -> GREEK CAPITAL LETTER UPSILON WITH TONOS
-    u'\u03ad'   #  0xDB -> GREEK SMALL LETTER EPSILON WITH TONOS
-    u'\u03ae'   #  0xDC -> GREEK SMALL LETTER ETA WITH TONOS
-    u'\u03af'   #  0xDD -> GREEK SMALL LETTER IOTA WITH TONOS
-    u'\u03cc'   #  0xDE -> GREEK SMALL LETTER OMICRON WITH TONOS
-    u'\u038f'   #  0xDF -> GREEK CAPITAL LETTER OMEGA WITH TONOS
-    u'\u03cd'   #  0xE0 -> GREEK SMALL LETTER UPSILON WITH TONOS
-    u'\u03b1'   #  0xE1 -> GREEK SMALL LETTER ALPHA
-    u'\u03b2'   #  0xE2 -> GREEK SMALL LETTER BETA
-    u'\u03c8'   #  0xE3 -> GREEK SMALL LETTER PSI
-    u'\u03b4'   #  0xE4 -> GREEK SMALL LETTER DELTA
-    u'\u03b5'   #  0xE5 -> GREEK SMALL LETTER EPSILON
-    u'\u03c6'   #  0xE6 -> GREEK SMALL LETTER PHI
-    u'\u03b3'   #  0xE7 -> GREEK SMALL LETTER GAMMA
-    u'\u03b7'   #  0xE8 -> GREEK SMALL LETTER ETA
-    u'\u03b9'   #  0xE9 -> GREEK SMALL LETTER IOTA
-    u'\u03be'   #  0xEA -> GREEK SMALL LETTER XI
-    u'\u03ba'   #  0xEB -> GREEK SMALL LETTER KAPPA
-    u'\u03bb'   #  0xEC -> GREEK SMALL LETTER LAMDA
-    u'\u03bc'   #  0xED -> GREEK SMALL LETTER MU
-    u'\u03bd'   #  0xEE -> GREEK SMALL LETTER NU
-    u'\u03bf'   #  0xEF -> GREEK SMALL LETTER OMICRON
-    u'\u03c0'   #  0xF0 -> GREEK SMALL LETTER PI
-    u'\u03ce'   #  0xF1 -> GREEK SMALL LETTER OMEGA WITH TONOS
-    u'\u03c1'   #  0xF2 -> GREEK SMALL LETTER RHO
-    u'\u03c3'   #  0xF3 -> GREEK SMALL LETTER SIGMA
-    u'\u03c4'   #  0xF4 -> GREEK SMALL LETTER TAU
-    u'\u03b8'   #  0xF5 -> GREEK SMALL LETTER THETA
-    u'\u03c9'   #  0xF6 -> GREEK SMALL LETTER OMEGA
-    u'\u03c2'   #  0xF7 -> GREEK SMALL LETTER FINAL SIGMA
-    u'\u03c7'   #  0xF8 -> GREEK SMALL LETTER CHI
-    u'\u03c5'   #  0xF9 -> GREEK SMALL LETTER UPSILON
-    u'\u03b6'   #  0xFA -> GREEK SMALL LETTER ZETA
-    u'\u03ca'   #  0xFB -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
-    u'\u03cb'   #  0xFC -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
-    u'\u0390'   #  0xFD -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
-    u'\u03b0'   #  0xFE -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
-    u'\xad'     #  0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined
+    '\x00'     #  0x00 -> CONTROL CHARACTER
+    '\x01'     #  0x01 -> CONTROL CHARACTER
+    '\x02'     #  0x02 -> CONTROL CHARACTER
+    '\x03'     #  0x03 -> CONTROL CHARACTER
+    '\x04'     #  0x04 -> CONTROL CHARACTER
+    '\x05'     #  0x05 -> CONTROL CHARACTER
+    '\x06'     #  0x06 -> CONTROL CHARACTER
+    '\x07'     #  0x07 -> CONTROL CHARACTER
+    '\x08'     #  0x08 -> CONTROL CHARACTER
+    '\t'       #  0x09 -> CONTROL CHARACTER
+    '\n'       #  0x0A -> CONTROL CHARACTER
+    '\x0b'     #  0x0B -> CONTROL CHARACTER
+    '\x0c'     #  0x0C -> CONTROL CHARACTER
+    '\r'       #  0x0D -> CONTROL CHARACTER
+    '\x0e'     #  0x0E -> CONTROL CHARACTER
+    '\x0f'     #  0x0F -> CONTROL CHARACTER
+    '\x10'     #  0x10 -> CONTROL CHARACTER
+    '\x11'     #  0x11 -> CONTROL CHARACTER
+    '\x12'     #  0x12 -> CONTROL CHARACTER
+    '\x13'     #  0x13 -> CONTROL CHARACTER
+    '\x14'     #  0x14 -> CONTROL CHARACTER
+    '\x15'     #  0x15 -> CONTROL CHARACTER
+    '\x16'     #  0x16 -> CONTROL CHARACTER
+    '\x17'     #  0x17 -> CONTROL CHARACTER
+    '\x18'     #  0x18 -> CONTROL CHARACTER
+    '\x19'     #  0x19 -> CONTROL CHARACTER
+    '\x1a'     #  0x1A -> CONTROL CHARACTER
+    '\x1b'     #  0x1B -> CONTROL CHARACTER
+    '\x1c'     #  0x1C -> CONTROL CHARACTER
+    '\x1d'     #  0x1D -> CONTROL CHARACTER
+    '\x1e'     #  0x1E -> CONTROL CHARACTER
+    '\x1f'     #  0x1F -> CONTROL CHARACTER
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> CONTROL CHARACTER
+    '\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xb9'     #  0x81 -> SUPERSCRIPT ONE
+    '\xb2'     #  0x82 -> SUPERSCRIPT TWO
+    '\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xb3'     #  0x84 -> SUPERSCRIPT THREE
+    '\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\u0385'   #  0x87 -> GREEK DIALYTIKA TONOS
+    '\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\u0384'   #  0x8B -> GREEK TONOS
+    '\xa8'     #  0x8C -> DIAERESIS
+    '\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
+    '\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xa3'     #  0x92 -> POUND SIGN
+    '\u2122'   #  0x93 -> TRADE MARK SIGN
+    '\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\u2022'   #  0x96 -> BULLET
+    '\xbd'     #  0x97 -> VULGAR FRACTION ONE HALF
+    '\u2030'   #  0x98 -> PER MILLE SIGN
+    '\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xa6'     #  0x9B -> BROKEN BAR
+    '\u20ac'   #  0x9C -> EURO SIGN # before Mac OS 9.2.2, was SOFT HYPHEN
+    '\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u2020'   #  0xA0 -> DAGGER
+    '\u0393'   #  0xA1 -> GREEK CAPITAL LETTER GAMMA
+    '\u0394'   #  0xA2 -> GREEK CAPITAL LETTER DELTA
+    '\u0398'   #  0xA3 -> GREEK CAPITAL LETTER THETA
+    '\u039b'   #  0xA4 -> GREEK CAPITAL LETTER LAMDA
+    '\u039e'   #  0xA5 -> GREEK CAPITAL LETTER XI
+    '\u03a0'   #  0xA6 -> GREEK CAPITAL LETTER PI
+    '\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
+    '\xae'     #  0xA8 -> REGISTERED SIGN
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u03a3'   #  0xAA -> GREEK CAPITAL LETTER SIGMA
+    '\u03aa'   #  0xAB -> GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+    '\xa7'     #  0xAC -> SECTION SIGN
+    '\u2260'   #  0xAD -> NOT EQUAL TO
+    '\xb0'     #  0xAE -> DEGREE SIGN
+    '\xb7'     #  0xAF -> MIDDLE DOT
+    '\u0391'   #  0xB0 -> GREEK CAPITAL LETTER ALPHA
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
+    '\xa5'     #  0xB4 -> YEN SIGN
+    '\u0392'   #  0xB5 -> GREEK CAPITAL LETTER BETA
+    '\u0395'   #  0xB6 -> GREEK CAPITAL LETTER EPSILON
+    '\u0396'   #  0xB7 -> GREEK CAPITAL LETTER ZETA
+    '\u0397'   #  0xB8 -> GREEK CAPITAL LETTER ETA
+    '\u0399'   #  0xB9 -> GREEK CAPITAL LETTER IOTA
+    '\u039a'   #  0xBA -> GREEK CAPITAL LETTER KAPPA
+    '\u039c'   #  0xBB -> GREEK CAPITAL LETTER MU
+    '\u03a6'   #  0xBC -> GREEK CAPITAL LETTER PHI
+    '\u03ab'   #  0xBD -> GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+    '\u03a8'   #  0xBE -> GREEK CAPITAL LETTER PSI
+    '\u03a9'   #  0xBF -> GREEK CAPITAL LETTER OMEGA
+    '\u03ac'   #  0xC0 -> GREEK SMALL LETTER ALPHA WITH TONOS
+    '\u039d'   #  0xC1 -> GREEK CAPITAL LETTER NU
+    '\xac'     #  0xC2 -> NOT SIGN
+    '\u039f'   #  0xC3 -> GREEK CAPITAL LETTER OMICRON
+    '\u03a1'   #  0xC4 -> GREEK CAPITAL LETTER RHO
+    '\u2248'   #  0xC5 -> ALMOST EQUAL TO
+    '\u03a4'   #  0xC6 -> GREEK CAPITAL LETTER TAU
+    '\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
+    '\xa0'     #  0xCA -> NO-BREAK SPACE
+    '\u03a5'   #  0xCB -> GREEK CAPITAL LETTER UPSILON
+    '\u03a7'   #  0xCC -> GREEK CAPITAL LETTER CHI
+    '\u0386'   #  0xCD -> GREEK CAPITAL LETTER ALPHA WITH TONOS
+    '\u0388'   #  0xCE -> GREEK CAPITAL LETTER EPSILON WITH TONOS
+    '\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
+    '\u2013'   #  0xD0 -> EN DASH
+    '\u2015'   #  0xD1 -> HORIZONTAL BAR
+    '\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
+    '\xf7'     #  0xD6 -> DIVISION SIGN
+    '\u0389'   #  0xD7 -> GREEK CAPITAL LETTER ETA WITH TONOS
+    '\u038a'   #  0xD8 -> GREEK CAPITAL LETTER IOTA WITH TONOS
+    '\u038c'   #  0xD9 -> GREEK CAPITAL LETTER OMICRON WITH TONOS
+    '\u038e'   #  0xDA -> GREEK CAPITAL LETTER UPSILON WITH TONOS
+    '\u03ad'   #  0xDB -> GREEK SMALL LETTER EPSILON WITH TONOS
+    '\u03ae'   #  0xDC -> GREEK SMALL LETTER ETA WITH TONOS
+    '\u03af'   #  0xDD -> GREEK SMALL LETTER IOTA WITH TONOS
+    '\u03cc'   #  0xDE -> GREEK SMALL LETTER OMICRON WITH TONOS
+    '\u038f'   #  0xDF -> GREEK CAPITAL LETTER OMEGA WITH TONOS
+    '\u03cd'   #  0xE0 -> GREEK SMALL LETTER UPSILON WITH TONOS
+    '\u03b1'   #  0xE1 -> GREEK SMALL LETTER ALPHA
+    '\u03b2'   #  0xE2 -> GREEK SMALL LETTER BETA
+    '\u03c8'   #  0xE3 -> GREEK SMALL LETTER PSI
+    '\u03b4'   #  0xE4 -> GREEK SMALL LETTER DELTA
+    '\u03b5'   #  0xE5 -> GREEK SMALL LETTER EPSILON
+    '\u03c6'   #  0xE6 -> GREEK SMALL LETTER PHI
+    '\u03b3'   #  0xE7 -> GREEK SMALL LETTER GAMMA
+    '\u03b7'   #  0xE8 -> GREEK SMALL LETTER ETA
+    '\u03b9'   #  0xE9 -> GREEK SMALL LETTER IOTA
+    '\u03be'   #  0xEA -> GREEK SMALL LETTER XI
+    '\u03ba'   #  0xEB -> GREEK SMALL LETTER KAPPA
+    '\u03bb'   #  0xEC -> GREEK SMALL LETTER LAMDA
+    '\u03bc'   #  0xED -> GREEK SMALL LETTER MU
+    '\u03bd'   #  0xEE -> GREEK SMALL LETTER NU
+    '\u03bf'   #  0xEF -> GREEK SMALL LETTER OMICRON
+    '\u03c0'   #  0xF0 -> GREEK SMALL LETTER PI
+    '\u03ce'   #  0xF1 -> GREEK SMALL LETTER OMEGA WITH TONOS
+    '\u03c1'   #  0xF2 -> GREEK SMALL LETTER RHO
+    '\u03c3'   #  0xF3 -> GREEK SMALL LETTER SIGMA
+    '\u03c4'   #  0xF4 -> GREEK SMALL LETTER TAU
+    '\u03b8'   #  0xF5 -> GREEK SMALL LETTER THETA
+    '\u03c9'   #  0xF6 -> GREEK SMALL LETTER OMEGA
+    '\u03c2'   #  0xF7 -> GREEK SMALL LETTER FINAL SIGMA
+    '\u03c7'   #  0xF8 -> GREEK SMALL LETTER CHI
+    '\u03c5'   #  0xF9 -> GREEK SMALL LETTER UPSILON
+    '\u03b6'   #  0xFA -> GREEK SMALL LETTER ZETA
+    '\u03ca'   #  0xFB -> GREEK SMALL LETTER IOTA WITH DIALYTIKA
+    '\u03cb'   #  0xFC -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA
+    '\u0390'   #  0xFD -> GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+    '\u03b0'   #  0xFE -> GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+    '\xad'     #  0xFF -> SOFT HYPHEN # before Mac OS 9.2.2, was undefined
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/mac_iceland.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_iceland.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_iceland.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> CONTROL CHARACTER
-    u'\x01'     #  0x01 -> CONTROL CHARACTER
-    u'\x02'     #  0x02 -> CONTROL CHARACTER
-    u'\x03'     #  0x03 -> CONTROL CHARACTER
-    u'\x04'     #  0x04 -> CONTROL CHARACTER
-    u'\x05'     #  0x05 -> CONTROL CHARACTER
-    u'\x06'     #  0x06 -> CONTROL CHARACTER
-    u'\x07'     #  0x07 -> CONTROL CHARACTER
-    u'\x08'     #  0x08 -> CONTROL CHARACTER
-    u'\t'       #  0x09 -> CONTROL CHARACTER
-    u'\n'       #  0x0A -> CONTROL CHARACTER
-    u'\x0b'     #  0x0B -> CONTROL CHARACTER
-    u'\x0c'     #  0x0C -> CONTROL CHARACTER
-    u'\r'       #  0x0D -> CONTROL CHARACTER
-    u'\x0e'     #  0x0E -> CONTROL CHARACTER
-    u'\x0f'     #  0x0F -> CONTROL CHARACTER
-    u'\x10'     #  0x10 -> CONTROL CHARACTER
-    u'\x11'     #  0x11 -> CONTROL CHARACTER
-    u'\x12'     #  0x12 -> CONTROL CHARACTER
-    u'\x13'     #  0x13 -> CONTROL CHARACTER
-    u'\x14'     #  0x14 -> CONTROL CHARACTER
-    u'\x15'     #  0x15 -> CONTROL CHARACTER
-    u'\x16'     #  0x16 -> CONTROL CHARACTER
-    u'\x17'     #  0x17 -> CONTROL CHARACTER
-    u'\x18'     #  0x18 -> CONTROL CHARACTER
-    u'\x19'     #  0x19 -> CONTROL CHARACTER
-    u'\x1a'     #  0x1A -> CONTROL CHARACTER
-    u'\x1b'     #  0x1B -> CONTROL CHARACTER
-    u'\x1c'     #  0x1C -> CONTROL CHARACTER
-    u'\x1d'     #  0x1D -> CONTROL CHARACTER
-    u'\x1e'     #  0x1E -> CONTROL CHARACTER
-    u'\x1f'     #  0x1F -> CONTROL CHARACTER
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> CONTROL CHARACTER
-    u'\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
-    u'\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\xdd'     #  0xA0 -> LATIN CAPITAL LETTER Y WITH ACUTE
-    u'\xb0'     #  0xA1 -> DEGREE SIGN
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa7'     #  0xA4 -> SECTION SIGN
-    u'\u2022'   #  0xA5 -> BULLET
-    u'\xb6'     #  0xA6 -> PILCROW SIGN
-    u'\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
-    u'\xae'     #  0xA8 -> REGISTERED SIGN
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u2122'   #  0xAA -> TRADE MARK SIGN
-    u'\xb4'     #  0xAB -> ACUTE ACCENT
-    u'\xa8'     #  0xAC -> DIAERESIS
-    u'\u2260'   #  0xAD -> NOT EQUAL TO
-    u'\xc6'     #  0xAE -> LATIN CAPITAL LETTER AE
-    u'\xd8'     #  0xAF -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\u221e'   #  0xB0 -> INFINITY
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
-    u'\xa5'     #  0xB4 -> YEN SIGN
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
-    u'\u2211'   #  0xB7 -> N-ARY SUMMATION
-    u'\u220f'   #  0xB8 -> N-ARY PRODUCT
-    u'\u03c0'   #  0xB9 -> GREEK SMALL LETTER PI
-    u'\u222b'   #  0xBA -> INTEGRAL
-    u'\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
-    u'\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
-    u'\xe6'     #  0xBE -> LATIN SMALL LETTER AE
-    u'\xf8'     #  0xBF -> LATIN SMALL LETTER O WITH STROKE
-    u'\xbf'     #  0xC0 -> INVERTED QUESTION MARK
-    u'\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
-    u'\xac'     #  0xC2 -> NOT SIGN
-    u'\u221a'   #  0xC3 -> SQUARE ROOT
-    u'\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u2248'   #  0xC5 -> ALMOST EQUAL TO
-    u'\u2206'   #  0xC6 -> INCREMENT
-    u'\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
-    u'\xa0'     #  0xCA -> NO-BREAK SPACE
-    u'\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
-    u'\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
-    u'\u2013'   #  0xD0 -> EN DASH
-    u'\u2014'   #  0xD1 -> EM DASH
-    u'\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
-    u'\xf7'     #  0xD6 -> DIVISION SIGN
-    u'\u25ca'   #  0xD7 -> LOZENGE
-    u'\xff'     #  0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\u0178'   #  0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\u2044'   #  0xDA -> FRACTION SLASH
-    u'\u20ac'   #  0xDB -> EURO SIGN
-    u'\xd0'     #  0xDC -> LATIN CAPITAL LETTER ETH
-    u'\xf0'     #  0xDD -> LATIN SMALL LETTER ETH
-    u'\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN
-    u'\xfe'     #  0xDF -> LATIN SMALL LETTER THORN
-    u'\xfd'     #  0xE0 -> LATIN SMALL LETTER Y WITH ACUTE
-    u'\xb7'     #  0xE1 -> MIDDLE DOT
-    u'\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2030'   #  0xE4 -> PER MILLE SIGN
-    u'\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xca'     #  0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xcb'     #  0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\uf8ff'   #  0xF0 -> Apple logo
-    u'\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\u0131'   #  0xF5 -> LATIN SMALL LETTER DOTLESS I
-    u'\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u02dc'   #  0xF7 -> SMALL TILDE
-    u'\xaf'     #  0xF8 -> MACRON
-    u'\u02d8'   #  0xF9 -> BREVE
-    u'\u02d9'   #  0xFA -> DOT ABOVE
-    u'\u02da'   #  0xFB -> RING ABOVE
-    u'\xb8'     #  0xFC -> CEDILLA
-    u'\u02dd'   #  0xFD -> DOUBLE ACUTE ACCENT
-    u'\u02db'   #  0xFE -> OGONEK
-    u'\u02c7'   #  0xFF -> CARON
+    '\x00'     #  0x00 -> CONTROL CHARACTER
+    '\x01'     #  0x01 -> CONTROL CHARACTER
+    '\x02'     #  0x02 -> CONTROL CHARACTER
+    '\x03'     #  0x03 -> CONTROL CHARACTER
+    '\x04'     #  0x04 -> CONTROL CHARACTER
+    '\x05'     #  0x05 -> CONTROL CHARACTER
+    '\x06'     #  0x06 -> CONTROL CHARACTER
+    '\x07'     #  0x07 -> CONTROL CHARACTER
+    '\x08'     #  0x08 -> CONTROL CHARACTER
+    '\t'       #  0x09 -> CONTROL CHARACTER
+    '\n'       #  0x0A -> CONTROL CHARACTER
+    '\x0b'     #  0x0B -> CONTROL CHARACTER
+    '\x0c'     #  0x0C -> CONTROL CHARACTER
+    '\r'       #  0x0D -> CONTROL CHARACTER
+    '\x0e'     #  0x0E -> CONTROL CHARACTER
+    '\x0f'     #  0x0F -> CONTROL CHARACTER
+    '\x10'     #  0x10 -> CONTROL CHARACTER
+    '\x11'     #  0x11 -> CONTROL CHARACTER
+    '\x12'     #  0x12 -> CONTROL CHARACTER
+    '\x13'     #  0x13 -> CONTROL CHARACTER
+    '\x14'     #  0x14 -> CONTROL CHARACTER
+    '\x15'     #  0x15 -> CONTROL CHARACTER
+    '\x16'     #  0x16 -> CONTROL CHARACTER
+    '\x17'     #  0x17 -> CONTROL CHARACTER
+    '\x18'     #  0x18 -> CONTROL CHARACTER
+    '\x19'     #  0x19 -> CONTROL CHARACTER
+    '\x1a'     #  0x1A -> CONTROL CHARACTER
+    '\x1b'     #  0x1B -> CONTROL CHARACTER
+    '\x1c'     #  0x1C -> CONTROL CHARACTER
+    '\x1d'     #  0x1D -> CONTROL CHARACTER
+    '\x1e'     #  0x1E -> CONTROL CHARACTER
+    '\x1f'     #  0x1F -> CONTROL CHARACTER
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> CONTROL CHARACTER
+    '\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
+    '\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
+    '\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
+    '\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
+    '\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\xdd'     #  0xA0 -> LATIN CAPITAL LETTER Y WITH ACUTE
+    '\xb0'     #  0xA1 -> DEGREE SIGN
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa7'     #  0xA4 -> SECTION SIGN
+    '\u2022'   #  0xA5 -> BULLET
+    '\xb6'     #  0xA6 -> PILCROW SIGN
+    '\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
+    '\xae'     #  0xA8 -> REGISTERED SIGN
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u2122'   #  0xAA -> TRADE MARK SIGN
+    '\xb4'     #  0xAB -> ACUTE ACCENT
+    '\xa8'     #  0xAC -> DIAERESIS
+    '\u2260'   #  0xAD -> NOT EQUAL TO
+    '\xc6'     #  0xAE -> LATIN CAPITAL LETTER AE
+    '\xd8'     #  0xAF -> LATIN CAPITAL LETTER O WITH STROKE
+    '\u221e'   #  0xB0 -> INFINITY
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
+    '\xa5'     #  0xB4 -> YEN SIGN
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
+    '\u2211'   #  0xB7 -> N-ARY SUMMATION
+    '\u220f'   #  0xB8 -> N-ARY PRODUCT
+    '\u03c0'   #  0xB9 -> GREEK SMALL LETTER PI
+    '\u222b'   #  0xBA -> INTEGRAL
+    '\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
+    '\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
+    '\xe6'     #  0xBE -> LATIN SMALL LETTER AE
+    '\xf8'     #  0xBF -> LATIN SMALL LETTER O WITH STROKE
+    '\xbf'     #  0xC0 -> INVERTED QUESTION MARK
+    '\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
+    '\xac'     #  0xC2 -> NOT SIGN
+    '\u221a'   #  0xC3 -> SQUARE ROOT
+    '\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
+    '\u2248'   #  0xC5 -> ALMOST EQUAL TO
+    '\u2206'   #  0xC6 -> INCREMENT
+    '\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
+    '\xa0'     #  0xCA -> NO-BREAK SPACE
+    '\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
+    '\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
+    '\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
+    '\u2013'   #  0xD0 -> EN DASH
+    '\u2014'   #  0xD1 -> EM DASH
+    '\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
+    '\xf7'     #  0xD6 -> DIVISION SIGN
+    '\u25ca'   #  0xD7 -> LOZENGE
+    '\xff'     #  0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\u0178'   #  0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\u2044'   #  0xDA -> FRACTION SLASH
+    '\u20ac'   #  0xDB -> EURO SIGN
+    '\xd0'     #  0xDC -> LATIN CAPITAL LETTER ETH
+    '\xf0'     #  0xDD -> LATIN SMALL LETTER ETH
+    '\xde'     #  0xDE -> LATIN CAPITAL LETTER THORN
+    '\xfe'     #  0xDF -> LATIN SMALL LETTER THORN
+    '\xfd'     #  0xE0 -> LATIN SMALL LETTER Y WITH ACUTE
+    '\xb7'     #  0xE1 -> MIDDLE DOT
+    '\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
+    '\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2030'   #  0xE4 -> PER MILLE SIGN
+    '\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xca'     #  0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xcb'     #  0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\uf8ff'   #  0xF0 -> Apple logo
+    '\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\u0131'   #  0xF5 -> LATIN SMALL LETTER DOTLESS I
+    '\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u02dc'   #  0xF7 -> SMALL TILDE
+    '\xaf'     #  0xF8 -> MACRON
+    '\u02d8'   #  0xF9 -> BREVE
+    '\u02d9'   #  0xFA -> DOT ABOVE
+    '\u02da'   #  0xFB -> RING ABOVE
+    '\xb8'     #  0xFC -> CEDILLA
+    '\u02dd'   #  0xFD -> DOUBLE ACUTE ACCENT
+    '\u02db'   #  0xFE -> OGONEK
+    '\u02c7'   #  0xFF -> CARON
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/mac_roman.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_roman.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_roman.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> CONTROL CHARACTER
-    u'\x01'     #  0x01 -> CONTROL CHARACTER
-    u'\x02'     #  0x02 -> CONTROL CHARACTER
-    u'\x03'     #  0x03 -> CONTROL CHARACTER
-    u'\x04'     #  0x04 -> CONTROL CHARACTER
-    u'\x05'     #  0x05 -> CONTROL CHARACTER
-    u'\x06'     #  0x06 -> CONTROL CHARACTER
-    u'\x07'     #  0x07 -> CONTROL CHARACTER
-    u'\x08'     #  0x08 -> CONTROL CHARACTER
-    u'\t'       #  0x09 -> CONTROL CHARACTER
-    u'\n'       #  0x0A -> CONTROL CHARACTER
-    u'\x0b'     #  0x0B -> CONTROL CHARACTER
-    u'\x0c'     #  0x0C -> CONTROL CHARACTER
-    u'\r'       #  0x0D -> CONTROL CHARACTER
-    u'\x0e'     #  0x0E -> CONTROL CHARACTER
-    u'\x0f'     #  0x0F -> CONTROL CHARACTER
-    u'\x10'     #  0x10 -> CONTROL CHARACTER
-    u'\x11'     #  0x11 -> CONTROL CHARACTER
-    u'\x12'     #  0x12 -> CONTROL CHARACTER
-    u'\x13'     #  0x13 -> CONTROL CHARACTER
-    u'\x14'     #  0x14 -> CONTROL CHARACTER
-    u'\x15'     #  0x15 -> CONTROL CHARACTER
-    u'\x16'     #  0x16 -> CONTROL CHARACTER
-    u'\x17'     #  0x17 -> CONTROL CHARACTER
-    u'\x18'     #  0x18 -> CONTROL CHARACTER
-    u'\x19'     #  0x19 -> CONTROL CHARACTER
-    u'\x1a'     #  0x1A -> CONTROL CHARACTER
-    u'\x1b'     #  0x1B -> CONTROL CHARACTER
-    u'\x1c'     #  0x1C -> CONTROL CHARACTER
-    u'\x1d'     #  0x1D -> CONTROL CHARACTER
-    u'\x1e'     #  0x1E -> CONTROL CHARACTER
-    u'\x1f'     #  0x1F -> CONTROL CHARACTER
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> CONTROL CHARACTER
-    u'\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
-    u'\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u2020'   #  0xA0 -> DAGGER
-    u'\xb0'     #  0xA1 -> DEGREE SIGN
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa7'     #  0xA4 -> SECTION SIGN
-    u'\u2022'   #  0xA5 -> BULLET
-    u'\xb6'     #  0xA6 -> PILCROW SIGN
-    u'\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
-    u'\xae'     #  0xA8 -> REGISTERED SIGN
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u2122'   #  0xAA -> TRADE MARK SIGN
-    u'\xb4'     #  0xAB -> ACUTE ACCENT
-    u'\xa8'     #  0xAC -> DIAERESIS
-    u'\u2260'   #  0xAD -> NOT EQUAL TO
-    u'\xc6'     #  0xAE -> LATIN CAPITAL LETTER AE
-    u'\xd8'     #  0xAF -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\u221e'   #  0xB0 -> INFINITY
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
-    u'\xa5'     #  0xB4 -> YEN SIGN
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
-    u'\u2211'   #  0xB7 -> N-ARY SUMMATION
-    u'\u220f'   #  0xB8 -> N-ARY PRODUCT
-    u'\u03c0'   #  0xB9 -> GREEK SMALL LETTER PI
-    u'\u222b'   #  0xBA -> INTEGRAL
-    u'\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
-    u'\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
-    u'\xe6'     #  0xBE -> LATIN SMALL LETTER AE
-    u'\xf8'     #  0xBF -> LATIN SMALL LETTER O WITH STROKE
-    u'\xbf'     #  0xC0 -> INVERTED QUESTION MARK
-    u'\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
-    u'\xac'     #  0xC2 -> NOT SIGN
-    u'\u221a'   #  0xC3 -> SQUARE ROOT
-    u'\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u2248'   #  0xC5 -> ALMOST EQUAL TO
-    u'\u2206'   #  0xC6 -> INCREMENT
-    u'\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
-    u'\xa0'     #  0xCA -> NO-BREAK SPACE
-    u'\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
-    u'\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
-    u'\u2013'   #  0xD0 -> EN DASH
-    u'\u2014'   #  0xD1 -> EM DASH
-    u'\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
-    u'\xf7'     #  0xD6 -> DIVISION SIGN
-    u'\u25ca'   #  0xD7 -> LOZENGE
-    u'\xff'     #  0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\u0178'   #  0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\u2044'   #  0xDA -> FRACTION SLASH
-    u'\u20ac'   #  0xDB -> EURO SIGN
-    u'\u2039'   #  0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u203a'   #  0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\ufb01'   #  0xDE -> LATIN SMALL LIGATURE FI
-    u'\ufb02'   #  0xDF -> LATIN SMALL LIGATURE FL
-    u'\u2021'   #  0xE0 -> DOUBLE DAGGER
-    u'\xb7'     #  0xE1 -> MIDDLE DOT
-    u'\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2030'   #  0xE4 -> PER MILLE SIGN
-    u'\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xca'     #  0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xcb'     #  0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\uf8ff'   #  0xF0 -> Apple logo
-    u'\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\u0131'   #  0xF5 -> LATIN SMALL LETTER DOTLESS I
-    u'\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u02dc'   #  0xF7 -> SMALL TILDE
-    u'\xaf'     #  0xF8 -> MACRON
-    u'\u02d8'   #  0xF9 -> BREVE
-    u'\u02d9'   #  0xFA -> DOT ABOVE
-    u'\u02da'   #  0xFB -> RING ABOVE
-    u'\xb8'     #  0xFC -> CEDILLA
-    u'\u02dd'   #  0xFD -> DOUBLE ACUTE ACCENT
-    u'\u02db'   #  0xFE -> OGONEK
-    u'\u02c7'   #  0xFF -> CARON
+    '\x00'     #  0x00 -> CONTROL CHARACTER
+    '\x01'     #  0x01 -> CONTROL CHARACTER
+    '\x02'     #  0x02 -> CONTROL CHARACTER
+    '\x03'     #  0x03 -> CONTROL CHARACTER
+    '\x04'     #  0x04 -> CONTROL CHARACTER
+    '\x05'     #  0x05 -> CONTROL CHARACTER
+    '\x06'     #  0x06 -> CONTROL CHARACTER
+    '\x07'     #  0x07 -> CONTROL CHARACTER
+    '\x08'     #  0x08 -> CONTROL CHARACTER
+    '\t'       #  0x09 -> CONTROL CHARACTER
+    '\n'       #  0x0A -> CONTROL CHARACTER
+    '\x0b'     #  0x0B -> CONTROL CHARACTER
+    '\x0c'     #  0x0C -> CONTROL CHARACTER
+    '\r'       #  0x0D -> CONTROL CHARACTER
+    '\x0e'     #  0x0E -> CONTROL CHARACTER
+    '\x0f'     #  0x0F -> CONTROL CHARACTER
+    '\x10'     #  0x10 -> CONTROL CHARACTER
+    '\x11'     #  0x11 -> CONTROL CHARACTER
+    '\x12'     #  0x12 -> CONTROL CHARACTER
+    '\x13'     #  0x13 -> CONTROL CHARACTER
+    '\x14'     #  0x14 -> CONTROL CHARACTER
+    '\x15'     #  0x15 -> CONTROL CHARACTER
+    '\x16'     #  0x16 -> CONTROL CHARACTER
+    '\x17'     #  0x17 -> CONTROL CHARACTER
+    '\x18'     #  0x18 -> CONTROL CHARACTER
+    '\x19'     #  0x19 -> CONTROL CHARACTER
+    '\x1a'     #  0x1A -> CONTROL CHARACTER
+    '\x1b'     #  0x1B -> CONTROL CHARACTER
+    '\x1c'     #  0x1C -> CONTROL CHARACTER
+    '\x1d'     #  0x1D -> CONTROL CHARACTER
+    '\x1e'     #  0x1E -> CONTROL CHARACTER
+    '\x1f'     #  0x1F -> CONTROL CHARACTER
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> CONTROL CHARACTER
+    '\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
+    '\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
+    '\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
+    '\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
+    '\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u2020'   #  0xA0 -> DAGGER
+    '\xb0'     #  0xA1 -> DEGREE SIGN
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa7'     #  0xA4 -> SECTION SIGN
+    '\u2022'   #  0xA5 -> BULLET
+    '\xb6'     #  0xA6 -> PILCROW SIGN
+    '\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
+    '\xae'     #  0xA8 -> REGISTERED SIGN
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u2122'   #  0xAA -> TRADE MARK SIGN
+    '\xb4'     #  0xAB -> ACUTE ACCENT
+    '\xa8'     #  0xAC -> DIAERESIS
+    '\u2260'   #  0xAD -> NOT EQUAL TO
+    '\xc6'     #  0xAE -> LATIN CAPITAL LETTER AE
+    '\xd8'     #  0xAF -> LATIN CAPITAL LETTER O WITH STROKE
+    '\u221e'   #  0xB0 -> INFINITY
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
+    '\xa5'     #  0xB4 -> YEN SIGN
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
+    '\u2211'   #  0xB7 -> N-ARY SUMMATION
+    '\u220f'   #  0xB8 -> N-ARY PRODUCT
+    '\u03c0'   #  0xB9 -> GREEK SMALL LETTER PI
+    '\u222b'   #  0xBA -> INTEGRAL
+    '\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
+    '\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
+    '\xe6'     #  0xBE -> LATIN SMALL LETTER AE
+    '\xf8'     #  0xBF -> LATIN SMALL LETTER O WITH STROKE
+    '\xbf'     #  0xC0 -> INVERTED QUESTION MARK
+    '\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
+    '\xac'     #  0xC2 -> NOT SIGN
+    '\u221a'   #  0xC3 -> SQUARE ROOT
+    '\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
+    '\u2248'   #  0xC5 -> ALMOST EQUAL TO
+    '\u2206'   #  0xC6 -> INCREMENT
+    '\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
+    '\xa0'     #  0xCA -> NO-BREAK SPACE
+    '\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
+    '\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
+    '\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
+    '\u2013'   #  0xD0 -> EN DASH
+    '\u2014'   #  0xD1 -> EM DASH
+    '\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
+    '\xf7'     #  0xD6 -> DIVISION SIGN
+    '\u25ca'   #  0xD7 -> LOZENGE
+    '\xff'     #  0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\u0178'   #  0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\u2044'   #  0xDA -> FRACTION SLASH
+    '\u20ac'   #  0xDB -> EURO SIGN
+    '\u2039'   #  0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u203a'   #  0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\ufb01'   #  0xDE -> LATIN SMALL LIGATURE FI
+    '\ufb02'   #  0xDF -> LATIN SMALL LIGATURE FL
+    '\u2021'   #  0xE0 -> DOUBLE DAGGER
+    '\xb7'     #  0xE1 -> MIDDLE DOT
+    '\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
+    '\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2030'   #  0xE4 -> PER MILLE SIGN
+    '\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xca'     #  0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xcb'     #  0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\uf8ff'   #  0xF0 -> Apple logo
+    '\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\u0131'   #  0xF5 -> LATIN SMALL LETTER DOTLESS I
+    '\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u02dc'   #  0xF7 -> SMALL TILDE
+    '\xaf'     #  0xF8 -> MACRON
+    '\u02d8'   #  0xF9 -> BREVE
+    '\u02d9'   #  0xFA -> DOT ABOVE
+    '\u02da'   #  0xFB -> RING ABOVE
+    '\xb8'     #  0xFC -> CEDILLA
+    '\u02dd'   #  0xFD -> DOUBLE ACUTE ACCENT
+    '\u02db'   #  0xFE -> OGONEK
+    '\u02c7'   #  0xFF -> CARON
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/mac_romanian.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_romanian.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_romanian.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> CONTROL CHARACTER
-    u'\x01'     #  0x01 -> CONTROL CHARACTER
-    u'\x02'     #  0x02 -> CONTROL CHARACTER
-    u'\x03'     #  0x03 -> CONTROL CHARACTER
-    u'\x04'     #  0x04 -> CONTROL CHARACTER
-    u'\x05'     #  0x05 -> CONTROL CHARACTER
-    u'\x06'     #  0x06 -> CONTROL CHARACTER
-    u'\x07'     #  0x07 -> CONTROL CHARACTER
-    u'\x08'     #  0x08 -> CONTROL CHARACTER
-    u'\t'       #  0x09 -> CONTROL CHARACTER
-    u'\n'       #  0x0A -> CONTROL CHARACTER
-    u'\x0b'     #  0x0B -> CONTROL CHARACTER
-    u'\x0c'     #  0x0C -> CONTROL CHARACTER
-    u'\r'       #  0x0D -> CONTROL CHARACTER
-    u'\x0e'     #  0x0E -> CONTROL CHARACTER
-    u'\x0f'     #  0x0F -> CONTROL CHARACTER
-    u'\x10'     #  0x10 -> CONTROL CHARACTER
-    u'\x11'     #  0x11 -> CONTROL CHARACTER
-    u'\x12'     #  0x12 -> CONTROL CHARACTER
-    u'\x13'     #  0x13 -> CONTROL CHARACTER
-    u'\x14'     #  0x14 -> CONTROL CHARACTER
-    u'\x15'     #  0x15 -> CONTROL CHARACTER
-    u'\x16'     #  0x16 -> CONTROL CHARACTER
-    u'\x17'     #  0x17 -> CONTROL CHARACTER
-    u'\x18'     #  0x18 -> CONTROL CHARACTER
-    u'\x19'     #  0x19 -> CONTROL CHARACTER
-    u'\x1a'     #  0x1A -> CONTROL CHARACTER
-    u'\x1b'     #  0x1B -> CONTROL CHARACTER
-    u'\x1c'     #  0x1C -> CONTROL CHARACTER
-    u'\x1d'     #  0x1D -> CONTROL CHARACTER
-    u'\x1e'     #  0x1E -> CONTROL CHARACTER
-    u'\x1f'     #  0x1F -> CONTROL CHARACTER
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> CONTROL CHARACTER
-    u'\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
-    u'\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u2020'   #  0xA0 -> DAGGER
-    u'\xb0'     #  0xA1 -> DEGREE SIGN
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa7'     #  0xA4 -> SECTION SIGN
-    u'\u2022'   #  0xA5 -> BULLET
-    u'\xb6'     #  0xA6 -> PILCROW SIGN
-    u'\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
-    u'\xae'     #  0xA8 -> REGISTERED SIGN
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u2122'   #  0xAA -> TRADE MARK SIGN
-    u'\xb4'     #  0xAB -> ACUTE ACCENT
-    u'\xa8'     #  0xAC -> DIAERESIS
-    u'\u2260'   #  0xAD -> NOT EQUAL TO
-    u'\u0102'   #  0xAE -> LATIN CAPITAL LETTER A WITH BREVE
-    u'\u0218'   #  0xAF -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
-    u'\u221e'   #  0xB0 -> INFINITY
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
-    u'\xa5'     #  0xB4 -> YEN SIGN
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
-    u'\u2211'   #  0xB7 -> N-ARY SUMMATION
-    u'\u220f'   #  0xB8 -> N-ARY PRODUCT
-    u'\u03c0'   #  0xB9 -> GREEK SMALL LETTER PI
-    u'\u222b'   #  0xBA -> INTEGRAL
-    u'\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
-    u'\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
-    u'\u0103'   #  0xBE -> LATIN SMALL LETTER A WITH BREVE
-    u'\u0219'   #  0xBF -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
-    u'\xbf'     #  0xC0 -> INVERTED QUESTION MARK
-    u'\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
-    u'\xac'     #  0xC2 -> NOT SIGN
-    u'\u221a'   #  0xC3 -> SQUARE ROOT
-    u'\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u2248'   #  0xC5 -> ALMOST EQUAL TO
-    u'\u2206'   #  0xC6 -> INCREMENT
-    u'\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
-    u'\xa0'     #  0xCA -> NO-BREAK SPACE
-    u'\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
-    u'\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
-    u'\u2013'   #  0xD0 -> EN DASH
-    u'\u2014'   #  0xD1 -> EM DASH
-    u'\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
-    u'\xf7'     #  0xD6 -> DIVISION SIGN
-    u'\u25ca'   #  0xD7 -> LOZENGE
-    u'\xff'     #  0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\u0178'   #  0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\u2044'   #  0xDA -> FRACTION SLASH
-    u'\u20ac'   #  0xDB -> EURO SIGN
-    u'\u2039'   #  0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-    u'\u203a'   #  0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-    u'\u021a'   #  0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later
-    u'\u021b'   #  0xDF -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later
-    u'\u2021'   #  0xE0 -> DOUBLE DAGGER
-    u'\xb7'     #  0xE1 -> MIDDLE DOT
-    u'\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2030'   #  0xE4 -> PER MILLE SIGN
-    u'\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xca'     #  0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xcb'     #  0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\uf8ff'   #  0xF0 -> Apple logo
-    u'\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\u0131'   #  0xF5 -> LATIN SMALL LETTER DOTLESS I
-    u'\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u02dc'   #  0xF7 -> SMALL TILDE
-    u'\xaf'     #  0xF8 -> MACRON
-    u'\u02d8'   #  0xF9 -> BREVE
-    u'\u02d9'   #  0xFA -> DOT ABOVE
-    u'\u02da'   #  0xFB -> RING ABOVE
-    u'\xb8'     #  0xFC -> CEDILLA
-    u'\u02dd'   #  0xFD -> DOUBLE ACUTE ACCENT
-    u'\u02db'   #  0xFE -> OGONEK
-    u'\u02c7'   #  0xFF -> CARON
+    '\x00'     #  0x00 -> CONTROL CHARACTER
+    '\x01'     #  0x01 -> CONTROL CHARACTER
+    '\x02'     #  0x02 -> CONTROL CHARACTER
+    '\x03'     #  0x03 -> CONTROL CHARACTER
+    '\x04'     #  0x04 -> CONTROL CHARACTER
+    '\x05'     #  0x05 -> CONTROL CHARACTER
+    '\x06'     #  0x06 -> CONTROL CHARACTER
+    '\x07'     #  0x07 -> CONTROL CHARACTER
+    '\x08'     #  0x08 -> CONTROL CHARACTER
+    '\t'       #  0x09 -> CONTROL CHARACTER
+    '\n'       #  0x0A -> CONTROL CHARACTER
+    '\x0b'     #  0x0B -> CONTROL CHARACTER
+    '\x0c'     #  0x0C -> CONTROL CHARACTER
+    '\r'       #  0x0D -> CONTROL CHARACTER
+    '\x0e'     #  0x0E -> CONTROL CHARACTER
+    '\x0f'     #  0x0F -> CONTROL CHARACTER
+    '\x10'     #  0x10 -> CONTROL CHARACTER
+    '\x11'     #  0x11 -> CONTROL CHARACTER
+    '\x12'     #  0x12 -> CONTROL CHARACTER
+    '\x13'     #  0x13 -> CONTROL CHARACTER
+    '\x14'     #  0x14 -> CONTROL CHARACTER
+    '\x15'     #  0x15 -> CONTROL CHARACTER
+    '\x16'     #  0x16 -> CONTROL CHARACTER
+    '\x17'     #  0x17 -> CONTROL CHARACTER
+    '\x18'     #  0x18 -> CONTROL CHARACTER
+    '\x19'     #  0x19 -> CONTROL CHARACTER
+    '\x1a'     #  0x1A -> CONTROL CHARACTER
+    '\x1b'     #  0x1B -> CONTROL CHARACTER
+    '\x1c'     #  0x1C -> CONTROL CHARACTER
+    '\x1d'     #  0x1D -> CONTROL CHARACTER
+    '\x1e'     #  0x1E -> CONTROL CHARACTER
+    '\x1f'     #  0x1F -> CONTROL CHARACTER
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> CONTROL CHARACTER
+    '\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
+    '\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
+    '\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
+    '\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
+    '\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u2020'   #  0xA0 -> DAGGER
+    '\xb0'     #  0xA1 -> DEGREE SIGN
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa7'     #  0xA4 -> SECTION SIGN
+    '\u2022'   #  0xA5 -> BULLET
+    '\xb6'     #  0xA6 -> PILCROW SIGN
+    '\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
+    '\xae'     #  0xA8 -> REGISTERED SIGN
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u2122'   #  0xAA -> TRADE MARK SIGN
+    '\xb4'     #  0xAB -> ACUTE ACCENT
+    '\xa8'     #  0xAC -> DIAERESIS
+    '\u2260'   #  0xAD -> NOT EQUAL TO
+    '\u0102'   #  0xAE -> LATIN CAPITAL LETTER A WITH BREVE
+    '\u0218'   #  0xAF -> LATIN CAPITAL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
+    '\u221e'   #  0xB0 -> INFINITY
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
+    '\xa5'     #  0xB4 -> YEN SIGN
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
+    '\u2211'   #  0xB7 -> N-ARY SUMMATION
+    '\u220f'   #  0xB8 -> N-ARY PRODUCT
+    '\u03c0'   #  0xB9 -> GREEK SMALL LETTER PI
+    '\u222b'   #  0xBA -> INTEGRAL
+    '\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
+    '\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
+    '\u0103'   #  0xBE -> LATIN SMALL LETTER A WITH BREVE
+    '\u0219'   #  0xBF -> LATIN SMALL LETTER S WITH COMMA BELOW # for Unicode 3.0 and later
+    '\xbf'     #  0xC0 -> INVERTED QUESTION MARK
+    '\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
+    '\xac'     #  0xC2 -> NOT SIGN
+    '\u221a'   #  0xC3 -> SQUARE ROOT
+    '\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
+    '\u2248'   #  0xC5 -> ALMOST EQUAL TO
+    '\u2206'   #  0xC6 -> INCREMENT
+    '\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
+    '\xa0'     #  0xCA -> NO-BREAK SPACE
+    '\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
+    '\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
+    '\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
+    '\u2013'   #  0xD0 -> EN DASH
+    '\u2014'   #  0xD1 -> EM DASH
+    '\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
+    '\xf7'     #  0xD6 -> DIVISION SIGN
+    '\u25ca'   #  0xD7 -> LOZENGE
+    '\xff'     #  0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\u0178'   #  0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\u2044'   #  0xDA -> FRACTION SLASH
+    '\u20ac'   #  0xDB -> EURO SIGN
+    '\u2039'   #  0xDC -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+    '\u203a'   #  0xDD -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+    '\u021a'   #  0xDE -> LATIN CAPITAL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later
+    '\u021b'   #  0xDF -> LATIN SMALL LETTER T WITH COMMA BELOW # for Unicode 3.0 and later
+    '\u2021'   #  0xE0 -> DOUBLE DAGGER
+    '\xb7'     #  0xE1 -> MIDDLE DOT
+    '\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
+    '\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2030'   #  0xE4 -> PER MILLE SIGN
+    '\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xca'     #  0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xcb'     #  0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\uf8ff'   #  0xF0 -> Apple logo
+    '\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\u0131'   #  0xF5 -> LATIN SMALL LETTER DOTLESS I
+    '\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u02dc'   #  0xF7 -> SMALL TILDE
+    '\xaf'     #  0xF8 -> MACRON
+    '\u02d8'   #  0xF9 -> BREVE
+    '\u02d9'   #  0xFA -> DOT ABOVE
+    '\u02da'   #  0xFB -> RING ABOVE
+    '\xb8'     #  0xFC -> CEDILLA
+    '\u02dd'   #  0xFD -> DOUBLE ACUTE ACCENT
+    '\u02db'   #  0xFE -> OGONEK
+    '\u02c7'   #  0xFF -> CARON
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/mac_turkish.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/mac_turkish.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/mac_turkish.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> CONTROL CHARACTER
-    u'\x01'     #  0x01 -> CONTROL CHARACTER
-    u'\x02'     #  0x02 -> CONTROL CHARACTER
-    u'\x03'     #  0x03 -> CONTROL CHARACTER
-    u'\x04'     #  0x04 -> CONTROL CHARACTER
-    u'\x05'     #  0x05 -> CONTROL CHARACTER
-    u'\x06'     #  0x06 -> CONTROL CHARACTER
-    u'\x07'     #  0x07 -> CONTROL CHARACTER
-    u'\x08'     #  0x08 -> CONTROL CHARACTER
-    u'\t'       #  0x09 -> CONTROL CHARACTER
-    u'\n'       #  0x0A -> CONTROL CHARACTER
-    u'\x0b'     #  0x0B -> CONTROL CHARACTER
-    u'\x0c'     #  0x0C -> CONTROL CHARACTER
-    u'\r'       #  0x0D -> CONTROL CHARACTER
-    u'\x0e'     #  0x0E -> CONTROL CHARACTER
-    u'\x0f'     #  0x0F -> CONTROL CHARACTER
-    u'\x10'     #  0x10 -> CONTROL CHARACTER
-    u'\x11'     #  0x11 -> CONTROL CHARACTER
-    u'\x12'     #  0x12 -> CONTROL CHARACTER
-    u'\x13'     #  0x13 -> CONTROL CHARACTER
-    u'\x14'     #  0x14 -> CONTROL CHARACTER
-    u'\x15'     #  0x15 -> CONTROL CHARACTER
-    u'\x16'     #  0x16 -> CONTROL CHARACTER
-    u'\x17'     #  0x17 -> CONTROL CHARACTER
-    u'\x18'     #  0x18 -> CONTROL CHARACTER
-    u'\x19'     #  0x19 -> CONTROL CHARACTER
-    u'\x1a'     #  0x1A -> CONTROL CHARACTER
-    u'\x1b'     #  0x1B -> CONTROL CHARACTER
-    u'\x1c'     #  0x1C -> CONTROL CHARACTER
-    u'\x1d'     #  0x1D -> CONTROL CHARACTER
-    u'\x1e'     #  0x1E -> CONTROL CHARACTER
-    u'\x1f'     #  0x1F -> CONTROL CHARACTER
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> CONTROL CHARACTER
-    u'\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
-    u'\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
-    u'\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
-    u'\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
-    u'\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
-    u'\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
-    u'\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
-    u'\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
-    u'\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
-    u'\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
-    u'\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
-    u'\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
-    u'\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
-    u'\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
-    u'\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
-    u'\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
-    u'\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
-    u'\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
-    u'\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
-    u'\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
-    u'\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
-    u'\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
-    u'\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
-    u'\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
-    u'\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
-    u'\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
-    u'\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
-    u'\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
-    u'\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
-    u'\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
-    u'\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
-    u'\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
-    u'\u2020'   #  0xA0 -> DAGGER
-    u'\xb0'     #  0xA1 -> DEGREE SIGN
-    u'\xa2'     #  0xA2 -> CENT SIGN
-    u'\xa3'     #  0xA3 -> POUND SIGN
-    u'\xa7'     #  0xA4 -> SECTION SIGN
-    u'\u2022'   #  0xA5 -> BULLET
-    u'\xb6'     #  0xA6 -> PILCROW SIGN
-    u'\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
-    u'\xae'     #  0xA8 -> REGISTERED SIGN
-    u'\xa9'     #  0xA9 -> COPYRIGHT SIGN
-    u'\u2122'   #  0xAA -> TRADE MARK SIGN
-    u'\xb4'     #  0xAB -> ACUTE ACCENT
-    u'\xa8'     #  0xAC -> DIAERESIS
-    u'\u2260'   #  0xAD -> NOT EQUAL TO
-    u'\xc6'     #  0xAE -> LATIN CAPITAL LETTER AE
-    u'\xd8'     #  0xAF -> LATIN CAPITAL LETTER O WITH STROKE
-    u'\u221e'   #  0xB0 -> INFINITY
-    u'\xb1'     #  0xB1 -> PLUS-MINUS SIGN
-    u'\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
-    u'\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
-    u'\xa5'     #  0xB4 -> YEN SIGN
-    u'\xb5'     #  0xB5 -> MICRO SIGN
-    u'\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
-    u'\u2211'   #  0xB7 -> N-ARY SUMMATION
-    u'\u220f'   #  0xB8 -> N-ARY PRODUCT
-    u'\u03c0'   #  0xB9 -> GREEK SMALL LETTER PI
-    u'\u222b'   #  0xBA -> INTEGRAL
-    u'\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
-    u'\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
-    u'\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
-    u'\xe6'     #  0xBE -> LATIN SMALL LETTER AE
-    u'\xf8'     #  0xBF -> LATIN SMALL LETTER O WITH STROKE
-    u'\xbf'     #  0xC0 -> INVERTED QUESTION MARK
-    u'\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
-    u'\xac'     #  0xC2 -> NOT SIGN
-    u'\u221a'   #  0xC3 -> SQUARE ROOT
-    u'\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
-    u'\u2248'   #  0xC5 -> ALMOST EQUAL TO
-    u'\u2206'   #  0xC6 -> INCREMENT
-    u'\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-    u'\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
-    u'\xa0'     #  0xCA -> NO-BREAK SPACE
-    u'\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
-    u'\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
-    u'\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
-    u'\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
-    u'\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
-    u'\u2013'   #  0xD0 -> EN DASH
-    u'\u2014'   #  0xD1 -> EM DASH
-    u'\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
-    u'\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
-    u'\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
-    u'\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
-    u'\xf7'     #  0xD6 -> DIVISION SIGN
-    u'\u25ca'   #  0xD7 -> LOZENGE
-    u'\xff'     #  0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS
-    u'\u0178'   #  0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS
-    u'\u011e'   #  0xDA -> LATIN CAPITAL LETTER G WITH BREVE
-    u'\u011f'   #  0xDB -> LATIN SMALL LETTER G WITH BREVE
-    u'\u0130'   #  0xDC -> LATIN CAPITAL LETTER I WITH DOT ABOVE
-    u'\u0131'   #  0xDD -> LATIN SMALL LETTER DOTLESS I
-    u'\u015e'   #  0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA
-    u'\u015f'   #  0xDF -> LATIN SMALL LETTER S WITH CEDILLA
-    u'\u2021'   #  0xE0 -> DOUBLE DAGGER
-    u'\xb7'     #  0xE1 -> MIDDLE DOT
-    u'\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
-    u'\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
-    u'\u2030'   #  0xE4 -> PER MILLE SIGN
-    u'\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-    u'\xca'     #  0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-    u'\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
-    u'\xcb'     #  0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS
-    u'\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
-    u'\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
-    u'\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-    u'\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
-    u'\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
-    u'\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
-    u'\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-    u'\uf8ff'   #  0xF0 -> Apple logo
-    u'\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
-    u'\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
-    u'\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-    u'\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
-    u'\uf8a0'   #  0xF5 -> undefined1
-    u'\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
-    u'\u02dc'   #  0xF7 -> SMALL TILDE
-    u'\xaf'     #  0xF8 -> MACRON
-    u'\u02d8'   #  0xF9 -> BREVE
-    u'\u02d9'   #  0xFA -> DOT ABOVE
-    u'\u02da'   #  0xFB -> RING ABOVE
-    u'\xb8'     #  0xFC -> CEDILLA
-    u'\u02dd'   #  0xFD -> DOUBLE ACUTE ACCENT
-    u'\u02db'   #  0xFE -> OGONEK
-    u'\u02c7'   #  0xFF -> CARON
+    '\x00'     #  0x00 -> CONTROL CHARACTER
+    '\x01'     #  0x01 -> CONTROL CHARACTER
+    '\x02'     #  0x02 -> CONTROL CHARACTER
+    '\x03'     #  0x03 -> CONTROL CHARACTER
+    '\x04'     #  0x04 -> CONTROL CHARACTER
+    '\x05'     #  0x05 -> CONTROL CHARACTER
+    '\x06'     #  0x06 -> CONTROL CHARACTER
+    '\x07'     #  0x07 -> CONTROL CHARACTER
+    '\x08'     #  0x08 -> CONTROL CHARACTER
+    '\t'       #  0x09 -> CONTROL CHARACTER
+    '\n'       #  0x0A -> CONTROL CHARACTER
+    '\x0b'     #  0x0B -> CONTROL CHARACTER
+    '\x0c'     #  0x0C -> CONTROL CHARACTER
+    '\r'       #  0x0D -> CONTROL CHARACTER
+    '\x0e'     #  0x0E -> CONTROL CHARACTER
+    '\x0f'     #  0x0F -> CONTROL CHARACTER
+    '\x10'     #  0x10 -> CONTROL CHARACTER
+    '\x11'     #  0x11 -> CONTROL CHARACTER
+    '\x12'     #  0x12 -> CONTROL CHARACTER
+    '\x13'     #  0x13 -> CONTROL CHARACTER
+    '\x14'     #  0x14 -> CONTROL CHARACTER
+    '\x15'     #  0x15 -> CONTROL CHARACTER
+    '\x16'     #  0x16 -> CONTROL CHARACTER
+    '\x17'     #  0x17 -> CONTROL CHARACTER
+    '\x18'     #  0x18 -> CONTROL CHARACTER
+    '\x19'     #  0x19 -> CONTROL CHARACTER
+    '\x1a'     #  0x1A -> CONTROL CHARACTER
+    '\x1b'     #  0x1B -> CONTROL CHARACTER
+    '\x1c'     #  0x1C -> CONTROL CHARACTER
+    '\x1d'     #  0x1D -> CONTROL CHARACTER
+    '\x1e'     #  0x1E -> CONTROL CHARACTER
+    '\x1f'     #  0x1F -> CONTROL CHARACTER
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> CONTROL CHARACTER
+    '\xc4'     #  0x80 -> LATIN CAPITAL LETTER A WITH DIAERESIS
+    '\xc5'     #  0x81 -> LATIN CAPITAL LETTER A WITH RING ABOVE
+    '\xc7'     #  0x82 -> LATIN CAPITAL LETTER C WITH CEDILLA
+    '\xc9'     #  0x83 -> LATIN CAPITAL LETTER E WITH ACUTE
+    '\xd1'     #  0x84 -> LATIN CAPITAL LETTER N WITH TILDE
+    '\xd6'     #  0x85 -> LATIN CAPITAL LETTER O WITH DIAERESIS
+    '\xdc'     #  0x86 -> LATIN CAPITAL LETTER U WITH DIAERESIS
+    '\xe1'     #  0x87 -> LATIN SMALL LETTER A WITH ACUTE
+    '\xe0'     #  0x88 -> LATIN SMALL LETTER A WITH GRAVE
+    '\xe2'     #  0x89 -> LATIN SMALL LETTER A WITH CIRCUMFLEX
+    '\xe4'     #  0x8A -> LATIN SMALL LETTER A WITH DIAERESIS
+    '\xe3'     #  0x8B -> LATIN SMALL LETTER A WITH TILDE
+    '\xe5'     #  0x8C -> LATIN SMALL LETTER A WITH RING ABOVE
+    '\xe7'     #  0x8D -> LATIN SMALL LETTER C WITH CEDILLA
+    '\xe9'     #  0x8E -> LATIN SMALL LETTER E WITH ACUTE
+    '\xe8'     #  0x8F -> LATIN SMALL LETTER E WITH GRAVE
+    '\xea'     #  0x90 -> LATIN SMALL LETTER E WITH CIRCUMFLEX
+    '\xeb'     #  0x91 -> LATIN SMALL LETTER E WITH DIAERESIS
+    '\xed'     #  0x92 -> LATIN SMALL LETTER I WITH ACUTE
+    '\xec'     #  0x93 -> LATIN SMALL LETTER I WITH GRAVE
+    '\xee'     #  0x94 -> LATIN SMALL LETTER I WITH CIRCUMFLEX
+    '\xef'     #  0x95 -> LATIN SMALL LETTER I WITH DIAERESIS
+    '\xf1'     #  0x96 -> LATIN SMALL LETTER N WITH TILDE
+    '\xf3'     #  0x97 -> LATIN SMALL LETTER O WITH ACUTE
+    '\xf2'     #  0x98 -> LATIN SMALL LETTER O WITH GRAVE
+    '\xf4'     #  0x99 -> LATIN SMALL LETTER O WITH CIRCUMFLEX
+    '\xf6'     #  0x9A -> LATIN SMALL LETTER O WITH DIAERESIS
+    '\xf5'     #  0x9B -> LATIN SMALL LETTER O WITH TILDE
+    '\xfa'     #  0x9C -> LATIN SMALL LETTER U WITH ACUTE
+    '\xf9'     #  0x9D -> LATIN SMALL LETTER U WITH GRAVE
+    '\xfb'     #  0x9E -> LATIN SMALL LETTER U WITH CIRCUMFLEX
+    '\xfc'     #  0x9F -> LATIN SMALL LETTER U WITH DIAERESIS
+    '\u2020'   #  0xA0 -> DAGGER
+    '\xb0'     #  0xA1 -> DEGREE SIGN
+    '\xa2'     #  0xA2 -> CENT SIGN
+    '\xa3'     #  0xA3 -> POUND SIGN
+    '\xa7'     #  0xA4 -> SECTION SIGN
+    '\u2022'   #  0xA5 -> BULLET
+    '\xb6'     #  0xA6 -> PILCROW SIGN
+    '\xdf'     #  0xA7 -> LATIN SMALL LETTER SHARP S
+    '\xae'     #  0xA8 -> REGISTERED SIGN
+    '\xa9'     #  0xA9 -> COPYRIGHT SIGN
+    '\u2122'   #  0xAA -> TRADE MARK SIGN
+    '\xb4'     #  0xAB -> ACUTE ACCENT
+    '\xa8'     #  0xAC -> DIAERESIS
+    '\u2260'   #  0xAD -> NOT EQUAL TO
+    '\xc6'     #  0xAE -> LATIN CAPITAL LETTER AE
+    '\xd8'     #  0xAF -> LATIN CAPITAL LETTER O WITH STROKE
+    '\u221e'   #  0xB0 -> INFINITY
+    '\xb1'     #  0xB1 -> PLUS-MINUS SIGN
+    '\u2264'   #  0xB2 -> LESS-THAN OR EQUAL TO
+    '\u2265'   #  0xB3 -> GREATER-THAN OR EQUAL TO
+    '\xa5'     #  0xB4 -> YEN SIGN
+    '\xb5'     #  0xB5 -> MICRO SIGN
+    '\u2202'   #  0xB6 -> PARTIAL DIFFERENTIAL
+    '\u2211'   #  0xB7 -> N-ARY SUMMATION
+    '\u220f'   #  0xB8 -> N-ARY PRODUCT
+    '\u03c0'   #  0xB9 -> GREEK SMALL LETTER PI
+    '\u222b'   #  0xBA -> INTEGRAL
+    '\xaa'     #  0xBB -> FEMININE ORDINAL INDICATOR
+    '\xba'     #  0xBC -> MASCULINE ORDINAL INDICATOR
+    '\u03a9'   #  0xBD -> GREEK CAPITAL LETTER OMEGA
+    '\xe6'     #  0xBE -> LATIN SMALL LETTER AE
+    '\xf8'     #  0xBF -> LATIN SMALL LETTER O WITH STROKE
+    '\xbf'     #  0xC0 -> INVERTED QUESTION MARK
+    '\xa1'     #  0xC1 -> INVERTED EXCLAMATION MARK
+    '\xac'     #  0xC2 -> NOT SIGN
+    '\u221a'   #  0xC3 -> SQUARE ROOT
+    '\u0192'   #  0xC4 -> LATIN SMALL LETTER F WITH HOOK
+    '\u2248'   #  0xC5 -> ALMOST EQUAL TO
+    '\u2206'   #  0xC6 -> INCREMENT
+    '\xab'     #  0xC7 -> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\xbb'     #  0xC8 -> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+    '\u2026'   #  0xC9 -> HORIZONTAL ELLIPSIS
+    '\xa0'     #  0xCA -> NO-BREAK SPACE
+    '\xc0'     #  0xCB -> LATIN CAPITAL LETTER A WITH GRAVE
+    '\xc3'     #  0xCC -> LATIN CAPITAL LETTER A WITH TILDE
+    '\xd5'     #  0xCD -> LATIN CAPITAL LETTER O WITH TILDE
+    '\u0152'   #  0xCE -> LATIN CAPITAL LIGATURE OE
+    '\u0153'   #  0xCF -> LATIN SMALL LIGATURE OE
+    '\u2013'   #  0xD0 -> EN DASH
+    '\u2014'   #  0xD1 -> EM DASH
+    '\u201c'   #  0xD2 -> LEFT DOUBLE QUOTATION MARK
+    '\u201d'   #  0xD3 -> RIGHT DOUBLE QUOTATION MARK
+    '\u2018'   #  0xD4 -> LEFT SINGLE QUOTATION MARK
+    '\u2019'   #  0xD5 -> RIGHT SINGLE QUOTATION MARK
+    '\xf7'     #  0xD6 -> DIVISION SIGN
+    '\u25ca'   #  0xD7 -> LOZENGE
+    '\xff'     #  0xD8 -> LATIN SMALL LETTER Y WITH DIAERESIS
+    '\u0178'   #  0xD9 -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+    '\u011e'   #  0xDA -> LATIN CAPITAL LETTER G WITH BREVE
+    '\u011f'   #  0xDB -> LATIN SMALL LETTER G WITH BREVE
+    '\u0130'   #  0xDC -> LATIN CAPITAL LETTER I WITH DOT ABOVE
+    '\u0131'   #  0xDD -> LATIN SMALL LETTER DOTLESS I
+    '\u015e'   #  0xDE -> LATIN CAPITAL LETTER S WITH CEDILLA
+    '\u015f'   #  0xDF -> LATIN SMALL LETTER S WITH CEDILLA
+    '\u2021'   #  0xE0 -> DOUBLE DAGGER
+    '\xb7'     #  0xE1 -> MIDDLE DOT
+    '\u201a'   #  0xE2 -> SINGLE LOW-9 QUOTATION MARK
+    '\u201e'   #  0xE3 -> DOUBLE LOW-9 QUOTATION MARK
+    '\u2030'   #  0xE4 -> PER MILLE SIGN
+    '\xc2'     #  0xE5 -> LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+    '\xca'     #  0xE6 -> LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+    '\xc1'     #  0xE7 -> LATIN CAPITAL LETTER A WITH ACUTE
+    '\xcb'     #  0xE8 -> LATIN CAPITAL LETTER E WITH DIAERESIS
+    '\xc8'     #  0xE9 -> LATIN CAPITAL LETTER E WITH GRAVE
+    '\xcd'     #  0xEA -> LATIN CAPITAL LETTER I WITH ACUTE
+    '\xce'     #  0xEB -> LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+    '\xcf'     #  0xEC -> LATIN CAPITAL LETTER I WITH DIAERESIS
+    '\xcc'     #  0xED -> LATIN CAPITAL LETTER I WITH GRAVE
+    '\xd3'     #  0xEE -> LATIN CAPITAL LETTER O WITH ACUTE
+    '\xd4'     #  0xEF -> LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+    '\uf8ff'   #  0xF0 -> Apple logo
+    '\xd2'     #  0xF1 -> LATIN CAPITAL LETTER O WITH GRAVE
+    '\xda'     #  0xF2 -> LATIN CAPITAL LETTER U WITH ACUTE
+    '\xdb'     #  0xF3 -> LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+    '\xd9'     #  0xF4 -> LATIN CAPITAL LETTER U WITH GRAVE
+    '\uf8a0'   #  0xF5 -> undefined1
+    '\u02c6'   #  0xF6 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+    '\u02dc'   #  0xF7 -> SMALL TILDE
+    '\xaf'     #  0xF8 -> MACRON
+    '\u02d8'   #  0xF9 -> BREVE
+    '\u02d9'   #  0xFA -> DOT ABOVE
+    '\u02da'   #  0xFB -> RING ABOVE
+    '\xb8'     #  0xFC -> CEDILLA
+    '\u02dd'   #  0xFD -> DOUBLE ACUTE ACCENT
+    '\u02db'   #  0xFE -> OGONEK
+    '\u02c7'   #  0xFF -> CARON
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/punycode.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/punycode.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/punycode.py	Wed May  2 21:09:54 2007
@@ -189,7 +189,7 @@
     else:
         base = text[:pos]
         extended = text[pos+1:]
-    base = unicode(base, "ascii", errors)
+    base = str(base, "ascii", errors)
     extended = extended.upper()
     return insertion_sort(base, extended, errors)
 

Modified: python/branches/py3k-struni/Lib/encodings/tis_620.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/tis_620.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/tis_620.py	Wed May  2 21:09:54 2007
@@ -45,262 +45,262 @@
 ### Decoding Table
 
 decoding_table = (
-    u'\x00'     #  0x00 -> NULL
-    u'\x01'     #  0x01 -> START OF HEADING
-    u'\x02'     #  0x02 -> START OF TEXT
-    u'\x03'     #  0x03 -> END OF TEXT
-    u'\x04'     #  0x04 -> END OF TRANSMISSION
-    u'\x05'     #  0x05 -> ENQUIRY
-    u'\x06'     #  0x06 -> ACKNOWLEDGE
-    u'\x07'     #  0x07 -> BELL
-    u'\x08'     #  0x08 -> BACKSPACE
-    u'\t'       #  0x09 -> HORIZONTAL TABULATION
-    u'\n'       #  0x0A -> LINE FEED
-    u'\x0b'     #  0x0B -> VERTICAL TABULATION
-    u'\x0c'     #  0x0C -> FORM FEED
-    u'\r'       #  0x0D -> CARRIAGE RETURN
-    u'\x0e'     #  0x0E -> SHIFT OUT
-    u'\x0f'     #  0x0F -> SHIFT IN
-    u'\x10'     #  0x10 -> DATA LINK ESCAPE
-    u'\x11'     #  0x11 -> DEVICE CONTROL ONE
-    u'\x12'     #  0x12 -> DEVICE CONTROL TWO
-    u'\x13'     #  0x13 -> DEVICE CONTROL THREE
-    u'\x14'     #  0x14 -> DEVICE CONTROL FOUR
-    u'\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
-    u'\x16'     #  0x16 -> SYNCHRONOUS IDLE
-    u'\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
-    u'\x18'     #  0x18 -> CANCEL
-    u'\x19'     #  0x19 -> END OF MEDIUM
-    u'\x1a'     #  0x1A -> SUBSTITUTE
-    u'\x1b'     #  0x1B -> ESCAPE
-    u'\x1c'     #  0x1C -> FILE SEPARATOR
-    u'\x1d'     #  0x1D -> GROUP SEPARATOR
-    u'\x1e'     #  0x1E -> RECORD SEPARATOR
-    u'\x1f'     #  0x1F -> UNIT SEPARATOR
-    u' '        #  0x20 -> SPACE
-    u'!'        #  0x21 -> EXCLAMATION MARK
-    u'"'        #  0x22 -> QUOTATION MARK
-    u'#'        #  0x23 -> NUMBER SIGN
-    u'$'        #  0x24 -> DOLLAR SIGN
-    u'%'        #  0x25 -> PERCENT SIGN
-    u'&'        #  0x26 -> AMPERSAND
-    u"'"        #  0x27 -> APOSTROPHE
-    u'('        #  0x28 -> LEFT PARENTHESIS
-    u')'        #  0x29 -> RIGHT PARENTHESIS
-    u'*'        #  0x2A -> ASTERISK
-    u'+'        #  0x2B -> PLUS SIGN
-    u','        #  0x2C -> COMMA
-    u'-'        #  0x2D -> HYPHEN-MINUS
-    u'.'        #  0x2E -> FULL STOP
-    u'/'        #  0x2F -> SOLIDUS
-    u'0'        #  0x30 -> DIGIT ZERO
-    u'1'        #  0x31 -> DIGIT ONE
-    u'2'        #  0x32 -> DIGIT TWO
-    u'3'        #  0x33 -> DIGIT THREE
-    u'4'        #  0x34 -> DIGIT FOUR
-    u'5'        #  0x35 -> DIGIT FIVE
-    u'6'        #  0x36 -> DIGIT SIX
-    u'7'        #  0x37 -> DIGIT SEVEN
-    u'8'        #  0x38 -> DIGIT EIGHT
-    u'9'        #  0x39 -> DIGIT NINE
-    u':'        #  0x3A -> COLON
-    u';'        #  0x3B -> SEMICOLON
-    u'<'        #  0x3C -> LESS-THAN SIGN
-    u'='        #  0x3D -> EQUALS SIGN
-    u'>'        #  0x3E -> GREATER-THAN SIGN
-    u'?'        #  0x3F -> QUESTION MARK
-    u'@'        #  0x40 -> COMMERCIAL AT
-    u'A'        #  0x41 -> LATIN CAPITAL LETTER A
-    u'B'        #  0x42 -> LATIN CAPITAL LETTER B
-    u'C'        #  0x43 -> LATIN CAPITAL LETTER C
-    u'D'        #  0x44 -> LATIN CAPITAL LETTER D
-    u'E'        #  0x45 -> LATIN CAPITAL LETTER E
-    u'F'        #  0x46 -> LATIN CAPITAL LETTER F
-    u'G'        #  0x47 -> LATIN CAPITAL LETTER G
-    u'H'        #  0x48 -> LATIN CAPITAL LETTER H
-    u'I'        #  0x49 -> LATIN CAPITAL LETTER I
-    u'J'        #  0x4A -> LATIN CAPITAL LETTER J
-    u'K'        #  0x4B -> LATIN CAPITAL LETTER K
-    u'L'        #  0x4C -> LATIN CAPITAL LETTER L
-    u'M'        #  0x4D -> LATIN CAPITAL LETTER M
-    u'N'        #  0x4E -> LATIN CAPITAL LETTER N
-    u'O'        #  0x4F -> LATIN CAPITAL LETTER O
-    u'P'        #  0x50 -> LATIN CAPITAL LETTER P
-    u'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
-    u'R'        #  0x52 -> LATIN CAPITAL LETTER R
-    u'S'        #  0x53 -> LATIN CAPITAL LETTER S
-    u'T'        #  0x54 -> LATIN CAPITAL LETTER T
-    u'U'        #  0x55 -> LATIN CAPITAL LETTER U
-    u'V'        #  0x56 -> LATIN CAPITAL LETTER V
-    u'W'        #  0x57 -> LATIN CAPITAL LETTER W
-    u'X'        #  0x58 -> LATIN CAPITAL LETTER X
-    u'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
-    u'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
-    u'['        #  0x5B -> LEFT SQUARE BRACKET
-    u'\\'       #  0x5C -> REVERSE SOLIDUS
-    u']'        #  0x5D -> RIGHT SQUARE BRACKET
-    u'^'        #  0x5E -> CIRCUMFLEX ACCENT
-    u'_'        #  0x5F -> LOW LINE
-    u'`'        #  0x60 -> GRAVE ACCENT
-    u'a'        #  0x61 -> LATIN SMALL LETTER A
-    u'b'        #  0x62 -> LATIN SMALL LETTER B
-    u'c'        #  0x63 -> LATIN SMALL LETTER C
-    u'd'        #  0x64 -> LATIN SMALL LETTER D
-    u'e'        #  0x65 -> LATIN SMALL LETTER E
-    u'f'        #  0x66 -> LATIN SMALL LETTER F
-    u'g'        #  0x67 -> LATIN SMALL LETTER G
-    u'h'        #  0x68 -> LATIN SMALL LETTER H
-    u'i'        #  0x69 -> LATIN SMALL LETTER I
-    u'j'        #  0x6A -> LATIN SMALL LETTER J
-    u'k'        #  0x6B -> LATIN SMALL LETTER K
-    u'l'        #  0x6C -> LATIN SMALL LETTER L
-    u'm'        #  0x6D -> LATIN SMALL LETTER M
-    u'n'        #  0x6E -> LATIN SMALL LETTER N
-    u'o'        #  0x6F -> LATIN SMALL LETTER O
-    u'p'        #  0x70 -> LATIN SMALL LETTER P
-    u'q'        #  0x71 -> LATIN SMALL LETTER Q
-    u'r'        #  0x72 -> LATIN SMALL LETTER R
-    u's'        #  0x73 -> LATIN SMALL LETTER S
-    u't'        #  0x74 -> LATIN SMALL LETTER T
-    u'u'        #  0x75 -> LATIN SMALL LETTER U
-    u'v'        #  0x76 -> LATIN SMALL LETTER V
-    u'w'        #  0x77 -> LATIN SMALL LETTER W
-    u'x'        #  0x78 -> LATIN SMALL LETTER X
-    u'y'        #  0x79 -> LATIN SMALL LETTER Y
-    u'z'        #  0x7A -> LATIN SMALL LETTER Z
-    u'{'        #  0x7B -> LEFT CURLY BRACKET
-    u'|'        #  0x7C -> VERTICAL LINE
-    u'}'        #  0x7D -> RIGHT CURLY BRACKET
-    u'~'        #  0x7E -> TILDE
-    u'\x7f'     #  0x7F -> DELETE
-    u'\x80'     #  0x80 -> <control>
-    u'\x81'     #  0x81 -> <control>
-    u'\x82'     #  0x82 -> <control>
-    u'\x83'     #  0x83 -> <control>
-    u'\x84'     #  0x84 -> <control>
-    u'\x85'     #  0x85 -> <control>
-    u'\x86'     #  0x86 -> <control>
-    u'\x87'     #  0x87 -> <control>
-    u'\x88'     #  0x88 -> <control>
-    u'\x89'     #  0x89 -> <control>
-    u'\x8a'     #  0x8A -> <control>
-    u'\x8b'     #  0x8B -> <control>
-    u'\x8c'     #  0x8C -> <control>
-    u'\x8d'     #  0x8D -> <control>
-    u'\x8e'     #  0x8E -> <control>
-    u'\x8f'     #  0x8F -> <control>
-    u'\x90'     #  0x90 -> <control>
-    u'\x91'     #  0x91 -> <control>
-    u'\x92'     #  0x92 -> <control>
-    u'\x93'     #  0x93 -> <control>
-    u'\x94'     #  0x94 -> <control>
-    u'\x95'     #  0x95 -> <control>
-    u'\x96'     #  0x96 -> <control>
-    u'\x97'     #  0x97 -> <control>
-    u'\x98'     #  0x98 -> <control>
-    u'\x99'     #  0x99 -> <control>
-    u'\x9a'     #  0x9A -> <control>
-    u'\x9b'     #  0x9B -> <control>
-    u'\x9c'     #  0x9C -> <control>
-    u'\x9d'     #  0x9D -> <control>
-    u'\x9e'     #  0x9E -> <control>
-    u'\x9f'     #  0x9F -> <control>
-    u'\ufffe'
-    u'\u0e01'   #  0xA1 -> THAI CHARACTER KO KAI
-    u'\u0e02'   #  0xA2 -> THAI CHARACTER KHO KHAI
-    u'\u0e03'   #  0xA3 -> THAI CHARACTER KHO KHUAT
-    u'\u0e04'   #  0xA4 -> THAI CHARACTER KHO KHWAI
-    u'\u0e05'   #  0xA5 -> THAI CHARACTER KHO KHON
-    u'\u0e06'   #  0xA6 -> THAI CHARACTER KHO RAKHANG
-    u'\u0e07'   #  0xA7 -> THAI CHARACTER NGO NGU
-    u'\u0e08'   #  0xA8 -> THAI CHARACTER CHO CHAN
-    u'\u0e09'   #  0xA9 -> THAI CHARACTER CHO CHING
-    u'\u0e0a'   #  0xAA -> THAI CHARACTER CHO CHANG
-    u'\u0e0b'   #  0xAB -> THAI CHARACTER SO SO
-    u'\u0e0c'   #  0xAC -> THAI CHARACTER CHO CHOE
-    u'\u0e0d'   #  0xAD -> THAI CHARACTER YO YING
-    u'\u0e0e'   #  0xAE -> THAI CHARACTER DO CHADA
-    u'\u0e0f'   #  0xAF -> THAI CHARACTER TO PATAK
-    u'\u0e10'   #  0xB0 -> THAI CHARACTER THO THAN
-    u'\u0e11'   #  0xB1 -> THAI CHARACTER THO NANGMONTHO
-    u'\u0e12'   #  0xB2 -> THAI CHARACTER THO PHUTHAO
-    u'\u0e13'   #  0xB3 -> THAI CHARACTER NO NEN
-    u'\u0e14'   #  0xB4 -> THAI CHARACTER DO DEK
-    u'\u0e15'   #  0xB5 -> THAI CHARACTER TO TAO
-    u'\u0e16'   #  0xB6 -> THAI CHARACTER THO THUNG
-    u'\u0e17'   #  0xB7 -> THAI CHARACTER THO THAHAN
-    u'\u0e18'   #  0xB8 -> THAI CHARACTER THO THONG
-    u'\u0e19'   #  0xB9 -> THAI CHARACTER NO NU
-    u'\u0e1a'   #  0xBA -> THAI CHARACTER BO BAIMAI
-    u'\u0e1b'   #  0xBB -> THAI CHARACTER PO PLA
-    u'\u0e1c'   #  0xBC -> THAI CHARACTER PHO PHUNG
-    u'\u0e1d'   #  0xBD -> THAI CHARACTER FO FA
-    u'\u0e1e'   #  0xBE -> THAI CHARACTER PHO PHAN
-    u'\u0e1f'   #  0xBF -> THAI CHARACTER FO FAN
-    u'\u0e20'   #  0xC0 -> THAI CHARACTER PHO SAMPHAO
-    u'\u0e21'   #  0xC1 -> THAI CHARACTER MO MA
-    u'\u0e22'   #  0xC2 -> THAI CHARACTER YO YAK
-    u'\u0e23'   #  0xC3 -> THAI CHARACTER RO RUA
-    u'\u0e24'   #  0xC4 -> THAI CHARACTER RU
-    u'\u0e25'   #  0xC5 -> THAI CHARACTER LO LING
-    u'\u0e26'   #  0xC6 -> THAI CHARACTER LU
-    u'\u0e27'   #  0xC7 -> THAI CHARACTER WO WAEN
-    u'\u0e28'   #  0xC8 -> THAI CHARACTER SO SALA
-    u'\u0e29'   #  0xC9 -> THAI CHARACTER SO RUSI
-    u'\u0e2a'   #  0xCA -> THAI CHARACTER SO SUA
-    u'\u0e2b'   #  0xCB -> THAI CHARACTER HO HIP
-    u'\u0e2c'   #  0xCC -> THAI CHARACTER LO CHULA
-    u'\u0e2d'   #  0xCD -> THAI CHARACTER O ANG
-    u'\u0e2e'   #  0xCE -> THAI CHARACTER HO NOKHUK
-    u'\u0e2f'   #  0xCF -> THAI CHARACTER PAIYANNOI
-    u'\u0e30'   #  0xD0 -> THAI CHARACTER SARA A
-    u'\u0e31'   #  0xD1 -> THAI CHARACTER MAI HAN-AKAT
-    u'\u0e32'   #  0xD2 -> THAI CHARACTER SARA AA
-    u'\u0e33'   #  0xD3 -> THAI CHARACTER SARA AM
-    u'\u0e34'   #  0xD4 -> THAI CHARACTER SARA I
-    u'\u0e35'   #  0xD5 -> THAI CHARACTER SARA II
-    u'\u0e36'   #  0xD6 -> THAI CHARACTER SARA UE
-    u'\u0e37'   #  0xD7 -> THAI CHARACTER SARA UEE
-    u'\u0e38'   #  0xD8 -> THAI CHARACTER SARA U
-    u'\u0e39'   #  0xD9 -> THAI CHARACTER SARA UU
-    u'\u0e3a'   #  0xDA -> THAI CHARACTER PHINTHU
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\u0e3f'   #  0xDF -> THAI CURRENCY SYMBOL BAHT
-    u'\u0e40'   #  0xE0 -> THAI CHARACTER SARA E
-    u'\u0e41'   #  0xE1 -> THAI CHARACTER SARA AE
-    u'\u0e42'   #  0xE2 -> THAI CHARACTER SARA O
-    u'\u0e43'   #  0xE3 -> THAI CHARACTER SARA AI MAIMUAN
-    u'\u0e44'   #  0xE4 -> THAI CHARACTER SARA AI MAIMALAI
-    u'\u0e45'   #  0xE5 -> THAI CHARACTER LAKKHANGYAO
-    u'\u0e46'   #  0xE6 -> THAI CHARACTER MAIYAMOK
-    u'\u0e47'   #  0xE7 -> THAI CHARACTER MAITAIKHU
-    u'\u0e48'   #  0xE8 -> THAI CHARACTER MAI EK
-    u'\u0e49'   #  0xE9 -> THAI CHARACTER MAI THO
-    u'\u0e4a'   #  0xEA -> THAI CHARACTER MAI TRI
-    u'\u0e4b'   #  0xEB -> THAI CHARACTER MAI CHATTAWA
-    u'\u0e4c'   #  0xEC -> THAI CHARACTER THANTHAKHAT
-    u'\u0e4d'   #  0xED -> THAI CHARACTER NIKHAHIT
-    u'\u0e4e'   #  0xEE -> THAI CHARACTER YAMAKKAN
-    u'\u0e4f'   #  0xEF -> THAI CHARACTER FONGMAN
-    u'\u0e50'   #  0xF0 -> THAI DIGIT ZERO
-    u'\u0e51'   #  0xF1 -> THAI DIGIT ONE
-    u'\u0e52'   #  0xF2 -> THAI DIGIT TWO
-    u'\u0e53'   #  0xF3 -> THAI DIGIT THREE
-    u'\u0e54'   #  0xF4 -> THAI DIGIT FOUR
-    u'\u0e55'   #  0xF5 -> THAI DIGIT FIVE
-    u'\u0e56'   #  0xF6 -> THAI DIGIT SIX
-    u'\u0e57'   #  0xF7 -> THAI DIGIT SEVEN
-    u'\u0e58'   #  0xF8 -> THAI DIGIT EIGHT
-    u'\u0e59'   #  0xF9 -> THAI DIGIT NINE
-    u'\u0e5a'   #  0xFA -> THAI CHARACTER ANGKHANKHU
-    u'\u0e5b'   #  0xFB -> THAI CHARACTER KHOMUT
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
-    u'\ufffe'
+    '\x00'     #  0x00 -> NULL
+    '\x01'     #  0x01 -> START OF HEADING
+    '\x02'     #  0x02 -> START OF TEXT
+    '\x03'     #  0x03 -> END OF TEXT
+    '\x04'     #  0x04 -> END OF TRANSMISSION
+    '\x05'     #  0x05 -> ENQUIRY
+    '\x06'     #  0x06 -> ACKNOWLEDGE
+    '\x07'     #  0x07 -> BELL
+    '\x08'     #  0x08 -> BACKSPACE
+    '\t'       #  0x09 -> HORIZONTAL TABULATION
+    '\n'       #  0x0A -> LINE FEED
+    '\x0b'     #  0x0B -> VERTICAL TABULATION
+    '\x0c'     #  0x0C -> FORM FEED
+    '\r'       #  0x0D -> CARRIAGE RETURN
+    '\x0e'     #  0x0E -> SHIFT OUT
+    '\x0f'     #  0x0F -> SHIFT IN
+    '\x10'     #  0x10 -> DATA LINK ESCAPE
+    '\x11'     #  0x11 -> DEVICE CONTROL ONE
+    '\x12'     #  0x12 -> DEVICE CONTROL TWO
+    '\x13'     #  0x13 -> DEVICE CONTROL THREE
+    '\x14'     #  0x14 -> DEVICE CONTROL FOUR
+    '\x15'     #  0x15 -> NEGATIVE ACKNOWLEDGE
+    '\x16'     #  0x16 -> SYNCHRONOUS IDLE
+    '\x17'     #  0x17 -> END OF TRANSMISSION BLOCK
+    '\x18'     #  0x18 -> CANCEL
+    '\x19'     #  0x19 -> END OF MEDIUM
+    '\x1a'     #  0x1A -> SUBSTITUTE
+    '\x1b'     #  0x1B -> ESCAPE
+    '\x1c'     #  0x1C -> FILE SEPARATOR
+    '\x1d'     #  0x1D -> GROUP SEPARATOR
+    '\x1e'     #  0x1E -> RECORD SEPARATOR
+    '\x1f'     #  0x1F -> UNIT SEPARATOR
+    ' '        #  0x20 -> SPACE
+    '!'        #  0x21 -> EXCLAMATION MARK
+    '"'        #  0x22 -> QUOTATION MARK
+    '#'        #  0x23 -> NUMBER SIGN
+    '$'        #  0x24 -> DOLLAR SIGN
+    '%'        #  0x25 -> PERCENT SIGN
+    '&'        #  0x26 -> AMPERSAND
+    "'"        #  0x27 -> APOSTROPHE
+    '('        #  0x28 -> LEFT PARENTHESIS
+    ')'        #  0x29 -> RIGHT PARENTHESIS
+    '*'        #  0x2A -> ASTERISK
+    '+'        #  0x2B -> PLUS SIGN
+    ','        #  0x2C -> COMMA
+    '-'        #  0x2D -> HYPHEN-MINUS
+    '.'        #  0x2E -> FULL STOP
+    '/'        #  0x2F -> SOLIDUS
+    '0'        #  0x30 -> DIGIT ZERO
+    '1'        #  0x31 -> DIGIT ONE
+    '2'        #  0x32 -> DIGIT TWO
+    '3'        #  0x33 -> DIGIT THREE
+    '4'        #  0x34 -> DIGIT FOUR
+    '5'        #  0x35 -> DIGIT FIVE
+    '6'        #  0x36 -> DIGIT SIX
+    '7'        #  0x37 -> DIGIT SEVEN
+    '8'        #  0x38 -> DIGIT EIGHT
+    '9'        #  0x39 -> DIGIT NINE
+    ':'        #  0x3A -> COLON
+    ';'        #  0x3B -> SEMICOLON
+    '<'        #  0x3C -> LESS-THAN SIGN
+    '='        #  0x3D -> EQUALS SIGN
+    '>'        #  0x3E -> GREATER-THAN SIGN
+    '?'        #  0x3F -> QUESTION MARK
+    '@'        #  0x40 -> COMMERCIAL AT
+    'A'        #  0x41 -> LATIN CAPITAL LETTER A
+    'B'        #  0x42 -> LATIN CAPITAL LETTER B
+    'C'        #  0x43 -> LATIN CAPITAL LETTER C
+    'D'        #  0x44 -> LATIN CAPITAL LETTER D
+    'E'        #  0x45 -> LATIN CAPITAL LETTER E
+    'F'        #  0x46 -> LATIN CAPITAL LETTER F
+    'G'        #  0x47 -> LATIN CAPITAL LETTER G
+    'H'        #  0x48 -> LATIN CAPITAL LETTER H
+    'I'        #  0x49 -> LATIN CAPITAL LETTER I
+    'J'        #  0x4A -> LATIN CAPITAL LETTER J
+    'K'        #  0x4B -> LATIN CAPITAL LETTER K
+    'L'        #  0x4C -> LATIN CAPITAL LETTER L
+    'M'        #  0x4D -> LATIN CAPITAL LETTER M
+    'N'        #  0x4E -> LATIN CAPITAL LETTER N
+    'O'        #  0x4F -> LATIN CAPITAL LETTER O
+    'P'        #  0x50 -> LATIN CAPITAL LETTER P
+    'Q'        #  0x51 -> LATIN CAPITAL LETTER Q
+    'R'        #  0x52 -> LATIN CAPITAL LETTER R
+    'S'        #  0x53 -> LATIN CAPITAL LETTER S
+    'T'        #  0x54 -> LATIN CAPITAL LETTER T
+    'U'        #  0x55 -> LATIN CAPITAL LETTER U
+    'V'        #  0x56 -> LATIN CAPITAL LETTER V
+    'W'        #  0x57 -> LATIN CAPITAL LETTER W
+    'X'        #  0x58 -> LATIN CAPITAL LETTER X
+    'Y'        #  0x59 -> LATIN CAPITAL LETTER Y
+    'Z'        #  0x5A -> LATIN CAPITAL LETTER Z
+    '['        #  0x5B -> LEFT SQUARE BRACKET
+    '\\'       #  0x5C -> REVERSE SOLIDUS
+    ']'        #  0x5D -> RIGHT SQUARE BRACKET
+    '^'        #  0x5E -> CIRCUMFLEX ACCENT
+    '_'        #  0x5F -> LOW LINE
+    '`'        #  0x60 -> GRAVE ACCENT
+    'a'        #  0x61 -> LATIN SMALL LETTER A
+    'b'        #  0x62 -> LATIN SMALL LETTER B
+    'c'        #  0x63 -> LATIN SMALL LETTER C
+    'd'        #  0x64 -> LATIN SMALL LETTER D
+    'e'        #  0x65 -> LATIN SMALL LETTER E
+    'f'        #  0x66 -> LATIN SMALL LETTER F
+    'g'        #  0x67 -> LATIN SMALL LETTER G
+    'h'        #  0x68 -> LATIN SMALL LETTER H
+    'i'        #  0x69 -> LATIN SMALL LETTER I
+    'j'        #  0x6A -> LATIN SMALL LETTER J
+    'k'        #  0x6B -> LATIN SMALL LETTER K
+    'l'        #  0x6C -> LATIN SMALL LETTER L
+    'm'        #  0x6D -> LATIN SMALL LETTER M
+    'n'        #  0x6E -> LATIN SMALL LETTER N
+    'o'        #  0x6F -> LATIN SMALL LETTER O
+    'p'        #  0x70 -> LATIN SMALL LETTER P
+    'q'        #  0x71 -> LATIN SMALL LETTER Q
+    'r'        #  0x72 -> LATIN SMALL LETTER R
+    's'        #  0x73 -> LATIN SMALL LETTER S
+    't'        #  0x74 -> LATIN SMALL LETTER T
+    'u'        #  0x75 -> LATIN SMALL LETTER U
+    'v'        #  0x76 -> LATIN SMALL LETTER V
+    'w'        #  0x77 -> LATIN SMALL LETTER W
+    'x'        #  0x78 -> LATIN SMALL LETTER X
+    'y'        #  0x79 -> LATIN SMALL LETTER Y
+    'z'        #  0x7A -> LATIN SMALL LETTER Z
+    '{'        #  0x7B -> LEFT CURLY BRACKET
+    '|'        #  0x7C -> VERTICAL LINE
+    '}'        #  0x7D -> RIGHT CURLY BRACKET
+    '~'        #  0x7E -> TILDE
+    '\x7f'     #  0x7F -> DELETE
+    '\x80'     #  0x80 -> <control>
+    '\x81'     #  0x81 -> <control>
+    '\x82'     #  0x82 -> <control>
+    '\x83'     #  0x83 -> <control>
+    '\x84'     #  0x84 -> <control>
+    '\x85'     #  0x85 -> <control>
+    '\x86'     #  0x86 -> <control>
+    '\x87'     #  0x87 -> <control>
+    '\x88'     #  0x88 -> <control>
+    '\x89'     #  0x89 -> <control>
+    '\x8a'     #  0x8A -> <control>
+    '\x8b'     #  0x8B -> <control>
+    '\x8c'     #  0x8C -> <control>
+    '\x8d'     #  0x8D -> <control>
+    '\x8e'     #  0x8E -> <control>
+    '\x8f'     #  0x8F -> <control>
+    '\x90'     #  0x90 -> <control>
+    '\x91'     #  0x91 -> <control>
+    '\x92'     #  0x92 -> <control>
+    '\x93'     #  0x93 -> <control>
+    '\x94'     #  0x94 -> <control>
+    '\x95'     #  0x95 -> <control>
+    '\x96'     #  0x96 -> <control>
+    '\x97'     #  0x97 -> <control>
+    '\x98'     #  0x98 -> <control>
+    '\x99'     #  0x99 -> <control>
+    '\x9a'     #  0x9A -> <control>
+    '\x9b'     #  0x9B -> <control>
+    '\x9c'     #  0x9C -> <control>
+    '\x9d'     #  0x9D -> <control>
+    '\x9e'     #  0x9E -> <control>
+    '\x9f'     #  0x9F -> <control>
+    '\ufffe'
+    '\u0e01'   #  0xA1 -> THAI CHARACTER KO KAI
+    '\u0e02'   #  0xA2 -> THAI CHARACTER KHO KHAI
+    '\u0e03'   #  0xA3 -> THAI CHARACTER KHO KHUAT
+    '\u0e04'   #  0xA4 -> THAI CHARACTER KHO KHWAI
+    '\u0e05'   #  0xA5 -> THAI CHARACTER KHO KHON
+    '\u0e06'   #  0xA6 -> THAI CHARACTER KHO RAKHANG
+    '\u0e07'   #  0xA7 -> THAI CHARACTER NGO NGU
+    '\u0e08'   #  0xA8 -> THAI CHARACTER CHO CHAN
+    '\u0e09'   #  0xA9 -> THAI CHARACTER CHO CHING
+    '\u0e0a'   #  0xAA -> THAI CHARACTER CHO CHANG
+    '\u0e0b'   #  0xAB -> THAI CHARACTER SO SO
+    '\u0e0c'   #  0xAC -> THAI CHARACTER CHO CHOE
+    '\u0e0d'   #  0xAD -> THAI CHARACTER YO YING
+    '\u0e0e'   #  0xAE -> THAI CHARACTER DO CHADA
+    '\u0e0f'   #  0xAF -> THAI CHARACTER TO PATAK
+    '\u0e10'   #  0xB0 -> THAI CHARACTER THO THAN
+    '\u0e11'   #  0xB1 -> THAI CHARACTER THO NANGMONTHO
+    '\u0e12'   #  0xB2 -> THAI CHARACTER THO PHUTHAO
+    '\u0e13'   #  0xB3 -> THAI CHARACTER NO NEN
+    '\u0e14'   #  0xB4 -> THAI CHARACTER DO DEK
+    '\u0e15'   #  0xB5 -> THAI CHARACTER TO TAO
+    '\u0e16'   #  0xB6 -> THAI CHARACTER THO THUNG
+    '\u0e17'   #  0xB7 -> THAI CHARACTER THO THAHAN
+    '\u0e18'   #  0xB8 -> THAI CHARACTER THO THONG
+    '\u0e19'   #  0xB9 -> THAI CHARACTER NO NU
+    '\u0e1a'   #  0xBA -> THAI CHARACTER BO BAIMAI
+    '\u0e1b'   #  0xBB -> THAI CHARACTER PO PLA
+    '\u0e1c'   #  0xBC -> THAI CHARACTER PHO PHUNG
+    '\u0e1d'   #  0xBD -> THAI CHARACTER FO FA
+    '\u0e1e'   #  0xBE -> THAI CHARACTER PHO PHAN
+    '\u0e1f'   #  0xBF -> THAI CHARACTER FO FAN
+    '\u0e20'   #  0xC0 -> THAI CHARACTER PHO SAMPHAO
+    '\u0e21'   #  0xC1 -> THAI CHARACTER MO MA
+    '\u0e22'   #  0xC2 -> THAI CHARACTER YO YAK
+    '\u0e23'   #  0xC3 -> THAI CHARACTER RO RUA
+    '\u0e24'   #  0xC4 -> THAI CHARACTER RU
+    '\u0e25'   #  0xC5 -> THAI CHARACTER LO LING
+    '\u0e26'   #  0xC6 -> THAI CHARACTER LU
+    '\u0e27'   #  0xC7 -> THAI CHARACTER WO WAEN
+    '\u0e28'   #  0xC8 -> THAI CHARACTER SO SALA
+    '\u0e29'   #  0xC9 -> THAI CHARACTER SO RUSI
+    '\u0e2a'   #  0xCA -> THAI CHARACTER SO SUA
+    '\u0e2b'   #  0xCB -> THAI CHARACTER HO HIP
+    '\u0e2c'   #  0xCC -> THAI CHARACTER LO CHULA
+    '\u0e2d'   #  0xCD -> THAI CHARACTER O ANG
+    '\u0e2e'   #  0xCE -> THAI CHARACTER HO NOKHUK
+    '\u0e2f'   #  0xCF -> THAI CHARACTER PAIYANNOI
+    '\u0e30'   #  0xD0 -> THAI CHARACTER SARA A
+    '\u0e31'   #  0xD1 -> THAI CHARACTER MAI HAN-AKAT
+    '\u0e32'   #  0xD2 -> THAI CHARACTER SARA AA
+    '\u0e33'   #  0xD3 -> THAI CHARACTER SARA AM
+    '\u0e34'   #  0xD4 -> THAI CHARACTER SARA I
+    '\u0e35'   #  0xD5 -> THAI CHARACTER SARA II
+    '\u0e36'   #  0xD6 -> THAI CHARACTER SARA UE
+    '\u0e37'   #  0xD7 -> THAI CHARACTER SARA UEE
+    '\u0e38'   #  0xD8 -> THAI CHARACTER SARA U
+    '\u0e39'   #  0xD9 -> THAI CHARACTER SARA UU
+    '\u0e3a'   #  0xDA -> THAI CHARACTER PHINTHU
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\u0e3f'   #  0xDF -> THAI CURRENCY SYMBOL BAHT
+    '\u0e40'   #  0xE0 -> THAI CHARACTER SARA E
+    '\u0e41'   #  0xE1 -> THAI CHARACTER SARA AE
+    '\u0e42'   #  0xE2 -> THAI CHARACTER SARA O
+    '\u0e43'   #  0xE3 -> THAI CHARACTER SARA AI MAIMUAN
+    '\u0e44'   #  0xE4 -> THAI CHARACTER SARA AI MAIMALAI
+    '\u0e45'   #  0xE5 -> THAI CHARACTER LAKKHANGYAO
+    '\u0e46'   #  0xE6 -> THAI CHARACTER MAIYAMOK
+    '\u0e47'   #  0xE7 -> THAI CHARACTER MAITAIKHU
+    '\u0e48'   #  0xE8 -> THAI CHARACTER MAI EK
+    '\u0e49'   #  0xE9 -> THAI CHARACTER MAI THO
+    '\u0e4a'   #  0xEA -> THAI CHARACTER MAI TRI
+    '\u0e4b'   #  0xEB -> THAI CHARACTER MAI CHATTAWA
+    '\u0e4c'   #  0xEC -> THAI CHARACTER THANTHAKHAT
+    '\u0e4d'   #  0xED -> THAI CHARACTER NIKHAHIT
+    '\u0e4e'   #  0xEE -> THAI CHARACTER YAMAKKAN
+    '\u0e4f'   #  0xEF -> THAI CHARACTER FONGMAN
+    '\u0e50'   #  0xF0 -> THAI DIGIT ZERO
+    '\u0e51'   #  0xF1 -> THAI DIGIT ONE
+    '\u0e52'   #  0xF2 -> THAI DIGIT TWO
+    '\u0e53'   #  0xF3 -> THAI DIGIT THREE
+    '\u0e54'   #  0xF4 -> THAI DIGIT FOUR
+    '\u0e55'   #  0xF5 -> THAI DIGIT FIVE
+    '\u0e56'   #  0xF6 -> THAI DIGIT SIX
+    '\u0e57'   #  0xF7 -> THAI DIGIT SEVEN
+    '\u0e58'   #  0xF8 -> THAI DIGIT EIGHT
+    '\u0e59'   #  0xF9 -> THAI DIGIT NINE
+    '\u0e5a'   #  0xFA -> THAI CHARACTER ANGKHANKHU
+    '\u0e5b'   #  0xFB -> THAI CHARACTER KHOMUT
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
+    '\ufffe'
 )
 
 ### Encoding table

Modified: python/branches/py3k-struni/Lib/encodings/utf_8_sig.py
==============================================================================
--- python/branches/py3k-struni/Lib/encodings/utf_8_sig.py	(original)
+++ python/branches/py3k-struni/Lib/encodings/utf_8_sig.py	Wed May  2 21:09:54 2007
@@ -57,7 +57,7 @@
                 if codecs.BOM_UTF8.startswith(input):
                     # not enough data to decide if this really is a BOM
                     # => try again on the next call
-                    return (u"", 0)
+                    return ("", 0)
                 else:
                     self.first = 0
             else:
@@ -106,7 +106,7 @@
         if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
             # not enough data to decide if this is a BOM
             # => try again on the next call
-            return (u"", 0)
+            return ("", 0)
         self.decode = codecs.utf_8_decode
         return decode(input, errors)
 

Modified: python/branches/py3k-struni/Lib/gettext.py
==============================================================================
--- python/branches/py3k-struni/Lib/gettext.py	(original)
+++ python/branches/py3k-struni/Lib/gettext.py	Wed May  2 21:09:54 2007
@@ -217,15 +217,15 @@
     def ugettext(self, message):
         if self._fallback:
             return self._fallback.ugettext(message)
-        return unicode(message)
+        return str(message)
 
     def ungettext(self, msgid1, msgid2, n):
         if self._fallback:
             return self._fallback.ungettext(msgid1, msgid2, n)
         if n == 1:
-            return unicode(msgid1)
+            return str(msgid1)
         else:
-            return unicode(msgid2)
+            return str(msgid2)
 
     def info(self):
         return self._info
@@ -239,14 +239,14 @@
     def set_output_charset(self, charset):
         self._output_charset = charset
 
-    def install(self, unicode=False, names=None):
+    def install(self, str=False, names=None):
         import __builtin__
-        __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
+        __builtin__.__dict__['_'] = str and self.ugettext or self.gettext
         if hasattr(names, "__contains__"):
             if "gettext" in names:
                 __builtin__.__dict__['gettext'] = __builtin__.__dict__['_']
             if "ngettext" in names:
-                __builtin__.__dict__['ngettext'] = (unicode and self.ungettext
+                __builtin__.__dict__['ngettext'] = (str and self.ungettext
                                                              or self.ngettext)
             if "lgettext" in names:
                 __builtin__.__dict__['lgettext'] = self.lgettext
@@ -327,14 +327,14 @@
                 msgid1, msgid2 = msg.split('\x00')
                 tmsg = tmsg.split('\x00')
                 if self._charset:
-                    msgid1 = unicode(msgid1, self._charset)
-                    tmsg = [unicode(x, self._charset) for x in tmsg]
+                    msgid1 = str(msgid1, self._charset)
+                    tmsg = [str(x, self._charset) for x in tmsg]
                 for i in range(len(tmsg)):
                     catalog[(msgid1, i)] = tmsg[i]
             else:
                 if self._charset:
-                    msg = unicode(msg, self._charset)
-                    tmsg = unicode(tmsg, self._charset)
+                    msg = str(msg, self._charset)
+                    tmsg = str(tmsg, self._charset)
                 catalog[msg] = tmsg
             # advance to next entry in the seek tables
             masteridx += 8
@@ -401,7 +401,7 @@
         if tmsg is missing:
             if self._fallback:
                 return self._fallback.ugettext(message)
-            return unicode(message)
+            return str(message)
         return tmsg
 
     def ungettext(self, msgid1, msgid2, n):
@@ -411,9 +411,9 @@
             if self._fallback:
                 return self._fallback.ungettext(msgid1, msgid2, n)
             if n == 1:
-                tmsg = unicode(msgid1)
+                tmsg = str(msgid1)
             else:
-                tmsg = unicode(msgid2)
+                tmsg = str(msgid2)
         return tmsg
 
 
@@ -489,9 +489,9 @@
     return result
 
 
-def install(domain, localedir=None, unicode=False, codeset=None, names=None):
+def install(domain, localedir=None, str=False, codeset=None, names=None):
     t = translation(domain, localedir, fallback=True, codeset=codeset)
-    t.install(unicode, names)
+    t.install(str, names)
 
 
 

Modified: python/branches/py3k-struni/Lib/glob.py
==============================================================================
--- python/branches/py3k-struni/Lib/glob.py	(original)
+++ python/branches/py3k-struni/Lib/glob.py	Wed May  2 21:09:54 2007
@@ -49,8 +49,8 @@
 def glob1(dirname, pattern):
     if not dirname:
         dirname = os.curdir
-    if isinstance(pattern, unicode) and not isinstance(dirname, unicode):
-        dirname = unicode(dirname, sys.getfilesystemencoding() or
+    if isinstance(pattern, str) and not isinstance(dirname, str):
+        dirname = str(dirname, sys.getfilesystemencoding() or
                                    sys.getdefaultencoding())
     try:
         names = os.listdir(dirname)

Modified: python/branches/py3k-struni/Lib/idlelib/EditorWindow.py
==============================================================================
--- python/branches/py3k-struni/Lib/idlelib/EditorWindow.py	(original)
+++ python/branches/py3k-struni/Lib/idlelib/EditorWindow.py	Wed May  2 21:09:54 2007
@@ -276,7 +276,7 @@
 
     def _filename_to_unicode(self, filename):
         """convert filename to unicode in order to display it in Tk"""
-        if isinstance(filename, unicode) or not filename:
+        if isinstance(filename, str) or not filename:
             return filename
         else:
             try:

Modified: python/branches/py3k-struni/Lib/idlelib/IOBinding.py
==============================================================================
--- python/branches/py3k-struni/Lib/idlelib/IOBinding.py	(original)
+++ python/branches/py3k-struni/Lib/idlelib/IOBinding.py	Wed May  2 21:09:54 2007
@@ -255,7 +255,7 @@
         firsteol = self.eol_re.search(chars)
         if firsteol:
             self.eol_convention = firsteol.group(0)
-            if isinstance(self.eol_convention, unicode):
+            if isinstance(self.eol_convention, str):
                 # Make sure it is an ASCII string
                 self.eol_convention = self.eol_convention.encode("ascii")
             chars = self.eol_re.sub(r"\n", chars)
@@ -298,18 +298,18 @@
             enc = None
         if enc:
             try:
-                return unicode(chars, enc)
+                return str(chars, enc)
             except UnicodeError:
                 pass
         # If it is ASCII, we need not to record anything
         try:
-            return unicode(chars, 'ascii')
+            return str(chars, 'ascii')
         except UnicodeError:
             pass
         # Finally, try the locale's encoding. This is deprecated;
         # the user should declare a non-ASCII encoding
         try:
-            chars = unicode(chars, encoding)
+            chars = str(chars, encoding)
             self.fileencoding = encoding
         except UnicodeError:
             pass
@@ -522,7 +522,7 @@
             self.opendialog = tkFileDialog.Open(master=self.text,
                                                 filetypes=self.filetypes)
         filename = self.opendialog.show(initialdir=dir, initialfile=base)
-        if isinstance(filename, unicode):
+        if isinstance(filename, str):
             filename = filename.encode(filesystemencoding)
         return filename
 
@@ -544,7 +544,7 @@
             self.savedialog = tkFileDialog.SaveAs(master=self.text,
                                                   filetypes=self.filetypes)
         filename = self.savedialog.show(initialdir=dir, initialfile=base)
-        if isinstance(filename, unicode):
+        if isinstance(filename, str):
             filename = filename.encode(filesystemencoding)
         return filename
 

Modified: python/branches/py3k-struni/Lib/idlelib/OutputWindow.py
==============================================================================
--- python/branches/py3k-struni/Lib/idlelib/OutputWindow.py	(original)
+++ python/branches/py3k-struni/Lib/idlelib/OutputWindow.py	Wed May  2 21:09:54 2007
@@ -39,7 +39,7 @@
         # we assume that they are in the locale's encoding
         if isinstance(s, str):
             try:
-                s = unicode(s, IOBinding.encoding)
+                s = str(s, IOBinding.encoding)
             except UnicodeError:
                 # some other encoding; let Tcl deal with it
                 pass

Modified: python/branches/py3k-struni/Lib/idlelib/PyParse.py
==============================================================================
--- python/branches/py3k-struni/Lib/idlelib/PyParse.py	(original)
+++ python/branches/py3k-struni/Lib/idlelib/PyParse.py	Wed May  2 21:09:54 2007
@@ -105,7 +105,7 @@
 del ch
 
 try:
-    UnicodeType = type(unicode(""))
+    UnicodeType = type(str(""))
 except NameError:
     UnicodeType = None
 

Modified: python/branches/py3k-struni/Lib/idlelib/PyShell.py
==============================================================================
--- python/branches/py3k-struni/Lib/idlelib/PyShell.py	(original)
+++ python/branches/py3k-struni/Lib/idlelib/PyShell.py	Wed May  2 21:09:54 2007
@@ -1008,7 +1008,7 @@
         line = self.text.get("iomark", "end-1c")
         if len(line) == 0:  # may be EOF if we quit our mainloop with Ctrl-C
             line = "\n"
-        if isinstance(line, unicode):
+        if isinstance(line, str):
             import IOBinding
             try:
                 line = line.encode(IOBinding.encoding)

Modified: python/branches/py3k-struni/Lib/lib-tk/Tkinter.py
==============================================================================
--- python/branches/py3k-struni/Lib/lib-tk/Tkinter.py	(original)
+++ python/branches/py3k-struni/Lib/lib-tk/Tkinter.py	Wed May  2 21:09:54 2007
@@ -3736,7 +3736,7 @@
     text = "This is Tcl/Tk version %s" % TclVersion
     if TclVersion >= 8.1:
         try:
-            text = text + unicode("\nThis should be a cedilla: \347",
+            text = text + str("\nThis should be a cedilla: \347",
                                   "iso-8859-1")
         except NameError:
             pass # no unicode support

Modified: python/branches/py3k-struni/Lib/msilib/schema.py
==============================================================================
--- python/branches/py3k-struni/Lib/msilib/schema.py	(original)
+++ python/branches/py3k-struni/Lib/msilib/schema.py	Wed May  2 21:09:54 2007
@@ -580,428 +580,428 @@
 tables=[_Validation, ActionText, AdminExecuteSequence, Condition, AdminUISequence, AdvtExecuteSequence, AdvtUISequence, AppId, AppSearch, Property, BBControl, Billboard, Feature, Binary, BindImage, File, CCPSearch, CheckBox, Class, Component, Icon, ProgId, ComboBox, CompLocator, Complus, Directory, Control, Dialog, ControlCondition, ControlEvent, CreateFolder, CustomAction, DrLocator, DuplicateFile, Environment, Error, EventMapping, Extension, MIME, FeatureComponents, FileSFPCatalog, SFPCatalog, Font, IniFile, IniLocator, InstallExecuteSequence, InstallUISequence, IsolatedComponent, LaunchCondition, ListBox, ListView, LockPermissions, Media, MoveFile, MsiAssembly, MsiAssemblyName, MsiDigitalCertificate, MsiDigitalSignature, MsiFileHash, MsiPatchHeaders, ODBCAttribute, ODBCDriver, ODBCDataSource, ODBCSourceAttribute, ODBCTranslator, Patch, PatchPackage, PublishComponent, RadioButton, Registry, RegLocator, RemoveFile, RemoveIniFile, RemoveRegistry, ReserveCost, SelfReg, ServiceControl, ServiceInstall, Shortcut, Signature, TextStyle, TypeLib, UIText, Upgrade, Verb]
 
 _Validation_records = [
-(u'_Validation',u'Table',u'N',None, None, None, None, u'Identifier',None, u'Name of table',),
-(u'_Validation',u'Column',u'N',None, None, None, None, u'Identifier',None, u'Name of column',),
-(u'_Validation',u'Description',u'Y',None, None, None, None, u'Text',None, u'Description of column',),
-(u'_Validation',u'Set',u'Y',None, None, None, None, u'Text',None, u'Set of values that are permitted',),
-(u'_Validation',u'Category',u'Y',None, None, None, None, None, u'Text;Formatted;Template;Condition;Guid;Path;Version;Language;Identifier;Binary;UpperCase;LowerCase;Filename;Paths;AnyPath;WildCardFilename;RegPath;KeyFormatted;CustomSource;Property;Cabinet;Shortcut;URL',u'String category',),
-(u'_Validation',u'KeyColumn',u'Y',1,32,None, None, None, None, u'Column to which foreign key connects',),
-(u'_Validation',u'KeyTable',u'Y',None, None, None, None, u'Identifier',None, u'For foreign key, Name of table to which data must link',),
-(u'_Validation',u'MaxValue',u'Y',-2147483647,2147483647,None, None, None, None, u'Maximum value allowed',),
-(u'_Validation',u'MinValue',u'Y',-2147483647,2147483647,None, None, None, None, u'Minimum value allowed',),
-(u'_Validation',u'Nullable',u'N',None, None, None, None, None, u'Y;N;@',u'Whether the column is nullable',),
-(u'ActionText',u'Description',u'Y',None, None, None, None, u'Text',None, u'Localized description displayed in progress dialog and log when action is executing.',),
-(u'ActionText',u'Action',u'N',None, None, None, None, u'Identifier',None, u'Name of action to be described.',),
-(u'ActionText',u'Template',u'Y',None, None, None, None, u'Template',None, u'Optional localized format template used to format action data records for display during action execution.',),
-(u'AdminExecuteSequence',u'Action',u'N',None, None, None, None, u'Identifier',None, u'Name of action to invoke, either in the engine or the handler DLL.',),
-(u'AdminExecuteSequence',u'Condition',u'Y',None, None, None, None, u'Condition',None, u'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
-(u'AdminExecuteSequence',u'Sequence',u'Y',-4,32767,None, None, None, None, u'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
-(u'Condition',u'Condition',u'Y',None, None, None, None, u'Condition',None, u'Expression evaluated to determine if Level in the Feature table is to change.',),
-(u'Condition',u'Feature_',u'N',None, None, u'Feature',1,u'Identifier',None, u'Reference to a Feature entry in Feature table.',),
-(u'Condition',u'Level',u'N',0,32767,None, None, None, None, u'New selection Level to set in Feature table if Condition evaluates to TRUE.',),
-(u'AdminUISequence',u'Action',u'N',None, None, None, None, u'Identifier',None, u'Name of action to invoke, either in the engine or the handler DLL.',),
-(u'AdminUISequence',u'Condition',u'Y',None, None, None, None, u'Condition',None, u'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
-(u'AdminUISequence',u'Sequence',u'Y',-4,32767,None, None, None, None, u'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
-(u'AdvtExecuteSequence',u'Action',u'N',None, None, None, None, u'Identifier',None, u'Name of action to invoke, either in the engine or the handler DLL.',),
-(u'AdvtExecuteSequence',u'Condition',u'Y',None, None, None, None, u'Condition',None, u'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
-(u'AdvtExecuteSequence',u'Sequence',u'Y',-4,32767,None, None, None, None, u'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
-(u'AdvtUISequence',u'Action',u'N',None, None, None, None, u'Identifier',None, u'Name of action to invoke, either in the engine or the handler DLL.',),
-(u'AdvtUISequence',u'Condition',u'Y',None, None, None, None, u'Condition',None, u'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
-(u'AdvtUISequence',u'Sequence',u'Y',-4,32767,None, None, None, None, u'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
-(u'AppId',u'AppId',u'N',None, None, None, None, u'Guid',None, None, ),
-(u'AppId',u'ActivateAtStorage',u'Y',0,1,None, None, None, None, None, ),
-(u'AppId',u'DllSurrogate',u'Y',None, None, None, None, u'Text',None, None, ),
-(u'AppId',u'LocalService',u'Y',None, None, None, None, u'Text',None, None, ),
-(u'AppId',u'RemoteServerName',u'Y',None, None, None, None, u'Formatted',None, None, ),
-(u'AppId',u'RunAsInteractiveUser',u'Y',0,1,None, None, None, None, None, ),
-(u'AppId',u'ServiceParameters',u'Y',None, None, None, None, u'Text',None, None, ),
-(u'AppSearch',u'Property',u'N',None, None, None, None, u'Identifier',None, u'The property associated with a Signature',),
-(u'AppSearch',u'Signature_',u'N',None, None, u'Signature;RegLocator;IniLocator;DrLocator;CompLocator',1,u'Identifier',None, u'The Signature_ represents a unique file signature and is also the foreign key in the Signature,  RegLocator, IniLocator, CompLocator and the DrLocator tables.',),
-(u'Property',u'Property',u'N',None, None, None, None, u'Identifier',None, u'Name of property, uppercase if settable by launcher or loader.',),
-(u'Property',u'Value',u'N',None, None, None, None, u'Text',None, u'String value for property.  Never null or empty.',),
-(u'BBControl',u'Type',u'N',None, None, None, None, u'Identifier',None, u'The type of the control.',),
-(u'BBControl',u'Y',u'N',0,32767,None, None, None, None, u'Vertical coordinate of the upper left corner of the bounding rectangle of the control.',),
-(u'BBControl',u'Text',u'Y',None, None, None, None, u'Text',None, u'A string used to set the initial text contained within a control (if appropriate).',),
-(u'BBControl',u'BBControl',u'N',None, None, None, None, u'Identifier',None, u'Name of the control. This name must be unique within a billboard, but can repeat on different billboard.',),
-(u'BBControl',u'Attributes',u'Y',0,2147483647,None, None, None, None, u'A 32-bit word that specifies the attribute flags to be applied to this control.',),
-(u'BBControl',u'Billboard_',u'N',None, None, u'Billboard',1,u'Identifier',None, u'External key to the Billboard table, name of the billboard.',),
-(u'BBControl',u'Height',u'N',0,32767,None, None, None, None, u'Height of the bounding rectangle of the control.',),
-(u'BBControl',u'Width',u'N',0,32767,None, None, None, None, u'Width of the bounding rectangle of the control.',),
-(u'BBControl',u'X',u'N',0,32767,None, None, None, None, u'Horizontal coordinate of the upper left corner of the bounding rectangle of the control.',),
-(u'Billboard',u'Action',u'Y',None, None, None, None, u'Identifier',None, u'The name of an action. The billboard is displayed during the progress messages received from this action.',),
-(u'Billboard',u'Billboard',u'N',None, None, None, None, u'Identifier',None, u'Name of the billboard.',),
-(u'Billboard',u'Feature_',u'N',None, None, u'Feature',1,u'Identifier',None, u'An external key to the Feature Table. The billboard is shown only if this feature is being installed.',),
-(u'Billboard',u'Ordering',u'Y',0,32767,None, None, None, None, u'A positive integer. If there is more than one billboard corresponding to an action they will be shown in the order defined by this column.',),
-(u'Feature',u'Description',u'Y',None, None, None, None, u'Text',None, u'Longer descriptive text describing a visible feature item.',),
-(u'Feature',u'Attributes',u'N',None, None, None, None, None, u'0;1;2;4;5;6;8;9;10;16;17;18;20;21;22;24;25;26;32;33;34;36;37;38;48;49;50;52;53;54',u'Feature attributes',),
-(u'Feature',u'Feature',u'N',None, None, None, None, u'Identifier',None, u'Primary key used to identify a particular feature record.',),
-(u'Feature',u'Directory_',u'Y',None, None, u'Directory',1,u'UpperCase',None, u'The name of the Directory that can be configured by the UI. A non-null value will enable the browse button.',),
-(u'Feature',u'Level',u'N',0,32767,None, None, None, None, u'The install level at which record will be initially selected. An install level of 0 will disable an item and prevent its display.',),
-(u'Feature',u'Title',u'Y',None, None, None, None, u'Text',None, u'Short text identifying a visible feature item.',),
-(u'Feature',u'Display',u'Y',0,32767,None, None, None, None, u'Numeric sort order, used to force a specific display ordering.',),
-(u'Feature',u'Feature_Parent',u'Y',None, None, u'Feature',1,u'Identifier',None, u'Optional key of a parent record in the same table. If the parent is not selected, then the record will not be installed. Null indicates a root item.',),
-(u'Binary',u'Name',u'N',None, None, None, None, u'Identifier',None, u'Unique key identifying the binary data.',),
-(u'Binary',u'Data',u'N',None, None, None, None, u'Binary',None, u'The unformatted binary data.',),
-(u'BindImage',u'File_',u'N',None, None, u'File',1,u'Identifier',None, u'The index into the File table. This must be an executable file.',),
-(u'BindImage',u'Path',u'Y',None, None, None, None, u'Paths',None, u'A list of ;  delimited paths that represent the paths to be searched for the import DLLS. The list is usually a list of properties each enclosed within square brackets [] .',),
-(u'File',u'Sequence',u'N',1,32767,None, None, None, None, u'Sequence with respect to the media images; order must track cabinet order.',),
-(u'File',u'Attributes',u'Y',0,32767,None, None, None, None, u'Integer containing bit flags representing file attributes (with the decimal value of each bit position in parentheses)',),
-(u'File',u'File',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized token, must match identifier in cabinet.  For uncompressed files, this field is ignored.',),
-(u'File',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key referencing Component that controls the file.',),
-(u'File',u'FileName',u'N',None, None, None, None, u'Filename',None, u'File name used for installation, may be localized.  This may contain a "short name|long name" pair.',),
-(u'File',u'FileSize',u'N',0,2147483647,None, None, None, None, u'Size of file in bytes (long integer).',),
-(u'File',u'Language',u'Y',None, None, None, None, u'Language',None, u'List of decimal language Ids, comma-separated if more than one.',),
-(u'File',u'Version',u'Y',None, None, u'File',1,u'Version',None, u'Version string for versioned files;  Blank for unversioned files.',),
-(u'CCPSearch',u'Signature_',u'N',None, None, u'Signature;RegLocator;IniLocator;DrLocator;CompLocator',1,u'Identifier',None, u'The Signature_ represents a unique file signature and is also the foreign key in the Signature,  RegLocator, IniLocator, CompLocator and the DrLocator tables.',),
-(u'CheckBox',u'Property',u'N',None, None, None, None, u'Identifier',None, u'A named property to be tied to the item.',),
-(u'CheckBox',u'Value',u'Y',None, None, None, None, u'Formatted',None, u'The value string associated with the item.',),
-(u'Class',u'Description',u'Y',None, None, None, None, u'Text',None, u'Localized description for the Class.',),
-(u'Class',u'Attributes',u'Y',None, 32767,None, None, None, None, u'Class registration attributes.',),
-(u'Class',u'Feature_',u'N',None, None, u'Feature',1,u'Identifier',None, u'Required foreign key into the Feature Table, specifying the feature to validate or install in order for the CLSID factory to be operational.',),
-(u'Class',u'AppId_',u'Y',None, None, u'AppId',1,u'Guid',None, u'Optional AppID containing DCOM information for associated application (string GUID).',),
-(u'Class',u'Argument',u'Y',None, None, None, None, u'Formatted',None, u'optional argument for LocalServers.',),
-(u'Class',u'CLSID',u'N',None, None, None, None, u'Guid',None, u'The CLSID of an OLE factory.',),
-(u'Class',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Required foreign key into the Component Table, specifying the component for which to return a path when called through LocateComponent.',),
-(u'Class',u'Context',u'N',None, None, None, None, u'Identifier',None, u'The numeric server context for this server. CLSCTX_xxxx',),
-(u'Class',u'DefInprocHandler',u'Y',None, None, None, None, u'Filename',u'1;2;3',u'Optional default inproc handler.  Only optionally provided if Context=CLSCTX_LOCAL_SERVER.  Typically "ole32.dll" or "mapi32.dll"',),
-(u'Class',u'FileTypeMask',u'Y',None, None, None, None, u'Text',None, u'Optional string containing information for the HKCRthis CLSID) key. If multiple patterns exist, they must be delimited by a semicolon, and numeric subkeys will be generated: 0,1,2...',),
-(u'Class',u'Icon_',u'Y',None, None, u'Icon',1,u'Identifier',None, u'Optional foreign key into the Icon Table, specifying the icon file associated with this CLSID. Will be written under the DefaultIcon key.',),
-(u'Class',u'IconIndex',u'Y',-32767,32767,None, None, None, None, u'Optional icon index.',),
-(u'Class',u'ProgId_Default',u'Y',None, None, u'ProgId',1,u'Text',None, u'Optional ProgId associated with this CLSID.',),
-(u'Component',u'Condition',u'Y',None, None, None, None, u'Condition',None, u"A conditional statement that will disable this component if the specified condition evaluates to the 'True' state. If a component is disabled, it will not be installed, regardless of the 'Action' state associated with the component.",),
-(u'Component',u'Attributes',u'N',None, None, None, None, None, None, u'Remote execution option, one of irsEnum',),
-(u'Component',u'Component',u'N',None, None, None, None, u'Identifier',None, u'Primary key used to identify a particular component record.',),
-(u'Component',u'ComponentId',u'Y',None, None, None, None, u'Guid',None, u'A string GUID unique to this component, version, and language.',),
-(u'Component',u'Directory_',u'N',None, None, u'Directory',1,u'Identifier',None, u'Required key of a Directory table record. This is actually a property name whose value contains the actual path, set either by the AppSearch action or with the default setting obtained from the Directory table.',),
-(u'Component',u'KeyPath',u'Y',None, None, u'File;Registry;ODBCDataSource',1,u'Identifier',None, u'Either the primary key into the File table, Registry table, or ODBCDataSource table. This extract path is stored when the component is installed, and is used to detect the presence of the component and to return the path to it.',),
-(u'Icon',u'Name',u'N',None, None, None, None, u'Identifier',None, u'Primary key. Name of the icon file.',),
-(u'Icon',u'Data',u'N',None, None, None, None, u'Binary',None, u'Binary stream. The binary icon data in PE (.DLL or .EXE) or icon (.ICO) format.',),
-(u'ProgId',u'Description',u'Y',None, None, None, None, u'Text',None, u'Localized description for the Program identifier.',),
-(u'ProgId',u'Icon_',u'Y',None, None, u'Icon',1,u'Identifier',None, u'Optional foreign key into the Icon Table, specifying the icon file associated with this ProgId. Will be written under the DefaultIcon key.',),
-(u'ProgId',u'IconIndex',u'Y',-32767,32767,None, None, None, None, u'Optional icon index.',),
-(u'ProgId',u'ProgId',u'N',None, None, None, None, u'Text',None, u'The Program Identifier. Primary key.',),
-(u'ProgId',u'Class_',u'Y',None, None, u'Class',1,u'Guid',None, u'The CLSID of an OLE factory corresponding to the ProgId.',),
-(u'ProgId',u'ProgId_Parent',u'Y',None, None, u'ProgId',1,u'Text',None, u'The Parent Program Identifier. If specified, the ProgId column becomes a version independent prog id.',),
-(u'ComboBox',u'Text',u'Y',None, None, None, None, u'Formatted',None, u'The visible text to be assigned to the item. Optional. If this entry or the entire column is missing, the text is the same as the value.',),
-(u'ComboBox',u'Property',u'N',None, None, None, None, u'Identifier',None, u'A named property to be tied to this item. All the items tied to the same property become part of the same combobox.',),
-(u'ComboBox',u'Value',u'N',None, None, None, None, u'Formatted',None, u'The value string associated with this item. Selecting the line will set the associated property to this value.',),
-(u'ComboBox',u'Order',u'N',1,32767,None, None, None, None, u'A positive integer used to determine the ordering of the items within one list.\tThe integers do not have to be consecutive.',),
-(u'CompLocator',u'Type',u'Y',0,1,None, None, None, None, u'A boolean value that determines if the registry value is a filename or a directory location.',),
-(u'CompLocator',u'Signature_',u'N',None, None, None, None, u'Identifier',None, u'The table key. The Signature_ represents a unique file signature and is also the foreign key in the Signature table.',),
-(u'CompLocator',u'ComponentId',u'N',None, None, None, None, u'Guid',None, u'A string GUID unique to this component, version, and language.',),
-(u'Complus',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key referencing Component that controls the ComPlus component.',),
-(u'Complus',u'ExpType',u'Y',0,32767,None, None, None, None, u'ComPlus component attributes.',),
-(u'Directory',u'Directory',u'N',None, None, None, None, u'Identifier',None, u'Unique identifier for directory entry, primary key. If a property by this name is defined, it contains the full path to the directory.',),
-(u'Directory',u'DefaultDir',u'N',None, None, None, None, u'DefaultDir',None, u"The default sub-path under parent's path.",),
-(u'Directory',u'Directory_Parent',u'Y',None, None, u'Directory',1,u'Identifier',None, u'Reference to the entry in this table specifying the default parent directory. A record parented to itself or with a Null parent represents a root of the install tree.',),
-(u'Control',u'Type',u'N',None, None, None, None, u'Identifier',None, u'The type of the control.',),
-(u'Control',u'Y',u'N',0,32767,None, None, None, None, u'Vertical coordinate of the upper left corner of the bounding rectangle of the control.',),
-(u'Control',u'Text',u'Y',None, None, None, None, u'Formatted',None, u'A string used to set the initial text contained within a control (if appropriate).',),
-(u'Control',u'Property',u'Y',None, None, None, None, u'Identifier',None, u'The name of a defined property to be linked to this control. ',),
-(u'Control',u'Attributes',u'Y',0,2147483647,None, None, None, None, u'A 32-bit word that specifies the attribute flags to be applied to this control.',),
-(u'Control',u'Height',u'N',0,32767,None, None, None, None, u'Height of the bounding rectangle of the control.',),
-(u'Control',u'Width',u'N',0,32767,None, None, None, None, u'Width of the bounding rectangle of the control.',),
-(u'Control',u'X',u'N',0,32767,None, None, None, None, u'Horizontal coordinate of the upper left corner of the bounding rectangle of the control.',),
-(u'Control',u'Control',u'N',None, None, None, None, u'Identifier',None, u'Name of the control. This name must be unique within a dialog, but can repeat on different dialogs. ',),
-(u'Control',u'Control_Next',u'Y',None, None, u'Control',2,u'Identifier',None, u'The name of an other control on the same dialog. This link defines the tab order of the controls. The links have to form one or more cycles!',),
-(u'Control',u'Dialog_',u'N',None, None, u'Dialog',1,u'Identifier',None, u'External key to the Dialog table, name of the dialog.',),
-(u'Control',u'Help',u'Y',None, None, None, None, u'Text',None, u'The help strings used with the button. The text is optional. ',),
-(u'Dialog',u'Attributes',u'Y',0,2147483647,None, None, None, None, u'A 32-bit word that specifies the attribute flags to be applied to this dialog.',),
-(u'Dialog',u'Height',u'N',0,32767,None, None, None, None, u'Height of the bounding rectangle of the dialog.',),
-(u'Dialog',u'Width',u'N',0,32767,None, None, None, None, u'Width of the bounding rectangle of the dialog.',),
-(u'Dialog',u'Dialog',u'N',None, None, None, None, u'Identifier',None, u'Name of the dialog.',),
-(u'Dialog',u'Control_Cancel',u'Y',None, None, u'Control',2,u'Identifier',None, u'Defines the cancel control. Hitting escape or clicking on the close icon on the dialog is equivalent to pushing this button.',),
-(u'Dialog',u'Control_Default',u'Y',None, None, u'Control',2,u'Identifier',None, u'Defines the default control. Hitting return is equivalent to pushing this button.',),
-(u'Dialog',u'Control_First',u'N',None, None, u'Control',2,u'Identifier',None, u'Defines the control that has the focus when the dialog is created.',),
-(u'Dialog',u'HCentering',u'N',0,100,None, None, None, None, u'Horizontal position of the dialog on a 0-100 scale. 0 means left end, 100 means right end of the screen, 50 center.',),
-(u'Dialog',u'Title',u'Y',None, None, None, None, u'Formatted',None, u"A text string specifying the title to be displayed in the title bar of the dialog's window.",),
-(u'Dialog',u'VCentering',u'N',0,100,None, None, None, None, u'Vertical position of the dialog on a 0-100 scale. 0 means top end, 100 means bottom end of the screen, 50 center.',),
-(u'ControlCondition',u'Action',u'N',None, None, None, None, None, u'Default;Disable;Enable;Hide;Show',u'The desired action to be taken on the specified control.',),
-(u'ControlCondition',u'Condition',u'N',None, None, None, None, u'Condition',None, u'A standard conditional statement that specifies under which conditions the action should be triggered.',),
-(u'ControlCondition',u'Dialog_',u'N',None, None, u'Dialog',1,u'Identifier',None, u'A foreign key to the Dialog table, name of the dialog.',),
-(u'ControlCondition',u'Control_',u'N',None, None, u'Control',2,u'Identifier',None, u'A foreign key to the Control table, name of the control.',),
-(u'ControlEvent',u'Condition',u'Y',None, None, None, None, u'Condition',None, u'A standard conditional statement that specifies under which conditions an event should be triggered.',),
-(u'ControlEvent',u'Ordering',u'Y',0,2147483647,None, None, None, None, u'An integer used to order several events tied to the same control. Can be left blank.',),
-(u'ControlEvent',u'Argument',u'N',None, None, None, None, u'Formatted',None, u'A value to be used as a modifier when triggering a particular event.',),
-(u'ControlEvent',u'Dialog_',u'N',None, None, u'Dialog',1,u'Identifier',None, u'A foreign key to the Dialog table, name of the dialog.',),
-(u'ControlEvent',u'Control_',u'N',None, None, u'Control',2,u'Identifier',None, u'A foreign key to the Control table, name of the control',),
-(u'ControlEvent',u'Event',u'N',None, None, None, None, u'Formatted',None, u'An identifier that specifies the type of the event that should take place when the user interacts with control specified by the first two entries.',),
-(u'CreateFolder',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table.',),
-(u'CreateFolder',u'Directory_',u'N',None, None, u'Directory',1,u'Identifier',None, u'Primary key, could be foreign key into the Directory table.',),
-(u'CustomAction',u'Type',u'N',1,16383,None, None, None, None, u'The numeric custom action type, consisting of source location, code type, entry, option flags.',),
-(u'CustomAction',u'Action',u'N',None, None, None, None, u'Identifier',None, u'Primary key, name of action, normally appears in sequence table unless private use.',),
-(u'CustomAction',u'Source',u'Y',None, None, None, None, u'CustomSource',None, u'The table reference of the source of the code.',),
-(u'CustomAction',u'Target',u'Y',None, None, None, None, u'Formatted',None, u'Excecution parameter, depends on the type of custom action',),
-(u'DrLocator',u'Signature_',u'N',None, None, None, None, u'Identifier',None, u'The Signature_ represents a unique file signature and is also the foreign key in the Signature table.',),
-(u'DrLocator',u'Path',u'Y',None, None, None, None, u'AnyPath',None, u'The path on the user system. This is a either a subpath below the value of the Parent or a full path. The path may contain properties enclosed within [ ] that will be expanded.',),
-(u'DrLocator',u'Depth',u'Y',0,32767,None, None, None, None, u'The depth below the path to which the Signature_ is recursively searched. If absent, the depth is assumed to be 0.',),
-(u'DrLocator',u'Parent',u'Y',None, None, None, None, u'Identifier',None, u'The parent file signature. It is also a foreign key in the Signature table. If null and the Path column does not expand to a full path, then all the fixed drives of the user system are searched using the Path.',),
-(u'DuplicateFile',u'File_',u'N',None, None, u'File',1,u'Identifier',None, u'Foreign key referencing the source file to be duplicated.',),
-(u'DuplicateFile',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key referencing Component that controls the duplicate file.',),
-(u'DuplicateFile',u'DestFolder',u'Y',None, None, None, None, u'Identifier',None, u'Name of a property whose value is assumed to resolve to the full pathname to a destination folder.',),
-(u'DuplicateFile',u'DestName',u'Y',None, None, None, None, u'Filename',None, u'Filename to be given to the duplicate file.',),
-(u'DuplicateFile',u'FileKey',u'N',None, None, None, None, u'Identifier',None, u'Primary key used to identify a particular file entry',),
-(u'Environment',u'Name',u'N',None, None, None, None, u'Text',None, u'The name of the environmental value.',),
-(u'Environment',u'Value',u'Y',None, None, None, None, u'Formatted',None, u'The value to set in the environmental settings.',),
-(u'Environment',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table referencing component that controls the installing of the environmental value.',),
-(u'Environment',u'Environment',u'N',None, None, None, None, u'Identifier',None, u'Unique identifier for the environmental variable setting',),
-(u'Error',u'Error',u'N',0,32767,None, None, None, None, u'Integer error number, obtained from header file IError(...) macros.',),
-(u'Error',u'Message',u'Y',None, None, None, None, u'Template',None, u'Error formatting template, obtained from user ed. or localizers.',),
-(u'EventMapping',u'Dialog_',u'N',None, None, u'Dialog',1,u'Identifier',None, u'A foreign key to the Dialog table, name of the Dialog.',),
-(u'EventMapping',u'Control_',u'N',None, None, u'Control',2,u'Identifier',None, u'A foreign key to the Control table, name of the control.',),
-(u'EventMapping',u'Event',u'N',None, None, None, None, u'Identifier',None, u'An identifier that specifies the type of the event that the control subscribes to.',),
-(u'EventMapping',u'Attribute',u'N',None, None, None, None, u'Identifier',None, u'The name of the control attribute, that is set when this event is received.',),
-(u'Extension',u'Feature_',u'N',None, None, u'Feature',1,u'Identifier',None, u'Required foreign key into the Feature Table, specifying the feature to validate or install in order for the CLSID factory to be operational.',),
-(u'Extension',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Required foreign key into the Component Table, specifying the component for which to return a path when called through LocateComponent.',),
-(u'Extension',u'Extension',u'N',None, None, None, None, u'Text',None, u'The extension associated with the table row.',),
-(u'Extension',u'MIME_',u'Y',None, None, u'MIME',1,u'Text',None, u'Optional Context identifier, typically "type/format" associated with the extension',),
-(u'Extension',u'ProgId_',u'Y',None, None, u'ProgId',1,u'Text',None, u'Optional ProgId associated with this extension.',),
-(u'MIME',u'CLSID',u'Y',None, None, None, None, u'Guid',None, u'Optional associated CLSID.',),
-(u'MIME',u'ContentType',u'N',None, None, None, None, u'Text',None, u'Primary key. Context identifier, typically "type/format".',),
-(u'MIME',u'Extension_',u'N',None, None, u'Extension',1,u'Text',None, u'Optional associated extension (without dot)',),
-(u'FeatureComponents',u'Feature_',u'N',None, None, u'Feature',1,u'Identifier',None, u'Foreign key into Feature table.',),
-(u'FeatureComponents',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into Component table.',),
-(u'FileSFPCatalog',u'File_',u'N',None, None, u'File',1,u'Identifier',None, u'File associated with the catalog',),
-(u'FileSFPCatalog',u'SFPCatalog_',u'N',None, None, u'SFPCatalog',1,u'Filename',None, u'Catalog associated with the file',),
-(u'SFPCatalog',u'SFPCatalog',u'N',None, None, None, None, u'Filename',None, u'File name for the catalog.',),
-(u'SFPCatalog',u'Catalog',u'N',None, None, None, None, u'Binary',None, u'SFP Catalog',),
-(u'SFPCatalog',u'Dependency',u'Y',None, None, None, None, u'Formatted',None, u'Parent catalog - only used by SFP',),
-(u'Font',u'File_',u'N',None, None, u'File',1,u'Identifier',None, u'Primary key, foreign key into File table referencing font file.',),
-(u'Font',u'FontTitle',u'Y',None, None, None, None, u'Text',None, u'Font name.',),
-(u'IniFile',u'Action',u'N',None, None, None, None, None, u'0;1;3',u'The type of modification to be made, one of iifEnum',),
-(u'IniFile',u'Value',u'N',None, None, None, None, u'Formatted',None, u'The value to be written.',),
-(u'IniFile',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table referencing component that controls the installing of the .INI value.',),
-(u'IniFile',u'FileName',u'N',None, None, None, None, u'Filename',None, u'The .INI file name in which to write the information',),
-(u'IniFile',u'IniFile',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized token.',),
-(u'IniFile',u'DirProperty',u'Y',None, None, None, None, u'Identifier',None, u'Foreign key into the Directory table denoting the directory where the .INI file is.',),
-(u'IniFile',u'Key',u'N',None, None, None, None, u'Formatted',None, u'The .INI file key below Section.',),
-(u'IniFile',u'Section',u'N',None, None, None, None, u'Formatted',None, u'The .INI file Section.',),
-(u'IniLocator',u'Type',u'Y',0,2,None, None, None, None, u'An integer value that determines if the .INI value read is a filename or a directory location or to be used as is w/o interpretation.',),
-(u'IniLocator',u'Signature_',u'N',None, None, None, None, u'Identifier',None, u'The table key. The Signature_ represents a unique file signature and is also the foreign key in the Signature table.',),
-(u'IniLocator',u'FileName',u'N',None, None, None, None, u'Filename',None, u'The .INI file name.',),
-(u'IniLocator',u'Key',u'N',None, None, None, None, u'Text',None, u'Key value (followed by an equals sign in INI file).',),
-(u'IniLocator',u'Section',u'N',None, None, None, None, u'Text',None, u'Section name within in file (within square brackets in INI file).',),
-(u'IniLocator',u'Field',u'Y',0,32767,None, None, None, None, u'The field in the .INI line. If Field is null or 0 the entire line is read.',),
-(u'InstallExecuteSequence',u'Action',u'N',None, None, None, None, u'Identifier',None, u'Name of action to invoke, either in the engine or the handler DLL.',),
-(u'InstallExecuteSequence',u'Condition',u'Y',None, None, None, None, u'Condition',None, u'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
-(u'InstallExecuteSequence',u'Sequence',u'Y',-4,32767,None, None, None, None, u'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
-(u'InstallUISequence',u'Action',u'N',None, None, None, None, u'Identifier',None, u'Name of action to invoke, either in the engine or the handler DLL.',),
-(u'InstallUISequence',u'Condition',u'Y',None, None, None, None, u'Condition',None, u'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
-(u'InstallUISequence',u'Sequence',u'Y',-4,32767,None, None, None, None, u'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
-(u'IsolatedComponent',u'Component_Application',u'N',None, None, u'Component',1,u'Identifier',None, u'Key to Component table item for application',),
-(u'IsolatedComponent',u'Component_Shared',u'N',None, None, u'Component',1,u'Identifier',None, u'Key to Component table item to be isolated',),
-(u'LaunchCondition',u'Description',u'N',None, None, None, None, u'Formatted',None, u'Localizable text to display when condition fails and install must abort.',),
-(u'LaunchCondition',u'Condition',u'N',None, None, None, None, u'Condition',None, u'Expression which must evaluate to TRUE in order for install to commence.',),
-(u'ListBox',u'Text',u'Y',None, None, None, None, u'Text',None, u'The visible text to be assigned to the item. Optional. If this entry or the entire column is missing, the text is the same as the value.',),
-(u'ListBox',u'Property',u'N',None, None, None, None, u'Identifier',None, u'A named property to be tied to this item. All the items tied to the same property become part of the same listbox.',),
-(u'ListBox',u'Value',u'N',None, None, None, None, u'Formatted',None, u'The value string associated with this item. Selecting the line will set the associated property to this value.',),
-(u'ListBox',u'Order',u'N',1,32767,None, None, None, None, u'A positive integer used to determine the ordering of the items within one list..The integers do not have to be consecutive.',),
-(u'ListView',u'Text',u'Y',None, None, None, None, u'Text',None, u'The visible text to be assigned to the item. Optional. If this entry or the entire column is missing, the text is the same as the value.',),
-(u'ListView',u'Property',u'N',None, None, None, None, u'Identifier',None, u'A named property to be tied to this item. All the items tied to the same property become part of the same listview.',),
-(u'ListView',u'Value',u'N',None, None, None, None, u'Identifier',None, u'The value string associated with this item. Selecting the line will set the associated property to this value.',),
-(u'ListView',u'Order',u'N',1,32767,None, None, None, None, u'A positive integer used to determine the ordering of the items within one list..The integers do not have to be consecutive.',),
-(u'ListView',u'Binary_',u'Y',None, None, u'Binary',1,u'Identifier',None, u'The name of the icon to be displayed with the icon. The binary information is looked up from the Binary Table.',),
-(u'LockPermissions',u'Table',u'N',None, None, None, None, u'Identifier',u'Directory;File;Registry',u'Reference to another table name',),
-(u'LockPermissions',u'Domain',u'Y',None, None, None, None, u'Formatted',None, u'Domain name for user whose permissions are being set. (usually a property)',),
-(u'LockPermissions',u'LockObject',u'N',None, None, None, None, u'Identifier',None, u'Foreign key into Registry or File table',),
-(u'LockPermissions',u'Permission',u'Y',-2147483647,2147483647,None, None, None, None, u'Permission Access mask.  Full Control = 268435456 (GENERIC_ALL = 0x10000000)',),
-(u'LockPermissions',u'User',u'N',None, None, None, None, u'Formatted',None, u'User for permissions to be set.  (usually a property)',),
-(u'Media',u'Source',u'Y',None, None, None, None, u'Property',None, u'The property defining the location of the cabinet file.',),
-(u'Media',u'Cabinet',u'Y',None, None, None, None, u'Cabinet',None, u'If some or all of the files stored on the media are compressed in a cabinet, the name of that cabinet.',),
-(u'Media',u'DiskId',u'N',1,32767,None, None, None, None, u'Primary key, integer to determine sort order for table.',),
-(u'Media',u'DiskPrompt',u'Y',None, None, None, None, u'Text',None, u'Disk name: the visible text actually printed on the disk.  This will be used to prompt the user when this disk needs to be inserted.',),
-(u'Media',u'LastSequence',u'N',0,32767,None, None, None, None, u'File sequence number for the last file for this media.',),
-(u'Media',u'VolumeLabel',u'Y',None, None, None, None, u'Text',None, u'The label attributed to the volume.',),
-(u'ModuleComponents',u'Component',u'N',None, None, u'Component',1,u'Identifier',None, u'Component contained in the module.',),
-(u'ModuleComponents',u'Language',u'N',None, None, u'ModuleSignature',2,None, None, u'Default language ID for module (may be changed by transform).',),
-(u'ModuleComponents',u'ModuleID',u'N',None, None, u'ModuleSignature',1,u'Identifier',None, u'Module containing the component.',),
-(u'ModuleSignature',u'Language',u'N',None, None, None, None, None, None, u'Default decimal language of module.',),
-(u'ModuleSignature',u'Version',u'N',None, None, None, None, u'Version',None, u'Version of the module.',),
-(u'ModuleSignature',u'ModuleID',u'N',None, None, None, None, u'Identifier',None, u'Module identifier (String.GUID).',),
-(u'ModuleDependency',u'ModuleID',u'N',None, None, u'ModuleSignature',1,u'Identifier',None, u'Module requiring the dependency.',),
-(u'ModuleDependency',u'ModuleLanguage',u'N',None, None, u'ModuleSignature',2,None, None, u'Language of module requiring the dependency.',),
-(u'ModuleDependency',u'RequiredID',u'N',None, None, None, None, None, None, u'String.GUID of required module.',),
-(u'ModuleDependency',u'RequiredLanguage',u'N',None, None, None, None, None, None, u'LanguageID of the required module.',),
-(u'ModuleDependency',u'RequiredVersion',u'Y',None, None, None, None, u'Version',None, u'Version of the required version.',),
-(u'ModuleExclusion',u'ModuleID',u'N',None, None, u'ModuleSignature',1,u'Identifier',None, u'String.GUID of module with exclusion requirement.',),
-(u'ModuleExclusion',u'ModuleLanguage',u'N',None, None, u'ModuleSignature',2,None, None, u'LanguageID of module with exclusion requirement.',),
-(u'ModuleExclusion',u'ExcludedID',u'N',None, None, None, None, None, None, u'String.GUID of excluded module.',),
-(u'ModuleExclusion',u'ExcludedLanguage',u'N',None, None, None, None, None, None, u'Language of excluded module.',),
-(u'ModuleExclusion',u'ExcludedMaxVersion',u'Y',None, None, None, None, u'Version',None, u'Maximum version of excluded module.',),
-(u'ModuleExclusion',u'ExcludedMinVersion',u'Y',None, None, None, None, u'Version',None, u'Minimum version of excluded module.',),
-(u'MoveFile',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'If this component is not "selected" for installation or removal, no action will be taken on the associated MoveFile entry',),
-(u'MoveFile',u'DestFolder',u'N',None, None, None, None, u'Identifier',None, u'Name of a property whose value is assumed to resolve to the full path to the destination directory',),
-(u'MoveFile',u'DestName',u'Y',None, None, None, None, u'Filename',None, u'Name to be given to the original file after it is moved or copied.  If blank, the destination file will be given the same name as the source file',),
-(u'MoveFile',u'FileKey',u'N',None, None, None, None, u'Identifier',None, u'Primary key that uniquely identifies a particular MoveFile record',),
-(u'MoveFile',u'Options',u'N',0,1,None, None, None, None, u'Integer value specifying the MoveFile operating mode, one of imfoEnum',),
-(u'MoveFile',u'SourceFolder',u'Y',None, None, None, None, u'Identifier',None, u'Name of a property whose value is assumed to resolve to the full path to the source directory',),
-(u'MoveFile',u'SourceName',u'Y',None, None, None, None, u'Text',None, u"Name of the source file(s) to be moved or copied.  Can contain the '*' or '?' wildcards.",),
-(u'MsiAssembly',u'Attributes',u'Y',None, None, None, None, None, None, u'Assembly attributes',),
-(u'MsiAssembly',u'Feature_',u'N',None, None, u'Feature',1,u'Identifier',None, u'Foreign key into Feature table.',),
-(u'MsiAssembly',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into Component table.',),
-(u'MsiAssembly',u'File_Application',u'Y',None, None, u'File',1,u'Identifier',None, u'Foreign key into File table, denoting the application context for private assemblies. Null for global assemblies.',),
-(u'MsiAssembly',u'File_Manifest',u'Y',None, None, u'File',1,u'Identifier',None, u'Foreign key into the File table denoting the manifest file for the assembly.',),
-(u'MsiAssemblyName',u'Name',u'N',None, None, None, None, u'Text',None, u'The name part of the name-value pairs for the assembly name.',),
-(u'MsiAssemblyName',u'Value',u'N',None, None, None, None, u'Text',None, u'The value part of the name-value pairs for the assembly name.',),
-(u'MsiAssemblyName',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into Component table.',),
-(u'MsiDigitalCertificate',u'CertData',u'N',None, None, None, None, u'Binary',None, u'A certificate context blob for a signer certificate',),
-(u'MsiDigitalCertificate',u'DigitalCertificate',u'N',None, None, None, None, u'Identifier',None, u'A unique identifier for the row',),
-(u'MsiDigitalSignature',u'Table',u'N',None, None, None, None, None, u'Media',u'Reference to another table name (only Media table is supported)',),
-(u'MsiDigitalSignature',u'DigitalCertificate_',u'N',None, None, u'MsiDigitalCertificate',1,u'Identifier',None, u'Foreign key to MsiDigitalCertificate table identifying the signer certificate',),
-(u'MsiDigitalSignature',u'Hash',u'Y',None, None, None, None, u'Binary',None, u'The encoded hash blob from the digital signature',),
-(u'MsiDigitalSignature',u'SignObject',u'N',None, None, None, None, u'Text',None, u'Foreign key to Media table',),
-(u'MsiFileHash',u'File_',u'N',None, None, u'File',1,u'Identifier',None, u'Primary key, foreign key into File table referencing file with this hash',),
-(u'MsiFileHash',u'Options',u'N',0,32767,None, None, None, None, u'Various options and attributes for this hash.',),
-(u'MsiFileHash',u'HashPart1',u'N',None, None, None, None, None, None, u'Size of file in bytes (long integer).',),
-(u'MsiFileHash',u'HashPart2',u'N',None, None, None, None, None, None, u'Size of file in bytes (long integer).',),
-(u'MsiFileHash',u'HashPart3',u'N',None, None, None, None, None, None, u'Size of file in bytes (long integer).',),
-(u'MsiFileHash',u'HashPart4',u'N',None, None, None, None, None, None, u'Size of file in bytes (long integer).',),
-(u'MsiPatchHeaders',u'StreamRef',u'N',None, None, None, None, u'Identifier',None, u'Primary key. A unique identifier for the row.',),
-(u'MsiPatchHeaders',u'Header',u'N',None, None, None, None, u'Binary',None, u'Binary stream. The patch header, used for patch validation.',),
-(u'ODBCAttribute',u'Value',u'Y',None, None, None, None, u'Text',None, u'Value for ODBC driver attribute',),
-(u'ODBCAttribute',u'Attribute',u'N',None, None, None, None, u'Text',None, u'Name of ODBC driver attribute',),
-(u'ODBCAttribute',u'Driver_',u'N',None, None, u'ODBCDriver',1,u'Identifier',None, u'Reference to ODBC driver in ODBCDriver table',),
-(u'ODBCDriver',u'Description',u'N',None, None, None, None, u'Text',None, u'Text used as registered name for driver, non-localized',),
-(u'ODBCDriver',u'File_',u'N',None, None, u'File',1,u'Identifier',None, u'Reference to key driver file',),
-(u'ODBCDriver',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Reference to associated component',),
-(u'ODBCDriver',u'Driver',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized.internal token for driver',),
-(u'ODBCDriver',u'File_Setup',u'Y',None, None, u'File',1,u'Identifier',None, u'Optional reference to key driver setup DLL',),
-(u'ODBCDataSource',u'Description',u'N',None, None, None, None, u'Text',None, u'Text used as registered name for data source',),
-(u'ODBCDataSource',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Reference to associated component',),
-(u'ODBCDataSource',u'DataSource',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized.internal token for data source',),
-(u'ODBCDataSource',u'DriverDescription',u'N',None, None, None, None, u'Text',None, u'Reference to driver description, may be existing driver',),
-(u'ODBCDataSource',u'Registration',u'N',0,1,None, None, None, None, u'Registration option: 0=machine, 1=user, others t.b.d.',),
-(u'ODBCSourceAttribute',u'Value',u'Y',None, None, None, None, u'Text',None, u'Value for ODBC data source attribute',),
-(u'ODBCSourceAttribute',u'Attribute',u'N',None, None, None, None, u'Text',None, u'Name of ODBC data source attribute',),
-(u'ODBCSourceAttribute',u'DataSource_',u'N',None, None, u'ODBCDataSource',1,u'Identifier',None, u'Reference to ODBC data source in ODBCDataSource table',),
-(u'ODBCTranslator',u'Description',u'N',None, None, None, None, u'Text',None, u'Text used as registered name for translator',),
-(u'ODBCTranslator',u'File_',u'N',None, None, u'File',1,u'Identifier',None, u'Reference to key translator file',),
-(u'ODBCTranslator',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Reference to associated component',),
-(u'ODBCTranslator',u'File_Setup',u'Y',None, None, u'File',1,u'Identifier',None, u'Optional reference to key translator setup DLL',),
-(u'ODBCTranslator',u'Translator',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized.internal token for translator',),
-(u'Patch',u'Sequence',u'N',0,32767,None, None, None, None, u'Primary key, sequence with respect to the media images; order must track cabinet order.',),
-(u'Patch',u'Attributes',u'N',0,32767,None, None, None, None, u'Integer containing bit flags representing patch attributes',),
-(u'Patch',u'File_',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized token, foreign key to File table, must match identifier in cabinet.',),
-(u'Patch',u'Header',u'Y',None, None, None, None, u'Binary',None, u'Binary stream. The patch header, used for patch validation.',),
-(u'Patch',u'PatchSize',u'N',0,2147483647,None, None, None, None, u'Size of patch in bytes (long integer).',),
-(u'Patch',u'StreamRef_',u'Y',None, None, None, None, u'Identifier',None, u'Identifier. Foreign key to the StreamRef column of the MsiPatchHeaders table.',),
-(u'PatchPackage',u'Media_',u'N',0,32767,None, None, None, None, u'Foreign key to DiskId column of Media table. Indicates the disk containing the patch package.',),
-(u'PatchPackage',u'PatchId',u'N',None, None, None, None, u'Guid',None, u'A unique string GUID representing this patch.',),
-(u'PublishComponent',u'Feature_',u'N',None, None, u'Feature',1,u'Identifier',None, u'Foreign key into the Feature table.',),
-(u'PublishComponent',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table.',),
-(u'PublishComponent',u'ComponentId',u'N',None, None, None, None, u'Guid',None, u'A string GUID that represents the component id that will be requested by the alien product.',),
-(u'PublishComponent',u'AppData',u'Y',None, None, None, None, u'Text',None, u'This is localisable Application specific data that can be associated with a Qualified Component.',),
-(u'PublishComponent',u'Qualifier',u'N',None, None, None, None, u'Text',None, u'This is defined only when the ComponentId column is an Qualified Component Id. This is the Qualifier for ProvideComponentIndirect.',),
-(u'RadioButton',u'Y',u'N',0,32767,None, None, None, None, u'The vertical coordinate of the upper left corner of the bounding rectangle of the radio button.',),
-(u'RadioButton',u'Text',u'Y',None, None, None, None, u'Text',None, u'The visible title to be assigned to the radio button.',),
-(u'RadioButton',u'Property',u'N',None, None, None, None, u'Identifier',None, u'A named property to be tied to this radio button. All the buttons tied to the same property become part of the same group.',),
-(u'RadioButton',u'Height',u'N',0,32767,None, None, None, None, u'The height of the button.',),
-(u'RadioButton',u'Width',u'N',0,32767,None, None, None, None, u'The width of the button.',),
-(u'RadioButton',u'X',u'N',0,32767,None, None, None, None, u'The horizontal coordinate of the upper left corner of the bounding rectangle of the radio button.',),
-(u'RadioButton',u'Value',u'N',None, None, None, None, u'Formatted',None, u'The value string associated with this button. Selecting the button will set the associated property to this value.',),
-(u'RadioButton',u'Order',u'N',1,32767,None, None, None, None, u'A positive integer used to determine the ordering of the items within one list..The integers do not have to be consecutive.',),
-(u'RadioButton',u'Help',u'Y',None, None, None, None, u'Text',None, u'The help strings used with the button. The text is optional.',),
-(u'Registry',u'Name',u'Y',None, None, None, None, u'Formatted',None, u'The registry value name.',),
-(u'Registry',u'Value',u'Y',None, None, None, None, u'Formatted',None, u'The registry value.',),
-(u'Registry',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table referencing component that controls the installing of the registry value.',),
-(u'Registry',u'Key',u'N',None, None, None, None, u'RegPath',None, u'The key for the registry value.',),
-(u'Registry',u'Registry',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized token.',),
-(u'Registry',u'Root',u'N',-1,3,None, None, None, None, u'The predefined root key for the registry value, one of rrkEnum.',),
-(u'RegLocator',u'Name',u'Y',None, None, None, None, u'Formatted',None, u'The registry value name.',),
-(u'RegLocator',u'Type',u'Y',0,18,None, None, None, None, u'An integer value that determines if the registry value is a filename or a directory location or to be used as is w/o interpretation.',),
-(u'RegLocator',u'Signature_',u'N',None, None, None, None, u'Identifier',None, u'The table key. The Signature_ represents a unique file signature and is also the foreign key in the Signature table. If the type is 0, the registry values refers a directory, and _Signature is not a foreign key.',),
-(u'RegLocator',u'Key',u'N',None, None, None, None, u'RegPath',None, u'The key for the registry value.',),
-(u'RegLocator',u'Root',u'N',0,3,None, None, None, None, u'The predefined root key for the registry value, one of rrkEnum.',),
-(u'RemoveFile',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key referencing Component that controls the file to be removed.',),
-(u'RemoveFile',u'FileKey',u'N',None, None, None, None, u'Identifier',None, u'Primary key used to identify a particular file entry',),
-(u'RemoveFile',u'FileName',u'Y',None, None, None, None, u'WildCardFilename',None, u'Name of the file to be removed.',),
-(u'RemoveFile',u'DirProperty',u'N',None, None, None, None, u'Identifier',None, u'Name of a property whose value is assumed to resolve to the full pathname to the folder of the file to be removed.',),
-(u'RemoveFile',u'InstallMode',u'N',None, None, None, None, None, u'1;2;3',u'Installation option, one of iimEnum.',),
-(u'RemoveIniFile',u'Action',u'N',None, None, None, None, None, u'2;4',u'The type of modification to be made, one of iifEnum.',),
-(u'RemoveIniFile',u'Value',u'Y',None, None, None, None, u'Formatted',None, u'The value to be deleted. The value is required when Action is iifIniRemoveTag',),
-(u'RemoveIniFile',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table referencing component that controls the deletion of the .INI value.',),
-(u'RemoveIniFile',u'FileName',u'N',None, None, None, None, u'Filename',None, u'The .INI file name in which to delete the information',),
-(u'RemoveIniFile',u'DirProperty',u'Y',None, None, None, None, u'Identifier',None, u'Foreign key into the Directory table denoting the directory where the .INI file is.',),
-(u'RemoveIniFile',u'Key',u'N',None, None, None, None, u'Formatted',None, u'The .INI file key below Section.',),
-(u'RemoveIniFile',u'Section',u'N',None, None, None, None, u'Formatted',None, u'The .INI file Section.',),
-(u'RemoveIniFile',u'RemoveIniFile',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized token.',),
-(u'RemoveRegistry',u'Name',u'Y',None, None, None, None, u'Formatted',None, u'The registry value name.',),
-(u'RemoveRegistry',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table referencing component that controls the deletion of the registry value.',),
-(u'RemoveRegistry',u'Key',u'N',None, None, None, None, u'RegPath',None, u'The key for the registry value.',),
-(u'RemoveRegistry',u'Root',u'N',-1,3,None, None, None, None, u'The predefined root key for the registry value, one of rrkEnum',),
-(u'RemoveRegistry',u'RemoveRegistry',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized token.',),
-(u'ReserveCost',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Reserve a specified amount of space if this component is to be installed.',),
-(u'ReserveCost',u'ReserveFolder',u'Y',None, None, None, None, u'Identifier',None, u'Name of a property whose value is assumed to resolve to the full path to the destination directory',),
-(u'ReserveCost',u'ReserveKey',u'N',None, None, None, None, u'Identifier',None, u'Primary key that uniquely identifies a particular ReserveCost record',),
-(u'ReserveCost',u'ReserveLocal',u'N',0,2147483647,None, None, None, None, u'Disk space to reserve if linked component is installed locally.',),
-(u'ReserveCost',u'ReserveSource',u'N',0,2147483647,None, None, None, None, u'Disk space to reserve if linked component is installed to run from the source location.',),
-(u'SelfReg',u'File_',u'N',None, None, u'File',1,u'Identifier',None, u'Foreign key into the File table denoting the module that needs to be registered.',),
-(u'SelfReg',u'Cost',u'Y',0,32767,None, None, None, None, u'The cost of registering the module.',),
-(u'ServiceControl',u'Name',u'N',None, None, None, None, u'Formatted',None, u'Name of a service. /, \\, comma and space are invalid',),
-(u'ServiceControl',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Required foreign key into the Component Table that controls the startup of the service',),
-(u'ServiceControl',u'Event',u'N',0,187,None, None, None, None, u'Bit field:  Install:  0x1 = Start, 0x2 = Stop, 0x8 = Delete, Uninstall: 0x10 = Start, 0x20 = Stop, 0x80 = Delete',),
-(u'ServiceControl',u'ServiceControl',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized token.',),
-(u'ServiceControl',u'Arguments',u'Y',None, None, None, None, u'Formatted',None, u'Arguments for the service.  Separate by [~].',),
-(u'ServiceControl',u'Wait',u'Y',0,1,None, None, None, None, u'Boolean for whether to wait for the service to fully start',),
-(u'ServiceInstall',u'Name',u'N',None, None, None, None, u'Formatted',None, u'Internal Name of the Service',),
-(u'ServiceInstall',u'Description',u'Y',None, None, None, None, u'Text',None, u'Description of service.',),
-(u'ServiceInstall',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Required foreign key into the Component Table that controls the startup of the service',),
-(u'ServiceInstall',u'Arguments',u'Y',None, None, None, None, u'Formatted',None, u'Arguments to include in every start of the service, passed to WinMain',),
-(u'ServiceInstall',u'ServiceInstall',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized token.',),
-(u'ServiceInstall',u'Dependencies',u'Y',None, None, None, None, u'Formatted',None, u'Other services this depends on to start.  Separate by [~], and end with [~][~]',),
-(u'ServiceInstall',u'DisplayName',u'Y',None, None, None, None, u'Formatted',None, u'External Name of the Service',),
-(u'ServiceInstall',u'ErrorControl',u'N',-2147483647,2147483647,None, None, None, None, u'Severity of error if service fails to start',),
-(u'ServiceInstall',u'LoadOrderGroup',u'Y',None, None, None, None, u'Formatted',None, u'LoadOrderGroup',),
-(u'ServiceInstall',u'Password',u'Y',None, None, None, None, u'Formatted',None, u'password to run service with.  (with StartName)',),
-(u'ServiceInstall',u'ServiceType',u'N',-2147483647,2147483647,None, None, None, None, u'Type of the service',),
-(u'ServiceInstall',u'StartName',u'Y',None, None, None, None, u'Formatted',None, u'User or object name to run service as',),
-(u'ServiceInstall',u'StartType',u'N',0,4,None, None, None, None, u'Type of the service',),
-(u'Shortcut',u'Name',u'N',None, None, None, None, u'Filename',None, u'The name of the shortcut to be created.',),
-(u'Shortcut',u'Description',u'Y',None, None, None, None, u'Text',None, u'The description for the shortcut.',),
-(u'Shortcut',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Foreign key into the Component table denoting the component whose selection gates the the shortcut creation/deletion.',),
-(u'Shortcut',u'Icon_',u'Y',None, None, u'Icon',1,u'Identifier',None, u'Foreign key into the File table denoting the external icon file for the shortcut.',),
-(u'Shortcut',u'IconIndex',u'Y',-32767,32767,None, None, None, None, u'The icon index for the shortcut.',),
-(u'Shortcut',u'Directory_',u'N',None, None, u'Directory',1,u'Identifier',None, u'Foreign key into the Directory table denoting the directory where the shortcut file is created.',),
-(u'Shortcut',u'Target',u'N',None, None, None, None, u'Shortcut',None, u'The shortcut target. This is usually a property that is expanded to a file or a folder that the shortcut points to.',),
-(u'Shortcut',u'Arguments',u'Y',None, None, None, None, u'Formatted',None, u'The command-line arguments for the shortcut.',),
-(u'Shortcut',u'Shortcut',u'N',None, None, None, None, u'Identifier',None, u'Primary key, non-localized token.',),
-(u'Shortcut',u'Hotkey',u'Y',0,32767,None, None, None, None, u'The hotkey for the shortcut. It has the virtual-key code for the key in the low-order byte, and the modifier flags in the high-order byte. ',),
-(u'Shortcut',u'ShowCmd',u'Y',None, None, None, None, None, u'1;3;7',u'The show command for the application window.The following values may be used.',),
-(u'Shortcut',u'WkDir',u'Y',None, None, None, None, u'Identifier',None, u'Name of property defining location of working directory.',),
-(u'Signature',u'FileName',u'N',None, None, None, None, u'Filename',None, u'The name of the file. This may contain a "short name|long name" pair.',),
-(u'Signature',u'Signature',u'N',None, None, None, None, u'Identifier',None, u'The table key. The Signature represents a unique file signature.',),
-(u'Signature',u'Languages',u'Y',None, None, None, None, u'Language',None, u'The languages supported by the file.',),
-(u'Signature',u'MaxDate',u'Y',0,2147483647,None, None, None, None, u'The maximum creation date of the file.',),
-(u'Signature',u'MaxSize',u'Y',0,2147483647,None, None, None, None, u'The maximum size of the file. ',),
-(u'Signature',u'MaxVersion',u'Y',None, None, None, None, u'Text',None, u'The maximum version of the file.',),
-(u'Signature',u'MinDate',u'Y',0,2147483647,None, None, None, None, u'The minimum creation date of the file.',),
-(u'Signature',u'MinSize',u'Y',0,2147483647,None, None, None, None, u'The minimum size of the file.',),
-(u'Signature',u'MinVersion',u'Y',None, None, None, None, u'Text',None, u'The minimum version of the file.',),
-(u'TextStyle',u'TextStyle',u'N',None, None, None, None, u'Identifier',None, u'Name of the style. The primary key of this table. This name is embedded in the texts to indicate a style change.',),
-(u'TextStyle',u'Color',u'Y',0,16777215,None, None, None, None, u'A long integer indicating the color of the string in the RGB format (Red, Green, Blue each 0-255, RGB = R + 256*G + 256^2*B).',),
-(u'TextStyle',u'FaceName',u'N',None, None, None, None, u'Text',None, u'A string indicating the name of the font used. Required. The string must be at most 31 characters long.',),
-(u'TextStyle',u'Size',u'N',0,32767,None, None, None, None, u'The size of the font used. This size is given in our units (1/12 of the system font height). Assuming that the system font is set to 12 point size, this is equivalent to the point size.',),
-(u'TextStyle',u'StyleBits',u'Y',0,15,None, None, None, None, u'A combination of style bits.',),
-(u'TypeLib',u'Description',u'Y',None, None, None, None, u'Text',None, None, ),
-(u'TypeLib',u'Feature_',u'N',None, None, u'Feature',1,u'Identifier',None, u'Required foreign key into the Feature Table, specifying the feature to validate or install in order for the type library to be operational.',),
-(u'TypeLib',u'Component_',u'N',None, None, u'Component',1,u'Identifier',None, u'Required foreign key into the Component Table, specifying the component for which to return a path when called through LocateComponent.',),
-(u'TypeLib',u'Directory_',u'Y',None, None, u'Directory',1,u'Identifier',None, u'Optional. The foreign key into the Directory table denoting the path to the help file for the type library.',),
-(u'TypeLib',u'Language',u'N',0,32767,None, None, None, None, u'The language of the library.',),
-(u'TypeLib',u'Version',u'Y',0,16777215,None, None, None, None, u'The version of the library. The minor version is in the lower 8 bits of the integer. The major version is in the next 16 bits. ',),
-(u'TypeLib',u'Cost',u'Y',0,2147483647,None, None, None, None, u'The cost associated with the registration of the typelib. This column is currently optional.',),
-(u'TypeLib',u'LibID',u'N',None, None, None, None, u'Guid',None, u'The GUID that represents the library.',),
-(u'UIText',u'Text',u'Y',None, None, None, None, u'Text',None, u'The localized version of the string.',),
-(u'UIText',u'Key',u'N',None, None, None, None, u'Identifier',None, u'A unique key that identifies the particular string.',),
-(u'Upgrade',u'Attributes',u'N',0,2147483647,None, None, None, None, u'The attributes of this product set.',),
-(u'Upgrade',u'Language',u'Y',None, None, None, None, u'Language',None, u'A comma-separated list of languages for either products in this set or products not in this set.',),
-(u'Upgrade',u'ActionProperty',u'N',None, None, None, None, u'UpperCase',None, u'The property to set when a product in this set is found.',),
-(u'Upgrade',u'Remove',u'Y',None, None, None, None, u'Formatted',None, u'The list of features to remove when uninstalling a product from this set.  The default is "ALL".',),
-(u'Upgrade',u'UpgradeCode',u'N',None, None, None, None, u'Guid',None, u'The UpgradeCode GUID belonging to the products in this set.',),
-(u'Upgrade',u'VersionMax',u'Y',None, None, None, None, u'Text',None, u'The maximum ProductVersion of the products in this set.  The set may or may not include products with this particular version.',),
-(u'Upgrade',u'VersionMin',u'Y',None, None, None, None, u'Text',None, u'The minimum ProductVersion of the products in this set.  The set may or may not include products with this particular version.',),
-(u'Verb',u'Sequence',u'Y',0,32767,None, None, None, None, u'Order within the verbs for a particular extension. Also used simply to specify the default verb.',),
-(u'Verb',u'Argument',u'Y',None, None, None, None, u'Formatted',None, u'Optional value for the command arguments.',),
-(u'Verb',u'Extension_',u'N',None, None, u'Extension',1,u'Text',None, u'The extension associated with the table row.',),
-(u'Verb',u'Verb',u'N',None, None, None, None, u'Text',None, u'The verb for the command.',),
-(u'Verb',u'Command',u'Y',None, None, None, None, u'Formatted',None, u'The command text.',),
+('_Validation','Table','N',None, None, None, None, 'Identifier',None, 'Name of table',),
+('_Validation','Column','N',None, None, None, None, 'Identifier',None, 'Name of column',),
+('_Validation','Description','Y',None, None, None, None, 'Text',None, 'Description of column',),
+('_Validation','Set','Y',None, None, None, None, 'Text',None, 'Set of values that are permitted',),
+('_Validation','Category','Y',None, None, None, None, None, 'Text;Formatted;Template;Condition;Guid;Path;Version;Language;Identifier;Binary;UpperCase;LowerCase;Filename;Paths;AnyPath;WildCardFilename;RegPath;KeyFormatted;CustomSource;Property;Cabinet;Shortcut;URL','String category',),
+('_Validation','KeyColumn','Y',1,32,None, None, None, None, 'Column to which foreign key connects',),
+('_Validation','KeyTable','Y',None, None, None, None, 'Identifier',None, 'For foreign key, Name of table to which data must link',),
+('_Validation','MaxValue','Y',-2147483647,2147483647,None, None, None, None, 'Maximum value allowed',),
+('_Validation','MinValue','Y',-2147483647,2147483647,None, None, None, None, 'Minimum value allowed',),
+('_Validation','Nullable','N',None, None, None, None, None, 'Y;N;@','Whether the column is nullable',),
+('ActionText','Description','Y',None, None, None, None, 'Text',None, 'Localized description displayed in progress dialog and log when action is executing.',),
+('ActionText','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to be described.',),
+('ActionText','Template','Y',None, None, None, None, 'Template',None, 'Optional localized format template used to format action data records for display during action execution.',),
+('AdminExecuteSequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',),
+('AdminExecuteSequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
+('AdminExecuteSequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
+('Condition','Condition','Y',None, None, None, None, 'Condition',None, 'Expression evaluated to determine if Level in the Feature table is to change.',),
+('Condition','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Reference to a Feature entry in Feature table.',),
+('Condition','Level','N',0,32767,None, None, None, None, 'New selection Level to set in Feature table if Condition evaluates to TRUE.',),
+('AdminUISequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',),
+('AdminUISequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
+('AdminUISequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
+('AdvtExecuteSequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',),
+('AdvtExecuteSequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
+('AdvtExecuteSequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
+('AdvtUISequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',),
+('AdvtUISequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
+('AdvtUISequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
+('AppId','AppId','N',None, None, None, None, 'Guid',None, None, ),
+('AppId','ActivateAtStorage','Y',0,1,None, None, None, None, None, ),
+('AppId','DllSurrogate','Y',None, None, None, None, 'Text',None, None, ),
+('AppId','LocalService','Y',None, None, None, None, 'Text',None, None, ),
+('AppId','RemoteServerName','Y',None, None, None, None, 'Formatted',None, None, ),
+('AppId','RunAsInteractiveUser','Y',0,1,None, None, None, None, None, ),
+('AppId','ServiceParameters','Y',None, None, None, None, 'Text',None, None, ),
+('AppSearch','Property','N',None, None, None, None, 'Identifier',None, 'The property associated with a Signature',),
+('AppSearch','Signature_','N',None, None, 'Signature;RegLocator;IniLocator;DrLocator;CompLocator',1,'Identifier',None, 'The Signature_ represents a unique file signature and is also the foreign key in the Signature,  RegLocator, IniLocator, CompLocator and the DrLocator tables.',),
+('Property','Property','N',None, None, None, None, 'Identifier',None, 'Name of property, uppercase if settable by launcher or loader.',),
+('Property','Value','N',None, None, None, None, 'Text',None, 'String value for property.  Never null or empty.',),
+('BBControl','Type','N',None, None, None, None, 'Identifier',None, 'The type of the control.',),
+('BBControl','Y','N',0,32767,None, None, None, None, 'Vertical coordinate of the upper left corner of the bounding rectangle of the control.',),
+('BBControl','Text','Y',None, None, None, None, 'Text',None, 'A string used to set the initial text contained within a control (if appropriate).',),
+('BBControl','BBControl','N',None, None, None, None, 'Identifier',None, 'Name of the control. This name must be unique within a billboard, but can repeat on different billboard.',),
+('BBControl','Attributes','Y',0,2147483647,None, None, None, None, 'A 32-bit word that specifies the attribute flags to be applied to this control.',),
+('BBControl','Billboard_','N',None, None, 'Billboard',1,'Identifier',None, 'External key to the Billboard table, name of the billboard.',),
+('BBControl','Height','N',0,32767,None, None, None, None, 'Height of the bounding rectangle of the control.',),
+('BBControl','Width','N',0,32767,None, None, None, None, 'Width of the bounding rectangle of the control.',),
+('BBControl','X','N',0,32767,None, None, None, None, 'Horizontal coordinate of the upper left corner of the bounding rectangle of the control.',),
+('Billboard','Action','Y',None, None, None, None, 'Identifier',None, 'The name of an action. The billboard is displayed during the progress messages received from this action.',),
+('Billboard','Billboard','N',None, None, None, None, 'Identifier',None, 'Name of the billboard.',),
+('Billboard','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'An external key to the Feature Table. The billboard is shown only if this feature is being installed.',),
+('Billboard','Ordering','Y',0,32767,None, None, None, None, 'A positive integer. If there is more than one billboard corresponding to an action they will be shown in the order defined by this column.',),
+('Feature','Description','Y',None, None, None, None, 'Text',None, 'Longer descriptive text describing a visible feature item.',),
+('Feature','Attributes','N',None, None, None, None, None, '0;1;2;4;5;6;8;9;10;16;17;18;20;21;22;24;25;26;32;33;34;36;37;38;48;49;50;52;53;54','Feature attributes',),
+('Feature','Feature','N',None, None, None, None, 'Identifier',None, 'Primary key used to identify a particular feature record.',),
+('Feature','Directory_','Y',None, None, 'Directory',1,'UpperCase',None, 'The name of the Directory that can be configured by the UI. A non-null value will enable the browse button.',),
+('Feature','Level','N',0,32767,None, None, None, None, 'The install level at which record will be initially selected. An install level of 0 will disable an item and prevent its display.',),
+('Feature','Title','Y',None, None, None, None, 'Text',None, 'Short text identifying a visible feature item.',),
+('Feature','Display','Y',0,32767,None, None, None, None, 'Numeric sort order, used to force a specific display ordering.',),
+('Feature','Feature_Parent','Y',None, None, 'Feature',1,'Identifier',None, 'Optional key of a parent record in the same table. If the parent is not selected, then the record will not be installed. Null indicates a root item.',),
+('Binary','Name','N',None, None, None, None, 'Identifier',None, 'Unique key identifying the binary data.',),
+('Binary','Data','N',None, None, None, None, 'Binary',None, 'The unformatted binary data.',),
+('BindImage','File_','N',None, None, 'File',1,'Identifier',None, 'The index into the File table. This must be an executable file.',),
+('BindImage','Path','Y',None, None, None, None, 'Paths',None, 'A list of ;  delimited paths that represent the paths to be searched for the import DLLS. The list is usually a list of properties each enclosed within square brackets [] .',),
+('File','Sequence','N',1,32767,None, None, None, None, 'Sequence with respect to the media images; order must track cabinet order.',),
+('File','Attributes','Y',0,32767,None, None, None, None, 'Integer containing bit flags representing file attributes (with the decimal value of each bit position in parentheses)',),
+('File','File','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token, must match identifier in cabinet.  For uncompressed files, this field is ignored.',),
+('File','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key referencing Component that controls the file.',),
+('File','FileName','N',None, None, None, None, 'Filename',None, 'File name used for installation, may be localized.  This may contain a "short name|long name" pair.',),
+('File','FileSize','N',0,2147483647,None, None, None, None, 'Size of file in bytes (long integer).',),
+('File','Language','Y',None, None, None, None, 'Language',None, 'List of decimal language Ids, comma-separated if more than one.',),
+('File','Version','Y',None, None, 'File',1,'Version',None, 'Version string for versioned files;  Blank for unversioned files.',),
+('CCPSearch','Signature_','N',None, None, 'Signature;RegLocator;IniLocator;DrLocator;CompLocator',1,'Identifier',None, 'The Signature_ represents a unique file signature and is also the foreign key in the Signature,  RegLocator, IniLocator, CompLocator and the DrLocator tables.',),
+('CheckBox','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to the item.',),
+('CheckBox','Value','Y',None, None, None, None, 'Formatted',None, 'The value string associated with the item.',),
+('Class','Description','Y',None, None, None, None, 'Text',None, 'Localized description for the Class.',),
+('Class','Attributes','Y',None, 32767,None, None, None, None, 'Class registration attributes.',),
+('Class','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Required foreign key into the Feature Table, specifying the feature to validate or install in order for the CLSID factory to be operational.',),
+('Class','AppId_','Y',None, None, 'AppId',1,'Guid',None, 'Optional AppID containing DCOM information for associated application (string GUID).',),
+('Class','Argument','Y',None, None, None, None, 'Formatted',None, 'optional argument for LocalServers.',),
+('Class','CLSID','N',None, None, None, None, 'Guid',None, 'The CLSID of an OLE factory.',),
+('Class','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table, specifying the component for which to return a path when called through LocateComponent.',),
+('Class','Context','N',None, None, None, None, 'Identifier',None, 'The numeric server context for this server. CLSCTX_xxxx',),
+('Class','DefInprocHandler','Y',None, None, None, None, 'Filename','1;2;3','Optional default inproc handler.  Only optionally provided if Context=CLSCTX_LOCAL_SERVER.  Typically "ole32.dll" or "mapi32.dll"',),
+('Class','FileTypeMask','Y',None, None, None, None, 'Text',None, 'Optional string containing information for the HKCRthis CLSID) key. If multiple patterns exist, they must be delimited by a semicolon, and numeric subkeys will be generated: 0,1,2...',),
+('Class','Icon_','Y',None, None, 'Icon',1,'Identifier',None, 'Optional foreign key into the Icon Table, specifying the icon file associated with this CLSID. Will be written under the DefaultIcon key.',),
+('Class','IconIndex','Y',-32767,32767,None, None, None, None, 'Optional icon index.',),
+('Class','ProgId_Default','Y',None, None, 'ProgId',1,'Text',None, 'Optional ProgId associated with this CLSID.',),
+('Component','Condition','Y',None, None, None, None, 'Condition',None, "A conditional statement that will disable this component if the specified condition evaluates to the 'True' state. If a component is disabled, it will not be installed, regardless of the 'Action' state associated with the component.",),
+('Component','Attributes','N',None, None, None, None, None, None, 'Remote execution option, one of irsEnum',),
+('Component','Component','N',None, None, None, None, 'Identifier',None, 'Primary key used to identify a particular component record.',),
+('Component','ComponentId','Y',None, None, None, None, 'Guid',None, 'A string GUID unique to this component, version, and language.',),
+('Component','Directory_','N',None, None, 'Directory',1,'Identifier',None, 'Required key of a Directory table record. This is actually a property name whose value contains the actual path, set either by the AppSearch action or with the default setting obtained from the Directory table.',),
+('Component','KeyPath','Y',None, None, 'File;Registry;ODBCDataSource',1,'Identifier',None, 'Either the primary key into the File table, Registry table, or ODBCDataSource table. This extract path is stored when the component is installed, and is used to detect the presence of the component and to return the path to it.',),
+('Icon','Name','N',None, None, None, None, 'Identifier',None, 'Primary key. Name of the icon file.',),
+('Icon','Data','N',None, None, None, None, 'Binary',None, 'Binary stream. The binary icon data in PE (.DLL or .EXE) or icon (.ICO) format.',),
+('ProgId','Description','Y',None, None, None, None, 'Text',None, 'Localized description for the Program identifier.',),
+('ProgId','Icon_','Y',None, None, 'Icon',1,'Identifier',None, 'Optional foreign key into the Icon Table, specifying the icon file associated with this ProgId. Will be written under the DefaultIcon key.',),
+('ProgId','IconIndex','Y',-32767,32767,None, None, None, None, 'Optional icon index.',),
+('ProgId','ProgId','N',None, None, None, None, 'Text',None, 'The Program Identifier. Primary key.',),
+('ProgId','Class_','Y',None, None, 'Class',1,'Guid',None, 'The CLSID of an OLE factory corresponding to the ProgId.',),
+('ProgId','ProgId_Parent','Y',None, None, 'ProgId',1,'Text',None, 'The Parent Program Identifier. If specified, the ProgId column becomes a version independent prog id.',),
+('ComboBox','Text','Y',None, None, None, None, 'Formatted',None, 'The visible text to be assigned to the item. Optional. If this entry or the entire column is missing, the text is the same as the value.',),
+('ComboBox','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to this item. All the items tied to the same property become part of the same combobox.',),
+('ComboBox','Value','N',None, None, None, None, 'Formatted',None, 'The value string associated with this item. Selecting the line will set the associated property to this value.',),
+('ComboBox','Order','N',1,32767,None, None, None, None, 'A positive integer used to determine the ordering of the items within one list.\tThe integers do not have to be consecutive.',),
+('CompLocator','Type','Y',0,1,None, None, None, None, 'A boolean value that determines if the registry value is a filename or a directory location.',),
+('CompLocator','Signature_','N',None, None, None, None, 'Identifier',None, 'The table key. The Signature_ represents a unique file signature and is also the foreign key in the Signature table.',),
+('CompLocator','ComponentId','N',None, None, None, None, 'Guid',None, 'A string GUID unique to this component, version, and language.',),
+('Complus','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key referencing Component that controls the ComPlus component.',),
+('Complus','ExpType','Y',0,32767,None, None, None, None, 'ComPlus component attributes.',),
+('Directory','Directory','N',None, None, None, None, 'Identifier',None, 'Unique identifier for directory entry, primary key. If a property by this name is defined, it contains the full path to the directory.',),
+('Directory','DefaultDir','N',None, None, None, None, 'DefaultDir',None, "The default sub-path under parent's path.",),
+('Directory','Directory_Parent','Y',None, None, 'Directory',1,'Identifier',None, 'Reference to the entry in this table specifying the default parent directory. A record parented to itself or with a Null parent represents a root of the install tree.',),
+('Control','Type','N',None, None, None, None, 'Identifier',None, 'The type of the control.',),
+('Control','Y','N',0,32767,None, None, None, None, 'Vertical coordinate of the upper left corner of the bounding rectangle of the control.',),
+('Control','Text','Y',None, None, None, None, 'Formatted',None, 'A string used to set the initial text contained within a control (if appropriate).',),
+('Control','Property','Y',None, None, None, None, 'Identifier',None, 'The name of a defined property to be linked to this control. ',),
+('Control','Attributes','Y',0,2147483647,None, None, None, None, 'A 32-bit word that specifies the attribute flags to be applied to this control.',),
+('Control','Height','N',0,32767,None, None, None, None, 'Height of the bounding rectangle of the control.',),
+('Control','Width','N',0,32767,None, None, None, None, 'Width of the bounding rectangle of the control.',),
+('Control','X','N',0,32767,None, None, None, None, 'Horizontal coordinate of the upper left corner of the bounding rectangle of the control.',),
+('Control','Control','N',None, None, None, None, 'Identifier',None, 'Name of the control. This name must be unique within a dialog, but can repeat on different dialogs. ',),
+('Control','Control_Next','Y',None, None, 'Control',2,'Identifier',None, 'The name of an other control on the same dialog. This link defines the tab order of the controls. The links have to form one or more cycles!',),
+('Control','Dialog_','N',None, None, 'Dialog',1,'Identifier',None, 'External key to the Dialog table, name of the dialog.',),
+('Control','Help','Y',None, None, None, None, 'Text',None, 'The help strings used with the button. The text is optional. ',),
+('Dialog','Attributes','Y',0,2147483647,None, None, None, None, 'A 32-bit word that specifies the attribute flags to be applied to this dialog.',),
+('Dialog','Height','N',0,32767,None, None, None, None, 'Height of the bounding rectangle of the dialog.',),
+('Dialog','Width','N',0,32767,None, None, None, None, 'Width of the bounding rectangle of the dialog.',),
+('Dialog','Dialog','N',None, None, None, None, 'Identifier',None, 'Name of the dialog.',),
+('Dialog','Control_Cancel','Y',None, None, 'Control',2,'Identifier',None, 'Defines the cancel control. Hitting escape or clicking on the close icon on the dialog is equivalent to pushing this button.',),
+('Dialog','Control_Default','Y',None, None, 'Control',2,'Identifier',None, 'Defines the default control. Hitting return is equivalent to pushing this button.',),
+('Dialog','Control_First','N',None, None, 'Control',2,'Identifier',None, 'Defines the control that has the focus when the dialog is created.',),
+('Dialog','HCentering','N',0,100,None, None, None, None, 'Horizontal position of the dialog on a 0-100 scale. 0 means left end, 100 means right end of the screen, 50 center.',),
+('Dialog','Title','Y',None, None, None, None, 'Formatted',None, "A text string specifying the title to be displayed in the title bar of the dialog's window.",),
+('Dialog','VCentering','N',0,100,None, None, None, None, 'Vertical position of the dialog on a 0-100 scale. 0 means top end, 100 means bottom end of the screen, 50 center.',),
+('ControlCondition','Action','N',None, None, None, None, None, 'Default;Disable;Enable;Hide;Show','The desired action to be taken on the specified control.',),
+('ControlCondition','Condition','N',None, None, None, None, 'Condition',None, 'A standard conditional statement that specifies under which conditions the action should be triggered.',),
+('ControlCondition','Dialog_','N',None, None, 'Dialog',1,'Identifier',None, 'A foreign key to the Dialog table, name of the dialog.',),
+('ControlCondition','Control_','N',None, None, 'Control',2,'Identifier',None, 'A foreign key to the Control table, name of the control.',),
+('ControlEvent','Condition','Y',None, None, None, None, 'Condition',None, 'A standard conditional statement that specifies under which conditions an event should be triggered.',),
+('ControlEvent','Ordering','Y',0,2147483647,None, None, None, None, 'An integer used to order several events tied to the same control. Can be left blank.',),
+('ControlEvent','Argument','N',None, None, None, None, 'Formatted',None, 'A value to be used as a modifier when triggering a particular event.',),
+('ControlEvent','Dialog_','N',None, None, 'Dialog',1,'Identifier',None, 'A foreign key to the Dialog table, name of the dialog.',),
+('ControlEvent','Control_','N',None, None, 'Control',2,'Identifier',None, 'A foreign key to the Control table, name of the control',),
+('ControlEvent','Event','N',None, None, None, None, 'Formatted',None, 'An identifier that specifies the type of the event that should take place when the user interacts with control specified by the first two entries.',),
+('CreateFolder','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table.',),
+('CreateFolder','Directory_','N',None, None, 'Directory',1,'Identifier',None, 'Primary key, could be foreign key into the Directory table.',),
+('CustomAction','Type','N',1,16383,None, None, None, None, 'The numeric custom action type, consisting of source location, code type, entry, option flags.',),
+('CustomAction','Action','N',None, None, None, None, 'Identifier',None, 'Primary key, name of action, normally appears in sequence table unless private use.',),
+('CustomAction','Source','Y',None, None, None, None, 'CustomSource',None, 'The table reference of the source of the code.',),
+('CustomAction','Target','Y',None, None, None, None, 'Formatted',None, 'Excecution parameter, depends on the type of custom action',),
+('DrLocator','Signature_','N',None, None, None, None, 'Identifier',None, 'The Signature_ represents a unique file signature and is also the foreign key in the Signature table.',),
+('DrLocator','Path','Y',None, None, None, None, 'AnyPath',None, 'The path on the user system. This is a either a subpath below the value of the Parent or a full path. The path may contain properties enclosed within [ ] that will be expanded.',),
+('DrLocator','Depth','Y',0,32767,None, None, None, None, 'The depth below the path to which the Signature_ is recursively searched. If absent, the depth is assumed to be 0.',),
+('DrLocator','Parent','Y',None, None, None, None, 'Identifier',None, 'The parent file signature. It is also a foreign key in the Signature table. If null and the Path column does not expand to a full path, then all the fixed drives of the user system are searched using the Path.',),
+('DuplicateFile','File_','N',None, None, 'File',1,'Identifier',None, 'Foreign key referencing the source file to be duplicated.',),
+('DuplicateFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key referencing Component that controls the duplicate file.',),
+('DuplicateFile','DestFolder','Y',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full pathname to a destination folder.',),
+('DuplicateFile','DestName','Y',None, None, None, None, 'Filename',None, 'Filename to be given to the duplicate file.',),
+('DuplicateFile','FileKey','N',None, None, None, None, 'Identifier',None, 'Primary key used to identify a particular file entry',),
+('Environment','Name','N',None, None, None, None, 'Text',None, 'The name of the environmental value.',),
+('Environment','Value','Y',None, None, None, None, 'Formatted',None, 'The value to set in the environmental settings.',),
+('Environment','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the installing of the environmental value.',),
+('Environment','Environment','N',None, None, None, None, 'Identifier',None, 'Unique identifier for the environmental variable setting',),
+('Error','Error','N',0,32767,None, None, None, None, 'Integer error number, obtained from header file IError(...) macros.',),
+('Error','Message','Y',None, None, None, None, 'Template',None, 'Error formatting template, obtained from user ed. or localizers.',),
+('EventMapping','Dialog_','N',None, None, 'Dialog',1,'Identifier',None, 'A foreign key to the Dialog table, name of the Dialog.',),
+('EventMapping','Control_','N',None, None, 'Control',2,'Identifier',None, 'A foreign key to the Control table, name of the control.',),
+('EventMapping','Event','N',None, None, None, None, 'Identifier',None, 'An identifier that specifies the type of the event that the control subscribes to.',),
+('EventMapping','Attribute','N',None, None, None, None, 'Identifier',None, 'The name of the control attribute, that is set when this event is received.',),
+('Extension','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Required foreign key into the Feature Table, specifying the feature to validate or install in order for the CLSID factory to be operational.',),
+('Extension','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table, specifying the component for which to return a path when called through LocateComponent.',),
+('Extension','Extension','N',None, None, None, None, 'Text',None, 'The extension associated with the table row.',),
+('Extension','MIME_','Y',None, None, 'MIME',1,'Text',None, 'Optional Context identifier, typically "type/format" associated with the extension',),
+('Extension','ProgId_','Y',None, None, 'ProgId',1,'Text',None, 'Optional ProgId associated with this extension.',),
+('MIME','CLSID','Y',None, None, None, None, 'Guid',None, 'Optional associated CLSID.',),
+('MIME','ContentType','N',None, None, None, None, 'Text',None, 'Primary key. Context identifier, typically "type/format".',),
+('MIME','Extension_','N',None, None, 'Extension',1,'Text',None, 'Optional associated extension (without dot)',),
+('FeatureComponents','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Foreign key into Feature table.',),
+('FeatureComponents','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into Component table.',),
+('FileSFPCatalog','File_','N',None, None, 'File',1,'Identifier',None, 'File associated with the catalog',),
+('FileSFPCatalog','SFPCatalog_','N',None, None, 'SFPCatalog',1,'Filename',None, 'Catalog associated with the file',),
+('SFPCatalog','SFPCatalog','N',None, None, None, None, 'Filename',None, 'File name for the catalog.',),
+('SFPCatalog','Catalog','N',None, None, None, None, 'Binary',None, 'SFP Catalog',),
+('SFPCatalog','Dependency','Y',None, None, None, None, 'Formatted',None, 'Parent catalog - only used by SFP',),
+('Font','File_','N',None, None, 'File',1,'Identifier',None, 'Primary key, foreign key into File table referencing font file.',),
+('Font','FontTitle','Y',None, None, None, None, 'Text',None, 'Font name.',),
+('IniFile','Action','N',None, None, None, None, None, '0;1;3','The type of modification to be made, one of iifEnum',),
+('IniFile','Value','N',None, None, None, None, 'Formatted',None, 'The value to be written.',),
+('IniFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the installing of the .INI value.',),
+('IniFile','FileName','N',None, None, None, None, 'Filename',None, 'The .INI file name in which to write the information',),
+('IniFile','IniFile','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',),
+('IniFile','DirProperty','Y',None, None, None, None, 'Identifier',None, 'Foreign key into the Directory table denoting the directory where the .INI file is.',),
+('IniFile','Key','N',None, None, None, None, 'Formatted',None, 'The .INI file key below Section.',),
+('IniFile','Section','N',None, None, None, None, 'Formatted',None, 'The .INI file Section.',),
+('IniLocator','Type','Y',0,2,None, None, None, None, 'An integer value that determines if the .INI value read is a filename or a directory location or to be used as is w/o interpretation.',),
+('IniLocator','Signature_','N',None, None, None, None, 'Identifier',None, 'The table key. The Signature_ represents a unique file signature and is also the foreign key in the Signature table.',),
+('IniLocator','FileName','N',None, None, None, None, 'Filename',None, 'The .INI file name.',),
+('IniLocator','Key','N',None, None, None, None, 'Text',None, 'Key value (followed by an equals sign in INI file).',),
+('IniLocator','Section','N',None, None, None, None, 'Text',None, 'Section name within in file (within square brackets in INI file).',),
+('IniLocator','Field','Y',0,32767,None, None, None, None, 'The field in the .INI line. If Field is null or 0 the entire line is read.',),
+('InstallExecuteSequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',),
+('InstallExecuteSequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
+('InstallExecuteSequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
+('InstallUISequence','Action','N',None, None, None, None, 'Identifier',None, 'Name of action to invoke, either in the engine or the handler DLL.',),
+('InstallUISequence','Condition','Y',None, None, None, None, 'Condition',None, 'Optional expression which skips the action if evaluates to expFalse.If the expression syntax is invalid, the engine will terminate, returning iesBadActionData.',),
+('InstallUISequence','Sequence','Y',-4,32767,None, None, None, None, 'Number that determines the sort order in which the actions are to be executed.  Leave blank to suppress action.',),
+('IsolatedComponent','Component_Application','N',None, None, 'Component',1,'Identifier',None, 'Key to Component table item for application',),
+('IsolatedComponent','Component_Shared','N',None, None, 'Component',1,'Identifier',None, 'Key to Component table item to be isolated',),
+('LaunchCondition','Description','N',None, None, None, None, 'Formatted',None, 'Localizable text to display when condition fails and install must abort.',),
+('LaunchCondition','Condition','N',None, None, None, None, 'Condition',None, 'Expression which must evaluate to TRUE in order for install to commence.',),
+('ListBox','Text','Y',None, None, None, None, 'Text',None, 'The visible text to be assigned to the item. Optional. If this entry or the entire column is missing, the text is the same as the value.',),
+('ListBox','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to this item. All the items tied to the same property become part of the same listbox.',),
+('ListBox','Value','N',None, None, None, None, 'Formatted',None, 'The value string associated with this item. Selecting the line will set the associated property to this value.',),
+('ListBox','Order','N',1,32767,None, None, None, None, 'A positive integer used to determine the ordering of the items within one list..The integers do not have to be consecutive.',),
+('ListView','Text','Y',None, None, None, None, 'Text',None, 'The visible text to be assigned to the item. Optional. If this entry or the entire column is missing, the text is the same as the value.',),
+('ListView','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to this item. All the items tied to the same property become part of the same listview.',),
+('ListView','Value','N',None, None, None, None, 'Identifier',None, 'The value string associated with this item. Selecting the line will set the associated property to this value.',),
+('ListView','Order','N',1,32767,None, None, None, None, 'A positive integer used to determine the ordering of the items within one list..The integers do not have to be consecutive.',),
+('ListView','Binary_','Y',None, None, 'Binary',1,'Identifier',None, 'The name of the icon to be displayed with the icon. The binary information is looked up from the Binary Table.',),
+('LockPermissions','Table','N',None, None, None, None, 'Identifier','Directory;File;Registry','Reference to another table name',),
+('LockPermissions','Domain','Y',None, None, None, None, 'Formatted',None, 'Domain name for user whose permissions are being set. (usually a property)',),
+('LockPermissions','LockObject','N',None, None, None, None, 'Identifier',None, 'Foreign key into Registry or File table',),
+('LockPermissions','Permission','Y',-2147483647,2147483647,None, None, None, None, 'Permission Access mask.  Full Control = 268435456 (GENERIC_ALL = 0x10000000)',),
+('LockPermissions','User','N',None, None, None, None, 'Formatted',None, 'User for permissions to be set.  (usually a property)',),
+('Media','Source','Y',None, None, None, None, 'Property',None, 'The property defining the location of the cabinet file.',),
+('Media','Cabinet','Y',None, None, None, None, 'Cabinet',None, 'If some or all of the files stored on the media are compressed in a cabinet, the name of that cabinet.',),
+('Media','DiskId','N',1,32767,None, None, None, None, 'Primary key, integer to determine sort order for table.',),
+('Media','DiskPrompt','Y',None, None, None, None, 'Text',None, 'Disk name: the visible text actually printed on the disk.  This will be used to prompt the user when this disk needs to be inserted.',),
+('Media','LastSequence','N',0,32767,None, None, None, None, 'File sequence number for the last file for this media.',),
+('Media','VolumeLabel','Y',None, None, None, None, 'Text',None, 'The label attributed to the volume.',),
+('ModuleComponents','Component','N',None, None, 'Component',1,'Identifier',None, 'Component contained in the module.',),
+('ModuleComponents','Language','N',None, None, 'ModuleSignature',2,None, None, 'Default language ID for module (may be changed by transform).',),
+('ModuleComponents','ModuleID','N',None, None, 'ModuleSignature',1,'Identifier',None, 'Module containing the component.',),
+('ModuleSignature','Language','N',None, None, None, None, None, None, 'Default decimal language of module.',),
+('ModuleSignature','Version','N',None, None, None, None, 'Version',None, 'Version of the module.',),
+('ModuleSignature','ModuleID','N',None, None, None, None, 'Identifier',None, 'Module identifier (String.GUID).',),
+('ModuleDependency','ModuleID','N',None, None, 'ModuleSignature',1,'Identifier',None, 'Module requiring the dependency.',),
+('ModuleDependency','ModuleLanguage','N',None, None, 'ModuleSignature',2,None, None, 'Language of module requiring the dependency.',),
+('ModuleDependency','RequiredID','N',None, None, None, None, None, None, 'String.GUID of required module.',),
+('ModuleDependency','RequiredLanguage','N',None, None, None, None, None, None, 'LanguageID of the required module.',),
+('ModuleDependency','RequiredVersion','Y',None, None, None, None, 'Version',None, 'Version of the required version.',),
+('ModuleExclusion','ModuleID','N',None, None, 'ModuleSignature',1,'Identifier',None, 'String.GUID of module with exclusion requirement.',),
+('ModuleExclusion','ModuleLanguage','N',None, None, 'ModuleSignature',2,None, None, 'LanguageID of module with exclusion requirement.',),
+('ModuleExclusion','ExcludedID','N',None, None, None, None, None, None, 'String.GUID of excluded module.',),
+('ModuleExclusion','ExcludedLanguage','N',None, None, None, None, None, None, 'Language of excluded module.',),
+('ModuleExclusion','ExcludedMaxVersion','Y',None, None, None, None, 'Version',None, 'Maximum version of excluded module.',),
+('ModuleExclusion','ExcludedMinVersion','Y',None, None, None, None, 'Version',None, 'Minimum version of excluded module.',),
+('MoveFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'If this component is not "selected" for installation or removal, no action will be taken on the associated MoveFile entry',),
+('MoveFile','DestFolder','N',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full path to the destination directory',),
+('MoveFile','DestName','Y',None, None, None, None, 'Filename',None, 'Name to be given to the original file after it is moved or copied.  If blank, the destination file will be given the same name as the source file',),
+('MoveFile','FileKey','N',None, None, None, None, 'Identifier',None, 'Primary key that uniquely identifies a particular MoveFile record',),
+('MoveFile','Options','N',0,1,None, None, None, None, 'Integer value specifying the MoveFile operating mode, one of imfoEnum',),
+('MoveFile','SourceFolder','Y',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full path to the source directory',),
+('MoveFile','SourceName','Y',None, None, None, None, 'Text',None, "Name of the source file(s) to be moved or copied.  Can contain the '*' or '?' wildcards.",),
+('MsiAssembly','Attributes','Y',None, None, None, None, None, None, 'Assembly attributes',),
+('MsiAssembly','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Foreign key into Feature table.',),
+('MsiAssembly','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into Component table.',),
+('MsiAssembly','File_Application','Y',None, None, 'File',1,'Identifier',None, 'Foreign key into File table, denoting the application context for private assemblies. Null for global assemblies.',),
+('MsiAssembly','File_Manifest','Y',None, None, 'File',1,'Identifier',None, 'Foreign key into the File table denoting the manifest file for the assembly.',),
+('MsiAssemblyName','Name','N',None, None, None, None, 'Text',None, 'The name part of the name-value pairs for the assembly name.',),
+('MsiAssemblyName','Value','N',None, None, None, None, 'Text',None, 'The value part of the name-value pairs for the assembly name.',),
+('MsiAssemblyName','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into Component table.',),
+('MsiDigitalCertificate','CertData','N',None, None, None, None, 'Binary',None, 'A certificate context blob for a signer certificate',),
+('MsiDigitalCertificate','DigitalCertificate','N',None, None, None, None, 'Identifier',None, 'A unique identifier for the row',),
+('MsiDigitalSignature','Table','N',None, None, None, None, None, 'Media','Reference to another table name (only Media table is supported)',),
+('MsiDigitalSignature','DigitalCertificate_','N',None, None, 'MsiDigitalCertificate',1,'Identifier',None, 'Foreign key to MsiDigitalCertificate table identifying the signer certificate',),
+('MsiDigitalSignature','Hash','Y',None, None, None, None, 'Binary',None, 'The encoded hash blob from the digital signature',),
+('MsiDigitalSignature','SignObject','N',None, None, None, None, 'Text',None, 'Foreign key to Media table',),
+('MsiFileHash','File_','N',None, None, 'File',1,'Identifier',None, 'Primary key, foreign key into File table referencing file with this hash',),
+('MsiFileHash','Options','N',0,32767,None, None, None, None, 'Various options and attributes for this hash.',),
+('MsiFileHash','HashPart1','N',None, None, None, None, None, None, 'Size of file in bytes (long integer).',),
+('MsiFileHash','HashPart2','N',None, None, None, None, None, None, 'Size of file in bytes (long integer).',),
+('MsiFileHash','HashPart3','N',None, None, None, None, None, None, 'Size of file in bytes (long integer).',),
+('MsiFileHash','HashPart4','N',None, None, None, None, None, None, 'Size of file in bytes (long integer).',),
+('MsiPatchHeaders','StreamRef','N',None, None, None, None, 'Identifier',None, 'Primary key. A unique identifier for the row.',),
+('MsiPatchHeaders','Header','N',None, None, None, None, 'Binary',None, 'Binary stream. The patch header, used for patch validation.',),
+('ODBCAttribute','Value','Y',None, None, None, None, 'Text',None, 'Value for ODBC driver attribute',),
+('ODBCAttribute','Attribute','N',None, None, None, None, 'Text',None, 'Name of ODBC driver attribute',),
+('ODBCAttribute','Driver_','N',None, None, 'ODBCDriver',1,'Identifier',None, 'Reference to ODBC driver in ODBCDriver table',),
+('ODBCDriver','Description','N',None, None, None, None, 'Text',None, 'Text used as registered name for driver, non-localized',),
+('ODBCDriver','File_','N',None, None, 'File',1,'Identifier',None, 'Reference to key driver file',),
+('ODBCDriver','Component_','N',None, None, 'Component',1,'Identifier',None, 'Reference to associated component',),
+('ODBCDriver','Driver','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized.internal token for driver',),
+('ODBCDriver','File_Setup','Y',None, None, 'File',1,'Identifier',None, 'Optional reference to key driver setup DLL',),
+('ODBCDataSource','Description','N',None, None, None, None, 'Text',None, 'Text used as registered name for data source',),
+('ODBCDataSource','Component_','N',None, None, 'Component',1,'Identifier',None, 'Reference to associated component',),
+('ODBCDataSource','DataSource','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized.internal token for data source',),
+('ODBCDataSource','DriverDescription','N',None, None, None, None, 'Text',None, 'Reference to driver description, may be existing driver',),
+('ODBCDataSource','Registration','N',0,1,None, None, None, None, 'Registration option: 0=machine, 1=user, others t.b.d.',),
+('ODBCSourceAttribute','Value','Y',None, None, None, None, 'Text',None, 'Value for ODBC data source attribute',),
+('ODBCSourceAttribute','Attribute','N',None, None, None, None, 'Text',None, 'Name of ODBC data source attribute',),
+('ODBCSourceAttribute','DataSource_','N',None, None, 'ODBCDataSource',1,'Identifier',None, 'Reference to ODBC data source in ODBCDataSource table',),
+('ODBCTranslator','Description','N',None, None, None, None, 'Text',None, 'Text used as registered name for translator',),
+('ODBCTranslator','File_','N',None, None, 'File',1,'Identifier',None, 'Reference to key translator file',),
+('ODBCTranslator','Component_','N',None, None, 'Component',1,'Identifier',None, 'Reference to associated component',),
+('ODBCTranslator','File_Setup','Y',None, None, 'File',1,'Identifier',None, 'Optional reference to key translator setup DLL',),
+('ODBCTranslator','Translator','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized.internal token for translator',),
+('Patch','Sequence','N',0,32767,None, None, None, None, 'Primary key, sequence with respect to the media images; order must track cabinet order.',),
+('Patch','Attributes','N',0,32767,None, None, None, None, 'Integer containing bit flags representing patch attributes',),
+('Patch','File_','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token, foreign key to File table, must match identifier in cabinet.',),
+('Patch','Header','Y',None, None, None, None, 'Binary',None, 'Binary stream. The patch header, used for patch validation.',),
+('Patch','PatchSize','N',0,2147483647,None, None, None, None, 'Size of patch in bytes (long integer).',),
+('Patch','StreamRef_','Y',None, None, None, None, 'Identifier',None, 'Identifier. Foreign key to the StreamRef column of the MsiPatchHeaders table.',),
+('PatchPackage','Media_','N',0,32767,None, None, None, None, 'Foreign key to DiskId column of Media table. Indicates the disk containing the patch package.',),
+('PatchPackage','PatchId','N',None, None, None, None, 'Guid',None, 'A unique string GUID representing this patch.',),
+('PublishComponent','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Foreign key into the Feature table.',),
+('PublishComponent','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table.',),
+('PublishComponent','ComponentId','N',None, None, None, None, 'Guid',None, 'A string GUID that represents the component id that will be requested by the alien product.',),
+('PublishComponent','AppData','Y',None, None, None, None, 'Text',None, 'This is localisable Application specific data that can be associated with a Qualified Component.',),
+('PublishComponent','Qualifier','N',None, None, None, None, 'Text',None, 'This is defined only when the ComponentId column is an Qualified Component Id. This is the Qualifier for ProvideComponentIndirect.',),
+('RadioButton','Y','N',0,32767,None, None, None, None, 'The vertical coordinate of the upper left corner of the bounding rectangle of the radio button.',),
+('RadioButton','Text','Y',None, None, None, None, 'Text',None, 'The visible title to be assigned to the radio button.',),
+('RadioButton','Property','N',None, None, None, None, 'Identifier',None, 'A named property to be tied to this radio button. All the buttons tied to the same property become part of the same group.',),
+('RadioButton','Height','N',0,32767,None, None, None, None, 'The height of the button.',),
+('RadioButton','Width','N',0,32767,None, None, None, None, 'The width of the button.',),
+('RadioButton','X','N',0,32767,None, None, None, None, 'The horizontal coordinate of the upper left corner of the bounding rectangle of the radio button.',),
+('RadioButton','Value','N',None, None, None, None, 'Formatted',None, 'The value string associated with this button. Selecting the button will set the associated property to this value.',),
+('RadioButton','Order','N',1,32767,None, None, None, None, 'A positive integer used to determine the ordering of the items within one list..The integers do not have to be consecutive.',),
+('RadioButton','Help','Y',None, None, None, None, 'Text',None, 'The help strings used with the button. The text is optional.',),
+('Registry','Name','Y',None, None, None, None, 'Formatted',None, 'The registry value name.',),
+('Registry','Value','Y',None, None, None, None, 'Formatted',None, 'The registry value.',),
+('Registry','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the installing of the registry value.',),
+('Registry','Key','N',None, None, None, None, 'RegPath',None, 'The key for the registry value.',),
+('Registry','Registry','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',),
+('Registry','Root','N',-1,3,None, None, None, None, 'The predefined root key for the registry value, one of rrkEnum.',),
+('RegLocator','Name','Y',None, None, None, None, 'Formatted',None, 'The registry value name.',),
+('RegLocator','Type','Y',0,18,None, None, None, None, 'An integer value that determines if the registry value is a filename or a directory location or to be used as is w/o interpretation.',),
+('RegLocator','Signature_','N',None, None, None, None, 'Identifier',None, 'The table key. The Signature_ represents a unique file signature and is also the foreign key in the Signature table. If the type is 0, the registry values refers a directory, and _Signature is not a foreign key.',),
+('RegLocator','Key','N',None, None, None, None, 'RegPath',None, 'The key for the registry value.',),
+('RegLocator','Root','N',0,3,None, None, None, None, 'The predefined root key for the registry value, one of rrkEnum.',),
+('RemoveFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key referencing Component that controls the file to be removed.',),
+('RemoveFile','FileKey','N',None, None, None, None, 'Identifier',None, 'Primary key used to identify a particular file entry',),
+('RemoveFile','FileName','Y',None, None, None, None, 'WildCardFilename',None, 'Name of the file to be removed.',),
+('RemoveFile','DirProperty','N',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full pathname to the folder of the file to be removed.',),
+('RemoveFile','InstallMode','N',None, None, None, None, None, '1;2;3','Installation option, one of iimEnum.',),
+('RemoveIniFile','Action','N',None, None, None, None, None, '2;4','The type of modification to be made, one of iifEnum.',),
+('RemoveIniFile','Value','Y',None, None, None, None, 'Formatted',None, 'The value to be deleted. The value is required when Action is iifIniRemoveTag',),
+('RemoveIniFile','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the deletion of the .INI value.',),
+('RemoveIniFile','FileName','N',None, None, None, None, 'Filename',None, 'The .INI file name in which to delete the information',),
+('RemoveIniFile','DirProperty','Y',None, None, None, None, 'Identifier',None, 'Foreign key into the Directory table denoting the directory where the .INI file is.',),
+('RemoveIniFile','Key','N',None, None, None, None, 'Formatted',None, 'The .INI file key below Section.',),
+('RemoveIniFile','Section','N',None, None, None, None, 'Formatted',None, 'The .INI file Section.',),
+('RemoveIniFile','RemoveIniFile','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',),
+('RemoveRegistry','Name','Y',None, None, None, None, 'Formatted',None, 'The registry value name.',),
+('RemoveRegistry','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table referencing component that controls the deletion of the registry value.',),
+('RemoveRegistry','Key','N',None, None, None, None, 'RegPath',None, 'The key for the registry value.',),
+('RemoveRegistry','Root','N',-1,3,None, None, None, None, 'The predefined root key for the registry value, one of rrkEnum',),
+('RemoveRegistry','RemoveRegistry','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',),
+('ReserveCost','Component_','N',None, None, 'Component',1,'Identifier',None, 'Reserve a specified amount of space if this component is to be installed.',),
+('ReserveCost','ReserveFolder','Y',None, None, None, None, 'Identifier',None, 'Name of a property whose value is assumed to resolve to the full path to the destination directory',),
+('ReserveCost','ReserveKey','N',None, None, None, None, 'Identifier',None, 'Primary key that uniquely identifies a particular ReserveCost record',),
+('ReserveCost','ReserveLocal','N',0,2147483647,None, None, None, None, 'Disk space to reserve if linked component is installed locally.',),
+('ReserveCost','ReserveSource','N',0,2147483647,None, None, None, None, 'Disk space to reserve if linked component is installed to run from the source location.',),
+('SelfReg','File_','N',None, None, 'File',1,'Identifier',None, 'Foreign key into the File table denoting the module that needs to be registered.',),
+('SelfReg','Cost','Y',0,32767,None, None, None, None, 'The cost of registering the module.',),
+('ServiceControl','Name','N',None, None, None, None, 'Formatted',None, 'Name of a service. /, \\, comma and space are invalid',),
+('ServiceControl','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table that controls the startup of the service',),
+('ServiceControl','Event','N',0,187,None, None, None, None, 'Bit field:  Install:  0x1 = Start, 0x2 = Stop, 0x8 = Delete, Uninstall: 0x10 = Start, 0x20 = Stop, 0x80 = Delete',),
+('ServiceControl','ServiceControl','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',),
+('ServiceControl','Arguments','Y',None, None, None, None, 'Formatted',None, 'Arguments for the service.  Separate by [~].',),
+('ServiceControl','Wait','Y',0,1,None, None, None, None, 'Boolean for whether to wait for the service to fully start',),
+('ServiceInstall','Name','N',None, None, None, None, 'Formatted',None, 'Internal Name of the Service',),
+('ServiceInstall','Description','Y',None, None, None, None, 'Text',None, 'Description of service.',),
+('ServiceInstall','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table that controls the startup of the service',),
+('ServiceInstall','Arguments','Y',None, None, None, None, 'Formatted',None, 'Arguments to include in every start of the service, passed to WinMain',),
+('ServiceInstall','ServiceInstall','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',),
+('ServiceInstall','Dependencies','Y',None, None, None, None, 'Formatted',None, 'Other services this depends on to start.  Separate by [~], and end with [~][~]',),
+('ServiceInstall','DisplayName','Y',None, None, None, None, 'Formatted',None, 'External Name of the Service',),
+('ServiceInstall','ErrorControl','N',-2147483647,2147483647,None, None, None, None, 'Severity of error if service fails to start',),
+('ServiceInstall','LoadOrderGroup','Y',None, None, None, None, 'Formatted',None, 'LoadOrderGroup',),
+('ServiceInstall','Password','Y',None, None, None, None, 'Formatted',None, 'password to run service with.  (with StartName)',),
+('ServiceInstall','ServiceType','N',-2147483647,2147483647,None, None, None, None, 'Type of the service',),
+('ServiceInstall','StartName','Y',None, None, None, None, 'Formatted',None, 'User or object name to run service as',),
+('ServiceInstall','StartType','N',0,4,None, None, None, None, 'Type of the service',),
+('Shortcut','Name','N',None, None, None, None, 'Filename',None, 'The name of the shortcut to be created.',),
+('Shortcut','Description','Y',None, None, None, None, 'Text',None, 'The description for the shortcut.',),
+('Shortcut','Component_','N',None, None, 'Component',1,'Identifier',None, 'Foreign key into the Component table denoting the component whose selection gates the the shortcut creation/deletion.',),
+('Shortcut','Icon_','Y',None, None, 'Icon',1,'Identifier',None, 'Foreign key into the File table denoting the external icon file for the shortcut.',),
+('Shortcut','IconIndex','Y',-32767,32767,None, None, None, None, 'The icon index for the shortcut.',),
+('Shortcut','Directory_','N',None, None, 'Directory',1,'Identifier',None, 'Foreign key into the Directory table denoting the directory where the shortcut file is created.',),
+('Shortcut','Target','N',None, None, None, None, 'Shortcut',None, 'The shortcut target. This is usually a property that is expanded to a file or a folder that the shortcut points to.',),
+('Shortcut','Arguments','Y',None, None, None, None, 'Formatted',None, 'The command-line arguments for the shortcut.',),
+('Shortcut','Shortcut','N',None, None, None, None, 'Identifier',None, 'Primary key, non-localized token.',),
+('Shortcut','Hotkey','Y',0,32767,None, None, None, None, 'The hotkey for the shortcut. It has the virtual-key code for the key in the low-order byte, and the modifier flags in the high-order byte. ',),
+('Shortcut','ShowCmd','Y',None, None, None, None, None, '1;3;7','The show command for the application window.The following values may be used.',),
+('Shortcut','WkDir','Y',None, None, None, None, 'Identifier',None, 'Name of property defining location of working directory.',),
+('Signature','FileName','N',None, None, None, None, 'Filename',None, 'The name of the file. This may contain a "short name|long name" pair.',),
+('Signature','Signature','N',None, None, None, None, 'Identifier',None, 'The table key. The Signature represents a unique file signature.',),
+('Signature','Languages','Y',None, None, None, None, 'Language',None, 'The languages supported by the file.',),
+('Signature','MaxDate','Y',0,2147483647,None, None, None, None, 'The maximum creation date of the file.',),
+('Signature','MaxSize','Y',0,2147483647,None, None, None, None, 'The maximum size of the file. ',),
+('Signature','MaxVersion','Y',None, None, None, None, 'Text',None, 'The maximum version of the file.',),
+('Signature','MinDate','Y',0,2147483647,None, None, None, None, 'The minimum creation date of the file.',),
+('Signature','MinSize','Y',0,2147483647,None, None, None, None, 'The minimum size of the file.',),
+('Signature','MinVersion','Y',None, None, None, None, 'Text',None, 'The minimum version of the file.',),
+('TextStyle','TextStyle','N',None, None, None, None, 'Identifier',None, 'Name of the style. The primary key of this table. This name is embedded in the texts to indicate a style change.',),
+('TextStyle','Color','Y',0,16777215,None, None, None, None, 'A long integer indicating the color of the string in the RGB format (Red, Green, Blue each 0-255, RGB = R + 256*G + 256^2*B).',),
+('TextStyle','FaceName','N',None, None, None, None, 'Text',None, 'A string indicating the name of the font used. Required. The string must be at most 31 characters long.',),
+('TextStyle','Size','N',0,32767,None, None, None, None, 'The size of the font used. This size is given in our units (1/12 of the system font height). Assuming that the system font is set to 12 point size, this is equivalent to the point size.',),
+('TextStyle','StyleBits','Y',0,15,None, None, None, None, 'A combination of style bits.',),
+('TypeLib','Description','Y',None, None, None, None, 'Text',None, None, ),
+('TypeLib','Feature_','N',None, None, 'Feature',1,'Identifier',None, 'Required foreign key into the Feature Table, specifying the feature to validate or install in order for the type library to be operational.',),
+('TypeLib','Component_','N',None, None, 'Component',1,'Identifier',None, 'Required foreign key into the Component Table, specifying the component for which to return a path when called through LocateComponent.',),
+('TypeLib','Directory_','Y',None, None, 'Directory',1,'Identifier',None, 'Optional. The foreign key into the Directory table denoting the path to the help file for the type library.',),
+('TypeLib','Language','N',0,32767,None, None, None, None, 'The language of the library.',),
+('TypeLib','Version','Y',0,16777215,None, None, None, None, 'The version of the library. The minor version is in the lower 8 bits of the integer. The major version is in the next 16 bits. ',),
+('TypeLib','Cost','Y',0,2147483647,None, None, None, None, 'The cost associated with the registration of the typelib. This column is currently optional.',),
+('TypeLib','LibID','N',None, None, None, None, 'Guid',None, 'The GUID that represents the library.',),
+('UIText','Text','Y',None, None, None, None, 'Text',None, 'The localized version of the string.',),
+('UIText','Key','N',None, None, None, None, 'Identifier',None, 'A unique key that identifies the particular string.',),
+('Upgrade','Attributes','N',0,2147483647,None, None, None, None, 'The attributes of this product set.',),
+('Upgrade','Language','Y',None, None, None, None, 'Language',None, 'A comma-separated list of languages for either products in this set or products not in this set.',),
+('Upgrade','ActionProperty','N',None, None, None, None, 'UpperCase',None, 'The property to set when a product in this set is found.',),
+('Upgrade','Remove','Y',None, None, None, None, 'Formatted',None, 'The list of features to remove when uninstalling a product from this set.  The default is "ALL".',),
+('Upgrade','UpgradeCode','N',None, None, None, None, 'Guid',None, 'The UpgradeCode GUID belonging to the products in this set.',),
+('Upgrade','VersionMax','Y',None, None, None, None, 'Text',None, 'The maximum ProductVersion of the products in this set.  The set may or may not include products with this particular version.',),
+('Upgrade','VersionMin','Y',None, None, None, None, 'Text',None, 'The minimum ProductVersion of the products in this set.  The set may or may not include products with this particular version.',),
+('Verb','Sequence','Y',0,32767,None, None, None, None, 'Order within the verbs for a particular extension. Also used simply to specify the default verb.',),
+('Verb','Argument','Y',None, None, None, None, 'Formatted',None, 'Optional value for the command arguments.',),
+('Verb','Extension_','N',None, None, 'Extension',1,'Text',None, 'The extension associated with the table row.',),
+('Verb','Verb','N',None, None, None, None, 'Text',None, 'The verb for the command.',),
+('Verb','Command','Y',None, None, None, None, 'Formatted',None, 'The command text.',),
 ]

Modified: python/branches/py3k-struni/Lib/msilib/sequence.py
==============================================================================
--- python/branches/py3k-struni/Lib/msilib/sequence.py	(original)
+++ python/branches/py3k-struni/Lib/msilib/sequence.py	Wed May  2 21:09:54 2007
@@ -1,126 +1,126 @@
 AdminExecuteSequence = [
-(u'InstallInitialize', None, 1500),
-(u'InstallFinalize', None, 6600),
-(u'InstallFiles', None, 4000),
-(u'InstallAdminPackage', None, 3900),
-(u'FileCost', None, 900),
-(u'CostInitialize', None, 800),
-(u'CostFinalize', None, 1000),
-(u'InstallValidate', None, 1400),
+('InstallInitialize', None, 1500),
+('InstallFinalize', None, 6600),
+('InstallFiles', None, 4000),
+('InstallAdminPackage', None, 3900),
+('FileCost', None, 900),
+('CostInitialize', None, 800),
+('CostFinalize', None, 1000),
+('InstallValidate', None, 1400),
 ]
 
 AdminUISequence = [
-(u'FileCost', None, 900),
-(u'CostInitialize', None, 800),
-(u'CostFinalize', None, 1000),
-(u'ExecuteAction', None, 1300),
-(u'ExitDialog', None, -1),
-(u'FatalError', None, -3),
-(u'UserExit', None, -2),
+('FileCost', None, 900),
+('CostInitialize', None, 800),
+('CostFinalize', None, 1000),
+('ExecuteAction', None, 1300),
+('ExitDialog', None, -1),
+('FatalError', None, -3),
+('UserExit', None, -2),
 ]
 
 AdvtExecuteSequence = [
-(u'InstallInitialize', None, 1500),
-(u'InstallFinalize', None, 6600),
-(u'CostInitialize', None, 800),
-(u'CostFinalize', None, 1000),
-(u'InstallValidate', None, 1400),
-(u'CreateShortcuts', None, 4500),
-(u'MsiPublishAssemblies', None, 6250),
-(u'PublishComponents', None, 6200),
-(u'PublishFeatures', None, 6300),
-(u'PublishProduct', None, 6400),
-(u'RegisterClassInfo', None, 4600),
-(u'RegisterExtensionInfo', None, 4700),
-(u'RegisterMIMEInfo', None, 4900),
-(u'RegisterProgIdInfo', None, 4800),
+('InstallInitialize', None, 1500),
+('InstallFinalize', None, 6600),
+('CostInitialize', None, 800),
+('CostFinalize', None, 1000),
+('InstallValidate', None, 1400),
+('CreateShortcuts', None, 4500),
+('MsiPublishAssemblies', None, 6250),
+('PublishComponents', None, 6200),
+('PublishFeatures', None, 6300),
+('PublishProduct', None, 6400),
+('RegisterClassInfo', None, 4600),
+('RegisterExtensionInfo', None, 4700),
+('RegisterMIMEInfo', None, 4900),
+('RegisterProgIdInfo', None, 4800),
 ]
 
 InstallExecuteSequence = [
-(u'InstallInitialize', None, 1500),
-(u'InstallFinalize', None, 6600),
-(u'InstallFiles', None, 4000),
-(u'FileCost', None, 900),
-(u'CostInitialize', None, 800),
-(u'CostFinalize', None, 1000),
-(u'InstallValidate', None, 1400),
-(u'CreateShortcuts', None, 4500),
-(u'MsiPublishAssemblies', None, 6250),
-(u'PublishComponents', None, 6200),
-(u'PublishFeatures', None, 6300),
-(u'PublishProduct', None, 6400),
-(u'RegisterClassInfo', None, 4600),
-(u'RegisterExtensionInfo', None, 4700),
-(u'RegisterMIMEInfo', None, 4900),
-(u'RegisterProgIdInfo', None, 4800),
-(u'AllocateRegistrySpace', u'NOT Installed', 1550),
-(u'AppSearch', None, 400),
-(u'BindImage', None, 4300),
-(u'CCPSearch', u'NOT Installed', 500),
-(u'CreateFolders', None, 3700),
-(u'DeleteServices', u'VersionNT', 2000),
-(u'DuplicateFiles', None, 4210),
-(u'FindRelatedProducts', None, 200),
-(u'InstallODBC', None, 5400),
-(u'InstallServices', u'VersionNT', 5800),
-(u'IsolateComponents', None, 950),
-(u'LaunchConditions', None, 100),
-(u'MigrateFeatureStates', None, 1200),
-(u'MoveFiles', None, 3800),
-(u'PatchFiles', None, 4090),
-(u'ProcessComponents', None, 1600),
-(u'RegisterComPlus', None, 5700),
-(u'RegisterFonts', None, 5300),
-(u'RegisterProduct', None, 6100),
-(u'RegisterTypeLibraries', None, 5500),
-(u'RegisterUser', None, 6000),
-(u'RemoveDuplicateFiles', None, 3400),
-(u'RemoveEnvironmentStrings', None, 3300),
-(u'RemoveExistingProducts', None, 6700),
-(u'RemoveFiles', None, 3500),
-(u'RemoveFolders', None, 3600),
-(u'RemoveIniValues', None, 3100),
-(u'RemoveODBC', None, 2400),
-(u'RemoveRegistryValues', None, 2600),
-(u'RemoveShortcuts', None, 3200),
-(u'RMCCPSearch', u'NOT Installed', 600),
-(u'SelfRegModules', None, 5600),
-(u'SelfUnregModules', None, 2200),
-(u'SetODBCFolders', None, 1100),
-(u'StartServices', u'VersionNT', 5900),
-(u'StopServices', u'VersionNT', 1900),
-(u'MsiUnpublishAssemblies', None, 1750),
-(u'UnpublishComponents', None, 1700),
-(u'UnpublishFeatures', None, 1800),
-(u'UnregisterClassInfo', None, 2700),
-(u'UnregisterComPlus', None, 2100),
-(u'UnregisterExtensionInfo', None, 2800),
-(u'UnregisterFonts', None, 2500),
-(u'UnregisterMIMEInfo', None, 3000),
-(u'UnregisterProgIdInfo', None, 2900),
-(u'UnregisterTypeLibraries', None, 2300),
-(u'ValidateProductID', None, 700),
-(u'WriteEnvironmentStrings', None, 5200),
-(u'WriteIniValues', None, 5100),
-(u'WriteRegistryValues', None, 5000),
+('InstallInitialize', None, 1500),
+('InstallFinalize', None, 6600),
+('InstallFiles', None, 4000),
+('FileCost', None, 900),
+('CostInitialize', None, 800),
+('CostFinalize', None, 1000),
+('InstallValidate', None, 1400),
+('CreateShortcuts', None, 4500),
+('MsiPublishAssemblies', None, 6250),
+('PublishComponents', None, 6200),
+('PublishFeatures', None, 6300),
+('PublishProduct', None, 6400),
+('RegisterClassInfo', None, 4600),
+('RegisterExtensionInfo', None, 4700),
+('RegisterMIMEInfo', None, 4900),
+('RegisterProgIdInfo', None, 4800),
+('AllocateRegistrySpace', 'NOT Installed', 1550),
+('AppSearch', None, 400),
+('BindImage', None, 4300),
+('CCPSearch', 'NOT Installed', 500),
+('CreateFolders', None, 3700),
+('DeleteServices', 'VersionNT', 2000),
+('DuplicateFiles', None, 4210),
+('FindRelatedProducts', None, 200),
+('InstallODBC', None, 5400),
+('InstallServices', 'VersionNT', 5800),
+('IsolateComponents', None, 950),
+('LaunchConditions', None, 100),
+('MigrateFeatureStates', None, 1200),
+('MoveFiles', None, 3800),
+('PatchFiles', None, 4090),
+('ProcessComponents', None, 1600),
+('RegisterComPlus', None, 5700),
+('RegisterFonts', None, 5300),
+('RegisterProduct', None, 6100),
+('RegisterTypeLibraries', None, 5500),
+('RegisterUser', None, 6000),
+('RemoveDuplicateFiles', None, 3400),
+('RemoveEnvironmentStrings', None, 3300),
+('RemoveExistingProducts', None, 6700),
+('RemoveFiles', None, 3500),
+('RemoveFolders', None, 3600),
+('RemoveIniValues', None, 3100),
+('RemoveODBC', None, 2400),
+('RemoveRegistryValues', None, 2600),
+('RemoveShortcuts', None, 3200),
+('RMCCPSearch', 'NOT Installed', 600),
+('SelfRegModules', None, 5600),
+('SelfUnregModules', None, 2200),
+('SetODBCFolders', None, 1100),
+('StartServices', 'VersionNT', 5900),
+('StopServices', 'VersionNT', 1900),
+('MsiUnpublishAssemblies', None, 1750),
+('UnpublishComponents', None, 1700),
+('UnpublishFeatures', None, 1800),
+('UnregisterClassInfo', None, 2700),
+('UnregisterComPlus', None, 2100),
+('UnregisterExtensionInfo', None, 2800),
+('UnregisterFonts', None, 2500),
+('UnregisterMIMEInfo', None, 3000),
+('UnregisterProgIdInfo', None, 2900),
+('UnregisterTypeLibraries', None, 2300),
+('ValidateProductID', None, 700),
+('WriteEnvironmentStrings', None, 5200),
+('WriteIniValues', None, 5100),
+('WriteRegistryValues', None, 5000),
 ]
 
 InstallUISequence = [
-(u'FileCost', None, 900),
-(u'CostInitialize', None, 800),
-(u'CostFinalize', None, 1000),
-(u'ExecuteAction', None, 1300),
-(u'ExitDialog', None, -1),
-(u'FatalError', None, -3),
-(u'UserExit', None, -2),
-(u'AppSearch', None, 400),
-(u'CCPSearch', u'NOT Installed', 500),
-(u'FindRelatedProducts', None, 200),
-(u'IsolateComponents', None, 950),
-(u'LaunchConditions', None, 100),
-(u'MigrateFeatureStates', None, 1200),
-(u'RMCCPSearch', u'NOT Installed', 600),
-(u'ValidateProductID', None, 700),
+('FileCost', None, 900),
+('CostInitialize', None, 800),
+('CostFinalize', None, 1000),
+('ExecuteAction', None, 1300),
+('ExitDialog', None, -1),
+('FatalError', None, -3),
+('UserExit', None, -2),
+('AppSearch', None, 400),
+('CCPSearch', 'NOT Installed', 500),
+('FindRelatedProducts', None, 200),
+('IsolateComponents', None, 950),
+('LaunchConditions', None, 100),
+('MigrateFeatureStates', None, 1200),
+('RMCCPSearch', 'NOT Installed', 600),
+('ValidateProductID', None, 700),
 ]
 
 tables=['AdminExecuteSequence', 'AdminUISequence', 'AdvtExecuteSequence', 'InstallExecuteSequence', 'InstallUISequence']

Modified: python/branches/py3k-struni/Lib/msilib/text.py
==============================================================================
--- python/branches/py3k-struni/Lib/msilib/text.py	(original)
+++ python/branches/py3k-struni/Lib/msilib/text.py	Wed May  2 21:09:54 2007
@@ -1,129 +1,129 @@
 import msilib,os;dirname=os.path.dirname(__file__)
 
 ActionText = [
-(u'InstallValidate', u'Validating install', None),
-(u'InstallFiles', u'Copying new files', u'File: [1],  Directory: [9],  Size: [6]'),
-(u'InstallAdminPackage', u'Copying network install files', u'File: [1], Directory: [9], Size: [6]'),
-(u'FileCost', u'Computing space requirements', None),
-(u'CostInitialize', u'Computing space requirements', None),
-(u'CostFinalize', u'Computing space requirements', None),
-(u'CreateShortcuts', u'Creating shortcuts', u'Shortcut: [1]'),
-(u'PublishComponents', u'Publishing Qualified Components', u'Component ID: [1], Qualifier: [2]'),
-(u'PublishFeatures', u'Publishing Product Features', u'Feature: [1]'),
-(u'PublishProduct', u'Publishing product information', None),
-(u'RegisterClassInfo', u'Registering Class servers', u'Class Id: [1]'),
-(u'RegisterExtensionInfo', u'Registering extension servers', u'Extension: [1]'),
-(u'RegisterMIMEInfo', u'Registering MIME info', u'MIME Content Type: [1], Extension: [2]'),
-(u'RegisterProgIdInfo', u'Registering program identifiers', u'ProgId: [1]'),
-(u'AllocateRegistrySpace', u'Allocating registry space', u'Free space: [1]'),
-(u'AppSearch', u'Searching for installed applications', u'Property: [1], Signature: [2]'),
-(u'BindImage', u'Binding executables', u'File: [1]'),
-(u'CCPSearch', u'Searching for qualifying products', None),
-(u'CreateFolders', u'Creating folders', u'Folder: [1]'),
-(u'DeleteServices', u'Deleting services', u'Service: [1]'),
-(u'DuplicateFiles', u'Creating duplicate files', u'File: [1],  Directory: [9],  Size: [6]'),
-(u'FindRelatedProducts', u'Searching for related applications', u'Found application: [1]'),
-(u'InstallODBC', u'Installing ODBC components', None),
-(u'InstallServices', u'Installing new services', u'Service: [2]'),
-(u'LaunchConditions', u'Evaluating launch conditions', None),
-(u'MigrateFeatureStates', u'Migrating feature states from related applications', u'Application: [1]'),
-(u'MoveFiles', u'Moving files', u'File: [1],  Directory: [9],  Size: [6]'),
-(u'PatchFiles', u'Patching files', u'File: [1],  Directory: [2],  Size: [3]'),
-(u'ProcessComponents', u'Updating component registration', None),
-(u'RegisterComPlus', u'Registering COM+ Applications and Components', u'AppId: [1]{{, AppType: [2], Users: [3], RSN: [4]}}'),
-(u'RegisterFonts', u'Registering fonts', u'Font: [1]'),
-(u'RegisterProduct', u'Registering product', u'[1]'),
-(u'RegisterTypeLibraries', u'Registering type libraries', u'LibID: [1]'),
-(u'RegisterUser', u'Registering user', u'[1]'),
-(u'RemoveDuplicateFiles', u'Removing duplicated files', u'File: [1], Directory: [9]'),
-(u'RemoveEnvironmentStrings', u'Updating environment strings', u'Name: [1], Value: [2], Action [3]'),
-(u'RemoveExistingProducts', u'Removing applications', u'Application: [1], Command line: [2]'),
-(u'RemoveFiles', u'Removing files', u'File: [1], Directory: [9]'),
-(u'RemoveFolders', u'Removing folders', u'Folder: [1]'),
-(u'RemoveIniValues', u'Removing INI files entries', u'File: [1],  Section: [2],  Key: [3], Value: [4]'),
-(u'RemoveODBC', u'Removing ODBC components', None),
-(u'RemoveRegistryValues', u'Removing system registry values', u'Key: [1], Name: [2]'),
-(u'RemoveShortcuts', u'Removing shortcuts', u'Shortcut: [1]'),
-(u'RMCCPSearch', u'Searching for qualifying products', None),
-(u'SelfRegModules', u'Registering modules', u'File: [1], Folder: [2]'),
-(u'SelfUnregModules', u'Unregistering modules', u'File: [1], Folder: [2]'),
-(u'SetODBCFolders', u'Initializing ODBC directories', None),
-(u'StartServices', u'Starting services', u'Service: [1]'),
-(u'StopServices', u'Stopping services', u'Service: [1]'),
-(u'UnpublishComponents', u'Unpublishing Qualified Components', u'Component ID: [1], Qualifier: [2]'),
-(u'UnpublishFeatures', u'Unpublishing Product Features', u'Feature: [1]'),
-(u'UnregisterClassInfo', u'Unregister Class servers', u'Class Id: [1]'),
-(u'UnregisterComPlus', u'Unregistering COM+ Applications and Components', u'AppId: [1]{{, AppType: [2]}}'),
-(u'UnregisterExtensionInfo', u'Unregistering extension servers', u'Extension: [1]'),
-(u'UnregisterFonts', u'Unregistering fonts', u'Font: [1]'),
-(u'UnregisterMIMEInfo', u'Unregistering MIME info', u'MIME Content Type: [1], Extension: [2]'),
-(u'UnregisterProgIdInfo', u'Unregistering program identifiers', u'ProgId: [1]'),
-(u'UnregisterTypeLibraries', u'Unregistering type libraries', u'LibID: [1]'),
-(u'WriteEnvironmentStrings', u'Updating environment strings', u'Name: [1], Value: [2], Action [3]'),
-(u'WriteIniValues', u'Writing INI files values', u'File: [1],  Section: [2],  Key: [3], Value: [4]'),
-(u'WriteRegistryValues', u'Writing system registry values', u'Key: [1], Name: [2], Value: [3]'),
-(u'Advertise', u'Advertising application', None),
-(u'GenerateScript', u'Generating script operations for action:', u'[1]'),
-(u'InstallSFPCatalogFile', u'Installing system catalog', u'File: [1],  Dependencies: [2]'),
-(u'MsiPublishAssemblies', u'Publishing assembly information', u'Application Context:[1], Assembly Name:[2]'),
-(u'MsiUnpublishAssemblies', u'Unpublishing assembly information', u'Application Context:[1], Assembly Name:[2]'),
-(u'Rollback', u'Rolling back action:', u'[1]'),
-(u'RollbackCleanup', u'Removing backup files', u'File: [1]'),
-(u'UnmoveFiles', u'Removing moved files', u'File: [1], Directory: [9]'),
-(u'UnpublishProduct', u'Unpublishing product information', None),
+('InstallValidate', 'Validating install', None),
+('InstallFiles', 'Copying new files', 'File: [1],  Directory: [9],  Size: [6]'),
+('InstallAdminPackage', 'Copying network install files', 'File: [1], Directory: [9], Size: [6]'),
+('FileCost', 'Computing space requirements', None),
+('CostInitialize', 'Computing space requirements', None),
+('CostFinalize', 'Computing space requirements', None),
+('CreateShortcuts', 'Creating shortcuts', 'Shortcut: [1]'),
+('PublishComponents', 'Publishing Qualified Components', 'Component ID: [1], Qualifier: [2]'),
+('PublishFeatures', 'Publishing Product Features', 'Feature: [1]'),
+('PublishProduct', 'Publishing product information', None),
+('RegisterClassInfo', 'Registering Class servers', 'Class Id: [1]'),
+('RegisterExtensionInfo', 'Registering extension servers', 'Extension: [1]'),
+('RegisterMIMEInfo', 'Registering MIME info', 'MIME Content Type: [1], Extension: [2]'),
+('RegisterProgIdInfo', 'Registering program identifiers', 'ProgId: [1]'),
+('AllocateRegistrySpace', 'Allocating registry space', 'Free space: [1]'),
+('AppSearch', 'Searching for installed applications', 'Property: [1], Signature: [2]'),
+('BindImage', 'Binding executables', 'File: [1]'),
+('CCPSearch', 'Searching for qualifying products', None),
+('CreateFolders', 'Creating folders', 'Folder: [1]'),
+('DeleteServices', 'Deleting services', 'Service: [1]'),
+('DuplicateFiles', 'Creating duplicate files', 'File: [1],  Directory: [9],  Size: [6]'),
+('FindRelatedProducts', 'Searching for related applications', 'Found application: [1]'),
+('InstallODBC', 'Installing ODBC components', None),
+('InstallServices', 'Installing new services', 'Service: [2]'),
+('LaunchConditions', 'Evaluating launch conditions', None),
+('MigrateFeatureStates', 'Migrating feature states from related applications', 'Application: [1]'),
+('MoveFiles', 'Moving files', 'File: [1],  Directory: [9],  Size: [6]'),
+('PatchFiles', 'Patching files', 'File: [1],  Directory: [2],  Size: [3]'),
+('ProcessComponents', 'Updating component registration', None),
+('RegisterComPlus', 'Registering COM+ Applications and Components', 'AppId: [1]{{, AppType: [2], Users: [3], RSN: [4]}}'),
+('RegisterFonts', 'Registering fonts', 'Font: [1]'),
+('RegisterProduct', 'Registering product', '[1]'),
+('RegisterTypeLibraries', 'Registering type libraries', 'LibID: [1]'),
+('RegisterUser', 'Registering user', '[1]'),
+('RemoveDuplicateFiles', 'Removing duplicated files', 'File: [1], Directory: [9]'),
+('RemoveEnvironmentStrings', 'Updating environment strings', 'Name: [1], Value: [2], Action [3]'),
+('RemoveExistingProducts', 'Removing applications', 'Application: [1], Command line: [2]'),
+('RemoveFiles', 'Removing files', 'File: [1], Directory: [9]'),
+('RemoveFolders', 'Removing folders', 'Folder: [1]'),
+('RemoveIniValues', 'Removing INI files entries', 'File: [1],  Section: [2],  Key: [3], Value: [4]'),
+('RemoveODBC', 'Removing ODBC components', None),
+('RemoveRegistryValues', 'Removing system registry values', 'Key: [1], Name: [2]'),
+('RemoveShortcuts', 'Removing shortcuts', 'Shortcut: [1]'),
+('RMCCPSearch', 'Searching for qualifying products', None),
+('SelfRegModules', 'Registering modules', 'File: [1], Folder: [2]'),
+('SelfUnregModules', 'Unregistering modules', 'File: [1], Folder: [2]'),
+('SetODBCFolders', 'Initializing ODBC directories', None),
+('StartServices', 'Starting services', 'Service: [1]'),
+('StopServices', 'Stopping services', 'Service: [1]'),
+('UnpublishComponents', 'Unpublishing Qualified Components', 'Component ID: [1], Qualifier: [2]'),
+('UnpublishFeatures', 'Unpublishing Product Features', 'Feature: [1]'),
+('UnregisterClassInfo', 'Unregister Class servers', 'Class Id: [1]'),
+('UnregisterComPlus', 'Unregistering COM+ Applications and Components', 'AppId: [1]{{, AppType: [2]}}'),
+('UnregisterExtensionInfo', 'Unregistering extension servers', 'Extension: [1]'),
+('UnregisterFonts', 'Unregistering fonts', 'Font: [1]'),
+('UnregisterMIMEInfo', 'Unregistering MIME info', 'MIME Content Type: [1], Extension: [2]'),
+('UnregisterProgIdInfo', 'Unregistering program identifiers', 'ProgId: [1]'),
+('UnregisterTypeLibraries', 'Unregistering type libraries', 'LibID: [1]'),
+('WriteEnvironmentStrings', 'Updating environment strings', 'Name: [1], Value: [2], Action [3]'),
+('WriteIniValues', 'Writing INI files values', 'File: [1],  Section: [2],  Key: [3], Value: [4]'),
+('WriteRegistryValues', 'Writing system registry values', 'Key: [1], Name: [2], Value: [3]'),
+('Advertise', 'Advertising application', None),
+('GenerateScript', 'Generating script operations for action:', '[1]'),
+('InstallSFPCatalogFile', 'Installing system catalog', 'File: [1],  Dependencies: [2]'),
+('MsiPublishAssemblies', 'Publishing assembly information', 'Application Context:[1], Assembly Name:[2]'),
+('MsiUnpublishAssemblies', 'Unpublishing assembly information', 'Application Context:[1], Assembly Name:[2]'),
+('Rollback', 'Rolling back action:', '[1]'),
+('RollbackCleanup', 'Removing backup files', 'File: [1]'),
+('UnmoveFiles', 'Removing moved files', 'File: [1], Directory: [9]'),
+('UnpublishProduct', 'Unpublishing product information', None),
 ]
 
 UIText = [
-(u'AbsentPath', None),
-(u'bytes', u'bytes'),
-(u'GB', u'GB'),
-(u'KB', u'KB'),
-(u'MB', u'MB'),
-(u'MenuAbsent', u'Entire feature will be unavailable'),
-(u'MenuAdvertise', u'Feature will be installed when required'),
-(u'MenuAllCD', u'Entire feature will be installed to run from CD'),
-(u'MenuAllLocal', u'Entire feature will be installed on local hard drive'),
-(u'MenuAllNetwork', u'Entire feature will be installed to run from network'),
-(u'MenuCD', u'Will be installed to run from CD'),
-(u'MenuLocal', u'Will be installed on local hard drive'),
-(u'MenuNetwork', u'Will be installed to run from network'),
-(u'ScriptInProgress', u'Gathering required information...'),
-(u'SelAbsentAbsent', u'This feature will remain uninstalled'),
-(u'SelAbsentAdvertise', u'This feature will be set to be installed when required'),
-(u'SelAbsentCD', u'This feature will be installed to run from CD'),
-(u'SelAbsentLocal', u'This feature will be installed on the local hard drive'),
-(u'SelAbsentNetwork', u'This feature will be installed to run from the network'),
-(u'SelAdvertiseAbsent', u'This feature will become unavailable'),
-(u'SelAdvertiseAdvertise', u'Will be installed when required'),
-(u'SelAdvertiseCD', u'This feature will be available to run from CD'),
-(u'SelAdvertiseLocal', u'This feature will be installed on your local hard drive'),
-(u'SelAdvertiseNetwork', u'This feature will be available to run from the network'),
-(u'SelCDAbsent', u"This feature will be uninstalled completely, you won't be able to run it from CD"),
-(u'SelCDAdvertise', u'This feature will change from run from CD state to set to be installed when required'),
-(u'SelCDCD', u'This feature will remain to be run from CD'),
-(u'SelCDLocal', u'This feature will change from run from CD state to be installed on the local hard drive'),
-(u'SelChildCostNeg', u'This feature frees up [1] on your hard drive.'),
-(u'SelChildCostPos', u'This feature requires [1] on your hard drive.'),
-(u'SelCostPending', u'Compiling cost for this feature...'),
-(u'SelLocalAbsent', u'This feature will be completely removed'),
-(u'SelLocalAdvertise', u'This feature will be removed from your local hard drive, but will be set to be installed when required'),
-(u'SelLocalCD', u'This feature will be removed from your local hard drive, but will be still available to run from CD'),
-(u'SelLocalLocal', u'This feature will remain on you local hard drive'),
-(u'SelLocalNetwork', u'This feature will be removed from your local hard drive, but will be still available to run from the network'),
-(u'SelNetworkAbsent', u"This feature will be uninstalled completely, you won't be able to run it from the network"),
-(u'SelNetworkAdvertise', u'This feature will change from run from network state to set to be installed when required'),
-(u'SelNetworkLocal', u'This feature will change from run from network state to be installed on the local hard drive'),
-(u'SelNetworkNetwork', u'This feature will remain to be run from the network'),
-(u'SelParentCostNegNeg', u'This feature frees up [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures free up [4] on your hard drive.'),
-(u'SelParentCostNegPos', u'This feature frees up [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures require [4] on your hard drive.'),
-(u'SelParentCostPosNeg', u'This feature requires [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures free up [4] on your hard drive.'),
-(u'SelParentCostPosPos', u'This feature requires [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures require [4] on your hard drive.'),
-(u'TimeRemaining', u'Time remaining: {[1] minutes }{[2] seconds}'),
-(u'VolumeCostAvailable', u'Available'),
-(u'VolumeCostDifference', u'Difference'),
-(u'VolumeCostRequired', u'Required'),
-(u'VolumeCostSize', u'Disk Size'),
-(u'VolumeCostVolume', u'Volume'),
+('AbsentPath', None),
+('bytes', 'bytes'),
+('GB', 'GB'),
+('KB', 'KB'),
+('MB', 'MB'),
+('MenuAbsent', 'Entire feature will be unavailable'),
+('MenuAdvertise', 'Feature will be installed when required'),
+('MenuAllCD', 'Entire feature will be installed to run from CD'),
+('MenuAllLocal', 'Entire feature will be installed on local hard drive'),
+('MenuAllNetwork', 'Entire feature will be installed to run from network'),
+('MenuCD', 'Will be installed to run from CD'),
+('MenuLocal', 'Will be installed on local hard drive'),
+('MenuNetwork', 'Will be installed to run from network'),
+('ScriptInProgress', 'Gathering required information...'),
+('SelAbsentAbsent', 'This feature will remain uninstalled'),
+('SelAbsentAdvertise', 'This feature will be set to be installed when required'),
+('SelAbsentCD', 'This feature will be installed to run from CD'),
+('SelAbsentLocal', 'This feature will be installed on the local hard drive'),
+('SelAbsentNetwork', 'This feature will be installed to run from the network'),
+('SelAdvertiseAbsent', 'This feature will become unavailable'),
+('SelAdvertiseAdvertise', 'Will be installed when required'),
+('SelAdvertiseCD', 'This feature will be available to run from CD'),
+('SelAdvertiseLocal', 'This feature will be installed on your local hard drive'),
+('SelAdvertiseNetwork', 'This feature will be available to run from the network'),
+('SelCDAbsent', "This feature will be uninstalled completely, you won't be able to run it from CD"),
+('SelCDAdvertise', 'This feature will change from run from CD state to set to be installed when required'),
+('SelCDCD', 'This feature will remain to be run from CD'),
+('SelCDLocal', 'This feature will change from run from CD state to be installed on the local hard drive'),
+('SelChildCostNeg', 'This feature frees up [1] on your hard drive.'),
+('SelChildCostPos', 'This feature requires [1] on your hard drive.'),
+('SelCostPending', 'Compiling cost for this feature...'),
+('SelLocalAbsent', 'This feature will be completely removed'),
+('SelLocalAdvertise', 'This feature will be removed from your local hard drive, but will be set to be installed when required'),
+('SelLocalCD', 'This feature will be removed from your local hard drive, but will be still available to run from CD'),
+('SelLocalLocal', 'This feature will remain on you local hard drive'),
+('SelLocalNetwork', 'This feature will be removed from your local hard drive, but will be still available to run from the network'),
+('SelNetworkAbsent', "This feature will be uninstalled completely, you won't be able to run it from the network"),
+('SelNetworkAdvertise', 'This feature will change from run from network state to set to be installed when required'),
+('SelNetworkLocal', 'This feature will change from run from network state to be installed on the local hard drive'),
+('SelNetworkNetwork', 'This feature will remain to be run from the network'),
+('SelParentCostNegNeg', 'This feature frees up [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures free up [4] on your hard drive.'),
+('SelParentCostNegPos', 'This feature frees up [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures require [4] on your hard drive.'),
+('SelParentCostPosNeg', 'This feature requires [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures free up [4] on your hard drive.'),
+('SelParentCostPosPos', 'This feature requires [1] on your hard drive. It has [2] of [3] subfeatures selected. The subfeatures require [4] on your hard drive.'),
+('TimeRemaining', 'Time remaining: {[1] minutes }{[2] seconds}'),
+('VolumeCostAvailable', 'Available'),
+('VolumeCostDifference', 'Difference'),
+('VolumeCostRequired', 'Required'),
+('VolumeCostSize', 'Disk Size'),
+('VolumeCostVolume', 'Volume'),
 ]
 
 tables=['ActionText', 'UIText']

Modified: python/branches/py3k-struni/Lib/pickle.py
==============================================================================
--- python/branches/py3k-struni/Lib/pickle.py	(original)
+++ python/branches/py3k-struni/Lib/pickle.py	Wed May  2 21:09:54 2007
@@ -523,22 +523,22 @@
     if StringType == UnicodeType:
         # This is true for Jython
         def save_string(self, obj, pack=struct.pack):
-            unicode = obj.isunicode()
+            str = obj.isunicode()
 
             if self.bin:
-                if unicode:
+                if str:
                     obj = obj.encode("utf-8")
                 l = len(obj)
-                if l < 256 and not unicode:
+                if l < 256 and not str:
                     self.write(SHORT_BINSTRING + chr(l) + obj)
                 else:
                     s = pack("<i", l)
-                    if unicode:
+                    if str:
                         self.write(BINUNICODE + s + obj)
                     else:
                         self.write(BINSTRING + s + obj)
             else:
-                if unicode:
+                if str:
                     obj = obj.replace("\\", "\\u005c")
                     obj = obj.replace("\n", "\\u000a")
                     obj = obj.encode('raw-unicode-escape')
@@ -956,12 +956,12 @@
     dispatch[BINSTRING] = load_binstring
 
     def load_unicode(self):
-        self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
+        self.append(str(self.readline()[:-1],'raw-unicode-escape'))
     dispatch[UNICODE] = load_unicode
 
     def load_binunicode(self):
         len = mloads('i' + self.read(4))
-        self.append(unicode(self.read(len),'utf-8'))
+        self.append(str(self.read(len),'utf-8'))
     dispatch[BINUNICODE] = load_binunicode
 
     def load_short_binstring(self):

Modified: python/branches/py3k-struni/Lib/pickletools.py
==============================================================================
--- python/branches/py3k-struni/Lib/pickletools.py	(original)
+++ python/branches/py3k-struni/Lib/pickletools.py	Wed May  2 21:09:54 2007
@@ -431,7 +431,7 @@
         raise ValueError("no newline found when trying to read "
                          "unicodestringnl")
     data = data[:-1]    # lose the newline
-    return unicode(data, 'raw-unicode-escape')
+    return str(data, 'raw-unicode-escape')
 
 unicodestringnl = ArgumentDescriptor(
                       name='unicodestringnl',
@@ -467,7 +467,7 @@
         raise ValueError("unicodestring4 byte count < 0: %d" % n)
     data = f.read(n)
     if len(data) == n:
-        return unicode(data, 'utf-8')
+        return str(data, 'utf-8')
     raise ValueError("expected %d bytes in a unicodestring4, but only %d "
                      "remain" % (n, len(data)))
 
@@ -750,7 +750,7 @@
 
 pyunicode = StackObject(
                 name='unicode',
-                obtype=unicode,
+                obtype=str,
                 doc="A Python Unicode string object.")
 
 pynone = StackObject(

Modified: python/branches/py3k-struni/Lib/plat-mac/EasyDialogs.py
==============================================================================
--- python/branches/py3k-struni/Lib/plat-mac/EasyDialogs.py	(original)
+++ python/branches/py3k-struni/Lib/plat-mac/EasyDialogs.py	Wed May  2 21:09:54 2007
@@ -662,7 +662,7 @@
         return tpwanted(rr.selection[0])
     if issubclass(tpwanted, str):
         return tpwanted(rr.selection_fsr[0].as_pathname())
-    if issubclass(tpwanted, unicode):
+    if issubclass(tpwanted, str):
         return tpwanted(rr.selection_fsr[0].as_pathname(), 'utf8')
     raise TypeError, "Unknown value for argument 'wanted': %s" % repr(tpwanted)
 
@@ -713,7 +713,7 @@
         raise TypeError, "Cannot pass wanted=FSRef to AskFileForSave"
     if issubclass(tpwanted, Carbon.File.FSSpec):
         return tpwanted(rr.selection[0])
-    if issubclass(tpwanted, (str, unicode)):
+    if issubclass(tpwanted, (str, str)):
         if sys.platform == 'mac':
             fullpath = rr.selection[0].as_pathname()
         else:
@@ -722,10 +722,10 @@
             pardir_fss = Carbon.File.FSSpec((vrefnum, dirid, ''))
             pardir_fsr = Carbon.File.FSRef(pardir_fss)
             pardir_path = pardir_fsr.FSRefMakePath()  # This is utf-8
-            name_utf8 = unicode(name, 'macroman').encode('utf8')
+            name_utf8 = str(name, 'macroman').encode('utf8')
             fullpath = os.path.join(pardir_path, name_utf8)
-        if issubclass(tpwanted, unicode):
-            return unicode(fullpath, 'utf8')
+        if issubclass(tpwanted, str):
+            return str(fullpath, 'utf8')
         return tpwanted(fullpath)
     raise TypeError, "Unknown value for argument 'wanted': %s" % repr(tpwanted)
 
@@ -775,7 +775,7 @@
         return tpwanted(rr.selection[0])
     if issubclass(tpwanted, str):
         return tpwanted(rr.selection_fsr[0].as_pathname())
-    if issubclass(tpwanted, unicode):
+    if issubclass(tpwanted, str):
         return tpwanted(rr.selection_fsr[0].as_pathname(), 'utf8')
     raise TypeError, "Unknown value for argument 'wanted': %s" % repr(tpwanted)
 

Modified: python/branches/py3k-struni/Lib/plat-mac/FrameWork.py
==============================================================================
--- python/branches/py3k-struni/Lib/plat-mac/FrameWork.py	(original)
+++ python/branches/py3k-struni/Lib/plat-mac/FrameWork.py	Wed May  2 21:09:54 2007
@@ -561,7 +561,7 @@
         self.menu.AppendMenu('x')           # add a dummy string
         self.items.append((label, shortcut, callback, kind))
         item = len(self.items)
-        if isinstance(label, unicode):
+        if isinstance(label, str):
             self.menu.SetMenuItemTextWithCFString(item, label)
         else:
             self.menu.SetMenuItemText(item, label)

Modified: python/branches/py3k-struni/Lib/plat-mac/aepack.py
==============================================================================
--- python/branches/py3k-struni/Lib/plat-mac/aepack.py	(original)
+++ python/branches/py3k-struni/Lib/plat-mac/aepack.py	Wed May  2 21:09:54 2007
@@ -154,7 +154,7 @@
     if t == typeChar:
         return desc.data
     if t == typeUnicodeText:
-        return unicode(desc.data, 'utf16')
+        return str(desc.data, 'utf16')
     # typeColorTable coerced to typeAEList
     # typeComp coerced to extended
     # typeData returned as unknown

Modified: python/branches/py3k-struni/Lib/plat-mac/buildtools.py
==============================================================================
--- python/branches/py3k-struni/Lib/plat-mac/buildtools.py	(original)
+++ python/branches/py3k-struni/Lib/plat-mac/buildtools.py	Wed May  2 21:09:54 2007
@@ -164,7 +164,7 @@
         output = Res.FSOpenResourceFile(destname, RESOURCE_FORK_NAME, WRITE)
     except MacOS.Error:
         destdir, destfile = os.path.split(destname)
-        Res.FSCreateResourceFile(destdir, unicode(destfile), RESOURCE_FORK_NAME)
+        Res.FSCreateResourceFile(destdir, str(destfile), RESOURCE_FORK_NAME)
         output = Res.FSOpenResourceFile(destname, RESOURCE_FORK_NAME, WRITE)
 
     # Copy the resources from the target specific resource template, if any

Modified: python/branches/py3k-struni/Lib/plat-mac/macostools.py
==============================================================================
--- python/branches/py3k-struni/Lib/plat-mac/macostools.py	(original)
+++ python/branches/py3k-struni/Lib/plat-mac/macostools.py	Wed May  2 21:09:54 2007
@@ -42,7 +42,7 @@
     else:
         alias = srcfsr.FSNewAliasMinimal()
 
-    dstfsr, dstfss = Res.FSCreateResourceFile(dstdirfsr, unicode(dstname),
+    dstfsr, dstfss = Res.FSCreateResourceFile(dstdirfsr, str(dstname),
         File.FSGetResourceForkName())
     h = Res.FSOpenResourceFile(dstfsr, File.FSGetResourceForkName(), 3)
     resource = Res.Resource(alias.data)

Modified: python/branches/py3k-struni/Lib/plat-mac/macresource.py
==============================================================================
--- python/branches/py3k-struni/Lib/plat-mac/macresource.py	(original)
+++ python/branches/py3k-struni/Lib/plat-mac/macresource.py	Wed May  2 21:09:54 2007
@@ -82,7 +82,7 @@
             # a data-fork based resource file or a AppleSingle file
             # from the CVS repository.
             try:
-                refno = Res.FSOpenResourceFile(pathname, u'', 1)
+                refno = Res.FSOpenResourceFile(pathname, '', 1)
             except Res.Error as arg:
                 if arg[0] != -199:
                     # -199 is "bad resource map"
@@ -91,7 +91,7 @@
                 return refno
             # Finally try decoding an AppleSingle file
             pathname = _decode(pathname, verbose=verbose)
-            refno = Res.FSOpenResourceFile(pathname, u'', 1)
+            refno = Res.FSOpenResourceFile(pathname, '', 1)
         else:
             raise
     return refno
@@ -109,7 +109,7 @@
             # a data-fork based resource file or a AppleSingle file
             # from the CVS repository.
             try:
-                refno = Res.FSOpenResourceFile(pathname, u'', 1)
+                refno = Res.FSOpenResourceFile(pathname, '', 1)
             except Res.Error as arg:
                 if arg[0] != -199:
                     # -199 is "bad resource map"

Modified: python/branches/py3k-struni/Lib/plat-mac/plistlib.py
==============================================================================
--- python/branches/py3k-struni/Lib/plat-mac/plistlib.py	(original)
+++ python/branches/py3k-struni/Lib/plat-mac/plistlib.py	Wed May  2 21:09:54 2007
@@ -70,7 +70,7 @@
     usually is a dictionary).
     """
     didOpen = 0
-    if isinstance(pathOrFile, (str, unicode)):
+    if isinstance(pathOrFile, (str, str)):
         pathOrFile = open(pathOrFile)
         didOpen = 1
     p = PlistParser()
@@ -85,7 +85,7 @@
     file name or a (writable) file object.
     """
     didOpen = 0
-    if isinstance(pathOrFile, (str, unicode)):
+    if isinstance(pathOrFile, (str, str)):
         pathOrFile = open(pathOrFile, "w")
         didOpen = 1
     writer = PlistWriter(pathOrFile)
@@ -231,7 +231,7 @@
         DumbXMLWriter.__init__(self, file, indentLevel, indent)
 
     def writeValue(self, value):
-        if isinstance(value, (str, unicode)):
+        if isinstance(value, (str, str)):
             self.simpleElement("string", value)
         elif isinstance(value, bool):
             # must switch for bool before int, as bool is a
@@ -270,7 +270,7 @@
         self.beginElement("dict")
         items = sorted(d.items())
         for key, value in items:
-            if not isinstance(key, (str, unicode)):
+            if not isinstance(key, (str, str)):
                 raise TypeError("keys must be strings")
             self.simpleElement("key", key)
             self.writeValue(value)

Modified: python/branches/py3k-struni/Lib/sqlite3/test/dbapi.py
==============================================================================
--- python/branches/py3k-struni/Lib/sqlite3/test/dbapi.py	(original)
+++ python/branches/py3k-struni/Lib/sqlite3/test/dbapi.py	Wed May  2 21:09:54 2007
@@ -612,7 +612,7 @@
     def CheckScriptStringUnicode(self):
         con = sqlite.connect(":memory:")
         cur = con.cursor()
-        cur.executescript(u"""
+        cur.executescript("""
             create table a(i);
             insert into a(i) values (5);
             select i from a;

Modified: python/branches/py3k-struni/Lib/sqlite3/test/factory.py
==============================================================================
--- python/branches/py3k-struni/Lib/sqlite3/test/factory.py	(original)
+++ python/branches/py3k-struni/Lib/sqlite3/test/factory.py	Wed May  2 21:09:54 2007
@@ -139,31 +139,31 @@
         self.con = sqlite.connect(":memory:")
 
     def CheckUnicode(self):
-        austria = unicode("Österreich", "latin1")
+        austria = str("Österreich", "latin1")
         row = self.con.execute("select ?", (austria,)).fetchone()
-        self.failUnless(type(row[0]) == unicode, "type of row[0] must be unicode")
+        self.failUnless(type(row[0]) == str, "type of row[0] must be unicode")
 
     def CheckString(self):
         self.con.text_factory = str
-        austria = unicode("Österreich", "latin1")
+        austria = str("Österreich", "latin1")
         row = self.con.execute("select ?", (austria,)).fetchone()
         self.failUnless(type(row[0]) == str, "type of row[0] must be str")
         self.failUnless(row[0] == austria.encode("utf-8"), "column must equal original data in UTF-8")
 
     def CheckCustom(self):
-        self.con.text_factory = lambda x: unicode(x, "utf-8", "ignore")
-        austria = unicode("Österreich", "latin1")
+        self.con.text_factory = lambda x: str(x, "utf-8", "ignore")
+        austria = str("Österreich", "latin1")
         row = self.con.execute("select ?", (austria.encode("latin1"),)).fetchone()
-        self.failUnless(type(row[0]) == unicode, "type of row[0] must be unicode")
-        self.failUnless(row[0].endswith(u"reich"), "column must contain original data")
+        self.failUnless(type(row[0]) == str, "type of row[0] must be unicode")
+        self.failUnless(row[0].endswith("reich"), "column must contain original data")
 
     def CheckOptimizedUnicode(self):
         self.con.text_factory = sqlite.OptimizedUnicode
-        austria = unicode("Österreich", "latin1")
-        germany = unicode("Deutchland")
+        austria = str("Österreich", "latin1")
+        germany = str("Deutchland")
         a_row = self.con.execute("select ?", (austria,)).fetchone()
         d_row = self.con.execute("select ?", (germany,)).fetchone()
-        self.failUnless(type(a_row[0]) == unicode, "type of non-ASCII row must be unicode")
+        self.failUnless(type(a_row[0]) == str, "type of non-ASCII row must be unicode")
         self.failUnless(type(d_row[0]) == str, "type of ASCII-only row must be str")
 
     def tearDown(self):

Modified: python/branches/py3k-struni/Lib/sqlite3/test/types.py
==============================================================================
--- python/branches/py3k-struni/Lib/sqlite3/test/types.py	(original)
+++ python/branches/py3k-struni/Lib/sqlite3/test/types.py	Wed May  2 21:09:54 2007
@@ -36,10 +36,10 @@
         self.con.close()
 
     def CheckString(self):
-        self.cur.execute("insert into test(s) values (?)", (u"Österreich",))
+        self.cur.execute("insert into test(s) values (?)", ("Österreich",))
         self.cur.execute("select s from test")
         row = self.cur.fetchone()
-        self.failUnlessEqual(row[0], u"Österreich")
+        self.failUnlessEqual(row[0], "Österreich")
 
     def CheckSmallInt(self):
         self.cur.execute("insert into test(i) values (?)", (42,))
@@ -69,9 +69,9 @@
         self.failUnlessEqual(row[0], val)
 
     def CheckUnicodeExecute(self):
-        self.cur.execute(u"select 'Österreich'")
+        self.cur.execute("select 'Österreich'")
         row = self.cur.fetchone()
-        self.failUnlessEqual(row[0], u"Österreich")
+        self.failUnlessEqual(row[0], "Österreich")
 
 class DeclTypesTests(unittest.TestCase):
     class Foo:
@@ -166,7 +166,7 @@
 
     def CheckUnicode(self):
         # default
-        val = u"\xd6sterreich"
+        val = "\xd6sterreich"
         self.cur.execute("insert into test(u) values (?)", (val,))
         self.cur.execute("select u from test")
         row = self.cur.fetchone()

Modified: python/branches/py3k-struni/Lib/sqlite3/test/userfunctions.py
==============================================================================
--- python/branches/py3k-struni/Lib/sqlite3/test/userfunctions.py	(original)
+++ python/branches/py3k-struni/Lib/sqlite3/test/userfunctions.py	Wed May  2 21:09:54 2007
@@ -28,7 +28,7 @@
 def func_returntext():
     return "foo"
 def func_returnunicode():
-    return u"bar"
+    return "bar"
 def func_returnint():
     return 42
 def func_returnfloat():
@@ -41,7 +41,7 @@
     5/0
 
 def func_isstring(v):
-    return type(v) is unicode
+    return type(v) is str
 def func_isint(v):
     return type(v) is int
 def func_isfloat(v):
@@ -100,7 +100,7 @@
         self.val = None
 
     def step(self, whichType, val):
-        theType = {"str": unicode, "int": int, "float": float, "None": type(None), "blob": buffer}
+        theType = {"str": str, "int": int, "float": float, "None": type(None), "blob": buffer}
         self.val = int(theType[whichType] is type(val))
 
     def finalize(self):
@@ -160,15 +160,15 @@
         cur = self.con.cursor()
         cur.execute("select returntext()")
         val = cur.fetchone()[0]
-        self.failUnlessEqual(type(val), unicode)
+        self.failUnlessEqual(type(val), str)
         self.failUnlessEqual(val, "foo")
 
     def CheckFuncReturnUnicode(self):
         cur = self.con.cursor()
         cur.execute("select returnunicode()")
         val = cur.fetchone()[0]
-        self.failUnlessEqual(type(val), unicode)
-        self.failUnlessEqual(val, u"bar")
+        self.failUnlessEqual(type(val), str)
+        self.failUnlessEqual(val, "bar")
 
     def CheckFuncReturnInt(self):
         cur = self.con.cursor()

Modified: python/branches/py3k-struni/Lib/stringprep.py
==============================================================================
--- python/branches/py3k-struni/Lib/stringprep.py	(original)
+++ python/branches/py3k-struni/Lib/stringprep.py	Wed May  2 21:09:54 2007
@@ -22,169 +22,169 @@
 
 
 b3_exceptions = {
-0xb5:u'\u03bc', 0xdf:u'ss', 0x130:u'i\u0307', 0x149:u'\u02bcn',
-0x17f:u's', 0x1f0:u'j\u030c', 0x345:u'\u03b9', 0x37a:u' \u03b9',
-0x390:u'\u03b9\u0308\u0301', 0x3b0:u'\u03c5\u0308\u0301', 0x3c2:u'\u03c3', 0x3d0:u'\u03b2',
-0x3d1:u'\u03b8', 0x3d2:u'\u03c5', 0x3d3:u'\u03cd', 0x3d4:u'\u03cb',
-0x3d5:u'\u03c6', 0x3d6:u'\u03c0', 0x3f0:u'\u03ba', 0x3f1:u'\u03c1',
-0x3f2:u'\u03c3', 0x3f5:u'\u03b5', 0x587:u'\u0565\u0582', 0x1e96:u'h\u0331',
-0x1e97:u't\u0308', 0x1e98:u'w\u030a', 0x1e99:u'y\u030a', 0x1e9a:u'a\u02be',
-0x1e9b:u'\u1e61', 0x1f50:u'\u03c5\u0313', 0x1f52:u'\u03c5\u0313\u0300', 0x1f54:u'\u03c5\u0313\u0301',
-0x1f56:u'\u03c5\u0313\u0342', 0x1f80:u'\u1f00\u03b9', 0x1f81:u'\u1f01\u03b9', 0x1f82:u'\u1f02\u03b9',
-0x1f83:u'\u1f03\u03b9', 0x1f84:u'\u1f04\u03b9', 0x1f85:u'\u1f05\u03b9', 0x1f86:u'\u1f06\u03b9',
-0x1f87:u'\u1f07\u03b9', 0x1f88:u'\u1f00\u03b9', 0x1f89:u'\u1f01\u03b9', 0x1f8a:u'\u1f02\u03b9',
-0x1f8b:u'\u1f03\u03b9', 0x1f8c:u'\u1f04\u03b9', 0x1f8d:u'\u1f05\u03b9', 0x1f8e:u'\u1f06\u03b9',
-0x1f8f:u'\u1f07\u03b9', 0x1f90:u'\u1f20\u03b9', 0x1f91:u'\u1f21\u03b9', 0x1f92:u'\u1f22\u03b9',
-0x1f93:u'\u1f23\u03b9', 0x1f94:u'\u1f24\u03b9', 0x1f95:u'\u1f25\u03b9', 0x1f96:u'\u1f26\u03b9',
-0x1f97:u'\u1f27\u03b9', 0x1f98:u'\u1f20\u03b9', 0x1f99:u'\u1f21\u03b9', 0x1f9a:u'\u1f22\u03b9',
-0x1f9b:u'\u1f23\u03b9', 0x1f9c:u'\u1f24\u03b9', 0x1f9d:u'\u1f25\u03b9', 0x1f9e:u'\u1f26\u03b9',
-0x1f9f:u'\u1f27\u03b9', 0x1fa0:u'\u1f60\u03b9', 0x1fa1:u'\u1f61\u03b9', 0x1fa2:u'\u1f62\u03b9',
-0x1fa3:u'\u1f63\u03b9', 0x1fa4:u'\u1f64\u03b9', 0x1fa5:u'\u1f65\u03b9', 0x1fa6:u'\u1f66\u03b9',
-0x1fa7:u'\u1f67\u03b9', 0x1fa8:u'\u1f60\u03b9', 0x1fa9:u'\u1f61\u03b9', 0x1faa:u'\u1f62\u03b9',
-0x1fab:u'\u1f63\u03b9', 0x1fac:u'\u1f64\u03b9', 0x1fad:u'\u1f65\u03b9', 0x1fae:u'\u1f66\u03b9',
-0x1faf:u'\u1f67\u03b9', 0x1fb2:u'\u1f70\u03b9', 0x1fb3:u'\u03b1\u03b9', 0x1fb4:u'\u03ac\u03b9',
-0x1fb6:u'\u03b1\u0342', 0x1fb7:u'\u03b1\u0342\u03b9', 0x1fbc:u'\u03b1\u03b9', 0x1fbe:u'\u03b9',
-0x1fc2:u'\u1f74\u03b9', 0x1fc3:u'\u03b7\u03b9', 0x1fc4:u'\u03ae\u03b9', 0x1fc6:u'\u03b7\u0342',
-0x1fc7:u'\u03b7\u0342\u03b9', 0x1fcc:u'\u03b7\u03b9', 0x1fd2:u'\u03b9\u0308\u0300', 0x1fd3:u'\u03b9\u0308\u0301',
-0x1fd6:u'\u03b9\u0342', 0x1fd7:u'\u03b9\u0308\u0342', 0x1fe2:u'\u03c5\u0308\u0300', 0x1fe3:u'\u03c5\u0308\u0301',
-0x1fe4:u'\u03c1\u0313', 0x1fe6:u'\u03c5\u0342', 0x1fe7:u'\u03c5\u0308\u0342', 0x1ff2:u'\u1f7c\u03b9',
-0x1ff3:u'\u03c9\u03b9', 0x1ff4:u'\u03ce\u03b9', 0x1ff6:u'\u03c9\u0342', 0x1ff7:u'\u03c9\u0342\u03b9',
-0x1ffc:u'\u03c9\u03b9', 0x20a8:u'rs', 0x2102:u'c', 0x2103:u'\xb0c',
-0x2107:u'\u025b', 0x2109:u'\xb0f', 0x210b:u'h', 0x210c:u'h',
-0x210d:u'h', 0x2110:u'i', 0x2111:u'i', 0x2112:u'l',
-0x2115:u'n', 0x2116:u'no', 0x2119:u'p', 0x211a:u'q',
-0x211b:u'r', 0x211c:u'r', 0x211d:u'r', 0x2120:u'sm',
-0x2121:u'tel', 0x2122:u'tm', 0x2124:u'z', 0x2128:u'z',
-0x212c:u'b', 0x212d:u'c', 0x2130:u'e', 0x2131:u'f',
-0x2133:u'm', 0x213e:u'\u03b3', 0x213f:u'\u03c0', 0x2145:u'd',
-0x3371:u'hpa', 0x3373:u'au', 0x3375:u'ov', 0x3380:u'pa',
-0x3381:u'na', 0x3382:u'\u03bca', 0x3383:u'ma', 0x3384:u'ka',
-0x3385:u'kb', 0x3386:u'mb', 0x3387:u'gb', 0x338a:u'pf',
-0x338b:u'nf', 0x338c:u'\u03bcf', 0x3390:u'hz', 0x3391:u'khz',
-0x3392:u'mhz', 0x3393:u'ghz', 0x3394:u'thz', 0x33a9:u'pa',
-0x33aa:u'kpa', 0x33ab:u'mpa', 0x33ac:u'gpa', 0x33b4:u'pv',
-0x33b5:u'nv', 0x33b6:u'\u03bcv', 0x33b7:u'mv', 0x33b8:u'kv',
-0x33b9:u'mv', 0x33ba:u'pw', 0x33bb:u'nw', 0x33bc:u'\u03bcw',
-0x33bd:u'mw', 0x33be:u'kw', 0x33bf:u'mw', 0x33c0:u'k\u03c9',
-0x33c1:u'm\u03c9', 0x33c3:u'bq', 0x33c6:u'c\u2215kg', 0x33c7:u'co.',
-0x33c8:u'db', 0x33c9:u'gy', 0x33cb:u'hp', 0x33cd:u'kk',
-0x33ce:u'km', 0x33d7:u'ph', 0x33d9:u'ppm', 0x33da:u'pr',
-0x33dc:u'sv', 0x33dd:u'wb', 0xfb00:u'ff', 0xfb01:u'fi',
-0xfb02:u'fl', 0xfb03:u'ffi', 0xfb04:u'ffl', 0xfb05:u'st',
-0xfb06:u'st', 0xfb13:u'\u0574\u0576', 0xfb14:u'\u0574\u0565', 0xfb15:u'\u0574\u056b',
-0xfb16:u'\u057e\u0576', 0xfb17:u'\u0574\u056d', 0x1d400:u'a', 0x1d401:u'b',
-0x1d402:u'c', 0x1d403:u'd', 0x1d404:u'e', 0x1d405:u'f',
-0x1d406:u'g', 0x1d407:u'h', 0x1d408:u'i', 0x1d409:u'j',
-0x1d40a:u'k', 0x1d40b:u'l', 0x1d40c:u'm', 0x1d40d:u'n',
-0x1d40e:u'o', 0x1d40f:u'p', 0x1d410:u'q', 0x1d411:u'r',
-0x1d412:u's', 0x1d413:u't', 0x1d414:u'u', 0x1d415:u'v',
-0x1d416:u'w', 0x1d417:u'x', 0x1d418:u'y', 0x1d419:u'z',
-0x1d434:u'a', 0x1d435:u'b', 0x1d436:u'c', 0x1d437:u'd',
-0x1d438:u'e', 0x1d439:u'f', 0x1d43a:u'g', 0x1d43b:u'h',
-0x1d43c:u'i', 0x1d43d:u'j', 0x1d43e:u'k', 0x1d43f:u'l',
-0x1d440:u'm', 0x1d441:u'n', 0x1d442:u'o', 0x1d443:u'p',
-0x1d444:u'q', 0x1d445:u'r', 0x1d446:u's', 0x1d447:u't',
-0x1d448:u'u', 0x1d449:u'v', 0x1d44a:u'w', 0x1d44b:u'x',
-0x1d44c:u'y', 0x1d44d:u'z', 0x1d468:u'a', 0x1d469:u'b',
-0x1d46a:u'c', 0x1d46b:u'd', 0x1d46c:u'e', 0x1d46d:u'f',
-0x1d46e:u'g', 0x1d46f:u'h', 0x1d470:u'i', 0x1d471:u'j',
-0x1d472:u'k', 0x1d473:u'l', 0x1d474:u'm', 0x1d475:u'n',
-0x1d476:u'o', 0x1d477:u'p', 0x1d478:u'q', 0x1d479:u'r',
-0x1d47a:u's', 0x1d47b:u't', 0x1d47c:u'u', 0x1d47d:u'v',
-0x1d47e:u'w', 0x1d47f:u'x', 0x1d480:u'y', 0x1d481:u'z',
-0x1d49c:u'a', 0x1d49e:u'c', 0x1d49f:u'd', 0x1d4a2:u'g',
-0x1d4a5:u'j', 0x1d4a6:u'k', 0x1d4a9:u'n', 0x1d4aa:u'o',
-0x1d4ab:u'p', 0x1d4ac:u'q', 0x1d4ae:u's', 0x1d4af:u't',
-0x1d4b0:u'u', 0x1d4b1:u'v', 0x1d4b2:u'w', 0x1d4b3:u'x',
-0x1d4b4:u'y', 0x1d4b5:u'z', 0x1d4d0:u'a', 0x1d4d1:u'b',
-0x1d4d2:u'c', 0x1d4d3:u'd', 0x1d4d4:u'e', 0x1d4d5:u'f',
-0x1d4d6:u'g', 0x1d4d7:u'h', 0x1d4d8:u'i', 0x1d4d9:u'j',
-0x1d4da:u'k', 0x1d4db:u'l', 0x1d4dc:u'm', 0x1d4dd:u'n',
-0x1d4de:u'o', 0x1d4df:u'p', 0x1d4e0:u'q', 0x1d4e1:u'r',
-0x1d4e2:u's', 0x1d4e3:u't', 0x1d4e4:u'u', 0x1d4e5:u'v',
-0x1d4e6:u'w', 0x1d4e7:u'x', 0x1d4e8:u'y', 0x1d4e9:u'z',
-0x1d504:u'a', 0x1d505:u'b', 0x1d507:u'd', 0x1d508:u'e',
-0x1d509:u'f', 0x1d50a:u'g', 0x1d50d:u'j', 0x1d50e:u'k',
-0x1d50f:u'l', 0x1d510:u'm', 0x1d511:u'n', 0x1d512:u'o',
-0x1d513:u'p', 0x1d514:u'q', 0x1d516:u's', 0x1d517:u't',
-0x1d518:u'u', 0x1d519:u'v', 0x1d51a:u'w', 0x1d51b:u'x',
-0x1d51c:u'y', 0x1d538:u'a', 0x1d539:u'b', 0x1d53b:u'd',
-0x1d53c:u'e', 0x1d53d:u'f', 0x1d53e:u'g', 0x1d540:u'i',
-0x1d541:u'j', 0x1d542:u'k', 0x1d543:u'l', 0x1d544:u'm',
-0x1d546:u'o', 0x1d54a:u's', 0x1d54b:u't', 0x1d54c:u'u',
-0x1d54d:u'v', 0x1d54e:u'w', 0x1d54f:u'x', 0x1d550:u'y',
-0x1d56c:u'a', 0x1d56d:u'b', 0x1d56e:u'c', 0x1d56f:u'd',
-0x1d570:u'e', 0x1d571:u'f', 0x1d572:u'g', 0x1d573:u'h',
-0x1d574:u'i', 0x1d575:u'j', 0x1d576:u'k', 0x1d577:u'l',
-0x1d578:u'm', 0x1d579:u'n', 0x1d57a:u'o', 0x1d57b:u'p',
-0x1d57c:u'q', 0x1d57d:u'r', 0x1d57e:u's', 0x1d57f:u't',
-0x1d580:u'u', 0x1d581:u'v', 0x1d582:u'w', 0x1d583:u'x',
-0x1d584:u'y', 0x1d585:u'z', 0x1d5a0:u'a', 0x1d5a1:u'b',
-0x1d5a2:u'c', 0x1d5a3:u'd', 0x1d5a4:u'e', 0x1d5a5:u'f',
-0x1d5a6:u'g', 0x1d5a7:u'h', 0x1d5a8:u'i', 0x1d5a9:u'j',
-0x1d5aa:u'k', 0x1d5ab:u'l', 0x1d5ac:u'm', 0x1d5ad:u'n',
-0x1d5ae:u'o', 0x1d5af:u'p', 0x1d5b0:u'q', 0x1d5b1:u'r',
-0x1d5b2:u's', 0x1d5b3:u't', 0x1d5b4:u'u', 0x1d5b5:u'v',
-0x1d5b6:u'w', 0x1d5b7:u'x', 0x1d5b8:u'y', 0x1d5b9:u'z',
-0x1d5d4:u'a', 0x1d5d5:u'b', 0x1d5d6:u'c', 0x1d5d7:u'd',
-0x1d5d8:u'e', 0x1d5d9:u'f', 0x1d5da:u'g', 0x1d5db:u'h',
-0x1d5dc:u'i', 0x1d5dd:u'j', 0x1d5de:u'k', 0x1d5df:u'l',
-0x1d5e0:u'm', 0x1d5e1:u'n', 0x1d5e2:u'o', 0x1d5e3:u'p',
-0x1d5e4:u'q', 0x1d5e5:u'r', 0x1d5e6:u's', 0x1d5e7:u't',
-0x1d5e8:u'u', 0x1d5e9:u'v', 0x1d5ea:u'w', 0x1d5eb:u'x',
-0x1d5ec:u'y', 0x1d5ed:u'z', 0x1d608:u'a', 0x1d609:u'b',
-0x1d60a:u'c', 0x1d60b:u'd', 0x1d60c:u'e', 0x1d60d:u'f',
-0x1d60e:u'g', 0x1d60f:u'h', 0x1d610:u'i', 0x1d611:u'j',
-0x1d612:u'k', 0x1d613:u'l', 0x1d614:u'm', 0x1d615:u'n',
-0x1d616:u'o', 0x1d617:u'p', 0x1d618:u'q', 0x1d619:u'r',
-0x1d61a:u's', 0x1d61b:u't', 0x1d61c:u'u', 0x1d61d:u'v',
-0x1d61e:u'w', 0x1d61f:u'x', 0x1d620:u'y', 0x1d621:u'z',
-0x1d63c:u'a', 0x1d63d:u'b', 0x1d63e:u'c', 0x1d63f:u'd',
-0x1d640:u'e', 0x1d641:u'f', 0x1d642:u'g', 0x1d643:u'h',
-0x1d644:u'i', 0x1d645:u'j', 0x1d646:u'k', 0x1d647:u'l',
-0x1d648:u'm', 0x1d649:u'n', 0x1d64a:u'o', 0x1d64b:u'p',
-0x1d64c:u'q', 0x1d64d:u'r', 0x1d64e:u's', 0x1d64f:u't',
-0x1d650:u'u', 0x1d651:u'v', 0x1d652:u'w', 0x1d653:u'x',
-0x1d654:u'y', 0x1d655:u'z', 0x1d670:u'a', 0x1d671:u'b',
-0x1d672:u'c', 0x1d673:u'd', 0x1d674:u'e', 0x1d675:u'f',
-0x1d676:u'g', 0x1d677:u'h', 0x1d678:u'i', 0x1d679:u'j',
-0x1d67a:u'k', 0x1d67b:u'l', 0x1d67c:u'm', 0x1d67d:u'n',
-0x1d67e:u'o', 0x1d67f:u'p', 0x1d680:u'q', 0x1d681:u'r',
-0x1d682:u's', 0x1d683:u't', 0x1d684:u'u', 0x1d685:u'v',
-0x1d686:u'w', 0x1d687:u'x', 0x1d688:u'y', 0x1d689:u'z',
-0x1d6a8:u'\u03b1', 0x1d6a9:u'\u03b2', 0x1d6aa:u'\u03b3', 0x1d6ab:u'\u03b4',
-0x1d6ac:u'\u03b5', 0x1d6ad:u'\u03b6', 0x1d6ae:u'\u03b7', 0x1d6af:u'\u03b8',
-0x1d6b0:u'\u03b9', 0x1d6b1:u'\u03ba', 0x1d6b2:u'\u03bb', 0x1d6b3:u'\u03bc',
-0x1d6b4:u'\u03bd', 0x1d6b5:u'\u03be', 0x1d6b6:u'\u03bf', 0x1d6b7:u'\u03c0',
-0x1d6b8:u'\u03c1', 0x1d6b9:u'\u03b8', 0x1d6ba:u'\u03c3', 0x1d6bb:u'\u03c4',
-0x1d6bc:u'\u03c5', 0x1d6bd:u'\u03c6', 0x1d6be:u'\u03c7', 0x1d6bf:u'\u03c8',
-0x1d6c0:u'\u03c9', 0x1d6d3:u'\u03c3', 0x1d6e2:u'\u03b1', 0x1d6e3:u'\u03b2',
-0x1d6e4:u'\u03b3', 0x1d6e5:u'\u03b4', 0x1d6e6:u'\u03b5', 0x1d6e7:u'\u03b6',
-0x1d6e8:u'\u03b7', 0x1d6e9:u'\u03b8', 0x1d6ea:u'\u03b9', 0x1d6eb:u'\u03ba',
-0x1d6ec:u'\u03bb', 0x1d6ed:u'\u03bc', 0x1d6ee:u'\u03bd', 0x1d6ef:u'\u03be',
-0x1d6f0:u'\u03bf', 0x1d6f1:u'\u03c0', 0x1d6f2:u'\u03c1', 0x1d6f3:u'\u03b8',
-0x1d6f4:u'\u03c3', 0x1d6f5:u'\u03c4', 0x1d6f6:u'\u03c5', 0x1d6f7:u'\u03c6',
-0x1d6f8:u'\u03c7', 0x1d6f9:u'\u03c8', 0x1d6fa:u'\u03c9', 0x1d70d:u'\u03c3',
-0x1d71c:u'\u03b1', 0x1d71d:u'\u03b2', 0x1d71e:u'\u03b3', 0x1d71f:u'\u03b4',
-0x1d720:u'\u03b5', 0x1d721:u'\u03b6', 0x1d722:u'\u03b7', 0x1d723:u'\u03b8',
-0x1d724:u'\u03b9', 0x1d725:u'\u03ba', 0x1d726:u'\u03bb', 0x1d727:u'\u03bc',
-0x1d728:u'\u03bd', 0x1d729:u'\u03be', 0x1d72a:u'\u03bf', 0x1d72b:u'\u03c0',
-0x1d72c:u'\u03c1', 0x1d72d:u'\u03b8', 0x1d72e:u'\u03c3', 0x1d72f:u'\u03c4',
-0x1d730:u'\u03c5', 0x1d731:u'\u03c6', 0x1d732:u'\u03c7', 0x1d733:u'\u03c8',
-0x1d734:u'\u03c9', 0x1d747:u'\u03c3', 0x1d756:u'\u03b1', 0x1d757:u'\u03b2',
-0x1d758:u'\u03b3', 0x1d759:u'\u03b4', 0x1d75a:u'\u03b5', 0x1d75b:u'\u03b6',
-0x1d75c:u'\u03b7', 0x1d75d:u'\u03b8', 0x1d75e:u'\u03b9', 0x1d75f:u'\u03ba',
-0x1d760:u'\u03bb', 0x1d761:u'\u03bc', 0x1d762:u'\u03bd', 0x1d763:u'\u03be',
-0x1d764:u'\u03bf', 0x1d765:u'\u03c0', 0x1d766:u'\u03c1', 0x1d767:u'\u03b8',
-0x1d768:u'\u03c3', 0x1d769:u'\u03c4', 0x1d76a:u'\u03c5', 0x1d76b:u'\u03c6',
-0x1d76c:u'\u03c7', 0x1d76d:u'\u03c8', 0x1d76e:u'\u03c9', 0x1d781:u'\u03c3',
-0x1d790:u'\u03b1', 0x1d791:u'\u03b2', 0x1d792:u'\u03b3', 0x1d793:u'\u03b4',
-0x1d794:u'\u03b5', 0x1d795:u'\u03b6', 0x1d796:u'\u03b7', 0x1d797:u'\u03b8',
-0x1d798:u'\u03b9', 0x1d799:u'\u03ba', 0x1d79a:u'\u03bb', 0x1d79b:u'\u03bc',
-0x1d79c:u'\u03bd', 0x1d79d:u'\u03be', 0x1d79e:u'\u03bf', 0x1d79f:u'\u03c0',
-0x1d7a0:u'\u03c1', 0x1d7a1:u'\u03b8', 0x1d7a2:u'\u03c3', 0x1d7a3:u'\u03c4',
-0x1d7a4:u'\u03c5', 0x1d7a5:u'\u03c6', 0x1d7a6:u'\u03c7', 0x1d7a7:u'\u03c8',
-0x1d7a8:u'\u03c9', 0x1d7bb:u'\u03c3', }
+0xb5:'\u03bc', 0xdf:'ss', 0x130:'i\u0307', 0x149:'\u02bcn',
+0x17f:'s', 0x1f0:'j\u030c', 0x345:'\u03b9', 0x37a:' \u03b9',
+0x390:'\u03b9\u0308\u0301', 0x3b0:'\u03c5\u0308\u0301', 0x3c2:'\u03c3', 0x3d0:'\u03b2',
+0x3d1:'\u03b8', 0x3d2:'\u03c5', 0x3d3:'\u03cd', 0x3d4:'\u03cb',
+0x3d5:'\u03c6', 0x3d6:'\u03c0', 0x3f0:'\u03ba', 0x3f1:'\u03c1',
+0x3f2:'\u03c3', 0x3f5:'\u03b5', 0x587:'\u0565\u0582', 0x1e96:'h\u0331',
+0x1e97:'t\u0308', 0x1e98:'w\u030a', 0x1e99:'y\u030a', 0x1e9a:'a\u02be',
+0x1e9b:'\u1e61', 0x1f50:'\u03c5\u0313', 0x1f52:'\u03c5\u0313\u0300', 0x1f54:'\u03c5\u0313\u0301',
+0x1f56:'\u03c5\u0313\u0342', 0x1f80:'\u1f00\u03b9', 0x1f81:'\u1f01\u03b9', 0x1f82:'\u1f02\u03b9',
+0x1f83:'\u1f03\u03b9', 0x1f84:'\u1f04\u03b9', 0x1f85:'\u1f05\u03b9', 0x1f86:'\u1f06\u03b9',
+0x1f87:'\u1f07\u03b9', 0x1f88:'\u1f00\u03b9', 0x1f89:'\u1f01\u03b9', 0x1f8a:'\u1f02\u03b9',
+0x1f8b:'\u1f03\u03b9', 0x1f8c:'\u1f04\u03b9', 0x1f8d:'\u1f05\u03b9', 0x1f8e:'\u1f06\u03b9',
+0x1f8f:'\u1f07\u03b9', 0x1f90:'\u1f20\u03b9', 0x1f91:'\u1f21\u03b9', 0x1f92:'\u1f22\u03b9',
+0x1f93:'\u1f23\u03b9', 0x1f94:'\u1f24\u03b9', 0x1f95:'\u1f25\u03b9', 0x1f96:'\u1f26\u03b9',
+0x1f97:'\u1f27\u03b9', 0x1f98:'\u1f20\u03b9', 0x1f99:'\u1f21\u03b9', 0x1f9a:'\u1f22\u03b9',
+0x1f9b:'\u1f23\u03b9', 0x1f9c:'\u1f24\u03b9', 0x1f9d:'\u1f25\u03b9', 0x1f9e:'\u1f26\u03b9',
+0x1f9f:'\u1f27\u03b9', 0x1fa0:'\u1f60\u03b9', 0x1fa1:'\u1f61\u03b9', 0x1fa2:'\u1f62\u03b9',
+0x1fa3:'\u1f63\u03b9', 0x1fa4:'\u1f64\u03b9', 0x1fa5:'\u1f65\u03b9', 0x1fa6:'\u1f66\u03b9',
+0x1fa7:'\u1f67\u03b9', 0x1fa8:'\u1f60\u03b9', 0x1fa9:'\u1f61\u03b9', 0x1faa:'\u1f62\u03b9',
+0x1fab:'\u1f63\u03b9', 0x1fac:'\u1f64\u03b9', 0x1fad:'\u1f65\u03b9', 0x1fae:'\u1f66\u03b9',
+0x1faf:'\u1f67\u03b9', 0x1fb2:'\u1f70\u03b9', 0x1fb3:'\u03b1\u03b9', 0x1fb4:'\u03ac\u03b9',
+0x1fb6:'\u03b1\u0342', 0x1fb7:'\u03b1\u0342\u03b9', 0x1fbc:'\u03b1\u03b9', 0x1fbe:'\u03b9',
+0x1fc2:'\u1f74\u03b9', 0x1fc3:'\u03b7\u03b9', 0x1fc4:'\u03ae\u03b9', 0x1fc6:'\u03b7\u0342',
+0x1fc7:'\u03b7\u0342\u03b9', 0x1fcc:'\u03b7\u03b9', 0x1fd2:'\u03b9\u0308\u0300', 0x1fd3:'\u03b9\u0308\u0301',
+0x1fd6:'\u03b9\u0342', 0x1fd7:'\u03b9\u0308\u0342', 0x1fe2:'\u03c5\u0308\u0300', 0x1fe3:'\u03c5\u0308\u0301',
+0x1fe4:'\u03c1\u0313', 0x1fe6:'\u03c5\u0342', 0x1fe7:'\u03c5\u0308\u0342', 0x1ff2:'\u1f7c\u03b9',
+0x1ff3:'\u03c9\u03b9', 0x1ff4:'\u03ce\u03b9', 0x1ff6:'\u03c9\u0342', 0x1ff7:'\u03c9\u0342\u03b9',
+0x1ffc:'\u03c9\u03b9', 0x20a8:'rs', 0x2102:'c', 0x2103:'\xb0c',
+0x2107:'\u025b', 0x2109:'\xb0f', 0x210b:'h', 0x210c:'h',
+0x210d:'h', 0x2110:'i', 0x2111:'i', 0x2112:'l',
+0x2115:'n', 0x2116:'no', 0x2119:'p', 0x211a:'q',
+0x211b:'r', 0x211c:'r', 0x211d:'r', 0x2120:'sm',
+0x2121:'tel', 0x2122:'tm', 0x2124:'z', 0x2128:'z',
+0x212c:'b', 0x212d:'c', 0x2130:'e', 0x2131:'f',
+0x2133:'m', 0x213e:'\u03b3', 0x213f:'\u03c0', 0x2145:'d',
+0x3371:'hpa', 0x3373:'au', 0x3375:'ov', 0x3380:'pa',
+0x3381:'na', 0x3382:'\u03bca', 0x3383:'ma', 0x3384:'ka',
+0x3385:'kb', 0x3386:'mb', 0x3387:'gb', 0x338a:'pf',
+0x338b:'nf', 0x338c:'\u03bcf', 0x3390:'hz', 0x3391:'khz',
+0x3392:'mhz', 0x3393:'ghz', 0x3394:'thz', 0x33a9:'pa',
+0x33aa:'kpa', 0x33ab:'mpa', 0x33ac:'gpa', 0x33b4:'pv',
+0x33b5:'nv', 0x33b6:'\u03bcv', 0x33b7:'mv', 0x33b8:'kv',
+0x33b9:'mv', 0x33ba:'pw', 0x33bb:'nw', 0x33bc:'\u03bcw',
+0x33bd:'mw', 0x33be:'kw', 0x33bf:'mw', 0x33c0:'k\u03c9',
+0x33c1:'m\u03c9', 0x33c3:'bq', 0x33c6:'c\u2215kg', 0x33c7:'co.',
+0x33c8:'db', 0x33c9:'gy', 0x33cb:'hp', 0x33cd:'kk',
+0x33ce:'km', 0x33d7:'ph', 0x33d9:'ppm', 0x33da:'pr',
+0x33dc:'sv', 0x33dd:'wb', 0xfb00:'ff', 0xfb01:'fi',
+0xfb02:'fl', 0xfb03:'ffi', 0xfb04:'ffl', 0xfb05:'st',
+0xfb06:'st', 0xfb13:'\u0574\u0576', 0xfb14:'\u0574\u0565', 0xfb15:'\u0574\u056b',
+0xfb16:'\u057e\u0576', 0xfb17:'\u0574\u056d', 0x1d400:'a', 0x1d401:'b',
+0x1d402:'c', 0x1d403:'d', 0x1d404:'e', 0x1d405:'f',
+0x1d406:'g', 0x1d407:'h', 0x1d408:'i', 0x1d409:'j',
+0x1d40a:'k', 0x1d40b:'l', 0x1d40c:'m', 0x1d40d:'n',
+0x1d40e:'o', 0x1d40f:'p', 0x1d410:'q', 0x1d411:'r',
+0x1d412:'s', 0x1d413:'t', 0x1d414:'u', 0x1d415:'v',
+0x1d416:'w', 0x1d417:'x', 0x1d418:'y', 0x1d419:'z',
+0x1d434:'a', 0x1d435:'b', 0x1d436:'c', 0x1d437:'d',
+0x1d438:'e', 0x1d439:'f', 0x1d43a:'g', 0x1d43b:'h',
+0x1d43c:'i', 0x1d43d:'j', 0x1d43e:'k', 0x1d43f:'l',
+0x1d440:'m', 0x1d441:'n', 0x1d442:'o', 0x1d443:'p',
+0x1d444:'q', 0x1d445:'r', 0x1d446:'s', 0x1d447:'t',
+0x1d448:'u', 0x1d449:'v', 0x1d44a:'w', 0x1d44b:'x',
+0x1d44c:'y', 0x1d44d:'z', 0x1d468:'a', 0x1d469:'b',
+0x1d46a:'c', 0x1d46b:'d', 0x1d46c:'e', 0x1d46d:'f',
+0x1d46e:'g', 0x1d46f:'h', 0x1d470:'i', 0x1d471:'j',
+0x1d472:'k', 0x1d473:'l', 0x1d474:'m', 0x1d475:'n',
+0x1d476:'o', 0x1d477:'p', 0x1d478:'q', 0x1d479:'r',
+0x1d47a:'s', 0x1d47b:'t', 0x1d47c:'u', 0x1d47d:'v',
+0x1d47e:'w', 0x1d47f:'x', 0x1d480:'y', 0x1d481:'z',
+0x1d49c:'a', 0x1d49e:'c', 0x1d49f:'d', 0x1d4a2:'g',
+0x1d4a5:'j', 0x1d4a6:'k', 0x1d4a9:'n', 0x1d4aa:'o',
+0x1d4ab:'p', 0x1d4ac:'q', 0x1d4ae:'s', 0x1d4af:'t',
+0x1d4b0:'u', 0x1d4b1:'v', 0x1d4b2:'w', 0x1d4b3:'x',
+0x1d4b4:'y', 0x1d4b5:'z', 0x1d4d0:'a', 0x1d4d1:'b',
+0x1d4d2:'c', 0x1d4d3:'d', 0x1d4d4:'e', 0x1d4d5:'f',
+0x1d4d6:'g', 0x1d4d7:'h', 0x1d4d8:'i', 0x1d4d9:'j',
+0x1d4da:'k', 0x1d4db:'l', 0x1d4dc:'m', 0x1d4dd:'n',
+0x1d4de:'o', 0x1d4df:'p', 0x1d4e0:'q', 0x1d4e1:'r',
+0x1d4e2:'s', 0x1d4e3:'t', 0x1d4e4:'u', 0x1d4e5:'v',
+0x1d4e6:'w', 0x1d4e7:'x', 0x1d4e8:'y', 0x1d4e9:'z',
+0x1d504:'a', 0x1d505:'b', 0x1d507:'d', 0x1d508:'e',
+0x1d509:'f', 0x1d50a:'g', 0x1d50d:'j', 0x1d50e:'k',
+0x1d50f:'l', 0x1d510:'m', 0x1d511:'n', 0x1d512:'o',
+0x1d513:'p', 0x1d514:'q', 0x1d516:'s', 0x1d517:'t',
+0x1d518:'u', 0x1d519:'v', 0x1d51a:'w', 0x1d51b:'x',
+0x1d51c:'y', 0x1d538:'a', 0x1d539:'b', 0x1d53b:'d',
+0x1d53c:'e', 0x1d53d:'f', 0x1d53e:'g', 0x1d540:'i',
+0x1d541:'j', 0x1d542:'k', 0x1d543:'l', 0x1d544:'m',
+0x1d546:'o', 0x1d54a:'s', 0x1d54b:'t', 0x1d54c:'u',
+0x1d54d:'v', 0x1d54e:'w', 0x1d54f:'x', 0x1d550:'y',
+0x1d56c:'a', 0x1d56d:'b', 0x1d56e:'c', 0x1d56f:'d',
+0x1d570:'e', 0x1d571:'f', 0x1d572:'g', 0x1d573:'h',
+0x1d574:'i', 0x1d575:'j', 0x1d576:'k', 0x1d577:'l',
+0x1d578:'m', 0x1d579:'n', 0x1d57a:'o', 0x1d57b:'p',
+0x1d57c:'q', 0x1d57d:'r', 0x1d57e:'s', 0x1d57f:'t',
+0x1d580:'u', 0x1d581:'v', 0x1d582:'w', 0x1d583:'x',
+0x1d584:'y', 0x1d585:'z', 0x1d5a0:'a', 0x1d5a1:'b',
+0x1d5a2:'c', 0x1d5a3:'d', 0x1d5a4:'e', 0x1d5a5:'f',
+0x1d5a6:'g', 0x1d5a7:'h', 0x1d5a8:'i', 0x1d5a9:'j',
+0x1d5aa:'k', 0x1d5ab:'l', 0x1d5ac:'m', 0x1d5ad:'n',
+0x1d5ae:'o', 0x1d5af:'p', 0x1d5b0:'q', 0x1d5b1:'r',
+0x1d5b2:'s', 0x1d5b3:'t', 0x1d5b4:'u', 0x1d5b5:'v',
+0x1d5b6:'w', 0x1d5b7:'x', 0x1d5b8:'y', 0x1d5b9:'z',
+0x1d5d4:'a', 0x1d5d5:'b', 0x1d5d6:'c', 0x1d5d7:'d',
+0x1d5d8:'e', 0x1d5d9:'f', 0x1d5da:'g', 0x1d5db:'h',
+0x1d5dc:'i', 0x1d5dd:'j', 0x1d5de:'k', 0x1d5df:'l',
+0x1d5e0:'m', 0x1d5e1:'n', 0x1d5e2:'o', 0x1d5e3:'p',
+0x1d5e4:'q', 0x1d5e5:'r', 0x1d5e6:'s', 0x1d5e7:'t',
+0x1d5e8:'u', 0x1d5e9:'v', 0x1d5ea:'w', 0x1d5eb:'x',
+0x1d5ec:'y', 0x1d5ed:'z', 0x1d608:'a', 0x1d609:'b',
+0x1d60a:'c', 0x1d60b:'d', 0x1d60c:'e', 0x1d60d:'f',
+0x1d60e:'g', 0x1d60f:'h', 0x1d610:'i', 0x1d611:'j',
+0x1d612:'k', 0x1d613:'l', 0x1d614:'m', 0x1d615:'n',
+0x1d616:'o', 0x1d617:'p', 0x1d618:'q', 0x1d619:'r',
+0x1d61a:'s', 0x1d61b:'t', 0x1d61c:'u', 0x1d61d:'v',
+0x1d61e:'w', 0x1d61f:'x', 0x1d620:'y', 0x1d621:'z',
+0x1d63c:'a', 0x1d63d:'b', 0x1d63e:'c', 0x1d63f:'d',
+0x1d640:'e', 0x1d641:'f', 0x1d642:'g', 0x1d643:'h',
+0x1d644:'i', 0x1d645:'j', 0x1d646:'k', 0x1d647:'l',
+0x1d648:'m', 0x1d649:'n', 0x1d64a:'o', 0x1d64b:'p',
+0x1d64c:'q', 0x1d64d:'r', 0x1d64e:'s', 0x1d64f:'t',
+0x1d650:'u', 0x1d651:'v', 0x1d652:'w', 0x1d653:'x',
+0x1d654:'y', 0x1d655:'z', 0x1d670:'a', 0x1d671:'b',
+0x1d672:'c', 0x1d673:'d', 0x1d674:'e', 0x1d675:'f',
+0x1d676:'g', 0x1d677:'h', 0x1d678:'i', 0x1d679:'j',
+0x1d67a:'k', 0x1d67b:'l', 0x1d67c:'m', 0x1d67d:'n',
+0x1d67e:'o', 0x1d67f:'p', 0x1d680:'q', 0x1d681:'r',
+0x1d682:'s', 0x1d683:'t', 0x1d684:'u', 0x1d685:'v',
+0x1d686:'w', 0x1d687:'x', 0x1d688:'y', 0x1d689:'z',
+0x1d6a8:'\u03b1', 0x1d6a9:'\u03b2', 0x1d6aa:'\u03b3', 0x1d6ab:'\u03b4',
+0x1d6ac:'\u03b5', 0x1d6ad:'\u03b6', 0x1d6ae:'\u03b7', 0x1d6af:'\u03b8',
+0x1d6b0:'\u03b9', 0x1d6b1:'\u03ba', 0x1d6b2:'\u03bb', 0x1d6b3:'\u03bc',
+0x1d6b4:'\u03bd', 0x1d6b5:'\u03be', 0x1d6b6:'\u03bf', 0x1d6b7:'\u03c0',
+0x1d6b8:'\u03c1', 0x1d6b9:'\u03b8', 0x1d6ba:'\u03c3', 0x1d6bb:'\u03c4',
+0x1d6bc:'\u03c5', 0x1d6bd:'\u03c6', 0x1d6be:'\u03c7', 0x1d6bf:'\u03c8',
+0x1d6c0:'\u03c9', 0x1d6d3:'\u03c3', 0x1d6e2:'\u03b1', 0x1d6e3:'\u03b2',
+0x1d6e4:'\u03b3', 0x1d6e5:'\u03b4', 0x1d6e6:'\u03b5', 0x1d6e7:'\u03b6',
+0x1d6e8:'\u03b7', 0x1d6e9:'\u03b8', 0x1d6ea:'\u03b9', 0x1d6eb:'\u03ba',
+0x1d6ec:'\u03bb', 0x1d6ed:'\u03bc', 0x1d6ee:'\u03bd', 0x1d6ef:'\u03be',
+0x1d6f0:'\u03bf', 0x1d6f1:'\u03c0', 0x1d6f2:'\u03c1', 0x1d6f3:'\u03b8',
+0x1d6f4:'\u03c3', 0x1d6f5:'\u03c4', 0x1d6f6:'\u03c5', 0x1d6f7:'\u03c6',
+0x1d6f8:'\u03c7', 0x1d6f9:'\u03c8', 0x1d6fa:'\u03c9', 0x1d70d:'\u03c3',
+0x1d71c:'\u03b1', 0x1d71d:'\u03b2', 0x1d71e:'\u03b3', 0x1d71f:'\u03b4',
+0x1d720:'\u03b5', 0x1d721:'\u03b6', 0x1d722:'\u03b7', 0x1d723:'\u03b8',
+0x1d724:'\u03b9', 0x1d725:'\u03ba', 0x1d726:'\u03bb', 0x1d727:'\u03bc',
+0x1d728:'\u03bd', 0x1d729:'\u03be', 0x1d72a:'\u03bf', 0x1d72b:'\u03c0',
+0x1d72c:'\u03c1', 0x1d72d:'\u03b8', 0x1d72e:'\u03c3', 0x1d72f:'\u03c4',
+0x1d730:'\u03c5', 0x1d731:'\u03c6', 0x1d732:'\u03c7', 0x1d733:'\u03c8',
+0x1d734:'\u03c9', 0x1d747:'\u03c3', 0x1d756:'\u03b1', 0x1d757:'\u03b2',
+0x1d758:'\u03b3', 0x1d759:'\u03b4', 0x1d75a:'\u03b5', 0x1d75b:'\u03b6',
+0x1d75c:'\u03b7', 0x1d75d:'\u03b8', 0x1d75e:'\u03b9', 0x1d75f:'\u03ba',
+0x1d760:'\u03bb', 0x1d761:'\u03bc', 0x1d762:'\u03bd', 0x1d763:'\u03be',
+0x1d764:'\u03bf', 0x1d765:'\u03c0', 0x1d766:'\u03c1', 0x1d767:'\u03b8',
+0x1d768:'\u03c3', 0x1d769:'\u03c4', 0x1d76a:'\u03c5', 0x1d76b:'\u03c6',
+0x1d76c:'\u03c7', 0x1d76d:'\u03c8', 0x1d76e:'\u03c9', 0x1d781:'\u03c3',
+0x1d790:'\u03b1', 0x1d791:'\u03b2', 0x1d792:'\u03b3', 0x1d793:'\u03b4',
+0x1d794:'\u03b5', 0x1d795:'\u03b6', 0x1d796:'\u03b7', 0x1d797:'\u03b8',
+0x1d798:'\u03b9', 0x1d799:'\u03ba', 0x1d79a:'\u03bb', 0x1d79b:'\u03bc',
+0x1d79c:'\u03bd', 0x1d79d:'\u03be', 0x1d79e:'\u03bf', 0x1d79f:'\u03c0',
+0x1d7a0:'\u03c1', 0x1d7a1:'\u03b8', 0x1d7a2:'\u03c3', 0x1d7a3:'\u03c4',
+0x1d7a4:'\u03c5', 0x1d7a5:'\u03c6', 0x1d7a6:'\u03c7', 0x1d7a7:'\u03c8',
+0x1d7a8:'\u03c9', 0x1d7bb:'\u03c3', }
 
 def map_table_b3(code):
     r = b3_exceptions.get(ord(code))
@@ -195,7 +195,7 @@
 def map_table_b2(a):
     al = map_table_b3(a)
     b = unicodedata.normalize("NFKC", al)
-    bl = u"".join([map_table_b3(ch) for ch in b])
+    bl = "".join([map_table_b3(ch) for ch in b])
     c = unicodedata.normalize("NFKC", bl)
     if b != c:
         return c
@@ -204,11 +204,11 @@
 
 
 def in_table_c11(code):
-    return code == u" "
+    return code == " "
 
 
 def in_table_c12(code):
-    return unicodedata.category(code) == "Zs" and code != u" "
+    return unicodedata.category(code) == "Zs" and code != " "
 
 def in_table_c11_c12(code):
     return unicodedata.category(code) == "Zs"

Modified: python/branches/py3k-struni/Lib/tarfile.py
==============================================================================
--- python/branches/py3k-struni/Lib/tarfile.py	(original)
+++ python/branches/py3k-struni/Lib/tarfile.py	Wed May  2 21:09:54 2007
@@ -1031,7 +1031,7 @@
         for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
             val = info[name]
             if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
-                pax_headers[name] = unicode(val)
+                pax_headers[name] = str(val)
                 info[name] = 0
 
         if pax_headers:
@@ -1054,12 +1054,12 @@
 
     @staticmethod
     def _to_unicode(value, encoding):
-        if isinstance(value, unicode):
+        if isinstance(value, str):
             return value
         elif isinstance(value, (int, float)):
-            return unicode(value)
+            return str(value)
         elif isinstance(value, str):
-            return unicode(value, encoding)
+            return str(value, encoding)
         else:
             raise ValueError("unable to convert to unicode: %r" % value)
 

Modified: python/branches/py3k-struni/Lib/test/bad_coding2.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/bad_coding2.py	(original)
+++ python/branches/py3k-struni/Lib/test/bad_coding2.py	Wed May  2 21:09:54 2007
@@ -1,2 +1,2 @@
 #coding: utf8
-print '我'
+print('我')

Modified: python/branches/py3k-struni/Lib/test/pickletester.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/pickletester.py	(original)
+++ python/branches/py3k-struni/Lib/test/pickletester.py	Wed May  2 21:09:54 2007
@@ -484,8 +484,8 @@
 
     if have_unicode:
         def test_unicode(self):
-            endcases = [unicode(''), unicode('<\\u>'), unicode('<\\\u1234>'),
-                        unicode('<\n>'),  unicode('<\\>')]
+            endcases = [str(''), str('<\\u>'), str('<\\\u1234>'),
+                        str('<\n>'),  str('<\\>')]
             for proto in protocols:
                 for u in endcases:
                     p = self.dumps(u, proto)
@@ -908,8 +908,8 @@
 class MyStr(str):
     sample = "hello"
 
-class MyUnicode(unicode):
-    sample = u"hello \u1234"
+class MyUnicode(str):
+    sample = "hello \u1234"
 
 class MyTuple(tuple):
     sample = (1, 2, 3)

Modified: python/branches/py3k-struni/Lib/test/string_tests.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/string_tests.py	(original)
+++ python/branches/py3k-struni/Lib/test/string_tests.py	Wed May  2 21:09:54 2007
@@ -589,7 +589,7 @@
         self.checkequal(['a']*19 + ['a '], aaa, 'split', None, 19)
 
         # mixed use of str and unicode
-        self.checkequal([u'a', u'b', u'c d'], 'a b c d', 'split', u' ', 2)
+        self.checkequal(['a', 'b', 'c d'], 'a b c d', 'split', ' ', 2)
 
     def test_additional_rsplit(self):
         self.checkequal(['this', 'is', 'the', 'rsplit', 'function'],
@@ -622,7 +622,7 @@
         self.checkequal([' a  a'] + ['a']*18, aaa, 'rsplit', None, 18)
 
         # mixed use of str and unicode
-        self.checkequal([u'a b', u'c', u'd'], 'a b c d', 'rsplit', u' ', 2)
+        self.checkequal(['a b', 'c', 'd'], 'a b c d', 'rsplit', ' ', 2)
 
     def test_strip(self):
         self.checkequal('hello', '   hello   ', 'strip')
@@ -644,14 +644,14 @@
 
         # strip/lstrip/rstrip with unicode arg
         if test_support.have_unicode:
-            self.checkequal(unicode('hello', 'ascii'), 'xyzzyhelloxyzzy',
-                 'strip', unicode('xyz', 'ascii'))
-            self.checkequal(unicode('helloxyzzy', 'ascii'), 'xyzzyhelloxyzzy',
-                 'lstrip', unicode('xyz', 'ascii'))
-            self.checkequal(unicode('xyzzyhello', 'ascii'), 'xyzzyhelloxyzzy',
-                 'rstrip', unicode('xyz', 'ascii'))
-            self.checkequal(unicode('hello', 'ascii'), 'hello',
-                 'strip', unicode('xyz', 'ascii'))
+            self.checkequal(str('hello', 'ascii'), 'xyzzyhelloxyzzy',
+                 'strip', str('xyz', 'ascii'))
+            self.checkequal(str('helloxyzzy', 'ascii'), 'xyzzyhelloxyzzy',
+                 'lstrip', str('xyz', 'ascii'))
+            self.checkequal(str('xyzzyhello', 'ascii'), 'xyzzyhelloxyzzy',
+                 'rstrip', str('xyz', 'ascii'))
+            self.checkequal(str('hello', 'ascii'), 'hello',
+                 'strip', str('xyz', 'ascii'))
 
         self.checkraises(TypeError, 'hello', 'strip', 42, 42)
         self.checkraises(TypeError, 'hello', 'lstrip', 42, 42)
@@ -908,13 +908,13 @@
         self.checkequal(False, '', '__contains__', 'asdf')    # vereq('asdf' in '', False)
 
     def test_subscript(self):
-        self.checkequal(u'a', 'abc', '__getitem__', 0)
-        self.checkequal(u'c', 'abc', '__getitem__', -1)
-        self.checkequal(u'a', 'abc', '__getitem__', 0)
-        self.checkequal(u'abc', 'abc', '__getitem__', slice(0, 3))
-        self.checkequal(u'abc', 'abc', '__getitem__', slice(0, 1000))
-        self.checkequal(u'a', 'abc', '__getitem__', slice(0, 1))
-        self.checkequal(u'', 'abc', '__getitem__', slice(0, 0))
+        self.checkequal('a', 'abc', '__getitem__', 0)
+        self.checkequal('c', 'abc', '__getitem__', -1)
+        self.checkequal('a', 'abc', '__getitem__', 0)
+        self.checkequal('abc', 'abc', '__getitem__', slice(0, 3))
+        self.checkequal('abc', 'abc', '__getitem__', slice(0, 1000))
+        self.checkequal('a', 'abc', '__getitem__', slice(0, 1))
+        self.checkequal('', 'abc', '__getitem__', slice(0, 0))
         # FIXME What about negative indices? This is handled differently by [] and __getitem__(slice)
 
         self.checkraises(TypeError, 'abc', '__getitem__', 'def')
@@ -957,11 +957,11 @@
         self.checkequal('abc', 'a', 'join', ('abc',))
         self.checkequal('z', 'a', 'join', UserList(['z']))
         if test_support.have_unicode:
-            self.checkequal(unicode('a.b.c'), unicode('.'), 'join', ['a', 'b', 'c'])
-            self.checkequal(unicode('a.b.c'), '.', 'join', [unicode('a'), 'b', 'c'])
-            self.checkequal(unicode('a.b.c'), '.', 'join', ['a', unicode('b'), 'c'])
-            self.checkequal(unicode('a.b.c'), '.', 'join', ['a', 'b', unicode('c')])
-            self.checkraises(TypeError, '.', 'join', ['a', unicode('b'), 3])
+            self.checkequal(str('a.b.c'), str('.'), 'join', ['a', 'b', 'c'])
+            self.checkequal(str('a.b.c'), '.', 'join', [str('a'), 'b', 'c'])
+            self.checkequal(str('a.b.c'), '.', 'join', ['a', str('b'), 'c'])
+            self.checkequal(str('a.b.c'), '.', 'join', ['a', 'b', str('c')])
+            self.checkraises(TypeError, '.', 'join', ['a', str('b'), 3])
         for i in [5, 25, 125]:
             self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
                  ['a' * i] * i)
@@ -1159,7 +1159,7 @@
         self.assert_(s1 is s2)
 
         # Should also test mixed-type join.
-        if t is unicode:
+        if t is str:
             s1 = subclass("abcd")
             s2 = "".join([s1])
             self.assert_(s1 is not s2)
@@ -1171,14 +1171,14 @@
 
         elif t is str:
             s1 = subclass("abcd")
-            s2 = u"".join([s1])
+            s2 = "".join([s1])
             self.assert_(s1 is not s2)
-            self.assert_(type(s2) is unicode) # promotes!
+            self.assert_(type(s2) is str) # promotes!
 
             s1 = t("abcd")
-            s2 = u"".join([s1])
+            s2 = "".join([s1])
             self.assert_(s1 is not s2)
-            self.assert_(type(s2) is unicode) # promotes!
+            self.assert_(type(s2) is str) # promotes!
 
         else:
             self.fail("unexpected type for MixinStrUnicodeTest %r" % t)

Modified: python/branches/py3k-struni/Lib/test/test_StringIO.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_StringIO.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_StringIO.py	Wed May  2 21:09:54 2007
@@ -112,10 +112,10 @@
         f = self.MODULE.StringIO()
         f.write(self._line[:6])
         f.seek(3)
-        f.write(unicode(self._line[20:26]))
-        f.write(unicode(self._line[52]))
+        f.write(str(self._line[20:26]))
+        f.write(str(self._line[52]))
         s = f.getvalue()
-        self.assertEqual(s, unicode('abcuvwxyz!'))
+        self.assertEqual(s, str('abcuvwxyz!'))
         self.assertEqual(type(s), types.UnicodeType)
 
 class TestcStringIO(TestGenericStringIO):
@@ -130,18 +130,18 @@
         # Check that this works.
 
         f = self.MODULE.StringIO()
-        f.write(unicode(self._line[:5]))
+        f.write(str(self._line[:5]))
         s = f.getvalue()
         self.assertEqual(s, 'abcde')
         self.assertEqual(type(s), types.StringType)
 
-        f = self.MODULE.StringIO(unicode(self._line[:5]))
+        f = self.MODULE.StringIO(str(self._line[:5]))
         s = f.getvalue()
         self.assertEqual(s, 'abcde')
         self.assertEqual(type(s), types.StringType)
 
         self.assertRaises(UnicodeEncodeError, self.MODULE.StringIO,
-                          unicode('\xf4', 'latin-1'))
+                          str('\xf4', 'latin-1'))
 
 import sys
 if sys.platform.startswith('java'):

Modified: python/branches/py3k-struni/Lib/test/test_array.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_array.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_array.py	Wed May  2 21:09:54 2007
@@ -747,7 +747,7 @@
 
     def test_nounicode(self):
         a = array.array(self.typecode, self.example)
-        self.assertRaises(ValueError, a.fromunicode, unicode(''))
+        self.assertRaises(ValueError, a.fromunicode, str(''))
         self.assertRaises(ValueError, a.tounicode)
 
 tests.append(CharacterTest)
@@ -755,27 +755,27 @@
 if test_support.have_unicode:
     class UnicodeTest(StringTest):
         typecode = 'u'
-        example = unicode(r'\x01\u263a\x00\ufeff', 'unicode-escape')
-        smallerexample = unicode(r'\x01\u263a\x00\ufefe', 'unicode-escape')
-        biggerexample = unicode(r'\x01\u263a\x01\ufeff', 'unicode-escape')
-        outside = unicode('\x33')
+        example = str(r'\x01\u263a\x00\ufeff', 'unicode-escape')
+        smallerexample = str(r'\x01\u263a\x00\ufefe', 'unicode-escape')
+        biggerexample = str(r'\x01\u263a\x01\ufeff', 'unicode-escape')
+        outside = str('\x33')
         minitemsize = 2
 
         def test_unicode(self):
-            self.assertRaises(TypeError, array.array, 'b', unicode('foo', 'ascii'))
+            self.assertRaises(TypeError, array.array, 'b', str('foo', 'ascii'))
 
-            a = array.array('u', unicode(r'\xa0\xc2\u1234', 'unicode-escape'))
-            a.fromunicode(unicode(' ', 'ascii'))
-            a.fromunicode(unicode('', 'ascii'))
-            a.fromunicode(unicode('', 'ascii'))
-            a.fromunicode(unicode(r'\x11abc\xff\u1234', 'unicode-escape'))
+            a = array.array('u', str(r'\xa0\xc2\u1234', 'unicode-escape'))
+            a.fromunicode(str(' ', 'ascii'))
+            a.fromunicode(str('', 'ascii'))
+            a.fromunicode(str('', 'ascii'))
+            a.fromunicode(str(r'\x11abc\xff\u1234', 'unicode-escape'))
             s = a.tounicode()
             self.assertEqual(
                 s,
-                unicode(r'\xa0\xc2\u1234 \x11abc\xff\u1234', 'unicode-escape')
+                str(r'\xa0\xc2\u1234 \x11abc\xff\u1234', 'unicode-escape')
             )
 
-            s = unicode(r'\x00="\'a\\b\x80\xff\u0000\u0001\u1234', 'unicode-escape')
+            s = str(r'\x00="\'a\\b\x80\xff\u0000\u0001\u1234', 'unicode-escape')
             a = array.array('u', s)
             self.assertEqual(
                 repr(a),

Modified: python/branches/py3k-struni/Lib/test/test_bigmem.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_bigmem.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_bigmem.py	Wed May  2 21:09:54 2007
@@ -562,11 +562,11 @@
 
     @bigmemtest(minsize=_2G + 2, memuse=16)
     def test_compare(self, size):
-        t1 = (u'',) * size
-        t2 = (u'',) * size
+        t1 = ('',) * size
+        t2 = ('',) * size
         self.failUnless(t1 == t2)
         del t2
-        t2 = (u'',) * (size + 1)
+        t2 = ('',) * (size + 1)
         self.failIf(t1 == t2)
         del t2
         t2 = (1,) * size
@@ -667,11 +667,11 @@
 
     @bigmemtest(minsize=_2G + 2, memuse=16)
     def test_compare(self, size):
-        l1 = [u''] * size
-        l2 = [u''] * size
+        l1 = [''] * size
+        l2 = [''] * size
         self.failUnless(l1 == l2)
         del l2
-        l2 = [u''] * (size + 1)
+        l2 = [''] * (size + 1)
         self.failIf(l1 == l2)
         del l2
         l2 = [2] * size
@@ -896,27 +896,27 @@
 
     @bigmemtest(minsize=_2G // 5 + 4, memuse=8 * 5)
     def test_pop(self, size):
-        l = [u"a", u"b", u"c", u"d", u"e"] * size
+        l = ["a", "b", "c", "d", "e"] * size
         size *= 5
         self.assertEquals(len(l), size)
 
         item = l.pop()
         size -= 1
         self.assertEquals(len(l), size)
-        self.assertEquals(item, u"e")
-        self.assertEquals(l[-2:], [u"c", u"d"])
+        self.assertEquals(item, "e")
+        self.assertEquals(l[-2:], ["c", "d"])
 
         item = l.pop(0)
         size -= 1
         self.assertEquals(len(l), size)
-        self.assertEquals(item, u"a")
-        self.assertEquals(l[:2], [u"b", u"c"])
+        self.assertEquals(item, "a")
+        self.assertEquals(l[:2], ["b", "c"])
 
         item = l.pop(size - 2)
         size -= 1
         self.assertEquals(len(l), size)
-        self.assertEquals(item, u"c")
-        self.assertEquals(l[-2:], [u"b", u"d"])
+        self.assertEquals(item, "c")
+        self.assertEquals(l[-2:], ["b", "d"])
 
     @bigmemtest(minsize=_2G + 10, memuse=8)
     def test_remove(self, size):

Modified: python/branches/py3k-struni/Lib/test/test_binascii.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_binascii.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_binascii.py	Wed May  2 21:09:54 2007
@@ -124,7 +124,7 @@
 
         # Verify the treatment of Unicode strings
         if test_support.have_unicode:
-            self.assertEqual(binascii.hexlify(unicode('a', 'ascii')), '61')
+            self.assertEqual(binascii.hexlify(str('a', 'ascii')), '61')
 
     def test_qp(self):
         # A test for SF bug 534347 (segfaults without the proper fix)

Modified: python/branches/py3k-struni/Lib/test/test_bool.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_bool.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_bool.py	Wed May  2 21:09:54 2007
@@ -208,28 +208,28 @@
         self.assertIs("xyz".startswith("z"), False)
 
         if test_support.have_unicode:
-            self.assertIs(unicode("xyz", 'ascii').endswith(unicode("z", 'ascii')), True)
-            self.assertIs(unicode("xyz", 'ascii').endswith(unicode("x", 'ascii')), False)
-            self.assertIs(unicode("xyz0123", 'ascii').isalnum(), True)
-            self.assertIs(unicode("@#$%", 'ascii').isalnum(), False)
-            self.assertIs(unicode("xyz", 'ascii').isalpha(), True)
-            self.assertIs(unicode("@#$%", 'ascii').isalpha(), False)
-            self.assertIs(unicode("0123", 'ascii').isdecimal(), True)
-            self.assertIs(unicode("xyz", 'ascii').isdecimal(), False)
-            self.assertIs(unicode("0123", 'ascii').isdigit(), True)
-            self.assertIs(unicode("xyz", 'ascii').isdigit(), False)
-            self.assertIs(unicode("xyz", 'ascii').islower(), True)
-            self.assertIs(unicode("XYZ", 'ascii').islower(), False)
-            self.assertIs(unicode("0123", 'ascii').isnumeric(), True)
-            self.assertIs(unicode("xyz", 'ascii').isnumeric(), False)
-            self.assertIs(unicode(" ", 'ascii').isspace(), True)
-            self.assertIs(unicode("XYZ", 'ascii').isspace(), False)
-            self.assertIs(unicode("X", 'ascii').istitle(), True)
-            self.assertIs(unicode("x", 'ascii').istitle(), False)
-            self.assertIs(unicode("XYZ", 'ascii').isupper(), True)
-            self.assertIs(unicode("xyz", 'ascii').isupper(), False)
-            self.assertIs(unicode("xyz", 'ascii').startswith(unicode("x", 'ascii')), True)
-            self.assertIs(unicode("xyz", 'ascii').startswith(unicode("z", 'ascii')), False)
+            self.assertIs(str("xyz", 'ascii').endswith(str("z", 'ascii')), True)
+            self.assertIs(str("xyz", 'ascii').endswith(str("x", 'ascii')), False)
+            self.assertIs(str("xyz0123", 'ascii').isalnum(), True)
+            self.assertIs(str("@#$%", 'ascii').isalnum(), False)
+            self.assertIs(str("xyz", 'ascii').isalpha(), True)
+            self.assertIs(str("@#$%", 'ascii').isalpha(), False)
+            self.assertIs(str("0123", 'ascii').isdecimal(), True)
+            self.assertIs(str("xyz", 'ascii').isdecimal(), False)
+            self.assertIs(str("0123", 'ascii').isdigit(), True)
+            self.assertIs(str("xyz", 'ascii').isdigit(), False)
+            self.assertIs(str("xyz", 'ascii').islower(), True)
+            self.assertIs(str("XYZ", 'ascii').islower(), False)
+            self.assertIs(str("0123", 'ascii').isnumeric(), True)
+            self.assertIs(str("xyz", 'ascii').isnumeric(), False)
+            self.assertIs(str(" ", 'ascii').isspace(), True)
+            self.assertIs(str("XYZ", 'ascii').isspace(), False)
+            self.assertIs(str("X", 'ascii').istitle(), True)
+            self.assertIs(str("x", 'ascii').istitle(), False)
+            self.assertIs(str("XYZ", 'ascii').isupper(), True)
+            self.assertIs(str("xyz", 'ascii').isupper(), False)
+            self.assertIs(str("xyz", 'ascii').startswith(str("x", 'ascii')), True)
+            self.assertIs(str("xyz", 'ascii').startswith(str("z", 'ascii')), False)
 
     def test_boolean(self):
         self.assertEqual(True & 1, 1)

Modified: python/branches/py3k-struni/Lib/test/test_builtin.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_builtin.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_builtin.py	Wed May  2 21:09:54 2007
@@ -74,22 +74,22 @@
 ]
 if have_unicode:
     L += [
-        (unicode('0'), 0),
-        (unicode('1'), 1),
-        (unicode('9'), 9),
-        (unicode('10'), 10),
-        (unicode('99'), 99),
-        (unicode('100'), 100),
-        (unicode('314'), 314),
-        (unicode(' 314'), 314),
-        (unicode(b'\u0663\u0661\u0664 ','raw-unicode-escape'), 314),
-        (unicode('  \t\t  314  \t\t  '), 314),
-        (unicode('  1x'), ValueError),
-        (unicode('  1  '), 1),
-        (unicode('  1\02  '), ValueError),
-        (unicode(''), ValueError),
-        (unicode(' '), ValueError),
-        (unicode('  \t\t  '), ValueError),
+        (str('0'), 0),
+        (str('1'), 1),
+        (str('9'), 9),
+        (str('10'), 10),
+        (str('99'), 99),
+        (str('100'), 100),
+        (str('314'), 314),
+        (str(' 314'), 314),
+        (str(b'\u0663\u0661\u0664 ','raw-unicode-escape'), 314),
+        (str('  \t\t  314  \t\t  '), 314),
+        (str('  1x'), ValueError),
+        (str('  1  '), 1),
+        (str('  1\02  '), ValueError),
+        (str(''), ValueError),
+        (str(' '), ValueError),
+        (str('  \t\t  '), ValueError),
         (unichr(0x200), ValueError),
 ]
 
@@ -220,9 +220,9 @@
         self.assertRaises(TypeError, compile, 'pass', '?', 'exec',
                           mode='eval', source='0', filename='tmp')
         if have_unicode:
-            compile(unicode(b'print(u"\xc3\xa5")\n', 'utf8'), '', 'exec')
+            compile(str(b'print(u"\xc3\xa5")\n', 'utf8'), '', 'exec')
             self.assertRaises(TypeError, compile, unichr(0), 'f', 'exec')
-            self.assertRaises(ValueError, compile, unicode('a = 1'), 'f', 'bad')
+            self.assertRaises(ValueError, compile, str('a = 1'), 'f', 'bad')
 
 
     def test_delattr(self):
@@ -329,19 +329,19 @@
         self.assertEqual(eval('b', globals, locals), 200)
         self.assertEqual(eval('c', globals, locals), 300)
         if have_unicode:
-            self.assertEqual(eval(unicode('1+1')), 2)
-            self.assertEqual(eval(unicode(' 1+1\n')), 2)
+            self.assertEqual(eval(str('1+1')), 2)
+            self.assertEqual(eval(str(' 1+1\n')), 2)
         globals = {'a': 1, 'b': 2}
         locals = {'b': 200, 'c': 300}
         if have_unicode:
-            self.assertEqual(eval(unicode('a'), globals), 1)
-            self.assertEqual(eval(unicode('a'), globals, locals), 1)
-            self.assertEqual(eval(unicode('b'), globals, locals), 200)
-            self.assertEqual(eval(unicode('c'), globals, locals), 300)
+            self.assertEqual(eval(str('a'), globals), 1)
+            self.assertEqual(eval(str('a'), globals, locals), 1)
+            self.assertEqual(eval(str('b'), globals, locals), 200)
+            self.assertEqual(eval(str('c'), globals, locals), 300)
             bom = '\xef\xbb\xbf'
             self.assertEqual(eval((bom + 'a').encode("latin-1"), globals, locals), 1)
-            self.assertEqual(eval(unicode(b'u"\xc3\xa5"', 'utf8'), globals),
-                             unicode(b'\xc3\xa5', 'utf8'))
+            self.assertEqual(eval(str(b'u"\xc3\xa5"', 'utf8'), globals),
+                             str(b'\xc3\xa5', 'utf8'))
         self.assertRaises(TypeError, eval)
         self.assertRaises(TypeError, eval, ())
 
@@ -472,7 +472,7 @@
             del g['__builtins__']
         self.assertEqual(g, {'z': 1})
 
-        exec(u'z = 1+1', g)
+        exec('z = 1+1', g)
         if '__builtins__' in g:
             del g['__builtins__']
         self.assertEqual(g, {'z': 2})
@@ -539,28 +539,28 @@
 
         if have_unicode:
             # test bltinmodule.c::filterunicode()
-            self.assertEqual(filter(None, unicode("12")), unicode("12"))
-            self.assertEqual(filter(lambda x: x>="3", unicode("1234")), unicode("34"))
-            self.assertRaises(TypeError, filter, 42, unicode("12"))
-            self.assertRaises(ValueError, filter, lambda x: x >="3", badstr(unicode("1234")))
+            self.assertEqual(filter(None, str("12")), str("12"))
+            self.assertEqual(filter(lambda x: x>="3", str("1234")), str("34"))
+            self.assertRaises(TypeError, filter, 42, str("12"))
+            self.assertRaises(ValueError, filter, lambda x: x >="3", badstr(str("1234")))
 
-            class badunicode(unicode):
+            class badunicode(str):
                 def __getitem__(self, index):
                     return 42
             self.assertRaises(TypeError, filter, lambda x: x >=42, badunicode("1234"))
 
-            class weirdunicode(unicode):
+            class weirdunicode(str):
                 def __getitem__(self, index):
-                    return weirdunicode(2*unicode.__getitem__(self, index))
+                    return weirdunicode(2*str.__getitem__(self, index))
             self.assertEqual(
-                filter(lambda x: x>=unicode("33"), weirdunicode("1234")), unicode("3344"))
+                filter(lambda x: x>=str("33"), weirdunicode("1234")), str("3344"))
 
-            class shiftunicode(unicode):
+            class shiftunicode(str):
                 def __getitem__(self, index):
-                    return unichr(ord(unicode.__getitem__(self, index))+1)
+                    return unichr(ord(str.__getitem__(self, index))+1)
             self.assertEqual(
-                filter(lambda x: x>=unicode("3"), shiftunicode("1234")),
-                unicode("345")
+                filter(lambda x: x>=str("3"), shiftunicode("1234")),
+                str("345")
             )
 
     def test_filter_subclasses(self):
@@ -578,12 +578,12 @@
             str2:   {"": "", "123": "112233"}
         }
         if have_unicode:
-            class unicode2(unicode):
+            class unicode2(str):
                 def __getitem__(self, index):
-                    return 2*unicode.__getitem__(self, index)
+                    return 2*str.__getitem__(self, index)
             inputs[unicode2] = {
-                unicode(): unicode(),
-                unicode("123"): unicode("112233")
+                str(): str(),
+                str("123"): str("112233")
             }
 
         for (cls, inps) in inputs.items():
@@ -607,10 +607,10 @@
         self.assertRaises(ValueError, float, "  0x3.1  ")
         self.assertRaises(ValueError, float, "  -0x3.p-1  ")
         if have_unicode:
-            self.assertEqual(float(unicode("  3.14  ")), 3.14)
-            self.assertEqual(float(unicode(b"  \u0663.\u0661\u0664  ",'raw-unicode-escape')), 3.14)
+            self.assertEqual(float(str("  3.14  ")), 3.14)
+            self.assertEqual(float(str(b"  \u0663.\u0661\u0664  ",'raw-unicode-escape')), 3.14)
             # Implementation limitation in PyFloat_FromString()
-            self.assertRaises(ValueError, float, unicode("1"*10000))
+            self.assertRaises(ValueError, float, str("1"*10000))
 
     @run_with_locale('LC_NUMERIC', 'fr_FR', 'de_DE')
     def test_float_with_comma(self):
@@ -692,7 +692,7 @@
         self.assertEqual(hash(1), hash(1.0))
         hash('spam')
         if have_unicode:
-            self.assertEqual(hash('spam'), hash(unicode('spam')))
+            self.assertEqual(hash('spam'), hash(str('spam')))
         hash((0,1,2,3))
         def f(): pass
         self.assertRaises(TypeError, hash, [])
@@ -743,7 +743,7 @@
         # Different base:
         self.assertEqual(int("10",16), 16)
         if have_unicode:
-            self.assertEqual(int(unicode("10"),16), 16)
+            self.assertEqual(int(str("10"),16), 16)
         # Test conversion from strings and various anomalies
         for s, v in L:
             for sign in "", "+", "-":
@@ -913,7 +913,7 @@
         self.assertRaises(TypeError, iter, 42, 42)
         lists = [("1", "2"), ["1", "2"], "12"]
         if have_unicode:
-            lists.append(unicode("12"))
+            lists.append(str("12"))
         for l in lists:
             i = iter(l)
             self.assertEqual(next(i), '1')
@@ -1012,11 +1012,11 @@
         self.assertEqual(int(-3.5), -3)
         self.assertEqual(int("-3"), -3)
         if have_unicode:
-            self.assertEqual(int(unicode("-3")), -3)
+            self.assertEqual(int(str("-3")), -3)
         # Different base:
         self.assertEqual(int("10",16), 16)
         if have_unicode:
-            self.assertEqual(int(unicode("10"),16), 16)
+            self.assertEqual(int(str("10"),16), 16)
         # Check conversions from string (same test set as for int(), and then some)
         LL = [
                 ('1' + '0'*20, 10**20),
@@ -1025,8 +1025,8 @@
         L2 = L[:]
         if have_unicode:
             L2 += [
-                (unicode('1') + unicode('0')*20, 10**20),
-                (unicode('1') + unicode('0')*100, 10**100),
+                (str('1') + str('0')*20, 10**20),
+                (str('1') + str('0')*100, 10**100),
         ]
         for s, v in L2 + LL:
             for sign in "", "+", "-":
@@ -1390,7 +1390,7 @@
             self.assertEqual(ord(unichr(sys.maxunicode)), sys.maxunicode)
         self.assertRaises(TypeError, ord, 42)
         if have_unicode:
-            self.assertRaises(TypeError, ord, unicode("12"))
+            self.assertRaises(TypeError, ord, str("12"))
 
     def test_pow(self):
         self.assertEqual(pow(0,0), 1)
@@ -1668,12 +1668,12 @@
 
     def test_unichr(self):
         if have_unicode:
-            self.assertEqual(unichr(32), unicode(' '))
-            self.assertEqual(unichr(65), unicode('A'))
-            self.assertEqual(unichr(97), unicode('a'))
+            self.assertEqual(unichr(32), str(' '))
+            self.assertEqual(unichr(65), str('A'))
+            self.assertEqual(unichr(97), str('a'))
             self.assertEqual(
                 unichr(sys.maxunicode),
-                unicode(('\\U%08x' % (sys.maxunicode)).encode("ascii"), 'unicode-escape')
+                str(('\\U%08x' % (sys.maxunicode)).encode("ascii"), 'unicode-escape')
             )
             self.assertRaises(ValueError, unichr, sys.maxunicode+1)
             self.assertRaises(TypeError, unichr)
@@ -1767,14 +1767,14 @@
         s = 'abracadabra'
         types = [list, tuple]
         if have_unicode:
-            types.insert(0, unicode)
+            types.insert(0, str)
         for T in types:
             self.assertEqual(sorted(s), sorted(T(s)))
 
         s = ''.join(dict.fromkeys(s).keys())  # unique letters only
         types = [set, frozenset, list, tuple, dict.fromkeys]
         if have_unicode:
-            types.insert(0, unicode)
+            types.insert(0, str)
         for T in types:
             self.assertEqual(sorted(s), sorted(T(s)))
 

Modified: python/branches/py3k-struni/Lib/test/test_bytes.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_bytes.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_bytes.py	Wed May  2 21:09:54 2007
@@ -132,10 +132,10 @@
 
         # But they should never compare equal to Unicode!
         # Test this for all expected byte orders and Unicode character sizes
-        self.assertEqual(b"\0a\0b\0c" == u"abc", False)
-        self.assertEqual(b"\0\0\0a\0\0\0b\0\0\0c" == u"abc", False)
-        self.assertEqual(b"a\0b\0c\0" == u"abc", False)
-        self.assertEqual(b"a\0\0\0b\0\0\0c\0\0\0" == u"abc", False)
+        self.assertEqual(b"\0a\0b\0c" == "abc", False)
+        self.assertEqual(b"\0\0\0a\0\0\0b\0\0\0c" == "abc", False)
+        self.assertEqual(b"a\0b\0c\0" == "abc", False)
+        self.assertEqual(b"a\0\0\0b\0\0\0c\0\0\0" == "abc", False)
 
     def test_nohash(self):
         self.assertRaises(TypeError, hash, bytes())
@@ -323,7 +323,7 @@
         self.assertEqual(b, bytes(list(range(8)) + list(range(256))))
 
     def test_encoding(self):
-        sample = u"Hello world\n\u1234\u5678\u9abc\udef0"
+        sample = "Hello world\n\u1234\u5678\u9abc\udef0"
         for enc in ("utf8", "utf16"):
             b = bytes(sample, enc)
             self.assertEqual(b, bytes(map(ord, sample.encode(enc))))
@@ -332,11 +332,11 @@
         self.assertEqual(b, bytes(sample[:-4]))
 
     def test_decode(self):
-        sample = u"Hello world\n\u1234\u5678\u9abc\def0\def0"
+        sample = "Hello world\n\u1234\u5678\u9abc\def0\def0"
         for enc in ("utf8", "utf16"):
             b = bytes(sample, enc)
             self.assertEqual(b.decode(enc), sample)
-        sample = u"Hello world\n\x80\x81\xfe\xff"
+        sample = "Hello world\n\x80\x81\xfe\xff"
         b = bytes(sample, "latin1")
         self.assertRaises(UnicodeDecodeError, b.decode, "utf8")
         self.assertEqual(b.decode("utf8", "ignore"), "Hello world\n")
@@ -366,8 +366,8 @@
         self.assertEqual(b1 + b2, bytes("abcdef"))
         self.assertEqual(b1 + "def", bytes("abcdef"))
         self.assertEqual("def" + b1, bytes("defabc"))
-        self.assertRaises(TypeError, lambda: b1 + u"def")
-        self.assertRaises(TypeError, lambda: u"abc" + b2)
+        self.assertRaises(TypeError, lambda: b1 + "def")
+        self.assertRaises(TypeError, lambda: "abc" + b2)
 
     def test_repeat(self):
         b = bytes("abc")
@@ -391,7 +391,7 @@
         b += "xyz"
         self.assertEqual(b, b"abcdefxyz")
         try:
-            b += u""
+            b += ""
         except TypeError:
             pass
         else:
@@ -476,10 +476,10 @@
 
     def test_literal(self):
         tests =  [
-            (b"Wonderful spam", u"Wonderful spam"),
-            (br"Wonderful spam too", u"Wonderful spam too"),
-            (b"\xaa\x00\000\200", u"\xaa\x00\000\200"),
-            (br"\xaa\x00\000\200", ur"\xaa\x00\000\200"),
+            (b"Wonderful spam", "Wonderful spam"),
+            (br"Wonderful spam too", "Wonderful spam too"),
+            (b"\xaa\x00\000\200", "\xaa\x00\000\200"),
+            (br"\xaa\x00\000\200", r"\xaa\x00\000\200"),
         ]
         for b, s in tests:
             self.assertEqual(b, bytes(s, 'latin-1'))

Modified: python/branches/py3k-struni/Lib/test/test_cfgparser.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_cfgparser.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_cfgparser.py	Wed May  2 21:09:54 2007
@@ -248,12 +248,12 @@
         cf.set("sect", "option2", "splat")
         cf.set("sect", "option2", mystr("splat"))
         try:
-            unicode
+            str
         except NameError:
             pass
         else:
-            cf.set("sect", "option1", unicode("splat"))
-            cf.set("sect", "option2", unicode("splat"))
+            cf.set("sect", "option1", str("splat"))
+            cf.set("sect", "option2", str("splat"))
 
     def test_read_returns_file_list(self):
         file1 = test_support.findfile("cfgparser.1")

Modified: python/branches/py3k-struni/Lib/test/test_charmapcodec.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_charmapcodec.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_charmapcodec.py	Wed May  2 21:09:54 2007
@@ -27,27 +27,27 @@
 
 class CharmapCodecTest(unittest.TestCase):
     def test_constructorx(self):
-        self.assertEquals(unicode('abc', codecname), u'abc')
-        self.assertEquals(unicode('xdef', codecname), u'abcdef')
-        self.assertEquals(unicode('defx', codecname), u'defabc')
-        self.assertEquals(unicode('dxf', codecname), u'dabcf')
-        self.assertEquals(unicode('dxfx', codecname), u'dabcfabc')
+        self.assertEquals(str('abc', codecname), 'abc')
+        self.assertEquals(str('xdef', codecname), 'abcdef')
+        self.assertEquals(str('defx', codecname), 'defabc')
+        self.assertEquals(str('dxf', codecname), 'dabcf')
+        self.assertEquals(str('dxfx', codecname), 'dabcfabc')
 
     def test_encodex(self):
-        self.assertEquals(u'abc'.encode(codecname), 'abc')
-        self.assertEquals(u'xdef'.encode(codecname), 'abcdef')
-        self.assertEquals(u'defx'.encode(codecname), 'defabc')
-        self.assertEquals(u'dxf'.encode(codecname), 'dabcf')
-        self.assertEquals(u'dxfx'.encode(codecname), 'dabcfabc')
+        self.assertEquals('abc'.encode(codecname), 'abc')
+        self.assertEquals('xdef'.encode(codecname), 'abcdef')
+        self.assertEquals('defx'.encode(codecname), 'defabc')
+        self.assertEquals('dxf'.encode(codecname), 'dabcf')
+        self.assertEquals('dxfx'.encode(codecname), 'dabcfabc')
 
     def test_constructory(self):
-        self.assertEquals(unicode('ydef', codecname), u'def')
-        self.assertEquals(unicode('defy', codecname), u'def')
-        self.assertEquals(unicode('dyf', codecname), u'df')
-        self.assertEquals(unicode('dyfy', codecname), u'df')
+        self.assertEquals(str('ydef', codecname), 'def')
+        self.assertEquals(str('defy', codecname), 'def')
+        self.assertEquals(str('dyf', codecname), 'df')
+        self.assertEquals(str('dyfy', codecname), 'df')
 
     def test_maptoundefined(self):
-        self.assertRaises(UnicodeError, unicode, 'abc\001', codecname)
+        self.assertRaises(UnicodeError, str, 'abc\001', codecname)
 
 def test_main():
     test.test_support.run_unittest(CharmapCodecTest)

Modified: python/branches/py3k-struni/Lib/test/test_codeccallbacks.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codeccallbacks.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codeccallbacks.py	Wed May  2 21:09:54 2007
@@ -16,18 +16,18 @@
         # otherwise we'd get an endless loop
         if realpos <= exc.start:
             self.pos = len(exc.object)
-        return (u"<?>", oldpos)
+        return ("<?>", oldpos)
 
 # A UnicodeEncodeError object with a bad start attribute
 class BadStartUnicodeEncodeError(UnicodeEncodeError):
     def __init__(self):
-        UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
+        UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad")
         self.start = []
 
 # A UnicodeEncodeError object with a bad object attribute
 class BadObjectUnicodeEncodeError(UnicodeEncodeError):
     def __init__(self):
-        UnicodeEncodeError.__init__(self, "ascii", u"", 0, 1, "bad")
+        UnicodeEncodeError.__init__(self, "ascii", "", 0, 1, "bad")
         self.object = []
 
 # A UnicodeDecodeError object without an end attribute
@@ -45,19 +45,19 @@
 # A UnicodeTranslateError object without a start attribute
 class NoStartUnicodeTranslateError(UnicodeTranslateError):
     def __init__(self):
-        UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
+        UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
         del self.start
 
 # A UnicodeTranslateError object without an end attribute
 class NoEndUnicodeTranslateError(UnicodeTranslateError):
     def __init__(self):
-        UnicodeTranslateError.__init__(self,  u"", 0, 1, "bad")
+        UnicodeTranslateError.__init__(self,  "", 0, 1, "bad")
         del self.end
 
 # A UnicodeTranslateError object without an object attribute
 class NoObjectUnicodeTranslateError(UnicodeTranslateError):
     def __init__(self):
-        UnicodeTranslateError.__init__(self, u"", 0, 1, "bad")
+        UnicodeTranslateError.__init__(self, "", 0, 1, "bad")
         del self.object
 
 class CodecCallbackTest(unittest.TestCase):
@@ -66,7 +66,7 @@
         # replace unencodable characters which numeric character entities.
         # For ascii, latin-1 and charmaps this is completely implemented
         # in C and should be reasonably fast.
-        s = u"\u30b9\u30d1\u30e2 \xe4nd eggs"
+        s = "\u30b9\u30d1\u30e2 \xe4nd eggs"
         self.assertEqual(
             s.encode("ascii", "xmlcharrefreplace"),
             "&#12473;&#12497;&#12514; &#228;nd eggs"
@@ -86,15 +86,15 @@
             l = []
             for c in exc.object[exc.start:exc.end]:
                 try:
-                    l.append(u"&%s;" % htmlentitydefs.codepoint2name[ord(c)])
+                    l.append("&%s;" % htmlentitydefs.codepoint2name[ord(c)])
                 except KeyError:
-                    l.append(u"&#%d;" % ord(c))
-            return (u"".join(l), exc.end)
+                    l.append("&#%d;" % ord(c))
+            return ("".join(l), exc.end)
 
         codecs.register_error(
             "test.xmlcharnamereplace", xmlcharnamereplace)
 
-        sin = u"\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
+        sin = "\xab\u211c\xbb = \u2329\u1234\u20ac\u232a"
         sout = "&laquo;&real;&raquo; = &lang;&#4660;&euro;&rang;"
         self.assertEqual(sin.encode("ascii", "test.xmlcharnamereplace"), sout)
         sout = "\xab&real;\xbb = &lang;&#4660;&euro;&rang;"
@@ -116,13 +116,13 @@
                 raise TypeError("don't know how to handle %r" % exc)
             l = []
             for c in exc.object[exc.start:exc.end]:
-                l.append(unicodedata.name(c, u"0x%x" % ord(c)))
-            return (u"\033[1m%s\033[0m" % u", ".join(l), exc.end)
+                l.append(unicodedata.name(c, "0x%x" % ord(c)))
+            return ("\033[1m%s\033[0m" % ", ".join(l), exc.end)
 
         codecs.register_error(
             "test.uninamereplace", uninamereplace)
 
-        sin = u"\xac\u1234\u20ac\u8000"
+        sin = "\xac\u1234\u20ac\u8000"
         sout = "\033[1mNOT SIGN, ETHIOPIC SYLLABLE SEE, EURO SIGN, CJK UNIFIED IDEOGRAPH-8000\033[0m"
         self.assertEqual(sin.encode("ascii", "test.uninamereplace"), sout)
 
@@ -135,7 +135,7 @@
     def test_backslashescape(self):
         # Does the same as the "unicode-escape" encoding, but with different
         # base encodings.
-        sin = u"a\xac\u1234\u20ac\u8000"
+        sin = "a\xac\u1234\u20ac\u8000"
         if sys.maxunicode > 0xffff:
             sin += unichr(sys.maxunicode)
         sout = "a\\xac\\u1234\\u20ac\\u8000"
@@ -163,7 +163,7 @@
             if not isinstance(exc, UnicodeDecodeError):
                 raise TypeError("don't know how to handle %r" % exc)
             if exc.object[exc.start:exc.end].startswith("\xc0\x80"):
-                return (u"\x00", exc.start+2) # retry after two bytes
+                return ("\x00", exc.start+2) # retry after two bytes
             else:
                 raise exc
 
@@ -171,7 +171,7 @@
             "test.relaxedutf8", relaxedutf8)
 
         sin = "a\x00b\xc0\x80c\xc3\xbc\xc0\x80\xc0\x80"
-        sout = u"a\x00b\x00c\xfc\x00\x00"
+        sout = "a\x00b\x00c\xfc\x00\x00"
         self.assertEqual(sin.decode("utf-8", "test.relaxedutf8"), sout)
         sin = "\xc0\x80\xc0\x81"
         self.assertRaises(UnicodeError, sin.decode, "utf-8", "test.relaxedutf8")
@@ -182,22 +182,22 @@
         # to be able to use e.g. the "replace" handler, the
         # charmap has to have a mapping for "?".
         charmap = dict([ (ord(c), 2*c.upper()) for c in "abcdefgh"])
-        sin = u"abc"
+        sin = "abc"
         sout = "AABBCC"
         self.assertEquals(codecs.charmap_encode(sin, "strict", charmap)[0], sout)
 
-        sin = u"abcA"
+        sin = "abcA"
         self.assertRaises(UnicodeError, codecs.charmap_encode, sin, "strict", charmap)
 
         charmap[ord("?")] = "XYZ"
-        sin = u"abcDEF"
+        sin = "abcDEF"
         sout = "AABBCCXYZXYZXYZ"
         self.assertEquals(codecs.charmap_encode(sin, "replace", charmap)[0], sout)
 
-        charmap[ord("?")] = u"XYZ"
+        charmap[ord("?")] = "XYZ"
         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
 
-        charmap[ord("?")] = u"XYZ"
+        charmap[ord("?")] = "XYZ"
         self.assertRaises(TypeError, codecs.charmap_encode, sin, "replace", charmap)
 
     def test_decodeunicodeinternal(self):
@@ -210,23 +210,23 @@
             def handler_unicodeinternal(exc):
                 if not isinstance(exc, UnicodeDecodeError):
                     raise TypeError("don't know how to handle %r" % exc)
-                return (u"\x01", 1)
+                return ("\x01", 1)
 
             self.assertEqual(
                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "ignore"),
-                u"\u0000"
+                "\u0000"
             )
 
             self.assertEqual(
                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "replace"),
-                u"\u0000\ufffd"
+                "\u0000\ufffd"
             )
 
             codecs.register_error("test.hui", handler_unicodeinternal)
 
             self.assertEqual(
                 "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui"),
-                u"\u0000\u0001\u0000"
+                "\u0000\u0001\u0000"
             )
 
     def test_callbacks(self):
@@ -234,16 +234,16 @@
             if not isinstance(exc, UnicodeEncodeError) \
                and not isinstance(exc, UnicodeDecodeError):
                 raise TypeError("don't know how to handle %r" % exc)
-            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
-            return (u"[%s]" % u"".join(l), exc.end)
+            l = ["<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
+            return ("[%s]" % "".join(l), exc.end)
 
         codecs.register_error("test.handler1", handler1)
 
         def handler2(exc):
             if not isinstance(exc, UnicodeDecodeError):
                 raise TypeError("don't know how to handle %r" % exc)
-            l = [u"<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
-            return (u"[%s]" % u"".join(l), exc.end+1) # skip one character
+            l = ["<%d>" % ord(exc.object[pos]) for pos in xrange(exc.start, exc.end)]
+            return ("[%s]" % "".join(l), exc.end+1) # skip one character
 
         codecs.register_error("test.handler2", handler2)
 
@@ -251,36 +251,36 @@
 
         self.assertEqual(
             s.decode("ascii", "test.handler1"),
-            u"\x00[<129>]\x7f[<128>][<255>]"
+            "\x00[<129>]\x7f[<128>][<255>]"
         )
         self.assertEqual(
             s.decode("ascii", "test.handler2"),
-            u"\x00[<129>][<128>]"
+            "\x00[<129>][<128>]"
         )
 
         self.assertEqual(
             "\\u3042\u3xxx".decode("unicode-escape", "test.handler1"),
-            u"\u3042[<92><117><51><120>]xx"
+            "\u3042[<92><117><51><120>]xx"
         )
 
         self.assertEqual(
             "\\u3042\u3xx".decode("unicode-escape", "test.handler1"),
-            u"\u3042[<92><117><51><120><120>]"
+            "\u3042[<92><117><51><120><120>]"
         )
 
         self.assertEqual(
-            codecs.charmap_decode("abc", "test.handler1", {ord("a"): u"z"})[0],
-            u"z[<98>][<99>]"
+            codecs.charmap_decode("abc", "test.handler1", {ord("a"): "z"})[0],
+            "z[<98>][<99>]"
         )
 
         self.assertEqual(
-            u"g\xfc\xdfrk".encode("ascii", "test.handler1"),
-            u"g[<252><223>]rk"
+            "g\xfc\xdfrk".encode("ascii", "test.handler1"),
+            "g[<252><223>]rk"
         )
 
         self.assertEqual(
-            u"g\xfc\xdf".encode("ascii", "test.handler1"),
-            u"g[<252><223>]"
+            "g\xfc\xdf".encode("ascii", "test.handler1"),
+            "g[<252><223>]"
         )
 
     def test_longstrings(self):
@@ -292,7 +292,7 @@
             codecs.register_error("test." + err, codecs.lookup_error(err))
         l = 1000
         errors += [ "test." + err for err in errors ]
-        for uni in [ s*l for s in (u"x", u"\u3042", u"a\xe4") ]:
+        for uni in [ s*l for s in ("x", "\u3042", "a\xe4") ]:
             for enc in ("ascii", "latin-1", "iso-8859-1", "iso-8859-15", "utf-8", "utf-7", "utf-16"):
                 for err in errors:
                     try:
@@ -307,7 +307,7 @@
         # check with one argument too much
         self.assertRaises(TypeError, exctype, *(args + ["too much"]))
         # check with one argument of the wrong type
-        wrongargs = [ "spam", u"eggs", 42, 1.0, None ]
+        wrongargs = [ "spam", "eggs", 42, 1.0, None ]
         for i in xrange(len(args)):
             for wrongarg in wrongargs:
                 if type(wrongarg) is type(args[i]):
@@ -328,33 +328,33 @@
     def test_unicodeencodeerror(self):
         self.check_exceptionobjectargs(
             UnicodeEncodeError,
-            ["ascii", u"g\xfcrk", 1, 2, "ouch"],
+            ["ascii", "g\xfcrk", 1, 2, "ouch"],
             "'ascii' codec can't encode character u'\\xfc' in position 1: ouch"
         )
         self.check_exceptionobjectargs(
             UnicodeEncodeError,
-            ["ascii", u"g\xfcrk", 1, 4, "ouch"],
+            ["ascii", "g\xfcrk", 1, 4, "ouch"],
             "'ascii' codec can't encode characters in position 1-3: ouch"
         )
         self.check_exceptionobjectargs(
             UnicodeEncodeError,
-            ["ascii", u"\xfcx", 0, 1, "ouch"],
+            ["ascii", "\xfcx", 0, 1, "ouch"],
             "'ascii' codec can't encode character u'\\xfc' in position 0: ouch"
         )
         self.check_exceptionobjectargs(
             UnicodeEncodeError,
-            ["ascii", u"\u0100x", 0, 1, "ouch"],
+            ["ascii", "\u0100x", 0, 1, "ouch"],
             "'ascii' codec can't encode character u'\\u0100' in position 0: ouch"
         )
         self.check_exceptionobjectargs(
             UnicodeEncodeError,
-            ["ascii", u"\uffffx", 0, 1, "ouch"],
+            ["ascii", "\uffffx", 0, 1, "ouch"],
             "'ascii' codec can't encode character u'\\uffff' in position 0: ouch"
         )
         if sys.maxunicode > 0xffff:
             self.check_exceptionobjectargs(
                 UnicodeEncodeError,
-                ["ascii", u"\U00010000x", 0, 1, "ouch"],
+                ["ascii", "\U00010000x", 0, 1, "ouch"],
                 "'ascii' codec can't encode character u'\\U00010000' in position 0: ouch"
             )
 
@@ -373,28 +373,28 @@
     def test_unicodetranslateerror(self):
         self.check_exceptionobjectargs(
             UnicodeTranslateError,
-            [u"g\xfcrk", 1, 2, "ouch"],
+            ["g\xfcrk", 1, 2, "ouch"],
             "can't translate character u'\\xfc' in position 1: ouch"
         )
         self.check_exceptionobjectargs(
             UnicodeTranslateError,
-            [u"g\u0100rk", 1, 2, "ouch"],
+            ["g\u0100rk", 1, 2, "ouch"],
             "can't translate character u'\\u0100' in position 1: ouch"
         )
         self.check_exceptionobjectargs(
             UnicodeTranslateError,
-            [u"g\uffffrk", 1, 2, "ouch"],
+            ["g\uffffrk", 1, 2, "ouch"],
             "can't translate character u'\\uffff' in position 1: ouch"
         )
         if sys.maxunicode > 0xffff:
             self.check_exceptionobjectargs(
                 UnicodeTranslateError,
-                [u"g\U00010000rk", 1, 2, "ouch"],
+                ["g\U00010000rk", 1, 2, "ouch"],
                 "can't translate character u'\\U00010000' in position 1: ouch"
             )
         self.check_exceptionobjectargs(
             UnicodeTranslateError,
-            [u"g\xfcrk", 1, 3, "ouch"],
+            ["g\xfcrk", 1, 3, "ouch"],
             "can't translate characters in position 1-2: ouch"
         )
 
@@ -416,7 +416,7 @@
         self.assertRaises(
             UnicodeEncodeError,
             codecs.strict_errors,
-            UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")
+            UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")
         )
 
     def test_badandgoodignoreexceptions(self):
@@ -434,16 +434,16 @@
         )
         # If the correct exception is passed in, "ignore" returns an empty replacement
         self.assertEquals(
-            codecs.ignore_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
-            (u"", 1)
+            codecs.ignore_errors(UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
+            ("", 1)
         )
         self.assertEquals(
             codecs.ignore_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
-            (u"", 1)
+            ("", 1)
         )
         self.assertEquals(
-            codecs.ignore_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
-            (u"", 1)
+            codecs.ignore_errors(UnicodeTranslateError("\u3042", 0, 1, "ouch")),
+            ("", 1)
         )
 
     def test_badandgoodreplaceexceptions(self):
@@ -471,16 +471,16 @@
         )
         # With the correct exception, "replace" returns an "?" or u"\ufffd" replacement
         self.assertEquals(
-            codecs.replace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
-            (u"?", 1)
+            codecs.replace_errors(UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
+            ("?", 1)
         )
         self.assertEquals(
             codecs.replace_errors(UnicodeDecodeError("ascii", "\xff", 0, 1, "ouch")),
-            (u"\ufffd", 1)
+            ("\ufffd", 1)
         )
         self.assertEquals(
-            codecs.replace_errors(UnicodeTranslateError(u"\u3042", 0, 1, "ouch")),
-            (u"\ufffd", 1)
+            codecs.replace_errors(UnicodeTranslateError("\u3042", 0, 1, "ouch")),
+            ("\ufffd", 1)
         )
 
     def test_badandgoodxmlcharrefreplaceexceptions(self):
@@ -505,7 +505,7 @@
         self.assertRaises(
             TypeError,
             codecs.xmlcharrefreplace_errors,
-            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
+            UnicodeTranslateError("\u3042", 0, 1, "ouch")
         )
         # Use the correct exception
         cs = (0, 1, 9, 10, 99, 100, 999, 1000, 9999, 10000, 0x3042)
@@ -514,7 +514,7 @@
             codecs.xmlcharrefreplace_errors(
                 UnicodeEncodeError("ascii", s, 0, len(s), "ouch")
             ),
-            (u"".join(u"&#%d;" % ord(c) for c in s), len(s))
+            ("".join("&#%d;" % ord(c) for c in s), len(s))
         )
 
     def test_badandgoodbackslashreplaceexceptions(self):
@@ -539,41 +539,41 @@
         self.assertRaises(
             TypeError,
             codecs.backslashreplace_errors,
-            UnicodeTranslateError(u"\u3042", 0, 1, "ouch")
+            UnicodeTranslateError("\u3042", 0, 1, "ouch")
         )
         # Use the correct exception
         self.assertEquals(
-            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u3042", 0, 1, "ouch")),
-            (u"\\u3042", 1)
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\u3042", 0, 1, "ouch")),
+            ("\\u3042", 1)
         )
         self.assertEquals(
-            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\x00", 0, 1, "ouch")),
-            (u"\\x00", 1)
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\x00", 0, 1, "ouch")),
+            ("\\x00", 1)
         )
         self.assertEquals(
-            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\xff", 0, 1, "ouch")),
-            (u"\\xff", 1)
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\xff", 0, 1, "ouch")),
+            ("\\xff", 1)
         )
         self.assertEquals(
-            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\u0100", 0, 1, "ouch")),
-            (u"\\u0100", 1)
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\u0100", 0, 1, "ouch")),
+            ("\\u0100", 1)
         )
         self.assertEquals(
-            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\uffff", 0, 1, "ouch")),
-            (u"\\uffff", 1)
+            codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\uffff", 0, 1, "ouch")),
+            ("\\uffff", 1)
         )
         if sys.maxunicode>0xffff:
             self.assertEquals(
-                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U00010000", 0, 1, "ouch")),
-                (u"\\U00010000", 1)
+                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\U00010000", 0, 1, "ouch")),
+                ("\\U00010000", 1)
             )
             self.assertEquals(
-                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", u"\U0010ffff", 0, 1, "ouch")),
-                (u"\\U0010ffff", 1)
+                codecs.backslashreplace_errors(UnicodeEncodeError("ascii", "\U0010ffff", 0, 1, "ouch")),
+                ("\\U0010ffff", 1)
             )
 
     def test_badhandlerresults(self):
-        results = ( 42, u"foo", (1,2,3), (u"foo", 1, 3), (u"foo", None), (u"foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
+        results = ( 42, "foo", (1,2,3), ("foo", 1, 3), ("foo", None), ("foo",), ("foo", 1, 3), ("foo", None), ("foo",) )
         encs = ("ascii", "latin-1", "iso-8859-1", "iso-8859-15")
 
         for res in results:
@@ -581,7 +581,7 @@
             for enc in encs:
                 self.assertRaises(
                     TypeError,
-                    u"\u3042".encode,
+                    "\u3042".encode,
                     enc,
                     "test.badhandler"
                 )
@@ -614,14 +614,14 @@
     def test_unencodablereplacement(self):
         def unencrepl(exc):
             if isinstance(exc, UnicodeEncodeError):
-                return (u"\u4242", exc.end)
+                return ("\u4242", exc.end)
             else:
                 raise TypeError("don't know how to handle %r" % exc)
         codecs.register_error("test.unencreplhandler", unencrepl)
         for enc in ("ascii", "iso-8859-1", "iso-8859-15"):
             self.assertRaises(
                 UnicodeEncodeError,
-                u"\u4242".encode,
+                "\u4242".encode,
                 enc,
                 "test.unencreplhandler"
             )
@@ -650,7 +650,7 @@
         v = (1, 5, 10, 50, 100, 500, 1000, 5000, 10000, 50000)
         if sys.maxunicode>=100000:
             v += (100000, 500000, 1000000)
-        s = u"".join([unichr(x) for x in v])
+        s = "".join([unichr(x) for x in v])
         codecs.register_error("test.xmlcharrefreplace", codecs.xmlcharrefreplace_errors)
         for enc in ("ascii", "iso-8859-15"):
             for err in ("xmlcharrefreplace", "test.xmlcharrefreplace"):
@@ -673,7 +673,7 @@
         self.assertRaises(TypeError, "\\uyyyy".decode, "raw-unicode-escape", "test.baddecodereturn1")
 
         def baddecodereturn2(exc):
-            return (u"?", None)
+            return ("?", None)
         codecs.register_error("test.baddecodereturn2", baddecodereturn2)
         self.assertRaises(TypeError, "\xff".decode, "ascii", "test.baddecodereturn2")
 
@@ -682,11 +682,11 @@
 
         # Valid negative position
         handler.pos = -1
-        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
+        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), "<?>0")
 
         # Valid negative position
         handler.pos = -2
-        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?><?>")
+        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), "<?><?>")
 
         # Negative position out of bounds
         handler.pos = -3
@@ -694,11 +694,11 @@
 
         # Valid positive position
         handler.pos = 1
-        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>0")
+        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), "<?>0")
 
         # Largest valid positive position (one beyond end of input)
         handler.pos = 2
-        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), u"<?>")
+        self.assertEquals("\xff0".decode("ascii", "test.posreturn"), "<?>")
 
         # Invalid positive position
         handler.pos = 3
@@ -706,7 +706,7 @@
 
         # Restart at the "0"
         handler.pos = 6
-        self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), u"<?>0")
+        self.assertEquals("\\uyyyy0".decode("raw-unicode-escape", "test.posreturn"), "<?>0")
 
         class D(dict):
             def __getitem__(self, key):
@@ -719,44 +719,44 @@
         # enhance coverage of:
         # Objects/unicodeobject.c::unicode_encode_call_errorhandler()
         # and callers
-        self.assertRaises(LookupError, u"\xff".encode, "ascii", "test.unknown")
+        self.assertRaises(LookupError, "\xff".encode, "ascii", "test.unknown")
 
         def badencodereturn1(exc):
             return 42
         codecs.register_error("test.badencodereturn1", badencodereturn1)
-        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn1")
+        self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn1")
 
         def badencodereturn2(exc):
-            return (u"?", None)
+            return ("?", None)
         codecs.register_error("test.badencodereturn2", badencodereturn2)
-        self.assertRaises(TypeError, u"\xff".encode, "ascii", "test.badencodereturn2")
+        self.assertRaises(TypeError, "\xff".encode, "ascii", "test.badencodereturn2")
 
         handler = PosReturn()
         codecs.register_error("test.posreturn", handler.handle)
 
         # Valid negative position
         handler.pos = -1
-        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
+        self.assertEquals("\xff0".encode("ascii", "test.posreturn"), "<?>0")
 
         # Valid negative position
         handler.pos = -2
-        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?><?>")
+        self.assertEquals("\xff0".encode("ascii", "test.posreturn"), "<?><?>")
 
         # Negative position out of bounds
         handler.pos = -3
-        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
+        self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn")
 
         # Valid positive position
         handler.pos = 1
-        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>0")
+        self.assertEquals("\xff0".encode("ascii", "test.posreturn"), "<?>0")
 
         # Largest valid positive position (one beyond end of input
         handler.pos = 2
-        self.assertEquals(u"\xff0".encode("ascii", "test.posreturn"), "<?>")
+        self.assertEquals("\xff0".encode("ascii", "test.posreturn"), "<?>")
 
         # Invalid positive position
         handler.pos = 3
-        self.assertRaises(IndexError, u"\xff0".encode, "ascii", "test.posreturn")
+        self.assertRaises(IndexError, "\xff0".encode, "ascii", "test.posreturn")
 
         handler.pos = 0
 
@@ -764,9 +764,9 @@
             def __getitem__(self, key):
                 raise ValueError
         for err in ("strict", "replace", "xmlcharrefreplace", "backslashreplace", "test.posreturn"):
-            self.assertRaises(UnicodeError, codecs.charmap_encode, u"\xff", err, {0xff: None})
-            self.assertRaises(ValueError, codecs.charmap_encode, u"\xff", err, D())
-            self.assertRaises(TypeError, codecs.charmap_encode, u"\xff", err, {0xff: 300})
+            self.assertRaises(UnicodeError, codecs.charmap_encode, "\xff", err, {0xff: None})
+            self.assertRaises(ValueError, codecs.charmap_encode, "\xff", err, D())
+            self.assertRaises(TypeError, codecs.charmap_encode, "\xff", err, {0xff: 300})
 
     def test_translatehelper(self):
         # enhance coverage of:
@@ -777,20 +777,20 @@
         class D(dict):
             def __getitem__(self, key):
                 raise ValueError
-        self.assertRaises(ValueError, u"\xff".translate, D())
-        self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
-        self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
+        self.assertRaises(ValueError, "\xff".translate, D())
+        self.assertRaises(TypeError, "\xff".translate, {0xff: sys.maxunicode+1})
+        self.assertRaises(TypeError, "\xff".translate, {0xff: ()})
 
     def test_bug828737(self):
         charmap = {
-            ord("&"): u"&amp;",
-            ord("<"): u"&lt;",
-            ord(">"): u"&gt;",
-            ord('"'): u"&quot;",
+            ord("&"): "&amp;",
+            ord("<"): "&lt;",
+            ord(">"): "&gt;",
+            ord('"'): "&quot;",
         }
 
         for n in (1, 10, 100, 1000):
-            text = u'abc<def>ghi'*n
+            text = 'abc<def>ghi'*n
             text.translate(charmap)
 
 def test_main():

Modified: python/branches/py3k-struni/Lib/test/test_codecencodings_cn.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codecencodings_cn.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codecencodings_cn.py	Wed May  2 21:09:54 2007
@@ -15,9 +15,9 @@
         # invalid bytes
         ("abc\x81\x81\xc1\xc4", "strict",  None),
         ("abc\xc8", "strict",  None),
-        ("abc\x81\x81\xc1\xc4", "replace", u"abc\ufffd\u804a"),
-        ("abc\x81\x81\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
-        ("abc\x81\x81\xc1\xc4", "ignore",  u"abc\u804a"),
+        ("abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\u804a"),
+        ("abc\x81\x81\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+        ("abc\x81\x81\xc1\xc4", "ignore",  "abc\u804a"),
         ("\xc1\x64", "strict", None),
     )
 
@@ -28,11 +28,11 @@
         # invalid bytes
         ("abc\x80\x80\xc1\xc4", "strict",  None),
         ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u804a"),
+        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
+        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
         ("\x83\x34\x83\x31", "strict", None),
-        (u"\u30fb", "strict", None),
+        ("\u30fb", "strict", None),
     )
 
 class Test_GB18030(test_multibytecodec_support.TestBase, unittest.TestCase):
@@ -42,11 +42,11 @@
         # invalid bytes
         ("abc\x80\x80\xc1\xc4", "strict",  None),
         ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u804a"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u804a\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u804a"),
-        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", u"abc\ufffd\u804a"),
-        (u"\u30fb", "strict", "\x819\xa79"),
+        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u804a"),
+        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u804a\ufffd"),
+        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u804a"),
+        ("abc\x84\x39\x84\x39\xc1\xc4", "replace", "abc\ufffd\u804a"),
+        ("\u30fb", "strict", "\x819\xa79"),
     )
     has_iso10646 = True
 

Modified: python/branches/py3k-struni/Lib/test/test_codecencodings_hk.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codecencodings_hk.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codecencodings_hk.py	Wed May  2 21:09:54 2007
@@ -15,9 +15,9 @@
         # invalid bytes
         ("abc\x80\x80\xc1\xc4", "strict",  None),
         ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u8b10"),
+        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
+        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
+        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u8b10"),
     )
 
 def test_main():

Modified: python/branches/py3k-struni/Lib/test/test_codecencodings_jp.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codecencodings_jp.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codecencodings_jp.py	Wed May  2 21:09:54 2007
@@ -15,12 +15,12 @@
         # invalid bytes
         ("abc\x81\x00\x81\x00\x82\x84", "strict",  None),
         ("abc\xf8", "strict",  None),
-        ("abc\x81\x00\x82\x84", "replace", u"abc\ufffd\uff44"),
-        ("abc\x81\x00\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
-        ("abc\x81\x00\x82\x84", "ignore",  u"abc\uff44"),
+        ("abc\x81\x00\x82\x84", "replace", "abc\ufffd\uff44"),
+        ("abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
+        ("abc\x81\x00\x82\x84", "ignore",  "abc\uff44"),
         # sjis vs cp932
-        ("\\\x7e", "replace", u"\\\x7e"),
-        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\uff3c\u2225\uff0d"),
+        ("\\\x7e", "replace", "\\\x7e"),
+        ("\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"),
     )
 
 class Test_EUC_JISX0213(test_multibytecodec_support.TestBase,
@@ -31,25 +31,25 @@
         # invalid bytes
         ("abc\x80\x80\xc1\xc4", "strict",  None),
         ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u7956"),
-        ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
+        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
+        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
+        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
+        ("abc\x8f\x83\x83", "replace", "abc\ufffd"),
         ("\xc1\x64", "strict", None),
-        ("\xa1\xc0", "strict", u"\uff3c"),
+        ("\xa1\xc0", "strict", "\uff3c"),
     )
     xmlcharnametest = (
-        u"\xab\u211c\xbb = \u2329\u1234\u232a",
+        "\xab\u211c\xbb = \u2329\u1234\u232a",
         "\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
     )
 
 eucjp_commontests = (
     ("abc\x80\x80\xc1\xc4", "strict",  None),
     ("abc\xc8", "strict",  None),
-    ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u7956"),
-    ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u7956\ufffd"),
-    ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u7956"),
-    ("abc\x8f\x83\x83", "replace", u"abc\ufffd"),
+    ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u7956"),
+    ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u7956\ufffd"),
+    ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u7956"),
+    ("abc\x8f\x83\x83", "replace", "abc\ufffd"),
     ("\xc1\x64", "strict", None),
 )
 
@@ -58,25 +58,25 @@
     encoding = 'euc_jp'
     tstring = test_multibytecodec_support.load_teststring('euc_jp')
     codectests = eucjp_commontests + (
-        ("\xa1\xc0\\", "strict", u"\uff3c\\"),
-        (u"\xa5", "strict", "\x5c"),
-        (u"\u203e", "strict", "\x7e"),
+        ("\xa1\xc0\\", "strict", "\uff3c\\"),
+        ("\xa5", "strict", "\x5c"),
+        ("\u203e", "strict", "\x7e"),
     )
 
 shiftjis_commonenctests = (
     ("abc\x80\x80\x82\x84", "strict",  None),
     ("abc\xf8", "strict",  None),
-    ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
-    ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
-    ("abc\x80\x80\x82\x84def", "ignore",  u"abc\uff44def"),
+    ("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
+    ("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
+    ("abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
 )
 
 class Test_SJIS_COMPAT(test_multibytecodec_support.TestBase, unittest.TestCase):
     encoding = 'shift_jis'
     tstring = test_multibytecodec_support.load_teststring('shift_jis')
     codectests = shiftjis_commonenctests + (
-        ("\\\x7e", "strict", u"\\\x7e"),
-        ("\x81\x5f\x81\x61\x81\x7c", "strict", u"\uff3c\u2016\u2212"),
+        ("\\\x7e", "strict", "\\\x7e"),
+        ("\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
     )
 
 class Test_SJISX0213(test_multibytecodec_support.TestBase, unittest.TestCase):
@@ -86,15 +86,15 @@
         # invalid bytes
         ("abc\x80\x80\x82\x84", "strict",  None),
         ("abc\xf8", "strict",  None),
-        ("abc\x80\x80\x82\x84", "replace", u"abc\ufffd\uff44"),
-        ("abc\x80\x80\x82\x84\x88", "replace", u"abc\ufffd\uff44\ufffd"),
-        ("abc\x80\x80\x82\x84def", "ignore",  u"abc\uff44def"),
+        ("abc\x80\x80\x82\x84", "replace", "abc\ufffd\uff44"),
+        ("abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\uff44\ufffd"),
+        ("abc\x80\x80\x82\x84def", "ignore",  "abc\uff44def"),
         # sjis vs cp932
-        ("\\\x7e", "replace", u"\xa5\u203e"),
-        ("\x81\x5f\x81\x61\x81\x7c", "replace", u"\x5c\u2016\u2212"),
+        ("\\\x7e", "replace", "\xa5\u203e"),
+        ("\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
     )
     xmlcharnametest = (
-        u"\xab\u211c\xbb = \u2329\u1234\u232a",
+        "\xab\u211c\xbb = \u2329\u1234\u232a",
         "\x85G&real;\x85Q = &lang;&#4660;&rang;"
     )
 

Modified: python/branches/py3k-struni/Lib/test/test_codecencodings_kr.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codecencodings_kr.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codecencodings_kr.py	Wed May  2 21:09:54 2007
@@ -15,9 +15,9 @@
         # invalid bytes
         ("abc\x80\x80\xc1\xc4", "strict",  None),
         ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\uc894"),
+        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
+        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
+        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\uc894"),
     )
 
 class Test_EUCKR(test_multibytecodec_support.TestBase, unittest.TestCase):
@@ -27,9 +27,9 @@
         # invalid bytes
         ("abc\x80\x80\xc1\xc4", "strict",  None),
         ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\uc894"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\uc894\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\uc894"),
+        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\uc894"),
+        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\uc894\ufffd"),
+        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\uc894"),
     )
 
 class Test_JOHAB(test_multibytecodec_support.TestBase, unittest.TestCase):
@@ -39,9 +39,9 @@
         # invalid bytes
         ("abc\x80\x80\xc1\xc4", "strict",  None),
         ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\ucd27"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\ucd27\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\ucd27"),
+        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ucd27"),
+        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ucd27\ufffd"),
+        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\ucd27"),
     )
 
 def test_main():

Modified: python/branches/py3k-struni/Lib/test/test_codecencodings_tw.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codecencodings_tw.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codecencodings_tw.py	Wed May  2 21:09:54 2007
@@ -15,9 +15,9 @@
         # invalid bytes
         ("abc\x80\x80\xc1\xc4", "strict",  None),
         ("abc\xc8", "strict",  None),
-        ("abc\x80\x80\xc1\xc4", "replace", u"abc\ufffd\u8b10"),
-        ("abc\x80\x80\xc1\xc4\xc8", "replace", u"abc\ufffd\u8b10\ufffd"),
-        ("abc\x80\x80\xc1\xc4", "ignore",  u"abc\u8b10"),
+        ("abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\u8b10"),
+        ("abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\u8b10\ufffd"),
+        ("abc\x80\x80\xc1\xc4", "ignore",  "abc\u8b10"),
     )
 
 def test_main():

Modified: python/branches/py3k-struni/Lib/test/test_codecmaps_jp.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codecmaps_jp.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codecmaps_jp.py	Wed May  2 21:09:54 2007
@@ -14,11 +14,11 @@
     mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/' \
                  'WINDOWS/CP932.TXT'
     supmaps = [
-        ('\x80', u'\u0080'),
-        ('\xa0', u'\uf8f0'),
-        ('\xfd', u'\uf8f1'),
-        ('\xfe', u'\uf8f2'),
-        ('\xff', u'\uf8f3'),
+        ('\x80', '\u0080'),
+        ('\xa0', '\uf8f0'),
+        ('\xfd', '\uf8f1'),
+        ('\xfe', '\uf8f2'),
+        ('\xff', '\uf8f3'),
     ]
     for i in range(0xa1, 0xe0):
         supmaps.append((chr(i), unichr(i+0xfec0)))
@@ -38,12 +38,12 @@
     mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE' \
                  '/EASTASIA/JIS/SHIFTJIS.TXT'
     pass_enctest = [
-        ('\x81_', u'\\'),
+        ('\x81_', '\\'),
     ]
     pass_dectest = [
-        ('\\', u'\xa5'),
-        ('~', u'\u203e'),
-        ('\x81_', u'\\'),
+        ('\\', '\xa5'),
+        ('~', '\u203e'),
+        ('\x81_', '\\'),
     ]
 
 class TestEUCJISX0213Map(test_multibytecodec_support.TestBase_Mapping,

Modified: python/branches/py3k-struni/Lib/test/test_codecmaps_kr.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codecmaps_kr.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codecmaps_kr.py	Wed May  2 21:09:54 2007
@@ -30,8 +30,8 @@
     # but, in early 90s that is the only era used johab widely,
     # the most softwares implements it as REVERSE SOLIDUS.
     # So, we ignore the standard here.
-    pass_enctest = [('\\', u'\u20a9')]
-    pass_dectest = [('\\', u'\u20a9')]
+    pass_enctest = [('\\', '\u20a9')]
+    pass_dectest = [('\\', '\u20a9')]
 
 def test_main():
     test_support.run_unittest(__name__)

Modified: python/branches/py3k-struni/Lib/test/test_codecmaps_tw.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codecmaps_tw.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codecmaps_tw.py	Wed May  2 21:09:54 2007
@@ -20,8 +20,8 @@
     mapfileurl = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/' \
                  'WINDOWS/CP950.TXT'
     pass_enctest = [
-        ('\xa2\xcc', u'\u5341'),
-        ('\xa2\xce', u'\u5345'),
+        ('\xa2\xcc', '\u5341'),
+        ('\xa2\xce', '\u5345'),
     ]
 
 def test_main():

Modified: python/branches/py3k-struni/Lib/test/test_codecs.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_codecs.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_codecs.py	Wed May  2 21:09:54 2007
@@ -64,41 +64,41 @@
         # entries from partialresults.
         q = Queue()
         r = codecs.getreader(self.encoding)(q)
-        result = u""
+        result = ""
         for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
             q.write(c)
             result += r.read()
             self.assertEqual(result, partialresult)
         # check that there's nothing left in the buffers
-        self.assertEqual(r.read(), u"")
+        self.assertEqual(r.read(), "")
         self.assertEqual(r.bytebuffer, "")
-        self.assertEqual(r.charbuffer, u"")
+        self.assertEqual(r.charbuffer, "")
 
         # do the check again, this time using a incremental decoder
         d = codecs.getincrementaldecoder(self.encoding)()
-        result = u""
+        result = ""
         for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
             result += d.decode(c)
             self.assertEqual(result, partialresult)
         # check that there's nothing left in the buffers
-        self.assertEqual(d.decode("", True), u"")
+        self.assertEqual(d.decode("", True), "")
         self.assertEqual(d.buffer, "")
 
         # Check whether the rest method works properly
         d.reset()
-        result = u""
+        result = ""
         for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
             result += d.decode(c)
             self.assertEqual(result, partialresult)
         # check that there's nothing left in the buffers
-        self.assertEqual(d.decode("", True), u"")
+        self.assertEqual(d.decode("", True), "")
         self.assertEqual(d.buffer, "")
 
         # check iterdecode()
         encoded = input.encode(self.encoding)
         self.assertEqual(
             input,
-            u"".join(codecs.iterdecode(encoded, self.encoding))
+            "".join(codecs.iterdecode(encoded, self.encoding))
         )
 
     def test_readline(self):
@@ -116,9 +116,9 @@
                 lines.append(line)
             return "|".join(lines)
 
-        s = u"foo\nbar\r\nbaz\rspam\u2028eggs"
-        sexpected = u"foo\n|bar\r\n|baz\r|spam\u2028|eggs"
-        sexpectednoends = u"foo|bar|baz|spam|eggs"
+        s = "foo\nbar\r\nbaz\rspam\u2028eggs"
+        sexpected = "foo\n|bar\r\n|baz\r|spam\u2028|eggs"
+        sexpectednoends = "foo|bar|baz|spam|eggs"
         self.assertEqual(readalllines(s, True), sexpected)
         self.assertEqual(readalllines(s, False), sexpectednoends)
         self.assertEqual(readalllines(s, True, 10), sexpected)
@@ -127,28 +127,28 @@
         # Test long lines (multiple calls to read() in readline())
         vw = []
         vwo = []
-        for (i, lineend) in enumerate(u"\n \r\n \r \u2028".split()):
-            vw.append((i*200)*u"\3042" + lineend)
-            vwo.append((i*200)*u"\3042")
+        for (i, lineend) in enumerate("\n \r\n \r \u2028".split()):
+            vw.append((i*200)*"\3042" + lineend)
+            vwo.append((i*200)*"\3042")
         self.assertEqual(readalllines("".join(vw), True), "".join(vw))
         self.assertEqual(readalllines("".join(vw), False),"".join(vwo))
 
         # Test lines where the first read might end with \r, so the
         # reader has to look ahead whether this is a lone \r or a \r\n
         for size in xrange(80):
-            for lineend in u"\n \r\n \r \u2028".split():
-                s = 10*(size*u"a" + lineend + u"xxx\n")
+            for lineend in "\n \r\n \r \u2028".split():
+                s = 10*(size*"a" + lineend + "xxx\n")
                 reader = getreader(s)
                 for i in xrange(10):
                     self.assertEqual(
                         reader.readline(keepends=True),
-                        size*u"a" + lineend,
+                        size*"a" + lineend,
                     )
                 reader = getreader(s)
                 for i in xrange(10):
                     self.assertEqual(
                         reader.readline(keepends=False),
-                        size*u"a",
+                        size*"a",
                     )
 
     def test_bug1175396(self):
@@ -226,31 +226,31 @@
         reader = codecs.getreader(self.encoding)(q)
 
         # No lineends
-        writer.write(u"foo\r")
-        self.assertEqual(reader.readline(keepends=False), u"foo")
-        writer.write(u"\nbar\r")
-        self.assertEqual(reader.readline(keepends=False), u"")
-        self.assertEqual(reader.readline(keepends=False), u"bar")
-        writer.write(u"baz")
-        self.assertEqual(reader.readline(keepends=False), u"baz")
-        self.assertEqual(reader.readline(keepends=False), u"")
+        writer.write("foo\r")
+        self.assertEqual(reader.readline(keepends=False), "foo")
+        writer.write("\nbar\r")
+        self.assertEqual(reader.readline(keepends=False), "")
+        self.assertEqual(reader.readline(keepends=False), "bar")
+        writer.write("baz")
+        self.assertEqual(reader.readline(keepends=False), "baz")
+        self.assertEqual(reader.readline(keepends=False), "")
 
         # Lineends
-        writer.write(u"foo\r")
-        self.assertEqual(reader.readline(keepends=True), u"foo\r")
-        writer.write(u"\nbar\r")
-        self.assertEqual(reader.readline(keepends=True), u"\n")
-        self.assertEqual(reader.readline(keepends=True), u"bar\r")
-        writer.write(u"baz")
-        self.assertEqual(reader.readline(keepends=True), u"baz")
-        self.assertEqual(reader.readline(keepends=True), u"")
-        writer.write(u"foo\r\n")
-        self.assertEqual(reader.readline(keepends=True), u"foo\r\n")
+        writer.write("foo\r")
+        self.assertEqual(reader.readline(keepends=True), "foo\r")
+        writer.write("\nbar\r")
+        self.assertEqual(reader.readline(keepends=True), "\n")
+        self.assertEqual(reader.readline(keepends=True), "bar\r")
+        writer.write("baz")
+        self.assertEqual(reader.readline(keepends=True), "baz")
+        self.assertEqual(reader.readline(keepends=True), "")
+        writer.write("foo\r\n")
+        self.assertEqual(reader.readline(keepends=True), "foo\r\n")
 
     def test_bug1098990_a(self):
-        s1 = u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
-        s2 = u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
-        s3 = u"next line.\r\n"
+        s1 = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
+        s2 = "offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
+        s3 = "next line.\r\n"
 
         s = (s1+s2+s3).encode(self.encoding)
         stream = StringIO.StringIO(s)
@@ -258,14 +258,14 @@
         self.assertEqual(reader.readline(), s1)
         self.assertEqual(reader.readline(), s2)
         self.assertEqual(reader.readline(), s3)
-        self.assertEqual(reader.readline(), u"")
+        self.assertEqual(reader.readline(), "")
 
     def test_bug1098990_b(self):
-        s1 = u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
-        s2 = u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
-        s3 = u"stillokay:bbbbxx\r\n"
-        s4 = u"broken!!!!badbad\r\n"
-        s5 = u"againokay.\r\n"
+        s1 = "aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
+        s2 = "bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
+        s3 = "stillokay:bbbbxx\r\n"
+        s4 = "broken!!!!badbad\r\n"
+        s5 = "againokay.\r\n"
 
         s = (s1+s2+s3+s4+s5).encode(self.encoding)
         stream = StringIO.StringIO(s)
@@ -275,7 +275,7 @@
         self.assertEqual(reader.readline(), s3)
         self.assertEqual(reader.readline(), s4)
         self.assertEqual(reader.readline(), s5)
-        self.assertEqual(reader.readline(), u"")
+        self.assertEqual(reader.readline(), "")
 
 class UTF16Test(ReadTest):
     encoding = "utf-16"
@@ -288,15 +288,15 @@
         # encode some stream
         s = StringIO.StringIO()
         f = writer(s)
-        f.write(u"spam")
-        f.write(u"spam")
+        f.write("spam")
+        f.write("spam")
         d = s.getvalue()
         # check whether there is exactly one BOM in it
         self.assert_(d == self.spamle or d == self.spambe)
         # try to read it back
         s = StringIO.StringIO(d)
         f = reader(s)
-        self.assertEquals(f.read(), u"spamspam")
+        self.assertEquals(f.read(), "spamspam")
 
     def test_badbom(self):
         s = StringIO.StringIO("\xff\xff")
@@ -309,18 +309,18 @@
 
     def test_partial(self):
         self.check_partial(
-            u"\x00\xff\u0100\uffff",
+            "\x00\xff\u0100\uffff",
             [
-                u"", # first byte of BOM read
-                u"", # second byte of BOM read => byteorder known
-                u"",
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100\uffff",
+                "", # first byte of BOM read
+                "", # second byte of BOM read => byteorder known
+                "",
+                "\x00",
+                "\x00",
+                "\x00\xff",
+                "\x00\xff",
+                "\x00\xff\u0100",
+                "\x00\xff\u0100",
+                "\x00\xff\u0100\uffff",
             ]
         )
 
@@ -330,25 +330,25 @@
 
     def test_decoder_state(self):
         self.check_state_handling_decode(self.encoding,
-                                         u"spamspam", self.spamle)
+                                         "spamspam", self.spamle)
         self.check_state_handling_decode(self.encoding,
-                                         u"spamspam", self.spambe)
+                                         "spamspam", self.spambe)
 
 class UTF16LETest(ReadTest):
     encoding = "utf-16-le"
 
     def test_partial(self):
         self.check_partial(
-            u"\x00\xff\u0100\uffff",
+            "\x00\xff\u0100\uffff",
             [
-                u"",
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100\uffff",
+                "",
+                "\x00",
+                "\x00",
+                "\x00\xff",
+                "\x00\xff",
+                "\x00\xff\u0100",
+                "\x00\xff\u0100",
+                "\x00\xff\u0100\uffff",
             ]
         )
 
@@ -361,16 +361,16 @@
 
     def test_partial(self):
         self.check_partial(
-            u"\x00\xff\u0100\uffff",
+            "\x00\xff\u0100\uffff",
             [
-                u"",
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100\uffff",
+                "",
+                "\x00",
+                "\x00",
+                "\x00\xff",
+                "\x00\xff",
+                "\x00\xff\u0100",
+                "\x00\xff\u0100",
+                "\x00\xff\u0100\uffff",
             ]
         )
 
@@ -383,24 +383,24 @@
 
     def test_partial(self):
         self.check_partial(
-            u"\x00\xff\u07ff\u0800\uffff",
+            "\x00\xff\u07ff\u0800\uffff",
             [
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u07ff",
-                u"\x00\xff\u07ff",
-                u"\x00\xff\u07ff",
-                u"\x00\xff\u07ff\u0800",
-                u"\x00\xff\u07ff\u0800",
-                u"\x00\xff\u07ff\u0800",
-                u"\x00\xff\u07ff\u0800\uffff",
+                "\x00",
+                "\x00",
+                "\x00\xff",
+                "\x00\xff",
+                "\x00\xff\u07ff",
+                "\x00\xff\u07ff",
+                "\x00\xff\u07ff",
+                "\x00\xff\u07ff\u0800",
+                "\x00\xff\u07ff\u0800",
+                "\x00\xff\u07ff\u0800",
+                "\x00\xff\u07ff\u0800\uffff",
             ]
         )
 
     def test_decoder_state(self):
-        u = u"\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
+        u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
         self.check_state_handling_decode(self.encoding,
                                          u, u.encode(self.encoding))
 
@@ -450,39 +450,39 @@
 
     def test_partial(self):
         self.check_partial(
-            u"\ufeff\x00\xff\u07ff\u0800\uffff",
+            "\ufeff\x00\xff\u07ff\u0800\uffff",
             [
-                u"",
-                u"",
-                u"", # First BOM has been read and skipped
-                u"",
-                u"",
-                u"\ufeff", # Second BOM has been read and emitted
-                u"\ufeff\x00", # "\x00" read and emitted
-                u"\ufeff\x00", # First byte of encoded u"\xff" read
-                u"\ufeff\x00\xff", # Second byte of encoded u"\xff" read
-                u"\ufeff\x00\xff", # First byte of encoded u"\u07ff" read
-                u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read
-                u"\ufeff\x00\xff\u07ff",
-                u"\ufeff\x00\xff\u07ff",
-                u"\ufeff\x00\xff\u07ff\u0800",
-                u"\ufeff\x00\xff\u07ff\u0800",
-                u"\ufeff\x00\xff\u07ff\u0800",
-                u"\ufeff\x00\xff\u07ff\u0800\uffff",
+                "",
+                "",
+                "", # First BOM has been read and skipped
+                "",
+                "",
+                "\ufeff", # Second BOM has been read and emitted
+                "\ufeff\x00", # "\x00" read and emitted
+                "\ufeff\x00", # First byte of encoded u"\xff" read
+                "\ufeff\x00\xff", # Second byte of encoded u"\xff" read
+                "\ufeff\x00\xff", # First byte of encoded u"\u07ff" read
+                "\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read
+                "\ufeff\x00\xff\u07ff",
+                "\ufeff\x00\xff\u07ff",
+                "\ufeff\x00\xff\u07ff\u0800",
+                "\ufeff\x00\xff\u07ff\u0800",
+                "\ufeff\x00\xff\u07ff\u0800",
+                "\ufeff\x00\xff\u07ff\u0800\uffff",
             ]
         )
 
     def test_bug1601501(self):
         # SF bug #1601501: check that the codec works with a buffer
-        unicode("\xef\xbb\xbf", "utf-8-sig")
+        str("\xef\xbb\xbf", "utf-8-sig")
 
     def test_bom(self):
         d = codecs.getincrementaldecoder("utf-8-sig")()
-        s = u"spam"
+        s = "spam"
         self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
 
     def test_decoder_state(self):
-        u = u"\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
+        u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
         self.check_state_handling_decode(self.encoding,
                                          u, u.encode(self.encoding))
 
@@ -494,7 +494,7 @@
     def test_recoding(self):
         f = StringIO.StringIO()
         f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8")
-        f2.write(u"a")
+        f2.write("a")
         f2.close()
         # Python used to crash on this at exit because of a refcount
         # bug in _codecsmodule.c
@@ -502,104 +502,104 @@
 # From RFC 3492
 punycode_testcases = [
     # A Arabic (Egyptian):
-    (u"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
-     u"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
+    ("\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
+     "\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
      "egbpdaj6bu4bxfgehfvwxn"),
     # B Chinese (simplified):
-    (u"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
+    ("\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
      "ihqwcrb4cv8a8dqg056pqjye"),
     # C Chinese (traditional):
-    (u"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
+    ("\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
      "ihqwctvzc91f659drss3x8bo0yb"),
     # D Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
-    (u"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
-     u"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
-     u"\u0065\u0073\u006B\u0079",
+    ("\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
+     "\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
+     "\u0065\u0073\u006B\u0079",
      "Proprostnemluvesky-uyb24dma41a"),
     # E Hebrew:
-    (u"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
-     u"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
-     u"\u05D1\u05E8\u05D9\u05EA",
+    ("\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
+     "\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
+     "\u05D1\u05E8\u05D9\u05EA",
      "4dbcagdahymbxekheh6e0a7fei0b"),
     # F Hindi (Devanagari):
-    (u"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
-    u"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
-    u"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
-    u"\u0939\u0948\u0902",
+    ("\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
+    "\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
+    "\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
+    "\u0939\u0948\u0902",
     "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),
 
     #(G) Japanese (kanji and hiragana):
-    (u"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
-    u"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
+    ("\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
+    "\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
      "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),
 
     # (H) Korean (Hangul syllables):
-    (u"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
-     u"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
-     u"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
+    ("\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
+     "\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
+     "\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
      "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j"
      "psd879ccm6fea98c"),
 
     # (I) Russian (Cyrillic):
-    (u"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
-     u"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
-     u"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
-     u"\u0438",
+    ("\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
+     "\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
+     "\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
+     "\u0438",
      "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),
 
     # (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
-    (u"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
-     u"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
-     u"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
-     u"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
-     u"\u0061\u00F1\u006F\u006C",
+    ("\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
+     "\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
+     "\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
+     "\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
+     "\u0061\u00F1\u006F\u006C",
      "PorqunopuedensimplementehablarenEspaol-fmd56a"),
 
     # (K) Vietnamese:
     #  T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\
     #   <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
-    (u"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
-     u"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
-     u"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
-     u"\u0056\u0069\u1EC7\u0074",
+    ("\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
+     "\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
+     "\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
+     "\u0056\u0069\u1EC7\u0074",
      "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),
 
     #(L) 3<nen>B<gumi><kinpachi><sensei>
-    (u"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
+    ("\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
      "3B-ww4c5e180e575a65lsy2b"),
 
     # (M) <amuro><namie>-with-SUPER-MONKEYS
-    (u"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
-     u"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
-     u"\u004F\u004E\u004B\u0045\u0059\u0053",
+    ("\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
+     "\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
+     "\u004F\u004E\u004B\u0045\u0059\u0053",
      "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),
 
     # (N) Hello-Another-Way-<sorezore><no><basho>
-    (u"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
-     u"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
-     u"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
+    ("\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
+     "\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
+     "\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
      "Hello-Another-Way--fc4qua05auwb3674vfr0b"),
 
     # (O) <hitotsu><yane><no><shita>2
-    (u"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
+    ("\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
      "2-u9tlzr9756bt3uc0v"),
 
     # (P) Maji<de>Koi<suru>5<byou><mae>
-    (u"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
-     u"\u308B\u0035\u79D2\u524D",
+    ("\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
+     "\u308B\u0035\u79D2\u524D",
      "MajiKoi5-783gue6qz075azm5e"),
 
      # (Q) <pafii>de<runba>
-    (u"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
+    ("\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
      "de-jg4avhby1noc0d"),
 
     # (R) <sono><supiido><de>
-    (u"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
+    ("\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
      "d9juau41awczczp"),
 
     # (S) -> $1.00 <-
-    (u"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
-     u"\u003C\u002D",
+    ("\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
+     "\u003C\u002D",
      "-> $1.00 <--")
     ]
 
@@ -627,9 +627,9 @@
         # points" above 0x10ffff on UCS-4 builds.
         if sys.maxunicode > 0xffff:
             ok = [
-                ("\x00\x10\xff\xff", u"\U0010ffff"),
-                ("\x00\x00\x01\x01", u"\U00000101"),
-                ("", u""),
+                ("\x00\x10\xff\xff", "\U0010ffff"),
+                ("\x00\x00\x01\x01", "\U00000101"),
+                ("", ""),
             ]
             not_ok = [
                 "\x7f\xff\xff\xff",
@@ -664,10 +664,10 @@
         if sys.maxunicode > 0xffff:
             codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
             decoder = codecs.getdecoder("unicode_internal")
-            ab = u"ab".encode("unicode_internal")
+            ab = "ab".encode("unicode_internal")
             ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
                 "UnicodeInternalTest")
-            self.assertEquals((u"ab", 12), ignored)
+            self.assertEquals(("ab", 12), ignored)
 
 # From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
 nameprep_tests = [
@@ -831,12 +831,12 @@
                 # Skipped
                 continue
             # The Unicode strings are given in UTF-8
-            orig = unicode(orig, "utf-8")
+            orig = str(orig, "utf-8")
             if prepped is None:
                 # Input contains prohibited characters
                 self.assertRaises(UnicodeError, nameprep, orig)
             else:
-                prepped = unicode(prepped, "utf-8")
+                prepped = str(prepped, "utf-8")
                 try:
                     self.assertEquals(nameprep(orig), prepped)
                 except Exception as e:
@@ -844,97 +844,97 @@
 
 class IDNACodecTest(unittest.TestCase):
     def test_builtin_decode(self):
-        self.assertEquals(unicode("python.org", "idna"), u"python.org")
-        self.assertEquals(unicode("python.org.", "idna"), u"python.org.")
-        self.assertEquals(unicode("xn--pythn-mua.org", "idna"), u"pyth\xf6n.org")
-        self.assertEquals(unicode("xn--pythn-mua.org.", "idna"), u"pyth\xf6n.org.")
+        self.assertEquals(str("python.org", "idna"), "python.org")
+        self.assertEquals(str("python.org.", "idna"), "python.org.")
+        self.assertEquals(str("xn--pythn-mua.org", "idna"), "pyth\xf6n.org")
+        self.assertEquals(str("xn--pythn-mua.org.", "idna"), "pyth\xf6n.org.")
 
     def test_builtin_encode(self):
-        self.assertEquals(u"python.org".encode("idna"), "python.org")
+        self.assertEquals("python.org".encode("idna"), "python.org")
         self.assertEquals("python.org.".encode("idna"), "python.org.")
-        self.assertEquals(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org")
-        self.assertEquals(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.")
+        self.assertEquals("pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org")
+        self.assertEquals("pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.")
 
     def test_stream(self):
         import StringIO
         r = codecs.getreader("idna")(StringIO.StringIO("abc"))
         r.read(3)
-        self.assertEquals(r.read(), u"")
+        self.assertEquals(r.read(), "")
 
     def test_incremental_decode(self):
         self.assertEquals(
             "".join(codecs.iterdecode("python.org", "idna")),
-            u"python.org"
+            "python.org"
         )
         self.assertEquals(
             "".join(codecs.iterdecode("python.org.", "idna")),
-            u"python.org."
+            "python.org."
         )
         self.assertEquals(
             "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
-            u"pyth\xf6n.org."
+            "pyth\xf6n.org."
         )
         self.assertEquals(
             "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
-            u"pyth\xf6n.org."
+            "pyth\xf6n.org."
         )
 
         decoder = codecs.getincrementaldecoder("idna")()
-        self.assertEquals(decoder.decode("xn--xam", ), u"")
-        self.assertEquals(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
-        self.assertEquals(decoder.decode(u"rg"), u"")
-        self.assertEquals(decoder.decode(u"", True), u"org")
+        self.assertEquals(decoder.decode("xn--xam", ), "")
+        self.assertEquals(decoder.decode("ple-9ta.o", ), "\xe4xample.")
+        self.assertEquals(decoder.decode("rg"), "")
+        self.assertEquals(decoder.decode("", True), "org")
 
         decoder.reset()
-        self.assertEquals(decoder.decode("xn--xam", ), u"")
-        self.assertEquals(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
-        self.assertEquals(decoder.decode("rg."), u"org.")
-        self.assertEquals(decoder.decode("", True), u"")
+        self.assertEquals(decoder.decode("xn--xam", ), "")
+        self.assertEquals(decoder.decode("ple-9ta.o", ), "\xe4xample.")
+        self.assertEquals(decoder.decode("rg."), "org.")
+        self.assertEquals(decoder.decode("", True), "")
 
     def test_incremental_encode(self):
         self.assertEquals(
-            "".join(codecs.iterencode(u"python.org", "idna")),
+            "".join(codecs.iterencode("python.org", "idna")),
             "python.org"
         )
         self.assertEquals(
-            "".join(codecs.iterencode(u"python.org.", "idna")),
+            "".join(codecs.iterencode("python.org.", "idna")),
             "python.org."
         )
         self.assertEquals(
-            "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
+            "".join(codecs.iterencode("pyth\xf6n.org.", "idna")),
             "xn--pythn-mua.org."
         )
         self.assertEquals(
-            "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
+            "".join(codecs.iterencode("pyth\xf6n.org.", "idna")),
             "xn--pythn-mua.org."
         )
 
         encoder = codecs.getincrementalencoder("idna")()
-        self.assertEquals(encoder.encode(u"\xe4x"), "")
-        self.assertEquals(encoder.encode(u"ample.org"), "xn--xample-9ta.")
-        self.assertEquals(encoder.encode(u"", True), "org")
+        self.assertEquals(encoder.encode("\xe4x"), "")
+        self.assertEquals(encoder.encode("ample.org"), "xn--xample-9ta.")
+        self.assertEquals(encoder.encode("", True), "org")
 
         encoder.reset()
-        self.assertEquals(encoder.encode(u"\xe4x"), "")
-        self.assertEquals(encoder.encode(u"ample.org."), "xn--xample-9ta.org.")
-        self.assertEquals(encoder.encode(u"", True), "")
+        self.assertEquals(encoder.encode("\xe4x"), "")
+        self.assertEquals(encoder.encode("ample.org."), "xn--xample-9ta.org.")
+        self.assertEquals(encoder.encode("", True), "")
 
 class CodecsModuleTest(unittest.TestCase):
 
     def test_decode(self):
         self.assertEquals(codecs.decode('\xe4\xf6\xfc', 'latin-1'),
-                          u'\xe4\xf6\xfc')
+                          '\xe4\xf6\xfc')
         self.assertRaises(TypeError, codecs.decode)
-        self.assertEquals(codecs.decode('abc'), u'abc')
+        self.assertEquals(codecs.decode('abc'), 'abc')
         self.assertRaises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii')
 
     def test_encode(self):
-        self.assertEquals(codecs.encode(u'\xe4\xf6\xfc', 'latin-1'),
+        self.assertEquals(codecs.encode('\xe4\xf6\xfc', 'latin-1'),
                           '\xe4\xf6\xfc')
         self.assertRaises(TypeError, codecs.encode)
         self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
-        self.assertEquals(codecs.encode(u'abc'), 'abc')
-        self.assertRaises(UnicodeEncodeError, codecs.encode, u'\xffff', 'ascii')
+        self.assertEquals(codecs.encode('abc'), 'abc')
+        self.assertRaises(UnicodeEncodeError, codecs.encode, '\xffff', 'ascii')
 
     def test_register(self):
         self.assertRaises(TypeError, codecs.register)
@@ -969,7 +969,7 @@
 
     def test_readlines(self):
         f = self.reader(self.stream)
-        self.assertEquals(f.readlines(), [u'\ud55c\n', u'\uae00'])
+        self.assertEquals(f.readlines(), ['\ud55c\n', '\uae00'])
 
 class EncodedFileTest(unittest.TestCase):
 
@@ -1154,7 +1154,7 @@
 
 class BasicUnicodeTest(unittest.TestCase, MixInCheckStateHandling):
     def test_basics(self):
-        s = u"abc123" # all codecs should be able to encode these
+        s = "abc123" # all codecs should be able to encode these
         for encoding in all_unicode_encodings:
             name = codecs.lookup(encoding).name
             if encoding.endswith("_codec"):
@@ -1178,7 +1178,7 @@
                     encodedresult += q.read()
                 q = Queue()
                 reader = codecs.getreader(encoding)(q)
-                decodedresult = u""
+                decodedresult = ""
                 for c in encodedresult:
                     q.write(c)
                     decodedresult += reader.read()
@@ -1197,9 +1197,9 @@
                     encodedresult = ""
                     for c in s:
                         encodedresult += encoder.encode(c)
-                    encodedresult += encoder.encode(u"", True)
+                    encodedresult += encoder.encode("", True)
                     decoder = codecs.getincrementaldecoder(encoding)()
-                    decodedresult = u""
+                    decodedresult = ""
                     for c in encodedresult:
                         decodedresult += decoder.decode(c)
                     decodedresult += decoder.decode("", True)
@@ -1209,21 +1209,21 @@
                     encodedresult = ""
                     for c in s:
                         encodedresult += cencoder.encode(c)
-                    encodedresult += cencoder.encode(u"", True)
+                    encodedresult += cencoder.encode("", True)
                     cdecoder = _testcapi.codec_incrementaldecoder(encoding)
-                    decodedresult = u""
+                    decodedresult = ""
                     for c in encodedresult:
                         decodedresult += cdecoder.decode(c)
                     decodedresult += cdecoder.decode("", True)
                     self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
 
                     # check iterencode()/iterdecode()
-                    result = u"".join(codecs.iterdecode(codecs.iterencode(s, encoding), encoding))
+                    result = "".join(codecs.iterdecode(codecs.iterencode(s, encoding), encoding))
                     self.assertEqual(result, s, "%r != %r (encoding=%r)" % (result, s, encoding))
 
                     # check iterencode()/iterdecode() with empty string
-                    result = u"".join(codecs.iterdecode(codecs.iterencode(u"", encoding), encoding))
-                    self.assertEqual(result, u"")
+                    result = "".join(codecs.iterdecode(codecs.iterencode("", encoding), encoding))
+                    self.assertEqual(result, "")
 
                 if encoding not in only_strict_mode:
                     # check incremental decoder/encoder with errors argument
@@ -1235,17 +1235,17 @@
                     else:
                         encodedresult = "".join(encoder.encode(c) for c in s)
                         decoder = codecs.getincrementaldecoder(encoding)("ignore")
-                        decodedresult = u"".join(decoder.decode(c) for c in encodedresult)
+                        decodedresult = "".join(decoder.decode(c) for c in encodedresult)
                         self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
 
                         encodedresult = "".join(cencoder.encode(c) for c in s)
                         cdecoder = _testcapi.codec_incrementaldecoder(encoding, "ignore")
-                        decodedresult = u"".join(cdecoder.decode(c) for c in encodedresult)
+                        decodedresult = "".join(cdecoder.decode(c) for c in encodedresult)
                         self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
 
     def test_seek(self):
         # all codecs should be able to encode these
-        s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456")
+        s = "%s\n%s\n" % (100*"abc123", 100*"def456")
         for encoding in all_unicode_encodings:
             if encoding == "idna": # FIXME: See SF bug #1163178
                 continue
@@ -1278,7 +1278,7 @@
 
     def test_decoder_state(self):
         # Check that getstate() and setstate() handle the state properly
-        u = u"abc123"
+        u = "abc123"
         for encoding in all_unicode_encodings:
             if encoding not in broken_incremental_coders:
                 self.check_state_handling_decode(encoding, u, u.encode(encoding))
@@ -1296,34 +1296,34 @@
 class CharmapTest(unittest.TestCase):
     def test_decode_with_string_map(self):
         self.assertEquals(
-            codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"),
-            (u"abc", 3)
+            codecs.charmap_decode("\x00\x01\x02", "strict", "abc"),
+            ("abc", 3)
         )
 
         self.assertEquals(
-            codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
-            (u"ab\ufffd", 3)
+            codecs.charmap_decode("\x00\x01\x02", "replace", "ab"),
+            ("ab\ufffd", 3)
         )
 
         self.assertEquals(
-            codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"),
-            (u"ab\ufffd", 3)
+            codecs.charmap_decode("\x00\x01\x02", "replace", "ab\ufffe"),
+            ("ab\ufffd", 3)
         )
 
         self.assertEquals(
-            codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"),
-            (u"ab", 3)
+            codecs.charmap_decode("\x00\x01\x02", "ignore", "ab"),
+            ("ab", 3)
         )
 
         self.assertEquals(
-            codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
-            (u"ab", 3)
+            codecs.charmap_decode("\x00\x01\x02", "ignore", "ab\ufffe"),
+            ("ab", 3)
         )
 
         allbytes = "".join(chr(i) for i in xrange(256))
         self.assertEquals(
-            codecs.charmap_decode(allbytes, "ignore", u""),
-            (u"", len(allbytes))
+            codecs.charmap_decode(allbytes, "ignore", ""),
+            ("", len(allbytes))
         )
 
 class WithStmtTest(unittest.TestCase):
@@ -1337,7 +1337,7 @@
         info = codecs.lookup("utf-8")
         with codecs.StreamReaderWriter(f, info.streamreader,
                                        info.streamwriter, 'strict') as srw:
-            self.assertEquals(srw.read(), u"\xfc")
+            self.assertEquals(srw.read(), "\xfc")
 
 
 def test_main():

Modified: python/branches/py3k-struni/Lib/test/test_compile.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_compile.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_compile.py	Wed May  2 21:09:54 2007
@@ -318,7 +318,7 @@
         self.assertNotEqual(id(f1.__code__), id(f2.__code__))
 
     def test_unicode_encoding(self):
-        code = u"# -*- coding: utf-8 -*-\npass\n"
+        code = "# -*- coding: utf-8 -*-\npass\n"
         self.assertRaises(SyntaxError, compile, code, "tmp", "exec")
 
     def test_subscripts(self):

Modified: python/branches/py3k-struni/Lib/test/test_complex.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_complex.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_complex.py	Wed May  2 21:09:54 2007
@@ -227,7 +227,7 @@
 
         self.assertEqual(complex("  3.14+J  "), 3.14+1j)
         if test_support.have_unicode:
-            self.assertEqual(complex(unicode("  3.14+J  ")), 3.14+1j)
+            self.assertEqual(complex(str("  3.14+J  ")), 3.14+1j)
 
         # SF bug 543840:  complex(string) accepts strings with \0
         # Fixed in 2.3.
@@ -251,8 +251,8 @@
         self.assertRaises(ValueError, complex, "1+(2j)")
         self.assertRaises(ValueError, complex, "(1+2j)123")
         if test_support.have_unicode:
-            self.assertRaises(ValueError, complex, unicode("1"*500))
-            self.assertRaises(ValueError, complex, unicode("x"))
+            self.assertRaises(ValueError, complex, str("1"*500))
+            self.assertRaises(ValueError, complex, str("x"))
 
         class EvilExc(Exception):
             pass

Modified: python/branches/py3k-struni/Lib/test/test_contains.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_contains.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_contains.py	Wed May  2 21:09:54 2007
@@ -59,31 +59,31 @@
 
     # Test char in Unicode
 
-    check('c' in unicode('abc'), "'c' not in u'abc'")
-    check('d' not in unicode('abc'), "'d' in u'abc'")
+    check('c' in str('abc'), "'c' not in u'abc'")
+    check('d' not in str('abc'), "'d' in u'abc'")
 
-    check('' in unicode(''), "'' not in u''")
-    check(unicode('') in '', "u'' not in ''")
-    check(unicode('') in unicode(''), "u'' not in u''")
-    check('' in unicode('abc'), "'' not in u'abc'")
-    check(unicode('') in 'abc', "u'' not in 'abc'")
-    check(unicode('') in unicode('abc'), "u'' not in u'abc'")
+    check('' in str(''), "'' not in u''")
+    check(str('') in '', "u'' not in ''")
+    check(str('') in str(''), "u'' not in u''")
+    check('' in str('abc'), "'' not in u'abc'")
+    check(str('') in 'abc', "u'' not in 'abc'")
+    check(str('') in str('abc'), "u'' not in u'abc'")
 
     try:
-        None in unicode('abc')
+        None in str('abc')
         check(0, "None in u'abc' did not raise error")
     except TypeError:
         pass
 
     # Test Unicode char in Unicode
 
-    check(unicode('c') in unicode('abc'), "u'c' not in u'abc'")
-    check(unicode('d') not in unicode('abc'), "u'd' in u'abc'")
+    check(str('c') in str('abc'), "u'c' not in u'abc'")
+    check(str('d') not in str('abc'), "u'd' in u'abc'")
 
     # Test Unicode char in string
 
-    check(unicode('c') in 'abc', "u'c' not in 'abc'")
-    check(unicode('d') not in 'abc', "u'd' in 'abc'")
+    check(str('c') in 'abc', "u'c' not in 'abc'")
+    check(str('d') not in 'abc', "u'd' in 'abc'")
 
 # A collection of tests on builtin sequence types
 a = range(10)

Modified: python/branches/py3k-struni/Lib/test/test_cookielib.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_cookielib.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_cookielib.py	Wed May  2 21:09:54 2007
@@ -570,7 +570,7 @@
             ("/foo\031/bar", "/foo%19/bar"),
             ("/\175foo/bar", "/%7Dfoo/bar"),
             # unicode
-            (u"/foo/bar\uabcd", "/foo/bar%EA%AF%8D"),  # UTF-8 encoded
+            ("/foo/bar\uabcd", "/foo/bar%EA%AF%8D"),  # UTF-8 encoded
             ]
         for arg, result in cases:
             self.assertEquals(escape_path(arg), result)
@@ -1540,7 +1540,7 @@
         self.assert_(not cookie)
 
         # unicode URL doesn't raise exception
-        cookie = interact_2965(c, u"http://www.acme.com/\xfc")
+        cookie = interact_2965(c, "http://www.acme.com/\xfc")
 
     def test_mozilla(self):
         # Save / load Mozilla/Netscape cookie file format.

Modified: python/branches/py3k-struni/Lib/test/test_copy.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_copy.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_copy.py	Wed May  2 21:09:54 2007
@@ -83,7 +83,7 @@
         def f():
             pass
         tests = [None, 42, 2**100, 3.14, True, False, 1j,
-                 "hello", u"hello\u1234", f.__code__,
+                 "hello", "hello\u1234", f.__code__,
                  NewStyle, xrange(10), Classic, max]
         for x in tests:
             self.assert_(copy.copy(x) is x, repr(x))
@@ -256,7 +256,7 @@
         def f():
             pass
         tests = [None, 42, 2**100, 3.14, True, False, 1j,
-                 "hello", u"hello\u1234", f.__code__,
+                 "hello", "hello\u1234", f.__code__,
                  NewStyle, xrange(10), Classic, max]
         for x in tests:
             self.assert_(copy.deepcopy(x) is x, repr(x))

Modified: python/branches/py3k-struni/Lib/test/test_descr.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_descr.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_descr.py	Wed May  2 21:09:54 2007
@@ -264,7 +264,7 @@
     del junk
 
     # Just make sure these don't blow up!
-    for arg in 2, 2, 2j, 2e0, [2], "2", u"2", (2,), {2:2}, type, test_dir:
+    for arg in 2, 2, 2j, 2e0, [2], "2", "2", (2,), {2:2}, type, test_dir:
         dir(arg)
 
     # Test dir on custom classes. Since these have object as a
@@ -1100,25 +1100,25 @@
 
     # Test unicode slot names
     try:
-        unicode
+        str
     except NameError:
         pass
     else:
         # Test a single unicode string is not expanded as a sequence.
         class C(object):
-            __slots__ = unicode("abc")
+            __slots__ = str("abc")
         c = C()
         c.abc = 5
         vereq(c.abc, 5)
 
         # _unicode_to_string used to modify slots in certain circumstances
-        slots = (unicode("foo"), unicode("bar"))
+        slots = (str("foo"), str("bar"))
         class C(object):
             __slots__ = slots
         x = C()
         x.foo = 5
         vereq(x.foo, 5)
-        veris(type(slots[0]), unicode)
+        veris(type(slots[0]), str)
         # this used to leak references
         try:
             class C(object):
@@ -2301,64 +2301,64 @@
     verify(s.lower().__class__ is str)
     vereq(s.lower(), base)
 
-    class madunicode(unicode):
+    class madunicode(str):
         _rev = None
         def rev(self):
             if self._rev is not None:
                 return self._rev
             L = list(self)
             L.reverse()
-            self._rev = self.__class__(u"".join(L))
+            self._rev = self.__class__("".join(L))
             return self._rev
     u = madunicode("ABCDEF")
-    vereq(u, u"ABCDEF")
-    vereq(u.rev(), madunicode(u"FEDCBA"))
-    vereq(u.rev().rev(), madunicode(u"ABCDEF"))
-    base = u"12345"
+    vereq(u, "ABCDEF")
+    vereq(u.rev(), madunicode("FEDCBA"))
+    vereq(u.rev().rev(), madunicode("ABCDEF"))
+    base = "12345"
     u = madunicode(base)
-    vereq(unicode(u), base)
-    verify(unicode(u).__class__ is unicode)
+    vereq(str(u), base)
+    verify(str(u).__class__ is str)
     vereq(hash(u), hash(base))
     vereq({u: 1}[base], 1)
     vereq({base: 1}[u], 1)
-    verify(u.strip().__class__ is unicode)
+    verify(u.strip().__class__ is str)
     vereq(u.strip(), base)
-    verify(u.lstrip().__class__ is unicode)
+    verify(u.lstrip().__class__ is str)
     vereq(u.lstrip(), base)
-    verify(u.rstrip().__class__ is unicode)
+    verify(u.rstrip().__class__ is str)
     vereq(u.rstrip(), base)
-    verify(u.replace(u"x", u"x").__class__ is unicode)
-    vereq(u.replace(u"x", u"x"), base)
-    verify(u.replace(u"xy", u"xy").__class__ is unicode)
-    vereq(u.replace(u"xy", u"xy"), base)
-    verify(u.center(len(u)).__class__ is unicode)
+    verify(u.replace("x", "x").__class__ is str)
+    vereq(u.replace("x", "x"), base)
+    verify(u.replace("xy", "xy").__class__ is str)
+    vereq(u.replace("xy", "xy"), base)
+    verify(u.center(len(u)).__class__ is str)
     vereq(u.center(len(u)), base)
-    verify(u.ljust(len(u)).__class__ is unicode)
+    verify(u.ljust(len(u)).__class__ is str)
     vereq(u.ljust(len(u)), base)
-    verify(u.rjust(len(u)).__class__ is unicode)
+    verify(u.rjust(len(u)).__class__ is str)
     vereq(u.rjust(len(u)), base)
-    verify(u.lower().__class__ is unicode)
+    verify(u.lower().__class__ is str)
     vereq(u.lower(), base)
-    verify(u.upper().__class__ is unicode)
+    verify(u.upper().__class__ is str)
     vereq(u.upper(), base)
-    verify(u.capitalize().__class__ is unicode)
+    verify(u.capitalize().__class__ is str)
     vereq(u.capitalize(), base)
-    verify(u.title().__class__ is unicode)
+    verify(u.title().__class__ is str)
     vereq(u.title(), base)
-    verify((u + u"").__class__ is unicode)
-    vereq(u + u"", base)
-    verify((u"" + u).__class__ is unicode)
-    vereq(u"" + u, base)
-    verify((u * 0).__class__ is unicode)
-    vereq(u * 0, u"")
-    verify((u * 1).__class__ is unicode)
+    verify((u + "").__class__ is str)
+    vereq(u + "", base)
+    verify(("" + u).__class__ is str)
+    vereq("" + u, base)
+    verify((u * 0).__class__ is str)
+    vereq(u * 0, "")
+    verify((u * 1).__class__ is str)
     vereq(u * 1, base)
-    verify((u * 2).__class__ is unicode)
+    verify((u * 2).__class__ is str)
     vereq(u * 2, base + base)
-    verify(u[:].__class__ is unicode)
+    verify(u[:].__class__ is str)
     vereq(u[:], base)
-    verify(u[0:0].__class__ is unicode)
-    vereq(u[0:0], u"")
+    verify(u[0:0].__class__ is str)
+    vereq(u[0:0], "")
 
     class sublist(list):
         pass
@@ -2437,12 +2437,12 @@
     vereq(int(x=3), 3)
     vereq(complex(imag=42, real=666), complex(666, 42))
     vereq(str(object=500), '500')
-    vereq(unicode(string='abc', errors='strict'), u'abc')
+    vereq(str(string='abc', errors='strict'), 'abc')
     vereq(tuple(sequence=range(3)), (0, 1, 2))
     vereq(list(sequence=(0, 1, 2)), range(3))
     # note: as of Python 2.3, dict() no longer has an "items" keyword arg
 
-    for constructor in (int, float, int, complex, str, unicode,
+    for constructor in (int, float, int, complex, str, str,
                         tuple, list, file):
         try:
             constructor(bogus_keyword_arg=1)
@@ -2719,13 +2719,13 @@
     class H(object):
         __slots__ = ["b", "a"]
     try:
-        unicode
+        str
     except NameError:
         class I(object):
             __slots__ = ["a", "b"]
     else:
         class I(object):
-            __slots__ = [unicode("a"), unicode("b")]
+            __slots__ = [str("a"), str("b")]
     class J(object):
         __slots__ = ["c", "b"]
     class K(object):
@@ -3124,9 +3124,9 @@
 
     # It's not clear that unicode will continue to support the character
     # buffer interface, and this test will fail if that's taken away.
-    class MyUni(unicode):
+    class MyUni(str):
         pass
-    base = u'abc'
+    base = 'abc'
     m = MyUni(base)
     vereq(binascii.b2a_hex(m), binascii.b2a_hex(base))
 

Modified: python/branches/py3k-struni/Lib/test/test_doctest2.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_doctest2.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_doctest2.py	Wed May  2 21:09:54 2007
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-u"""A module to test whether doctest recognizes some 2.2 features,
+"""A module to test whether doctest recognizes some 2.2 features,
 like static and class methods.
 
 >>> print('yup')  # 1
@@ -15,7 +15,7 @@
 from test import test_support
 
 class C(object):
-    u"""Class C.
+    """Class C.
 
     >>> print(C())  # 2
     42

Modified: python/branches/py3k-struni/Lib/test/test_exceptions.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_exceptions.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_exceptions.py	Wed May  2 21:09:54 2007
@@ -251,19 +251,19 @@
                  'print_file_and_line' : None, 'msg' : 'msgStr',
                  'filename' : None, 'lineno' : None, 'offset' : None}),
             (UnicodeError, (), {'message' : '', 'args' : (),}),
-            (UnicodeEncodeError, ('ascii', u'a', 0, 1, 'ordinal not in range'),
-                {'message' : '', 'args' : ('ascii', u'a', 0, 1,
+            (UnicodeEncodeError, ('ascii', 'a', 0, 1, 'ordinal not in range'),
+                {'message' : '', 'args' : ('ascii', 'a', 0, 1,
                                            'ordinal not in range'),
-                 'encoding' : 'ascii', 'object' : u'a',
+                 'encoding' : 'ascii', 'object' : 'a',
                  'start' : 0, 'reason' : 'ordinal not in range'}),
             (UnicodeDecodeError, ('ascii', '\xff', 0, 1, 'ordinal not in range'),
                 {'message' : '', 'args' : ('ascii', '\xff', 0, 1,
                                            'ordinal not in range'),
                  'encoding' : 'ascii', 'object' : '\xff',
                  'start' : 0, 'reason' : 'ordinal not in range'}),
-            (UnicodeTranslateError, (u"\u3042", 0, 1, "ouch"),
-                {'message' : '', 'args' : (u'\u3042', 0, 1, 'ouch'),
-                 'object' : u'\u3042', 'reason' : 'ouch',
+            (UnicodeTranslateError, ("\u3042", 0, 1, "ouch"),
+                {'message' : '', 'args' : ('\u3042', 0, 1, 'ouch'),
+                 'object' : '\u3042', 'reason' : 'ouch',
                  'start' : 0, 'end' : 1}),
         ]
         try:
@@ -334,9 +334,9 @@
         # Make sure both instances and classes have a str and unicode
         # representation.
         self.failUnless(str(Exception))
-        self.failUnless(unicode(Exception))
+        self.failUnless(str(Exception))
+        self.failUnless(str(Exception('a')))
         self.failUnless(str(Exception('a')))
-        self.failUnless(unicode(Exception(u'a')))
 
     def testExceptionCleanup(self):
         # Make sure "except V as N" exceptions are cleaned up properly

Modified: python/branches/py3k-struni/Lib/test/test_file.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_file.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_file.py	Wed May  2 21:09:54 2007
@@ -145,7 +145,7 @@
 
     def testUnicodeOpen(self):
         # verify repr works for unicode too
-        f = open(unicode(TESTFN), "w")
+        f = open(str(TESTFN), "w")
         self.assert_(repr(f).startswith("<open file u'" + TESTFN))
         f.close()
         os.unlink(TESTFN)

Modified: python/branches/py3k-struni/Lib/test/test_fileinput.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_fileinput.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_fileinput.py	Wed May  2 21:09:54 2007
@@ -160,7 +160,7 @@
             encoding = sys.getfilesystemencoding()
             if encoding is None:
                 encoding = 'ascii'
-            fi = FileInput(files=unicode(t1, encoding))
+            fi = FileInput(files=str(t1, encoding))
             lines = list(fi)
             self.assertEqual(lines, ["A\n", "B"])
         finally:

Modified: python/branches/py3k-struni/Lib/test/test_fileio.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_fileio.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_fileio.py	Wed May  2 21:09:54 2007
@@ -149,7 +149,7 @@
 
     def testUnicodeOpen(self):
         # verify repr works for unicode too
-        f = _fileio._FileIO(unicode(TESTFN), "w")
+        f = _fileio._FileIO(str(TESTFN), "w")
         f.close()
         os.unlink(TESTFN)
 

Modified: python/branches/py3k-struni/Lib/test/test_format.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_format.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_format.py	Wed May  2 21:09:54 2007
@@ -35,7 +35,7 @@
 def testboth(formatstr, *args):
     testformat(formatstr, *args)
     if have_unicode:
-        testformat(unicode(formatstr), *args)
+        testformat(str(formatstr), *args)
 
 
 testboth("%.1d", (1,), "1")
@@ -216,18 +216,18 @@
 test_exc('abc %a', 1, ValueError,
          "unsupported format character 'a' (0x61) at index 5")
 if have_unicode:
-    test_exc(unicode('abc %\u3000','raw-unicode-escape'), 1, ValueError,
+    test_exc(str('abc %\u3000','raw-unicode-escape'), 1, ValueError,
              "unsupported format character '?' (0x3000) at index 5")
 
 test_exc('%d', '1', TypeError, "int argument required, not str")
 test_exc('%g', '1', TypeError, "float argument required, not str")
 test_exc('no format', '1', TypeError,
          "not all arguments converted during string formatting")
-test_exc('no format', u'1', TypeError,
+test_exc('no format', '1', TypeError,
          "not all arguments converted during string formatting")
-test_exc(u'no format', '1', TypeError,
+test_exc('no format', '1', TypeError,
          "not all arguments converted during string formatting")
-test_exc(u'no format', u'1', TypeError,
+test_exc('no format', '1', TypeError,
          "not all arguments converted during string formatting")
 
 class Foobar(int):

Modified: python/branches/py3k-struni/Lib/test/test_getargs.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_getargs.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_getargs.py	Wed May  2 21:09:54 2007
@@ -19,6 +19,6 @@
 
 if have_unicode:
     try:
-        marshal.loads(unicode(r"\222", 'unicode-escape'))
+        marshal.loads(str(r"\222", 'unicode-escape'))
     except UnicodeError:
         pass

Modified: python/branches/py3k-struni/Lib/test/test_gettext.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_gettext.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_gettext.py	Wed May  2 21:09:54 2007
@@ -95,33 +95,33 @@
         eq = self.assertEqual
         # test some translations
         eq(_('albatross'), 'albatross')
-        eq(_(u'mullusk'), 'bacon')
+        eq(_('mullusk'), 'bacon')
         eq(_(r'Raymond Luxury Yach-t'), 'Throatwobbler Mangrove')
-        eq(_(ur'nudge nudge'), 'wink wink')
+        eq(_(r'nudge nudge'), 'wink wink')
 
     def test_double_quotes(self):
         eq = self.assertEqual
         # double quotes
         eq(_("albatross"), 'albatross')
-        eq(_(u"mullusk"), 'bacon')
+        eq(_("mullusk"), 'bacon')
         eq(_(r"Raymond Luxury Yach-t"), 'Throatwobbler Mangrove')
-        eq(_(ur"nudge nudge"), 'wink wink')
+        eq(_(r"nudge nudge"), 'wink wink')
 
     def test_triple_single_quotes(self):
         eq = self.assertEqual
         # triple single quotes
         eq(_('''albatross'''), 'albatross')
-        eq(_(u'''mullusk'''), 'bacon')
+        eq(_('''mullusk'''), 'bacon')
         eq(_(r'''Raymond Luxury Yach-t'''), 'Throatwobbler Mangrove')
-        eq(_(ur'''nudge nudge'''), 'wink wink')
+        eq(_(r'''nudge nudge'''), 'wink wink')
 
     def test_triple_double_quotes(self):
         eq = self.assertEqual
         # triple double quotes
         eq(_("""albatross"""), 'albatross')
-        eq(_(u"""mullusk"""), 'bacon')
+        eq(_("""mullusk"""), 'bacon')
         eq(_(r"""Raymond Luxury Yach-t"""), 'Throatwobbler Mangrove')
-        eq(_(ur"""nudge nudge"""), 'wink wink')
+        eq(_(r"""nudge nudge"""), 'wink wink')
 
     def test_multiline_strings(self):
         eq = self.assertEqual
@@ -143,11 +143,11 @@
         t.install()
         eq(_('nudge nudge'), 'wink wink')
         # Try unicode return type
-        t.install(unicode=True)
+        t.install(str=True)
         eq(_('mullusk'), 'bacon')
         # Test installation of other methods
         import __builtin__
-        t.install(unicode=True, names=["gettext", "lgettext"])
+        t.install(str=True, names=["gettext", "lgettext"])
         eq(_, t.ugettext)
         eq(__builtin__.gettext, t.ugettext)
         eq(lgettext, t.lgettext)
@@ -175,33 +175,33 @@
         eq = self.assertEqual
         # test some translations
         eq(self._('albatross'), 'albatross')
-        eq(self._(u'mullusk'), 'bacon')
+        eq(self._('mullusk'), 'bacon')
         eq(self._(r'Raymond Luxury Yach-t'), 'Throatwobbler Mangrove')
-        eq(self._(ur'nudge nudge'), 'wink wink')
+        eq(self._(r'nudge nudge'), 'wink wink')
 
     def test_double_quotes(self):
         eq = self.assertEqual
         # double quotes
         eq(self._("albatross"), 'albatross')
-        eq(self._(u"mullusk"), 'bacon')
+        eq(self._("mullusk"), 'bacon')
         eq(self._(r"Raymond Luxury Yach-t"), 'Throatwobbler Mangrove')
-        eq(self._(ur"nudge nudge"), 'wink wink')
+        eq(self._(r"nudge nudge"), 'wink wink')
 
     def test_triple_single_quotes(self):
         eq = self.assertEqual
         # triple single quotes
         eq(self._('''albatross'''), 'albatross')
-        eq(self._(u'''mullusk'''), 'bacon')
+        eq(self._('''mullusk'''), 'bacon')
         eq(self._(r'''Raymond Luxury Yach-t'''), 'Throatwobbler Mangrove')
-        eq(self._(ur'''nudge nudge'''), 'wink wink')
+        eq(self._(r'''nudge nudge'''), 'wink wink')
 
     def test_triple_double_quotes(self):
         eq = self.assertEqual
         # triple double quotes
         eq(self._("""albatross"""), 'albatross')
-        eq(self._(u"""mullusk"""), 'bacon')
+        eq(self._("""mullusk"""), 'bacon')
         eq(self._(r"""Raymond Luxury Yach-t"""), 'Throatwobbler Mangrove')
-        eq(self._(ur"""nudge nudge"""), 'wink wink')
+        eq(self._(r"""nudge nudge"""), 'wink wink')
 
     def test_multiline_strings(self):
         eq = self.assertEqual
@@ -309,12 +309,12 @@
 
     def test_unicode_msgid(self):
         unless = self.failUnless
-        unless(isinstance(self._(''), unicode))
-        unless(isinstance(self._(u''), unicode))
+        unless(isinstance(self._(''), str))
+        unless(isinstance(self._(''), str))
 
     def test_unicode_msgstr(self):
         eq = self.assertEqual
-        eq(self._(u'ab\xde'), u'\xa4yz')
+        eq(self._('ab\xde'), '\xa4yz')
 
 
 class WeirdMetadataTest(GettextBaseTest):

Modified: python/branches/py3k-struni/Lib/test/test_glob.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_glob.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_glob.py	Wed May  2 21:09:54 2007
@@ -54,11 +54,11 @@
 
         # test return types are unicode, but only if os.listdir
         # returns unicode filenames
-        uniset = set([unicode])
-        tmp = os.listdir(u'.')
+        uniset = set([str])
+        tmp = os.listdir('.')
         if set(type(x) for x in tmp) == uniset:
-            u1 = glob.glob(u'*')
-            u2 = glob.glob(u'./*')
+            u1 = glob.glob('*')
+            u2 = glob.glob('./*')
             self.assertEquals(set(type(r) for r in u1), uniset)
             self.assertEquals(set(type(r) for r in u2), uniset)
 

Modified: python/branches/py3k-struni/Lib/test/test_htmlparser.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_htmlparser.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_htmlparser.py	Wed May  2 21:09:54 2007
@@ -311,7 +311,7 @@
 
     def test_entityrefs_in_attributes(self):
         self._run_check("<html foo='&euro;&amp;&#97;&#x61;&unsupported;'>", [
-                ("starttag", "html", [("foo", u"\u20AC&aa&unsupported;")])
+                ("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])
                 ])
 
 

Modified: python/branches/py3k-struni/Lib/test/test_index.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_index.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_index.py	Wed May  2 21:09:54 2007
@@ -161,7 +161,7 @@
     seq = "this is a test"
 
 class UnicodeTestCase(SeqTestCase):
-    seq = u"this is a test"
+    seq = "this is a test"
 
 
 class XRangeTestCase(unittest.TestCase):

Modified: python/branches/py3k-struni/Lib/test/test_io.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_io.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_io.py	Wed May  2 21:09:54 2007
@@ -542,13 +542,13 @@
     def multi_line_test(self, f, enc):
         f.seek(0)
         f.truncate()
-        sample = u"s\xff\u0fff\uffff"
+        sample = "s\xff\u0fff\uffff"
         wlines = []
         for size in (0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 62, 63, 64, 65, 1000):
             chars = []
             for i in xrange(size):
                 chars.append(sample[i % len(sample)])
-            line = u"".join(chars) + "\n"
+            line = "".join(chars) + "\n"
             wlines.append((f.tell(), line))
             f.write(line)
         f.seek(0)
@@ -564,19 +564,19 @@
     def testTelling(self):
         f = io.open(test_support.TESTFN, "w+", encoding="utf8")
         p0 = f.tell()
-        f.write(u"\xff\n")
+        f.write("\xff\n")
         p1 = f.tell()
-        f.write(u"\xff\n")
+        f.write("\xff\n")
         p2 = f.tell()
         f.seek(0)
         self.assertEquals(f.tell(), p0)
-        self.assertEquals(f.readline(), u"\xff\n")
+        self.assertEquals(f.readline(), "\xff\n")
         self.assertEquals(f.tell(), p1)
-        self.assertEquals(f.readline(), u"\xff\n")
+        self.assertEquals(f.readline(), "\xff\n")
         self.assertEquals(f.tell(), p2)
         f.seek(0)
         for line in f:
-            self.assertEquals(line, u"\xff\n")
+            self.assertEquals(line, "\xff\n")
             self.assertRaises(IOError, f.tell)
         self.assertEquals(f.tell(), p2)
         f.close()
@@ -584,10 +584,10 @@
     def testSeeking(self):
         chunk_size = io.TextIOWrapper._CHUNK_SIZE
         prefix_size = chunk_size - 2
-        u_prefix = u"a" * prefix_size
+        u_prefix = "a" * prefix_size
         prefix = bytes(u_prefix.encode("utf-8"))
         self.assertEquals(len(u_prefix), len(prefix))
-        u_suffix = u"\u8888\n"
+        u_suffix = "\u8888\n"
         suffix = bytes(u_suffix.encode("utf-8"))
         line = prefix + suffix
         f = io.open(test_support.TESTFN, "wb")
@@ -614,7 +614,7 @@
     def timingTest(self):
         timer = time.time
         enc = "utf8"
-        line = u"\0\x0f\xff\u0fff\uffff\U000fffff\U0010ffff"*3 + "\n"
+        line = "\0\x0f\xff\u0fff\uffff\U000fffff\U0010ffff"*3 + "\n"
         nlines = 10000
         nchars = len(line)
         nbytes = len(line.encode(enc))

Modified: python/branches/py3k-struni/Lib/test/test_isinstance.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_isinstance.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_isinstance.py	Wed May  2 21:09:54 2007
@@ -243,7 +243,7 @@
 
         self.assertEqual(True, issubclass(int, (int, (float, int))))
         if test_support.have_unicode:
-            self.assertEqual(True, issubclass(str, (unicode, (Child, NewChild, basestring))))
+            self.assertEqual(True, issubclass(str, (str, (Child, NewChild, basestring))))
 
     def test_subclass_recursion_limit(self):
         # make sure that issubclass raises RuntimeError before the C stack is

Modified: python/branches/py3k-struni/Lib/test/test_iter.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_iter.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_iter.py	Wed May  2 21:09:54 2007
@@ -216,9 +216,9 @@
     # Test a Unicode string
     if have_unicode:
         def test_iter_unicode(self):
-            self.check_for_loop(iter(unicode("abcde")),
-                                [unicode("a"), unicode("b"), unicode("c"),
-                                 unicode("d"), unicode("e")])
+            self.check_for_loop(iter(str("abcde")),
+                                [str("a"), str("b"), str("c"),
+                                 str("d"), str("e")])
 
     # Test a directory
     def test_iter_dict(self):
@@ -518,7 +518,7 @@
                 i = self.i
                 self.i = i+1
                 if i == 2:
-                    return unicode("fooled you!")
+                    return str("fooled you!")
                 return next(self.it)
 
         f = open(TESTFN, "w")
@@ -535,7 +535,7 @@
         # and pass that on to unicode.join().
         try:
             got = " - ".join(OhPhooey(f))
-            self.assertEqual(got, unicode("a\n - b\n - fooled you! - c\n"))
+            self.assertEqual(got, str("a\n - b\n - fooled you! - c\n"))
         finally:
             f.close()
             try:

Modified: python/branches/py3k-struni/Lib/test/test_macfs.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_macfs.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_macfs.py	Wed May  2 21:09:54 2007
@@ -32,7 +32,7 @@
 
     def test_fsref_unicode(self):
         if sys.getfilesystemencoding():
-            testfn_unicode = unicode(test_support.TESTFN)
+            testfn_unicode = str(test_support.TESTFN)
             fsr = macfs.FSRef(testfn_unicode)
             self.assertEqual(os.path.realpath(test_support.TESTFN), fsr.as_pathname())
 

Modified: python/branches/py3k-struni/Lib/test/test_marshal.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_marshal.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_marshal.py	Wed May  2 21:09:54 2007
@@ -106,7 +106,7 @@
 
 class StringTestCase(unittest.TestCase):
     def test_unicode(self):
-        for s in [u"", u"Andrè Previn", u"abc", u" "*10000]:
+        for s in ["", "Andrè Previn", "abc", " "*10000]:
             new = marshal.loads(marshal.dumps(s))
             self.assertEqual(s, new)
             self.assertEqual(type(s), type(new))
@@ -156,7 +156,7 @@
          'alist': ['.zyx.41'],
          'atuple': ('.zyx.41',)*10,
          'aboolean': False,
-         'aunicode': u"Andrè Previn"
+         'aunicode': "Andrè Previn"
          }
     def test_dict(self):
         new = marshal.loads(marshal.dumps(self.d))

Modified: python/branches/py3k-struni/Lib/test/test_minidom.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_minidom.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_minidom.py	Wed May  2 21:09:54 2007
@@ -166,7 +166,7 @@
 
     def testAppendChild(self):
         dom = parse(tstfile)
-        dom.documentElement.appendChild(dom.createComment(u"Hello"))
+        dom.documentElement.appendChild(dom.createComment("Hello"))
         self.confirm(dom.documentElement.childNodes[-1].nodeName == "#comment")
         self.confirm(dom.documentElement.childNodes[-1].data == "Hello")
         dom.unlink()
@@ -427,7 +427,7 @@
 
     def testElementReprAndStrUnicode(self):
         dom = Document()
-        el = dom.appendChild(dom.createElement(u"abc"))
+        el = dom.appendChild(dom.createElement("abc"))
         string1 = repr(el)
         string2 = str(el)
         self.confirm(string1 == string2)
@@ -436,7 +436,7 @@
     def testElementReprAndStrUnicodeNS(self):
         dom = Document()
         el = dom.appendChild(
-            dom.createElementNS(u"http://www.slashdot.org", u"slash:abc"))
+            dom.createElementNS("http://www.slashdot.org", "slash:abc"))
         string1 = repr(el)
         string2 = str(el)
         self.confirm(string1 == string2)
@@ -445,7 +445,7 @@
 
     def testAttributeRepr(self):
         dom = Document()
-        el = dom.appendChild(dom.createElement(u"abc"))
+        el = dom.appendChild(dom.createElement("abc"))
         node = el.setAttribute("abc", "def")
         self.confirm(str(node) == repr(node))
         dom.unlink()
@@ -869,7 +869,7 @@
 
     def testEncodings(self):
         doc = parseString('<foo>&#x20ac;</foo>')
-        self.confirm(doc.toxml() == u'<?xml version="1.0" ?><foo>\u20ac</foo>'
+        self.confirm(doc.toxml() == '<?xml version="1.0" ?><foo>\u20ac</foo>'
                 and doc.toxml('utf-8') ==
                 '<?xml version="1.0" encoding="utf-8"?><foo>\xe2\x82\xac</foo>'
                 and doc.toxml('iso-8859-15') ==

Modified: python/branches/py3k-struni/Lib/test/test_module.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_module.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_module.py	Wed May  2 21:09:54 2007
@@ -35,15 +35,15 @@
 
     def test_unicode_docstring(self):
         # Unicode docstring
-        foo = ModuleType("foo", u"foodoc\u1234")
+        foo = ModuleType("foo", "foodoc\u1234")
         self.assertEqual(foo.__name__, "foo")
-        self.assertEqual(foo.__doc__, u"foodoc\u1234")
+        self.assertEqual(foo.__doc__, "foodoc\u1234")
         self.assertEqual(foo.__dict__,
-                         {"__name__": "foo", "__doc__": u"foodoc\u1234"})
+                         {"__name__": "foo", "__doc__": "foodoc\u1234"})
 
     def test_reinit(self):
         # Reinitialization should not replace the __dict__
-        foo = ModuleType("foo", u"foodoc\u1234")
+        foo = ModuleType("foo", "foodoc\u1234")
         foo.bar = 42
         d = foo.__dict__
         foo.__init__("foo", "foodoc")

Modified: python/branches/py3k-struni/Lib/test/test_multibytecodec.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_multibytecodec.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_multibytecodec.py	Wed May  2 21:09:54 2007
@@ -30,9 +30,9 @@
 
     def test_nullcoding(self):
         for enc in ALL_CJKENCODINGS:
-            self.assertEqual(''.decode(enc), u'')
-            self.assertEqual(unicode('', enc), u'')
-            self.assertEqual(u''.encode(enc), '')
+            self.assertEqual(''.decode(enc), '')
+            self.assertEqual(str('', enc), '')
+            self.assertEqual(''.encode(enc), '')
 
     def test_str_decode(self):
         for enc in ALL_CJKENCODINGS:
@@ -40,7 +40,7 @@
 
     def test_errorcallback_longindex(self):
         dec = codecs.getdecoder('euc-kr')
-        myreplace  = lambda exc: (u'', sys.maxint+1)
+        myreplace  = lambda exc: ('', sys.maxint+1)
         codecs.register_error('test.cjktest', myreplace)
         self.assertRaises(IndexError, dec,
                           'apple\x92ham\x93spam', 'test.cjktest')
@@ -58,14 +58,14 @@
     def test_stateless(self):
         # cp949 encoder isn't stateful at all.
         encoder = codecs.getincrementalencoder('cp949')()
-        self.assertEqual(encoder.encode(u'\ud30c\uc774\uc36c \ub9c8\uc744'),
+        self.assertEqual(encoder.encode('\ud30c\uc774\uc36c \ub9c8\uc744'),
                          '\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
         self.assertEqual(encoder.reset(), None)
-        self.assertEqual(encoder.encode(u'\u2606\u223c\u2606', True),
+        self.assertEqual(encoder.encode('\u2606\u223c\u2606', True),
                          '\xa1\xd9\xa1\xad\xa1\xd9')
         self.assertEqual(encoder.reset(), None)
-        self.assertEqual(encoder.encode(u'', True), '')
-        self.assertEqual(encoder.encode(u'', False), '')
+        self.assertEqual(encoder.encode('', True), '')
+        self.assertEqual(encoder.encode('', False), '')
         self.assertEqual(encoder.reset(), None)
 
     def test_stateful(self):
@@ -75,29 +75,29 @@
         #   U+0300 => ABDC
 
         encoder = codecs.getincrementalencoder('jisx0213')()
-        self.assertEqual(encoder.encode(u'\u00e6\u0300'), '\xab\xc4')
-        self.assertEqual(encoder.encode(u'\u00e6'), '')
-        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xc4')
-        self.assertEqual(encoder.encode(u'\u00e6', True), '\xa9\xdc')
+        self.assertEqual(encoder.encode('\u00e6\u0300'), '\xab\xc4')
+        self.assertEqual(encoder.encode('\u00e6'), '')
+        self.assertEqual(encoder.encode('\u0300'), '\xab\xc4')
+        self.assertEqual(encoder.encode('\u00e6', True), '\xa9\xdc')
 
         self.assertEqual(encoder.reset(), None)
-        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
+        self.assertEqual(encoder.encode('\u0300'), '\xab\xdc')
 
-        self.assertEqual(encoder.encode(u'\u00e6'), '')
+        self.assertEqual(encoder.encode('\u00e6'), '')
         self.assertEqual(encoder.encode('', True), '\xa9\xdc')
         self.assertEqual(encoder.encode('', True), '')
 
     def test_stateful_keep_buffer(self):
         encoder = codecs.getincrementalencoder('jisx0213')()
-        self.assertEqual(encoder.encode(u'\u00e6'), '')
-        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
-        self.assertEqual(encoder.encode(u'\u0300\u00e6'), '\xab\xc4')
-        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
+        self.assertEqual(encoder.encode('\u00e6'), '')
+        self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
+        self.assertEqual(encoder.encode('\u0300\u00e6'), '\xab\xc4')
+        self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
         self.assertEqual(encoder.reset(), None)
-        self.assertEqual(encoder.encode(u'\u0300'), '\xab\xdc')
-        self.assertEqual(encoder.encode(u'\u00e6'), '')
-        self.assertRaises(UnicodeEncodeError, encoder.encode, u'\u0123')
-        self.assertEqual(encoder.encode(u'', True), '\xa9\xdc')
+        self.assertEqual(encoder.encode('\u0300'), '\xab\xdc')
+        self.assertEqual(encoder.encode('\u00e6'), '')
+        self.assertRaises(UnicodeEncodeError, encoder.encode, '\u0123')
+        self.assertEqual(encoder.encode('', True), '\xa9\xdc')
 
 
 class Test_IncrementalDecoder(unittest.TestCase):
@@ -106,52 +106,52 @@
         # cp949 decoder is simple with only 1 or 2 bytes sequences.
         decoder = codecs.getincrementaldecoder('cp949')()
         self.assertEqual(decoder.decode('\xc6\xc4\xc0\xcc\xbd'),
-                         u'\ud30c\uc774')
+                         '\ud30c\uc774')
         self.assertEqual(decoder.decode('\xe3 \xb8\xb6\xc0\xbb'),
-                         u'\uc36c \ub9c8\uc744')
-        self.assertEqual(decoder.decode(''), u'')
+                         '\uc36c \ub9c8\uc744')
+        self.assertEqual(decoder.decode(''), '')
 
     def test_dbcs_keep_buffer(self):
         decoder = codecs.getincrementaldecoder('cp949')()
-        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
+        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), '\ud30c')
         self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
-        self.assertEqual(decoder.decode('\xcc'), u'\uc774')
+        self.assertEqual(decoder.decode('\xcc'), '\uc774')
 
-        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), u'\ud30c')
+        self.assertEqual(decoder.decode('\xc6\xc4\xc0'), '\ud30c')
         self.assertRaises(UnicodeDecodeError, decoder.decode, '\xcc\xbd', True)
-        self.assertEqual(decoder.decode('\xcc'), u'\uc774')
+        self.assertEqual(decoder.decode('\xcc'), '\uc774')
 
     def test_iso2022(self):
         decoder = codecs.getincrementaldecoder('iso2022-jp')()
         ESC = '\x1b'
-        self.assertEqual(decoder.decode(ESC + '('), u'')
-        self.assertEqual(decoder.decode('B', True), u'')
-        self.assertEqual(decoder.decode(ESC + '$'), u'')
-        self.assertEqual(decoder.decode('B@$'), u'\u4e16')
-        self.assertEqual(decoder.decode('@$@'), u'\u4e16')
-        self.assertEqual(decoder.decode('$', True), u'\u4e16')
+        self.assertEqual(decoder.decode(ESC + '('), '')
+        self.assertEqual(decoder.decode('B', True), '')
+        self.assertEqual(decoder.decode(ESC + '$'), '')
+        self.assertEqual(decoder.decode('B@$'), '\u4e16')
+        self.assertEqual(decoder.decode('@$@'), '\u4e16')
+        self.assertEqual(decoder.decode('$', True), '\u4e16')
         self.assertEqual(decoder.reset(), None)
-        self.assertEqual(decoder.decode('@$'), u'@$')
-        self.assertEqual(decoder.decode(ESC + '$'), u'')
+        self.assertEqual(decoder.decode('@$'), '@$')
+        self.assertEqual(decoder.decode(ESC + '$'), '')
         self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
-        self.assertEqual(decoder.decode('B@$'), u'\u4e16')
+        self.assertEqual(decoder.decode('B@$'), '\u4e16')
 
 
 class Test_StreamWriter(unittest.TestCase):
-    if len(u'\U00012345') == 2: # UCS2
+    if len('\U00012345') == 2: # UCS2
         def test_gb18030(self):
             s= StringIO.StringIO()
             c = codecs.getwriter('gb18030')(s)
-            c.write(u'123')
+            c.write('123')
             self.assertEqual(s.getvalue(), '123')
-            c.write(u'\U00012345')
+            c.write('\U00012345')
             self.assertEqual(s.getvalue(), '123\x907\x959')
-            c.write(u'\U00012345'[0])
+            c.write('\U00012345'[0])
             self.assertEqual(s.getvalue(), '123\x907\x959')
-            c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
+            c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
             self.assertEqual(s.getvalue(),
                     '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
-            c.write(u'\U00012345'[0])
+            c.write('\U00012345'[0])
             self.assertEqual(s.getvalue(),
                     '123\x907\x959\x907\x959\x907\x959\x827\xcf5\x810\x851')
             self.assertRaises(UnicodeError, c.reset)
@@ -161,20 +161,20 @@
         def test_utf_8(self):
             s= StringIO.StringIO()
             c = codecs.getwriter('utf-8')(s)
-            c.write(u'123')
+            c.write('123')
             self.assertEqual(s.getvalue(), '123')
-            c.write(u'\U00012345')
+            c.write('\U00012345')
             self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
 
             # Python utf-8 codec can't buffer surrogate pairs yet.
             if 0:
-                c.write(u'\U00012345'[0])
+                c.write('\U00012345'[0])
                 self.assertEqual(s.getvalue(), '123\xf0\x92\x8d\x85')
-                c.write(u'\U00012345'[1] + u'\U00012345' + u'\uac00\u00ac')
+                c.write('\U00012345'[1] + '\U00012345' + '\uac00\u00ac')
                 self.assertEqual(s.getvalue(),
                     '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                     '\xea\xb0\x80\xc2\xac')
-                c.write(u'\U00012345'[0])
+                c.write('\U00012345'[0])
                 self.assertEqual(s.getvalue(),
                     '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                     '\xea\xb0\x80\xc2\xac')
@@ -182,7 +182,7 @@
                 self.assertEqual(s.getvalue(),
                     '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                     '\xea\xb0\x80\xc2\xac\xed\xa0\x88')
-                c.write(u'\U00012345'[1])
+                c.write('\U00012345'[1])
                 self.assertEqual(s.getvalue(),
                     '123\xf0\x92\x8d\x85\xf0\x92\x8d\x85\xf0\x92\x8d\x85'
                     '\xea\xb0\x80\xc2\xac\xed\xa0\x88\xed\xbd\x85')
@@ -199,13 +199,13 @@
 class Test_ISO2022(unittest.TestCase):
     def test_g2(self):
         iso2022jp2 = '\x1b(B:hu4:unit\x1b.A\x1bNi de famille'
-        uni = u':hu4:unit\xe9 de famille'
+        uni = ':hu4:unit\xe9 de famille'
         self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)
 
     def test_iso2022_jp_g0(self):
-        self.failIf('\x0e' in u'\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
+        self.failIf('\x0e' in '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
         for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
-            e = u'\u3406'.encode(encoding)
+            e = '\u3406'.encode(encoding)
             self.failIf(filter(lambda x: x >= '\x80', e))
 
     def test_bug1572832(self):

Modified: python/branches/py3k-struni/Lib/test/test_multibytecodec_support.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_multibytecodec_support.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_multibytecodec_support.py	Wed May  2 21:09:54 2007
@@ -18,7 +18,7 @@
     roundtriptest   = 1    # set if roundtrip is possible with unicode
     has_iso10646    = 0    # set if this encoding contains whole iso10646 map
     xmlcharnametest = None # string to test xmlcharrefreplace
-    unmappedunicode = u'\udeee' # a unicode codepoint that is not mapped.
+    unmappedunicode = '\udeee' # a unicode codepoint that is not mapped.
 
     def setUp(self):
         if self.codec is None:
@@ -54,7 +54,7 @@
         if self.has_iso10646:
             return
 
-        s = u"\u0b13\u0b23\u0b60 nd eggs"
+        s = "\u0b13\u0b23\u0b60 nd eggs"
         self.assertEqual(
             self.encode(s, "xmlcharrefreplace")[0],
             "&#2835;&#2851;&#2912; nd eggs"
@@ -72,17 +72,17 @@
             l = []
             for c in exc.object[exc.start:exc.end]:
                 if ord(c) in codepoint2name:
-                    l.append(u"&%s;" % codepoint2name[ord(c)])
+                    l.append("&%s;" % codepoint2name[ord(c)])
                 else:
-                    l.append(u"&#%d;" % ord(c))
-            return (u"".join(l), exc.end)
+                    l.append("&#%d;" % ord(c))
+            return ("".join(l), exc.end)
 
         codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)
 
         if self.xmlcharnametest:
             sin, sout = self.xmlcharnametest
         else:
-            sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
+            sin = "\xab\u211c\xbb = \u2329\u1234\u232a"
             sout = "&laquo;&real;&raquo; = &lang;&#4660;&rang;"
         self.assertEqual(self.encode(sin,
                                     "test.xmlcharnamereplace")[0], sout)
@@ -98,20 +98,20 @@
 
     def test_callback_long_index(self):
         def myreplace(exc):
-            return (u'x', int(exc.end))
+            return ('x', int(exc.end))
         codecs.register_error("test.cjktest", myreplace)
-        self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
+        self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
                                      'test.cjktest'), ('abcdxefgh', 9))
 
         def myreplace(exc):
-            return (u'x', sys.maxint + 1)
+            return ('x', sys.maxint + 1)
         codecs.register_error("test.cjktest", myreplace)
         self.assertRaises(IndexError, self.encode, self.unmappedunicode,
                           'test.cjktest')
 
     def test_callback_None_index(self):
         def myreplace(exc):
-            return (u'x', None)
+            return ('x', None)
         codecs.register_error("test.cjktest", myreplace)
         self.assertRaises(TypeError, self.encode, self.unmappedunicode,
                           'test.cjktest')
@@ -120,25 +120,25 @@
         def myreplace(exc):
             if myreplace.limit > 0:
                 myreplace.limit -= 1
-                return (u'REPLACED', 0)
+                return ('REPLACED', 0)
             else:
-                return (u'TERMINAL', exc.end)
+                return ('TERMINAL', exc.end)
         myreplace.limit = 3
         codecs.register_error("test.cjktest", myreplace)
-        self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
+        self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
                                      'test.cjktest'),
                 ('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
 
     def test_callback_forward_index(self):
         def myreplace(exc):
-            return (u'REPLACED', exc.end + 2)
+            return ('REPLACED', exc.end + 2)
         codecs.register_error("test.cjktest", myreplace)
-        self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
+        self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
                                      'test.cjktest'), ('abcdREPLACEDgh', 9))
 
     def test_callback_index_outofbound(self):
         def myreplace(exc):
-            return (u'TERM', 100)
+            return ('TERM', 100)
         codecs.register_error("test.cjktest", myreplace)
         self.assertRaises(IndexError, self.encode, self.unmappedunicode,
                           'test.cjktest')
@@ -191,7 +191,7 @@
 
         e.reset()
         def tempreplace(exc):
-            return (u'called', exc.end)
+            return ('called', exc.end)
         codecs.register_error('test.incremental_error_callback', tempreplace)
         e.errors = 'test.incremental_error_callback'
         self.assertEqual(e.encode(inv, True), 'called')
@@ -243,7 +243,7 @@
 
                 self.assertEqual(ostream.getvalue(), self.tstring[0])
 
-if len(u'\U00012345') == 2: # ucs2 build
+if len('\U00012345') == 2: # ucs2 build
     _unichr = unichr
     def unichr(v):
         if v >= 0x10000:
@@ -272,7 +272,7 @@
         return test_support.open_urlresource(self.mapfileurl)
 
     def test_mapping_file(self):
-        unichrs = lambda s: u''.join(map(unichr, map(eval, s.split('+'))))
+        unichrs = lambda s: ''.join(map(unichr, map(eval, s.split('+'))))
         urt_wa = {}
 
         for line in self.open_mapping_file():
@@ -311,7 +311,7 @@
         if (csetch, unich) not in self.pass_enctest:
             self.assertEqual(unich.encode(self.encoding), csetch)
         if (csetch, unich) not in self.pass_dectest:
-            self.assertEqual(unicode(csetch, self.encoding), unich)
+            self.assertEqual(str(csetch, self.encoding), unich)
 
 def load_teststring(encoding):
     from test import cjkencodings_test

Modified: python/branches/py3k-struni/Lib/test/test_normalization.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_normalization.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_normalization.py	Wed May  2 21:09:54 2007
@@ -28,7 +28,7 @@
     for x in data:
         if x > sys.maxunicode:
             raise RangeError
-    return u"".join([unichr(x) for x in data])
+    return "".join([unichr(x) for x in data])
 
 class NormalizationTest(unittest.TestCase):
     def test_main(self):
@@ -84,7 +84,7 @@
 
     def test_bug_834676(self):
         # Check for bug 834676
-        normalize('NFC', u'\ud55c\uae00')
+        normalize('NFC', '\ud55c\uae00')
 
 
 def test_main():

Modified: python/branches/py3k-struni/Lib/test/test_optparse.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_optparse.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_optparse.py	Wed May  2 21:09:54 2007
@@ -1520,8 +1520,8 @@
 
     def test_help_unicode(self):
         self.parser = InterceptingOptionParser(usage=SUPPRESS_USAGE)
-        self.parser.add_option("-a", action="store_true", help=u"ol\u00E9!")
-        expect = u"""\
+        self.parser.add_option("-a", action="store_true", help="ol\u00E9!")
+        expect = """\
 Options:
   -h, --help  show this help message and exit
   -a          ol\u00E9!
@@ -1530,8 +1530,8 @@
 
     def test_help_unicode_description(self):
         self.parser = InterceptingOptionParser(usage=SUPPRESS_USAGE,
-                                               description=u"ol\u00E9!")
-        expect = u"""\
+                                               description="ol\u00E9!")
+        expect = """\
 ol\u00E9!
 
 Options:

Modified: python/branches/py3k-struni/Lib/test/test_pep263.py
==============================================================================
Binary files. No diff available.

Modified: python/branches/py3k-struni/Lib/test/test_pep277.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_pep277.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_pep277.py	Wed May  2 21:09:54 2007
@@ -7,14 +7,14 @@
 
 filenames = [
     'abc',
-    u'ascii',
-    u'Gr\xfc\xdf-Gott',
-    u'\u0393\u03b5\u03b9\u03ac-\u03c3\u03b1\u03c2',
-    u'\u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435',
-    u'\u306b\u307d\u3093',
-    u'\u05d4\u05e9\u05e7\u05e6\u05e5\u05e1',
-    u'\u66e8\u66e9\u66eb',
-    u'\u66e8\u05e9\u3093\u0434\u0393\xdf',
+    'ascii',
+    'Gr\xfc\xdf-Gott',
+    '\u0393\u03b5\u03b9\u03ac-\u03c3\u03b1\u03c2',
+    '\u0417\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435',
+    '\u306b\u307d\u3093',
+    '\u05d4\u05e9\u05e7\u05e6\u05e5\u05e1',
+    '\u66e8\u66e9\u66eb',
+    '\u66e8\u05e9\u3093\u0434\u0393\xdf',
     ]
 
 # Destroy directory dirname and all files under it, to one level.
@@ -23,7 +23,7 @@
     # an error if we can't remove it.
     if os.path.exists(dirname):
         # must pass unicode to os.listdir() so we get back unicode results.
-        for fname in os.listdir(unicode(dirname)):
+        for fname in os.listdir(str(dirname)):
             os.unlink(os.path.join(dirname, fname))
         os.rmdir(dirname)
 
@@ -80,7 +80,7 @@
         f1 = os.listdir(test_support.TESTFN)
         # Printing f1 is not appropriate, as specific filenames
         # returned depend on the local encoding
-        f2 = os.listdir(unicode(test_support.TESTFN,
+        f2 = os.listdir(str(test_support.TESTFN,
                                 sys.getfilesystemencoding()))
         f2.sort()
         print(f2)
@@ -91,8 +91,8 @@
             os.rename("tmp",name)
 
     def test_directory(self):
-        dirname = os.path.join(test_support.TESTFN,u'Gr\xfc\xdf-\u66e8\u66e9\u66eb')
-        filename = u'\xdf-\u66e8\u66e9\u66eb'
+        dirname = os.path.join(test_support.TESTFN,'Gr\xfc\xdf-\u66e8\u66e9\u66eb')
+        filename = '\xdf-\u66e8\u66e9\u66eb'
         oldwd = os.getcwd()
         os.mkdir(dirname)
         os.chdir(dirname)

Modified: python/branches/py3k-struni/Lib/test/test_pep292.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_pep292.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_pep292.py	Wed May  2 21:09:54 2007
@@ -134,8 +134,8 @@
 
     def test_unicode_values(self):
         s = Template('$who likes $what')
-        d = dict(who=u't\xffm', what=u'f\xfe\fed')
-        self.assertEqual(s.substitute(d), u't\xffm likes f\xfe\x0ced')
+        d = dict(who='t\xffm', what='f\xfe\fed')
+        self.assertEqual(s.substitute(d), 't\xffm likes f\xfe\x0ced')
 
     def test_keyword_arguments(self):
         eq = self.assertEqual

Modified: python/branches/py3k-struni/Lib/test/test_pep352.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_pep352.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_pep352.py	Wed May  2 21:09:54 2007
@@ -90,7 +90,7 @@
         arg = "spam"
         exc = Exception(arg)
         results = ([len(exc.args), 1], [exc.args[0], arg], [exc.message, arg],
-                [str(exc), str(arg)], [unicode(exc), unicode(arg)],
+                [str(exc), str(arg)], [str(exc), str(arg)],
             [repr(exc), exc.__class__.__name__ + repr(exc.args)])
         self.interface_test_driver(results)
 
@@ -101,7 +101,7 @@
         exc = Exception(*args)
         results = ([len(exc.args), arg_count], [exc.args, args],
                 [exc.message, ''], [str(exc), str(args)],
-                [unicode(exc), unicode(args)],
+                [str(exc), str(args)],
                 [repr(exc), exc.__class__.__name__ + repr(exc.args)])
         self.interface_test_driver(results)
 
@@ -109,7 +109,7 @@
         # Make sure that with no args that interface is correct
         exc = Exception()
         results = ([len(exc.args), 0], [exc.args, tuple()], [exc.message, ''],
-                [str(exc), ''], [unicode(exc), u''],
+                [str(exc), ''], [str(exc), ''],
                 [repr(exc), exc.__class__.__name__ + '()'])
         self.interface_test_driver(results)
 

Modified: python/branches/py3k-struni/Lib/test/test_plistlib.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_plistlib.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_plistlib.py	Wed May  2 21:09:54 2007
@@ -104,7 +104,7 @@
             anInt = 728,
             aDict=dict(
                 anotherString="<hello & 'hi' there!>",
-                aUnicodeValue=u'M\xe4ssig, Ma\xdf',
+                aUnicodeValue='M\xe4ssig, Ma\xdf',
                 aTrueValue=True,
                 aFalseValue=False,
                 deeperDict=dict(a=17, b=32.5, c=[1, 2, "text"]),
@@ -114,7 +114,7 @@
             nestedData = [plistlib.Data("<lots of binary gunk>\0\1\2\3" * 10)],
             aDate = datetime.datetime(2004, 10, 26, 10, 33, 33),
         )
-        pl[u'\xc5benraa'] = "That was a unicode key."
+        pl['\xc5benraa'] = "That was a unicode key."
         return pl
 
     def test_create(self):

Modified: python/branches/py3k-struni/Lib/test/test_pprint.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_pprint.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_pprint.py	Wed May  2 21:09:54 2007
@@ -3,7 +3,7 @@
 import unittest
 
 try:
-    uni = unicode
+    uni = str
 except NameError:
     def uni(x):
         return x

Modified: python/branches/py3k-struni/Lib/test/test_pyexpat.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_pyexpat.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_pyexpat.py	Wed May  2 21:09:54 2007
@@ -281,7 +281,7 @@
     def check(self, expected, label):
         self.assertEquals(self.stuff, expected,
                 "%s\nstuff    = %r\nexpected = %r"
-                % (label, self.stuff, map(unicode, expected)))
+                % (label, self.stuff, map(str, expected)))
 
     def CharacterDataHandler(self, text):
         self.stuff.append(text)

Modified: python/branches/py3k-struni/Lib/test/test_re.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_re.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_re.py	Wed May  2 21:09:54 2007
@@ -324,12 +324,12 @@
         self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
         self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
         self.assertEqual(re.search(r"\b(b.)\b",
-                                   u"abcd abc bcd bx").group(1), "bx")
+                                   "abcd abc bcd bx").group(1), "bx")
         self.assertEqual(re.search(r"\B(b.)\B",
-                                   u"abc bcd bc abxd").group(1), "bx")
-        self.assertEqual(re.search(r"^abc$", u"\nabc\n", re.M).group(0), "abc")
-        self.assertEqual(re.search(r"^\Aabc\Z$", u"abc", re.M).group(0), "abc")
-        self.assertEqual(re.search(r"^\Aabc\Z$", u"\nabc\n", re.M), None)
+                                   "abc bcd bc abxd").group(1), "bx")
+        self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
+        self.assertEqual(re.search(r"^\Aabc\Z$", "\nabc\n", re.M), None)
         self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                    "1aa! a").group(0), "1aa! a")
         self.assertEqual(re.search(r"\d\D\w\W\s\S",
@@ -339,13 +339,13 @@
 
     def test_ignore_case(self):
         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
-        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
+        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
 
     def test_bigcharset(self):
-        self.assertEqual(re.match(u"([\u2222\u2223])",
-                                  u"\u2222").group(1), u"\u2222")
-        self.assertEqual(re.match(u"([\u2222\u2223])",
-                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
+        self.assertEqual(re.match("([\u2222\u2223])",
+                                  "\u2222").group(1), "\u2222")
+        self.assertEqual(re.match("([\u2222\u2223])",
+                                  "\u2222", re.UNICODE).group(1), "\u2222")
 
     def test_anyall(self):
         self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
@@ -387,7 +387,7 @@
         self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
 
         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
-        self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
+        self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
 
     def test_not_literal(self):
         self.assertEqual(re.search("\s([^a])", " b").group(1), "b")
@@ -493,7 +493,7 @@
         self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
 
     def test_bug_612074(self):
-        pat=u"["+re.escape(u"\u2039")+u"]"
+        pat="["+re.escape("\u2039")+"]"
         self.assertEqual(re.compile(pat) and 1, 1)
 
     def test_stack_overflow(self):
@@ -561,10 +561,10 @@
     def test_bug_764548(self):
         # bug 764548, re.compile() barfs on str/unicode subclasses
         try:
-            unicode
+            str
         except NameError:
             return  # no problem if we have no unicode
-        class my_unicode(unicode): pass
+        class my_unicode(str): pass
         pat = re.compile(my_unicode("abc"))
         self.assertEqual(pat.match("xyz"), None)
 
@@ -575,7 +575,7 @@
 
     def test_bug_926075(self):
         try:
-            unicode
+            str
         except NameError:
             return # no problem if we have no unicode
         self.assert_(re.compile('bug_926075') is not
@@ -583,7 +583,7 @@
 
     def test_bug_931848(self):
         try:
-            unicode
+            str
         except NameError:
             pass
         pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
@@ -689,7 +689,7 @@
                 # Try the match on a unicode string, and check that it
                 # still succeeds.
                 try:
-                    result = obj.search(unicode(s, "latin-1"))
+                    result = obj.search(str(s, "latin-1"))
                     if result is None:
                         print('=== Fails on unicode match', t)
                 except NameError:
@@ -699,7 +699,7 @@
 
                 # Try the match on a unicode pattern, and check that it
                 # still succeeds.
-                obj=re.compile(unicode(pattern, "latin-1"))
+                obj=re.compile(str(pattern, "latin-1"))
                 result = obj.search(s)
                 if result is None:
                     print('=== Fails on unicode pattern match', t)

Modified: python/branches/py3k-struni/Lib/test/test_set.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_set.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_set.py	Wed May  2 21:09:54 2007
@@ -72,7 +72,7 @@
         self.assertEqual(type(u), self.thetype)
         self.assertRaises(PassThru, self.s.union, check_pass_thru())
         self.assertRaises(TypeError, self.s.union, [[]])
-        for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple:
+        for C in set, frozenset, dict.fromkeys, str, str, list, tuple:
             self.assertEqual(self.thetype('abcba').union(C('cdc')), set('abcd'))
             self.assertEqual(self.thetype('abcba').union(C('efgfe')), set('abcefg'))
             self.assertEqual(self.thetype('abcba').union(C('ccb')), set('abc'))
@@ -96,7 +96,7 @@
         self.assertEqual(self.s, self.thetype(self.word))
         self.assertEqual(type(i), self.thetype)
         self.assertRaises(PassThru, self.s.intersection, check_pass_thru())
-        for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple:
+        for C in set, frozenset, dict.fromkeys, str, str, list, tuple:
             self.assertEqual(self.thetype('abcba').intersection(C('cdc')), set('cc'))
             self.assertEqual(self.thetype('abcba').intersection(C('efgfe')), set(''))
             self.assertEqual(self.thetype('abcba').intersection(C('ccb')), set('bc'))
@@ -121,7 +121,7 @@
         self.assertEqual(type(i), self.thetype)
         self.assertRaises(PassThru, self.s.difference, check_pass_thru())
         self.assertRaises(TypeError, self.s.difference, [[]])
-        for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple:
+        for C in set, frozenset, dict.fromkeys, str, str, list, tuple:
             self.assertEqual(self.thetype('abcba').difference(C('cdc')), set('ab'))
             self.assertEqual(self.thetype('abcba').difference(C('efgfe')), set('abc'))
             self.assertEqual(self.thetype('abcba').difference(C('ccb')), set('a'))
@@ -146,7 +146,7 @@
         self.assertEqual(type(i), self.thetype)
         self.assertRaises(PassThru, self.s.symmetric_difference, check_pass_thru())
         self.assertRaises(TypeError, self.s.symmetric_difference, [[]])
-        for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple:
+        for C in set, frozenset, dict.fromkeys, str, str, list, tuple:
             self.assertEqual(self.thetype('abcba').symmetric_difference(C('cdc')), set('abd'))
             self.assertEqual(self.thetype('abcba').symmetric_difference(C('efgfe')), set('abcefg'))
             self.assertEqual(self.thetype('abcba').symmetric_difference(C('ccb')), set('a'))
@@ -390,7 +390,7 @@
         self.assertRaises(PassThru, self.s.update, check_pass_thru())
         self.assertRaises(TypeError, self.s.update, [[]])
         for p, q in (('cdc', 'abcd'), ('efgfe', 'abcefg'), ('ccb', 'abc'), ('ef', 'abcef')):
-            for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple:
+            for C in set, frozenset, dict.fromkeys, str, str, list, tuple:
                 s = self.thetype('abcba')
                 self.assertEqual(s.update(C(p)), None)
                 self.assertEqual(s, set(q))
@@ -411,7 +411,7 @@
         self.assertRaises(PassThru, self.s.intersection_update, check_pass_thru())
         self.assertRaises(TypeError, self.s.intersection_update, [[]])
         for p, q in (('cdc', 'c'), ('efgfe', ''), ('ccb', 'bc'), ('ef', '')):
-            for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple:
+            for C in set, frozenset, dict.fromkeys, str, str, list, tuple:
                 s = self.thetype('abcba')
                 self.assertEqual(s.intersection_update(C(p)), None)
                 self.assertEqual(s, set(q))
@@ -436,7 +436,7 @@
         self.assertRaises(TypeError, self.s.difference_update, [[]])
         self.assertRaises(TypeError, self.s.symmetric_difference_update, [[]])
         for p, q in (('cdc', 'ab'), ('efgfe', 'abc'), ('ccb', 'a'), ('ef', 'abc')):
-            for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple:
+            for C in set, frozenset, dict.fromkeys, str, str, list, tuple:
                 s = self.thetype('abcba')
                 self.assertEqual(s.difference_update(C(p)), None)
                 self.assertEqual(s, set(q))
@@ -460,7 +460,7 @@
         self.assertRaises(PassThru, self.s.symmetric_difference_update, check_pass_thru())
         self.assertRaises(TypeError, self.s.symmetric_difference_update, [[]])
         for p, q in (('cdc', 'abd'), ('efgfe', 'abcefg'), ('ccb', 'a'), ('ef', 'abcef')):
-            for C in set, frozenset, dict.fromkeys, str, unicode, list, tuple:
+            for C in set, frozenset, dict.fromkeys, str, str, list, tuple:
                 s = self.thetype('abcba')
                 self.assertEqual(s.symmetric_difference_update(C(p)), None)
                 self.assertEqual(s, set(q))

Modified: python/branches/py3k-struni/Lib/test/test_startfile.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_startfile.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_startfile.py	Wed May  2 21:09:54 2007
@@ -18,7 +18,7 @@
         self.assertRaises(OSError, startfile, "nonexisting.vbs")
 
     def test_nonexisting_u(self):
-        self.assertRaises(OSError, startfile, u"nonexisting.vbs")
+        self.assertRaises(OSError, startfile, "nonexisting.vbs")
 
     def test_empty(self):
         empty = path.join(path.dirname(__file__), "empty.vbs")
@@ -27,8 +27,8 @@
 
     def test_empty_u(self):
         empty = path.join(path.dirname(__file__), "empty.vbs")
-        startfile(unicode(empty, "mbcs"))
-        startfile(unicode(empty, "mbcs"), "open")
+        startfile(str(empty, "mbcs"))
+        startfile(str(empty, "mbcs"), "open")
 
 def test_main():
     test_support.run_unittest(TestCase)

Modified: python/branches/py3k-struni/Lib/test/test_str.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_str.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_str.py	Wed May  2 21:09:54 2007
@@ -31,7 +31,7 @@
         # Make sure __str__() behaves properly
         class Foo0:
             def __unicode__(self):
-                return u"foo"
+                return "foo"
 
         class Foo1:
             def __str__(self):
@@ -43,28 +43,28 @@
 
         class Foo3(object):
             def __str__(self):
-                return u"foo"
+                return "foo"
 
-        class Foo4(unicode):
+        class Foo4(str):
             def __str__(self):
-                return u"foo"
+                return "foo"
 
         class Foo5(str):
             def __str__(self):
-                return u"foo"
+                return "foo"
 
         class Foo6(str):
             def __str__(self):
                 return "foos"
 
             def __unicode__(self):
-                return u"foou"
+                return "foou"
 
-        class Foo7(unicode):
+        class Foo7(str):
             def __str__(self):
                 return "foos"
             def __unicode__(self):
-                return u"foou"
+                return "foou"
 
         class Foo8(str):
             def __new__(cls, content=""):
@@ -88,7 +88,7 @@
         self.assertEqual(str(Foo7("bar")), "foos")
         self.assertEqual(str(Foo8("foo")), "foofoo")
         self.assertEqual(str(Foo9("foo")), "string")
-        self.assertEqual(unicode(Foo9("foo")), u"not unicode")
+        self.assertEqual(str(Foo9("foo")), "not unicode")
 
 def test_main():
     test_support.run_unittest(StrTest)

Modified: python/branches/py3k-struni/Lib/test/test_stringprep.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_stringprep.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_stringprep.py	Wed May  2 21:09:54 2007
@@ -8,66 +8,66 @@
 
 class StringprepTests(unittest.TestCase):
     def test(self):
-        self.failUnless(in_table_a1(u"\u0221"))
-        self.failIf(in_table_a1(u"\u0222"))
+        self.failUnless(in_table_a1("\u0221"))
+        self.failIf(in_table_a1("\u0222"))
 
-        self.failUnless(in_table_b1(u"\u00ad"))
-        self.failIf(in_table_b1(u"\u00ae"))
+        self.failUnless(in_table_b1("\u00ad"))
+        self.failIf(in_table_b1("\u00ae"))
 
-        self.failUnless(map_table_b2(u"\u0041"), u"\u0061")
-        self.failUnless(map_table_b2(u"\u0061"), u"\u0061")
+        self.failUnless(map_table_b2("\u0041"), "\u0061")
+        self.failUnless(map_table_b2("\u0061"), "\u0061")
 
-        self.failUnless(map_table_b3(u"\u0041"), u"\u0061")
-        self.failUnless(map_table_b3(u"\u0061"), u"\u0061")
+        self.failUnless(map_table_b3("\u0041"), "\u0061")
+        self.failUnless(map_table_b3("\u0061"), "\u0061")
 
-        self.failUnless(in_table_c11(u"\u0020"))
-        self.failIf(in_table_c11(u"\u0021"))
+        self.failUnless(in_table_c11("\u0020"))
+        self.failIf(in_table_c11("\u0021"))
 
-        self.failUnless(in_table_c12(u"\u00a0"))
-        self.failIf(in_table_c12(u"\u00a1"))
+        self.failUnless(in_table_c12("\u00a0"))
+        self.failIf(in_table_c12("\u00a1"))
 
-        self.failUnless(in_table_c12(u"\u00a0"))
-        self.failIf(in_table_c12(u"\u00a1"))
+        self.failUnless(in_table_c12("\u00a0"))
+        self.failIf(in_table_c12("\u00a1"))
 
-        self.failUnless(in_table_c11_c12(u"\u00a0"))
-        self.failIf(in_table_c11_c12(u"\u00a1"))
+        self.failUnless(in_table_c11_c12("\u00a0"))
+        self.failIf(in_table_c11_c12("\u00a1"))
 
-        self.failUnless(in_table_c21(u"\u001f"))
-        self.failIf(in_table_c21(u"\u0020"))
+        self.failUnless(in_table_c21("\u001f"))
+        self.failIf(in_table_c21("\u0020"))
 
-        self.failUnless(in_table_c22(u"\u009f"))
-        self.failIf(in_table_c22(u"\u00a0"))
+        self.failUnless(in_table_c22("\u009f"))
+        self.failIf(in_table_c22("\u00a0"))
 
-        self.failUnless(in_table_c21_c22(u"\u009f"))
-        self.failIf(in_table_c21_c22(u"\u00a0"))
+        self.failUnless(in_table_c21_c22("\u009f"))
+        self.failIf(in_table_c21_c22("\u00a0"))
 
-        self.failUnless(in_table_c3(u"\ue000"))
-        self.failIf(in_table_c3(u"\uf900"))
+        self.failUnless(in_table_c3("\ue000"))
+        self.failIf(in_table_c3("\uf900"))
 
-        self.failUnless(in_table_c4(u"\uffff"))
-        self.failIf(in_table_c4(u"\u0000"))
+        self.failUnless(in_table_c4("\uffff"))
+        self.failIf(in_table_c4("\u0000"))
 
-        self.failUnless(in_table_c5(u"\ud800"))
-        self.failIf(in_table_c5(u"\ud7ff"))
+        self.failUnless(in_table_c5("\ud800"))
+        self.failIf(in_table_c5("\ud7ff"))
 
-        self.failUnless(in_table_c6(u"\ufff9"))
-        self.failIf(in_table_c6(u"\ufffe"))
+        self.failUnless(in_table_c6("\ufff9"))
+        self.failIf(in_table_c6("\ufffe"))
 
-        self.failUnless(in_table_c7(u"\u2ff0"))
-        self.failIf(in_table_c7(u"\u2ffc"))
+        self.failUnless(in_table_c7("\u2ff0"))
+        self.failIf(in_table_c7("\u2ffc"))
 
-        self.failUnless(in_table_c8(u"\u0340"))
-        self.failIf(in_table_c8(u"\u0342"))
+        self.failUnless(in_table_c8("\u0340"))
+        self.failIf(in_table_c8("\u0342"))
 
         # C.9 is not in the bmp
         # self.failUnless(in_table_c9(u"\U000E0001"))
         # self.failIf(in_table_c8(u"\U000E0002"))
 
-        self.failUnless(in_table_d1(u"\u05be"))
-        self.failIf(in_table_d1(u"\u05bf"))
+        self.failUnless(in_table_d1("\u05be"))
+        self.failIf(in_table_d1("\u05bf"))
 
-        self.failUnless(in_table_d2(u"\u0041"))
-        self.failIf(in_table_d2(u"\u0040"))
+        self.failUnless(in_table_d2("\u0041"))
+        self.failIf(in_table_d2("\u0040"))
 
         # This would generate a hash of all predicates. However, running
         # it is quite expensive, and only serves to detect changes in the

Modified: python/branches/py3k-struni/Lib/test/test_support.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_support.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_support.py	Wed May  2 21:09:54 2007
@@ -131,7 +131,7 @@
     return (x > y) - (x < y)
 
 try:
-    unicode
+    str
     have_unicode = True
 except NameError:
     have_unicode = False
@@ -151,13 +151,13 @@
         # Assuming sys.getfilesystemencoding()!=sys.getdefaultencoding()
         # TESTFN_UNICODE is a filename that can be encoded using the
         # file system encoding, but *not* with the default (ascii) encoding
-        if isinstance('', unicode):
+        if isinstance('', str):
             # python -U
             # XXX perhaps unicode() should accept Unicode strings?
             TESTFN_UNICODE = "@test-\xe0\xf2"
         else:
             # 2 latin characters.
-            TESTFN_UNICODE = unicode("@test-\xe0\xf2", "latin-1")
+            TESTFN_UNICODE = str("@test-\xe0\xf2", "latin-1")
         TESTFN_ENCODING = sys.getfilesystemencoding()
         # TESTFN_UNICODE_UNENCODEABLE is a filename that should *not* be
         # able to be encoded by *either* the default or filesystem encoding.

Modified: python/branches/py3k-struni/Lib/test/test_tarfile.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_tarfile.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_tarfile.py	Wed May  2 21:09:54 2007
@@ -711,7 +711,7 @@
 
     def _test_unicode_filename(self, encoding):
         tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT)
-        name = u"\u20ac".encode(encoding) # Euro sign
+        name = "\u20ac".encode(encoding) # Euro sign
         tar.encoding = encoding
         tar.addfile(tarfile.TarInfo(name))
         tar.close()
@@ -723,7 +723,7 @@
     def test_unicode_filename_error(self):
         # The euro sign filename cannot be translated to iso8859-1 encoding.
         tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="utf8")
-        name = u"\u20ac".encode("utf8") # Euro sign
+        name = "\u20ac".encode("utf8") # Euro sign
         tar.addfile(tarfile.TarInfo(name))
         tar.close()
 
@@ -732,13 +732,13 @@
     def test_pax_headers(self):
         self._test_pax_headers({"foo": "bar", "uid": 0, "mtime": 1.23})
 
-        self._test_pax_headers({"euro": u"\u20ac".encode("utf8")})
+        self._test_pax_headers({"euro": "\u20ac".encode("utf8")})
 
-        self._test_pax_headers({"euro": u"\u20ac"},
-                               {"euro": u"\u20ac".encode("utf8")})
+        self._test_pax_headers({"euro": "\u20ac"},
+                               {"euro": "\u20ac".encode("utf8")})
 
-        self._test_pax_headers({u"\u20ac": "euro"},
-                               {u"\u20ac".encode("utf8"): "euro"})
+        self._test_pax_headers({"\u20ac": "euro"},
+                               {"\u20ac".encode("utf8"): "euro"})
 
     def _test_pax_headers(self, pax_headers, cmp_headers=None):
         if cmp_headers is None:

Modified: python/branches/py3k-struni/Lib/test/test_textwrap.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_textwrap.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_textwrap.py	Wed May  2 21:09:54 2007
@@ -341,13 +341,13 @@
             # *Very* simple test of wrapping Unicode strings.  I'm sure
             # there's more to it than this, but let's at least make
             # sure textwrap doesn't crash on Unicode input!
-            text = u"Hello there, how are you today?"
-            self.check_wrap(text, 50, [u"Hello there, how are you today?"])
-            self.check_wrap(text, 20, [u"Hello there, how are", "you today?"])
+            text = "Hello there, how are you today?"
+            self.check_wrap(text, 50, ["Hello there, how are you today?"])
+            self.check_wrap(text, 20, ["Hello there, how are", "you today?"])
             olines = self.wrapper.wrap(text)
-            assert isinstance(olines, list) and isinstance(olines[0], unicode)
+            assert isinstance(olines, list) and isinstance(olines[0], str)
             otext = self.wrapper.fill(text)
-            assert isinstance(otext, unicode)
+            assert isinstance(otext, str)
 
     def test_split(self):
         # Ensure that the standard _split() method works as advertised

Modified: python/branches/py3k-struni/Lib/test/test_timeout.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_timeout.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_timeout.py	Wed May  2 21:09:54 2007
@@ -50,7 +50,7 @@
         self.sock.settimeout(0.0)
         self.sock.settimeout(None)
         self.assertRaises(TypeError, self.sock.settimeout, "")
-        self.assertRaises(TypeError, self.sock.settimeout, u"")
+        self.assertRaises(TypeError, self.sock.settimeout, "")
         self.assertRaises(TypeError, self.sock.settimeout, ())
         self.assertRaises(TypeError, self.sock.settimeout, [])
         self.assertRaises(TypeError, self.sock.settimeout, {})

Modified: python/branches/py3k-struni/Lib/test/test_types.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_types.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_types.py	Wed May  2 21:09:54 2007
@@ -200,16 +200,16 @@
         self.assertEqual(a[-100:100:2], '02468')
 
         if have_unicode:
-            a = unicode('0123456789', 'ascii')
+            a = str('0123456789', 'ascii')
             self.assertEqual(a[::], a)
-            self.assertEqual(a[::2], unicode('02468', 'ascii'))
-            self.assertEqual(a[1::2], unicode('13579', 'ascii'))
-            self.assertEqual(a[::-1], unicode('9876543210', 'ascii'))
-            self.assertEqual(a[::-2], unicode('97531', 'ascii'))
-            self.assertEqual(a[3::-2], unicode('31', 'ascii'))
+            self.assertEqual(a[::2], str('02468', 'ascii'))
+            self.assertEqual(a[1::2], str('13579', 'ascii'))
+            self.assertEqual(a[::-1], str('9876543210', 'ascii'))
+            self.assertEqual(a[::-2], str('97531', 'ascii'))
+            self.assertEqual(a[3::-2], str('31', 'ascii'))
             self.assertEqual(a[-100:100:], a)
             self.assertEqual(a[100:-100:-1], a[::-1])
-            self.assertEqual(a[-100:100:2], unicode('02468', 'ascii'))
+            self.assertEqual(a[-100:100:2], str('02468', 'ascii'))
 
 
     def test_type_function(self):

Modified: python/branches/py3k-struni/Lib/test/test_ucn.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_ucn.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_ucn.py	Wed May  2 21:09:54 2007
@@ -17,7 +17,7 @@
         # Helper that put all \N escapes inside eval'd raw strings,
         # to make sure this script runs even if the compiler
         # chokes on \N escapes
-        res = eval(ur'u"\N{%s}"' % name)
+        res = eval(r'u"\N{%s}"' % name)
         self.assertEqual(res, code)
         return res
 
@@ -51,10 +51,10 @@
             "LATIN SMALL LETTER P",
             "FULL STOP"
         ]
-        string = u"The rEd fOx ate the sheep."
+        string = "The rEd fOx ate the sheep."
 
         self.assertEqual(
-            u"".join([self.checkletter(*args) for args in zip(chars, string)]),
+            "".join([self.checkletter(*args) for args in zip(chars, string)]),
             string
         )
 
@@ -67,30 +67,30 @@
             self.assertEqual(unicodedata.name(code), name)
 
     def test_hangul_syllables(self):
-        self.checkletter("HANGUL SYLLABLE GA", u"\uac00")
-        self.checkletter("HANGUL SYLLABLE GGWEOSS", u"\uafe8")
-        self.checkletter("HANGUL SYLLABLE DOLS", u"\ub3d0")
-        self.checkletter("HANGUL SYLLABLE RYAN", u"\ub7b8")
-        self.checkletter("HANGUL SYLLABLE MWIK", u"\ubba0")
-        self.checkletter("HANGUL SYLLABLE BBWAEM", u"\ubf88")
-        self.checkletter("HANGUL SYLLABLE SSEOL", u"\uc370")
-        self.checkletter("HANGUL SYLLABLE YI", u"\uc758")
-        self.checkletter("HANGUL SYLLABLE JJYOSS", u"\ucb40")
-        self.checkletter("HANGUL SYLLABLE KYEOLS", u"\ucf28")
-        self.checkletter("HANGUL SYLLABLE PAN", u"\ud310")
-        self.checkletter("HANGUL SYLLABLE HWEOK", u"\ud6f8")
-        self.checkletter("HANGUL SYLLABLE HIH", u"\ud7a3")
+        self.checkletter("HANGUL SYLLABLE GA", "\uac00")
+        self.checkletter("HANGUL SYLLABLE GGWEOSS", "\uafe8")
+        self.checkletter("HANGUL SYLLABLE DOLS", "\ub3d0")
+        self.checkletter("HANGUL SYLLABLE RYAN", "\ub7b8")
+        self.checkletter("HANGUL SYLLABLE MWIK", "\ubba0")
+        self.checkletter("HANGUL SYLLABLE BBWAEM", "\ubf88")
+        self.checkletter("HANGUL SYLLABLE SSEOL", "\uc370")
+        self.checkletter("HANGUL SYLLABLE YI", "\uc758")
+        self.checkletter("HANGUL SYLLABLE JJYOSS", "\ucb40")
+        self.checkletter("HANGUL SYLLABLE KYEOLS", "\ucf28")
+        self.checkletter("HANGUL SYLLABLE PAN", "\ud310")
+        self.checkletter("HANGUL SYLLABLE HWEOK", "\ud6f8")
+        self.checkletter("HANGUL SYLLABLE HIH", "\ud7a3")
 
         import unicodedata
-        self.assertRaises(ValueError, unicodedata.name, u"\ud7a4")
+        self.assertRaises(ValueError, unicodedata.name, "\ud7a4")
 
     def test_cjk_unified_ideographs(self):
-        self.checkletter("CJK UNIFIED IDEOGRAPH-3400", u"\u3400")
-        self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", u"\u4db5")
-        self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", u"\u4e00")
-        self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", u"\u9fa5")
-        self.checkletter("CJK UNIFIED IDEOGRAPH-20000", u"\U00020000")
-        self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", u"\U0002a6d6")
+        self.checkletter("CJK UNIFIED IDEOGRAPH-3400", "\u3400")
+        self.checkletter("CJK UNIFIED IDEOGRAPH-4DB5", "\u4db5")
+        self.checkletter("CJK UNIFIED IDEOGRAPH-4E00", "\u4e00")
+        self.checkletter("CJK UNIFIED IDEOGRAPH-9FA5", "\u9fa5")
+        self.checkletter("CJK UNIFIED IDEOGRAPH-20000", "\U00020000")
+        self.checkletter("CJK UNIFIED IDEOGRAPH-2A6D6", "\U0002a6d6")
 
     def test_bmp_characters(self):
         import unicodedata
@@ -103,38 +103,38 @@
                 count += 1
 
     def test_misc_symbols(self):
-        self.checkletter("PILCROW SIGN", u"\u00b6")
-        self.checkletter("REPLACEMENT CHARACTER", u"\uFFFD")
-        self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", u"\uFF9F")
-        self.checkletter("FULLWIDTH LATIN SMALL LETTER A", u"\uFF41")
+        self.checkletter("PILCROW SIGN", "\u00b6")
+        self.checkletter("REPLACEMENT CHARACTER", "\uFFFD")
+        self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", "\uFF9F")
+        self.checkletter("FULLWIDTH LATIN SMALL LETTER A", "\uFF41")
 
     def test_errors(self):
         import unicodedata
         self.assertRaises(TypeError, unicodedata.name)
-        self.assertRaises(TypeError, unicodedata.name, u'xx')
+        self.assertRaises(TypeError, unicodedata.name, 'xx')
         self.assertRaises(TypeError, unicodedata.lookup)
-        self.assertRaises(KeyError, unicodedata.lookup, u'unknown')
+        self.assertRaises(KeyError, unicodedata.lookup, 'unknown')
 
     def test_strict_eror_handling(self):
         # bogus character name
         self.assertRaises(
             UnicodeError,
-            unicode, "\\N{blah}", 'unicode-escape', 'strict'
+            str, "\\N{blah}", 'unicode-escape', 'strict'
         )
         # long bogus character name
         self.assertRaises(
             UnicodeError,
-            unicode, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
+            str, "\\N{%s}" % ("x" * 100000), 'unicode-escape', 'strict'
         )
         # missing closing brace
         self.assertRaises(
             UnicodeError,
-            unicode, "\\N{SPACE", 'unicode-escape', 'strict'
+            str, "\\N{SPACE", 'unicode-escape', 'strict'
         )
         # missing opening brace
         self.assertRaises(
             UnicodeError,
-            unicode, "\\NSPACE", 'unicode-escape', 'strict'
+            str, "\\NSPACE", 'unicode-escape', 'strict'
         )
 
 def test_main():

Modified: python/branches/py3k-struni/Lib/test/test_unicode.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_unicode.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_unicode.py	Wed May  2 21:09:54 2007
@@ -32,7 +32,7 @@
     string_tests.MixinStrUnicodeUserStringTest,
     string_tests.MixinStrUnicodeTest,
     ):
-    type2test = unicode
+    type2test = str
 
     def checkequalnofix(self, result, object, methodname, *args):
         method = getattr(object, methodname)
@@ -43,9 +43,9 @@
         # if the original is returned make sure that
         # this doesn't happen with subclasses
         if realresult is object:
-            class usub(unicode):
+            class usub(str):
                 def __repr__(self):
-                    return 'usub(%r)' % unicode.__repr__(self)
+                    return 'usub(%r)' % str.__repr__(self)
             object = usub(object)
             method = getattr(object, methodname)
             realresult = method(*args)
@@ -53,8 +53,8 @@
             self.assert_(object is not realresult)
 
     def test_literals(self):
-        self.assertEqual(u'\xff', u'\u00ff')
-        self.assertEqual(u'\uffff', u'\U0000ffff')
+        self.assertEqual('\xff', '\u00ff')
+        self.assertEqual('\uffff', '\U0000ffff')
         self.assertRaises(UnicodeError, eval, 'u\'\\Ufffffffe\'')
         self.assertRaises(UnicodeError, eval, 'u\'\\Uffffffff\'')
         self.assertRaises(UnicodeError, eval, 'u\'\\U%08x\'' % 0x110000)
@@ -62,19 +62,19 @@
     def test_repr(self):
         if not sys.platform.startswith('java'):
             # Test basic sanity of repr()
-            self.assertEqual(repr(u'abc'), "u'abc'")
-            self.assertEqual(repr(u'ab\\c'), "u'ab\\\\c'")
-            self.assertEqual(repr(u'ab\\'), "u'ab\\\\'")
-            self.assertEqual(repr(u'\\c'), "u'\\\\c'")
-            self.assertEqual(repr(u'\\'), "u'\\\\'")
-            self.assertEqual(repr(u'\n'), "u'\\n'")
-            self.assertEqual(repr(u'\r'), "u'\\r'")
-            self.assertEqual(repr(u'\t'), "u'\\t'")
-            self.assertEqual(repr(u'\b'), "u'\\x08'")
-            self.assertEqual(repr(u"'\""), """u'\\'"'""")
-            self.assertEqual(repr(u"'\""), """u'\\'"'""")
-            self.assertEqual(repr(u"'"), '''u"'"''')
-            self.assertEqual(repr(u'"'), """u'"'""")
+            self.assertEqual(repr('abc'), "u'abc'")
+            self.assertEqual(repr('ab\\c'), "u'ab\\\\c'")
+            self.assertEqual(repr('ab\\'), "u'ab\\\\'")
+            self.assertEqual(repr('\\c'), "u'\\\\c'")
+            self.assertEqual(repr('\\'), "u'\\\\'")
+            self.assertEqual(repr('\n'), "u'\\n'")
+            self.assertEqual(repr('\r'), "u'\\r'")
+            self.assertEqual(repr('\t'), "u'\\t'")
+            self.assertEqual(repr('\b'), "u'\\x08'")
+            self.assertEqual(repr("'\""), """u'\\'"'""")
+            self.assertEqual(repr("'\""), """u'\\'"'""")
+            self.assertEqual(repr("'"), '''u"'"''')
+            self.assertEqual(repr('"'), """u'"'""")
             latin1repr = (
                 "u'\\x00\\x01\\x02\\x03\\x04\\x05\\x06\\x07\\x08\\t\\n\\x0b\\x0c\\r"
                 "\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a"
@@ -90,52 +90,52 @@
                 "\\xe2\\xe3\\xe4\\xe5\\xe6\\xe7\\xe8\\xe9\\xea\\xeb\\xec\\xed\\xee\\xef"
                 "\\xf0\\xf1\\xf2\\xf3\\xf4\\xf5\\xf6\\xf7\\xf8\\xf9\\xfa\\xfb\\xfc\\xfd"
                 "\\xfe\\xff'")
-            testrepr = repr(u''.join(map(unichr, xrange(256))))
+            testrepr = repr(''.join(map(unichr, xrange(256))))
             self.assertEqual(testrepr, latin1repr)
             # Test repr works on wide unicode escapes without overflow.
-            self.assertEqual(repr(u"\U00010000" * 39 + u"\uffff" * 4096),
-                             repr(u"\U00010000" * 39 + u"\uffff" * 4096))
+            self.assertEqual(repr("\U00010000" * 39 + "\uffff" * 4096),
+                             repr("\U00010000" * 39 + "\uffff" * 4096))
 
     def test_iterators(self):
         # Make sure unicode objects have an __iter__ method
-        it = u"\u1111\u2222\u3333".__iter__()
-        self.assertEqual(next(it), u"\u1111")
-        self.assertEqual(next(it), u"\u2222")
-        self.assertEqual(next(it), u"\u3333")
+        it = "\u1111\u2222\u3333".__iter__()
+        self.assertEqual(next(it), "\u1111")
+        self.assertEqual(next(it), "\u2222")
+        self.assertEqual(next(it), "\u3333")
         self.assertRaises(StopIteration, next, it)
 
     def test_count(self):
         string_tests.CommonTest.test_count(self)
         # check mixed argument types
-        self.checkequalnofix(3,  'aaa', 'count', u'a')
-        self.checkequalnofix(0,  'aaa', 'count', u'b')
-        self.checkequalnofix(3, u'aaa', 'count',  'a')
-        self.checkequalnofix(0, u'aaa', 'count',  'b')
-        self.checkequalnofix(0, u'aaa', 'count',  'b')
-        self.checkequalnofix(1, u'aaa', 'count',  'a', -1)
-        self.checkequalnofix(3, u'aaa', 'count',  'a', -10)
-        self.checkequalnofix(2, u'aaa', 'count',  'a', 0, -1)
-        self.checkequalnofix(0, u'aaa', 'count',  'a', 0, -10)
+        self.checkequalnofix(3,  'aaa', 'count', 'a')
+        self.checkequalnofix(0,  'aaa', 'count', 'b')
+        self.checkequalnofix(3, 'aaa', 'count',  'a')
+        self.checkequalnofix(0, 'aaa', 'count',  'b')
+        self.checkequalnofix(0, 'aaa', 'count',  'b')
+        self.checkequalnofix(1, 'aaa', 'count',  'a', -1)
+        self.checkequalnofix(3, 'aaa', 'count',  'a', -10)
+        self.checkequalnofix(2, 'aaa', 'count',  'a', 0, -1)
+        self.checkequalnofix(0, 'aaa', 'count',  'a', 0, -10)
 
     def test_find(self):
-        self.checkequalnofix(0,  u'abcdefghiabc', 'find', u'abc')
-        self.checkequalnofix(9,  u'abcdefghiabc', 'find', u'abc', 1)
-        self.checkequalnofix(-1, u'abcdefghiabc', 'find', u'def', 4)
+        self.checkequalnofix(0,  'abcdefghiabc', 'find', 'abc')
+        self.checkequalnofix(9,  'abcdefghiabc', 'find', 'abc', 1)
+        self.checkequalnofix(-1, 'abcdefghiabc', 'find', 'def', 4)
 
-        self.assertRaises(TypeError, u'hello'.find)
-        self.assertRaises(TypeError, u'hello'.find, 42)
+        self.assertRaises(TypeError, 'hello'.find)
+        self.assertRaises(TypeError, 'hello'.find, 42)
 
     def test_rfind(self):
         string_tests.CommonTest.test_rfind(self)
         # check mixed argument types
-        self.checkequalnofix(9,   'abcdefghiabc', 'rfind', u'abc')
-        self.checkequalnofix(12,  'abcdefghiabc', 'rfind', u'')
-        self.checkequalnofix(12, u'abcdefghiabc', 'rfind',  '')
+        self.checkequalnofix(9,   'abcdefghiabc', 'rfind', 'abc')
+        self.checkequalnofix(12,  'abcdefghiabc', 'rfind', '')
+        self.checkequalnofix(12, 'abcdefghiabc', 'rfind',  '')
 
     def test_index(self):
         string_tests.CommonTest.test_index(self)
         # check mixed argument types
-        for (t1, t2) in ((str, unicode), (unicode, str)):
+        for (t1, t2) in ((str, str), (str, str)):
             self.checkequalnofix(0, t1('abcdefghiabc'), 'index',  t2(''))
             self.checkequalnofix(3, t1('abcdefghiabc'), 'index',  t2('def'))
             self.checkequalnofix(0, t1('abcdefghiabc'), 'index',  t2('abc'))
@@ -148,7 +148,7 @@
     def test_rindex(self):
         string_tests.CommonTest.test_rindex(self)
         # check mixed argument types
-        for (t1, t2) in ((str, unicode), (unicode, str)):
+        for (t1, t2) in ((str, str), (str, str)):
             self.checkequalnofix(12, t1('abcdefghiabc'), 'rindex',  t2(''))
             self.checkequalnofix(3,  t1('abcdefghiabc'), 'rindex',  t2('def'))
             self.checkequalnofix(9,  t1('abcdefghiabc'), 'rindex',  t2('abc'))
@@ -161,291 +161,291 @@
             self.assertRaises(ValueError, t1('abcdefghi').rindex,  t2('ghi'), 0, -1)
 
     def test_translate(self):
-        self.checkequalnofix(u'bbbc', u'abababc', 'translate', {ord('a'):None})
-        self.checkequalnofix(u'iiic', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
-        self.checkequalnofix(u'iiix', u'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
-        self.checkequalnofix(u'<i><i><i>c', u'abababc', 'translate', {ord('a'):None, ord('b'):u'<i>'})
-        self.checkequalnofix(u'c', u'abababc', 'translate', {ord('a'):None, ord('b'):u''})
-        self.checkequalnofix(u'xyyx', u'xzx', 'translate', {ord('z'):u'yy'})
+        self.checkequalnofix('bbbc', 'abababc', 'translate', {ord('a'):None})
+        self.checkequalnofix('iiic', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i')})
+        self.checkequalnofix('iiix', 'abababc', 'translate', {ord('a'):None, ord('b'):ord('i'), ord('c'):'x'})
+        self.checkequalnofix('<i><i><i>c', 'abababc', 'translate', {ord('a'):None, ord('b'):'<i>'})
+        self.checkequalnofix('c', 'abababc', 'translate', {ord('a'):None, ord('b'):''})
+        self.checkequalnofix('xyyx', 'xzx', 'translate', {ord('z'):'yy'})
 
-        self.assertRaises(TypeError, u'hello'.translate)
-        self.assertRaises(TypeError, u'abababc'.translate, {ord('a'):''})
+        self.assertRaises(TypeError, 'hello'.translate)
+        self.assertRaises(TypeError, 'abababc'.translate, {ord('a'):''})
 
     def test_split(self):
         string_tests.CommonTest.test_split(self)
 
         # Mixed arguments
-        self.checkequalnofix([u'a', u'b', u'c', u'd'], u'a//b//c//d', 'split', '//')
-        self.checkequalnofix([u'a', u'b', u'c', u'd'], 'a//b//c//d', 'split', u'//')
-        self.checkequalnofix([u'endcase ', u''], u'endcase test', 'split', 'test')
+        self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
+        self.checkequalnofix(['a', 'b', 'c', 'd'], 'a//b//c//d', 'split', '//')
+        self.checkequalnofix(['endcase ', ''], 'endcase test', 'split', 'test')
 
     def test_join(self):
         string_tests.MixinStrUnicodeUserStringTest.test_join(self)
 
         # mixed arguments
-        self.checkequalnofix(u'a b c d', u' ', 'join', ['a', 'b', u'c', u'd'])
-        self.checkequalnofix(u'abcd', u'', 'join', (u'a', u'b', u'c', u'd'))
-        self.checkequalnofix(u'w x y z', u' ', 'join', string_tests.Sequence('wxyz'))
-        self.checkequalnofix(u'a b c d', ' ', 'join', [u'a', u'b', u'c', u'd'])
-        self.checkequalnofix(u'a b c d', ' ', 'join', ['a', 'b', u'c', u'd'])
-        self.checkequalnofix(u'abcd', '', 'join', (u'a', u'b', u'c', u'd'))
-        self.checkequalnofix(u'w x y z', ' ', 'join', string_tests.Sequence(u'wxyz'))
+        self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
+        self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
+        self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
+        self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
+        self.checkequalnofix('a b c d', ' ', 'join', ['a', 'b', 'c', 'd'])
+        self.checkequalnofix('abcd', '', 'join', ('a', 'b', 'c', 'd'))
+        self.checkequalnofix('w x y z', ' ', 'join', string_tests.Sequence('wxyz'))
 
     def test_strip(self):
         string_tests.CommonTest.test_strip(self)
-        self.assertRaises(UnicodeError, u"hello".strip, "\xff")
+        self.assertRaises(UnicodeError, "hello".strip, "\xff")
 
     def test_replace(self):
         string_tests.CommonTest.test_replace(self)
 
         # method call forwarded from str implementation because of unicode argument
-        self.checkequalnofix(u'one@two!three!', 'one!two!three!', 'replace', u'!', u'@', 1)
-        self.assertRaises(TypeError, 'replace'.replace, u"r", 42)
+        self.checkequalnofix('one@two!three!', 'one!two!three!', 'replace', '!', '@', 1)
+        self.assertRaises(TypeError, 'replace'.replace, "r", 42)
 
     def test_comparison(self):
         # Comparisons:
-        self.assertEqual(u'abc', 'abc')
-        self.assertEqual('abc', u'abc')
-        self.assertEqual(u'abc', u'abc')
-        self.assert_(u'abcd' > 'abc')
-        self.assert_('abcd' > u'abc')
-        self.assert_(u'abcd' > u'abc')
-        self.assert_(u'abc' < 'abcd')
-        self.assert_('abc' < u'abcd')
-        self.assert_(u'abc' < u'abcd')
+        self.assertEqual('abc', 'abc')
+        self.assertEqual('abc', 'abc')
+        self.assertEqual('abc', 'abc')
+        self.assert_('abcd' > 'abc')
+        self.assert_('abcd' > 'abc')
+        self.assert_('abcd' > 'abc')
+        self.assert_('abc' < 'abcd')
+        self.assert_('abc' < 'abcd')
+        self.assert_('abc' < 'abcd')
 
         if 0:
             # Move these tests to a Unicode collation module test...
             # Testing UTF-16 code point order comparisons...
 
             # No surrogates, no fixup required.
-            self.assert_(u'\u0061' < u'\u20ac')
+            self.assert_('\u0061' < '\u20ac')
             # Non surrogate below surrogate value, no fixup required
-            self.assert_(u'\u0061' < u'\ud800\udc02')
+            self.assert_('\u0061' < '\ud800\udc02')
 
             # Non surrogate above surrogate value, fixup required
             def test_lecmp(s, s2):
                 self.assert_(s < s2)
 
             def test_fixup(s):
-                s2 = u'\ud800\udc01'
+                s2 = '\ud800\udc01'
                 test_lecmp(s, s2)
-                s2 = u'\ud900\udc01'
+                s2 = '\ud900\udc01'
                 test_lecmp(s, s2)
-                s2 = u'\uda00\udc01'
+                s2 = '\uda00\udc01'
                 test_lecmp(s, s2)
-                s2 = u'\udb00\udc01'
+                s2 = '\udb00\udc01'
                 test_lecmp(s, s2)
-                s2 = u'\ud800\udd01'
+                s2 = '\ud800\udd01'
                 test_lecmp(s, s2)
-                s2 = u'\ud900\udd01'
+                s2 = '\ud900\udd01'
                 test_lecmp(s, s2)
-                s2 = u'\uda00\udd01'
+                s2 = '\uda00\udd01'
                 test_lecmp(s, s2)
-                s2 = u'\udb00\udd01'
+                s2 = '\udb00\udd01'
                 test_lecmp(s, s2)
-                s2 = u'\ud800\ude01'
+                s2 = '\ud800\ude01'
                 test_lecmp(s, s2)
-                s2 = u'\ud900\ude01'
+                s2 = '\ud900\ude01'
                 test_lecmp(s, s2)
-                s2 = u'\uda00\ude01'
+                s2 = '\uda00\ude01'
                 test_lecmp(s, s2)
-                s2 = u'\udb00\ude01'
+                s2 = '\udb00\ude01'
                 test_lecmp(s, s2)
-                s2 = u'\ud800\udfff'
+                s2 = '\ud800\udfff'
                 test_lecmp(s, s2)
-                s2 = u'\ud900\udfff'
+                s2 = '\ud900\udfff'
                 test_lecmp(s, s2)
-                s2 = u'\uda00\udfff'
+                s2 = '\uda00\udfff'
                 test_lecmp(s, s2)
-                s2 = u'\udb00\udfff'
+                s2 = '\udb00\udfff'
                 test_lecmp(s, s2)
 
-                test_fixup(u'\ue000')
-                test_fixup(u'\uff61')
+                test_fixup('\ue000')
+                test_fixup('\uff61')
 
         # Surrogates on both sides, no fixup required
-        self.assert_(u'\ud800\udc02' < u'\ud84d\udc56')
+        self.assert_('\ud800\udc02' < '\ud84d\udc56')
 
     def test_islower(self):
         string_tests.MixinStrUnicodeUserStringTest.test_islower(self)
-        self.checkequalnofix(False, u'\u1FFc', 'islower')
+        self.checkequalnofix(False, '\u1FFc', 'islower')
 
     def test_isupper(self):
         string_tests.MixinStrUnicodeUserStringTest.test_isupper(self)
         if not sys.platform.startswith('java'):
-            self.checkequalnofix(False, u'\u1FFc', 'isupper')
+            self.checkequalnofix(False, '\u1FFc', 'isupper')
 
     def test_istitle(self):
         string_tests.MixinStrUnicodeUserStringTest.test_title(self)
-        self.checkequalnofix(True, u'\u1FFc', 'istitle')
-        self.checkequalnofix(True, u'Greek \u1FFcitlecases ...', 'istitle')
+        self.checkequalnofix(True, '\u1FFc', 'istitle')
+        self.checkequalnofix(True, 'Greek \u1FFcitlecases ...', 'istitle')
 
     def test_isspace(self):
         string_tests.MixinStrUnicodeUserStringTest.test_isspace(self)
-        self.checkequalnofix(True, u'\u2000', 'isspace')
-        self.checkequalnofix(True, u'\u200a', 'isspace')
-        self.checkequalnofix(False, u'\u2014', 'isspace')
+        self.checkequalnofix(True, '\u2000', 'isspace')
+        self.checkequalnofix(True, '\u200a', 'isspace')
+        self.checkequalnofix(False, '\u2014', 'isspace')
 
     def test_isalpha(self):
         string_tests.MixinStrUnicodeUserStringTest.test_isalpha(self)
-        self.checkequalnofix(True, u'\u1FFc', 'isalpha')
+        self.checkequalnofix(True, '\u1FFc', 'isalpha')
 
     def test_isdecimal(self):
-        self.checkequalnofix(False, u'', 'isdecimal')
-        self.checkequalnofix(False, u'a', 'isdecimal')
-        self.checkequalnofix(True, u'0', 'isdecimal')
-        self.checkequalnofix(False, u'\u2460', 'isdecimal') # CIRCLED DIGIT ONE
-        self.checkequalnofix(False, u'\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
-        self.checkequalnofix(True, u'\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
-        self.checkequalnofix(True, u'0123456789', 'isdecimal')
-        self.checkequalnofix(False, u'0123456789a', 'isdecimal')
+        self.checkequalnofix(False, '', 'isdecimal')
+        self.checkequalnofix(False, 'a', 'isdecimal')
+        self.checkequalnofix(True, '0', 'isdecimal')
+        self.checkequalnofix(False, '\u2460', 'isdecimal') # CIRCLED DIGIT ONE
+        self.checkequalnofix(False, '\xbc', 'isdecimal') # VULGAR FRACTION ONE QUARTER
+        self.checkequalnofix(True, '\u0660', 'isdecimal') # ARABIC-INDIC DIGIT ZERO
+        self.checkequalnofix(True, '0123456789', 'isdecimal')
+        self.checkequalnofix(False, '0123456789a', 'isdecimal')
 
         self.checkraises(TypeError, 'abc', 'isdecimal', 42)
 
     def test_isdigit(self):
         string_tests.MixinStrUnicodeUserStringTest.test_isdigit(self)
-        self.checkequalnofix(True, u'\u2460', 'isdigit')
-        self.checkequalnofix(False, u'\xbc', 'isdigit')
-        self.checkequalnofix(True, u'\u0660', 'isdigit')
+        self.checkequalnofix(True, '\u2460', 'isdigit')
+        self.checkequalnofix(False, '\xbc', 'isdigit')
+        self.checkequalnofix(True, '\u0660', 'isdigit')
 
     def test_isnumeric(self):
-        self.checkequalnofix(False, u'', 'isnumeric')
-        self.checkequalnofix(False, u'a', 'isnumeric')
-        self.checkequalnofix(True, u'0', 'isnumeric')
-        self.checkequalnofix(True, u'\u2460', 'isnumeric')
-        self.checkequalnofix(True, u'\xbc', 'isnumeric')
-        self.checkequalnofix(True, u'\u0660', 'isnumeric')
-        self.checkequalnofix(True, u'0123456789', 'isnumeric')
-        self.checkequalnofix(False, u'0123456789a', 'isnumeric')
+        self.checkequalnofix(False, '', 'isnumeric')
+        self.checkequalnofix(False, 'a', 'isnumeric')
+        self.checkequalnofix(True, '0', 'isnumeric')
+        self.checkequalnofix(True, '\u2460', 'isnumeric')
+        self.checkequalnofix(True, '\xbc', 'isnumeric')
+        self.checkequalnofix(True, '\u0660', 'isnumeric')
+        self.checkequalnofix(True, '0123456789', 'isnumeric')
+        self.checkequalnofix(False, '0123456789a', 'isnumeric')
 
-        self.assertRaises(TypeError, u"abc".isnumeric, 42)
+        self.assertRaises(TypeError, "abc".isnumeric, 42)
 
     def test_contains(self):
         # Testing Unicode contains method
-        self.assert_('a' in u'abdb')
-        self.assert_('a' in u'bdab')
-        self.assert_('a' in u'bdaba')
-        self.assert_('a' in u'bdba')
-        self.assert_('a' in u'bdba')
-        self.assert_(u'a' in u'bdba')
-        self.assert_(u'a' not in u'bdb')
-        self.assert_(u'a' not in 'bdb')
-        self.assert_(u'a' in 'bdba')
-        self.assert_(u'a' in ('a',1,None))
-        self.assert_(u'a' in (1,None,'a'))
-        self.assert_(u'a' in (1,None,u'a'))
+        self.assert_('a' in 'abdb')
+        self.assert_('a' in 'bdab')
+        self.assert_('a' in 'bdaba')
+        self.assert_('a' in 'bdba')
+        self.assert_('a' in 'bdba')
+        self.assert_('a' in 'bdba')
+        self.assert_('a' not in 'bdb')
+        self.assert_('a' not in 'bdb')
+        self.assert_('a' in 'bdba')
         self.assert_('a' in ('a',1,None))
         self.assert_('a' in (1,None,'a'))
-        self.assert_('a' in (1,None,u'a'))
-        self.assert_('a' not in ('x',1,u'y'))
+        self.assert_('a' in (1,None,'a'))
+        self.assert_('a' in ('a',1,None))
+        self.assert_('a' in (1,None,'a'))
+        self.assert_('a' in (1,None,'a'))
+        self.assert_('a' not in ('x',1,'y'))
         self.assert_('a' not in ('x',1,None))
-        self.assert_(u'abcd' not in u'abcxxxx')
-        self.assert_(u'ab' in u'abcd')
-        self.assert_('ab' in u'abc')
-        self.assert_(u'ab' in 'abc')
-        self.assert_(u'ab' in (1,None,u'ab'))
-        self.assert_(u'' in u'abc')
-        self.assert_('' in u'abc')
+        self.assert_('abcd' not in 'abcxxxx')
+        self.assert_('ab' in 'abcd')
+        self.assert_('ab' in 'abc')
+        self.assert_('ab' in 'abc')
+        self.assert_('ab' in (1,None,'ab'))
+        self.assert_('' in 'abc')
+        self.assert_('' in 'abc')
 
         # If the following fails either
         # the contains operator does not propagate UnicodeErrors or
         # someone has changed the default encoding
-        self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, u'\xe2')
+        self.assertRaises(UnicodeError, 'g\xe2teau'.__contains__, '\xe2')
 
-        self.assert_(u'' in '')
-        self.assert_('' in u'')
-        self.assert_(u'' in u'')
-        self.assert_(u'' in 'abc')
-        self.assert_('' in u'abc')
-        self.assert_(u'' in u'abc')
-        self.assert_(u'\0' not in 'abc')
-        self.assert_('\0' not in u'abc')
-        self.assert_(u'\0' not in u'abc')
-        self.assert_(u'\0' in '\0abc')
-        self.assert_('\0' in u'\0abc')
-        self.assert_(u'\0' in u'\0abc')
-        self.assert_(u'\0' in 'abc\0')
-        self.assert_('\0' in u'abc\0')
-        self.assert_(u'\0' in u'abc\0')
-        self.assert_(u'a' in '\0abc')
-        self.assert_('a' in u'\0abc')
-        self.assert_(u'a' in u'\0abc')
-        self.assert_(u'asdf' in 'asdf')
-        self.assert_('asdf' in u'asdf')
-        self.assert_(u'asdf' in u'asdf')
-        self.assert_(u'asdf' not in 'asd')
-        self.assert_('asdf' not in u'asd')
-        self.assert_(u'asdf' not in u'asd')
-        self.assert_(u'asdf' not in '')
-        self.assert_('asdf' not in u'')
-        self.assert_(u'asdf' not in u'')
+        self.assert_('' in '')
+        self.assert_('' in '')
+        self.assert_('' in '')
+        self.assert_('' in 'abc')
+        self.assert_('' in 'abc')
+        self.assert_('' in 'abc')
+        self.assert_('\0' not in 'abc')
+        self.assert_('\0' not in 'abc')
+        self.assert_('\0' not in 'abc')
+        self.assert_('\0' in '\0abc')
+        self.assert_('\0' in '\0abc')
+        self.assert_('\0' in '\0abc')
+        self.assert_('\0' in 'abc\0')
+        self.assert_('\0' in 'abc\0')
+        self.assert_('\0' in 'abc\0')
+        self.assert_('a' in '\0abc')
+        self.assert_('a' in '\0abc')
+        self.assert_('a' in '\0abc')
+        self.assert_('asdf' in 'asdf')
+        self.assert_('asdf' in 'asdf')
+        self.assert_('asdf' in 'asdf')
+        self.assert_('asdf' not in 'asd')
+        self.assert_('asdf' not in 'asd')
+        self.assert_('asdf' not in 'asd')
+        self.assert_('asdf' not in '')
+        self.assert_('asdf' not in '')
+        self.assert_('asdf' not in '')
 
-        self.assertRaises(TypeError, u"abc".__contains__)
+        self.assertRaises(TypeError, "abc".__contains__)
 
     def test_formatting(self):
         string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
         # Testing Unicode formatting strings...
-        self.assertEqual(u"%s, %s" % (u"abc", "abc"), u'abc, abc')
-        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, 2, 3), u'abc, abc, 1, 2.000000,  3.00')
-        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", 1, -2, 3), u'abc, abc, 1, -2.000000,  3.00')
-        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.5), u'abc, abc, -1, -2.000000,  3.50')
-        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 3.57), u'abc, abc, -1, -2.000000,  3.57')
-        self.assertEqual(u"%s, %s, %i, %f, %5.2f" % (u"abc", "abc", -1, -2, 1003.57), u'abc, abc, -1, -2.000000, 1003.57')
+        self.assertEqual("%s, %s" % ("abc", "abc"), 'abc, abc')
+        self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, 2, 3), 'abc, abc, 1, 2.000000,  3.00')
+        self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", 1, -2, 3), 'abc, abc, 1, -2.000000,  3.00')
+        self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.5), 'abc, abc, -1, -2.000000,  3.50')
+        self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 3.57), 'abc, abc, -1, -2.000000,  3.57')
+        self.assertEqual("%s, %s, %i, %f, %5.2f" % ("abc", "abc", -1, -2, 1003.57), 'abc, abc, -1, -2.000000, 1003.57')
         if not sys.platform.startswith('java'):
-            self.assertEqual(u"%r, %r" % (u"abc", "abc"), u"u'abc', 'abc'")
-        self.assertEqual(u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"}, u'abc, def')
-        self.assertEqual(u"%(x)s, %(\xfc)s" % {'x':u"abc", u'\xfc':"def"}, u'abc, def')
+            self.assertEqual("%r, %r" % ("abc", "abc"), "u'abc', 'abc'")
+        self.assertEqual("%(x)s, %(y)s" % {'x':"abc", 'y':"def"}, 'abc, def')
+        self.assertEqual("%(x)s, %(\xfc)s" % {'x':"abc", '\xfc':"def"}, 'abc, def')
 
-        self.assertEqual(u'%c' % 0x1234, u'\u1234')
-        self.assertRaises(OverflowError, u"%c".__mod__, (sys.maxunicode+1,))
+        self.assertEqual('%c' % 0x1234, '\u1234')
+        self.assertRaises(OverflowError, "%c".__mod__, (sys.maxunicode+1,))
 
         # formatting jobs delegated from the string implementation:
-        self.assertEqual('...%(foo)s...' % {'foo':u"abc"}, u'...abc...')
         self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
-        self.assertEqual('...%(foo)s...' % {u'foo':"abc"}, '...abc...')
-        self.assertEqual('...%(foo)s...' % {u'foo':u"abc"}, u'...abc...')
-        self.assertEqual('...%(foo)s...' % {u'foo':u"abc",'def':123},  u'...abc...')
-        self.assertEqual('...%(foo)s...' % {u'foo':u"abc",u'def':123}, u'...abc...')
-        self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...1...2...3...abc...')
-        self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,u"abc"), u'...%...%s...1...2...3...abc...')
-        self.assertEqual('...%s...' % u"abc", u'...abc...')
-        self.assertEqual('%*s' % (5,u'abc',), u'  abc')
-        self.assertEqual('%*s' % (-5,u'abc',), u'abc  ')
-        self.assertEqual('%*.*s' % (5,2,u'abc',), u'   ab')
-        self.assertEqual('%*.*s' % (5,3,u'abc',), u'  abc')
-        self.assertEqual('%i %*.*s' % (10, 5,3,u'abc',), u'10   abc')
-        self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, u'abc',), u'103   abc')
-        self.assertEqual('%c' % u'a', u'a')
+        self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
+        self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
+        self.assertEqual('...%(foo)s...' % {'foo':"abc"}, '...abc...')
+        self.assertEqual('...%(foo)s...' % {'foo':"abc",'def':123},  '...abc...')
+        self.assertEqual('...%(foo)s...' % {'foo':"abc",'def':123}, '...abc...')
+        self.assertEqual('...%s...%s...%s...%s...' % (1,2,3,"abc"), '...1...2...3...abc...')
+        self.assertEqual('...%%...%%s...%s...%s...%s...%s...' % (1,2,3,"abc"), '...%...%s...1...2...3...abc...')
+        self.assertEqual('...%s...' % "abc", '...abc...')
+        self.assertEqual('%*s' % (5,'abc',), '  abc')
+        self.assertEqual('%*s' % (-5,'abc',), 'abc  ')
+        self.assertEqual('%*.*s' % (5,2,'abc',), '   ab')
+        self.assertEqual('%*.*s' % (5,3,'abc',), '  abc')
+        self.assertEqual('%i %*.*s' % (10, 5,3,'abc',), '10   abc')
+        self.assertEqual('%i%s %*.*s' % (10, 3, 5, 3, 'abc',), '103   abc')
+        self.assertEqual('%c' % 'a', 'a')
         class Wrapper:
             def __str__(self):
-                return u'\u1234'
-        self.assertEqual('%s' % Wrapper(), u'\u1234')
+                return '\u1234'
+        self.assertEqual('%s' % Wrapper(), '\u1234')
 
     @test_support.run_with_locale('LC_ALL', 'de_DE', 'fr_FR')
     def test_format_float(self):
         # should not format with a comma, but always with C locale
-        self.assertEqual(u'1.0', u'%.1f' % 1.0)
+        self.assertEqual('1.0', '%.1f' % 1.0)
 
     def test_constructor(self):
         # unicode(obj) tests (this maps to PyObject_Unicode() at C level)
 
         self.assertEqual(
-            unicode(u'unicode remains unicode'),
-            u'unicode remains unicode'
+            str('unicode remains unicode'),
+            'unicode remains unicode'
         )
 
-        class UnicodeSubclass(unicode):
+        class UnicodeSubclass(str):
             pass
 
         self.assertEqual(
-            unicode(UnicodeSubclass('unicode subclass becomes unicode')),
-            u'unicode subclass becomes unicode'
+            str(UnicodeSubclass('unicode subclass becomes unicode')),
+            'unicode subclass becomes unicode'
         )
 
         self.assertEqual(
-            unicode('strings are converted to unicode'),
-            u'strings are converted to unicode'
+            str('strings are converted to unicode'),
+            'strings are converted to unicode'
         )
 
         class UnicodeCompat:
@@ -455,8 +455,8 @@
                 return self.x
 
         self.assertEqual(
-            unicode(UnicodeCompat('__unicode__ compatible objects are recognized')),
-            u'__unicode__ compatible objects are recognized')
+            str(UnicodeCompat('__unicode__ compatible objects are recognized')),
+            '__unicode__ compatible objects are recognized')
 
         class StringCompat:
             def __init__(self, x):
@@ -465,26 +465,26 @@
                 return self.x
 
         self.assertEqual(
-            unicode(StringCompat('__str__ compatible objects are recognized')),
-            u'__str__ compatible objects are recognized'
+            str(StringCompat('__str__ compatible objects are recognized')),
+            '__str__ compatible objects are recognized'
         )
 
         # unicode(obj) is compatible to str():
 
         o = StringCompat('unicode(obj) is compatible to str()')
-        self.assertEqual(unicode(o), u'unicode(obj) is compatible to str()')
+        self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
         self.assertEqual(str(o), 'unicode(obj) is compatible to str()')
 
         # %-formatting and .__unicode__()
-        self.assertEqual(u'%s' %
-                         UnicodeCompat(u"u'%s' % obj uses obj.__unicode__()"),
-                         u"u'%s' % obj uses obj.__unicode__()")
-        self.assertEqual(u'%s' %
-                         UnicodeCompat(u"u'%s' % obj falls back to obj.__str__()"),
-                         u"u'%s' % obj falls back to obj.__str__()")
+        self.assertEqual('%s' %
+                         UnicodeCompat("u'%s' % obj uses obj.__unicode__()"),
+                         "u'%s' % obj uses obj.__unicode__()")
+        self.assertEqual('%s' %
+                         UnicodeCompat("u'%s' % obj falls back to obj.__str__()"),
+                         "u'%s' % obj falls back to obj.__str__()")
 
         for obj in (123, 123.45, 123):
-            self.assertEqual(unicode(obj), unicode(str(obj)))
+            self.assertEqual(str(obj), str(str(obj)))
 
         # unicode(obj, encoding, error) tests (this maps to
         # PyUnicode_FromEncodedObject() at C level)
@@ -492,71 +492,71 @@
         if not sys.platform.startswith('java'):
             self.assertRaises(
                 TypeError,
-                unicode,
-                u'decoding unicode is not supported',
+                str,
+                'decoding unicode is not supported',
                 'utf-8',
                 'strict'
             )
 
         self.assertEqual(
-            unicode('strings are decoded to unicode', 'utf-8', 'strict'),
-            u'strings are decoded to unicode'
+            str('strings are decoded to unicode', 'utf-8', 'strict'),
+            'strings are decoded to unicode'
         )
 
         if not sys.platform.startswith('java'):
             self.assertEqual(
-                unicode(
+                str(
                     buffer('character buffers are decoded to unicode'),
                     'utf-8',
                     'strict'
                 ),
-                u'character buffers are decoded to unicode'
+                'character buffers are decoded to unicode'
             )
 
-        self.assertRaises(TypeError, unicode, 42, 42, 42)
+        self.assertRaises(TypeError, str, 42, 42, 42)
 
     def test_codecs_utf7(self):
         utfTests = [
-            (u'A\u2262\u0391.', 'A+ImIDkQ.'),             # RFC2152 example
-            (u'Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'),     # RFC2152 example
-            (u'\u65E5\u672C\u8A9E', '+ZeVnLIqe-'),        # RFC2152 example
-            (u'Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
-            (u'+', '+-'),
-            (u'+-', '+--'),
-            (u'+?', '+-?'),
-            (u'\?', '+AFw?'),
-            (u'+?', '+-?'),
-            (ur'\\?', '+AFwAXA?'),
-            (ur'\\\?', '+AFwAXABc?'),
-            (ur'++--', '+-+---')
+            ('A\u2262\u0391.', 'A+ImIDkQ.'),             # RFC2152 example
+            ('Hi Mom -\u263a-!', 'Hi Mom -+Jjo--!'),     # RFC2152 example
+            ('\u65E5\u672C\u8A9E', '+ZeVnLIqe-'),        # RFC2152 example
+            ('Item 3 is \u00a31.', 'Item 3 is +AKM-1.'), # RFC2152 example
+            ('+', '+-'),
+            ('+-', '+--'),
+            ('+?', '+-?'),
+            ('\?', '+AFw?'),
+            ('+?', '+-?'),
+            (r'\\?', '+AFwAXA?'),
+            (r'\\\?', '+AFwAXABc?'),
+            (r'++--', '+-+---')
         ]
 
         for (x, y) in utfTests:
             self.assertEqual(x.encode('utf-7'), y)
 
         # surrogates not supported
-        self.assertRaises(UnicodeError, unicode, '+3ADYAA-', 'utf-7')
+        self.assertRaises(UnicodeError, str, '+3ADYAA-', 'utf-7')
 
-        self.assertEqual(unicode('+3ADYAA-', 'utf-7', 'replace'), u'\ufffd')
+        self.assertEqual(str('+3ADYAA-', 'utf-7', 'replace'), '\ufffd')
 
     def test_codecs_utf8(self):
-        self.assertEqual(u''.encode('utf-8'), '')
-        self.assertEqual(u'\u20ac'.encode('utf-8'), '\xe2\x82\xac')
-        self.assertEqual(u'\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82')
-        self.assertEqual(u'\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96')
-        self.assertEqual(u'\ud800'.encode('utf-8'), '\xed\xa0\x80')
-        self.assertEqual(u'\udc00'.encode('utf-8'), '\xed\xb0\x80')
+        self.assertEqual(''.encode('utf-8'), '')
+        self.assertEqual('\u20ac'.encode('utf-8'), '\xe2\x82\xac')
+        self.assertEqual('\ud800\udc02'.encode('utf-8'), '\xf0\x90\x80\x82')
+        self.assertEqual('\ud84d\udc56'.encode('utf-8'), '\xf0\xa3\x91\x96')
+        self.assertEqual('\ud800'.encode('utf-8'), '\xed\xa0\x80')
+        self.assertEqual('\udc00'.encode('utf-8'), '\xed\xb0\x80')
         self.assertEqual(
-            (u'\ud800\udc02'*1000).encode('utf-8'),
+            ('\ud800\udc02'*1000).encode('utf-8'),
             '\xf0\x90\x80\x82'*1000
         )
         self.assertEqual(
-            u'\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
-            u'\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
-            u'\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
-            u'\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
-            u'\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
-            u' Nunstuck git und'.encode('utf-8'),
+            '\u6b63\u78ba\u306b\u8a00\u3046\u3068\u7ffb\u8a33\u306f'
+            '\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3002\u4e00'
+            '\u90e8\u306f\u30c9\u30a4\u30c4\u8a9e\u3067\u3059\u304c'
+            '\u3001\u3042\u3068\u306f\u3067\u305f\u3089\u3081\u3067'
+            '\u3059\u3002\u5b9f\u969b\u306b\u306f\u300cWenn ist das'
+            ' Nunstuck git und'.encode('utf-8'),
             '\xe6\xad\xa3\xe7\xa2\xba\xe3\x81\xab\xe8\xa8\x80\xe3\x81'
             '\x86\xe3\x81\xa8\xe7\xbf\xbb\xe8\xa8\xb3\xe3\x81\xaf\xe3'
             '\x81\x95\xe3\x82\x8c\xe3\x81\xa6\xe3\x81\x84\xe3\x81\xbe'
@@ -570,9 +570,9 @@
         )
 
         # UTF-8 specific decoding tests
-        self.assertEqual(unicode('\xf0\xa3\x91\x96', 'utf-8'), u'\U00023456' )
-        self.assertEqual(unicode('\xf0\x90\x80\x82', 'utf-8'), u'\U00010002' )
-        self.assertEqual(unicode('\xe2\x82\xac', 'utf-8'), u'\u20ac' )
+        self.assertEqual(str('\xf0\xa3\x91\x96', 'utf-8'), '\U00023456' )
+        self.assertEqual(str('\xf0\x90\x80\x82', 'utf-8'), '\U00010002' )
+        self.assertEqual(str('\xe2\x82\xac', 'utf-8'), '\u20ac' )
 
         # Other possible utf-8 test cases:
         # * strict decoding testing for all of the
@@ -580,55 +580,55 @@
 
     def test_codecs_idna(self):
         # Test whether trailing dot is preserved
-        self.assertEqual(u"www.python.org.".encode("idna"), "www.python.org.")
+        self.assertEqual("www.python.org.".encode("idna"), "www.python.org.")
 
     def test_codecs_errors(self):
         # Error handling (encoding)
-        self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii')
-        self.assertRaises(UnicodeError, u'Andr\202 x'.encode, 'ascii','strict')
-        self.assertEqual(u'Andr\202 x'.encode('ascii','ignore'), "Andr x")
-        self.assertEqual(u'Andr\202 x'.encode('ascii','replace'), "Andr? x")
+        self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii')
+        self.assertRaises(UnicodeError, 'Andr\202 x'.encode, 'ascii','strict')
+        self.assertEqual('Andr\202 x'.encode('ascii','ignore'), "Andr x")
+        self.assertEqual('Andr\202 x'.encode('ascii','replace'), "Andr? x")
 
         # Error handling (decoding)
-        self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii')
-        self.assertRaises(UnicodeError, unicode, 'Andr\202 x', 'ascii','strict')
-        self.assertEqual(unicode('Andr\202 x','ascii','ignore'), u"Andr x")
-        self.assertEqual(unicode('Andr\202 x','ascii','replace'), u'Andr\uFFFD x')
+        self.assertRaises(UnicodeError, str, 'Andr\202 x', 'ascii')
+        self.assertRaises(UnicodeError, str, 'Andr\202 x', 'ascii','strict')
+        self.assertEqual(str('Andr\202 x','ascii','ignore'), "Andr x")
+        self.assertEqual(str('Andr\202 x','ascii','replace'), 'Andr\uFFFD x')
 
         # Error handling (unknown character names)
-        self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), u"xx")
+        self.assertEqual("\\N{foo}xx".decode("unicode-escape", "ignore"), "xx")
 
         # Error handling (truncated escape sequence)
         self.assertRaises(UnicodeError, "\\".decode, "unicode-escape")
 
         self.assertRaises(TypeError, "hello".decode, "test.unicode1")
-        self.assertRaises(TypeError, unicode, "hello", "test.unicode2")
-        self.assertRaises(TypeError, u"hello".encode, "test.unicode1")
-        self.assertRaises(TypeError, u"hello".encode, "test.unicode2")
+        self.assertRaises(TypeError, str, "hello", "test.unicode2")
+        self.assertRaises(TypeError, "hello".encode, "test.unicode1")
+        self.assertRaises(TypeError, "hello".encode, "test.unicode2")
         # executes PyUnicode_Encode()
         import imp
         self.assertRaises(
             ImportError,
             imp.find_module,
             "non-existing module",
-            [u"non-existing dir"]
+            ["non-existing dir"]
         )
 
         # Error handling (wrong arguments)
-        self.assertRaises(TypeError, u"hello".encode, 42, 42, 42)
+        self.assertRaises(TypeError, "hello".encode, 42, 42, 42)
 
         # Error handling (PyUnicode_EncodeDecimal())
-        self.assertRaises(UnicodeError, int, u"\u0200")
+        self.assertRaises(UnicodeError, int, "\u0200")
 
     def test_codecs(self):
         # Encoding
-        self.assertEqual(u'hello'.encode('ascii'), 'hello')
-        self.assertEqual(u'hello'.encode('utf-7'), 'hello')
-        self.assertEqual(u'hello'.encode('utf-8'), 'hello')
-        self.assertEqual(u'hello'.encode('utf8'), 'hello')
-        self.assertEqual(u'hello'.encode('utf-16-le'), 'h\000e\000l\000l\000o\000')
-        self.assertEqual(u'hello'.encode('utf-16-be'), '\000h\000e\000l\000l\000o')
-        self.assertEqual(u'hello'.encode('latin-1'), 'hello')
+        self.assertEqual('hello'.encode('ascii'), 'hello')
+        self.assertEqual('hello'.encode('utf-7'), 'hello')
+        self.assertEqual('hello'.encode('utf-8'), 'hello')
+        self.assertEqual('hello'.encode('utf8'), 'hello')
+        self.assertEqual('hello'.encode('utf-16-le'), 'h\000e\000l\000l\000o\000')
+        self.assertEqual('hello'.encode('utf-16-be'), '\000h\000e\000l\000l\000o')
+        self.assertEqual('hello'.encode('latin-1'), 'hello')
 
         # Roundtrip safety for BMP (just the first 1024 chars)
         for c in xrange(1024):
@@ -636,34 +636,34 @@
             for encoding in ('utf-7', 'utf-8', 'utf-16', 'utf-16-le',
                              'utf-16-be', 'raw_unicode_escape',
                              'unicode_escape', 'unicode_internal'):
-                self.assertEqual(unicode(u.encode(encoding),encoding), u)
+                self.assertEqual(str(u.encode(encoding),encoding), u)
 
         # Roundtrip safety for BMP (just the first 256 chars)
         for c in xrange(256):
             u = unichr(c)
             for encoding in ('latin-1',):
-                self.assertEqual(unicode(u.encode(encoding),encoding), u)
+                self.assertEqual(str(u.encode(encoding),encoding), u)
 
         # Roundtrip safety for BMP (just the first 128 chars)
         for c in xrange(128):
             u = unichr(c)
             for encoding in ('ascii',):
-                self.assertEqual(unicode(u.encode(encoding),encoding), u)
+                self.assertEqual(str(u.encode(encoding),encoding), u)
 
         # Roundtrip safety for non-BMP (just a few chars)
-        u = u'\U00010001\U00020002\U00030003\U00040004\U00050005'
+        u = '\U00010001\U00020002\U00030003\U00040004\U00050005'
         for encoding in ('utf-8', 'utf-16', 'utf-16-le', 'utf-16-be',
                          #'raw_unicode_escape',
                          'unicode_escape', 'unicode_internal'):
-            self.assertEqual(unicode(u.encode(encoding),encoding), u)
+            self.assertEqual(str(u.encode(encoding),encoding), u)
 
         # UTF-8 must be roundtrip safe for all UCS-2 code points
         # This excludes surrogates: in the full range, there would be
         # a surrogate pair (\udbff\udc00), which gets converted back
         # to a non-BMP character (\U0010fc00)
-        u = u''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
+        u = ''.join(map(unichr, range(0,0xd800)+range(0xe000,0x10000)))
         for encoding in ('utf-8',):
-            self.assertEqual(unicode(u.encode(encoding),encoding), u)
+            self.assertEqual(str(u.encode(encoding),encoding), u)
 
     def test_codecs_charmap(self):
         # 0-127
@@ -692,7 +692,7 @@
             #'cp875'
 
             ):
-            self.assertEqual(unicode(s, encoding).encode(encoding), s)
+            self.assertEqual(str(s, encoding).encode(encoding), s)
 
         # 128-255
         s = ''.join(map(chr, xrange(128, 256)))
@@ -717,14 +717,14 @@
             #'cp1006', 'cp875', 'iso8859_8',
 
             ):
-            self.assertEqual(unicode(s, encoding).encode(encoding), s)
+            self.assertEqual(str(s, encoding).encode(encoding), s)
 
     def test_concatenation(self):
-        self.assertEqual((u"abc" u"def"), u"abcdef")
-        self.assertEqual(("abc" u"def"), u"abcdef")
-        self.assertEqual((u"abc" "def"), u"abcdef")
-        self.assertEqual((u"abc" u"def" "ghi"), u"abcdefghi")
-        self.assertEqual(("abc" "def" u"ghi"), u"abcdefghi")
+        self.assertEqual(("abc" "def"), "abcdef")
+        self.assertEqual(("abc" "def"), "abcdef")
+        self.assertEqual(("abc" "def"), "abcdef")
+        self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
+        self.assertEqual(("abc" "def" "ghi"), "abcdefghi")
 
     def test_printing(self):
         class BitBucket:
@@ -732,20 +732,20 @@
                 pass
 
         out = BitBucket()
-        print(u'abc', file=out)
-        print(u'abc', u'def', file=out)
-        print(u'abc', 'def', file=out)
-        print('abc', u'def', file=out)
-        print(u'abc\n', file=out)
-        print(u'abc\n', end=' ', file=out)
-        print(u'abc\n', end=' ', file=out)
-        print(u'def\n', file=out)
-        print(u'def\n', file=out)
+        print('abc', file=out)
+        print('abc', 'def', file=out)
+        print('abc', 'def', file=out)
+        print('abc', 'def', file=out)
+        print('abc\n', file=out)
+        print('abc\n', end=' ', file=out)
+        print('abc\n', end=' ', file=out)
+        print('def\n', file=out)
+        print('def\n', file=out)
 
     def test_ucs4(self):
         if sys.maxunicode == 0xFFFF:
             return
-        x = u'\U00100000'
+        x = '\U00100000'
         y = x.encode("raw-unicode-escape").decode("raw-unicode-escape")
         self.assertEqual(x, y)
 
@@ -757,11 +757,11 @@
 
         class Foo1:
             def __unicode__(self):
-                return u"foo"
+                return "foo"
 
         class Foo2(object):
             def __unicode__(self):
-                return u"foo"
+                return "foo"
 
         class Foo3(object):
             def __unicode__(self):
@@ -771,7 +771,7 @@
             def __unicode__(self):
                 return "foo"
 
-        class Foo5(unicode):
+        class Foo5(str):
             def __unicode__(self):
                 return "foo"
 
@@ -780,37 +780,37 @@
                 return "foos"
 
             def __unicode__(self):
-                return u"foou"
+                return "foou"
 
-        class Foo7(unicode):
+        class Foo7(str):
             def __str__(self):
                 return "foos"
             def __unicode__(self):
-                return u"foou"
+                return "foou"
 
-        class Foo8(unicode):
+        class Foo8(str):
             def __new__(cls, content=""):
-                return unicode.__new__(cls, 2*content)
+                return str.__new__(cls, 2*content)
             def __unicode__(self):
                 return self
 
-        class Foo9(unicode):
+        class Foo9(str):
             def __str__(self):
                 return "string"
             def __unicode__(self):
                 return "not unicode"
 
-        self.assertEqual(unicode(Foo0()), u"foo")
-        self.assertEqual(unicode(Foo1()), u"foo")
-        self.assertEqual(unicode(Foo2()), u"foo")
-        self.assertEqual(unicode(Foo3()), u"foo")
-        self.assertEqual(unicode(Foo4("bar")), u"foo")
-        self.assertEqual(unicode(Foo5("bar")), u"foo")
-        self.assertEqual(unicode(Foo6("bar")), u"foou")
-        self.assertEqual(unicode(Foo7("bar")), u"foou")
-        self.assertEqual(unicode(Foo8("foo")), u"foofoo")
+        self.assertEqual(str(Foo0()), "foo")
+        self.assertEqual(str(Foo1()), "foo")
+        self.assertEqual(str(Foo2()), "foo")
+        self.assertEqual(str(Foo3()), "foo")
+        self.assertEqual(str(Foo4("bar")), "foo")
+        self.assertEqual(str(Foo5("bar")), "foo")
+        self.assertEqual(str(Foo6("bar")), "foou")
+        self.assertEqual(str(Foo7("bar")), "foou")
+        self.assertEqual(str(Foo8("foo")), "foofoo")
         self.assertEqual(str(Foo9("foo")), "string")
-        self.assertEqual(unicode(Foo9("foo")), u"not unicode")
+        self.assertEqual(str(Foo9("foo")), "not unicode")
 
     def test_unicode_repr(self):
         class s1:
@@ -819,7 +819,7 @@
 
         class s2:
             def __repr__(self):
-                return u'\\n'
+                return '\\n'
 
         self.assertEqual(repr(s1()), '\\n')
         self.assertEqual(repr(s2()), '\\n')

Modified: python/branches/py3k-struni/Lib/test/test_unicode_file.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_unicode_file.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_unicode_file.py	Wed May  2 21:09:54 2007
@@ -20,7 +20,7 @@
     # encoding instead.
     import sys
     try:
-        TESTFN_UNICODE = unicode("@test-\xe0\xf2", sys.getfilesystemencoding())
+        TESTFN_UNICODE = str("@test-\xe0\xf2", sys.getfilesystemencoding())
         TESTFN_ENCODED = TESTFN_UNICODE.encode(TESTFN_ENCODING)
         if '?' in TESTFN_ENCODED:
             # MBCS will not report the error properly

Modified: python/branches/py3k-struni/Lib/test/test_unicodedata.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_unicodedata.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_unicodedata.py	Wed May  2 21:09:54 2007
@@ -24,26 +24,26 @@
             char = unichr(i)
             data = [
                 # Predicates (single char)
-                u"01"[char.isalnum()],
-                u"01"[char.isalpha()],
-                u"01"[char.isdecimal()],
-                u"01"[char.isdigit()],
-                u"01"[char.islower()],
-                u"01"[char.isnumeric()],
-                u"01"[char.isspace()],
-                u"01"[char.istitle()],
-                u"01"[char.isupper()],
+                "01"[char.isalnum()],
+                "01"[char.isalpha()],
+                "01"[char.isdecimal()],
+                "01"[char.isdigit()],
+                "01"[char.islower()],
+                "01"[char.isnumeric()],
+                "01"[char.isspace()],
+                "01"[char.istitle()],
+                "01"[char.isupper()],
 
                 # Predicates (multiple chars)
-                u"01"[(char + u'abc').isalnum()],
-                u"01"[(char + u'abc').isalpha()],
-                u"01"[(char + u'123').isdecimal()],
-                u"01"[(char + u'123').isdigit()],
-                u"01"[(char + u'abc').islower()],
-                u"01"[(char + u'123').isnumeric()],
-                u"01"[(char + u' \t').isspace()],
-                u"01"[(char + u'abc').istitle()],
-                u"01"[(char + u'ABC').isupper()],
+                "01"[(char + 'abc').isalnum()],
+                "01"[(char + 'abc').isalpha()],
+                "01"[(char + '123').isdecimal()],
+                "01"[(char + '123').isdigit()],
+                "01"[(char + 'abc').islower()],
+                "01"[(char + '123').isnumeric()],
+                "01"[(char + ' \t').isspace()],
+                "01"[(char + 'abc').istitle()],
+                "01"[(char + 'ABC').isupper()],
 
                 # Mappings (single char)
                 char.lower(),
@@ -51,13 +51,13 @@
                 char.title(),
 
                 # Mappings (multiple chars)
-                (char + u'abc').lower(),
-                (char + u'ABC').upper(),
-                (char + u'abc').title(),
-                (char + u'ABC').title(),
+                (char + 'abc').lower(),
+                (char + 'ABC').upper(),
+                (char + 'abc').title(),
+                (char + 'ABC').title(),
 
                 ]
-            h.update(u''.join(data).encode(encoding))
+            h.update(''.join(data).encode(encoding))
         result = h.hexdigest()
         self.assertEqual(result, self.expectedchecksum)
 
@@ -99,92 +99,92 @@
         self.assertEqual(result, self.expectedchecksum)
 
     def test_digit(self):
-        self.assertEqual(self.db.digit(u'A', None), None)
-        self.assertEqual(self.db.digit(u'9'), 9)
-        self.assertEqual(self.db.digit(u'\u215b', None), None)
-        self.assertEqual(self.db.digit(u'\u2468'), 9)
+        self.assertEqual(self.db.digit('A', None), None)
+        self.assertEqual(self.db.digit('9'), 9)
+        self.assertEqual(self.db.digit('\u215b', None), None)
+        self.assertEqual(self.db.digit('\u2468'), 9)
 
         self.assertRaises(TypeError, self.db.digit)
-        self.assertRaises(TypeError, self.db.digit, u'xx')
-        self.assertRaises(ValueError, self.db.digit, u'x')
+        self.assertRaises(TypeError, self.db.digit, 'xx')
+        self.assertRaises(ValueError, self.db.digit, 'x')
 
     def test_numeric(self):
-        self.assertEqual(self.db.numeric(u'A',None), None)
-        self.assertEqual(self.db.numeric(u'9'), 9)
-        self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
-        self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
+        self.assertEqual(self.db.numeric('A',None), None)
+        self.assertEqual(self.db.numeric('9'), 9)
+        self.assertEqual(self.db.numeric('\u215b'), 0.125)
+        self.assertEqual(self.db.numeric('\u2468'), 9.0)
 
         self.assertRaises(TypeError, self.db.numeric)
-        self.assertRaises(TypeError, self.db.numeric, u'xx')
-        self.assertRaises(ValueError, self.db.numeric, u'x')
+        self.assertRaises(TypeError, self.db.numeric, 'xx')
+        self.assertRaises(ValueError, self.db.numeric, 'x')
 
     def test_decimal(self):
-        self.assertEqual(self.db.decimal(u'A',None), None)
-        self.assertEqual(self.db.decimal(u'9'), 9)
-        self.assertEqual(self.db.decimal(u'\u215b', None), None)
-        self.assertEqual(self.db.decimal(u'\u2468', None), None)
+        self.assertEqual(self.db.decimal('A',None), None)
+        self.assertEqual(self.db.decimal('9'), 9)
+        self.assertEqual(self.db.decimal('\u215b', None), None)
+        self.assertEqual(self.db.decimal('\u2468', None), None)
 
         self.assertRaises(TypeError, self.db.decimal)
-        self.assertRaises(TypeError, self.db.decimal, u'xx')
-        self.assertRaises(ValueError, self.db.decimal, u'x')
+        self.assertRaises(TypeError, self.db.decimal, 'xx')
+        self.assertRaises(ValueError, self.db.decimal, 'x')
 
     def test_category(self):
-        self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
-        self.assertEqual(self.db.category(u'a'), 'Ll')
-        self.assertEqual(self.db.category(u'A'), 'Lu')
+        self.assertEqual(self.db.category('\uFFFE'), 'Cn')
+        self.assertEqual(self.db.category('a'), 'Ll')
+        self.assertEqual(self.db.category('A'), 'Lu')
 
         self.assertRaises(TypeError, self.db.category)
-        self.assertRaises(TypeError, self.db.category, u'xx')
+        self.assertRaises(TypeError, self.db.category, 'xx')
 
     def test_bidirectional(self):
-        self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
-        self.assertEqual(self.db.bidirectional(u' '), 'WS')
-        self.assertEqual(self.db.bidirectional(u'A'), 'L')
+        self.assertEqual(self.db.bidirectional('\uFFFE'), '')
+        self.assertEqual(self.db.bidirectional(' '), 'WS')
+        self.assertEqual(self.db.bidirectional('A'), 'L')
 
         self.assertRaises(TypeError, self.db.bidirectional)
-        self.assertRaises(TypeError, self.db.bidirectional, u'xx')
+        self.assertRaises(TypeError, self.db.bidirectional, 'xx')
 
     def test_decomposition(self):
-        self.assertEqual(self.db.decomposition(u'\uFFFE'),'')
-        self.assertEqual(self.db.decomposition(u'\u00bc'), '<fraction> 0031 2044 0034')
+        self.assertEqual(self.db.decomposition('\uFFFE'),'')
+        self.assertEqual(self.db.decomposition('\u00bc'), '<fraction> 0031 2044 0034')
 
         self.assertRaises(TypeError, self.db.decomposition)
-        self.assertRaises(TypeError, self.db.decomposition, u'xx')
+        self.assertRaises(TypeError, self.db.decomposition, 'xx')
 
     def test_mirrored(self):
-        self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
-        self.assertEqual(self.db.mirrored(u'a'), 0)
-        self.assertEqual(self.db.mirrored(u'\u2201'), 1)
+        self.assertEqual(self.db.mirrored('\uFFFE'), 0)
+        self.assertEqual(self.db.mirrored('a'), 0)
+        self.assertEqual(self.db.mirrored('\u2201'), 1)
 
         self.assertRaises(TypeError, self.db.mirrored)
-        self.assertRaises(TypeError, self.db.mirrored, u'xx')
+        self.assertRaises(TypeError, self.db.mirrored, 'xx')
 
     def test_combining(self):
-        self.assertEqual(self.db.combining(u'\uFFFE'), 0)
-        self.assertEqual(self.db.combining(u'a'), 0)
-        self.assertEqual(self.db.combining(u'\u20e1'), 230)
+        self.assertEqual(self.db.combining('\uFFFE'), 0)
+        self.assertEqual(self.db.combining('a'), 0)
+        self.assertEqual(self.db.combining('\u20e1'), 230)
 
         self.assertRaises(TypeError, self.db.combining)
-        self.assertRaises(TypeError, self.db.combining, u'xx')
+        self.assertRaises(TypeError, self.db.combining, 'xx')
 
     def test_normalize(self):
         self.assertRaises(TypeError, self.db.normalize)
-        self.assertRaises(ValueError, self.db.normalize, 'unknown', u'xx')
-        self.assertEqual(self.db.normalize('NFKC', u''), u'')
+        self.assertRaises(ValueError, self.db.normalize, 'unknown', 'xx')
+        self.assertEqual(self.db.normalize('NFKC', ''), '')
         # The rest can be found in test_normalization.py
         # which requires an external file.
 
     def test_east_asian_width(self):
         eaw = self.db.east_asian_width
         self.assertRaises(TypeError, eaw, 'a')
-        self.assertRaises(TypeError, eaw, u'')
-        self.assertRaises(TypeError, eaw, u'ra')
-        self.assertEqual(eaw(u'\x1e'), 'N')
-        self.assertEqual(eaw(u'\x20'), 'Na')
-        self.assertEqual(eaw(u'\uC894'), 'W')
-        self.assertEqual(eaw(u'\uFF66'), 'H')
-        self.assertEqual(eaw(u'\uFF1F'), 'F')
-        self.assertEqual(eaw(u'\u2010'), 'A')
+        self.assertRaises(TypeError, eaw, '')
+        self.assertRaises(TypeError, eaw, 'ra')
+        self.assertEqual(eaw('\x1e'), 'N')
+        self.assertEqual(eaw('\x20'), 'Na')
+        self.assertEqual(eaw('\uC894'), 'W')
+        self.assertEqual(eaw('\uFF66'), 'H')
+        self.assertEqual(eaw('\uFF1F'), 'F')
+        self.assertEqual(eaw('\u2010'), 'A')
 
 class UnicodeMiscTest(UnicodeDatabaseTest):
 

Modified: python/branches/py3k-struni/Lib/test/test_urllib.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_urllib.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_urllib.py	Wed May  2 21:09:54 2007
@@ -425,8 +425,8 @@
                          "using unquote_plus(): %s != %s" % (expect, result))
 
     def test_unquote_with_unicode(self):
-        r = urllib.unquote(u'br%C3%BCckner_sapporo_20050930.doc')
-        self.assertEqual(r, u'br\xc3\xbcckner_sapporo_20050930.doc')
+        r = urllib.unquote('br%C3%BCckner_sapporo_20050930.doc')
+        self.assertEqual(r, 'br\xc3\xbcckner_sapporo_20050930.doc')
 
 class urlencode_Tests(unittest.TestCase):
     """Tests for urlencode()"""

Modified: python/branches/py3k-struni/Lib/test/test_winreg.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_winreg.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_winreg.py	Wed May  2 21:09:54 2007
@@ -19,10 +19,10 @@
 ]
 if have_unicode:
     test_data+=[
-    (unicode("Unicode Val"),  unicode("A Unicode value"),                      REG_SZ,),
-    ("UnicodeExpand", unicode("The path is %path%"),                   REG_EXPAND_SZ),
-    ("Multi-unicode", [unicode("Lots"), unicode("of"), unicode("unicode"), unicode("values")], REG_MULTI_SZ),
-    ("Multi-mixed",   [unicode("Unicode"), unicode("and"), "string", "values"],REG_MULTI_SZ),
+    (str("Unicode Val"),  str("A Unicode value"),                      REG_SZ,),
+    ("UnicodeExpand", str("The path is %path%"),                   REG_EXPAND_SZ),
+    ("Multi-unicode", [str("Lots"), str("of"), str("unicode"), str("values")], REG_MULTI_SZ),
+    ("Multi-mixed",   [str("Unicode"), str("and"), "string", "values"],REG_MULTI_SZ),
     ]
 
 def WriteTestData(root_key):

Modified: python/branches/py3k-struni/Lib/test/test_xmlrpc.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/test_xmlrpc.py	(original)
+++ python/branches/py3k-struni/Lib/test/test_xmlrpc.py	Wed May  2 21:09:54 2007
@@ -5,7 +5,7 @@
 from test import test_support
 
 try:
-    unicode
+    str
 except NameError:
     have_unicode = False
 else:
@@ -18,8 +18,8 @@
           'anotherlist': ['.zyx.41'],
           'abase64': xmlrpclib.Binary("my dog has fleas"),
           'boolean': xmlrpclib.False,
-          'unicode': u'\u4000\u6000\u8000',
-          u'ukey\u4000': 'regular value',
+          'unicode': '\u4000\u6000\u8000',
+          'ukey\u4000': 'regular value',
           'datetime1': xmlrpclib.DateTime('20050210T11:41:23'),
           'datetime2': xmlrpclib.DateTime(
                         (2005, 02, 10, 11, 41, 23, 0, 1, -1)),
@@ -147,11 +147,11 @@
 
         items = list(d.items())
         if have_unicode:
-            self.assertEquals(s, u"abc \x95")
-            self.assert_(isinstance(s, unicode))
-            self.assertEquals(items, [(u"def \x96", u"ghi \x97")])
-            self.assert_(isinstance(items[0][0], unicode))
-            self.assert_(isinstance(items[0][1], unicode))
+            self.assertEquals(s, "abc \x95")
+            self.assert_(isinstance(s, str))
+            self.assertEquals(items, [("def \x96", "ghi \x97")])
+            self.assert_(isinstance(items[0][0], str))
+            self.assert_(isinstance(items[0][1], str))
         else:
             self.assertEquals(s, "abc \xc2\x95")
             self.assertEquals(items, [("def \xc2\x96", "ghi \xc2\x97")])

Modified: python/branches/py3k-struni/Lib/test/testcodec.py
==============================================================================
--- python/branches/py3k-struni/Lib/test/testcodec.py	(original)
+++ python/branches/py3k-struni/Lib/test/testcodec.py	Wed May  2 21:09:54 2007
@@ -35,10 +35,10 @@
 
 decoding_map = codecs.make_identity_dict(range(256))
 decoding_map.update({
-        0x78: u"abc", # 1-n decoding mapping
+        0x78: "abc", # 1-n decoding mapping
         "abc": 0x0078,# 1-n encoding mapping
         0x01: None,   # decoding mapping to <undefined>
-        0x79: u"",    # decoding mapping to <remove character>
+        0x79: "",    # decoding mapping to <remove character>
 })
 
 ### Encoding Map

Modified: python/branches/py3k-struni/Lib/textwrap.py
==============================================================================
--- python/branches/py3k-struni/Lib/textwrap.py	(original)
+++ python/branches/py3k-struni/Lib/textwrap.py	Wed May  2 21:09:54 2007
@@ -70,7 +70,7 @@
     whitespace_trans = string.maketrans(_whitespace, ' ' * len(_whitespace))
 
     unicode_whitespace_trans = {}
-    uspace = ord(u' ')
+    uspace = ord(' ')
     for x in map(ord, _whitespace):
         unicode_whitespace_trans[x] = uspace
 
@@ -127,7 +127,7 @@
         if self.replace_whitespace:
             if isinstance(text, str):
                 text = text.translate(self.whitespace_trans)
-            elif isinstance(text, unicode):
+            elif isinstance(text, str):
                 text = text.translate(self.unicode_whitespace_trans)
         return text
 

Modified: python/branches/py3k-struni/Lib/types.py
==============================================================================
--- python/branches/py3k-struni/Lib/types.py	(original)
+++ python/branches/py3k-struni/Lib/types.py	Wed May  2 21:09:54 2007
@@ -28,7 +28,7 @@
 # types.StringTypes", you should use "isinstance(x, basestring)".  But
 # we keep around for compatibility with Python 2.2.
 try:
-    UnicodeType = unicode
+    UnicodeType = str
     StringTypes = (StringType, UnicodeType)
 except NameError:
     StringTypes = (StringType,)

Modified: python/branches/py3k-struni/Lib/urllib.py
==============================================================================
--- python/branches/py3k-struni/Lib/urllib.py	(original)
+++ python/branches/py3k-struni/Lib/urllib.py	Wed May  2 21:09:54 2007
@@ -984,13 +984,13 @@
 # quote('abc def') -> 'abc%20def')
 
 try:
-    unicode
+    str
 except NameError:
     def _is_unicode(x):
         return 0
 else:
     def _is_unicode(x):
-        return isinstance(x, unicode)
+        return isinstance(x, str)
 
 def toBytes(url):
     """toBytes(u"URL") --> 'URL'."""

Modified: python/branches/py3k-struni/Lib/xml/dom/minicompat.py
==============================================================================
--- python/branches/py3k-struni/Lib/xml/dom/minicompat.py	(original)
+++ python/branches/py3k-struni/Lib/xml/dom/minicompat.py	Wed May  2 21:09:54 2007
@@ -41,11 +41,11 @@
 import xml.dom
 
 try:
-    unicode
+    str
 except NameError:
     StringTypes = type(''),
 else:
-    StringTypes = type(''), type(unicode(''))
+    StringTypes = type(''), type(str(''))
 
 
 class NodeList(list):

Modified: python/branches/py3k-struni/Lib/xmlrpclib.py
==============================================================================
--- python/branches/py3k-struni/Lib/xmlrpclib.py	(original)
+++ python/branches/py3k-struni/Lib/xmlrpclib.py	Wed May  2 21:09:54 2007
@@ -144,9 +144,9 @@
 # Internal stuff
 
 try:
-    unicode
+    str
 except NameError:
-    unicode = None # unicode support not available
+    str = None # unicode support not available
 
 try:
     import datetime
@@ -160,8 +160,8 @@
 
 def _decode(data, encoding, is8bit=re.compile("[\x80-\xff]").search):
     # decode non-ascii string (if possible)
-    if unicode and encoding and is8bit(data):
-        data = unicode(data, encoding)
+    if str and encoding and is8bit(data):
+        data = str(data, encoding)
     return data
 
 def escape(s):
@@ -169,7 +169,7 @@
     s = s.replace("<", "&lt;")
     return s.replace(">", "&gt;",)
 
-if unicode:
+if str:
     def _stringify(string):
         # convert to 7-bit ascii if possible
         try:
@@ -632,7 +632,7 @@
         write("</string></value>\n")
     dispatch[StringType] = dump_string
 
-    if unicode:
+    if str:
         def dump_unicode(self, value, write, escape=escape):
             value = value.encode(self.encoding)
             write("<value><string>")
@@ -664,7 +664,7 @@
         for k, v in value.items():
             write("<member>\n")
             if type(k) is not StringType:
-                if unicode and type(k) is UnicodeType:
+                if str and type(k) is UnicodeType:
                     k = k.encode(self.encoding)
                 else:
                     raise TypeError, "dictionary key must be string"


More information about the Python-checkins mailing list