[pypy-svn] r48591 - in pypy/branch/more-unicode-improvements/pypy/module/_codecs: . test

cfbolz at codespeak.net cfbolz at codespeak.net
Mon Nov 12 03:25:44 CET 2007


Author: cfbolz
Date: Mon Nov 12 03:25:43 2007
New Revision: 48591

Added:
   pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py   (contents, props changed)
Modified:
   pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py
   pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py
   pypy/branch/more-unicode-improvements/pypy/module/_codecs/test/test_codecs.py
Log:
start moving some bits of the _codecs module to interplevel


Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py	Mon Nov 12 03:25:43 2007
@@ -13,15 +13,11 @@
          'escape_encode' :  'app_codecs.escape_encode',
          'latin_1_decode' :  'app_codecs.latin_1_decode',
          'latin_1_encode' :  'app_codecs.latin_1_encode',
-         'lookup' :  'app_codecs.lookup',
-         'lookup_error' :  'app_codecs.lookup_error',
          'mbcs_decode' :  'app_codecs.mbcs_decode',
          'mbcs_encode' :  'app_codecs.mbcs_encode',
          'raw_unicode_escape_decode' :  'app_codecs.raw_unicode_escape_decode',
          'raw_unicode_escape_encode' :  'app_codecs.raw_unicode_escape_encode',
          'readbuffer_encode' :  'app_codecs.readbuffer_encode',
-         'register' :  'app_codecs.register',
-         'register_error' :  'app_codecs.register_error',
          'unicode_escape_decode' :  'app_codecs.unicode_escape_decode',
          'unicode_escape_encode' :  'app_codecs.unicode_escape_encode',
          'unicode_internal_decode' :  'app_codecs.unicode_internal_decode',
@@ -37,8 +33,21 @@
          'utf_7_encode' :  'app_codecs.utf_7_encode',
          'utf_8_decode' :  'app_codecs.utf_8_decode',
          'utf_8_encode' :  'app_codecs.utf_8_encode',
-         'encode': 'app_codecs.encode',
-         'decode': 'app_codecs.decode'
+         '_register_existing_errors': 'app_codecs._register_existing_errors',
     }
     interpleveldefs = {
+         'encode':         'interp_codecs.encode',
+         'decode':         'interp_codecs.decode',
+         'lookup':         'interp_codecs.lookup_codec',
+         'lookup_error':   'interp_codecs.lookup_error',
+         'register':       'interp_codecs.register_codec',
+         'register_error': 'interp_codecs.register_error',
     }
+
+    def setup_after_space_initialization(self):
+        "NOT_RPYTHON"
+        # NOTE(review): the "NOT_RPYTHON" string above is presumably the
+        # marker that keeps this method out of translation -- confirm.
+        #
+        # Runs a small application-level snippet that calls the app-level
+        # helper _codecs._register_existing_errors() and then deletes that
+        # helper from the _codecs module again.
+        self.space.appexec([], """():
+            import _codecs
+            _codecs._register_existing_errors()
+            del _codecs._register_existing_errors
+        """)

Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py	Mon Nov 12 03:25:43 2007
@@ -41,100 +41,6 @@
 #from unicodecodec import *
 
 import sys
-#/* --- Registry ----------------------------------------------------------- */
-codec_search_path = []
-codec_search_cache = {}
-codec_error_registry = {}
-codec_need_encodings = [True]
-
-def codec_register( search_function ):
-    """register(search_function)
-    
-    Register a codec search function. Search functions are expected to take
-    one argument, the encoding name in all lower case letters, and return
-    a tuple of functions (encoder, decoder, stream_reader, stream_writer).
-    """
-
-    if callable(search_function):
-        codec_search_path.append(search_function)
-
-register = codec_register
-
-def codec_lookup(encoding):
-    """lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
-    Looks up a codec tuple in the Python codec registry and returns
-    a tuple of functions.
-    """
-    if not isinstance(encoding, str):
-        raise TypeError("Encoding must be a string")
-    normalized_encoding = encoding.replace(" ", "-").lower()    
-    result = codec_search_cache.get(normalized_encoding, None)
-    if not result:
-        if codec_need_encodings:
-            import encodings
-            if len(codec_search_path) == 0:
-                raise LookupError("no codec search functions registered: can't find encoding")
-            del codec_need_encodings[:]
-        for search in codec_search_path:
-            result = search(normalized_encoding)
-            if result:
-                if not (type(result) == tuple and len(result) == 4):
-                    raise TypeError("codec search functions must return 4-tuples")
-                else:
-                    codec_search_cache[normalized_encoding] = result 
-                    return result
-        if not result:
-            raise LookupError("unknown encoding: %s" % encoding)
-    return result
-    
-
-lookup = codec_lookup
-
-def encode(v, encoding=None, errors='strict'):
-    """encode(obj, [encoding[,errors]]) -> object
-    
-    Encodes obj using the codec registered for encoding. encoding defaults
-    to the default encoding. errors may be given to set a different error
-    handling scheme. Default is 'strict' meaning that encoding errors raise
-    a ValueError. Other possible values are 'ignore', 'replace' and
-    'xmlcharrefreplace' as well as any other name registered with
-    codecs.register_error that can handle ValueErrors.
-    """
-    if encoding == None:
-        encoding = sys.getdefaultencoding()
-    if isinstance(encoding, str):
-        encoder = lookup(encoding)[0]
-        if encoder and isinstance(errors, str):
-            res = encoder(v, errors)
-            return res[0]
-        else:
-            raise TypeError("Errors must be a string")
-    else:
-        raise TypeError("Encoding must be a string")
-
-def decode(obj, encoding=None, errors='strict'):
-    """decode(obj, [encoding[,errors]]) -> object
-
-    Decodes obj using the codec registered for encoding. encoding defaults
-    to the default encoding. errors may be given to set a different error
-    handling scheme. Default is 'strict' meaning that encoding errors raise
-    a ValueError. Other possible values are 'ignore' and 'replace'
-    as well as any other name registerd with codecs.register_error that is
-    able to handle ValueErrors.
-    """
-    if encoding == None:
-        encoding = sys.getdefaultencoding()
-    if isinstance(encoding, str):
-        decoder = lookup(encoding)[1]
-        if decoder and isinstance(errors, str):
-            res = decoder(obj, errors)
-            if not isinstance(res, tuple) or len(res) != 2:
-                raise TypeError("encoder must return a tuple (object, integer)")
-            return res[0]
-        else:
-            raise TypeError("Errors must be a string")
-    else:
-        raise TypeError("Encoding must be a string")
 
 def latin_1_encode( obj, errors='strict'):
     """None
@@ -500,6 +406,14 @@
         raise TypeError("don't know how to handle %.400s in error callback"%type(exc))
 
 
+def _register_existing_errors():
+    """Register the error handlers defined in this module with _codecs.
+
+    Every handler is installed through _codecs.register_error under the
+    name listed next to it below.
+    """
+    import _codecs
+    named_handlers = [
+        ("strict", strict_errors),
+        ("ignore", ignore_errors),
+        ("replace", replace_errors),
+        ("xmlcharrefreplace", xmlcharrefreplace_errors),
+        ("backslashreplace", backslashreplace_errors),
+    ]
+    for name, handler in named_handlers:
+        _codecs.register_error(name, handler)
+
 #  ----------------------------------------------------------------------
 
 ##import sys
@@ -531,38 +445,7 @@
 unicode_latin1 = [None]*256
 
 
-def lookup_error(errors):
-    """lookup_error(errors) -> handler
-
-    Return the error handler for the specified error handling name
-    or raise a LookupError, if no handler exists under this name.
-    """
-    
-    try:
-        err_handler = codec_error_registry[errors]
-    except KeyError:
-        raise LookupError("unknown error handler name %s"%errors)
-    return err_handler
-
-def register_error(errors, handler):
-    """register_error(errors, handler)
-
-    Register the specified error handler under the name
-    errors. handler must be a callable object, that
-    will be called with an exception instance containing
-    information about the location of the encoding/decoding
-    error and must return a (replacement, new position) tuple.
-    """
-    if callable(handler):
-        codec_error_registry[errors] = handler
-    else:
-        raise TypeError("handler must be callable")
 
-register_error("strict", strict_errors)
-register_error("ignore", ignore_errors)
-register_error("replace", replace_errors)
-register_error("xmlcharrefreplace", xmlcharrefreplace_errors)
-register_error("backslashreplace", backslashreplace_errors)
     
 def SPECIAL(c, encodeO, encodeWS):
     c = ord(c)
@@ -1032,7 +915,8 @@
 def unicode_call_errorhandler(errors,  encoding, 
                 reason, input, startinpos, endinpos, decode=True):
     
-    errorHandler = lookup_error(errors)
+    import _codecs
+    errorHandler = _codecs.lookup_error(errors)
     if decode:
         exceptionObject = UnicodeDecodeError(encoding, input, startinpos, endinpos, reason)
     else:

Added: pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py
==============================================================================
--- (empty file)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py	Mon Nov 12 03:25:43 2007
@@ -0,0 +1,147 @@
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import ObjSpace, NoneNotWrapped
+from pypy.interpreter.baseobjspace import W_Root
+
+class CodecState(object):
+    """Container for the codec registry data used by this module.
+
+    The functions below obtain their instance via
+    space.fromcache(CodecState).
+    """
+    def __init__(self, space):
+        # True until lookup_codec() has imported the 'encodings' package
+        self.codec_need_encodings = True
+        # error handler name -> wrapped handler (filled by register_error)
+        self.codec_error_registry = {}
+        # normalized encoding name -> wrapped result of a search function
+        self.codec_search_cache = {}
+        # wrapped search functions, in the order they were registered
+        self.codec_search_path = []
+
+def register_codec(space, w_search_function):
+    """register(search_function)
+    
+    Register a codec search function. Search functions are expected to take
+    one argument, the encoding name in all lower case letters, and return
+    a tuple of functions (encoder, decoder, stream_reader, stream_writer).
+    """
+    state = space.fromcache(CodecState)
+    # Guard clause: anything that is not callable is rejected with an
+    # application-level TypeError.
+    if not space.is_true(space.callable(w_search_function)):
+        raise OperationError(
+            space.w_TypeError,
+            space.wrap("argument must be callable"))
+    # Search functions are consulted by lookup_codec() in this order.
+    state.codec_search_path.append(w_search_function)
+register_codec.unwrap_spec = [ObjSpace, W_Root]
+
+
+def lookup_codec(space, encoding):
+    """lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
+    Looks up a codec tuple in the Python codec registry and returns
+    a tuple of functions.
+    """
+    state = space.fromcache(CodecState)
+    # Cache key: spaces become dashes, everything lower-cased.
+    key = encoding.replace(" ", "-").lower()
+    w_cached = state.codec_search_cache.get(key, None)
+    if w_cached is not None:
+        return w_cached
+    if state.codec_need_encodings:
+        # First lookup that misses the cache: import the 'encodings'
+        # package, which is expected to register codec search functions.
+        w_import = space.getattr(space.builtin, space.wrap("__import__"))
+        space.call_function(w_import, space.wrap("encodings"))
+        state.codec_need_encodings = False
+        if len(state.codec_search_path) == 0:
+            raise OperationError(
+                space.w_LookupError,
+                space.wrap("no codec search functions registered: "
+                           "can't find encoding"))
+    for w_search_fn in state.codec_search_path:
+        w_found = space.call_function(w_search_fn, space.wrap(key))
+        # A search function answers None when it does not know the name.
+        if space.is_w(w_found, space.w_None):
+            continue
+        # Only an exact tuple of length 4 is accepted; the length is only
+        # inspected once the type check has passed.
+        exact_tuple = space.is_true(space.is_(space.type(w_found),
+                                              space.w_tuple))
+        if not exact_tuple or space.int_w(space.len(w_found)) != 4:
+            raise OperationError(
+                space.w_TypeError,
+                space.wrap("codec search functions must return 4-tuples"))
+        state.codec_search_cache[key] = w_found
+        return w_found
+    # No search function recognised the name.
+    raise OperationError(
+        space.w_LookupError,
+        space.wrap("unknown encoding: %s" % encoding))
+lookup_codec.unwrap_spec = [ObjSpace, str]
+    
+
+def lookup_error(space, errors):
+    """lookup_error(errors) -> handler
+
+    Return the error handler for the specified error handling name
+    or raise a LookupError, if no handler exists under this name.
+    """
+    registry = space.fromcache(CodecState).codec_error_registry
+    # Names that were never passed to register_error() are reported as an
+    # application-level LookupError.
+    if errors not in registry:
+        raise OperationError(
+            space.w_LookupError,
+            space.wrap("unknown error handler name %s" % errors))
+    return registry[errors]
+lookup_error.unwrap_spec = [ObjSpace, str]
+
+
+def encode(space, w_obj, encoding=NoneNotWrapped, errors='strict'):
+    """encode(obj, [encoding[,errors]]) -> object
+    
+    Encodes obj using the codec registered for encoding. encoding defaults
+    to the default encoding. errors may be given to set a different error
+    handling scheme. Default is 'strict' meaning that encoding errors raise
+    a ValueError. Other possible values are 'ignore', 'replace' and
+    'xmlcharrefreplace' as well as any other name registered with
+    codecs.register_error that can handle ValueErrors.
+    """
+    if encoding is None:
+        encoding = space.sys.defaultencoding
+    # Item 0 of the looked-up codec tuple is the encoder.
+    w_codec_tuple = lookup_codec(space, encoding)
+    w_encoder = space.getitem(w_codec_tuple, space.wrap(0))
+    if not space.is_true(w_encoder):
+        # NOTE(review): placeholder kept from the original -- a false
+        # encoder entry currently trips an interp-level assertion.
+        assert 0, "XXX, what to do here?"
+    else:
+        # The encoder's result is indexed; only its item 0 is returned.
+        w_encoded = space.call_function(w_encoder, w_obj, space.wrap(errors))
+        return space.getitem(w_encoded, space.wrap(0))
+encode.unwrap_spec = [ObjSpace, W_Root, str, str]
+
+def decode(space, w_obj, encoding=NoneNotWrapped, errors='strict'):
+    """decode(obj, [encoding[,errors]]) -> object
+
+    Decodes obj using the codec registered for encoding. encoding defaults
+    to the default encoding. errors may be given to set a different error
+    handling scheme. Default is 'strict' meaning that encoding errors raise
+    a ValueError. Other possible values are 'ignore' and 'replace'
+    as well as any other name registered with codecs.register_error that is
+    able to handle ValueErrors.
+    """
+    if encoding is None:
+        # Same default as encode(); 'sys' is not imported in this module,
+        # so the default encoding has to come from the space.
+        encoding = space.sys.defaultencoding
+    # Item 1 of the looked-up codec tuple is the decoder.
+    w_decoder = space.getitem(lookup_codec(space, encoding), space.wrap(1))
+    if space.is_true(w_decoder):
+        w_res = space.call_function(w_decoder, w_obj, space.wrap(errors))
+        if (not space.is_true(space.isinstance(w_res, space.w_tuple))
+            or space.int_w(space.len(w_res)) != 2):
+            # Report a malformed decoder result as an application-level
+            # TypeError, like the other errors raised in this module.
+            raise OperationError(
+                space.w_TypeError,
+                space.wrap("decoder must return a tuple (object, integer)"))
+        return space.getitem(w_res, space.wrap(0))
+    else:
+        # NOTE(review): placeholder kept from the original -- a false
+        # decoder entry currently trips an interp-level assertion.
+        assert 0, "XXX, what to do here?"
+decode.unwrap_spec = [ObjSpace, W_Root, str, str]
+
+def register_error(space, errors, w_handler):
+    """register_error(errors, handler)
+
+    Register the specified error handler under the name
+    errors. handler must be a callable object, that
+    will be called with an exception instance containing
+    information about the location of the encoding/decoding
+    error and must return a (replacement, new position) tuple.
+    """
+    state = space.fromcache(CodecState)
+    # Guard clause: a handler that is not callable is rejected with an
+    # application-level TypeError.
+    if not space.is_true(space.callable(w_handler)):
+        raise OperationError(
+            space.w_TypeError,
+            space.wrap("handler must be callable"))
+    # An existing entry under the same name is overwritten.
+    state.codec_error_registry[errors] = w_handler
+register_error.unwrap_spec = [ObjSpace, str, W_Root]

Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/test/test_codecs.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/test/test_codecs.py	(original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/test/test_codecs.py	Mon Nov 12 03:25:43 2007
@@ -6,6 +6,10 @@
         space = gettestobjspace(usemodules=('unicodedata',))
         cls.space = space
 
+    def test_register_noncallable(self):
+        # _codecs.register() must raise TypeError when it is handed
+        # something that is not callable (here the integer 1).
+        import _codecs
+        raises(TypeError, _codecs.register, 1)
+
     def test_bigU_codecs(self):
         import sys
         oldmaxunicode = sys.maxunicode



More information about the Pypy-commit mailing list