[pypy-svn] r48591 - in pypy/branch/more-unicode-improvements/pypy/module/_codecs: . test
cfbolz at codespeak.net
cfbolz at codespeak.net
Mon Nov 12 03:25:44 CET 2007
Author: cfbolz
Date: Mon Nov 12 03:25:43 2007
New Revision: 48591
Added:
pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py (contents, props changed)
Modified:
pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py
pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py
pypy/branch/more-unicode-improvements/pypy/module/_codecs/test/test_codecs.py
Log:
start moving some bits of the _codecs module to interplevel
Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py Mon Nov 12 03:25:43 2007
@@ -13,15 +13,11 @@
'escape_encode' : 'app_codecs.escape_encode',
'latin_1_decode' : 'app_codecs.latin_1_decode',
'latin_1_encode' : 'app_codecs.latin_1_encode',
- 'lookup' : 'app_codecs.lookup',
- 'lookup_error' : 'app_codecs.lookup_error',
'mbcs_decode' : 'app_codecs.mbcs_decode',
'mbcs_encode' : 'app_codecs.mbcs_encode',
'raw_unicode_escape_decode' : 'app_codecs.raw_unicode_escape_decode',
'raw_unicode_escape_encode' : 'app_codecs.raw_unicode_escape_encode',
'readbuffer_encode' : 'app_codecs.readbuffer_encode',
- 'register' : 'app_codecs.register',
- 'register_error' : 'app_codecs.register_error',
'unicode_escape_decode' : 'app_codecs.unicode_escape_decode',
'unicode_escape_encode' : 'app_codecs.unicode_escape_encode',
'unicode_internal_decode' : 'app_codecs.unicode_internal_decode',
@@ -37,8 +33,21 @@
'utf_7_encode' : 'app_codecs.utf_7_encode',
'utf_8_decode' : 'app_codecs.utf_8_decode',
'utf_8_encode' : 'app_codecs.utf_8_encode',
- 'encode': 'app_codecs.encode',
- 'decode': 'app_codecs.decode'
+ '_register_existing_errors': 'app_codecs._register_existing_errors',
}
interpleveldefs = {
+ 'encode': 'interp_codecs.encode',
+ 'decode': 'interp_codecs.decode',
+ 'lookup': 'interp_codecs.lookup_codec',
+ 'lookup_error': 'interp_codecs.lookup_error',
+ 'register': 'interp_codecs.register_codec',
+ 'register_error': 'interp_codecs.register_error',
}
+
+ def setup_after_space_initialization(self):
+ "NOT_RPYTHON"
+ # The snippet below imports _codecs, calls its _register_existing_errors()
+ # helper once, and then deletes that helper from the module so it is no
+ # longer reachable as a _codecs attribute afterwards.
+ # NOTE(review): presumably space.appexec runs the snippet at application
+ # level -- confirm against the ObjSpace implementation.
+ self.space.appexec([], """():
+ import _codecs
+ _codecs._register_existing_errors()
+ del _codecs._register_existing_errors
+ """)
Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/app_codecs.py Mon Nov 12 03:25:43 2007
@@ -41,100 +41,6 @@
#from unicodecodec import *
import sys
-#/* --- Registry ----------------------------------------------------------- */
-codec_search_path = []
-codec_search_cache = {}
-codec_error_registry = {}
-codec_need_encodings = [True]
-
-def codec_register( search_function ):
- """register(search_function)
-
- Register a codec search function. Search functions are expected to take
- one argument, the encoding name in all lower case letters, and return
- a tuple of functions (encoder, decoder, stream_reader, stream_writer).
- """
-
- if callable(search_function):
- codec_search_path.append(search_function)
-
-register = codec_register
-
-def codec_lookup(encoding):
- """lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
- Looks up a codec tuple in the Python codec registry and returns
- a tuple of functions.
- """
- if not isinstance(encoding, str):
- raise TypeError("Encoding must be a string")
- normalized_encoding = encoding.replace(" ", "-").lower()
- result = codec_search_cache.get(normalized_encoding, None)
- if not result:
- if codec_need_encodings:
- import encodings
- if len(codec_search_path) == 0:
- raise LookupError("no codec search functions registered: can't find encoding")
- del codec_need_encodings[:]
- for search in codec_search_path:
- result = search(normalized_encoding)
- if result:
- if not (type(result) == tuple and len(result) == 4):
- raise TypeError("codec search functions must return 4-tuples")
- else:
- codec_search_cache[normalized_encoding] = result
- return result
- if not result:
- raise LookupError("unknown encoding: %s" % encoding)
- return result
-
-
-lookup = codec_lookup
-
-def encode(v, encoding=None, errors='strict'):
- """encode(obj, [encoding[,errors]]) -> object
-
- Encodes obj using the codec registered for encoding. encoding defaults
- to the default encoding. errors may be given to set a different error
- handling scheme. Default is 'strict' meaning that encoding errors raise
- a ValueError. Other possible values are 'ignore', 'replace' and
- 'xmlcharrefreplace' as well as any other name registered with
- codecs.register_error that can handle ValueErrors.
- """
- if encoding == None:
- encoding = sys.getdefaultencoding()
- if isinstance(encoding, str):
- encoder = lookup(encoding)[0]
- if encoder and isinstance(errors, str):
- res = encoder(v, errors)
- return res[0]
- else:
- raise TypeError("Errors must be a string")
- else:
- raise TypeError("Encoding must be a string")
-
-def decode(obj, encoding=None, errors='strict'):
- """decode(obj, [encoding[,errors]]) -> object
-
- Decodes obj using the codec registered for encoding. encoding defaults
- to the default encoding. errors may be given to set a different error
- handling scheme. Default is 'strict' meaning that encoding errors raise
- a ValueError. Other possible values are 'ignore' and 'replace'
- as well as any other name registerd with codecs.register_error that is
- able to handle ValueErrors.
- """
- if encoding == None:
- encoding = sys.getdefaultencoding()
- if isinstance(encoding, str):
- decoder = lookup(encoding)[1]
- if decoder and isinstance(errors, str):
- res = decoder(obj, errors)
- if not isinstance(res, tuple) or len(res) != 2:
- raise TypeError("encoder must return a tuple (object, integer)")
- return res[0]
- else:
- raise TypeError("Errors must be a string")
- else:
- raise TypeError("Encoding must be a string")
def latin_1_encode( obj, errors='strict'):
"""None
@@ -500,6 +406,14 @@
raise TypeError("don't know how to handle %.400s in error callback"%type(exc))
+def _register_existing_errors():
+    """Register this module's five standard error handlers with _codecs.
+
+    Each handler is passed to _codecs.register_error() under its usual
+    name, in the order listed below.
+    """
+    import _codecs
+    # (name, handler) pairs; the handler names are resolved as globals of
+    # this module.
+    standard_handlers = (
+        ("strict", strict_errors),
+        ("ignore", ignore_errors),
+        ("replace", replace_errors),
+        ("xmlcharrefreplace", xmlcharrefreplace_errors),
+        ("backslashreplace", backslashreplace_errors),
+    )
+    for handler_name, handler in standard_handlers:
+        _codecs.register_error(handler_name, handler)
+
# ----------------------------------------------------------------------
##import sys
@@ -531,38 +445,7 @@
unicode_latin1 = [None]*256
-def lookup_error(errors):
- """lookup_error(errors) -> handler
-
- Return the error handler for the specified error handling name
- or raise a LookupError, if no handler exists under this name.
- """
-
- try:
- err_handler = codec_error_registry[errors]
- except KeyError:
- raise LookupError("unknown error handler name %s"%errors)
- return err_handler
-
-def register_error(errors, handler):
- """register_error(errors, handler)
-
- Register the specified error handler under the name
- errors. handler must be a callable object, that
- will be called with an exception instance containing
- information about the location of the encoding/decoding
- error and must return a (replacement, new position) tuple.
- """
- if callable(handler):
- codec_error_registry[errors] = handler
- else:
- raise TypeError("handler must be callable")
-register_error("strict", strict_errors)
-register_error("ignore", ignore_errors)
-register_error("replace", replace_errors)
-register_error("xmlcharrefreplace", xmlcharrefreplace_errors)
-register_error("backslashreplace", backslashreplace_errors)
def SPECIAL(c, encodeO, encodeWS):
c = ord(c)
@@ -1032,7 +915,8 @@
def unicode_call_errorhandler(errors, encoding,
reason, input, startinpos, endinpos, decode=True):
- errorHandler = lookup_error(errors)
+ import _codecs
+ errorHandler = _codecs.lookup_error(errors)
if decode:
exceptionObject = UnicodeDecodeError(encoding, input, startinpos, endinpos, reason)
else:
Added: pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py
==============================================================================
--- (empty file)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py Mon Nov 12 03:25:43 2007
@@ -0,0 +1,147 @@
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import ObjSpace, NoneNotWrapped
+from pypy.interpreter.baseobjspace import W_Root
+
+class CodecState(object):
+    """Mutable registry state used by the codec functions of this file.
+
+    Instances are obtained through space.fromcache(CodecState); the
+    `space` constructor argument itself is not stored or used.
+    """
+    def __init__(self, space):
+        # error handler name -> wrapped handler (see register_error)
+        self.codec_error_registry = {}
+        # normalized encoding name -> wrapped search result (see lookup_codec)
+        self.codec_search_cache = {}
+        # wrapped search functions, in registration order (see register_codec)
+        self.codec_search_path = []
+        # True until lookup_codec has imported the 'encodings' package once
+        self.codec_need_encodings = True
+
+def register_codec(space, w_search_function):
+    """register(search_function)
+
+    Append a codec search function to the search path. Search functions
+    are expected to take one argument, the encoding name in all lower case
+    letters, and return a tuple of functions
+    (encoder, decoder, stream_reader, stream_writer).
+
+    Raises an app-level TypeError if the argument is not callable.
+    """
+    registry = space.fromcache(CodecState)
+    # Guard clause: reject non-callables before touching the search path.
+    if not space.is_true(space.callable(w_search_function)):
+        raise OperationError(
+            space.w_TypeError,
+            space.wrap("argument must be callable"))
+    registry.codec_search_path.append(w_search_function)
+register_codec.unwrap_spec = [ObjSpace, W_Root]
+
+
+def lookup_codec(space, encoding):
+ """lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
+ Looks up a codec tuple in the Python codec registry and returns
+ a tuple of functions.
+
+ The encoding name is normalized (spaces become '-', then lowercased)
+ before it is used as cache key and handed to the search functions.
+ Raises an app-level LookupError if no search function is registered or
+ none of them returns a result, and an app-level TypeError if a search
+ function returns something other than None or a 4-tuple.
+ """
+ state = space.fromcache(CodecState)
+ normalized_encoding = encoding.replace(" ", "-").lower()
+ # Fast path: a previous successful lookup was cached under this name.
+ w_result = state.codec_search_cache.get(normalized_encoding, None)
+ if w_result is not None:
+ return w_result
+ # First uncached lookup: import the 'encodings' package once.
+ if state.codec_need_encodings:
+ w_import = space.getattr(space.builtin, space.wrap("__import__"))
+ # registers new codecs
+ space.call_function(w_import, space.wrap("encodings"))
+ state.codec_need_encodings = False
+ # NOTE(review): indentation was lost in this archived diff; presumably
+ # this check is nested in the codec_need_encodings branch, as in the
+ # removed app-level codec_lookup -- confirm against the repository.
+ if len(state.codec_search_path) == 0:
+ raise OperationError(
+ space.w_LookupError,
+ space.wrap("no codec search functions registered: "
+ "can't find encoding"))
+ # Ask each search function in registration order; the first non-None
+ # answer wins and is cached.
+ for w_search in state.codec_search_path:
+ w_result = space.call_function(w_search,
+ space.wrap(normalized_encoding))
+ if not space.is_w(w_result, space.w_None):
+ # The result type must be exactly tuple (identity check on the type)
+ # and its length must be 4.
+ if not (space.is_true(space.is_(space.type(w_result),
+ space.w_tuple)) and
+ space.int_w(space.len(w_result)) == 4):
+ raise OperationError(
+ space.w_TypeError,
+ space.wrap("codec search functions must return 4-tuples"))
+ else:
+ state.codec_search_cache[normalized_encoding] = w_result
+ return w_result
+ # The error message uses the caller's original spelling, not the
+ # normalized name.
+ raise OperationError(
+ space.w_LookupError,
+ space.wrap("unknown encoding: %s" % encoding))
+lookup_codec.unwrap_spec = [ObjSpace, str]
+
+
+def lookup_error(space, errors):
+    """lookup_error(errors) -> handler
+
+    Return the error handler registered under the name `errors`.
+    Raises an app-level LookupError if no handler exists under this name.
+    """
+    handlers = space.fromcache(CodecState).codec_error_registry
+    if errors not in handlers:
+        raise OperationError(
+            space.w_LookupError,
+            space.wrap("unknown error handler name %s" % errors))
+    return handlers[errors]
+lookup_error.unwrap_spec = [ObjSpace, str]
+
+
+def encode(space, w_obj, encoding=NoneNotWrapped, errors='strict'):
+    """encode(obj, [encoding[,errors]]) -> object
+
+    Encode obj with the codec registered for encoding. When encoding is
+    omitted, space.sys.defaultencoding is used. errors names the error
+    handling scheme and defaults to 'strict'; it is passed unchanged to
+    the encoder.
+
+    The encoder is item 0 of the tuple returned by lookup_codec(); the
+    value returned here is item 0 of the encoder's result.
+    """
+    if encoding is None:
+        encoding = space.sys.defaultencoding
+    w_codec_tuple = lookup_codec(space, encoding)
+    w_encoder = space.getitem(w_codec_tuple, space.wrap(0))
+    if space.is_true(w_encoder):
+        w_errors = space.wrap(errors)
+        w_encoded = space.call_function(w_encoder, w_obj, w_errors)
+        return space.getitem(w_encoded, space.wrap(0))
+    else:
+        # Placeholder kept from the original: a false encoder entry in the
+        # codec tuple is not handled yet.
+        assert 0, "XXX, what to do here?"
+encode.unwrap_spec = [ObjSpace, W_Root, str, str]
+
+def decode(space, w_obj, encoding=NoneNotWrapped, errors='strict'):
+    """decode(obj, [encoding[,errors]]) -> object
+
+    Decodes obj using the codec registered for encoding. encoding defaults
+    to the default encoding. errors may be given to set a different error
+    handling scheme. Default is 'strict' meaning that encoding errors raise
+    a ValueError. Other possible values are 'ignore' and 'replace'
+    as well as any other name registered with codecs.register_error that is
+    able to handle ValueErrors.
+
+    The decoder is item 1 of the tuple returned by lookup_codec(); the
+    value returned here is item 0 of the decoder's result. Raises an
+    app-level TypeError if the decoder does not return a 2-tuple.
+    """
+    if encoding is None:
+        # This file does not import 'sys'; read the default encoding from
+        # the space's sys module, exactly as encode() does.
+        encoding = space.sys.defaultencoding
+    w_decoder = space.getitem(lookup_codec(space, encoding), space.wrap(1))
+    if space.is_true(w_decoder):
+        w_res = space.call_function(w_decoder, w_obj, space.wrap(errors))
+        if (not space.is_true(space.isinstance(w_res, space.w_tuple))
+            or space.int_w(space.len(w_res)) != 2):
+            # Wrap the error in OperationError, as the other functions in
+            # this file do for app-level exceptions, instead of raising a
+            # bare interp-level TypeError.
+            raise OperationError(
+                space.w_TypeError,
+                space.wrap("encoder must return a tuple (object, integer)"))
+        return space.getitem(w_res, space.wrap(0))
+    else:
+        # Placeholder kept from the original: a false decoder entry in the
+        # codec tuple is not handled yet.
+        assert 0, "XXX, what to do here?"
+decode.unwrap_spec = [ObjSpace, W_Root, str, str]
+
+def register_error(space, errors, w_handler):
+    """register_error(errors, handler)
+
+    Store handler in the error-handler registry under the name errors,
+    replacing any handler previously stored under that name. handler must
+    be a callable object that will be called with an exception instance
+    containing information about the location of the encoding/decoding
+    error and must return a (replacement, new position) tuple.
+
+    Raises an app-level TypeError if handler is not callable.
+    """
+    registry = space.fromcache(CodecState)
+    # Guard clause: only callables may be stored in the registry.
+    if not space.is_true(space.callable(w_handler)):
+        raise OperationError(
+            space.w_TypeError,
+            space.wrap("handler must be callable"))
+    registry.codec_error_registry[errors] = w_handler
+register_error.unwrap_spec = [ObjSpace, str, W_Root]
Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/test/test_codecs.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/test/test_codecs.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/test/test_codecs.py Mon Nov 12 03:25:43 2007
@@ -6,6 +6,10 @@
space = gettestobjspace(usemodules=('unicodedata',))
cls.space = space
+ def test_register_noncallable(self):
+ # _codecs.register() is expected to raise TypeError when it is given a
+ # non-callable (here the integer 1) instead of a search function.
+ # NOTE(review): `raises` is not imported here; presumably the test
+ # harness injects it -- confirm.
+ import _codecs
+ raises(TypeError, _codecs.register, 1)
+
def test_bigU_codecs(self):
import sys
oldmaxunicode = sys.maxunicode
More information about the Pypy-commit
mailing list