[pypy-svn] r48615 - pypy/branch/more-unicode-improvements/pypy/module/_codecs
cfbolz at codespeak.net
cfbolz at codespeak.net
Mon Nov 12 22:23:28 CET 2007
Author: cfbolz
Date: Mon Nov 12 22:23:27 2007
New Revision: 48615
Modified:
pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py
pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py
Log:
plug the RPython decoders and encoders into the _codecs module.
Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/__init__.py Mon Nov 12 22:23:27 2007
@@ -4,15 +4,11 @@
appleveldefs = {
'__doc__' : 'app_codecs.__doc__',
'__name__' : 'app_codecs.__name__',
- 'ascii_decode' : 'app_codecs.ascii_decode',
- 'ascii_encode' : 'app_codecs.ascii_encode',
'charbuffer_encode' : 'app_codecs.charbuffer_encode',
'charmap_decode' : 'app_codecs.charmap_decode',
'charmap_encode' : 'app_codecs.charmap_encode',
'escape_decode' : 'app_codecs.escape_decode',
'escape_encode' : 'app_codecs.escape_encode',
- 'latin_1_decode' : 'app_codecs.latin_1_decode',
- 'latin_1_encode' : 'app_codecs.latin_1_encode',
'mbcs_decode' : 'app_codecs.mbcs_decode',
'mbcs_encode' : 'app_codecs.mbcs_encode',
'raw_unicode_escape_decode' : 'app_codecs.raw_unicode_escape_decode',
@@ -22,17 +18,9 @@
'unicode_escape_encode' : 'app_codecs.unicode_escape_encode',
'unicode_internal_decode' : 'app_codecs.unicode_internal_decode',
'unicode_internal_encode' : 'app_codecs.unicode_internal_encode',
- 'utf_16_be_decode' : 'app_codecs.utf_16_be_decode',
- 'utf_16_be_encode' : 'app_codecs.utf_16_be_encode',
- 'utf_16_decode' : 'app_codecs.utf_16_decode',
- 'utf_16_encode' : 'app_codecs.utf_16_encode',
- 'utf_16_ex_decode' : 'app_codecs.utf_16_ex_decode',
- 'utf_16_le_decode' : 'app_codecs.utf_16_le_decode',
- 'utf_16_le_encode' : 'app_codecs.utf_16_le_encode',
'utf_7_decode' : 'app_codecs.utf_7_decode',
'utf_7_encode' : 'app_codecs.utf_7_encode',
- 'utf_8_decode' : 'app_codecs.utf_8_decode',
- 'utf_8_encode' : 'app_codecs.utf_8_encode',
+ 'utf_16_ex_decode' : 'app_codecs.utf_16_ex_decode',
'_register_existing_errors': 'app_codecs._register_existing_errors',
}
interpleveldefs = {
@@ -42,6 +30,20 @@
'lookup_error': 'interp_codecs.lookup_error',
'register': 'interp_codecs.register_codec',
'register_error': 'interp_codecs.register_error',
+
+ # encoders and decoders
+ 'ascii_decode' : 'interp_codecs.ascii_decode',
+ 'ascii_encode' : 'interp_codecs.ascii_encode',
+ 'latin_1_decode' : 'interp_codecs.latin_1_decode',
+ 'latin_1_encode' : 'interp_codecs.latin_1_encode',
+ 'utf_8_decode' : 'interp_codecs.utf_8_decode',
+ 'utf_8_encode' : 'interp_codecs.utf_8_encode',
+ 'utf_16_be_decode' : 'interp_codecs.utf_16_be_decode',
+ 'utf_16_be_encode' : 'interp_codecs.utf_16_be_encode',
+ 'utf_16_decode' : 'interp_codecs.utf_16_decode',
+ 'utf_16_encode' : 'interp_codecs.utf_16_encode',
+ 'utf_16_le_decode' : 'interp_codecs.utf_16_le_decode',
+ 'utf_16_le_encode' : 'interp_codecs.utf_16_le_encode',
}
def setup_after_space_initialization(self):
@@ -49,5 +51,4 @@
self.space.appexec([], """():
import _codecs
_codecs._register_existing_errors()
- del _codecs._register_existing_errors
""")
Modified: pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/module/_codecs/interp_codecs.py Mon Nov 12 22:23:27 2007
@@ -8,6 +8,52 @@
self.codec_search_cache = {}
self.codec_error_registry = {}
self.codec_need_encodings = True
+ self.error_handler = self.make_errorhandler(space)
+
+    def make_errorhandler(self, space):
+        """Build the error-handler callback used by the codec functions.
+
+        The returned function looks up the handler registered under
+        `errors`, calls it with a newly created UnicodeDecodeError (when
+        `decode` is true) or UnicodeEncodeError, and returns the pair
+        (replacement, newpos) extracted from the handler's result.
+
+        Raises an app-level TypeError if the handler's result cannot be
+        unpacked as a 2-tuple, and an app-level IndexError if the
+        returned position lies outside the input.
+        """
+        def unicode_call_errorhandler(errors, encoding, reason, input,
+                                      startinpos, endinpos, decode=True):
+
+            w_errorhandler = lookup_error(space, errors)
+            if decode:
+                w_cls = space.w_UnicodeDecodeError
+            else:
+                w_cls = space.w_UnicodeEncodeError
+            # Exception arguments: (encoding, object, start, end, reason).
+            w_exc = space.call_function(
+                w_cls,
+                space.wrap(encoding),
+                space.wrap(input),
+                space.wrap(startinpos),
+                space.wrap(endinpos),
+                space.wrap(reason))
+            w_res = space.call_function(w_errorhandler, w_exc)
+            try:
+                w_replace, w_newpos = space.unpacktuple(w_res, 2)
+            except OperationError, e:
+                # Only a TypeError from the unpacking is translated into
+                # the more descriptive message; anything else propagates.
+                if not e.match(space, space.w_TypeError):
+                    raise
+                raise OperationError(
+                    space.w_TypeError,
+                    space.wrap("encoding error handler must return "
+                               "(unicode, int) tuple, not %s" % (
+                                   space.str_w(space.repr(w_res)))))
+            newpos = space.int_w(w_newpos)
+            # A negative position counts from the end of the input.
+            if (newpos < 0):
+                newpos = len(input) + newpos
+            if newpos < 0 or newpos > len(input):
+                raise OperationError(
+                    space.w_IndexError,
+                    space.wrap("position %d from error handler "
+                               "out of bounds" % newpos))
+            if decode:
+                replace = space.unicode_w(w_replace)
+                return replace, newpos
+            else:
+                replace = space.str_w(w_replace)
+                return replace, newpos
+        # Annotator hint: specialize on argument 6 (`decode`); presumably
+        # needed because the two branches return different replacement
+        # types -- TODO confirm.
+        unicode_call_errorhandler._annspecialcase_ = "specialize:arg(6)"
+        return unicode_call_errorhandler
+
def register_codec(space, w_search_function):
"""register(search_function)
@@ -95,6 +141,7 @@
'xmlcharrefreplace' as well as any other name registered with
codecs.register_error that can handle ValueErrors.
"""
+ #import pdb; pdb.set_trace()
if encoding is None:
encoding = space.sys.defaultencoding
w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
@@ -145,3 +192,50 @@
space.w_TypeError,
space.wrap("handler must be callable"))
register_error.unwrap_spec = [ObjSpace, str, W_Root]
+
+# ____________________________________________________________
+# delegation to runicode
+
+from pypy.rlib import runicode
+
+def make_encoder_wrapper(name):
+    """Define the interp-level encoder `name` in this module's globals.
+
+    `name` has the form "<codec>_encode"; the generated function
+    delegates to runicode.unicode_encode_<codec> and returns a wrapped
+    tuple (encoded string, number of input characters consumed).
+    """
+    rname = "unicode_encode_%s" % (name.replace("_encode", ""), )
+    def wrap_encoder(space, uni, errors="strict"):
+        state = space.fromcache(CodecState)
+        func = getattr(runicode, rname)
+        result = func(uni, len(uni), errors, state.error_handler)
+        # The second item reports how much of the *input* was consumed,
+        # not the size of the encoded output (they differ for multi-byte
+        # encodings such as UTF-8 and UTF-16).
+        return space.newtuple([space.wrap(result), space.wrap(len(uni))])
+    wrap_encoder.unwrap_spec = [ObjSpace, unicode, str]
+    globals()[name] = wrap_encoder
+
+def make_decoder_wrapper(name):
+    """Define the interp-level decoder `name` in this module's globals.
+
+    `name` has the form "<codec>_decode"; the generated function
+    delegates to runicode.str_decode_<codec> and returns a wrapped
+    tuple (decoded unicode, number of input characters consumed).
+    """
+    codec = name.replace("_decode", "")
+    rname = "str_decode_%s" % (codec, )
+    def wrap_decoder(space, string, errors="strict", w_final=True):
+        is_final = space.is_true(w_final)
+        handler = space.fromcache(CodecState).error_handler
+        decode = getattr(runicode, rname)
+        decoded, consumed = decode(string, len(string), errors,
+                                   is_final, handler)
+        w_items = [space.wrap(decoded), space.wrap(consumed)]
+        return space.newtuple(w_items)
+    wrap_decoder.unwrap_spec = [ObjSpace, str, str, W_Root]
+    globals()[name] = wrap_decoder
+
+# Generate one interp-level encoder per listed name; each call to
+# make_encoder_wrapper stores the new function in this module's globals.
+for encoders in [
+ "ascii_encode",
+ "latin_1_encode",
+ "utf_8_encode",
+ "utf_16_encode",
+ "utf_16_be_encode",
+ "utf_16_le_encode",
+ ]:
+ make_encoder_wrapper(encoders)
+
+# Same for the decoders, via make_decoder_wrapper.
+for decoders in [
+ "ascii_decode",
+ "latin_1_decode",
+ "utf_8_decode",
+ "utf_16_decode",
+ "utf_16_be_decode",
+ "utf_16_le_decode",
+ ]:
+ make_decoder_wrapper(decoders)
More information about the Pypy-commit
mailing list