[pypy-svn] r61600 - in pypy/trunk/pypy: module/_codecs module/_codecs/test rlib
afa at codespeak.net
afa at codespeak.net
Fri Feb 6 23:37:19 CET 2009
Author: afa
Date: Fri Feb 6 23:37:18 2009
New Revision: 61600
Modified:
pypy/trunk/pypy/module/_codecs/__init__.py
pypy/trunk/pypy/module/_codecs/interp_codecs.py
pypy/trunk/pypy/module/_codecs/test/test_codecs.py
pypy/trunk/pypy/rlib/runicode.py
Log:
Provide an approximation of the mbcs codec, almost correct
on a Western installation of Windows (cp1252).
I do have a complete, working implementation based on WideCharToMultiByte,
but it depends on several functions that are still missing from rffi
(get_nonmoving_unicodebuffer, unicode_from_buffer & co.)
and that I don't want to submit just before the sprint :-)
Modified: pypy/trunk/pypy/module/_codecs/__init__.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/__init__.py (original)
+++ pypy/trunk/pypy/module/_codecs/__init__.py Fri Feb 6 23:37:18 2009
@@ -1,5 +1,6 @@
from pypy.interpreter.mixedmodule import MixedModule
-
+from pypy.rlib import runicode
+
class Module(MixedModule):
appleveldefs = {
'__doc__' : 'app_codecs.__doc__',
@@ -45,6 +46,10 @@
'readbuffer_encode': 'interp_codecs.buffer_encode',
}
+ if hasattr(runicode, 'str_decode_mbcs'):
+ interpleveldefs['mbcs_encode'] = 'interp_codecs.mbcs_encode'
+ interpleveldefs['mbcs_decode'] = 'interp_codecs.mbcs_decode'
+
def setup_after_space_initialization(self):
"NOT_RPYTHON"
self.space.appexec([], """():
Modified: pypy/trunk/pypy/module/_codecs/interp_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/interp_codecs.py (original)
+++ pypy/trunk/pypy/module/_codecs/interp_codecs.py Fri Feb 6 23:37:18 2009
@@ -247,6 +247,9 @@
]:
make_decoder_wrapper(decoders)
+if hasattr(runicode, 'str_decode_mbcs'):
+ make_encoder_wrapper('mbcs_encode')
+ make_decoder_wrapper('mbcs_decode')
def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=False):
"""None
Modified: pypy/trunk/pypy/module/_codecs/test/test_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/test/test_codecs.py (original)
+++ pypy/trunk/pypy/module/_codecs/test/test_codecs.py Fri Feb 6 23:37:18 2009
@@ -528,6 +528,15 @@
assert u'\\'.encode('unicode-escape') == '\\\\'
assert '\\\\'.decode('unicode-escape') == u'\\'
+ def test_mbcs(self):
+ import sys
+ if sys.platform != 'win32':
+ return
+ assert u'test'.encode('mbcs') == 'test'
+ assert u'caf\xe9'.encode('mbcs') == 'caf\xe9'
+ assert u'\u040a'.encode('mbcs') == '?' # some cyrillic letter
+ assert 'caf\xe9'.decode('mbcs') == u'caf\xe9'
+
class TestDirect:
def test_charmap_encode(self):
Modified: pypy/trunk/pypy/rlib/runicode.py
==============================================================================
--- pypy/trunk/pypy/rlib/runicode.py (original)
+++ pypy/trunk/pypy/rlib/runicode.py Fri Feb 6 23:37:18 2009
@@ -451,3 +451,18 @@
def unicode_encode_utf_16_le(s, size, errors,
errorhandler=None):
return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "little")
+
+
+if sys.platform == 'win32':
+ def str_decode_mbcs(s, size, errors, final=False,
+ errorhandler=None):
+ # XXX MultiByteToWideChar should be used instead.
+ return str_decode_latin_1(s, size, errors="replace",
+ final=final, errorhandler=errorhandler)
+
+ def unicode_encode_mbcs(p, size, errors, errorhandler=None):
+ # XXX This is only roughly correct, even on a Western Windows.
+ # For example, some greek letters do have a translation (phi -> f)
+ # WideCharToMultiByte should be used instead.
+ return unicode_encode_latin_1(p, size, errors="replace",
+ errorhandler=errorhandler)
More information about the Pypy-commit
mailing list