[pypy-svn] r48612 - in pypy/branch/more-unicode-improvements/pypy/rlib: . test
cfbolz at codespeak.net
Mon Nov 12 21:37:26 CET 2007
Author: cfbolz
Date: Mon Nov 12 21:37:24 2007
New Revision: 48612
Modified:
pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
Log:
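change names to be more consistent with CPython (e.g. str_decode_utf8 -> str_decode_utf_8, unicode_encode_utf16be -> unicode_encode_utf_16_be, and the encoding name passed to error handlers from "utf8"/"utf16" to "utf-8"/"utf-16")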
Modified: pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/runicode.py Mon Nov 12 21:37:24 2007
@@ -37,7 +37,7 @@
4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0
]
-def str_decode_utf8(s, size, errors, final=False,
+def str_decode_utf_8(s, size, errors, final=False,
errorhandler=raise_unicode_exception):
if (size == 0):
return u'', 0
@@ -56,13 +56,13 @@
if not final:
break
else:
- r, pos = errorhandler(errors, "utf8",
+ r, pos = errorhandler(errors, "utf-8",
"unexpected end of data", s, pos, size)
p += r
if (pos + n > size):
break
if n == 0:
- res = errorhandler(errors, "utf8", "unexpected code byte",
+ res = errorhandler(errors, "utf-8", "unexpected code byte",
s, pos, pos + 1)
p += res[0]
pos = res[1]
@@ -76,13 +76,13 @@
y, six = splitter[5, 3](ordch1)
assert six == 6
if (two != 2):
- r, pos = errorhandler(errors, "utf8", "invalid data",
+ r, pos = errorhandler(errors, "utf-8", "invalid data",
s, pos, pos + 2)
p += r
else:
c = (y << 6) + z
if c < 0x80:
- r, pos = errorhandler(errors, "utf8", "illegal encoding",
+ r, pos = errorhandler(errors, "utf-8", "illegal encoding",
s, pos, pos + 2)
p += r
else:
@@ -97,7 +97,7 @@
x, fourteen = splitter[4, 4](ordch1)
assert fourteen == 14
if (two1 != 2 or two2 != 2):
- r, pos = errorhandler(errors, "utf8", "invalid data",
+ r, pos = errorhandler(errors, "utf-8", "invalid data",
s, pos, pos + 3)
p += r
else:
@@ -108,7 +108,7 @@
# to recombine the surrogates into a single code
# unit.
if c < 0x0800:
- r, pos = errorhandler(errors, "utf8", "illegal encoding",
+ r, pos = errorhandler(errors, "utf-8", "illegal encoding",
s, pos, pos + 3)
p += r
else:
@@ -126,7 +126,7 @@
w, thirty = splitter[3, 5](ordch1)
assert thirty == 30
if (two1 != 2 or two2 != 2 or two3 != 2):
- r, pos = errorhandler(errors, "utf8", "invalid data",
+ r, pos = errorhandler(errors, "utf-8", "invalid data",
s, pos, pos + 4)
p += r
else:
@@ -134,7 +134,7 @@
# minimum value allowed for 4 byte encoding
# maximum value allowed for UTF-16
if ((c < 0x10000) or (c > 0x10ffff)):
- r, pos = errorhandler(errors, "utf8", "illegal encoding",
+ r, pos = errorhandler(errors, "utf-8", "illegal encoding",
s, pos, pos + 4)
p += r
else:
@@ -151,7 +151,7 @@
p.append(unichr(0xDC00 + (c & 0x03FF)))
pos += n
else:
- r, pos = errorhandler(errors, "utf8",
+ r, pos = errorhandler(errors, "utf-8",
"unsupported Unicode code range",
s, pos, pos + n)
p += r
@@ -159,27 +159,27 @@
return u"".join(p), pos
-def str_decode_utf16(s, size, errors, final=True,
+def str_decode_utf_16(s, size, errors, final=True,
errorhandler=raise_unicode_exception):
- result, length, byteorder = str_decode_utf16_helper(s, size, errors, final,
- errorhandler, "native")
+ result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final,
+ errorhandler, "native")
return result, length
-def str_decode_utf16be(s, size, errors, final=True,
+def str_decode_utf_16_be(s, size, errors, final=True,
errorhandler=raise_unicode_exception):
- result, length, byteorder = str_decode_utf16_helper(s, size, errors, final,
- errorhandler, "big")
+ result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final,
+ errorhandler, "big")
return result, length
-def str_decode_utf16le(s, size, errors, final=True,
- errorhandler=raise_unicode_exception):
- result, length, byteorder = str_decode_utf16_helper(s, size, errors, final,
- errorhandler, "little")
+def str_decode_utf_16_le(s, size, errors, final=True,
+ errorhandler=raise_unicode_exception):
+ result, length, byteorder = str_decode_utf_16_helper(s, size, errors, final,
+ errorhandler, "little")
return result, length
-def str_decode_utf16_helper(s, size, errors, final=True,
- errorhandler=raise_unicode_exception,
- byteorder="native"):
+def str_decode_utf_16_helper(s, size, errors, final=True,
+ errorhandler=raise_unicode_exception,
+ byteorder="native"):
bo = 0
consumed = 0
@@ -236,7 +236,7 @@
if len(s) - pos < 2:
if not final:
break
- r, pos = errorhandler(errors, 'utf16', "truncated data",
+ r, pos = errorhandler(errors, 'utf-16', "truncated data",
s, pos, len(s), True)
result.append(r)
if len(s) - pos < 2:
@@ -251,7 +251,7 @@
if not final:
break
errmsg = "unexpected end of data"
- r, pos = errorhandler(errors, 'utf16', errmsg, s, pos - 2, len(s))
+ r, pos = errorhandler(errors, 'utf-16', errmsg, s, pos - 2, len(s))
result.append(r)
if len(s) - pos < 2:
break
@@ -266,7 +266,7 @@
result += unichr((((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000)
continue
else:
- r, pos = errorhandler(errors, 'utf16',
+ r, pos = errorhandler(errors, 'utf-16',
"illegal UTF-16 surrogate",
s, pos - 4, pos - 2)
result.append(r)
@@ -306,7 +306,7 @@
# unicode encoding
-def unicode_encode_utf8(s, size, errors, errorhandler=raise_unicode_exception):
+def unicode_encode_utf_8(s, size, errors, errorhandler=raise_unicode_exception):
assert(size >= 0)
p = []
i = 0
@@ -400,9 +400,9 @@
p.append(hi)
p.append(lo)
-def unicode_encode_utf16_helper(s, size, errors,
- errorhandler=raise_unicode_exception,
- byteorder='little'):
+def unicode_encode_utf_16_helper(s, size, errors,
+ errorhandler=raise_unicode_exception,
+ byteorder='little'):
p = []
if (byteorder == 'native'):
_STORECHAR(p, 0xFEFF, sys.byteorder)
@@ -426,16 +426,16 @@
return "".join(p)
-def unicode_encode_utf16(s, size, errors,
- errorhandler=raise_unicode_exception):
- return unicode_encode_utf16_helper(s, size, errors, errorhandler, "native")
+def unicode_encode_utf_16(s, size, errors,
+ errorhandler=raise_unicode_exception):
+ return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "native")
-def unicode_encode_utf16be(s, size, errors,
+def unicode_encode_utf_16_be(s, size, errors,
errorhandler=raise_unicode_exception):
- return unicode_encode_utf16_helper(s, size, errors, errorhandler, "big")
+ return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "big")
-def unicode_encode_utf16le(s, size, errors,
- errorhandler=raise_unicode_exception):
- return unicode_encode_utf16_helper(s, size, errors, errorhandler, "little")
+def unicode_encode_utf_16_le(s, size, errors,
+ errorhandler=raise_unicode_exception):
+ return unicode_encode_utf_16_helper(s, size, errors, errorhandler, "little")
Modified: pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/rlib/test/test_runicode.py Mon Nov 12 21:37:24 2007
@@ -6,8 +6,15 @@
assert x == y
assert type(x) is type(y)
+ def getdecoder(self, encoding):
+ return getattr(runicode, "str_decode_%s" % encoding.replace("-", "_"))
+
+ def getencoder(self, encoding):
+ return getattr(runicode,
+ "unicode_encode_%s" % encoding.replace("-", "_"))
+
def checkdecode(self, s, encoding):
- decoder = getattr(runicode, "str_decode_%s" % encoding.replace("-", ""))
+ decoder = self.getdecoder(encoding)
if isinstance(s, str):
trueresult = s.decode(encoding)
else:
@@ -18,8 +25,7 @@
self.typeequals(trueresult, result)
def checkencode(self, s, encoding):
- encoder = getattr(runicode,
- "unicode_encode_%s" % encoding.replace("-", ""))
+ encoder = self.getencoder(encoding)
if isinstance(s, unicode):
trueresult = s.encode(encoding)
else:
@@ -40,8 +46,7 @@
assert stop == endingpos
assert not decode
return "42424242", stop
- encoder = getattr(runicode,
- "unicode_encode_%s" % encoding.replace("-", ""))
+ encoder = self.getencoder(encoding)
result = encoder(s, len(s), "foo!", errorhandler)
assert called[0]
assert "42424242" in result
@@ -60,8 +65,7 @@
assert decode
return u"42424242", stop
return "", endingpos
- decoder = getattr(runicode,
- "str_decode_%s" % encoding.replace("-", ""))
+ decoder = self.getdecoder(encoding)
if addstuff:
s += "some rest in ascii"
result, _ = decoder(s, len(s), "foo!", True, errorhandler)
@@ -78,28 +82,28 @@
def test_all_ascii(self):
for i in range(128):
- for encoding in "utf8 latin1 ascii".split():
+ for encoding in "utf-8 latin1 ascii".split():
self.checkdecode(chr(i), encoding)
def test_all_first_256(self):
for i in range(256):
- for encoding in "utf8 latin1 utf16 utf-16-be utf-16-le".split():
+ for encoding in "utf-8 latin1 utf-16 utf-16-be utf-16-le".split():
self.checkdecode(unichr(i), encoding)
def test_random(self):
for i in range(10000):
uni = unichr(random.randrange(sys.maxunicode))
- for encoding in "utf8 utf16 utf-16-be utf-16-le".split():
+ for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
self.checkdecode(unichr(i), encoding)
def test_single_chars_utf8(self):
for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
- self.checkdecode(s, "utf8")
+ self.checkdecode(s, "utf-8")
def test_utf8_errors(self):
for s in [# unexpected end of data
"\xd7", "\xd6", "\xeb\x96", "\xf0\x90\x91"]:
- self.checkdecodeerror(s, "utf8", 0, len(s), addstuff=False)
+ self.checkdecodeerror(s, "utf-8", 0, len(s), addstuff=False)
for s in [# unexpected code byte
"\x81", "\xbf",
@@ -113,7 +117,7 @@
"\xf0\x90\x51\x93", "\xf0\x90\x01\x93", "\xf0\x90\xd1\x93",
"\xf0\x90\x91\x53", "\xf0\x90\x91\x03", "\xf0\x90\x91\xd3",
]:
- self.checkdecodeerror(s, "utf8", 0, len(s), addstuff=True)
+ self.checkdecodeerror(s, "utf-8", 0, len(s), addstuff=True)
def test_ascii_error(self):
self.checkdecodeerror("abc\xFF\xFF\xFFcde", "ascii", 3, 4)
@@ -121,7 +125,7 @@
def test_utf16_errors(self):
# trunkated BOM
for s in ["\xff", "\xfe"]:
- self.checkdecodeerror(s, "utf16", 0, len(s), addstuff=False)
+ self.checkdecodeerror(s, "utf-16", 0, len(s), addstuff=False)
for s in [
# unexpected end of data ascii
@@ -129,33 +133,33 @@
# unexpected end of data
'\xff\xfe\xc0\xdb\x00', '\xff\xfe\xc0\xdb', '\xff\xfe\xc0',
]:
- self.checkdecodeerror(s, "utf16", 2, len(s), addstuff=False)
+ self.checkdecodeerror(s, "utf-16", 2, len(s), addstuff=False)
for s in [
# illegal surrogate
"\xff\xfe\xff\xdb\xff\xff",
]:
- self.checkdecodeerror(s, "utf16", 2, 4, addstuff=False)
+ self.checkdecodeerror(s, "utf-16", 2, 4, addstuff=False)
class TestEncoding(UnicodeTests):
def test_all_ascii(self):
for i in range(128):
- for encoding in "utf8 latin1 ascii".split():
+ for encoding in "utf-8 latin1 ascii".split():
self.checkencode(unichr(i), encoding)
def test_all_first_256(self):
for i in range(256):
- for encoding in "utf8 latin1 utf16 utf-16-be utf-16-le".split():
+ for encoding in "utf-8 latin1 utf-16 utf-16-be utf-16-le".split():
self.checkencode(unichr(i), encoding)
def test_random(self):
for i in range(10000):
uni = unichr(random.randrange(sys.maxunicode))
- for encoding in "utf8 utf16 utf-16-be utf-16-le".split():
+ for encoding in "utf-8 utf-16 utf-16-be utf-16-le".split():
self.checkencode(unichr(i), encoding)
def test_single_chars_utf8(self):
for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
- self.checkencode(s, "utf8")
+ self.checkencode(s, "utf-8")
def test_ascii_error(self):
self.checkencodeerror(u"abc\xFF\xFF\xFFcde", "ascii", 3, 6)
More information about the Pypy-commit mailing list