[pypy-svn] r48558 - in pypy/dist/pypy: annotation annotation/test interpreter interpreter/test module/__builtin__ module/_sre module/sys module/sys/test module/unicodedata objspace objspace/cpy objspace/cpy/test objspace/fake objspace/std objspace/std/test rlib rlib/test translator translator/cli translator/jvm translator/jvm/src/pypy translator/jvm/test translator/oosupport
fijal at codespeak.net
fijal at codespeak.net
Sun Nov 11 13:38:14 CET 2007
Author: fijal
Date: Sun Nov 11 13:38:13 2007
New Revision: 48558
Added:
pypy/dist/pypy/module/sys/interp_encoding.py (contents, props changed)
pypy/dist/pypy/translator/jvm/test/test_unicode.py (contents, props changed)
Modified:
pypy/dist/pypy/annotation/binaryop.py
pypy/dist/pypy/annotation/test/test_annrpython.py
pypy/dist/pypy/interpreter/baseobjspace.py
pypy/dist/pypy/interpreter/test/test_objspace.py
pypy/dist/pypy/interpreter/test/test_typedef.py
pypy/dist/pypy/module/__builtin__/operation.py
pypy/dist/pypy/module/_sre/interp_sre.py
pypy/dist/pypy/module/sys/__init__.py
pypy/dist/pypy/module/sys/app.py
pypy/dist/pypy/module/sys/test/test_sysmodule.py
pypy/dist/pypy/module/unicodedata/interp_ucd.py
pypy/dist/pypy/objspace/cpy/objspace.py
pypy/dist/pypy/objspace/cpy/test/test_objspace.py
pypy/dist/pypy/objspace/dump.py
pypy/dist/pypy/objspace/fake/objspace.py
pypy/dist/pypy/objspace/logic.py
pypy/dist/pypy/objspace/std/default.py
pypy/dist/pypy/objspace/std/formatting.py
pypy/dist/pypy/objspace/std/objspace.py
pypy/dist/pypy/objspace/std/stringobject.py
pypy/dist/pypy/objspace/std/test/test_index.py
pypy/dist/pypy/objspace/std/test/test_unicodeobject.py
pypy/dist/pypy/objspace/std/unicodeobject.py
pypy/dist/pypy/objspace/std/unicodetype.py
pypy/dist/pypy/objspace/thunk.py
pypy/dist/pypy/rlib/rsocket.py
pypy/dist/pypy/rlib/test/test_rsocket.py
pypy/dist/pypy/translator/cli/function.py
pypy/dist/pypy/translator/cli/metavm.py
pypy/dist/pypy/translator/cli/opcodes.py
pypy/dist/pypy/translator/geninterplevel.py
pypy/dist/pypy/translator/jvm/builtin.py
pypy/dist/pypy/translator/jvm/conftest.py
pypy/dist/pypy/translator/jvm/database.py
pypy/dist/pypy/translator/jvm/generator.py
pypy/dist/pypy/translator/jvm/opcodes.py
pypy/dist/pypy/translator/jvm/prebuiltnodes.py
pypy/dist/pypy/translator/jvm/src/pypy/Interlink.java
pypy/dist/pypy/translator/jvm/src/pypy/PyPy.java
pypy/dist/pypy/translator/jvm/test/runtest.py
pypy/dist/pypy/translator/oosupport/metavm.py
Log:
(antocuni, cfbolz, fijal) - Merge the unicode-objspace branch, history below
------------------------------------------------------------------------
r48508 | cfbolz | 2007-11-10 16:14:06 +0100 (Sat, 10 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
Add a helper function for unwrapping the encoding and errors arguments; it will
be needed in various places.
------------------------------------------------------------------------
r48507 | cfbolz | 2007-11-10 15:54:15 +0100 (Sat, 10 Nov 2007) | 4 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/module/sys/__init__.py
M /pypy/branch/unicode-objspace/pypy/module/sys/interp_encoding.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
yet another approach: do the same thing as with the recursionlimit and stick
the encoding directly onto the module. probably a good idea, it's easily
accessible and doesn't require a function call. Thanks Alexander.
------------------------------------------------------------------------
r48506 | cfbolz | 2007-11-10 15:47:40 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/interpreter/baseobjspace.py
M /pypy/branch/unicode-objspace/pypy/module/sys/interp_encoding.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
sticking the encoding to the space is a bad idea
------------------------------------------------------------------------
r48505 | fijal | 2007-11-10 14:13:39 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/formatting.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
More fixes towards making annotation complete.
------------------------------------------------------------------------
r48503 | fijal | 2007-11-10 13:56:59 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
getname eats space as well
------------------------------------------------------------------------
r48502 | fijal | 2007-11-10 13:38:56 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/test/test_unicodeobject.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
I don't get it, but at least test is passing
------------------------------------------------------------------------
r48501 | fijal | 2007-11-10 13:07:03 +0100 (Sat, 10 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodeobject.py
This is a wrong optimisation. It would not work for, e.g., a tproxy of unicode
or a different unicode implementation.
------------------------------------------------------------------------
r48500 | fijal | 2007-11-10 13:05:49 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/stringobject.py
annotator seems not to like this
------------------------------------------------------------------------
r48499 | fijal | 2007-11-10 12:52:38 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodeobject.py
A real annotation problem this time
------------------------------------------------------------------------
r48498 | fijal | 2007-11-10 12:43:38 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
Another typo :-/
------------------------------------------------------------------------
r48497 | fijal | 2007-11-10 12:36:15 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
Another typo. Test anyone?
------------------------------------------------------------------------
r48496 | fijal | 2007-11-10 12:30:29 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
Hum. A typo?
------------------------------------------------------------------------
r48495 | antocuni | 2007-11-10 11:47:06 +0100 (Sat, 10 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/translator/cli/function.py
M /pypy/branch/unicode-objspace/pypy/translator/cli/metavm.py
M /pypy/branch/unicode-objspace/pypy/translator/cli/opcodes.py
M /pypy/branch/unicode-objspace/pypy/translator/jvm/builtin.py
M /pypy/branch/unicode-objspace/pypy/translator/jvm/conftest.py
M /pypy/branch/unicode-objspace/pypy/translator/jvm/database.py
M /pypy/branch/unicode-objspace/pypy/translator/jvm/generator.py
M /pypy/branch/unicode-objspace/pypy/translator/jvm/opcodes.py
M /pypy/branch/unicode-objspace/pypy/translator/jvm/prebuiltnodes.py
M /pypy/branch/unicode-objspace/pypy/translator/jvm/src/pypy/Interlink.java
M /pypy/branch/unicode-objspace/pypy/translator/jvm/src/pypy/PyPy.java
M /pypy/branch/unicode-objspace/pypy/translator/jvm/test/runtest.py
A /pypy/branch/unicode-objspace/pypy/translator/jvm/test/test_unicode.py
M /pypy/branch/unicode-objspace/pypy/translator/oosupport/metavm.py
add unicode support to genjvm
------------------------------------------------------------------------
r48494 | cfbolz | 2007-11-10 03:13:55 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
part 2: no more applevel code in the __new__
------------------------------------------------------------------------
r48493 | cfbolz | 2007-11-10 02:56:44 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodeobject.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
more applevel moving: this time in unicode.__new__ (!). Part 1
------------------------------------------------------------------------
r48492 | cfbolz | 2007-11-10 02:42:28 +0100 (Sat, 10 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/test/test_unicodeobject.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodeobject.py
move translate from app to interplevel. add tests. Why is the test coverage of
unicode so terribly spotty?
------------------------------------------------------------------------
r48491 | cfbolz | 2007-11-10 02:26:22 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/test/test_unicodeobject.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodeobject.py
move expandtabs from app to interplevel
------------------------------------------------------------------------
r48490 | cfbolz | 2007-11-10 02:13:44 +0100 (Sat, 10 Nov 2007) | 4 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/interpreter/baseobjspace.py
M /pypy/branch/unicode-objspace/pypy/module/sys/__init__.py
M /pypy/branch/unicode-objspace/pypy/module/sys/app.py
A /pypy/branch/unicode-objspace/pypy/module/sys/interp_encoding.py
M /pypy/branch/unicode-objspace/pypy/module/sys/test/test_sysmodule.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
attach the default-encoding to the space for easier accessibility. not sure
this is a very nice solution, but I think it is way better than a global in an
applevel implementation module in the sys mixed-module.
------------------------------------------------------------------------
r48486 | cfbolz | 2007-11-10 01:28:38 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodeobject.py
whoops, that was nonsense
------------------------------------------------------------------------
r48485 | cfbolz | 2007-11-10 01:25:59 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/test/test_index.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodeobject.py
move more stuff to interplevel. also fix indexing behaviour + tests
------------------------------------------------------------------------
r48484 | cfbolz | 2007-11-10 00:50:26 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodeobject.py
partition, rpartition go from applevel to interplevel
------------------------------------------------------------------------
r48483 | cfbolz | 2007-11-10 00:40:56 +0100 (Sat, 10 Nov 2007) | 2 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/interpreter/baseobjspace.py
M /pypy/branch/unicode-objspace/pypy/interpreter/test/test_objspace.py
M /pypy/branch/unicode-objspace/pypy/interpreter/test/test_typedef.py
M /pypy/branch/unicode-objspace/pypy/module/__builtin__/operation.py
M /pypy/branch/unicode-objspace/pypy/objspace/cpy/objspace.py
M /pypy/branch/unicode-objspace/pypy/objspace/cpy/test/test_objspace.py
M /pypy/branch/unicode-objspace/pypy/objspace/dump.py
M /pypy/branch/unicode-objspace/pypy/objspace/fake/objspace.py
M /pypy/branch/unicode-objspace/pypy/objspace/logic.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/objspace.py
M /pypy/branch/unicode-objspace/pypy/objspace/thunk.py
M /pypy/branch/unicode-objspace/pypy/translator/geninterplevel.py
get rid of newstring as well
------------------------------------------------------------------------
r48482 | cfbolz | 2007-11-10 00:24:58 +0100 (Sat, 10 Nov 2007) | 3 lines
Changed paths:
M /pypy/branch/unicode-objspace/pypy/interpreter/baseobjspace.py
M /pypy/branch/unicode-objspace/pypy/module/__builtin__/operation.py
M /pypy/branch/unicode-objspace/pypy/module/_sre/interp_sre.py
M /pypy/branch/unicode-objspace/pypy/module/unicodedata/interp_ucd.py
M /pypy/branch/unicode-objspace/pypy/objspace/cpy/objspace.py
M /pypy/branch/unicode-objspace/pypy/objspace/cpy/test/test_objspace.py
M /pypy/branch/unicode-objspace/pypy/objspace/dump.py
M /pypy/branch/unicode-objspace/pypy/objspace/fake/objspace.py
M /pypy/branch/unicode-objspace/pypy/objspace/logic.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/default.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/formatting.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/objspace.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/stringobject.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/test/test_unicodeobject.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodeobject.py
M /pypy/branch/unicode-objspace/pypy/objspace/std/unicodetype.py
M /pypy/branch/unicode-objspace/pypy/objspace/thunk.py
plotch. refactor the std object space to use rpython unicode objects to
represent unicode objects.
Modified: pypy/dist/pypy/annotation/binaryop.py
==============================================================================
--- pypy/dist/pypy/annotation/binaryop.py (original)
+++ pypy/dist/pypy/annotation/binaryop.py Sun Nov 11 13:38:13 2007
@@ -599,19 +599,29 @@
lst1.listdef.resize()
delitem.can_only_throw = [IndexError]
+def check_negative_slice(s_slice):
+ if isinstance(s_slice.start, SomeInteger) and not s_slice.start.nonneg:
+ raise TypeError("%s not proven to have negative start" % s_slice)
+ if isinstance(s_slice.stop, SomeInteger) and not s_slice.stop.nonneg and\
+ getattr(s_slice.stop, 'const', 0) != -1:
+ raise TypeError("%s not proven to have negative stop" % s_slice)
+
class __extend__(pairtype(SomeList, SomeSlice)):
def getitem((lst, slic)):
+ check_negative_slice(slic)
return lst.listdef.offspring()
getitem.can_only_throw = []
def setitem((lst, slic), s_iterable):
+ check_negative_slice(slic)
# we need the same unifying effect as the extend() method for
# the case lst1[x:y] = lst2.
lst.method_extend(s_iterable)
setitem.can_only_throw = []
def delitem((lst1, slic)):
+ check_negative_slice(slic)
lst1.listdef.resize()
delitem.can_only_throw = []
@@ -619,6 +629,7 @@
pairtype(SomeUnicodeString, SomeSlice)):
def getitem((str1, slic)):
+ check_negative_slice(slic)
return str1.basestringclass()
getitem.can_only_throw = []
Modified: pypy/dist/pypy/annotation/test/test_annrpython.py
==============================================================================
--- pypy/dist/pypy/annotation/test/test_annrpython.py (original)
+++ pypy/dist/pypy/annotation/test/test_annrpython.py Sun Nov 11 13:38:13 2007
@@ -2962,6 +2962,19 @@
s = a.build_types(f, [unicode, str])
assert isinstance(s, annmodel.SomeUnicodeCodePoint)
+ def test_negative_slice(self):
+ def f(s, e):
+ return [1, 2, 3][s:e]
+
+ a = self.RPythonAnnotator()
+ py.test.raises(TypeError, "a.build_types(f, [int, int])")
+ a.build_types(f, [annmodel.SomeInteger(nonneg=True),
+ annmodel.SomeInteger(nonneg=True)])
+ def f(x):
+ return x[:-1]
+
+ a.build_types(f, [str])
+
def g(n):
return [0,1,2,n]
Modified: pypy/dist/pypy/interpreter/baseobjspace.py
==============================================================================
--- pypy/dist/pypy/interpreter/baseobjspace.py (original)
+++ pypy/dist/pypy/interpreter/baseobjspace.py Sun Nov 11 13:38:13 2007
@@ -1036,8 +1036,6 @@
# is_true(w_x) -> True or False
# newtuple([w_1, w_2,...]) -> w_tuple
# newlist([w_1, w_2,...]) -> w_list
-# newstring([w_1, w_2,...]) -> w_string from ascii numbers (bytes)
-# newunicode([i1, i2,...]) -> w_unicode from integers
# newdict() -> empty w_dict
# newslice(w_start,w_stop,w_step) -> w_slice
# call_args(w_obj,Arguments()) -> w_result
@@ -1049,15 +1047,13 @@
'float_w',
'uint_w',
'bigint_w',
- 'unichars_w',
+ 'unicode_w',
'interpclass_w',
'unwrap',
'is_true',
'is_w',
'newtuple',
'newlist',
- 'newstring',
- 'newunicode',
'newdict',
'newslice',
'call_args',
Modified: pypy/dist/pypy/interpreter/test/test_objspace.py
==============================================================================
--- pypy/dist/pypy/interpreter/test/test_objspace.py (original)
+++ pypy/dist/pypy/interpreter/test/test_objspace.py Sun Nov 11 13:38:13 2007
@@ -9,23 +9,6 @@
# -- it's more to test that it's *there*
class TestObjSpace:
- def test_newstring(self):
- w = self.space.wrap
- s = 'abc'
- chars_w = [w(ord(c)) for c in s]
- assert self.space.eq_w(w(s), self.space.newstring(chars_w))
-
- def test_newstring_fail(self):
- w = self.space.wrap
- s = 'abc'
- not_chars_w = [w(c) for c in s]
- self.space.raises_w(self.space.w_TypeError,
- self.space.newstring,
- not_chars_w)
- self.space.raises_w(self.space.w_ValueError,
- self.space.newstring,
- [w(-1)])
-
def test_newlist(self):
w = self.space.wrap
l = range(10)
Modified: pypy/dist/pypy/interpreter/test/test_typedef.py
==============================================================================
--- pypy/dist/pypy/interpreter/test/test_typedef.py (original)
+++ pypy/dist/pypy/interpreter/test/test_typedef.py Sun Nov 11 13:38:13 2007
@@ -5,6 +5,7 @@
class AppTestTraceBackAttributes:
def test_newstring(self):
+ # XXX why is this called newstring?
import sys
def f():
raise TypeError, "hello"
Modified: pypy/dist/pypy/module/__builtin__/operation.py
==============================================================================
--- pypy/dist/pypy/module/__builtin__/operation.py (original)
+++ pypy/dist/pypy/module/__builtin__/operation.py Sun Nov 11 13:38:13 2007
@@ -14,8 +14,12 @@
def chr(space, w_ascii):
"Return a string of one character with the given ascii code."
- w_character = space.newstring([w_ascii])
- return w_character
+ try:
+ char = __builtin__.chr(space.int_w(w_ascii))
+ except ValueError: # chr(out-of-range)
+ raise OperationError(space.w_ValueError,
+ space.wrap("character code not in range(256)"))
+ return space.wrap(char)
def unichr(space, code):
"Return a Unicode string of one character with the given ordinal."
@@ -25,7 +29,7 @@
except ValueError:
raise OperationError(space.w_ValueError,
space.wrap("unichr() arg out of range"))
- return space.newunicode([c])
+ return space.wrap(c)
unichr.unwrap_spec = [ObjSpace, int]
def len(space, w_obj):
Modified: pypy/dist/pypy/module/_sre/interp_sre.py
==============================================================================
--- pypy/dist/pypy/module/_sre/interp_sre.py (original)
+++ pypy/dist/pypy/module/_sre/interp_sre.py Sun Nov 11 13:38:13 2007
@@ -150,11 +150,11 @@
rsre.insert_sre_methods(locals(), 'unicode')
def unwrap_object(self):
- self.unichars = self.space.unichars_w(self.w_string)
- return len(self.unichars)
+ self.unicode = self.space.unicode_w(self.w_string)
+ return len(self.unicode)
def get_char_ord(self, p):
- return ord(self.unichars[p])
+ return ord(self.unicode[p])
class W_GenericState(W_State):
Modified: pypy/dist/pypy/module/sys/__init__.py
==============================================================================
--- pypy/dist/pypy/module/sys/__init__.py (original)
+++ pypy/dist/pypy/module/sys/__init__.py Sun Nov 11 13:38:13 2007
@@ -8,6 +8,7 @@
super(Module, self).__init__(space, w_name)
self.checkinterval = 100
self.recursionlimit = 100
+ self.defaultencoding = "ascii"
interpleveldefs = {
'__name__' : '(space.wrap("sys"))',
@@ -64,6 +65,9 @@
'path_hooks' : 'space.wrap([])',
'path_importer_cache' : 'space.wrap({})',
#'subversion' : added in Python 2.5
+
+ 'getdefaultencoding' : 'interp_encoding.getdefaultencoding',
+ 'setdefaultencoding' : 'interp_encoding.setdefaultencoding',
}
appleveldefs = {
#'displayhook' : 'app.displayhook',
@@ -75,8 +79,6 @@
'pypy__exithandlers__' : 'app.pypy__exithandlers__', # internal
'getfilesystemencoding' : 'app.getfilesystemencoding',
'callstats' : 'app.callstats',
- 'getdefaultencoding' : 'app.getdefaultencoding',
- 'setdefaultencoding' : 'app.setdefaultencoding',
}
def setbuiltinmodule(self, w_module, name):
Modified: pypy/dist/pypy/module/sys/app.py
==============================================================================
--- pypy/dist/pypy/module/sys/app.py (original)
+++ pypy/dist/pypy/module/sys/app.py Sun Nov 11 13:38:13 2007
@@ -46,17 +46,3 @@
"""Not implemented."""
return None
-defaultencoding = 'ascii'
-
-def getdefaultencoding():
- """Return the current default string encoding used by the Unicode
-implementation."""
- return defaultencoding
-
-def setdefaultencoding(encoding):
- """Set the current default string encoding used by the Unicode
-implementation."""
- global defaultencoding
- import codecs
- codecs.lookup(encoding)
- defaultencoding = encoding
Added: pypy/dist/pypy/module/sys/interp_encoding.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/module/sys/interp_encoding.py Sun Nov 11 13:38:13 2007
@@ -0,0 +1,14 @@
+def getdefaultencoding(space):
+ """Return the current default string encoding used by the Unicode
+implementation."""
+ return space.wrap(space.sys.defaultencoding)
+
+def setdefaultencoding(space, w_encoding):
+ """Set the current default string encoding used by the Unicode
+implementation."""
+ encoding = space.str_w(w_encoding)
+ mod = space.getbuiltinmodule("_codecs")
+ w_lookup = space.getattr(mod, space.wrap("lookup"))
+ # check whether the encoding is there
+ space.call_function(w_lookup, w_encoding)
+ space.sys.defaultencoding = encoding
Modified: pypy/dist/pypy/module/sys/test/test_sysmodule.py
==============================================================================
--- pypy/dist/pypy/module/sys/test/test_sysmodule.py (original)
+++ pypy/dist/pypy/module/sys/test/test_sysmodule.py Sun Nov 11 13:38:13 2007
@@ -246,6 +246,16 @@
raises(TypeError, sys.getdefaultencoding, 42)
# can't check more than the type, as the user might have changed it
assert isinstance(sys.getdefaultencoding(), str)
+
+ def test_getdefaultencoding(self):
+ encoding = sys.getdefaultencoding()
+ sys.setdefaultencoding("ascii")
+ try:
+ assert sys.getdefaultencoding() == 'ascii'
+ raises(UnicodeDecodeError, unicode, '\x80')
+ finally:
+ sys.setdefaultencoding(encoding)
+
# testing sys.settrace() is done in test_trace.py
# testing sys.setprofile() is done in test_profile.py
Modified: pypy/dist/pypy/module/unicodedata/interp_ucd.py
==============================================================================
--- pypy/dist/pypy/module/unicodedata/interp_ucd.py (original)
+++ pypy/dist/pypy/module/unicodedata/interp_ucd.py Sun Nov 11 13:38:13 2007
@@ -215,10 +215,10 @@
result[0] = ch
if not composed: # If decomposed normalization we are done
- return space.newunicode([unichr(i) for i in result[:j]])
+ return space.wrap(u''.join([unichr(i) for i in result[:j]]))
if j <= 1:
- return space.newunicode([unichr(i) for i in result[:j]])
+ return space.wrap(u''.join([unichr(i) for i in result[:j]]))
current = result[0]
starter_pos = 0
@@ -268,7 +268,7 @@
result[starter_pos] = current
- return space.newunicode([unichr(i) for i in result[:next_insert]])
+ return space.wrap(u''.join([unichr(i) for i in result[:next_insert]]))
normalize.unwrap_spec = ['self', ObjSpace, str, W_Root]
Modified: pypy/dist/pypy/objspace/cpy/objspace.py
==============================================================================
--- pypy/dist/pypy/objspace/cpy/objspace.py (original)
+++ pypy/dist/pypy/objspace/cpy/objspace.py Sun Nov 11 13:38:13 2007
@@ -71,6 +71,9 @@
return PyInt_FromLong(x)
if isinstance(x, str):
return PyString_FromStringAndSize(x, len(x))
+ if isinstance(x, str):
+ # XXX fix me
+ raise NotImplementedError
if isinstance(x, float):
return PyFloat_FromDouble(x)
if isinstance(x, r_uint):
@@ -206,7 +209,7 @@
buf[i] = p[i]
return buf.raw
- def unichars_w(self, w_obj):
+ def unicode_w(self, w_obj):
not_implemented_sorry
def call_function(self, w_callable, *args_w):
@@ -227,20 +230,6 @@
PyString_InternInPlace(byref(w_s))
return w_s
- def newstring(self, bytes_w):
- length = len(bytes_w)
- buf = ctypes.create_string_buffer(length)
- for i in range(length):
- buf[i] = chr(self.int_w(bytes_w[i]))
- return PyString_FromStringAndSize(buf, length)
-
- def newunicode(self, codes):
- # XXX inefficient
- lst = [PyUnicode_FromOrdinal(ord(code)) for code in codes]
- w_lst = self.newlist(lst)
- w_emptyunicode = PyUnicode_FromUnicode(None, 0)
- return self.call_method(w_emptyunicode, 'join', w_lst)
-
def newint(self, intval):
return PyInt_FromLong(intval)
Modified: pypy/dist/pypy/objspace/cpy/test/test_objspace.py
==============================================================================
--- pypy/dist/pypy/objspace/cpy/test/test_objspace.py (original)
+++ pypy/dist/pypy/objspace/cpy/test/test_objspace.py Sun Nov 11 13:38:13 2007
@@ -49,28 +49,30 @@
w2 = space.wrap(11)
raises_w(space, space.w_TypeError, space.sub, w1, w2)
-def test_newstring():
+def test_wrapstring():
space = CPyObjSpace()
- w = space.newstring([space.wrap(65), space.wrap(66)])
+ w = space.wrap('AB')
assert space.str_w(w) == 'AB'
-def test_newunicode():
+def test_wrapunicode():
+ py.test.skip("fix me")
space = CPyObjSpace()
- w = space.newunicode([unichr(65), unichr(66)])
+ w = space.wrap(unichr(65) + unichr(66))
assert space.is_w(space.type(w), space.w_unicode)
for i in range(2):
code = space.int_w(space.ord(space.getitem(w, space.wrap(i))))
assert code == 65+i
def test_ord():
+ py.test.skip("fix me")
space = CPyObjSpace()
w = space.wrap('A')
assert space.int_w(space.ord(w)) == 65
w = space.wrap('\x00')
assert space.int_w(space.ord(w)) == 0
- w = space.newunicode([unichr(65)])
+ w = space.wrap(unichr(65))
assert space.int_w(space.ord(w)) == 65
- w = space.newunicode([unichr(0)])
+ w = space.wrap(unichr(0))
assert space.int_w(space.ord(w)) == 0
def test_id():
Modified: pypy/dist/pypy/objspace/dump.py
==============================================================================
--- pypy/dist/pypy/objspace/dump.py (original)
+++ pypy/dist/pypy/objspace/dump.py Sun Nov 11 13:38:13 2007
@@ -145,7 +145,7 @@
'int_w': 1,
'float_w': 1,
'uint_w': 1,
- 'unichars_w': 1,
+ 'unicode_w': 1,
'bigint_w': 1,
'interpclass_w': 1,
'unwrap': 1,
@@ -153,8 +153,6 @@
'is_w': 2,
'newtuple': 0,
'newlist': 0,
- 'newstring': 0,
- 'newunicode': 0,
'newdict': 0,
'newslice': 0,
'call_args': 1,
@@ -165,8 +163,6 @@
'wrap': True,
'newtuple': True,
'newlist': True,
- 'newstring': True,
- 'newunicode': True,
'newdict': True,
'newslice': True,
'call_args': True,
Modified: pypy/dist/pypy/objspace/fake/objspace.py
==============================================================================
--- pypy/dist/pypy/objspace/fake/objspace.py (original)
+++ pypy/dist/pypy/objspace/fake/objspace.py Sun Nov 11 13:38:13 2007
@@ -23,7 +23,7 @@
uint_dummy = make_dummy(r_uint(42), r_uint(43))
str_dummy = make_dummy('foo', 'bar')
bool_dummy = make_dummy(True, False)
-unichars_dummy = make_dummy([u'a', u'b'], [u'c', u'd'])
+unicode_dummy = make_dummy(u'abc', u'cde')
bigint_dummy = make_dummy(rbigint([0]), rbigint([1]))
class FakeObjSpace(ObjSpace):
@@ -75,7 +75,7 @@
int_w = int_dummy
uint_w = uint_dummy
float_w = float_dummy
- unichars_w = unichars_dummy
+ unicode_w = unicode_dummy
bigint_w = bigint_dummy
iter = make_dummy()
type = make_dummy()
@@ -88,8 +88,6 @@
str_w = str_dummy
call_args = make_dummy()
new_interned_str = make_dummy()
- newstring = make_dummy()
- newunicode = make_dummy()
newint = make_dummy()
newlong = make_dummy()
newfloat = make_dummy()
Modified: pypy/dist/pypy/objspace/logic.py
==============================================================================
--- pypy/dist/pypy/objspace/logic.py (original)
+++ pypy/dist/pypy/objspace/logic.py Sun Nov 11 13:38:13 2007
@@ -49,7 +49,7 @@
'int_w': 1,
'float_w': 1,
'uint_w': 1,
- 'unichars_w': 1,
+ 'unicode_w': 1,
'bigint_w': 1,
'interpclass_w': 1,
'unwrap': 1,
@@ -57,8 +57,6 @@
'is_w': 2,
'newtuple': 0,
'newlist': 0,
- 'newstring': 0,
- 'newunicode': 0,
'newdict': 0,
'newslice': 0,
'call_args': 1,
Modified: pypy/dist/pypy/objspace/std/default.py
==============================================================================
--- pypy/dist/pypy/objspace/std/default.py (original)
+++ pypy/dist/pypy/objspace/std/default.py Sun Nov 11 13:38:13 2007
@@ -35,9 +35,9 @@
raise OperationError(space.w_TypeError,
typed_unwrap_error_msg(space, "integer", w_obj))
-def unichars_w__ANY(space,w_obj):
+def unicode_w__ANY(space,w_obj):
raise OperationError(space.w_TypeError,
- typed_unwrap_error_msg(space, "string", w_obj))
+ typed_unwrap_error_msg(space, "unicode", w_obj))
def bigint_w__ANY(space,w_obj):
raise OperationError(space.w_TypeError,
Modified: pypy/dist/pypy/objspace/std/formatting.py
==============================================================================
--- pypy/dist/pypy/objspace/std/formatting.py (original)
+++ pypy/dist/pypy/objspace/std/formatting.py Sun Nov 11 13:38:13 2007
@@ -132,11 +132,16 @@
# to build two subclasses of the BaseStringFormatter class,
# each one getting its own subtle differences and RPython types.
+ if do_unicode:
+ const = unicode
+ else:
+ const = str
+
class StringFormatter(BaseStringFormatter):
def __init__(self, space, fmt, values_w, w_valuedict):
BaseStringFormatter.__init__(self, space, values_w, w_valuedict)
- self.fmt = fmt # either a string or a list of unichars
+ self.fmt = fmt # either a string or a unicode
def peekchr(self):
# return the 'current' character
@@ -176,10 +181,7 @@
if self.w_valuedict is None:
raise OperationError(space.w_TypeError,
space.wrap("format requires a mapping"))
- if do_unicode:
- w_key = space.newunicode(key)
- else:
- w_key = space.wrap(key)
+ w_key = space.wrap(key)
return space.getitem(self.w_valuedict, w_key)
def parse_fmt(self):
@@ -265,9 +267,9 @@
break
i += 1
else:
- result += fmt[i0:]
+ result.append(const(fmt[i0:]))
break # end of 'fmt' string
- result += fmt[i0:i]
+ result.append(const(fmt[i0:i]))
self.fmtpos = i + 1
# interpret the next formatter
@@ -301,7 +303,7 @@
if do_unicode:
w_defaultencoding = space.call_function(
space.sys.get('getdefaultencoding'))
- w_s = space.call_method(space.newunicode([c]),
+ w_s = space.call_method(space.wrap(c),
"encode",
w_defaultencoding,
space.wrap('replace'))
@@ -320,10 +322,12 @@
result = self.result
padding = self.width - length
if not self.f_ljust:
- result += ' ' * padding # add any padding at the left of 'r'
+ result.append(const(' ' * padding))
+ # add any padding at the left of 'r'
padding = 0
- result += r[:length] # add 'r' itself
- result += ' ' * padding # add any remaining padding at the right
+ result.append(const(r[:length])) # add 'r' itself
+ result.append(const(' ' * padding))
+ # add any remaining padding at the right
std_wp._annspecialcase_ = 'specialize:argtype(1)'
def std_wp_number(self, r, prefix=''):
@@ -350,15 +354,15 @@
padnumber = '>'
if padnumber == '>':
- result += ' ' * padding # pad with spaces on the left
+ result.append(const(' ' * padding)) # pad with spaces on the left
if sign:
- result.append(r[0]) # the sign
- result += prefix # the prefix
+ result.append(const(r[0])) # the sign
+ result.append(const(prefix)) # the prefix
if padnumber == '0':
- result += '0' * padding # pad with zeroes
- result += r[int(sign):] # the rest of the number
+ result.append(const('0' * padding)) # pad with zeroes
+ result.append(const(r[int(sign):])) # the rest of the number
if padnumber == '<': # spaces on the right
- result += ' ' * padding
+ result.append(const(' ' * padding))
def fmt_s(self, w_value):
space = self.space
@@ -371,7 +375,7 @@
else:
if not got_unicode:
w_value = space.call_function(space.w_unicode, w_value)
- s = space.unichars_w(w_value)
+ s = space.unicode_w(w_value)
self.std_wp(s)
def fmt_r(self, w_value):
@@ -389,11 +393,11 @@
elif space.is_true(space.isinstance(w_value, space.w_unicode)):
if not do_unicode:
raise NeedUnicodeFormattingError
- lst = space.unichars_w(w_value)
- if len(lst) != 1:
+ ustr = space.unicode_w(w_value)
+ if len(ustr) != 1:
raise OperationError(space.w_TypeError,
space.wrap("%c requires int or unichar"))
- self.std_wp(lst)
+ self.std_wp(ustr)
else:
n = space.int_w(w_value)
if do_unicode:
@@ -402,7 +406,7 @@
except ValueError:
raise OperationError(space.w_OverflowError,
space.wrap("unicode character code out of range"))
- self.std_wp([c])
+ self.std_wp(c)
else:
try:
s = chr(n)
@@ -438,14 +442,14 @@
result = formatter.format()
except NeedUnicodeFormattingError:
# fall through to the unicode case
- fmt = [c for c in fmt] # string => list of unichars
+ fmt = unicode(fmt)
else:
return space.wrap(''.join(result))
else:
- fmt = space.unichars_w(w_fmt)
+ fmt = space.unicode_w(w_fmt)
formatter = UnicodeFormatter(space, fmt, values_w, w_valuedict)
result = formatter.format()
- return space.newunicode(result)
+ return space.wrap(u''.join(result))
def mod_format(space, w_format, w_values, do_unicode=False):
if space.is_true(space.isinstance(w_values, space.w_tuple)):
Modified: pypy/dist/pypy/objspace/std/objspace.py
==============================================================================
--- pypy/dist/pypy/objspace/std/objspace.py (original)
+++ pypy/dist/pypy/objspace/std/objspace.py Sun Nov 11 13:38:13 2007
@@ -400,7 +400,7 @@
from pypy.objspace.std.stringtype import wrapstr
return wrapstr(self, x)
if isinstance(x, unicode):
- return W_UnicodeObject([unichr(ord(u)) for u in x]) # xxx
+ return W_UnicodeObject(x)
if isinstance(x, float):
return W_FloatObject(x)
if isinstance(x, Wrappable):
@@ -525,17 +525,6 @@
def newslice(self, w_start, w_end, w_step):
return W_SliceObject(w_start, w_end, w_step)
- def newstring(self, chars_w):
- try:
- chars = [chr(self.int_w(w_c)) for w_c in chars_w]
- except ValueError: # chr(out-of-range)
- raise OperationError(self.w_ValueError,
- self.wrap("character code not in range(256)"))
- return self.wrap(''.join(chars))
-
- def newunicode(self, chars):
- return W_UnicodeObject(chars)
-
def newseqiter(self, w_obj):
return W_SeqIterObject(w_obj)
@@ -653,7 +642,7 @@
str_w = StdObjSpaceMultiMethod('str_w', 1, []) # returns an unwrapped string
float_w = StdObjSpaceMultiMethod('float_w', 1, []) # returns an unwrapped float
uint_w = StdObjSpaceMultiMethod('uint_w', 1, []) # returns an unwrapped unsigned int (r_uint)
- unichars_w = StdObjSpaceMultiMethod('unichars_w', 1, []) # returns an unwrapped list of unicode characters
+ unicode_w = StdObjSpaceMultiMethod('unicode_w', 1, []) # returns an unwrapped unicode string
bigint_w = StdObjSpaceMultiMethod('bigint_w', 1, []) # returns an unwrapped rbigint
# NOTE: when adding more sometype_w() methods, you need to write a
# stub in default.py to raise a space.w_TypeError
Modified: pypy/dist/pypy/objspace/std/stringobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/stringobject.py (original)
+++ pypy/dist/pypy/objspace/std/stringobject.py Sun Nov 11 13:38:13 2007
@@ -36,6 +36,21 @@
W_StringObject.PREBUILT = [W_StringObject(chr(i)) for i in range(256)]
del i
+def _decode_ascii(space, s):
+ try:
+ return s.decode("ascii")
+ except UnicodeDecodeError:
+ for i in range(len(s)):
+ if ord(s[i]) > 127:
+ raise OperationError(
+ space.w_UnicodeDecodeError,
+ space.wrap(("'ascii' codec can't decode byte %s in position %s:"
+ " ordinal not in range(128)") % (hex(ord(s[i])), i)))
+
+def unicode_w__String(space, w_self):
+ # XXX should this use the default encoding?
+ return _decode_ascii(space, w_self._value)
+
def _is_generic(space, w_self, fun):
v = w_self._value
Modified: pypy/dist/pypy/objspace/std/test/test_index.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_index.py (original)
+++ pypy/dist/pypy/objspace/std/test/test_index.py Sun Nov 11 13:38:13 2007
@@ -223,15 +223,50 @@
SeqTestCase.setup_method(self, method)
self.w_seq = self.space.newtuple([self.space.wrap(x) for x in (0,10,20,30,40,50)])
-class AppTest_StringTestCase(SeqTestCase):
+class StringTestCase(object):
+ def test_startswith(self):
+ self.o.ind = 1
+ assert self.const('abc').startswith(self.const('b'), self.o)
+ self.o.ind = 2
+ assert not self.const('abc').startswith(self.const('abc'), 0, self.o)
+
+ def test_endswith(self):
+ self.o.ind = 1
+ assert self.const('abc').endswith(self.const('a'), 0, self.o)
+ self.o.ind = 2
+ assert not self.const('abc').endswith(self.const('abc'), 0, self.o)
+
+ def test_index(self):
+ self.o.ind = 3
+ assert self.const('abcabc').index(self.const('abc'), 0, self.o) == 0
+ assert self.const('abcabc').index(self.const('abc'), self.o) == 3
+ assert self.const('abcabc').rindex(self.const('abc'), 0, self.o) == 0
+ assert self.const('abcabc').rindex(self.const('abc'), self.o) == 3
+
+ def test_find(self):
+ self.o.ind = 3
+ assert self.const('abcabc').find(self.const('abc'), 0, self.o) == 0
+ assert self.const('abcabc').find(self.const('abc'), self.o) == 3
+ assert self.const('abcabc').rfind(self.const('abc'), 0, self.o) == 0
+ assert self.const('abcabc').rfind(self.const('abc'), self.o) == 3
+
+ def test_count(self):
+ self.o.ind = 3
+ assert self.const('abcabc').count(self.const('abc'), 0, self.o) == 1
+ assert self.const('abcabc').count(self.const('abc'), self.o) == 1
+
+
+class AppTest_StringTestCase(SeqTestCase, StringTestCase):
def setup_method(self, method):
SeqTestCase.setup_method(self, method)
self.w_seq = self.space.wrap("this is a test")
+ self.w_const = self.space.w_str
-class AppTest_UnicodeTestCase(SeqTestCase):
+class AppTest_UnicodeTestCase(SeqTestCase, StringTestCase):
def setup_method(self, method):
SeqTestCase.setup_method(self, method)
self.w_seq = self.space.wrap(u"this is a test")
+ self.w_const = self.space.w_unicode
class AppTest_XRangeTestCase:
Modified: pypy/dist/pypy/objspace/std/test/test_unicodeobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_unicodeobject.py (original)
+++ pypy/dist/pypy/objspace/std/test/test_unicodeobject.py Sun Nov 11 13:38:13 2007
@@ -21,9 +21,6 @@
check(u'a' + 'b', u'ab')
check('a' + u'b', u'ab')
- def test_hash(self):
- assert hash(u'') == 0
-
def test_join(self):
def check(a, b):
assert a == b
@@ -278,3 +275,105 @@
else:
raise Exception("DID NOT RAISE")
+ def test_startswith(self):
+ assert u'ab'.startswith(u'ab') is True
+ assert u'ab'.startswith(u'a') is True
+ assert u'ab'.startswith(u'') is True
+ assert u'x'.startswith(u'a') is False
+ assert u'x'.startswith(u'x') is True
+ assert u''.startswith(u'') is True
+ assert u''.startswith(u'a') is False
+ assert u'x'.startswith(u'xx') is False
+ assert u'y'.startswith(u'xx') is False
+
+ def test_startswith_more(self):
+ assert u'ab'.startswith(u'a', 0) is True
+ assert u'ab'.startswith(u'a', 1) is False
+ assert u'ab'.startswith(u'b', 1) is True
+ assert u'abc'.startswith(u'bc', 1, 2) is False
+ assert u'abc'.startswith(u'c', -1, 4) is True
+
+ def test_startswith_tuples(self):
+ assert u'hello'.startswith((u'he', u'ha'))
+ assert not u'hello'.startswith((u'lo', u'llo'))
+ assert u'hello'.startswith((u'hellox', u'hello'))
+ assert not u'hello'.startswith(())
+ assert u'helloworld'.startswith((u'hellowo', u'rld', u'lowo'), 3)
+ assert not u'helloworld'.startswith((u'hellowo', u'ello', u'rld'), 3)
+ assert u'hello'.startswith((u'lo', u'he'), 0, -1)
+ assert not u'hello'.startswith((u'he', u'hel'), 0, 1)
+ assert u'hello'.startswith((u'he', u'hel'), 0, 2)
+ raises(TypeError, u'hello'.startswith, (42,))
+
+ def test_endswith(self):
+ assert u'ab'.endswith(u'ab') is True
+ assert u'ab'.endswith(u'b') is True
+ assert u'ab'.endswith(u'') is True
+ assert u'x'.endswith(u'a') is False
+ assert u'x'.endswith(u'x') is True
+ assert u''.endswith(u'') is True
+ assert u''.endswith(u'a') is False
+ assert u'x'.endswith(u'xx') is False
+ assert u'y'.endswith(u'xx') is False
+
+ def test_endswith_more(self):
+ assert u'abc'.endswith(u'ab', 0, 2) is True
+ assert u'abc'.endswith(u'bc', 1) is True
+ assert u'abc'.endswith(u'bc', 2) is False
+ assert u'abc'.endswith(u'b', -3, -1) is True
+
+ def test_endswith_tuple(self):
+ assert not u'hello'.endswith((u'he', u'ha'))
+ assert u'hello'.endswith((u'lo', u'llo'))
+ assert u'hello'.endswith((u'hellox', u'hello'))
+ assert not u'hello'.endswith(())
+ assert u'helloworld'.endswith((u'hellowo', u'rld', u'lowo'), 3)
+ assert not u'helloworld'.endswith((u'hellowo', u'ello', u'rld'), 3, -1)
+ assert u'hello'.endswith((u'hell', u'ell'), 0, -1)
+ assert not u'hello'.endswith((u'he', u'hel'), 0, 1)
+ assert u'hello'.endswith((u'he', u'hell'), 0, 4)
+ raises(TypeError, u'hello'.endswith, (42,))
+
+ def test_expandtabs(self):
+ assert u'abc\rab\tdef\ng\thi'.expandtabs() == u'abc\rab def\ng hi'
+ assert u'abc\rab\tdef\ng\thi'.expandtabs(8) == u'abc\rab def\ng hi'
+ assert u'abc\rab\tdef\ng\thi'.expandtabs(4) == u'abc\rab def\ng hi'
+ assert u'abc\r\nab\tdef\ng\thi'.expandtabs(4) == u'abc\r\nab def\ng hi'
+ assert u'abc\rab\tdef\ng\thi'.expandtabs() == u'abc\rab def\ng hi'
+ assert u'abc\rab\tdef\ng\thi'.expandtabs(8) == u'abc\rab def\ng hi'
+ assert u'abc\r\nab\r\ndef\ng\r\nhi'.expandtabs(4) == u'abc\r\nab\r\ndef\ng\r\nhi'
+
+ s = u'xy\t'
+ assert s.expandtabs() =='xy '
+
+ s = u'\txy\t'
+ assert s.expandtabs() ==' xy '
+ assert s.expandtabs(1) ==' xy '
+ assert s.expandtabs(2) ==' xy '
+ assert s.expandtabs(3) ==' xy '
+
+ assert u'xy'.expandtabs() =='xy'
+ assert u''.expandtabs() ==''
+
+ def test_translate(self):
+ assert u'bbbc' == u'abababc'.translate({ord('a'):None})
+ assert u'iiic' == u'abababc'.translate({ord('a'):None, ord('b'):ord('i')})
+ assert u'iiix' == u'abababc'.translate({ord('a'):None, ord('b'):ord('i'), ord('c'):u'x'})
+ assert u'<i><i><i>c' == u'abababc'.translate({ord('a'):None, ord('b'):u'<i>'})
+ assert u'c' == u'abababc'.translate({ord('a'):None, ord('b'):u''})
+ assert u'xyyx' == u'xzx'.translate({ord('z'):u'yy'})
+
+ raises(TypeError, u'hello'.translate)
+ raises(TypeError, u'abababc'.translate, {ord('a'):''})
+
+ def test_unicode_form_encoded_object(self):
+ assert unicode('x', 'utf-8') == u'x'
+ assert unicode('x', 'utf-8', 'strict') == u'x'
+
+ def test_unicode_startswith_tuple(self):
+ assert u'xxx'.startswith(('x', 'y', 'z'), 0)
+ assert u'xxx'.endswith(('x', 'y', 'z'), 0)
+
+ def test_missing_cases(self):
+ # some random cases that were discovered to be untested during annotation
+ assert u'xxx'[1:1] == u''
Modified: pypy/dist/pypy/objspace/std/unicodeobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/unicodeobject.py (original)
+++ pypy/dist/pypy/objspace/std/unicodeobject.py Sun Nov 11 13:38:13 2007
@@ -4,26 +4,29 @@
from pypy.objspace.std.ropeobject import W_RopeObject
from pypy.objspace.std.noneobject import W_NoneObject
from pypy.objspace.std.sliceobject import W_SliceObject
+from pypy.objspace.std import slicetype
from pypy.objspace.std.tupleobject import W_TupleObject
from pypy.rlib.rarithmetic import intmask, ovfcheck
from pypy.module.unicodedata import unicodedb_3_2_0 as unicodedb
+from pypy.tool.sourcetools import func_with_new_name
from pypy.objspace.std.formatting import mod_format
class W_UnicodeObject(W_Object):
from pypy.objspace.std.unicodetype import unicode_typedef as typedef
- def __init__(w_self, unicodechars):
- w_self._value = unicodechars
- w_self.w_hash = None
+ def __init__(w_self, unistr):
+ assert isinstance(unistr, unicode)
+ w_self._value = unistr
+
def __repr__(w_self):
""" representation for debugging purposes """
return "%s(%r)" % (w_self.__class__.__name__, w_self._value)
def unwrap(w_self, space):
- # For faked functions taking unicodearguments.
- # Remove when we no longer need faking.
- return u''.join(w_self._value)
+ # for testing
+ return w_self._value
+W_UnicodeObject.EMPTY = W_UnicodeObject(u'')
registerimplementation(W_UnicodeObject)
@@ -63,7 +66,7 @@
def str_w__Unicode(space, w_uni):
return space.str_w(space.str(w_uni))
-def unichars_w__Unicode(space, w_uni):
+def unicode_w__Unicode(space, w_uni):
return w_uni._value
def str__Unicode(space, w_uni):
@@ -75,11 +78,7 @@
def lt__Unicode_Unicode(space, w_left, w_right):
left = w_left._value
right = w_right._value
- for i in range(min(len(left), len(right))):
- if left[i] != right[i]:
- return space.newbool(ord(left[i]) < ord(right[i]))
- # NB. 'unichar < unichar' is not RPython at the moment
- return space.newbool(len(left) < len(right))
+ return space.newbool(left < right)
def ord__Unicode(space, w_uni):
if len(w_uni._value) != 1:
@@ -107,108 +106,59 @@
contains__Rope_Unicode = contains__String_Unicode
-def _find(self, sub, start, end):
- if len(sub) == 0:
- return start
- if start >= end:
- return -1
- for i in range(start, end - len(sub) + 1):
- for j in range(len(sub)):
- if self[i + j] != sub[j]:
- break
- else:
- return i
- return -1
-
-def _rfind(self, sub, start, end):
- if len(sub) == 0:
- return end
- if end - start < len(sub):
- return -1
- for i in range(end - len(sub), start - 1, -1):
- for j in range(len(sub)):
- if self[i + j] != sub[j]:
- break
- else:
- return i
- return -1
-
def contains__Unicode_Unicode(space, w_container, w_item):
item = w_item._value
container = w_container._value
- return space.newbool(_find(container, item, 0, len(container)) >= 0)
+ return space.newbool(container.find(item) != -1)
def unicode_join__Unicode_ANY(space, w_self, w_list):
- list = space.unpackiterable(w_list)
+ l = space.unpackiterable(w_list)
delim = w_self._value
totlen = 0
- if len(list) == 0:
- return W_UnicodeObject([])
- if (len(list) == 1 and
- space.is_w(space.type(list[0]), space.w_unicode)):
- return list[0]
+ if len(l) == 0:
+ return W_UnicodeObject.EMPTY
+ if (len(l) == 1 and
+ space.is_w(space.type(l[0]), space.w_unicode)):
+ return l[0]
- values_list = [None] * len(list)
- values_list[0] = [u'\0']
- for i in range(len(list)):
- item = list[i]
- if space.is_true(space.isinstance(item, space.w_unicode)):
- pass
+ values_list = []
+ for i in range(len(l)):
+ item = l[i]
+ if isinstance(item, W_UnicodeObject):
+ # shortcut for performance
+ item = item._value
elif space.is_true(space.isinstance(item, space.w_str)):
- item = space.call_function(space.w_unicode, item)
+ item = space.unicode_w(item)
else:
w_msg = space.mod(space.wrap('sequence item %d: expected string or Unicode'),
space.wrap(i))
raise OperationError(space.w_TypeError, w_msg)
- assert isinstance(item, W_UnicodeObject)
- item = item._value
- totlen += len(item)
- values_list[i] = item
- totlen += len(delim) * (len(values_list) - 1)
- if len(values_list) == 1:
- return W_UnicodeObject(values_list[0])
- # Allocate result
- result = [u'\0'] * totlen
- first = values_list[0]
- for i in range(len(first)):
- result[i] = first[i]
- offset = len(first)
- for i in range(1, len(values_list)):
- item = values_list[i]
- # Add delimiter
- for j in range(len(delim)):
- result[offset + j] = delim[j]
- offset += len(delim)
- # Add item from values_list
- for j in range(len(item)):
- result[offset + j] = item[j]
- offset += len(item)
- return W_UnicodeObject(result)
-
+ values_list.append(item)
+ return W_UnicodeObject(w_self._value.join(values_list))
def hash__Unicode(space, w_uni):
- if w_uni.w_hash is None:
- # hrmpf
- chars = w_uni._value
- if len(chars) == 0:
+ s = w_uni._value
+ if space.config.objspace.std.withrope:
+ # be compatible with the special ropes hash
+ # XXX no caching
+ if len(s) == 0:
return space.wrap(0)
- if space.config.objspace.std.withrope:
- x = 0
- for c in chars:
- x = intmask((1000003 * x) + ord(c))
- x <<= 1
- x ^= len(chars)
- x ^= ord(chars[0])
- h = intmask(x)
- else:
- x = ord(chars[0]) << 7
- for c in chars:
- x = intmask((1000003 * x) ^ ord(c))
- h = intmask(x ^ len(chars))
- if h == -1:
- h = -2
- w_uni.w_hash = space.wrap(h)
- return w_uni.w_hash
+ x = 0
+ for c in s:
+ x = intmask((1000003 * x) + ord(c))
+ x <<= 1
+ x ^= len(s)
+ x ^= ord(s[0])
+ h = intmask(x)
+ return space.wrap(h)
+ if we_are_translated():
+ x = hash(s) # to use the hash cache in rpython strings
+ else:
+ from pypy.rlib.rarithmetic import _hash_string
+ x = _hash_string(s) # to make sure we get the same hash as rpython
+ # (otherwise translation will freeze W_DictObjects where we can't find
+ # the keys any more!)
+ return space.wrap(x)
def len__Unicode(space, w_uni):
return space.wrap(len(w_uni._value))
@@ -223,19 +173,19 @@
exc = space.call_function(space.w_IndexError,
space.wrap("unicode index out of range"))
raise OperationError(space.w_IndexError, exc)
- return W_UnicodeObject([uni[ival]])
+ return W_UnicodeObject(uni[ival])
def getitem__Unicode_Slice(space, w_uni, w_slice):
uni = w_uni._value
length = len(uni)
start, stop, step, sl = w_slice.indices4(space, length)
if sl == 0:
- r = []
+ r = u""
elif step == 1:
assert start >= 0 and stop >= 0
r = uni[start:stop]
else:
- r = [uni[start + i*step] for i in range(sl)]
+ r = u"".join([uni[start + i*step] for i in range(sl)])
return W_UnicodeObject(r)
def mul__Unicode_ANY(space, w_uni, w_times):
@@ -245,18 +195,13 @@
if e.match(space, space.w_TypeError):
raise FailedToImplement
raise
- chars = w_uni._value
- charlen = len(chars)
- if times <= 0 or charlen == 0:
- return W_UnicodeObject([])
- if times == 1:
- return space.call_function(space.w_unicode, w_uni)
- if charlen == 1:
- return W_UnicodeObject([w_uni._value[0]] * times)
-
+ uni = w_uni._value
+ length = len(uni)
+ if times <= 0 or length == 0:
+ return W_UnicodeObject.EMPTY
try:
- result_size = ovfcheck(charlen * times)
- result = chars * times
+ result_size = ovfcheck(length * times)
+ result = u''.join([uni] * times)
except (OverflowError, MemoryError):
raise OperationError(space.w_OverflowError, space.wrap('repeated string is too long'))
return W_UnicodeObject(result)
@@ -267,53 +212,23 @@
def _isspace(uchar):
return unicodedb.isspace(ord(uchar))
-def unicode_isspace__Unicode(space, w_unicode):
- if len(w_unicode._value) == 0:
- return space.w_False
- for uchar in w_unicode._value:
- if not unicodedb.isspace(ord(uchar)):
- return space.w_False
- return space.w_True
-
-def unicode_isalpha__Unicode(space, w_unicode):
- if len(w_unicode._value) == 0:
- return space.w_False
- for uchar in w_unicode._value:
- if not unicodedb.isalpha(ord(uchar)):
- return space.w_False
- return space.w_True
-
-def unicode_isalnum__Unicode(space, w_unicode):
- if len(w_unicode._value) == 0:
- return space.w_False
- for uchar in w_unicode._value:
- if not unicodedb.isalnum(ord(uchar)):
- return space.w_False
- return space.w_True
-
-def unicode_isdecimal__Unicode(space, w_unicode):
- if len(w_unicode._value) == 0:
- return space.w_False
- for uchar in w_unicode._value:
- if not unicodedb.isdecimal(ord(uchar)):
+def make_generic(funcname):
+ def func(space, w_self):
+ v = w_self._value
+ if len(v) == 0:
return space.w_False
- return space.w_True
+ for idx in range(len(v)):
+ if not getattr(unicodedb, funcname)(ord(v[idx])):
+ return space.w_False
+ return space.w_True
+ return func_with_new_name(func, "unicode_%s__Unicode" % (funcname, ))
-def unicode_isdigit__Unicode(space, w_unicode):
- if len(w_unicode._value) == 0:
- return space.w_False
- for uchar in w_unicode._value:
- if not unicodedb.isdigit(ord(uchar)):
- return space.w_False
- return space.w_True
-
-def unicode_isnumeric__Unicode(space, w_unicode):
- if len(w_unicode._value) == 0:
- return space.w_False
- for uchar in w_unicode._value:
- if not unicodedb.isnumeric(ord(uchar)):
- return space.w_False
- return space.w_True
+unicode_isspace__Unicode = make_generic("isspace")
+unicode_isalpha__Unicode = make_generic("isalpha")
+unicode_isalnum__Unicode = make_generic("isalnum")
+unicode_isdecimal__Unicode = make_generic("isdecimal")
+unicode_isdigit__Unicode = make_generic("isdigit")
+unicode_isnumeric__Unicode = make_generic("isnumeric")
def unicode_islower__Unicode(space, w_unicode):
cased = False
@@ -423,12 +338,12 @@
def unicode_capitalize__Unicode(space, w_self):
input = w_self._value
if len(input) == 0:
- return W_UnicodeObject([])
+ return W_UnicodeObject.EMPTY
result = [u'\0'] * len(input)
result[0] = unichr(unicodedb.toupper(ord(input[0])))
for i in range(1, len(input)):
result[i] = unichr(unicodedb.tolower(ord(input[i])))
- return W_UnicodeObject(result)
+ return W_UnicodeObject(u''.join(result))
def unicode_title__Unicode(space, w_self):
input = w_self._value
@@ -436,7 +351,7 @@
return w_self
result = [u'\0'] * len(input)
- previous_is_cased = 0
+ previous_is_cased = False
for i in range(len(input)):
unichar = ord(input[i])
if previous_is_cased:
@@ -444,21 +359,21 @@
else:
result[i] = unichr(unicodedb.totitle(unichar))
previous_is_cased = unicodedb.iscased(unichar)
- return W_UnicodeObject(result)
+ return W_UnicodeObject(u''.join(result))
def unicode_lower__Unicode(space, w_self):
input = w_self._value
result = [u'\0'] * len(input)
for i in range(len(input)):
result[i] = unichr(unicodedb.tolower(ord(input[i])))
- return W_UnicodeObject(result)
+ return W_UnicodeObject(u''.join(result))
def unicode_upper__Unicode(space, w_self):
input = w_self._value
result = [u'\0'] * len(input)
for i in range(len(input)):
result[i] = unichr(unicodedb.toupper(ord(input[i])))
- return W_UnicodeObject(result)
+ return W_UnicodeObject(u''.join(result))
def unicode_swapcase__Unicode(space, w_self):
input = w_self._value
@@ -471,7 +386,7 @@
result[i] = unichr(unicodedb.tolower(unichar))
else:
result[i] = input[i]
- return W_UnicodeObject(result)
+ return W_UnicodeObject(u''.join(result))
def _normalize_index(length, index):
if index < 0:
@@ -482,41 +397,73 @@
index = length
return index
-def unicode_endswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+def _convert_idx_params(space, w_self, w_start, w_end):
self = w_self._value
- start = _normalize_index(len(self), space.int_w(w_start))
- end = _normalize_index(len(self), space.int_w(w_end))
+ start = slicetype.adapt_bound(space, len(self), w_start)
+ end = slicetype.adapt_bound(space, len(self), w_end)
- substr = w_substr._value
+ assert start >= 0
+ assert end >= 0
+
+ return (self, start, end)
+
+def _check_startswith_substring(str, substr, start, end):
substr_len = len(substr)
if end - start < substr_len:
- return space.w_False # substring is too long
+ return False # substring is too long
+
+ for i in range(substr_len):
+ if str[start + i] != substr[i]:
+ return False
+ return True
+
+def _check_endswith_substring(str, substr, start, end):
+ substr_len = len(substr)
+
+ if end - start < substr_len:
+ return False # substring is too long
start = end - substr_len
for i in range(substr_len):
- if self[start + i] != substr[i]:
- return space.w_False
- return space.w_True
+ if str[start + i] != substr[i]:
+ return False
+ return True
+
+def unicode_endswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
+ substr = w_substr._value
+ return space.wrap(_check_endswith_substring(self, substr, start, end))
def unicode_startswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- self = w_self._value
- start = _normalize_index(len(self), space.int_w(w_start))
- end = _normalize_index(len(self), space.int_w(w_end))
+ self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
+ # XXX this stuff can be waaay better for ootypebased backends if
+ # we re-use more of our rpython machinery (ie implement startswith
+ # with additional parameters as rpython)
substr = w_substr._value
- substr_len = len(substr)
-
- if end - start < substr_len:
- return space.w_False # substring is too long
-
- for i in range(substr_len):
- if self[start + i] != substr[i]:
- return space.w_False
- return space.w_True
+ return space.wrap(_check_startswith_substring(self, substr, start, end))
+
+def unicode_startswith__Unicode_Tuple_ANY_ANY(space, w_unistr, w_prefixes,
+ w_start, w_end):
+ unistr, start, end = _convert_idx_params(space, w_unistr, w_start, w_end)
+ for w_prefix in space.unpacktuple(w_prefixes):
+ prefix = space.unicode_w(w_prefix)
+ if _check_startswith_substring(unistr, prefix, start, end):
+ return space.w_True
+ return space.w_False
+
+def unicode_endswith__Unicode_Tuple_ANY_ANY(space, w_unistr, w_suffixes,
+ w_start, w_end):
+ unistr, start, end = _convert_idx_params(space, w_unistr, w_start, w_end)
+ for w_suffix in space.unpacktuple(w_suffixes):
+ suffix = space.unicode_w(w_suffix)
+ if _check_endswith_substring(unistr, suffix, start, end):
+ return space.w_True
+ return space.w_False
def _to_unichar_w(space, w_char):
try:
- w_unichar = unicodetype.unicode_from_object(space, w_char)
+ unistr = space.unicode_w(w_char)
except OperationError, e:
if e.match(space, space.w_TypeError):
msg = 'The fill character cannot be converted to Unicode'
@@ -524,10 +471,9 @@
else:
raise
- if space.int_w(space.len(w_unichar)) != 1:
+ if len(unistr) != 1:
raise OperationError(space.w_TypeError, space.wrap('The fill character must be exactly one character long'))
- unichar = unichr(space.int_w(space.ord(w_unichar)))
- return unichar
+ return unistr[0]
def unicode_center__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar):
self = w_self._value
@@ -540,7 +486,7 @@
result = [fillchar] * width
for i in range(len(self)):
result[leftpad + i] = self[i]
- return W_UnicodeObject(result)
+ return W_UnicodeObject(u''.join(result))
def unicode_ljust__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar):
self = w_self._value
@@ -552,7 +498,7 @@
result = [fillchar] * width
for i in range(len(self)):
result[i] = self[i]
- return W_UnicodeObject(result)
+ return W_UnicodeObject(u''.join(result))
def unicode_rjust__Unicode_ANY_ANY(space, w_self, w_width, w_fillchar):
self = w_self._value
@@ -564,13 +510,13 @@
result = [fillchar] * width
for i in range(len(self)):
result[padding + i] = self[i]
- return W_UnicodeObject(result)
+ return W_UnicodeObject(u''.join(result))
def unicode_zfill__Unicode_ANY(space, w_self, w_width):
self = w_self._value
width = space.int_w(w_width)
if len(self) == 0:
- return W_UnicodeObject([u'0'] * width)
+ return W_UnicodeObject(u'0' * width)
padding = width - len(self)
if padding <= 0:
return space.call_function(space.w_unicode, w_self)
@@ -581,7 +527,7 @@
if self[0] in (u'+', u'-'):
result[0] = self[0]
result[padding] = u'0'
- return W_UnicodeObject(result)
+ return W_UnicodeObject(u''.join(result))
def unicode_splitlines__Unicode_ANY(space, w_self, w_keepends):
self = w_self._value
@@ -613,55 +559,37 @@
return space.newlist(lines)
def unicode_find__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- self = w_self._value
- start = _normalize_index(len(self), space.int_w(w_start))
- end = _normalize_index(len(self), space.int_w(w_end))
+ self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
substr = w_substr._value
- return space.wrap(_find(self, substr, start, end))
+ return space.wrap(self.find(substr, start, end))
def unicode_rfind__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- self = w_self._value
- start = _normalize_index(len(self), space.int_w(w_start))
- end = _normalize_index(len(self), space.int_w(w_end))
+ self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
substr = w_substr._value
- return space.wrap(_rfind(self, substr, start, end))
+ return space.wrap(self.rfind(substr, start, end))
def unicode_index__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- self = w_self._value
- start = _normalize_index(len(self), space.int_w(w_start))
- end = _normalize_index(len(self), space.int_w(w_end))
+ self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
substr = w_substr._value
- index = _find(self, substr, start, end)
+ index = self.find(substr, start, end)
if index < 0:
raise OperationError(space.w_ValueError,
space.wrap('substring not found'))
return space.wrap(index)
def unicode_rindex__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- self = w_self._value
- start = _normalize_index(len(self), space.int_w(w_start))
- end = _normalize_index(len(self), space.int_w(w_end))
+ self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
substr = w_substr._value
- index = _rfind(self, substr, start, end)
+ index = self.rfind(substr, start, end)
if index < 0:
raise OperationError(space.w_ValueError,
space.wrap('substring not found'))
return space.wrap(index)
def unicode_count__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
- self = w_self._value
- start = _normalize_index(len(self), space.int_w(w_start))
- end = _normalize_index(len(self), space.int_w(w_end))
+ self, start, end = _convert_idx_params(space, w_self, w_start, w_end)
substr = w_substr._value
- count = 0
- while start <= end:
- index = _find(self, substr, start, end)
- if index < 0:
- break
- start = index + 1
- count += 1
- return space.wrap(count)
-
+ return space.wrap(self.count(substr, start, end))
def unicode_split__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
self = w_self._value
@@ -703,18 +631,8 @@
if delim_len == 0:
raise OperationError(space.w_ValueError,
space.wrap('empty separator'))
- parts = []
- start = 0
- end = len(self)
- while maxsplit != 0:
- index = _find(self, delim, start, end)
- if index < 0:
- break
- parts.append(W_UnicodeObject(self[start:index]))
- start = index + delim_len
- maxsplit -= 1
- parts.append(W_UnicodeObject(self[start:]))
- return space.newlist(parts)
+ parts = _split_with(self, delim, maxsplit)
+ return space.newlist([W_UnicodeObject(part) for part in parts])
def unicode_rsplit__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
@@ -764,7 +682,7 @@
start = 0
end = len(self)
while maxsplit != 0:
- index = _rfind(self, delim, 0, end)
+ index = self.rfind(delim, 0, end)
if index < 0:
break
parts.append(W_UnicodeObject(self[index+delim_len:end]))
@@ -774,76 +692,52 @@
parts.reverse()
return space.newlist(parts)
-def _split(space, self, maxsplit):
+def _split_into_chars(self, maxsplit):
if maxsplit == 0:
- return [W_UnicodeObject(self)]
+ return [self]
index = 0
end = len(self)
- parts = [W_UnicodeObject([])]
+ parts = [u'']
maxsplit -= 1
while maxsplit != 0:
if index >= end:
break
- parts.append(W_UnicodeObject([self[index]]))
+ parts.append(self[index])
index += 1
maxsplit -= 1
- parts.append(W_UnicodeObject(self[index:]))
+ parts.append(self[index:])
+ return parts
+
+def _split_with(self, with_, maxsplit=-1):
+ parts = []
+ start = 0
+ end = len(self)
+ length = len(with_)
+ while maxsplit != 0:
+ index = self.find(with_, start, end)
+ if index < 0:
+ break
+ parts.append(self[start:index])
+ start = index + length
+ maxsplit -= 1
+ parts.append(self[start:])
return parts
def unicode_replace__Unicode_Unicode_Unicode_ANY(space, w_self, w_old,
w_new, w_maxsplit):
if len(w_old._value):
- w_parts = space.call_method(w_self, 'split', w_old, w_maxsplit)
+ parts = _split_with(w_self._value, w_old._value,
+ space.int_w(w_maxsplit))
else:
self = w_self._value
maxsplit = space.int_w(w_maxsplit)
- w_parts = space.newlist(_split(space, self, maxsplit))
- return space.call_method(w_new, 'join', w_parts)
+ parts = _split_into_chars(self, maxsplit)
+ return W_UnicodeObject(w_new._value.join(parts))
app = gateway.applevel(r'''
import sys
-def unicode_expandtabs__Unicode_ANY(self, tabsize):
- parts = self.split(u'\t')
- result = [ parts[0] ]
- prevsize = 0
- for ch in parts[0]:
- prevsize += 1
- if ch in (u"\n", u"\r"):
- prevsize = 0
- for i in range(1, len(parts)):
- pad = tabsize - prevsize % tabsize
- result.append(u' ' * pad)
- nextpart = parts[i]
- result.append(nextpart)
- prevsize = 0
- for ch in nextpart:
- prevsize += 1
- if ch in (u"\n", u"\r"):
- prevsize = 0
- return u''.join(result)
-
-def unicode_translate__Unicode_ANY(self, table):
- result = []
- for unichar in self:
- try:
- newval = table[ord(unichar)]
- except KeyError:
- result.append(unichar)
- else:
- if newval is None:
- continue
- elif isinstance(newval, int):
- if newval < 0 or newval > sys.maxunicode:
- raise TypeError("character mapping must be in range(0x%x)"%(sys.maxunicode + 1,))
- result.append(unichr(newval))
- elif isinstance(newval, unicode):
- result.append(newval)
- else:
- raise TypeError("character mapping must return integer, None or unicode")
- return ''.join(result)
-
def unicode_encode__Unicode_ANY_ANY(unistr, encoding=None, errors=None):
import codecs, sys
if encoding is None:
@@ -858,44 +752,96 @@
raise TypeError("encoder did not return a string object (type=%s)" %
type(retval).__name__)
return retval
+''')
+
-# XXX: These should probably be written on interplevel
-def unicode_partition__Unicode_Unicode(unistr, unisub):
+unicode_encode__Unicode_ANY_ANY = app.interphook('unicode_encode__Unicode_ANY_ANY')
+
+def unicode_partition__Unicode_Unicode(space, w_unistr, w_unisub):
+ unistr = w_unistr._value
+ unisub = w_unisub._value
+ if not unisub:
+ raise OperationError(space.w_ValueError,
+ space.wrap("empty separator"))
pos = unistr.find(unisub)
if pos == -1:
- return (unistr, u'', u'')
+ return space.newtuple([w_unistr, W_UnicodeObject.EMPTY,
+ W_UnicodeObject.EMPTY])
else:
- return (unistr[:pos], unisub, unistr[pos+len(unisub):])
+ assert pos >= 0 # 0 is valid: the separator may be a prefix of unistr
+ return space.newtuple([space.wrap(unistr[:pos]), w_unisub,
+ space.wrap(unistr[pos+len(unisub):])])
-def unicode_rpartition__Unicode_Unicode(unistr, unisub):
+def unicode_rpartition__Unicode_Unicode(space, w_unistr, w_unisub):
+ unistr = w_unistr._value
+ unisub = w_unisub._value
+ if not unisub:
+ raise OperationError(space.w_ValueError,
+ space.wrap("empty separator"))
pos = unistr.rfind(unisub)
if pos == -1:
- return (u'', u'', unistr)
+ return space.newtuple([W_UnicodeObject.EMPTY,
+ W_UnicodeObject.EMPTY, w_unistr])
else:
- return (unistr[:pos], unisub, unistr[pos+len(unisub):])
+ assert pos >= 0 # 0 is valid: the last match may start at index 0
+ return space.newtuple([space.wrap(unistr[:pos]), w_unisub,
+ space.wrap(unistr[pos+len(unisub):])])
-def unicode_startswith__Unicode_Tuple_ANY_ANY(unistr, prefixes, start, end):
- for prefix in prefixes:
- if unistr.startswith(prefix):
- return True
- return False
-
-def unicode_endswith__Unicode_Tuple_ANY_ANY(unistr, suffixes, start, end):
- for suffix in suffixes:
- if unistr.endswith(suffix):
- return True
- return False
-''')
+def unicode_expandtabs__Unicode_ANY(space, w_self, w_tabsize):
+ self = w_self._value
+ tabsize = space.int_w(w_tabsize)
+ parts = _split_with(self, u'\t')
+ result = [parts[0]]
+ prevsize = 0 # column reached so far on the current line
+ for ch in parts[0]:
+ prevsize += 1
+ if ch == u"\n" or ch == u"\r":
+ prevsize = 0
+ for i in range(1, len(parts)):
+ pad = tabsize - prevsize % tabsize # NOTE(review): tabsize == 0 makes this modulo raise ZeroDivisionError
+ result.append(u' ' * pad)
+ nextpart = parts[i]
+ result.append(nextpart)
+ prevsize = 0
+ for ch in nextpart:
+ prevsize += 1
+ if ch == u"\n" or ch == u"\r":
+ prevsize = 0
+ return space.wrap(u''.join(result))
-unicode_expandtabs__Unicode_ANY = app.interphook('unicode_expandtabs__Unicode_ANY')
-unicode_translate__Unicode_ANY = app.interphook('unicode_translate__Unicode_ANY')
-unicode_encode__Unicode_ANY_ANY = app.interphook('unicode_encode__Unicode_ANY_ANY')
-unicode_partition__Unicode_Unicode = app.interphook('unicode_partition__Unicode_Unicode')
-unicode_rpartition__Unicode_Unicode = app.interphook('unicode_rpartition__Unicode_Unicode')
-unicode_startswith__Unicode_Tuple_ANY_ANY = app.interphook('unicode_startswith__Unicode_Tuple_ANY_ANY')
-unicode_endswith__Unicode_Tuple_ANY_ANY = app.interphook('unicode_endswith__Unicode_Tuple_ANY_ANY')
+
+def unicode_translate__Unicode_ANY(space, w_self, w_table):
+ self = w_self._value
+ w_sys = space.getbuiltinmodule('sys')
+ maxunicode = space.int_w(space.getattr(w_sys, space.wrap("maxunicode")))
+ result = []
+ for unichar in self:
+ try:
+ w_newval = space.getitem(w_table, space.wrap(ord(unichar)))
+ except OperationError, e:
+ if e.match(space, space.w_LookupError): # KeyError or IndexError: no mapping, keep the character
+ result.append(unichar)
+ else:
+ raise
+ else:
+ if space.is_w(w_newval, space.w_None):
+ continue # mapped to None: the character is dropped
+ elif space.is_true(space.isinstance(w_newval, space.w_int)):
+ newval = space.int_w(w_newval)
+ if newval < 0 or newval > maxunicode:
+ raise OperationError(
+ space.w_TypeError,
+ space.wrap("character mapping must be in range(0x%x)" % (maxunicode + 1,)))
+ result.append(unichr(newval))
+ elif space.is_true(space.isinstance(w_newval, space.w_unicode)):
+ result.append(space.unicode_w(w_newval))
+ else:
+ raise OperationError(
+ space.w_TypeError,
+ space.wrap("character mapping must return integer, None or unicode"))
+ return W_UnicodeObject(u''.join(result))
# Move this into the _codecs module as 'unicodeescape_string (Remember to cater for quotes)'
def repr__Unicode(space, w_unicode):
@@ -1016,7 +962,6 @@
i += 1
return space.wrap(''.join(result[:i]))
-#repr__Unicode = app.interphook('repr__Unicode') # uncomment when repr code is moved to _codecs
def mod__Unicode_ANY(space, w_format, w_values):
return mod_format(space, w_format, w_values, do_unicode=True)
@@ -1028,6 +973,10 @@
# str.strip(unicode) needs to convert self to unicode and call unicode.strip we
# use the following magic to register strip_string_unicode as a String
# multimethod.
+
+# XXX couldn't string and unicode _share_ the multimethods that make up their
+# methods?
+
class str_methods:
import stringtype
W_UnicodeObject = W_UnicodeObject
Modified: pypy/dist/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/dist/pypy/objspace/std/unicodetype.py (original)
+++ pypy/dist/pypy/objspace/std/unicodetype.py Sun Nov 11 13:38:13 2007
@@ -142,73 +142,87 @@
# ____________________________________________________________
-app = gateway.applevel('''
-def unicode_from_encoded_object(obj, encoding, errors):
- import codecs, sys
+def getdefaultencoding(space):
+ return space.sys.defaultencoding
+
+def unicode_from_encoded_object(space, w_obj, encoding, errors):
+ w_codecs = space.getbuiltinmodule("_codecs")
if encoding is None:
- encoding = sys.getdefaultencoding()
- decoder = codecs.getdecoder(encoding)
+ encoding = getdefaultencoding(space)
+ w_decode = space.getattr(w_codecs, space.wrap("decode"))
if errors is None:
- retval, length = decoder(obj)
+ w_retval = space.call_function(w_decode, w_obj, space.wrap(encoding))
else:
- retval, length = decoder(obj, errors)
- if not isinstance(retval, unicode):
- raise TypeError("decoder did not return an unicode object (type=%s)" %
- type(retval).__name__)
- return retval
-
-def unicode_from_object(obj):
- if isinstance(obj, str):
- res = obj
+ w_retval = space.call_function(w_decode, w_obj, space.wrap(encoding),
+ space.wrap(errors))
+ if not space.is_true(space.isinstance(w_retval, space.w_unicode)):
+ raise OperationError(
+ space.w_TypeError,
+ space.wrap(
+ "decoder did not return an unicode object (type=%s)" %
+ space.type(w_retval).getname(space, '?')))
+ return w_retval
+
+
+def unicode_from_object(space, w_obj):
+ if space.is_true(space.isinstance(w_obj, space.w_str)):
+ w_res = w_obj
else:
try:
- unicode_method = obj.__unicode__
- except AttributeError:
- res = str(obj)
+ # XXX should we have a space.unicode so we can go through
+ # descroperation?
+ w_unicode_method = space.getattr(w_obj, space.wrap("__unicode__"))
+ except OperationError, e:
+ if e.match(space, space.w_AttributeError):
+ w_res = space.str(w_obj)
+ else:
+ raise
else:
- res = unicode_method()
- if isinstance(res, unicode):
- return res
- return unicode_from_encoded_object(res, None, "strict")
-
-''')
-unicode_from_object = app.interphook('unicode_from_object')
-unicode_from_encoded_object = app.interphook('unicode_from_encoded_object')
+ w_res = space.call_function(w_unicode_method)
+ if space.is_true(space.isinstance(w_res, space.w_unicode)):
+ return w_res
+ return unicode_from_encoded_object(space, w_res, None, "strict")
def unicode_from_string(space, w_str):
# this is a performance and bootstrapping hack
from pypy.objspace.std.unicodeobject import W_UnicodeObject
- w_encoding = space.call_function(space.sys.get('getdefaultencoding'))
- if not space.eq_w(w_encoding, space.wrap('ascii')):
+ encoding = getdefaultencoding(space)
+ if encoding != 'ascii':
return unicode_from_object(space, w_str)
s = space.str_w(w_str)
- codelist = []
- for i in range(len(s)):
- code = ord(s[i])
- if code >= 128:
- # raising UnicodeDecodeError is messy, so "please crash for me"
- return unicode_from_object(space, w_str)
- codelist.append(unichr(code))
- return W_UnicodeObject(codelist)
+ try:
+ return W_UnicodeObject(s.decode("ascii"))
+ except UnicodeDecodeError:
+ # raising UnicodeDecodeError is messy, "please crash for me"
+ return unicode_from_object(space, w_str)
+
+def _get_encoding_and_errors(space, w_encoding, w_errors):
+ if space.is_w(w_encoding, space.w_None):
+ encoding = None
+ else:
+ encoding = space.str_w(w_encoding)
+ if space.is_w(w_errors, space.w_None):
+ errors = None
+ else:
+ errors = space.str_w(w_errors)
+ return encoding, errors
-def descr__new__(space, w_unicodetype, w_string='', w_encoding=None, w_errors=None):
- # NB. the default value of w_string is really a *wrapped* empty string:
+def descr__new__(space, w_unicodetype, w_obj='', w_encoding=None, w_errors=None):
+ # NB. the default value of w_obj is really a *wrapped* empty string:
# there is gateway magic at work
from pypy.objspace.std.unicodeobject import W_UnicodeObject
- w_obj = w_string
w_obj_type = space.type(w_obj)
+ encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
if space.is_w(w_obj_type, space.w_unicode):
- if (not space.is_w(w_encoding, space.w_None) or
- not space.is_w(w_errors, space.w_None)):
+ if encoding is not None or errors is not None:
raise OperationError(space.w_TypeError,
space.wrap('decoding Unicode is not supported'))
if space.is_w(w_unicodetype, space.w_unicode):
return w_obj
w_value = w_obj
- elif (space.is_w(w_encoding, space.w_None) and
- space.is_w(w_errors, space.w_None)):
+ elif encoding is None and errors is None:
if space.is_true(space.isinstance(w_obj, space.w_str)):
w_value = unicode_from_string(space, w_obj)
elif space.is_true(space.isinstance(w_obj, space.w_unicode)):
@@ -216,7 +230,7 @@
else:
w_value = unicode_from_object(space, w_obj)
else:
- w_value = unicode_from_encoded_object(space, w_obj, w_encoding, w_errors)
+ w_value = unicode_from_encoded_object(space, w_obj, encoding, errors)
# help the annotator! also the ._value depends on W_UnicodeObject layout
assert isinstance(w_value, W_UnicodeObject)
w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
Modified: pypy/dist/pypy/objspace/thunk.py
==============================================================================
--- pypy/dist/pypy/objspace/thunk.py (original)
+++ pypy/dist/pypy/objspace/thunk.py Sun Nov 11 13:38:13 2007
@@ -150,7 +150,7 @@
'int_w': 1,
'float_w': 1,
'uint_w': 1,
- 'unichars_w': 1,
+ 'unicode_w': 1,
'bigint_w': 1,
'interpclass_w': 1,
'unwrap': 1,
@@ -158,8 +158,6 @@
'is_w': 2,
'newtuple': 0,
'newlist': 0,
- 'newstring': 0,
- 'newunicode': 0,
'newdict': 0,
'newslice': 0,
'call_args': 1,
Modified: pypy/dist/pypy/rlib/rsocket.py
==============================================================================
--- pypy/dist/pypy/rlib/rsocket.py (original)
+++ pypy/dist/pypy/rlib/rsocket.py Sun Nov 11 13:38:13 2007
@@ -128,6 +128,11 @@
raise RSocketError("unknown address family")
from_object = staticmethod(from_object)
+ def fill_from_object(self, space, w_address):
+ """ Purely abstract
+ """
+ raise NotImplementedError
+
# ____________________________________________________________
def makeipaddr(name, result=None):
@@ -189,6 +194,11 @@
host, serv = getnameinfo(self, NI_NUMERICHOST | NI_NUMERICSERV)
return host
+ def lock_in_addr(self):
+ """ Purely abstract
+ """
+ raise NotImplementedError
+
# ____________________________________________________________
class INETAddress(IPAddress):
Modified: pypy/dist/pypy/rlib/test/test_rsocket.py
==============================================================================
--- pypy/dist/pypy/rlib/test/test_rsocket.py (original)
+++ pypy/dist/pypy/rlib/test/test_rsocket.py Sun Nov 11 13:38:13 2007
@@ -1,6 +1,7 @@
import py, errno, sys
from pypy.rlib import rsocket
from pypy.rlib.rsocket import *
+import socket as cpy_socket
# cannot test error codes in Win32 because ll2ctypes doesn't save
# the errors that WSAGetLastError() should return, making it likely
@@ -80,8 +81,8 @@
assert getservbyname('http', 'tcp') == 80
def test_getservbyport():
- assert getservbyport(80) == 'http'
- assert getservbyport(80, 'tcp') == 'http'
+ assert getservbyport(80) == cpy_socket.getservbyport(80)
+ assert getservbyport(80, 'tcp') == cpy_socket.getservbyport(80, 'tcp') # compare like with like: same protocol on both sides
def test_getprotobyname():
assert getprotobyname('tcp') == IPPROTO_TCP
Modified: pypy/dist/pypy/translator/cli/function.py
==============================================================================
--- pypy/dist/pypy/translator/cli/function.py (original)
+++ pypy/dist/pypy/translator/cli/function.py Sun Nov 11 13:38:13 2007
@@ -162,6 +162,17 @@
for link, lbl in cases.itervalues():
self.render_switch_case(link, lbl)
+ def call_oostring(self, ARGTYPE):
+ if isinstance(ARGTYPE, ootype.Instance):
+ argtype = self.cts.types.object
+ else:
+ argtype = self.cts.lltype_to_cts(ARGTYPE)
+ self.call_signature('string [pypylib]pypy.runtime.Utils::OOString(%s, int32)' % argtype)
+
+ def call_oounicode(self, ARGTYPE):
+ argtype = self.cts.lltype_to_cts(ARGTYPE)
+ self.call_signature('string [pypylib]pypy.runtime.Utils::OOUnicode(%s)' % argtype)
+
# Those parts of the generator interface that are function
# specific
Modified: pypy/dist/pypy/translator/cli/metavm.py
==============================================================================
--- pypy/dist/pypy/translator/cli/metavm.py (original)
+++ pypy/dist/pypy/translator/cli/metavm.py Sun Nov 11 13:38:13 2007
@@ -95,28 +95,6 @@
generator.call_signature('object [pypylib]pypy.runtime.Utils::RuntimeNew(class [mscorlib]System.Type)')
generator.cast_to(op.result.concretetype)
-class _OOString(MicroInstruction):
- def render(self, generator, op):
- ARGTYPE = op.args[0].concretetype
- if isinstance(ARGTYPE, ootype.Instance):
- argtype = 'object'
- else:
- argtype = generator.cts.lltype_to_cts(ARGTYPE)
- generator.load(op.args[0])
- generator.load(op.args[1])
- generator.call_signature('string [pypylib]pypy.runtime.Utils::OOString(%s, int32)' % argtype)
-
-class _OOUnicode(MicroInstruction):
- def render(self, generator, op):
- from pypy.objspace.flow.model import Constant
- ARGTYPE = op.args[0].concretetype
- argtype = generator.cts.lltype_to_cts(ARGTYPE)
- v_base = op.args[1]
- assert v_base.value == -1, "The second argument of oounicode must be -1"
-
- generator.load(op.args[0])
- generator.call_signature('string [pypylib]pypy.runtime.Utils::OOUnicode(%s)' % argtype)
-
class _NewCustomDict(MicroInstruction):
def render(self, generator, op):
DICT = op.args[0].value
@@ -236,8 +214,6 @@
CallMethod = _CallMethod()
IndirectCall = _IndirectCall()
RuntimeNew = _RuntimeNew()
-OOString = _OOString()
-OOUnicode = _OOUnicode()
NewCustomDict = _NewCustomDict()
#CastWeakAdrToPtr = _CastWeakAdrToPtr()
Box = _Box()
Modified: pypy/dist/pypy/translator/cli/opcodes.py
==============================================================================
--- pypy/dist/pypy/translator/cli/opcodes.py (original)
+++ pypy/dist/pypy/translator/cli/opcodes.py Sun Nov 11 13:38:13 2007
@@ -1,9 +1,9 @@
from pypy.translator.cli.metavm import Call, CallMethod, \
- IndirectCall, GetField, SetField, OOString, DownCast, NewCustomDict,\
+ IndirectCall, GetField, SetField, DownCast, NewCustomDict,\
MapException, Box, Unbox, NewArray, GetArrayElem, SetArrayElem,\
- TypeOf, CastPrimitive, OOUnicode
+ TypeOf, CastPrimitive
from pypy.translator.oosupport.metavm import PushArg, PushAllArgs, StoreResult, InstructionList,\
- New, RuntimeNew, CastTo, PushPrimitive
+ New, RuntimeNew, CastTo, PushPrimitive, OOString, OOUnicode
from pypy.translator.cli.cts import WEAKREF
from pypy.rpython.ootypesystem import ootype
Modified: pypy/dist/pypy/translator/geninterplevel.py
==============================================================================
--- pypy/dist/pypy/translator/geninterplevel.py (original)
+++ pypy/dist/pypy/translator/geninterplevel.py Sun Nov 11 13:38:13 2007
@@ -177,7 +177,7 @@
# special constructors:
self.has_listarg = {}
- for name in "newtuple newlist newstring".split():
+ for name in "newtuple newlist".split():
self.has_listarg[name] = name
# catching all builtins in advance, to avoid problems
Modified: pypy/dist/pypy/translator/jvm/builtin.py
==============================================================================
--- pypy/dist/pypy/translator/jvm/builtin.py (original)
+++ pypy/dist/pypy/translator/jvm/builtin.py Sun Nov 11 13:38:13 2007
@@ -149,3 +149,18 @@
jvmgen.Method.v(jArrayList, "get", (jInt,), jObject),
}
+
+# ootype.String[Builder] and ootype.Unicode[Builder] are mapped to the
+# same JVM type, so we reuse the same builtin methods also for them
+def add_unicode_methods():
+ mapping = {
+ ootype.String.__class__: ootype.Unicode.__class__,
+ ootype.StringBuilder.__class__: ootype.UnicodeBuilder.__class__
+ }
+
+ for (TYPE, name), value in built_in_methods.items():
+ if TYPE in mapping:
+ TYPE = mapping[TYPE]
+ built_in_methods[TYPE, name] = value
+add_unicode_methods()
+del add_unicode_methods
Modified: pypy/dist/pypy/translator/jvm/conftest.py
==============================================================================
--- pypy/dist/pypy/translator/jvm/conftest.py (original)
+++ pypy/dist/pypy/translator/jvm/conftest.py Sun Nov 11 13:38:13 2007
@@ -15,8 +15,8 @@
help="don't assemble jasmin files"),
Option('--package', action='store', dest='package', default='pypy',
help='Package to output generated classes into'),
- Option('--trace', action='store_true', dest='trace', default=False,
- help='Trace execution of generated code'),
+## Option('--trace', action='store_true', dest='trace', default=False,
+## help='Trace execution of generated code'),
Option('--byte-arrays', action='store_true', dest='byte-arrays',
default=False, help='Use byte arrays rather than native strings'),
)
Modified: pypy/dist/pypy/translator/jvm/database.py
==============================================================================
--- pypy/dist/pypy/translator/jvm/database.py (original)
+++ pypy/dist/pypy/translator/jvm/database.py Sun Nov 11 13:38:13 2007
@@ -412,7 +412,9 @@
ootype.Bool:jvmgen.PYPYSERIALIZEBOOLEAN,
ootype.Void:jvmgen.PYPYSERIALIZEVOID,
ootype.Char:jvmgen.PYPYESCAPEDCHAR,
+ ootype.UniChar:jvmgen.PYPYESCAPEDUNICHAR,
ootype.String:jvmgen.PYPYESCAPEDSTRING,
+ ootype.Unicode:jvmgen.PYPYESCAPEDUNICODE,
}
def toString_method_for_ootype(self, OOTYPE):
@@ -466,7 +468,9 @@
# will return a JvmBuiltInType based on the value
ootype_to_builtin = {
ootype.String: jvmtype.jString,
+ ootype.Unicode: jvmtype.jString,
ootype.StringBuilder: jvmtype.jStringBuilder,
+ ootype.UnicodeBuilder: jvmtype.jStringBuilder,
ootype.List: jvmtype.jArrayList,
ootype.Dict: jvmtype.jHashMap,
ootype.DictItemsIterator:jvmtype.jPyPyDictItemsIterator,
Modified: pypy/dist/pypy/translator/jvm/generator.py
==============================================================================
--- pypy/dist/pypy/translator/jvm/generator.py (original)
+++ pypy/dist/pypy/translator/jvm/generator.py Sun Nov 11 13:38:13 2007
@@ -389,7 +389,9 @@
PYPYSERIALIZEULONG = Method.s(jPyPy, 'serialize_ulonglong', (jLong,), jString)
PYPYSERIALIZEVOID = Method.s(jPyPy, 'serialize_void', (), jString)
PYPYESCAPEDCHAR = Method.s(jPyPy, 'escaped_char', (jChar,), jString)
+PYPYESCAPEDUNICHAR = Method.s(jPyPy, 'escaped_unichar', (jChar,), jString)
PYPYESCAPEDSTRING = Method.s(jPyPy, 'escaped_string', (jString,), jString)
+PYPYESCAPEDUNICODE = Method.s(jPyPy, 'escaped_unicode', (jString,), jString)
PYPYSERIALIZEOBJECT = Method.s(jPyPy, 'serializeObject', (jObject,), jString)
PYPYRUNTIMENEW = Method.s(jPyPy, 'RuntimeNew', (jClass,), jObject)
PYPYSTRING2BYTES = Method.s(jPyPy, 'string2bytes', (jString,), jByteArray)
@@ -1018,6 +1020,13 @@
self.emit(mthd)
if self.db.using_byte_array:
self.emit(PYPYSTRING2BYTES)
+
+ def call_oounicode(self, OOTYPE):
+ cts_type = self.db.lltype_to_cts(OOTYPE)
+ mthd = Method.s(jPyPy, 'oounicode', [cts_type], jString)
+ self.emit(mthd)
+ if self.db.using_byte_array:
+ self.emit(PYPYSTRING2BYTES)
def new(self, TYPE):
jtype = self.db.lltype_to_cts(TYPE)
@@ -1087,11 +1096,13 @@
self._push_long_constant(value)
elif TYPE is ootype.Float:
self._push_double_constant(float(value))
- elif TYPE is ootype.String:
- if value == ootype.null(ootype.String):
+ elif TYPE in (ootype.String, ootype.Unicode):
+ if value == ootype.null(TYPE):
self.emit(ACONST_NULL)
else:
self.load_string(str(value._str))
+ else:
+ assert False, 'Unknown constant type: %s' % TYPE
def _push_long_constant(self, value):
if value == 0:
@@ -1336,7 +1347,7 @@
return str(arg)
strargs = [jasmin_syntax(arg) for arg in args]
instr_text = '%s %s' % (jvmstr, " ".join(strargs))
- #self.curclass.out(' .line %d\n' % self.curfunc.instr_counter)
+ self.curclass.out(' .line %d\n' % self.curfunc.instr_counter)
self.curclass.out(' %-60s\n' % (instr_text,))
self.curfunc.instr_counter+=1
Modified: pypy/dist/pypy/translator/jvm/opcodes.py
==============================================================================
--- pypy/dist/pypy/translator/jvm/opcodes.py (original)
+++ pypy/dist/pypy/translator/jvm/opcodes.py Sun Nov 11 13:38:13 2007
@@ -7,7 +7,8 @@
from pypy.translator.oosupport.metavm import \
PushArg, PushAllArgs, StoreResult, InstructionList, New, DoNothing, Call,\
- SetField, GetField, DownCast, RuntimeNew, OOString, CastTo, PushPrimitive
+ SetField, GetField, DownCast, RuntimeNew, OOString, OOUnicode, \
+ CastTo, PushPrimitive
from pypy.translator.jvm.metavm import \
IndirectCall, JvmCallMethod, TranslateException, NewCustomDict, \
CastPrimitive
@@ -56,6 +57,7 @@
'ooidentityhash': [PushAllArgs, jvmgen.OBJHASHCODE, StoreResult],
'oohash': [PushAllArgs, jvmgen.OBJHASHCODE, StoreResult],
'oostring': [OOString, StoreResult],
+ 'oounicode': [OOUnicode, StoreResult],
#'ooparse_int': [PushAllArgs, 'call int32 [pypylib]pypy.runtime.Utils::OOParseInt(string, int32)'],
'ooparse_float': jvmgen.PYPYOOPARSEFLOAT,
'oonewcustomdict': [NewCustomDict, StoreResult],
Modified: pypy/dist/pypy/translator/jvm/prebuiltnodes.py
==============================================================================
--- pypy/dist/pypy/translator/jvm/prebuiltnodes.py (original)
+++ pypy/dist/pypy/translator/jvm/prebuiltnodes.py Sun Nov 11 13:38:13 2007
@@ -14,6 +14,9 @@
def throwValueError():
raise ValueError
+def throwUnicodeDecodeError():
+ raise UnicodeDecodeError
+
# ___________________________________________________________________________
def create_interlink_node(db):
Modified: pypy/dist/pypy/translator/jvm/src/pypy/Interlink.java
==============================================================================
--- pypy/dist/pypy/translator/jvm/src/pypy/Interlink.java (original)
+++ pypy/dist/pypy/translator/jvm/src/pypy/Interlink.java Sun Nov 11 13:38:13 2007
@@ -14,5 +14,6 @@
public void throwIndexError();
public void throwOverflowError();
public void throwValueError();
+ public void throwUnicodeDecodeError();
public void throwOSError(int errCode);
}
Modified: pypy/dist/pypy/translator/jvm/src/pypy/PyPy.java
==============================================================================
--- pypy/dist/pypy/translator/jvm/src/pypy/PyPy.java (original)
+++ pypy/dist/pypy/translator/jvm/src/pypy/PyPy.java Sun Nov 11 13:38:13 2007
@@ -319,6 +319,10 @@
return sb.toString();
}
+ public static String escaped_unichar(char c) {
+ return "u" + escaped_char(c);
+ }
+
public static String escaped_string(String b) {
if (b == null)
return "None";
@@ -332,6 +336,10 @@
return sb.toString();
}
+ public static String escaped_unicode(String b) {
+ return "u" + escaped_string(b);
+ }
+
// used in running unit tests
// not really part of the dump_XXX set of objects, hence the lack
// of an indent parameter
@@ -806,6 +814,24 @@
}
// ----------------------------------------------------------------------
+ // OOUnicode support
+
+ public static String oounicode(char ch)
+ {
+ return String.valueOf(ch); // same result as new Character(ch).toString(), without the boxing
+ }
+
+ public static String oounicode(String s)
+ {
+ for(int i=0; i<s.length(); i++) {
+ char ch = s.charAt(i);
+ if ((int)ch > 127)
+ throwUnicodeDecodeError();
+ }
+ return s;
+ }
+
+ // ----------------------------------------------------------------------
// Primitive built-in functions
public static double ll_time_clock() {
@@ -957,6 +983,10 @@
public static void throwValueError() {
interlink.throwValueError();
}
+
+ public static void throwUnicodeDecodeError() {
+ interlink.throwUnicodeDecodeError();
+ }
// ----------------------------------------------------------------------
// Self Test
Modified: pypy/dist/pypy/translator/jvm/test/runtest.py
==============================================================================
--- pypy/dist/pypy/translator/jvm/test/runtest.py (original)
+++ pypy/dist/pypy/translator/jvm/test/runtest.py Sun Nov 11 13:38:13 2007
@@ -140,6 +140,9 @@
def ll_to_string(self, s):
return s
+ def ll_to_unicode(self, s):
+ return s
+
def ll_to_list(self, l):
return l
Added: pypy/dist/pypy/translator/jvm/test/test_unicode.py
==============================================================================
--- (empty file)
+++ pypy/dist/pypy/translator/jvm/test/test_unicode.py Sun Nov 11 13:38:13 2007
@@ -0,0 +1,20 @@
+import py
+from pypy.translator.jvm.test.runtest import JvmTest
+from pypy.rpython.test.test_runicode import BaseTestRUnicode
+
+# ====> ../../../rpython/test/test_runicode.py
+
+class TestJvmUnicode(JvmTest, BaseTestRUnicode):
+
+ EMPTY_STRING_HASH = 0
+
+ def test_unichar_const(self):
+ py.test.skip("JVM doesn't support unicode for command line arguments")
+ test_unichar_eq = test_unichar_const
+ test_unichar_ord = test_unichar_const
+ test_unichar_hash = test_unichar_const
+ test_char_unichar_eq = test_unichar_const
+ test_char_unichar_eq_2 = test_unichar_const
+
+ def test_getitem_exc(self):
+ py.test.skip('fixme!')
Modified: pypy/dist/pypy/translator/oosupport/metavm.py
==============================================================================
--- pypy/dist/pypy/translator/oosupport/metavm.py (original)
+++ pypy/dist/pypy/translator/oosupport/metavm.py Sun Nov 11 13:38:13 2007
@@ -452,6 +452,15 @@
generator.load(op.args[1])
generator.call_oostring(ARGTYPE)
+class _OOUnicode(MicroInstruction):
+ def render(self, generator, op):
+ v_base = op.args[1]
+ assert v_base.value == -1, "The second argument of oounicode must be -1"
+
+ ARGTYPE = op.args[0].concretetype
+ generator.load(op.args[0])
+ generator.call_oounicode(ARGTYPE)
+
class _CastTo(MicroInstruction):
def render(self, generator, op):
generator.load(op.args[0])
@@ -471,5 +480,6 @@
CallMethod = _CallMethod()
RuntimeNew = _RuntimeNew()
OOString = _OOString()
+OOUnicode = _OOUnicode()
CastTo = _CastTo()
More information about the Pypy-commit
mailing list