[pypy-svn] r12735 - in pypy/dist/pypy: documentation interpreter lib module/__builtin__ module/unicodedata objspace/std objspace/std/test
ac at codespeak.net
ac at codespeak.net
Mon May 23 13:05:59 CEST 2005
Author: ac
Date: Mon May 23 13:05:59 2005
New Revision: 12735
Added:
pypy/dist/pypy/module/unicodedata/ (props changed)
- copied from r12617, pypy/branch/non-fake-unicode/pypy/module/unicodedata/
Modified:
pypy/dist/pypy/documentation/objspace.txt
pypy/dist/pypy/interpreter/baseobjspace.py
pypy/dist/pypy/lib/_formatting.py
pypy/dist/pypy/module/__builtin__/__init__.py
pypy/dist/pypy/module/__builtin__/app_misc.py
pypy/dist/pypy/module/__builtin__/compiling.py
pypy/dist/pypy/module/__builtin__/operation.py
pypy/dist/pypy/module/unicodedata/__init__.py (contents, props changed)
pypy/dist/pypy/module/unicodedata/functions.py (props changed)
pypy/dist/pypy/module/unicodedata/generate_unicodedb.py (props changed)
pypy/dist/pypy/module/unicodedata/unicodedb.py (props changed)
pypy/dist/pypy/objspace/std/floattype.py
pypy/dist/pypy/objspace/std/inttype.py
pypy/dist/pypy/objspace/std/longtype.py
pypy/dist/pypy/objspace/std/objspace.py
pypy/dist/pypy/objspace/std/test/test_unicodestring.py
pypy/dist/pypy/objspace/std/unicodeobject.py
pypy/dist/pypy/objspace/std/unicodetype.py
Log:
Merge the 'non-fake-unicode' branch.
Modified: pypy/dist/pypy/documentation/objspace.txt
==============================================================================
--- pypy/dist/pypy/documentation/objspace.txt (original)
+++ pypy/dist/pypy/documentation/objspace.txt Mon May 23 13:05:59 2005
@@ -93,6 +93,9 @@
**newstring(asciilist):**
Creates a string from a list of wrapped integers.
+**newunicode(codelist):**
+ Creates a unicode string from a list of wrapped integers.
+
Conversions from Application Level to Interpreter Level
----------------------------------------------------------
Modified: pypy/dist/pypy/interpreter/baseobjspace.py
==============================================================================
--- pypy/dist/pypy/interpreter/baseobjspace.py (original)
+++ pypy/dist/pypy/interpreter/baseobjspace.py Mon May 23 13:05:59 2005
@@ -129,6 +129,7 @@
w_builtin = self.wrap(self.builtin)
self.setitem(w_modules, w_name, w_builtin)
self.setitem(self.builtin.w_dict, self.wrap('__builtins__'), w_builtin)
+ self.setbuiltinmodule('unicodedata')
# XXX we need to resolve unwrapping issues to
# make this the default _sre module
@@ -532,6 +533,7 @@
# newtuple([w_1, w_2,...]) -> w_tuple
# newlist([w_1, w_2,...]) -> w_list
# newstring([w_1, w_2,...]) -> w_string from ascii numbers (bytes)
+# newunicode([w_1, w_2,...]) -> w_unicode from numbers
# newdict([(w_key,w_value),...]) -> w_dict
# newslice(w_start,w_stop,w_step) -> w_slice
# call_args(w_obj,Arguments()) -> w_result
@@ -549,6 +551,7 @@
'newtuple',
'newlist',
'newstring',
+ 'newunicode',
'newdict',
'newslice',
'call_args'
Modified: pypy/dist/pypy/lib/_formatting.py
==============================================================================
--- pypy/dist/pypy/lib/_formatting.py (original)
+++ pypy/dist/pypy/lib/_formatting.py Mon May 23 13:05:59 2005
@@ -6,6 +6,7 @@
# (1) rounding isn't always right (see comments in _float_formatting).
# (2) something goes wrong in the f_alt case of %g handling.
# (3) it's really, really slow.
+import sys
class _Flags(object):
def __repr__(self):
@@ -103,6 +104,9 @@
return (c, flags, width, prec, value)
+class NeedUnicodeFormattingError(Exception):
+ pass
+
class Formatter(object):
def __init__(self, char, flags, width, prec, value):
self.char = char
@@ -314,6 +318,9 @@
v = self.value
if len(v) != 1:
raise TypeError, "%c requires int or char"
+
+ elif isinstance(self.value, unicode):
+ raise NeedUnicodeFormattingError
else:
i = maybe_int(self.value)
if not 0 <= i <= 255:
@@ -323,8 +330,15 @@
self.prec = None
return self.std_wp(v)
+class StringFormatter(Formatter):
+ def format(self):
+ if isinstance(self.value, unicode):
+ raise NeedUnicodeFormattingError
+ return self.std_wp(str(self.value))
+
-format_registry = {
+
+str_format_registry = {
'd':IntFormatter,
'i':IntFormatter,
'o':OctFormatter,
@@ -338,13 +352,61 @@
'g':FloatGFormatter,
'G':FloatGFormatter,
'c':CharFormatter,
- 's':funcFormatter(str),
+ 's':StringFormatter,
'r':funcFormatter(repr),
# this *can* get accessed, by e.g. '%()4%'%{'':1}.
# The usual %% case has to be handled specially as it
# doesn't consume a value.
'%':funcFormatter(lambda x:'%'),
}
+
+class UnicodeStringFormatter(Formatter):
+ def format(self):
+ if isinstance(self.value, unicode):
+ return self.std_wp(self.value)
+ return self.std_wp(str(self.value))
+
+class UnicodeCharFormatter(Formatter):
+ def format(self):
+ if isinstance(self.value, unicode):
+ v = self.value
+ if len(v) != 1:
+ raise TypeError, "%c requires int or unicode char"
+ elif isinstance(self.value, str):
+ v = unicode(self.value)
+ if len(v) != 1:
+ raise TypeError, "%c requires int or unicode char"
+ else:
+ i = maybe_int(self.value)
+ if not 0 <= i <= sys.maxunicode:
+ raise OverflowError("OverflowError: unsigned byte "
+ "integer is greater than maximum")
+ v = unichr(i)
+ self.prec = None
+ return self.std_wp(v)
+
+unicode_format_registry = {
+ u'd':IntFormatter,
+ u'i':IntFormatter,
+ u'o':OctFormatter,
+ u'u':IntFormatter,
+ u'x':HexFormatter,
+ u'X':HexFormatter,
+ u'e':FloatEFormatter,
+ u'E':FloatEFormatter,
+ u'f':FloatFFormatter,
+ u'F':FloatFFormatter,
+ u'g':FloatGFormatter,
+ u'G':FloatGFormatter,
+ u'c':UnicodeCharFormatter,
+ u's':UnicodeStringFormatter,
+ u'r':funcFormatter(repr),
+ # this *can* get accessed, by e.g. '%()4%'%{'':1}.
+ # The usual %% case has to be handled specially as it
+ # doesn't consume a value.
+ u'%':funcFormatter(lambda x:u'%'),
+ }
+
del funcFormatter # don't irritate flow space
@@ -375,7 +437,12 @@
return self.fmt[i:j]
-def format(fmt, values, valuedict=None):
+def format(fmt, values, valuedict=None, do_unicode=False):
+ if do_unicode:
+ format_registry = unicode_format_registry
+ else:
+ format_registry = str_format_registry
+
fmtiter = FmtIter(fmt)
valueiter = iter(values)
r = []
@@ -394,7 +461,20 @@
# so let's be explicit about the args:
# r.append(f(*t).format())
char, flags, width, prec, value = t
- r.append(f(char, flags, width, prec, value).format())
+ try:
+ r.append(f(char, flags, width, prec, value).format())
+ except NeedUnicodeFormattingError:
+ # Switch to using the unicode formatters and retry.
+ do_unicode = True
+ format_registry = unicode_format_registry
+ try:
+ f = format_registry[t[0]]
+ except KeyError:
+ raise ValueError("unsupported format character "
+ "'%s' (0x%x) at index %d"
+ %(t[0], ord(t[0]), fmtiter.i-1))
+ r.append(f(char, flags, width, prec, value).format())
+
else:
# efficiency hack:
r.append(c + fmtiter.skip_to_fmt())
@@ -408,5 +488,7 @@
if valuedict is None:
raise TypeError('not all arguments converted '
'during string formatting')
+ if do_unicode:
+ return u''.join(r)
return ''.join(r)
Modified: pypy/dist/pypy/module/__builtin__/__init__.py
==============================================================================
--- pypy/dist/pypy/module/__builtin__/__init__.py (original)
+++ pypy/dist/pypy/module/__builtin__/__init__.py Mon May 23 13:05:59 2005
@@ -47,7 +47,6 @@
'complex' : 'app_complex.complex',
'intern' : 'app_misc.intern',
- 'unichr' : 'app_misc.unichr',
'buffer' : 'app_buffer.buffer',
'reload' : 'app_misc.reload',
}
@@ -64,7 +63,7 @@
'object' : '(space.w_object)',
'file' : '(space.wrap(file))',
'open' : '(space.wrap(file))',
- 'unicode' : '(space.wrap(unicode))', # XXX faked
+ 'unicode' : '(space.w_unicode)',
# old-style classes dummy support
'_classobj' : 'space.w_classobj',
@@ -76,6 +75,7 @@
# interp-level function definitions
'abs' : 'operation.abs',
'chr' : 'operation.chr',
+ 'unichr' : 'operation.unichr',
'len' : 'operation.len',
'ord' : 'operation.ord',
'pow' : 'operation.pow',
Modified: pypy/dist/pypy/module/__builtin__/app_misc.py
==============================================================================
--- pypy/dist/pypy/module/__builtin__/app_misc.py (original)
+++ pypy/dist/pypy/module/__builtin__/app_misc.py Mon May 23 13:05:59 2005
@@ -11,13 +11,6 @@
return _stringtable.setdefault(s,s)
-def unichr(code):
- import sys
- if (code < 0 or code > sys.maxunicode):
- raise ValueError('unichr() arg not in range(%#x)'%(sys.maxunicode + 1))
- return unicode('\\U%08x' %(code), 'unicode-escape')
-
-
def reload(module):
import imp, sys, errno
Modified: pypy/dist/pypy/module/__builtin__/compiling.py
==============================================================================
--- pypy/dist/pypy/module/__builtin__/compiling.py (original)
+++ pypy/dist/pypy/module/__builtin__/compiling.py Mon May 23 13:05:59 2005
@@ -9,7 +9,7 @@
def compile(space, w_source, filename, mode, flags=0, dont_inherit=0):
if space.is_true(space.isinstance(w_source, space.w_unicode)):
- str_ = space.unwrap(w_source) # xxx generic unwrap
+ str_ = u''.join(w_source._value) # Bad exposing of unicode internals
else:
str_ = space.str_w(w_source)
Modified: pypy/dist/pypy/module/__builtin__/operation.py
==============================================================================
--- pypy/dist/pypy/module/__builtin__/operation.py (original)
+++ pypy/dist/pypy/module/__builtin__/operation.py Mon May 23 13:05:59 2005
@@ -15,6 +15,9 @@
w_character = space.newstring([w_ascii])
return w_character
+def unichr(space, w_code):
+ return space.newunicode([w_code])
+
def len(space, w_obj):
"len(object) -> integer\n\nReturn the number of items of a sequence or mapping."
return space.len(w_obj)
Modified: pypy/dist/pypy/module/unicodedata/__init__.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/module/unicodedata/__init__.py (original)
+++ pypy/dist/pypy/module/unicodedata/__init__.py Mon May 23 13:05:59 2005
@@ -1,6 +1,6 @@
-from pypy.interpreter.lazymodule import LazyModule
+from pypy.interpreter.mixedmodule import MixedModule
-class Module(LazyModule):
+class Module(MixedModule):
appleveldefs = {
}
interpleveldefs = {
Modified: pypy/dist/pypy/objspace/std/floattype.py
==============================================================================
--- pypy/dist/pypy/objspace/std/floattype.py (original)
+++ pypy/dist/pypy/objspace/std/floattype.py Mon May 23 13:05:59 2005
@@ -1,5 +1,6 @@
from pypy.objspace.std.stdtypedef import *
from pypy.interpreter.error import OperationError
+from pypy.objspace.std.strutil import ParseStringError
def descr__new__(space, w_floattype, w_x=0.0):
from pypy.objspace.std.floatobject import W_FloatObject
@@ -10,6 +11,14 @@
except ValueError, e:
raise OperationError(space.w_ValueError,
space.wrap(str(e)))
+ elif space.is_true(space.isinstance(w_value, space.w_unicode)):
+ try:
+ # XXX can produce unwrapped long
+ from unicodeobject import unicode_to_decimal_w
+ value = float(unicode_to_decimal_w(space, w_value))
+ except ParseStringError, e:
+ raise OperationError(space.w_ValueError,
+ space.wrap(e.msg))
else:
w_obj = space.float(w_value)
if space.is_true(space.is_(w_floattype, space.w_float)):
Modified: pypy/dist/pypy/objspace/std/inttype.py
==============================================================================
--- pypy/dist/pypy/objspace/std/inttype.py (original)
+++ pypy/dist/pypy/objspace/std/inttype.py Mon May 23 13:05:59 2005
@@ -28,6 +28,15 @@
space.wrap(e.msg))
except ParseStringOverflowError, e:
w_longval = retry_to_w_long(space, e.parser)
+ elif space.is_true(space.isinstance(w_value, space.w_unicode)):
+ try:
+ from unicodeobject import unicode_to_decimal_w
+ value = string_to_int(space, unicode_to_decimal_w(space, w_value))
+ except ParseStringError, e:
+ raise OperationError(space.w_ValueError,
+ space.wrap(e.msg))
+ except ParseStringOverflowError, e:
+ w_longval = retry_to_w_long(space, e.parser)
else:
# otherwise, use the __int__() method
w_obj = space.int(w_value)
Modified: pypy/dist/pypy/objspace/std/longtype.py
==============================================================================
--- pypy/dist/pypy/objspace/std/longtype.py (original)
+++ pypy/dist/pypy/objspace/std/longtype.py Mon May 23 13:05:59 2005
@@ -18,6 +18,14 @@
except ParseStringError, e:
raise OperationError(space.w_ValueError,
space.wrap(e.msg))
+ elif space.is_true(space.isinstance(w_value, space.w_unicode)):
+ try:
+ # XXX can produce unwrapped long
+ from unicodeobject import unicode_to_decimal_w
+ value = string_to_long(unicode_to_decimal_w(space, w_value))
+ except ParseStringError, e:
+ raise OperationError(space.w_ValueError,
+ space.wrap(e.msg))
else:
# otherwise, use the __long__() method
w_obj = space.long(w_value)
Modified: pypy/dist/pypy/objspace/std/objspace.py
==============================================================================
--- pypy/dist/pypy/objspace/std/objspace.py (original)
+++ pypy/dist/pypy/objspace/std/objspace.py Mon May 23 13:05:59 2005
@@ -199,6 +199,8 @@
return W_IntObject(self, x)
if isinstance(x, str):
return W_StringObject(self, x)
+ if isinstance(x, unicode):
+ return W_UnicodeObject(self, [u for u in x])
if isinstance(x, dict):
items_w = [(self.wrap(k), self.wrap(v)) for (k, v) in x.iteritems()]
return W_DictObject(self, items_w)
@@ -283,6 +285,14 @@
self.wrap("character code not in range(256)"))
return W_StringObject(self, ''.join(chars))
+ def newunicode(self, chars_w):
+ try:
+ chars = [unichr(self.int_w(w_c)) for w_c in chars_w]
+ except ValueError, e: # unichr(out-of-range)
+ raise OperationError(self.w_ValueError,
+ self.wrap("character code not in range(0x110000)"))
+ return W_UnicodeObject(self, chars)
+
def newseqiter(self, w_obj):
return W_SeqIterObject(self, w_obj)
Modified: pypy/dist/pypy/objspace/std/test/test_unicodestring.py
==============================================================================
--- pypy/dist/pypy/objspace/std/test/test_unicodestring.py (original)
+++ pypy/dist/pypy/objspace/std/test/test_unicodestring.py Mon May 23 13:05:59 2005
@@ -38,3 +38,29 @@
def test_contains(self):
assert u'a' in 'abc'
assert 'a' in u'abc'
+
+ def test_splitlines(self):
+ assert u''.splitlines() == []
+ assert u''.splitlines(1) == []
+ assert u'\n'.splitlines() == [u'']
+ assert u'a'.splitlines() == [u'a']
+ assert u'one\ntwo'.splitlines() == [u'one', u'two']
+ assert u'\ntwo\nthree'.splitlines() == [u'', u'two', u'three']
+ assert u'\n\n'.splitlines() == [u'', u'']
+ assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c']
+ assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n']
+
+ def test_zfill(self):
+ assert u'123'.zfill(6) == u'000123'
+ assert u'123'.zfill(2) == u'123'
+ assert u'123'.zfill(6) == u'000123'
+ assert u'+123'.zfill(2) == u'+123'
+ assert u'+123'.zfill(4) == u'+123'
+ assert u'+123'.zfill(6) == u'+00123'
+
+ def test_split(self):
+ assert (u'this is the split function'.split() ==
+ [u'this', u'is', u'the', u'split', u'function'])
+ assert (u'this!is!the!split!function'.split('!') ==
+ [u'this', u'is', u'the', u'split', u'function'])
+
Modified: pypy/dist/pypy/objspace/std/unicodeobject.py
==============================================================================
--- pypy/dist/pypy/objspace/std/unicodeobject.py (original)
+++ pypy/dist/pypy/objspace/std/unicodeobject.py Mon May 23 13:05:59 2005
@@ -1,129 +1,824 @@
from pypy.objspace.std.objspace import *
-from pypy.objspace.std.fake import fake_type, wrap_exception
+from pypy.interpreter import gateway
+from pypy.objspace.std.fake import wrap_exception
from pypy.objspace.std.stringobject import W_StringObject
-from pypy.objspace.std.strutil import string_to_w_long, ParseStringError
+from pypy.objspace.std.noneobject import W_NoneObject
+from pypy.objspace.std.sliceobject import W_SliceObject
+from pypy.objspace.std import slicetype
+from pypy.objspace.std.strutil import string_to_int, string_to_long, ParseStringError
+from pypy.rpython.rarithmetic import intmask
+from pypy.module.unicodedata import unicodedb
+
+class W_UnicodeObject(W_Object):
+ from pypy.objspace.std.unicodetype import unicode_typedef as typedef
+
+ def __init__(w_self, space, unicodechars):
+ W_Object.__init__(w_self, space)
+ w_self._value = unicodechars
+ if len(unicodechars) == 0:
+ w_self.w_hash = space.wrap(0)
+ else:
+ w_self.w_hash = None
+ def __repr__(w_self):
+ """ representation for debugging purposes """
+ return "%s(%r)" % (w_self.__class__.__name__, w_self._value)
-W_UnicodeObject = fake_type(unicode)
+registerimplementation(W_UnicodeObject)
# Helper for converting int/long
-import unicodedata
def unicode_to_decimal_w(space, w_unistr):
- result = []
- for uchr in space.unwrap(w_unistr):
- if uchr.isspace():
- result.append(' ')
+ unistr = w_unistr._value
+ result = ['\0'] * len(unistr)
+ digits = [ '0', '1', '2', '3', '4',
+ '5', '6', '7', '8', '9']
+ for i in xrange(len(unistr)):
+ uchr = ord(unistr[i])
+ if unicodedb.isspace(uchr):
+ result[i] = ' '
continue
try:
- result.append(chr(ord('0') + unicodedata.decimal(uchr)))
- continue
- except ValueError:
- ch = ord(uchr)
- if 0 < ch < 256:
- result.append(chr(ch))
- continue
- raise OperationError(space.w_UnicodeEncodeError, space.wrap('invalid decimal Unicode string'))
+ result[i] = digits[unicodedb.decimal(uchr)]
+ except KeyError:
+ if 0 < uchr < 256:
+ result[i] = chr(uchr)
+ else:
+ raise OperationError(space.w_UnicodeEncodeError, space.wrap('invalid decimal Unicode string'))
return ''.join(result)
# string-to-unicode delegation
def delegate_String2Unicode(w_str):
space = w_str.space
- return W_UnicodeObject(space, unicode(space.str_w(w_str)))
+ return space.call_function(space.w_unicode, w_str)
def str_w__Unicode(space, w_uni):
- return space.str_w(space.call_method(w_uni, 'encode'))
+ return space.str_w(space.str(w_uni))
-def eq__Unicode_ANY(space, w_uni, w_other):
- try:
- return space.newbool(space.unwrap(w_uni) == space.unwrap(w_other))
- except:
- wrap_exception(space)
+def repr__Unicode(space, w_uni):
+ return space.wrap(repr(u''.join(w_uni._value)))
-def ne__Unicode_ANY(space, w_uni, w_other):
- try:
- return space.newbool(space.unwrap(w_uni) != space.unwrap(w_other))
- except:
- wrap_exception(space)
+def str__Unicode(space, w_uni):
+ return space.call_method(w_uni, 'encode')
+def cmp__Unicode_Unicode(space, w_left, w_right):
+ left = w_left._value
+ right = w_right._value
+ for i in range(min(len(left), len(right))):
+ test = ord(left[i]) - ord(right[i])
+ if test < 0:
+ return space.wrap(-1)
+ if test > 0:
+ return space.wrap(1)
+
+ test = len(left) - len(right)
+ if test < 0:
+ return space.wrap(-1)
+ if test > 0:
+ return space.wrap(1)
+ return space.wrap(0)
-def lt__Unicode_ANY(space, w_uni, w_other):
+def cmp__Unicode_ANY(space, w_left, w_right):
try:
- return space.newbool(space.unwrap(w_uni) < space.unwrap(w_other))
+ w_right = space.call_function(space.w_unicode, w_right)
except:
- wrap_exception(space)
+ return space.wrap(1)
+ return space.cmp(w_left, w_right)
-def gt__Unicode_ANY(space, w_uni, w_other):
- try:
- return space.newbool(space.unwrap(w_uni) > space.unwrap(w_other))
- except:
- wrap_exception(space)
+def ord__Unicode(space, w_uni):
+ if len(w_uni._value) != 1:
+ raise OperationError(space.w_TypeError, space.wrap('ord() expected a character'))
+ return space.wrap(ord(w_uni._value[0]))
-def le__Unicode_ANY(space, w_uni, w_other):
- try:
- return space.newbool(space.unwrap(w_uni) <= space.unwrap(w_other))
- except:
- wrap_exception(space)
+def add__Unicode_Unicode(space, w_left, w_right):
+ left = w_left._value
+ right = w_right._value
+ leftlen = len(left)
+ rightlen = len(right)
+ result = [u'\0'] * (leftlen + rightlen)
+ for i in range(leftlen):
+ result[i] = left[i]
+ for i in range(rightlen):
+ result[i + leftlen] = right[i]
+ return W_UnicodeObject(space, result)
+
+def add__String_Unicode(space, w_left, w_right):
+ return space.add(space.call_function(space.w_unicode, w_left) , w_right)
+
+def add__Unicode_String(space, w_left, w_right):
+ return space.add(w_left, space.call_function(space.w_unicode, w_right))
+
+def contains__String_Unicode(space, w_container, w_item):
+ return space.contains(space.call_function(space.w_unicode, w_container), w_item )
+
+def _find(self, sub, start, end):
+ if len(sub) == 0:
+ return start
+ if start >= end:
+ return -1
+ for i in range(start, end - len(sub) + 1):
+ for j in range(len(sub)):
+ if self[i + j] != sub[j]:
+ break
+ else:
+ return i
+ return -1
+
+def _rfind(self, sub, start, end):
+ if len(sub) == 0:
+ return end
+ if end - start < len(sub):
+ return -1
+ for i in range(end - len(sub), start - 1, -1):
+ for j in range(len(sub)):
+ if self[i + j] != sub[j]:
+ break
+ else:
+ return i
+ return -1
+
+def contains__Unicode_Unicode(space, w_container, w_item):
+ item = w_item._value
+ container = w_container._value
+ return space.newbool(_find(container, item, 0, len(container)) >= 0)
+
+def unicode_join__Unicode_ANY(space, w_self, w_list):
+ list = space.unpackiterable(w_list)
+ delim = w_self._value
+ totlen = 0
+ if len(list) == 0:
+ return W_UnicodeObject(space, [])
+ for i in range(len(list)):
+ item = list[i]
+ if space.is_true(space.isinstance(item, space.w_unicode)):
+ list[i] = item._value
+ elif space.is_true(space.isinstance(item, space.w_str)):
+ list[i] = space.call_function(space.w_unicode, item)._value
+ else:
+ w_msg = space.mod(space.wrap('sequence item %d: expected string or Unicode'),
+ space.wrap(i))
+ raise OperationError(space.w_TypeError, w_msg)
+ totlen += len(list[i])
+ totlen += len(delim) * (len(list) - 1)
+ if len(list) == 1:
+ return W_UnicodeObject(space, list[0])
+ # Allocate result
+ result = [u'\0'] * totlen
+ first = list[0]
+ for i in range(len(first)):
+ result[i] = first[i]
+ offset = len(first)
+ for i in range(1, len(list)):
+ item = list[i]
+ # Add delimiter
+ for j in range(len(delim)):
+ result[offset + j] = delim[j]
+ offset += len(delim)
+ # Add item from list
+ for j in range(len(item)):
+ result[offset + j] = item[j]
+ offset += len(item)
+ return W_UnicodeObject(space, result)
+
+def unicode_encode__Unicode_String_String(space, w_self, w_encoding, w_errors):
+ try:
+ return space.wrap(u''.join(w_self._value).encode(space.str_w(w_encoding), space.str_w(w_errors)))
+ except:
+ wrap_exception(space)
+
+def unicode_encode__Unicode_String_None(space, w_self, w_encoding, w_none):
+ try:
+ return space.wrap(u''.join(w_self._value).encode(space.str_w(w_encoding)))
+ except:
+ wrap_exception(space)
+
+def unicode_encode__Unicode_None_None(space, w_self, w_encoding, w_errors):
+ try:
+ return space.wrap(u''.join(w_self._value).encode())
+ except:
+ wrap_exception(space)
+
+def hash__Unicode(space, w_uni):
+ if w_uni.w_hash is None:
+ chars = w_uni._value
+ x = ord(chars[0]) << 7
+ for c in chars:
+ x = intmask((1000003 * x) ^ ord(c))
+ h = intmask(x ^ len(chars))
+ if h == -1:
+ h = -2
+ w_uni.w_hash = space.wrap(h)
+ return w_uni.w_hash
+
+def len__Unicode(space, w_uni):
+ return space.wrap(len(w_uni._value))
+
+def getitem__Unicode_ANY(space, w_uni, w_index):
+ ival = space.int_w(w_index)
+ uni = w_uni._value
+ ulen = len(uni)
+ if ival < 0:
+ ival += ulen
+ if ival < 0 or ival >= ulen:
+ exc = space.call_function(space.w_IndexError,
+ space.wrap("unicode index out of range"))
+ raise OperationError(space.w_IndexError, exc)
+ return W_UnicodeObject(space, [uni[ival]])
+
+def getitem__Unicode_Slice(space, w_uni, w_slice):
+ uni = w_uni._value
+ length = len(uni)
+ start, stop, step, sl = slicetype.indices4(space, w_slice, length)
+ r = [uni[start + i*step] for i in range(sl)]
+ return W_UnicodeObject(space, r)
+
+def unicode_getslice__Unicode_ANY_ANY(space, w_uni, w_start, w_end):
+ w_slice = space.call_function(space.w_slice, w_start, w_end)
+ uni = w_uni._value
+ length = len(uni)
+ start, stop, step, sl = slicetype.indices4(space, w_slice, length)
+ return W_UnicodeObject(space, uni[start:stop])
+
+def mul__Unicode_ANY(space, w_uni, w_times):
+ chars = w_uni._value
+ charlen = len(chars)
+ times = space.int_w(w_times)
+ if times <= 0 or charlen == 0:
+ return W_UnicodeObject(space, [])
+ if times == 1:
+ return space.call_function(space.w_unicode, w_uni)
+ if charlen == 1:
+ return W_UnicodeObject(space, [w_uni._value[0]] * times)
+
+ try:
+ result = [u'\0'] * (charlen * times)
+ except OverflowError:
+ raise OperationError(space.w_OverflowError, space.wrap('repeated string is too long'))
+ for i in range(times):
+ offset = i * charlen
+ for j in range(charlen):
+ result[offset + j] = chars[j]
+ return W_UnicodeObject(space, result)
+
+def mul__ANY_Unicode(space, w_times, w_uni):
+ return space.mul(w_uni, w_times)
+
+def _isspace(uchar):
+ return unicodedb.isspace(ord(uchar))
+
+def unicode_isspace__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isspace(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_isalpha__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isalpha(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_isalnum__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not (unicodedb.isalpha(ord(uchar)) or
+ unicodedb.isnumeric(ord(uchar))):
+ return space.w_False
+ return space.w_True
+
+def unicode_isdecimal__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isdecimal(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_isdigit__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isdigit(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_isnumeric__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isnumeric(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_islower__Unicode(space, w_unicode):
+ cased = False
+ for uchar in w_unicode._value:
+ if (unicodedb.isupper(ord(uchar)) or
+ unicodedb.istitle(ord(uchar))):
+ return space.w_False
+ if not cased and unicodedb.islower(ord(uchar)):
+ cased = True
+ return space.newbool(cased)
+
+def unicode_isupper__Unicode(space, w_unicode):
+ cased = False
+ for uchar in w_unicode._value:
+ if (unicodedb.islower(ord(uchar)) or
+ unicodedb.istitle(ord(uchar))):
+ return space.w_False
+ if not cased and unicodedb.isupper(ord(uchar)):
+ cased = True
+ return space.newbool(cased)
+
+def unicode_istitle__Unicode(space, w_unicode):
+ cased = False
+ previous_is_cased = False
+ for uchar in w_unicode._value:
+ if (unicodedb.isupper(ord(uchar)) or
+ unicodedb.istitle(ord(uchar))):
+ if previous_is_cased:
+ return space.w_False
+ previous_is_cased = cased = True
+ elif unicodedb.islower(ord(uchar)):
+ if not previous_is_cased:
+ return space.w_False
+ previous_is_cased = cased = True
+ else:
+ previous_is_cased = False
+ return space.newbool(cased)
+
+def _strip(space, w_self, w_chars, left, right):
+ "internal function called by str_xstrip methods"
+ u_self = w_self._value
+ u_chars = w_chars._value
+
+ lpos = 0
+ rpos = len(u_self)
+
+ if left:
+ while lpos < rpos and u_self[lpos] in u_chars:
+ lpos += 1
+
+ if right:
+ while rpos > lpos and u_self[rpos - 1] in u_chars:
+ rpos -= 1
+
+ result = [u'\0'] * (rpos - lpos)
+ for i in range(rpos - lpos):
+ result[i] = u_self[lpos + i]
+ return W_UnicodeObject(space, result)
+
+def _strip_none(space, w_self, left, right):
+ "internal function called by str_xstrip methods"
+ u_self = w_self._value
+
+ lpos = 0
+ rpos = len(u_self)
+
+ if left:
+ while lpos < rpos and _isspace(u_self[lpos]):
+ lpos += 1
+
+ if right:
+ while rpos > lpos and _isspace(u_self[rpos - 1]):
+ rpos -= 1
+
+ result = [u'\0'] * (rpos - lpos)
+ for i in range(rpos - lpos):
+ result[i] = u_self[lpos + i]
+ return W_UnicodeObject(space, result)
+
+def unicode_strip__Unicode_None(space, w_self, w_chars):
+ return _strip_none(space, w_self, 1, 1)
+def unicode_strip__Unicode_Unicode(space, w_self, w_chars):
+ return _strip(space, w_self, w_chars, 1, 1)
+def unicode_strip__Unicode_String(space, w_self, w_chars):
+ return space.call_method(w_self, 'strip',
+ space.call_function(space.w_unicode, w_chars))
+
+def unicode_lstrip__Unicode_None(space, w_self, w_chars):
+ return _strip_none(space, w_self, 1, 0)
+def unicode_lstrip__Unicode_Unicode(space, w_self, w_chars):
+ return _strip(space, w_self, w_chars, 1, 0)
+def unicode_lstrip__Unicode_String(space, w_self, w_chars):
+ return space.call_method(w_self, 'lstrip',
+ space.call_function(space.w_unicode, w_chars))
+
+def unicode_rstrip__Unicode_None(space, w_self, w_chars):
+ return _strip_none(space, w_self, 0, 1)
+def unicode_rstrip__Unicode_Unicode(space, w_self, w_chars):
+ return _strip(space, w_self, w_chars, 0, 1)
+def unicode_rstrip__Unicode_String(space, w_self, w_chars):
+ return space.call_method(w_self, 'rstrip',
+ space.call_function(space.w_unicode, w_chars))
+
+def unicode_capitalize__Unicode(space, w_self):
+ input = w_self._value
+ if len(input) == 0:
+ return W_UnicodeObject(space, [])
+ result = [u'\0'] * len(input)
+ result[0] = unichr(unicodedb.toupper(ord(input[0])))
+ for i in range(1, len(input)):
+ result[i] = unichr(unicodedb.tolower(ord(input[i])))
+ return W_UnicodeObject(space, result)
+
+def unicode_title__Unicode(space, w_self):
+ input = w_self._value
+ if len(input) == 0:
+ return w_self
+ result = [u'\0'] * len(input)
+
+ previous_is_cased = 0
+ for i in range(len(input)):
+ unichar = ord(input[i])
+ if previous_is_cased:
+ result[i] = unichr(unicodedb.tolower(unichar))
+ else:
+ result[i] = unichr(unicodedb.totitle(unichar))
+ previous_is_cased = unicodedb.iscased(unichar)
+ return W_UnicodeObject(space, result)
+
+def unicode_lower__Unicode(space, w_self):
+ input = w_self._value
+ result = [u'\0'] * len(input)
+ for i in range(len(input)):
+ result[i] = unichr(unicodedb.tolower(ord(input[i])))
+ return W_UnicodeObject(space, result)
+
+def unicode_upper__Unicode(space, w_self):
+ input = w_self._value
+ result = [u'\0'] * len(input)
+ for i in range(len(input)):
+ result[i] = unichr(unicodedb.toupper(ord(input[i])))
+ return W_UnicodeObject(space, result)
+
+def unicode_swapcase__Unicode(space, w_self):
+ input = w_self._value
+ result = [u'\0'] * len(input)
+ for i in range(len(input)):
+ unichar = ord(input[i])
+ if unicodedb.islower(unichar):
+ result[i] = unichr(unicodedb.toupper(unichar))
+ elif unicodedb.isupper(unichar):
+ result[i] = unichr(unicodedb.tolower(unichar))
+ else:
+ result[i] = input[i]
+ return W_UnicodeObject(space, result)
+
+def _normalize_index(length, index):
+ if index < 0:
+ index += length
+ if index < 0:
+ index = 0
+ elif index > length:
+ index = length
+ return index
-def ge__Unicode_ANY(space, w_uni, w_other):
- try:
- return space.newbool(space.unwrap(w_uni) >= space.unwrap(w_other))
- except:
- wrap_exception(space)
+def unicode_endswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ self = w_self._value
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
-def ord__Unicode(space, w_uni):
- try:
- return space.wrap(ord(space.unwrap(w_uni)))
- except:
- wrap_exception(space)
+ substr = w_substr._value
+ substr_len = len(substr)
+
+ if end - start < substr_len:
+ return space.w_False # substring is too long
+ start = end - substr_len
+ for i in range(substr_len):
+ if self[start + i] != substr[i]:
+ return space.w_False
+ return space.w_True
-# xxx unicode.__float__ should not exist. For now this approach avoids to deal with unicode in more places
-def float__Unicode(space, w_uni):
- try:
- return space.wrap(float(unicode_to_decimal_w(space, w_uni)))
- except:
- wrap_exception(space)
-
-# xxx unicode.__int__ should not exist
-def int__Unicode(space, w_uni):
- try:
- s = unicode_to_decimal_w(space, w_uni)
- except:
- wrap_exception(space)
- raise
- return space.call_function(space.w_int, space.wrap(s))
+def unicode_startswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+    """Tell whether the window w_self[start:end] starts with w_substr.
+
+    w_start and w_end are clamped by _normalize_index() before use.
+    """
+    chars = w_self._value
+    start = _normalize_index(len(chars), space.int_w(w_start))
+    end = _normalize_index(len(chars), space.int_w(w_end))
-# xxx unicode.__long__ should not exist
-def long__Unicode(space, w_uni):
- try:
- return string_to_w_long(space, unicode_to_decimal_w(space, w_uni))
- except ParseStringError, e:
- raise OperationError(space.w_ValueError, space.wrap(e.msg))
- except:
- wrap_exception(space)
+    prefix = w_substr._value
+    if len(prefix) > end - start:
+        return space.w_False  # the prefix cannot fit inside the window
+    # Compare character by character from the start of the window.
+    for i in range(len(prefix)):
+        if chars[start + i] != prefix[i]:
+            return space.w_False
+    return space.w_True
-def add__Unicode_Unicode(space, w_left, w_right):
- return space.wrap(space.unwrap(w_left) + space.unwrap(w_right))
+def unicode_center__Unicode_ANY(space, w_self, w_width):
+    """Center w_self in a field of w_width characters, padding with spaces.
+
+    When the string is longer than the field, unicode(w_self) is returned.
+    """
+    chars = w_self._value
+    width = space.int_w(w_width)
+    extra = width - len(chars)
+    if extra < 0:
+        return space.call_function(space.w_unicode, w_self)
+    # Half of the padding goes to the left; the odd leftover column is put
+    # on the left only when both the padding and the width are odd.
+    left = extra // 2 + (extra & width & 1)
+    padded = [u' '] * width
+    for offset in range(len(chars)):
+        padded[left + offset] = chars[offset]
+    return W_UnicodeObject(space, padded)
-def contains__String_Unicode(space, w_left, w_right):
- try:
- return space.wrap(space.unwrap(w_right) in space.unwrap(w_left))
- except:
- wrap_exception(space)
-def contains__Unicode_Unicode(space, w_left, w_right):
- return space.wrap(space.unwrap(w_right) in space.unwrap(w_left))
+def unicode_ljust__Unicode_ANY(space, w_self, w_width):
+    """Left-justify w_self in a field of w_width characters, padding with
+    spaces on the right.
+
+    When the string is longer than the field, unicode(w_self) is returned.
+    """
+    chars = w_self._value
+    width = space.int_w(w_width)
+    if width - len(chars) < 0:
+        return space.call_function(space.w_unicode, w_self)
+    padded = [u' '] * width
+    for offset in range(len(chars)):
+        padded[offset] = chars[offset]
+    return W_UnicodeObject(space, padded)
-# str.strip(unicode) needs to convert self to unicode and call unicode.strip
-def str_strip__String_Unicode(space, w_self, w_chars ):
+def unicode_rjust__Unicode_ANY(space, w_self, w_width):
+    """Right-justify w_self in a field of w_width characters, padding with
+    spaces on the left.
+
+    When the string is longer than the field, unicode(w_self) is returned.
+    """
+    chars = w_self._value
+    width = space.int_w(w_width)
+    shift = width - len(chars)
+    if shift < 0:
+        return space.call_function(space.w_unicode, w_self)
+    padded = [u' '] * width
+    for offset in range(len(chars)):
+        padded[shift + offset] = chars[offset]
+    return W_UnicodeObject(space, padded)
+
+def unicode_zfill__Unicode_ANY(space, w_self, w_width):
+    """Pad w_self on the left with u'0' up to w_width characters.
+
+    A leading u'+' or u'-' stays in front of the padding.  When no padding
+    is needed, unicode(w_self) is returned.
+    """
+    chars = w_self._value
+    width = space.int_w(w_width)
+    if len(chars) == 0:
+        return W_UnicodeObject(space, [u'0'] * width)
+    fill = width - len(chars)
+    if fill <= 0:
+        return space.call_function(space.w_unicode, w_self)
+    filled = [u'0'] * width
+    for offset in range(len(chars)):
+        filled[fill + offset] = chars[offset]
+    sign = chars[0]
+    if sign in (u'+', u'-'):
+        # Put the sign first and overwrite its copied position with a zero.
+        filled[0] = sign
+        filled[fill] = u'0'
+    return W_UnicodeObject(space, filled)
+
+def unicode_splitlines__Unicode_ANY(space, w_self, w_keepends):
+ """Split w_self at line breaks and return a wrapped list of the lines.
+
+ A character is a line break when unicodedb.islinebreak() says so; the
+ pair CR LF counts as a single break. The break characters are kept in
+ the pieces only when w_keepends is true.
+ """
+ self = w_self._value
+ # keepends is normalised to 0/1 because it is used in slice arithmetic.
+ keepends = 0
+ if space.int_w(w_keepends):
+ keepends = 1
+ if len(self) == 0:
+ return space.newlist([])
+
+ start = 0
+ end = len(self)
+ pos = 0
+ lines = []
+ while pos < end:
+ if unicodedb.islinebreak(ord(self[pos])):
+ if (self[pos] == u'\r' and pos + 1 < end and
+ self[pos + 1] == u'\n'):
+ # Count CRLF as one linebreak
+ lines.append(W_UnicodeObject(space,
+ self[start:pos + keepends * 2]))
+ pos += 1
+ else:
+ lines.append(W_UnicodeObject(space,
+ self[start:pos + keepends]))
+ pos += 1
+ start = pos
+ else:
+ pos += 1
+ # A final piece without a terminating line break is still a line.
+ if not unicodedb.islinebreak(ord(self[end - 1])):
+ lines.append(W_UnicodeObject(space, self[start:]))
+ return space.newlist(lines)
+
+def unicode_find__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+    """Return the wrapped result of _find() for w_substr within
+    w_self[start:end], with both bounds clamped by _normalize_index()."""
+    haystack = w_self._value
+    lo = _normalize_index(len(haystack), space.int_w(w_start))
+    hi = _normalize_index(len(haystack), space.int_w(w_end))
+    return space.wrap(_find(haystack, w_substr._value, lo, hi))
+
+def unicode_rfind__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+    """Return the wrapped result of _rfind() for w_substr within
+    w_self[start:end], with both bounds clamped by _normalize_index()."""
+    haystack = w_self._value
+    lo = _normalize_index(len(haystack), space.int_w(w_start))
+    hi = _normalize_index(len(haystack), space.int_w(w_end))
+    return space.wrap(_rfind(haystack, w_substr._value, lo, hi))
+
+def unicode_index__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ """Return the wrapped index that _find() reports for w_substr within
+ w_self[start:end]; raise ValueError('substring not found') when that
+ index is negative."""
self = w_self._value
- return space.wrap( unicode(self).strip( space.unwrap(w_chars) ) )
-def str_lstrip__String_Unicode(space, w_self, w_chars ):
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
+ substr = w_substr._value
+ index = _find(self, substr, start, end)
+ if index < 0:
+ raise OperationError(space.w_ValueError,
+ space.wrap('substring not found'))
+ return space.wrap(index)
+
+def unicode_rindex__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ """Return the wrapped index that _rfind() reports for w_substr within
+ w_self[start:end]; raise ValueError('substring not found') when that
+ index is negative."""
self = w_self._value
- return space.wrap( unicode(self).lstrip( space.unwrap(w_chars) ) )
-def str_rstrip__String_Unicode(space, w_self, w_chars ):
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
+ substr = w_substr._value
+ index = _rfind(self, substr, start, end)
+ if index < 0:
+ raise OperationError(space.w_ValueError,
+ space.wrap('substring not found'))
+ return space.wrap(index)
+
+def unicode_count__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+    """Return the wrapped number of non-overlapping occurrences of w_substr
+    in w_self[start:end].
+
+    The bounds are clamped by _normalize_index().  An empty w_substr is
+    counted once per position tried, stepping one character at a time.
+    """
self = w_self._value
- return space.wrap( unicode(self).rstrip( space.unwrap(w_chars) ) )
-# we use the following magic to register strip_string_unicode as a String multimethod
-import stringtype
+    start = _normalize_index(len(self), space.int_w(w_start))
+    end = _normalize_index(len(self), space.int_w(w_end))
+    substr = w_substr._value
+    # Matches must not overlap, so the search resumes after the whole match
+    # (u'aaa'.count(u'aa') is 1, not 2).  An empty substring still has to
+    # advance by one position or the loop would never terminate.
+    step = len(substr)
+    if step == 0:
+        step = 1
+    count = 0
+    while start <= end:
+        index = _find(self, substr, start, end)
+        if index < 0:
+            break
+        start = index + step
+        count += 1
+    return space.wrap(count)
-register_all(vars(), stringtype)
+def unicode_split__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
+    """Split w_self on runs of whitespace and return a wrapped list.
+
+    Leading and trailing whitespace never produce empty pieces, so an empty
+    or whitespace-only string gives an empty list.  At most w_maxsplit
+    splits are made (a negative value means no limit); once the limit is
+    reached the rest of the string is returned as the last piece.
+    """
+    self = w_self._value
+    maxsplit = space.int_w(w_maxsplit)
+    parts = []
+    end = len(self)
+    start = 0
+    # Skip leading whitespace so that it does not yield an empty first piece.
+    while start < end and _isspace(self[start]):
+        start += 1
+    while start < end:
+        if maxsplit == 0:
+            # Split budget used up: everything left is one piece.
+            parts.append(W_UnicodeObject(space, self[start:]))
+            break
+        # Scan to the end of the current word.
+        index = start
+        while index < end and not _isspace(self[index]):
+            index += 1
+        parts.append(W_UnicodeObject(space, self[start:index]))
+        maxsplit -= 1
+        # Eat the whitespace separating this word from the next one.
+        start = index
+        while start < end and _isspace(self[start]):
+            start += 1
+    return space.newlist(parts)
+
+
+def unicode_split__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
+    """Split w_self on every occurrence of w_delim and return a wrapped list.
+
+    At most w_maxsplit splits are made (a negative value means no limit).
+    Unlike whitespace splitting, empty pieces are kept, so an empty string
+    yields a list holding one empty string.
+
+    Raises ValueError('empty separator') when w_delim is empty.
+    """
+    self = w_self._value
+    delim = w_delim._value
+    maxsplit = space.int_w(w_maxsplit)
+    delim_len = len(delim)
+    if delim_len == 0:
+        raise OperationError(space.w_ValueError,
+                             space.wrap('empty separator'))
+    parts = []
+    start = 0
+    end = len(self)
+    # Nothing can match once start reaches end, so stop searching there;
+    # the final append still records the (possibly empty) last piece.
+    while maxsplit != 0 and start < end:
+        index = _find(self, delim, start, end)
+        if index < 0:
+            break
+        parts.append(W_UnicodeObject(space, self[start:index]))
+        start = index + delim_len
+        maxsplit -= 1
+    parts.append(W_UnicodeObject(space, self[start:]))
+    return space.newlist(parts)
+
+def _split(space, self, maxsplit):
+    """Split self between its characters, for replace() with an empty
+    pattern.
+
+    The result starts with an empty piece, continues with one piece per
+    character while the split budget lasts, and ends with whatever is left
+    (possibly empty).  An empty input gives an empty list; a budget of 0
+    gives the input as a single piece.
+    """
+    if len(self) == 0:
+        return []
+    if maxsplit == 0:
+        return [W_UnicodeObject(space, self)]
+    pieces = [W_UnicodeObject(space, [])]
+    budget = maxsplit - 1
+    pos = 0
+    size = len(self)
+    while budget != 0 and pos < size:
+        pieces.append(W_UnicodeObject(space, [self[pos]]))
+        pos += 1
+        budget -= 1
+    pieces.append(W_UnicodeObject(space, self[pos:]))
+    return pieces
+
+def unicode_replace__Unicode_Unicode_Unicode_ANY(space, w_self, w_old,
+                                                 w_new, w_maxsplit):
+    """Replace occurrences of w_old in w_self by w_new, making at most
+    w_maxsplit replacements.
+
+    Implemented as split-then-join: the string is cut at w_old (or between
+    characters by _split() when w_old is empty) and glued with w_new.
+    """
+    if len(w_old._value) == 0:
+        pieces = _split(space, w_self._value, space.int_w(w_maxsplit))
+        w_parts = space.newlist(pieces)
+    else:
+        w_parts = space.call_method(w_self, 'split', w_old, w_maxsplit)
+    return space.call_method(w_new, 'join', w_parts)
+
+
+# Application-level implementations of expandtabs(), translate() and the
+# '%' operator; they are exposed to the interpreter through interphooks.
+app = gateway.applevel(r'''
+import sys
+
+def unicode_expandtabs__Unicode_ANY(self, tabsize):
+    # Expand every tab to the next multiple of tabsize columns.  The
+    # column count restarts after each tab and after each \n or \r.
+    if not self:
+        # Nothing to expand; also avoids indexing an empty split() result.
+        return u''
+    result = []
+    column = 0
+    parts = self.split(u'\t')
+    for i in range(len(parts)):
+        if i > 0:
+            if tabsize > 0:
+                pad = tabsize - column % tabsize
+            else:
+                # A non-positive tab size simply removes the tabs (and
+                # must not reach the modulo: tabsize == 0 would divide
+                # by zero).
+                pad = 0
+            result.append(u' ' * pad)
+            column = 0
+        part = parts[i]
+        result.append(part)
+        for ch in part:
+            column += 1
+            if ch in (u"\n", u"\r"):
+                column = 0
+    return u''.join(result)
+
+def unicode_translate__Unicode_ANY(self, table):
+    # Map each character through table[ord(char)]: None deletes the
+    # character, an integer is a code point, a unicode string is
+    # inserted as is, and a missing entry leaves the character alone.
+    result = []
+    for unichar in self:
+        try:
+            newval = table[ord(unichar)]
+        except LookupError:
+            # KeyError for mappings, IndexError for sequences.
+            result.append(unichar)
+        else:
+            if newval is None:
+                continue
+            elif isinstance(newval, (int, long)):
+                if newval < 0 or newval > sys.maxunicode:
+                    raise TypeError("character mapping must be in range(0x%x)"%(sys.maxunicode + 1,))
+                result.append(unichr(newval))
+            elif isinstance(newval, unicode):
+                result.append(newval)
+            else:
+                raise TypeError("character mapping must return integer, None or unicode")
+    # Join with a unicode separator so that an empty result is still a
+    # unicode object and not a byte string.
+    return u''.join(result)
+
+def mod__Unicode_ANY(format, values):
+    # format % values: a tuple supplies positional values, an object
+    # with a keys attribute is additionally passed as the mapping, and
+    # anything else is treated as a single positional value.
+    import _formatting
+    if isinstance(values, tuple):
+        return _formatting.format(format, values, None, do_unicode=True)
+    if hasattr(values, 'keys'):
+        return _formatting.format(format, (values,), values, do_unicode=True)
+    return _formatting.format(format, (values,), None, do_unicode=True)
+''')
+unicode_expandtabs__Unicode_ANY = app.interphook('unicode_expandtabs__Unicode_ANY')
+unicode_translate__Unicode_ANY = app.interphook('unicode_translate__Unicode_ANY')
+mod__Unicode_ANY = app.interphook('mod__Unicode_ANY')
+
+import unicodetype
+register_all(vars(), unicodetype)
+
+# str methods that receive a unicode argument convert self to unicode and
+# delegate to the unicode method of the same name.  The functions live in a
+# class body only to give them a namespace of their own; that namespace is
+# handed to register_all() together with stringtype at the end of the body.
+class str_methods:
+    # These names must be present in vars() for register_all() below.
+    import stringtype
+    W_UnicodeObject = W_UnicodeObject
+    from pypy.objspace.std.stringobject import W_StringObject
+
+    def str_strip__String_Unicode(space, w_self, w_chars):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'strip', w_chars)
+
+    def str_lstrip__String_Unicode(space, w_self, w_chars):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'lstrip', w_chars)
+
+    def str_rstrip__String_Unicode(space, w_self, w_chars):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'rstrip', w_chars)
+
+    def str_count__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'count', w_substr, w_start, w_end)
+
+    def str_find__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'find', w_substr, w_start, w_end)
+
+    def str_rfind__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'rfind', w_substr, w_start, w_end)
+
+    def str_index__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'index', w_substr, w_start, w_end)
+
+    def str_rindex__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'rindex', w_substr, w_start, w_end)
+
+    def str_replace__String_Unicode_Unicode_ANY(space, w_self, w_old, w_new, w_maxsplit):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'replace', w_old, w_new, w_maxsplit)
+
+    def str_split__String_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
+        return space.call_method(space.call_function(space.w_unicode, w_self),
+                                 'split', w_delim, w_maxsplit)
+
+    register_all(vars(), stringtype)
Modified: pypy/dist/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/dist/pypy/objspace/std/unicodetype.py (original)
+++ pypy/dist/pypy/objspace/std/unicodetype.py Mon May 23 13:05:59 2005
@@ -1,3 +1,119 @@
-from pypy.objspace.std.fake import fake_type
+from pypy.objspace.std.stdtypedef import *
+from pypy.objspace.std.basestringtype import basestring_typedef
+from pypy.interpreter.error import OperationError
-unicode_typedef = fake_type(unicode).typedef
+from sys import maxint
+
+# Multimethod declarations for the methods of the unicode type. Each call
+# gives the application-level method name, an argument count
+# (NOTE(review): presumably including self -- confirm against MultiMethod)
+# and, where present, default values for the trailing arguments.
+unicode_capitalize = MultiMethod('capitalize', 1)
+unicode_center = MultiMethod('center', 2, )
+unicode_count = MultiMethod('count', 4, defaults=(0, maxint))
+unicode_encode = MultiMethod('encode', 3, defaults=(None, None))
+unicode_endswith = MultiMethod('endswith', 4, defaults=(0,maxint))
+unicode_expandtabs = MultiMethod('expandtabs', 2, defaults=(8,))
+unicode_find = MultiMethod('find', 4, defaults=(0, maxint))
+unicode_index = MultiMethod('index', 4, defaults=(0, maxint))
+unicode_isalnum = MultiMethod('isalnum', 1)
+unicode_isalpha = MultiMethod('isalpha', 1)
+unicode_isdecimal = MultiMethod('isdecimal', 1)
+unicode_isdigit = MultiMethod('isdigit', 1)
+unicode_islower = MultiMethod('islower', 1)
+unicode_isnumeric = MultiMethod('isnumeric', 1)
+unicode_isspace = MultiMethod('isspace', 1)
+unicode_istitle = MultiMethod('istitle', 1)
+unicode_isupper = MultiMethod('isupper', 1)
+unicode_join = MultiMethod('join', 2)
+unicode_ljust = MultiMethod('ljust', 2)
+unicode_lower = MultiMethod('lower', 1)
+unicode_lstrip = MultiMethod('lstrip', 2, defaults=(None,))
+unicode_replace = MultiMethod('replace', 4, defaults=(-1,))
+unicode_rfind = MultiMethod('rfind', 4, defaults=(0, maxint))
+unicode_rindex = MultiMethod('rindex', 4, defaults=(0, maxint))
+unicode_rjust = MultiMethod('rjust', 2)
+unicode_rstrip = MultiMethod('rstrip', 2, defaults=(None,))
+unicode_split = MultiMethod('split', 3, defaults=(None,-1))
+unicode_splitlines = MultiMethod('splitlines', 2, defaults=(0,))
+unicode_startswith = MultiMethod('startswith', 4, defaults=(0,maxint))
+unicode_strip = MultiMethod('strip', 2, defaults=(None,))
+unicode_swapcase = MultiMethod('swapcase', 1)
+unicode_title = MultiMethod('title', 1)
+unicode_translate = MultiMethod('translate', 2)
+unicode_upper = MultiMethod('upper', 1)
+unicode_zfill = MultiMethod('zfill', 2)
+unicode_getslice = MultiMethod('__getslice__', 3)
+# ____________________________________________________________
+
+# Application-level helpers used by descr__new__ to build a unicode object
+# from an arbitrary object or from encoded data.
+app = gateway.applevel('''
+import codecs, sys
+
+def unicode_from_encoded_object(obj, encoding, errors):
+    # Decode obj with the codec named by encoding (the default encoding
+    # when None).  errors is passed to the decoder only when it is given.
+    # Raises TypeError if the decoder does not produce a unicode object.
+
+    # Fix later for buffer
+    if type(obj).__name__ == 'buffer':
+        obj = obj.buf
+    if encoding is None:
+        encoding = sys.getdefaultencoding()
+    decoder = codecs.getdecoder(encoding)
+    # The decoder returns (result, length consumed); only the result is
+    # needed here.
+    if errors is None:
+        retval, length = decoder(obj)
+    else:
+        retval, length = decoder(obj, errors)
+    if not isinstance(retval, unicode):
+        raise TypeError("decoder did not return an unicode object (type=%s)" %
+                        type(retval).__name__)
+    return retval
+
+def unicode_from_object(obj):
+    # Convert obj to unicode: a str is decoded directly; any other object
+    # is asked for __unicode__() and falls back to str(obj).  A result
+    # that is not already unicode is decoded with the default encoding.
+    if isinstance(obj, str):
+        res = obj
+    else:
+        try:
+            unicode_method = obj.__unicode__
+        except AttributeError:
+            res = str(obj)
+        else:
+            res = unicode_method()
+    if isinstance(res, unicode):
+        return res
+    return unicode_from_encoded_object(res, None, "strict")
+
+''')
+unicode_from_object = app.interphook('unicode_from_object')
+unicode_from_encoded_object = app.interphook('unicode_from_encoded_object')
+
+
+def descr__new__(space, w_unicodetype, w_obj=None, w_encoding=None, w_errors=None):
+ """Implement unicode.__new__(type, obj, encoding, errors).
+
+ The value is taken from w_obj directly when it is unicode, is empty
+ when w_obj is None, and otherwise comes from unicode_from_object() or,
+ when an encoding or errors argument is given,
+ unicode_from_encoded_object(). A new instance of w_unicodetype is then
+ allocated around that value.
+
+ Raises TypeError when w_obj is exactly of type unicode and an encoding
+ or errors argument is given.
+ """
+ # Imported here rather than at module level.
+ # NOTE(review): presumably to avoid a circular import -- confirm.
+ from pypy.objspace.std.unicodeobject import W_UnicodeObject
+ w_obj_type = space.type(w_obj)
+
+ if space.is_w(w_obj_type, space.w_unicode):
+ if (not space.is_w(w_encoding, space.w_None) or
+ not space.is_w(w_errors, space.w_None)):
+ raise OperationError(space.w_TypeError,
+ space.wrap('decoding Unicode is not supported'))
+ # An exact unicode object requested as exact unicode is returned as is.
+ if space.is_w(w_unicodetype, space.w_unicode):
+ return w_obj
+ w_value = w_obj
+ elif space.is_w(w_obj, space.w_None):
+ w_value = W_UnicodeObject(space, [])
+ elif (space.is_w(w_encoding, space.w_None) and
+ space.is_w(w_errors, space.w_None)):
+ # No decoding requested: instances of unicode subclasses are used
+ # directly, everything else is converted.
+ if space.is_true(space.isinstance(w_obj, space.w_unicode)):
+ w_value = w_obj
+ else:
+ w_value = unicode_from_object(space, w_obj)
+ else:
+ w_value = unicode_from_encoded_object(space, w_obj, w_encoding, w_errors)
+ # Build the instance of the requested type around the computed value.
+ w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
+ w_newobj.__init__(space, w_value._value)
+ return w_newobj
+
+# ____________________________________________________________
+
+# Type definition for "unicode": it is built with basestring_typedef as its
+# second argument (NOTE(review): presumably the base type -- confirm against
+# StdTypeDef), uses descr__new__ as __new__, and then registers the methods
+# found in this module's globals.
+unicode_typedef = StdTypeDef("unicode", basestring_typedef,
+ __new__ = newmethod(descr__new__),
+ __doc__ = '''unicode(string [, encoding[, errors]]) -> object
+
+Create a new Unicode object from the given encoded string.
+encoding defaults to the current default string encoding.
+errors can be 'strict', 'replace' or 'ignore' and defaults to 'strict'.'''
+ )
+unicode_typedef.registermethods(globals())
More information about the Pypy-commit
mailing list