[pypy-svn] r12419 - in pypy/branch/non-fake-unicode/pypy/objspace/std: . test
ac at codespeak.net
ac at codespeak.net
Tue May 17 17:56:58 CEST 2005
Author: ac
Date: Tue May 17 17:56:58 2005
New Revision: 12419
Modified:
pypy/branch/non-fake-unicode/pypy/objspace/std/test/test_unicodestring.py
pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py
pypy/branch/non-fake-unicode/pypy/objspace/std/unicodetype.py
Log:
Add methods to unicode. Only __mod__ missing now.
Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/test/test_unicodestring.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/objspace/std/test/test_unicodestring.py (original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/test/test_unicodestring.py Tue May 17 17:56:58 2005
@@ -38,3 +38,29 @@
def test_contains(self):
assert u'a' in 'abc'
assert 'a' in u'abc'
+
+ def test_splitlines(self):
+ assert u''.splitlines() == []
+ assert u''.splitlines(1) == []
+ assert u'\n'.splitlines() == [u'']
+ assert u'a'.splitlines() == [u'a']
+ assert u'one\ntwo'.splitlines() == [u'one', u'two']
+ assert u'\ntwo\nthree'.splitlines() == [u'', u'two', u'three']
+ assert u'\n\n'.splitlines() == [u'', u'']
+ assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c']
+ assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n']
+
+ def test_zfill(self):
+ assert u'123'.zfill(6) == u'000123'
+ assert u'123'.zfill(2) == u'123'
+ assert u'123'.zfill(6) == u'000123'
+ assert u'+123'.zfill(2) == u'+123'
+ assert u'+123'.zfill(4) == u'+123'
+ assert u'+123'.zfill(6) == u'+00123'
+
+ def test_split(self):
+ assert (u'this is the split function'.split() ==
+ [u'this', u'is', u'the', u'split', u'function'])
+ assert (u'this!is!the!split!function'.split('!') ==
+ [u'this', u'is', u'the', u'split', u'function'])
+
Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py (original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/unicodeobject.py Tue May 17 17:56:58 2005
@@ -1,4 +1,5 @@
from pypy.objspace.std.objspace import *
+from pypy.interpreter import gateway
from pypy.objspace.std.fake import wrap_exception
from pypy.objspace.std.stringobject import W_StringObject
from pypy.objspace.std.noneobject import W_NoneObject
@@ -25,26 +26,23 @@
registerimplementation(W_UnicodeObject)
# Helper for converting int/long
-import unicodedata
def unicode_to_decimal_w(space, w_unistr):
unistr = w_unistr._value
result = ['\0'] * len(unistr)
digits = [ '0', '1', '2', '3', '4',
'5', '6', '7', '8', '9']
for i in xrange(len(unistr)):
- uchr = unistr[i]
- if _isspace(uchr):
+ uchr = ord(unistr[i])
+ if unicodedb.isspace(uchr):
result[i] = ' '
continue
try:
- result[i] = digits[unicodedata.decimal(uchr)]
- continue
- except ValueError:
- ch = ord(uchr)
- if 0 < ch < 256:
- result[i] = chr(ch)
- continue
- raise OperationError(space.w_UnicodeEncodeError, space.wrap('invalid decimal Unicode string'))
+ result[i] = digits[unicodedb.decimal(uchr)]
+ except KeyError:
+ if 0 < uchr < 256:
+ result[i] = chr(uchr)
+ else:
+ raise OperationError(space.w_UnicodeEncodeError, space.wrap('invalid decimal Unicode string'))
return ''.join(result)
# string-to-unicode delegation
@@ -77,7 +75,14 @@
if test > 0:
return space.wrap(1)
return space.wrap(0)
-
+
+def cmp__Unicode_ANY(space, w_left, w_right):
+ try:
+ w_right = space.call_function(space.w_unicode, w_right)
+ except:
+ return space.wrap(1)
+ return space.cmp(w_left, w_right)
+
def ord__Unicode(space, w_uni):
if len(w_uni._value) != 1:
raise OperationError(space.w_TypeError, space.wrap('ord() expected a character'))
@@ -104,18 +109,36 @@
def contains__String_Unicode(space, w_container, w_item):
return space.contains(space.call_function(space.w_unicode, w_container), w_item )
+def _find(self, sub, start, end):
+ if len(sub) == 0:
+ return start
+ if start >= end:
+ return -1
+ for i in range(start, end - len(sub) + 1):
+ for j in range(len(sub)):
+ if self[i + j] != sub[j]:
+ break
+ else:
+ return i
+ return -1
+
+def _rfind(self, sub, start, end):
+ if len(sub) == 0:
+ return end
+ if end - start < len(sub):
+ return -1
+ for i in range(end - len(sub), start - 1, -1):
+ for j in range(len(sub)):
+ if self[i + j] != sub[j]:
+ break
+ else:
+ return i
+ return -1
+
def contains__Unicode_Unicode(space, w_container, w_item):
item = w_item._value
container = w_container._value
- if len(item) == 0:
- return space.w_True
- for i in range(len(container) - len(item) + 1):
- for j in range(len(item)):
- if container[i + j] != item[j]:
- break
- else:
- return space.w_True
- return space.w_False
+ return space.newbool(_find(container, item, 0, len(container)) >= 0)
def unicode_join__Unicode_ANY(space, w_self, w_list):
list = space.unpackiterable(w_list)
@@ -123,12 +146,20 @@
totlen = 0
if len(list) == 0:
return W_UnicodeObject(space, [])
- if len(list) == 1:
- return space.call_function(space.w_unicode, list[0])
for i in range(len(list)):
- list[i] = space.call_function(space.w_unicode, list[i])._value
+ item = list[i]
+ if space.is_true(space.isinstance(item, space.w_unicode)):
+ list[i] = item._value
+ elif space.is_true(space.isinstance(item, space.w_str)):
+ list[i] = space.call_function(space.w_unicode, item)._value
+ else:
+ w_msg = space.mod(space.wrap('sequence item %d: expected string or Unicode'),
+ space.wrap(i))
+ raise OperationError(space.w_TypeError, w_msg)
totlen += len(list[i])
totlen += len(delim) * (len(list) - 1)
+ if len(list) == 1:
+ return W_UnicodeObject(space, list[0])
# Allocate result
result = [u'\0'] * totlen
first = list[0]
@@ -152,6 +183,7 @@
return space.wrap(u''.join(w_self._value).encode(space.str_w(w_encoding), space.str_w(w_errors)))
except:
wrap_exception(space)
+
def unicode_encode__Unicode_String_None(space, w_self, w_encoding, w_none):
try:
return space.wrap(u''.join(w_self._value).encode(space.str_w(w_encoding)))
@@ -198,6 +230,12 @@
r = [uni[start + i*step] for i in range(sl)]
return W_UnicodeObject(space, r)
+def unicode_getslice__Unicode_ANY_ANY(space, w_uni, w_start, w_end):
+ w_slice = space.call_function(space.w_slice, w_start, w_end)
+ uni = w_uni._value
+ length = len(uni)
+ start, stop, step, sl = slicetype.indices4(space, w_slice, length)
+ return W_UnicodeObject(space, uni[start:stop])
def mul__Unicode_ANY(space, w_uni, w_times):
chars = w_uni._value
@@ -206,11 +244,14 @@
if times <= 0 or charlen == 0:
return W_UnicodeObject(space, [])
if times == 1:
- return w_uni
+ return space.call_function(space.w_unicode, w_uni)
if charlen == 1:
return W_UnicodeObject(space, [w_uni._value[0]] * times)
- result = [u'\0'] * (charlen * times)
+ try:
+ result = [u'\0'] * (charlen * times)
+ except OverflowError:
+ raise OperationError(space.w_OverflowError, space.wrap('repeated string is too long'))
for i in range(times):
offset = i * charlen
for j in range(charlen):
@@ -221,11 +262,93 @@
return space.mul(w_uni, w_times)
def _isspace(uchar):
- code = ord(uchar)
- try:
- return unicodedb.category[code] == 'Zs' or unicodedb.bidirectional[code] in ("WS", "B", "S")
- except:
- return False
+ return unicodedb.isspace(ord(uchar))
+
+def unicode_isspace__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isspace(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_isalpha__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isalpha(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_isalnum__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not (unicodedb.isalpha(ord(uchar)) or
+ unicodedb.isnumeric(ord(uchar))):
+ return space.w_False
+ return space.w_True
+
+def unicode_isdecimal__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isdecimal(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_isdigit__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isdigit(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_isnumeric__Unicode(space, w_unicode):
+ if len(w_unicode._value) == 0:
+ return space.w_False
+ for uchar in w_unicode._value:
+ if not unicodedb.isnumeric(ord(uchar)):
+ return space.w_False
+ return space.w_True
+
+def unicode_islower__Unicode(space, w_unicode):
+ cased = False
+ for uchar in w_unicode._value:
+ if (unicodedb.isupper(ord(uchar)) or
+ unicodedb.istitle(ord(uchar))):
+ return space.w_False
+ if not cased and unicodedb.islower(ord(uchar)):
+ cased = True
+ return space.newbool(cased)
+
+def unicode_isupper__Unicode(space, w_unicode):
+ cased = False
+ for uchar in w_unicode._value:
+ if (unicodedb.islower(ord(uchar)) or
+ unicodedb.istitle(ord(uchar))):
+ return space.w_False
+ if not cased and unicodedb.isupper(ord(uchar)):
+ cased = True
+ return space.newbool(cased)
+
+def unicode_istitle__Unicode(space, w_unicode):
+ cased = False
+ previous_is_cased = False
+ for uchar in w_unicode._value:
+ if (unicodedb.isupper(ord(uchar)) or
+ unicodedb.istitle(ord(uchar))):
+ if previous_is_cased:
+ return space.w_False
+ previous_is_cased = cased = True
+ elif unicodedb.islower(ord(uchar)):
+ if not previous_is_cased:
+ return space.w_False
+ previous_is_cased = cased = True
+ else:
+ previous_is_cased = False
+ return space.newbool(cased)
def _strip(space, w_self, w_chars, left, right):
"internal function called by str_xstrip methods"
@@ -292,6 +415,361 @@
return space.call_method(w_self, 'rstrip',
space.call_function(space.w_unicode, w_chars))
+def unicode_capitalize__Unicode(space, w_self):
+ input = w_self._value
+ if len(input) == 0:
+ return W_UnicodeObject(space, [])
+ result = [u'\0'] * len(input)
+ result[0] = unichr(unicodedb.toupper(ord(input[0])))
+ for i in range(1, len(input)):
+ result[i] = unichr(unicodedb.tolower(ord(input[i])))
+ return W_UnicodeObject(space, result)
+
+def unicode_title__Unicode(space, w_self):
+ input = w_self._value
+ if len(input) == 0:
+ return w_self
+ result = [u'\0'] * len(input)
+
+ previous_is_cased = 0
+ for i in range(len(input)):
+ unichar = ord(input[i])
+ if previous_is_cased:
+ result[i] = unichr(unicodedb.tolower(unichar))
+ else:
+ result[i] = unichr(unicodedb.totitle(unichar))
+ previous_is_cased = unicodedb.iscased(unichar)
+ return W_UnicodeObject(space, result)
+
+def unicode_lower__Unicode(space, w_self):
+ input = w_self._value
+ result = [u'\0'] * len(input)
+ for i in range(len(input)):
+ result[i] = unichr(unicodedb.tolower(ord(input[i])))
+ return W_UnicodeObject(space, result)
+
+def unicode_upper__Unicode(space, w_self):
+ input = w_self._value
+ result = [u'\0'] * len(input)
+ for i in range(len(input)):
+ result[i] = unichr(unicodedb.toupper(ord(input[i])))
+ return W_UnicodeObject(space, result)
+
+def unicode_swapcase__Unicode(space, w_self):
+ input = w_self._value
+ result = [u'\0'] * len(input)
+ for i in range(len(input)):
+ unichar = ord(input[i])
+ if unicodedb.islower(unichar):
+ result[i] = unichr(unicodedb.toupper(unichar))
+ elif unicodedb.isupper(unichar):
+ result[i] = unichr(unicodedb.tolower(unichar))
+ else:
+ result[i] = input[i]
+ return W_UnicodeObject(space, result)
+
+def _normalize_index(length, index):
+ if index < 0:
+ index += length
+ if index < 0:
+ index = 0
+ elif index > length:
+ index = length
+ return index
+
+def unicode_endswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ self = w_self._value
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
+
+ substr = w_substr._value
+ substr_len = len(substr)
+
+ if end - start < substr_len:
+ return space.w_False # substring is too long
+ start = end - substr_len
+ for i in range(substr_len):
+ if self[start + i] != substr[i]:
+ return space.w_False
+ return space.w_True
+
+def unicode_startswith__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ self = w_self._value
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
+
+ substr = w_substr._value
+ substr_len = len(substr)
+
+ if end - start < substr_len:
+ return space.w_False # substring is too long
+
+ for i in range(substr_len):
+ if self[start + i] != substr[i]:
+ return space.w_False
+ return space.w_True
+
+def unicode_center__Unicode_ANY(space, w_self, w_width):
+ self = w_self._value
+ width = space.int_w(w_width)
+ padding = width - len(self)
+ if padding < 0:
+ return space.call_function(space.w_unicode, w_self)
+ leftpad = padding // 2 + (padding & width & 1)
+ result = [u' '] * width
+ for i in range(len(self)):
+ result[leftpad + i] = self[i]
+ return W_UnicodeObject(space, result)
+
+
+def unicode_ljust__Unicode_ANY(space, w_self, w_width):
+ self = w_self._value
+ width = space.int_w(w_width)
+ padding = width - len(self)
+ if padding < 0:
+ return space.call_function(space.w_unicode, w_self)
+ result = [u' '] * width
+ for i in range(len(self)):
+ result[i] = self[i]
+ return W_UnicodeObject(space, result)
+
+def unicode_rjust__Unicode_ANY(space, w_self, w_width):
+ self = w_self._value
+ width = space.int_w(w_width)
+ padding = width - len(self)
+ if padding < 0:
+ return space.call_function(space.w_unicode, w_self)
+ result = [u' '] * width
+ for i in range(len(self)):
+ result[padding + i] = self[i]
+ return W_UnicodeObject(space, result)
+
+def unicode_zfill__Unicode_ANY(space, w_self, w_width):
+ self = w_self._value
+ width = space.int_w(w_width)
+ if len(self) == 0:
+ return W_UnicodeObject(space, [u'0'] * width)
+ padding = width - len(self)
+ if padding <= 0:
+ return space.call_function(space.w_unicode, w_self)
+ result = [u'0'] * width
+ for i in range(len(self)):
+ result[padding + i] = self[i]
+ # Move sign to first position
+ if self[0] in (u'+', u'-'):
+ result[0] = self[0]
+ result[padding] = u'0'
+ return W_UnicodeObject(space, result)
+
+def unicode_splitlines__Unicode_ANY(space, w_self, w_keepends):
+ self = w_self._value
+ keepends = 0
+ if space.int_w(w_keepends):
+ keepends = 1
+ if len(self) == 0:
+ return space.newlist([])
+
+ start = 0
+ end = len(self)
+ pos = 0
+ lines = []
+ while pos < end:
+ if unicodedb.islinebreak(ord(self[pos])):
+ if (self[pos] == u'\r' and pos + 1 < end and
+ self[pos + 1] == u'\n'):
+ # Count CRLF as one linebreak
+ lines.append(W_UnicodeObject(space,
+ self[start:pos + keepends * 2]))
+ pos += 1
+ else:
+ lines.append(W_UnicodeObject(space,
+ self[start:pos + keepends]))
+ pos += 1
+ start = pos
+ else:
+ pos += 1
+ if not unicodedb.islinebreak(ord(self[end - 1])):
+ lines.append(W_UnicodeObject(space, self[start:]))
+ return space.newlist(lines)
+
+def unicode_find__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ self = w_self._value
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
+ substr = w_substr._value
+ return space.wrap(_find(self, substr, start, end))
+
+def unicode_rfind__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ self = w_self._value
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
+ substr = w_substr._value
+ return space.wrap(_rfind(self, substr, start, end))
+
+def unicode_index__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ self = w_self._value
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
+ substr = w_substr._value
+ index = _find(self, substr, start, end)
+ if index < 0:
+ raise OperationError(space.w_ValueError,
+ space.wrap('substring not found'))
+ return space.wrap(index)
+
+def unicode_rindex__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ self = w_self._value
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
+ substr = w_substr._value
+ index = _rfind(self, substr, start, end)
+ if index < 0:
+ raise OperationError(space.w_ValueError,
+ space.wrap('substring not found'))
+ return space.wrap(index)
+
+def unicode_count__Unicode_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ self = w_self._value
+ start = _normalize_index(len(self), space.int_w(w_start))
+ end = _normalize_index(len(self), space.int_w(w_end))
+ substr = w_substr._value
+ count = 0
+ while start <= end:
+ index = _find(self, substr, start, end)
+ if index < 0:
+ break
+ start = index + 1
+ count += 1
+ return space.wrap(count)
+
+
+def unicode_split__Unicode_None_ANY(space, w_self, w_none, w_maxsplit):
+ self = w_self._value
+ maxsplit = space.int_w(w_maxsplit)
+ parts = []
+ if len(self) == 0:
+ return space.newlist([])
+ start = 0
+ end = len(self)
+ while maxsplit != 0 and start < end:
+ index = start
+ for index in range(start, end):
+ if _isspace(self[index]):
+ break
+ else:
+ break
+ parts.append(W_UnicodeObject(space, self[start:index]))
+ maxsplit -= 1
+ # Eat whitespace
+ for start in range(index + 1, end):
+ if not _isspace(self[start]):
+ break
+ else:
+ return space.newlist(parts)
+ parts.append(W_UnicodeObject(space, self[start:]))
+ return space.newlist(parts)
+
+
+def unicode_split__Unicode_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
+ self = w_self._value
+ delim = w_delim._value
+ maxsplit = space.int_w(w_maxsplit)
+ delim_len = len(delim)
+ if delim_len == 0:
+ raise OperationError(space.w_ValueError,
+ space.wrap('empty separator'))
+ parts = []
+ if len(self) == 0:
+ return space.newlist([])
+ start = 0
+ end = len(self)
+ while maxsplit != 0:
+ index = _find(self, delim, start, end)
+ if index < 0:
+ break
+ parts.append(W_UnicodeObject(space, self[start:index]))
+ start = index + delim_len
+ maxsplit -= 1
+ parts.append(W_UnicodeObject(space, self[start:]))
+ return space.newlist(parts)
+
+def _split(space, self, maxsplit):
+ if len(self) == 0:
+ return []
+ if maxsplit == 0:
+ return [W_UnicodeObject(space, self)]
+ index = 0
+ end = len(self)
+ parts = [W_UnicodeObject(space, [])]
+ maxsplit -= 1
+ while maxsplit != 0:
+ if index >= end:
+ break
+ parts.append(W_UnicodeObject(space, [self[index]]))
+ index += 1
+ maxsplit -= 1
+ parts.append(W_UnicodeObject(space, self[index:]))
+ return parts
+
+def unicode_replace__Unicode_Unicode_Unicode_ANY(space, w_self, w_old,
+ w_new, w_maxsplit):
+ if len(w_old._value):
+ w_parts = space.call_method(w_self, 'split', w_old, w_maxsplit)
+ else:
+ self = w_self._value
+ maxsplit = space.int_w(w_maxsplit)
+ w_parts = space.newlist(_split(space, self, maxsplit))
+ return space.call_method(w_new, 'join', w_parts)
+
+
+'translate'
+app = gateway.applevel(r'''
+import sys
+
+def unicode_expandtabs__Unicode_ANY(self, tabsize):
+ parts = self.split(u'\t')
+ result = [ parts[0] ]
+ prevsize = 0
+ for ch in parts[0]:
+ prevsize += 1
+ if ch in (u"\n", u"\r"):
+ prevsize = 0
+ for i in range(1, len(parts)):
+ pad = tabsize - prevsize % tabsize
+ result.append(u' ' * pad)
+ nextpart = parts[i]
+ result.append(nextpart)
+ prevsize = 0
+ for ch in nextpart:
+ prevsize += 1
+ if ch in (u"\n", u"\r"):
+ prevsize = 0
+ return u''.join(result)
+
+def unicode_translate__Unicode_ANY(self, table):
+ result = []
+ for unichar in self:
+ try:
+ newval = table[ord(unichar)]
+ except KeyError:
+ result.append(unichar)
+ else:
+ if newval is None:
+ continue
+ elif isinstance(newval, int):
+ if newval < 0 or newval > sys.maxunicode:
+ raise TypeError("character mapping must be in range(0x%x)"%(sys.maxunicode + 1,))
+ result.append(unichr(newval))
+ elif isinstance(newval, unicode):
+ result.append(newval)
+ else:
+ raise TypeError("character mapping must return integer, None or unicode")
+ return ''.join(result)
+
+''')
+unicode_expandtabs__Unicode_ANY = app.interphook('unicode_expandtabs__Unicode_ANY')
+unicode_translate__Unicode_ANY = app.interphook('unicode_translate__Unicode_ANY')
+
import unicodetype
register_all(vars(), unicodetype)
@@ -301,15 +779,38 @@
import stringtype
W_UnicodeObject = W_UnicodeObject
from pypy.objspace.std.stringobject import W_StringObject
- def str_strip__String_Unicode(space, w_self, w_chars ):
+ def str_strip__String_Unicode(space, w_self, w_chars):
return space.call_method(space.call_function(space.w_unicode, w_self),
'strip', w_chars)
- def str_lstrip__String_Unicode(space, w_self, w_chars ):
+ def str_lstrip__String_Unicode(space, w_self, w_chars):
return space.call_method(space.call_function(space.w_unicode, w_self),
'lstrip', w_chars)
self = w_self._value
- def str_rstrip__String_Unicode(space, w_self, w_chars ):
+ def str_rstrip__String_Unicode(space, w_self, w_chars):
return space.call_method(space.call_function(space.w_unicode, w_self),
'rstrip', w_chars)
+ def str_count__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ return space.call_method(space.call_function(space.w_unicode, w_self),
+ 'count', w_substr, w_start, w_end)
+ def str_find__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ return space.call_method(space.call_function(space.w_unicode, w_self),
+ 'find', w_substr, w_start, w_end)
+ def str_rfind__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ return space.call_method(space.call_function(space.w_unicode, w_self),
+ 'rfind', w_substr, w_start, w_end)
+ def str_index__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ return space.call_method(space.call_function(space.w_unicode, w_self),
+ 'index', w_substr, w_start, w_end)
+ def str_rindex__String_Unicode_ANY_ANY(space, w_self, w_substr, w_start, w_end):
+ return space.call_method(space.call_function(space.w_unicode, w_self),
+ 'rindex', w_substr, w_start, w_end)
+ def str_replace__String_Unicode_Unicode_ANY(space, w_self, w_old, w_new, w_maxsplit):
+ return space.call_method(space.call_function(space.w_unicode, w_self),
+ 'replace', w_old, w_new, w_maxsplit)
+
+ def str_split__String_Unicode_ANY(space, w_self, w_delim, w_maxsplit):
+ return space.call_method(space.call_function(space.w_unicode, w_self),
+ 'split', w_delim, w_maxsplit)
+
register_all(vars(), stringtype)
Modified: pypy/branch/non-fake-unicode/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/branch/non-fake-unicode/pypy/objspace/std/unicodetype.py (original)
+++ pypy/branch/non-fake-unicode/pypy/objspace/std/unicodetype.py Tue May 17 17:56:58 2005
@@ -8,7 +8,7 @@
unicode_center = MultiMethod('center', 2, )
unicode_count = MultiMethod('count', 4, defaults=(0, maxint))
unicode_encode = MultiMethod('encode', 3, defaults=(None, None))
-unicode_endswith = MultiMethod('endswith', 2) #[optional arguments not supported now]
+unicode_endswith = MultiMethod('endswith', 4, defaults=(0,maxint))
unicode_expandtabs = MultiMethod('expandtabs', 2, defaults=(8,))
unicode_find = MultiMethod('find', 4, defaults=(0, maxint))
unicode_index = MultiMethod('index', 4, defaults=(0, maxint))
@@ -32,47 +32,78 @@
unicode_rstrip = MultiMethod('rstrip', 2, defaults=(None,))
unicode_split = MultiMethod('split', 3, defaults=(None,-1))
unicode_splitlines = MultiMethod('splitlines', 2, defaults=(0,))
-unicode_startswith = MultiMethod('startswith', 3, defaults=(0,))
+unicode_startswith = MultiMethod('startswith', 4, defaults=(0,maxint))
unicode_strip = MultiMethod('strip', 2, defaults=(None,))
unicode_swapcase = MultiMethod('swapcase', 1)
unicode_title = MultiMethod('title', 1)
-unicode_translate = MultiMethod('translate', 3, defaults=('',))
+unicode_translate = MultiMethod('translate', 2)
unicode_upper = MultiMethod('upper', 1)
unicode_zfill = MultiMethod('zfill', 2)
-
+unicode_getslice = MultiMethod('__getslice__', 3)
# ____________________________________________________________
+
+app = gateway.applevel('''
+import codecs, sys
+
+def unicode_from_encoded_object(obj, encoding, errors):
+ # Fix later for buffer
+ if type(obj).__name__ == 'buffer':
+ obj = obj.buf
+ if encoding is None:
+ encoding = sys.getdefaultencoding()
+ decoder = codecs.getdecoder(encoding)
+ if errors is None:
+ retval, lenght = decoder(obj)
+ else:
+ retval, length = decoder(obj, errors)
+ if not isinstance(retval, unicode):
+ raise TypeError("decoder did not return an unicode object (type=%s)" %
+ type(retval).__name__)
+ return retval
+
+def unicode_from_object(obj):
+ if isinstance(obj, str):
+ res = obj
+ else:
+ try:
+ unicode_method = obj.__unicode__
+ except AttributeError:
+ res = str(obj)
+ else:
+ res = unicode_method()
+ if isinstance(res, unicode):
+ return res
+ return unicode_from_encoded_object(res, None, "strict")
+
+''')
+unicode_from_object = app.interphook('unicode_from_object')
+unicode_from_encoded_object = app.interphook('unicode_from_encoded_object')
+
+
def descr__new__(space, w_unicodetype, w_obj=None, w_encoding=None, w_errors=None):
from pypy.objspace.std.unicodeobject import W_UnicodeObject
w_obj_type = space.type(w_obj)
if space.is_w(w_obj_type, space.w_unicode):
+ if (not space.is_w(w_encoding, space.w_None) or
+ not space.is_w(w_errors, space.w_None)):
+ raise OperationError(space.w_TypeError,
+ space.wrap('decoding Unicode is not supported'))
if space.is_w(w_unicodetype, space.w_unicode):
return w_obj
- value = w_obj._value
+ w_value = w_obj
elif space.is_w(w_obj, space.w_None):
- value = []
- elif space.is_true(space.isinstance(w_obj, space.w_unicode)):
- value = w_obj._value
- elif space.is_w(w_obj_type, space.w_str):
- try:
- if space.is_w(w_encoding, space.w_None):
- value = [ u for u in unicode(space.str_w(w_obj)) ]
- elif space.is_w(w_errors, space.w_None):
- value = [ u for u in unicode(space.str_w(w_obj),
- space.str_w(w_encoding)) ]
- else:
- value = [u for u in unicode(space.str_w(w_obj),
- space.str_w(w_encoding),
- space.str_w(w_errors)) ]
- except UnicodeDecodeError, e:
- raise OperationError(space.w_UnicodeDecodeError,
- space.wrap(e.reason))
+ w_value = W_UnicodeObject(space, [])
+ elif (space.is_w(w_encoding, space.w_None) and
+ space.is_w(w_errors, space.w_None)):
+ if space.is_true(space.isinstance(w_obj, space.w_unicode)):
+ w_value = w_obj
+ else:
+ w_value = unicode_from_object(space, w_obj)
else:
- # try with __unicode__
- raise OperationError(space.w_ValueError,
- space.wrap('Can not create unicode from other than strings'%w_obj_type))
+ w_value = unicode_from_encoded_object(space, w_obj, w_encoding, w_errors)
w_newobj = space.allocate_instance(W_UnicodeObject, w_unicodetype)
- w_newobj.__init__(space, value)
+ w_newobj.__init__(space, w_value._value)
return w_newobj
# ____________________________________________________________
More information about the Pypy-commit mailing list