[pypy-commit] pypy default: Pull shared logic between str, unicode and bytearray in base classes.
aliles
noreply at buildbot.pypy.org
Wed Mar 14 09:20:28 CET 2012
Author: aliles
Branch:
Changeset: r53539:ab3326dd1a85
Date: 2012-03-13 21:45 -0700
http://bitbucket.org/pypy/pypy/changeset/ab3326dd1a85/
Log: Pull shared logic between str, unicode and bytearray in base
classes.
diff --git a/pypy/objspace/std/abstractstring.py b/pypy/objspace/std/abstractstring.py
--- a/pypy/objspace/std/abstractstring.py
+++ b/pypy/objspace/std/abstractstring.py
@@ -2,6 +2,60 @@
from pypy.rlib.objectmodel import specialize
+class Mixin_BaseStringMethods(object):
+ __slots__ = ()
+
+ def isalnum(w_self, space):
+ return w_self._all_true(space, w_self._isalnum)
+
+ def isalpha(w_self, space):
+ return w_self._all_true(space, w_self._isalpha)
+
+ def isdigit(w_self, space):
+ return w_self._all_true(space, w_self._isdigit)
+
+ def islower(w_self, space):
+ return w_self._none_false_one_true(space,
+ w_self._islower, w_self._isupper)
+
+ def isspace(w_self, space):
+ return w_self._all_true(space, w_self._isspace)
+
+ def isupper(w_self, space):
+ return w_self._none_false_one_true(space,
+ w_self._isupper, w_self._islower)
+
+ def istitle(w_self, space):
+ return w_self._title(space)
+
+
+class AbstractCharIterator(object):
+
+ def __init__(self, sequence):
+ self.sequence = sequence
+ self.pos = 0
+
+ def __len__(self):
+ return len(self.sequence)
+
+ def __iter__(self):
+ return self
+
+ def next(self):
+ ch = self.nextchar()
+ if ch is None:
+ raise StopIteration
+ return ch
+
+ # XXX deprecate nextchar() method
+ def nextchar(self):
+ if self.pos >= len(self):
+ return None
+ idx = self.pos
+ self.pos += 1
+ return self.sequence[idx]
+
+
class W_AbstractBaseStringObject(W_Object):
__slots__ = ()
@@ -9,6 +63,11 @@
""" representation for debugging purposes """
return "%s(%r)" % (w_self.__class__.__name__, w_self.raw_value())
+ def immutable_unique_id(w_self, space):
+ if w_self.user_overridden_class:
+ return None
+ return space.wrap(compute_unique_id(w_self.unwrap(space)))
+
def is_w(self, space, w_other):
if not isinstance(w_other, W_AbstractBaseStringObject):
return False
@@ -18,32 +77,75 @@
return False
return self.unwrap(space) is w_other.unwrap(space)
- def immutable_unique_id(w_self, space):
- if w_self.user_overridden_class:
- return None
- return space.wrap(compute_unique_id(w_self.unwrap(space)))
+ def iterator(w_self, space):
+ return AbstractCharIterator(w_self.unwrap(space))
+
+ def length(w_self, space):
+ return len(w_self.unwrap(space))
def raw_value(w_self):
raise NotImplemented("method not implemented")
- def unwrap(w_self, space):
- raise NotImplemented("method not implemented")
-
def str_w(w_self, space):
raise NotImplemented("method not implemented")
def unicode_w(w_self, space):
raise NotImplemented("method not implemented")
+ def unwrap(w_self, space):
+ raise NotImplemented("method not implemented")
-@specialize.arg(2)
-def is_generic(space, w_self, fun):
- v = w_self._value
- if len(v) == 0:
- return space.w_False
- if len(v) == 1:
- return space.newbool(fun(v[0]))
- for idx in range(len(v)):
- if not fun(v[idx]):
+ @specialize.arg(2)
+ def _all_true(w_self, space, func):
+ """Test all elements of a list with func for True.
+ Returns True only if all elements test True."""
+ size = w_self.length(space)
+ it = w_self.iterator(space)
+ if size == 0:
return space.w_False
- return space.w_True
+ if size == 1:
+ return space.newbool(func(it.nextchar()))
+ # not all it objects will support iterator protocol, eg rope
+ for pos in range(size):
+ ch = it.nextchar()
+ if not func(ch):
+ return space.w_False
+ return space.w_True
+
+ @specialize.arg(2, 3)
+ def _none_false_one_true(w_self, space, pred, inverse):
+ """Test all elements against predicate and inverse.
+ Returns True only if all elements fail inverse and at least one
+ element passes predicate."""
+ v = w_self.unwrap(space)
+ if len(v) == 1:
+ c = v[0]
+ return space.newbool(pred(c))
+ status = False
+ for idx in range(len(v)):
+ if inverse(v[idx]):
+ return space.w_False
+ elif not status and pred(v[idx]):
+ status = True
+ return space.newbool(status)
+
+ def _title(w_self, space):
+ input = w_self.unwrap(space)
+ cased = False
+ previous_is_cased = False
+
+ for pos in range(0, len(input)):
+ ch = input[pos]
+ if w_self._isupper(ch):
+ if previous_is_cased:
+ return space.w_False
+ previous_is_cased = True
+ cased = True
+ elif w_self._islower(ch):
+ if not previous_is_cased:
+ return space.w_False
+ cased = True
+ else:
+ previous_is_cased = False
+
+ return space.newbool(cased)
diff --git a/pypy/objspace/std/bytearrayobject.py b/pypy/objspace/std/bytearrayobject.py
--- a/pypy/objspace/std/bytearrayobject.py
+++ b/pypy/objspace/std/bytearrayobject.py
@@ -19,22 +19,39 @@
from pypy.interpreter import gateway
from pypy.interpreter.argument import Signature
from pypy.interpreter.buffer import RWBuffer
+from pypy.objspace.std.abstractstring import \
+ W_AbstractBaseStringObject, Mixin_BaseStringMethods
from pypy.objspace.std.bytearraytype import (
makebytearraydata_w, getbytevalue,
new_bytearray
)
-from pypy.tool.sourcetools import func_with_new_name
-class W_BytearrayObject(W_Object):
+class Mixin_BytearrayMethods(Mixin_BaseStringMethods):
+ __slots__ = ()
+
+
+class W_AbstractBytearrayObject(stringobject.W_AbstractStringObject):
+ __slots__ = ()
+
+
+class W_BytearrayObject(W_AbstractBytearrayObject, Mixin_BytearrayMethods):
from pypy.objspace.std.bytearraytype import bytearray_typedef as typedef
def __init__(w_self, data):
w_self.data = data
- def __repr__(w_self):
- """ representation for debugging purposes """
- return "%s(%s)" % (w_self.__class__.__name__, ''.join(w_self.data))
+ def raw_value(w_self):
+ return w_self.data
+
+ def str_w(w_self, space):
+ return w_self.data
+
+ def unicode_w(w_self, space):
+ # XXX should this use the default encoding?
+ from pypy.objspace.std.unicodetype import plain_str2unicode
+ return plain_str2unicode(space, w_self.data)
+
registerimplementation(W_BytearrayObject)
@@ -279,6 +296,27 @@
def str__Bytearray(space, w_bytearray):
return space.wrap(''.join(w_bytearray.data))
+def str_isalnum__Bytearray(space, w_self):
+ return w_self.isalnum(space)
+
+def str_isalpha__Bytearray(space, w_self):
+ return w_self.isalpha(space)
+
+def str_isdigit__Bytearray(space, w_self):
+ return w_self.isdigit(space)
+
+def str_islower__Bytearray(space, w_self):
+ return w_self.islower(space)
+
+def str_isspace__Bytearray(space, w_self):
+ return w_self.isspace(space)
+
+def str_istitle__Bytearray(space, w_self):
+ return w_self.istitle(space)
+
+def str_isupper__Bytearray(space, w_self):
+ return w_self.isupper(space)
+
def str_count__Bytearray_Int_ANY_ANY(space, w_bytearray, w_char, w_start, w_stop):
char = w_char.intval
bytearray = w_bytearray.data
@@ -372,34 +410,6 @@
w_str = str__Bytearray(space, w_bytearray)
return stringobject.str_decode__String_ANY_ANY(space, w_str, w_encoding, w_errors)
-def str_islower__Bytearray(space, w_bytearray):
- w_str = str__Bytearray(space, w_bytearray)
- return stringobject.str_islower__String(space, w_str)
-
-def str_isupper__Bytearray(space, w_bytearray):
- w_str = str__Bytearray(space, w_bytearray)
- return stringobject.str_isupper__String(space, w_str)
-
-def str_isalpha__Bytearray(space, w_bytearray):
- w_str = str__Bytearray(space, w_bytearray)
- return stringobject.str_isalpha__String(space, w_str)
-
-def str_isalnum__Bytearray(space, w_bytearray):
- w_str = str__Bytearray(space, w_bytearray)
- return stringobject.str_isalnum__String(space, w_str)
-
-def str_isdigit__Bytearray(space, w_bytearray):
- w_str = str__Bytearray(space, w_bytearray)
- return stringobject.str_isdigit__String(space, w_str)
-
-def str_istitle__Bytearray(space, w_bytearray):
- w_str = str__Bytearray(space, w_bytearray)
- return stringobject.str_istitle__String(space, w_str)
-
-def str_isspace__Bytearray(space, w_bytearray):
- w_str = str__Bytearray(space, w_bytearray)
- return stringobject.str_isspace__String(space, w_str)
-
def bytearray_insert__Bytearray_Int_ANY(space, w_bytearray, w_idx, w_other):
where = space.int_w(w_idx)
length = len(w_bytearray.data)
diff --git a/pypy/objspace/std/ropeobject.py b/pypy/objspace/std/ropeobject.py
--- a/pypy/objspace/std/ropeobject.py
+++ b/pypy/objspace/std/ropeobject.py
@@ -17,9 +17,10 @@
from pypy.objspace.std.stringobject import (
mod__String_ANY as mod__Rope_ANY,
str_format__String as str_format__Rope,
- _upper, _lower, DEFAULT_NOOP_TABLE)
+ DEFAULT_NOOP_TABLE)
-class W_RopeObject(stringobject.W_AbstractStringObject):
+class W_RopeObject(stringobject.W_AbstractStringObject,
+ stringobject.Mixin_StringMethods):
from pypy.objspace.std.stringtype import str_typedef as typedef
_immutable_fields_ = ['_node']
@@ -28,6 +29,12 @@
assert node.is_bytestring()
w_self._node = node
+ def iterator(w_self, space):
+ return rope.ItemIterator(w_self._node)
+
+ def length(w_self, space):
+ return w_self._node.length()
+
def raw_value(w_self):
return w_self._node
@@ -67,92 +74,34 @@
registerimplementation(W_RopeIterObject)
-def _is_generic(space, w_self, fun):
- l = w_self._node.length()
- if l == 0:
- return space.w_False
- iter = rope.ItemIterator(w_self._node)
- for i in range(l):
- if not fun(iter.nextchar()):
- return space.w_False
- return space.w_True
-_is_generic._annspecialcase_ = "specialize:arg(2)"
-
-_isspace = lambda c: c.isspace()
-_isdigit = lambda c: c.isdigit()
-_isalpha = lambda c: c.isalpha()
-_isalnum = lambda c: c.isalnum()
-
def str_isspace__Rope(space, w_self):
- return _is_generic(space, w_self, _isspace)
+ return w_self.isspace(space)
def str_isdigit__Rope(space, w_self):
- return _is_generic(space, w_self, _isdigit)
+ return w_self.isdigit(space)
def str_isalpha__Rope(space, w_self):
- return _is_generic(space, w_self, _isalpha)
+ return w_self.isalpha(space)
def str_isalnum__Rope(space, w_self):
- return _is_generic(space, w_self, _isalnum)
+ return w_self.isalnum(space)
def str_isupper__Rope(space, w_self):
"""Return True if all cased characters in S are uppercase and there is
at least one cased character in S, False otherwise."""
- l = w_self._node.length()
-
- if l == 0:
- return space.w_False
- cased = False
- iter = rope.ItemIterator(w_self._node)
- for idx in range(l):
- c = iter.nextchar()
- if c.islower():
- return space.w_False
- elif not cased and c.isupper():
- cased = True
- return space.newbool(cased)
+ return w_self.isupper(space)
def str_islower__Rope(space, w_self):
"""Return True if all cased characters in S are lowercase and there is
at least one cased character in S, False otherwise."""
- l = w_self._node.length()
-
- if l == 0:
- return space.w_False
- cased = False
- iter = rope.ItemIterator(w_self._node)
- for idx in range(l):
- c = iter.nextchar()
- if c.isupper():
- return space.w_False
- elif not cased and c.islower():
- cased = True
- return space.newbool(cased)
+ return w_self.islower(space)
def str_istitle__Rope(space, w_self):
"""Return True if S is a titlecased string and there is at least one
character in S, i.e. uppercase characters may only follow uncased
characters and lowercase characters only cased ones. Return False
otherwise."""
- cased = False
- previous_is_cased = False
-
- iter = rope.ItemIterator(w_self._node)
- for pos in range(0, w_self._node.length()):
- ch = iter.nextchar()
- if ch.isupper():
- if previous_is_cased:
- return space.w_False
- previous_is_cased = True
- cased = True
- elif ch.islower():
- if not previous_is_cased:
- return space.w_False
- cased = True
- else:
- previous_is_cased = False
-
- return space.newbool(cased)
+ return w_self.istitle(space)
def _local_transform(node, transform):
l = node.length()
@@ -166,24 +115,13 @@
_local_transform._annspecialcase_ = "specialize:arg(1)"
def str_upper__Rope(space, w_self):
- return _local_transform(w_self._node, _upper)
+ return _local_transform(w_self._node, w_self._upper)
def str_lower__Rope(space, w_self):
- return _local_transform(w_self._node, _lower)
-
-def _swapcase(ch):
- if ch.isupper():
- o = ord(ch) + 32
- return chr(o)
- elif ch.islower():
- o = ord(ch) - 32
- return chr(o)
- else:
- return ch
+ return _local_transform(w_self._node, w_self._lower)
def str_swapcase__Rope(space, w_self):
- return _local_transform(w_self._node, _swapcase)
-
+ return _local_transform(w_self._node, w_self._swapcase)
def str_capitalize__Rope(space, w_self):
node = w_self._node
@@ -221,9 +159,9 @@
for pos in range(0, length):
ch = iter.nextchar()
if not prev_letter.isalpha():
- buffer[pos] = _upper(ch)
+ buffer[pos] = w_self._upper(ch)
else:
- buffer[pos] = _lower(ch)
+ buffer[pos] = w_self._lower(ch)
prev_letter = buffer[pos]
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -14,7 +14,6 @@
from pypy.objspace.std.tupleobject import W_TupleObject
from pypy.rlib.rarithmetic import intmask, ovfcheck
from pypy.module.unicodedata import unicodedb
-from pypy.tool.sourcetools import func_with_new_name
from pypy.objspace.std.formatting import mod_format
@@ -84,6 +83,12 @@
def __init__(w_self, node):
w_self._node = node
+ def iterator(w_self, space):
+ return rope.ItemIterator(w_self._node)
+
+ def length(w_self, space):
+ return w_self._node.length()
+
def raw_value(w_self):
return w_self._node
@@ -102,9 +107,6 @@
registerimplementation(W_RopeUnicodeObject)
-def _isspace(uchar_ord):
- return unicodedb.isspace(uchar_ord)
-
def ropeunicode_w(space, w_str):
if isinstance(w_str, W_RopeUnicodeObject):
return w_str._node
@@ -314,81 +316,38 @@
def mul__ANY_RopeUnicode(space, w_times, w_uni):
return mul__RopeUnicode_ANY(space, w_uni, w_times)
+def unicode_isspace__RopeUnicode(space, w_self):
+ return w_self.isspace(space)
-def make_generic(funcname):
- def func(space, w_self):
- node = w_self._node
- if node.length() == 0:
- return space.w_False
- iter = rope.ItemIterator(node)
- for idx in range(node.length()):
- if not getattr(unicodedb, funcname)(iter.nextint()):
- return space.w_False
- return space.w_True
- return func_with_new_name(func, "unicode_%s__RopeUnicode" % (funcname, ))
+def unicode_isalpha__RopeUnicode(space, w_self):
+ return w_self.isalpha(space)
-unicode_isspace__RopeUnicode = make_generic("isspace")
-unicode_isalpha__RopeUnicode = make_generic("isalpha")
-unicode_isalnum__RopeUnicode = make_generic("isalnum")
-unicode_isdecimal__RopeUnicode = make_generic("isdecimal")
-unicode_isdigit__RopeUnicode = make_generic("isdigit")
-unicode_isnumeric__RopeUnicode = make_generic("isnumeric")
+def unicode_isalnum__RopeUnicode(space, w_self):
+ return w_self.isalnum(space)
-def unicode_islower__RopeUnicode(space, w_unicode):
- cased = False
- iter = rope.ItemIterator(w_unicode._node)
- while 1:
- try:
- ch = iter.nextint()
- except StopIteration:
- return space.newbool(cased)
- if (unicodedb.isupper(ch) or
- unicodedb.istitle(ch)):
- return space.w_False
- if not cased and unicodedb.islower(ch):
- cased = True
+def unicode_isdecimal__RopeUnicode(space, w_self):
+ return w_self.isdecimal(space)
-def unicode_isupper__RopeUnicode(space, w_unicode):
- cased = False
- iter = rope.ItemIterator(w_unicode._node)
- while 1:
- try:
- ch = iter.nextint()
- except StopIteration:
- return space.newbool(cased)
- if (unicodedb.islower(ch) or
- unicodedb.istitle(ch)):
- return space.w_False
- if not cased and unicodedb.isupper(ch):
- cased = True
+def unicode_isdigit__RopeUnicode(space, w_self):
+ return w_self.isdigit(space)
-def unicode_istitle__RopeUnicode(space, w_unicode):
- cased = False
- previous_is_cased = False
- iter = rope.ItemIterator(w_unicode._node)
- while 1:
- try:
- ch = iter.nextint()
- except StopIteration:
- return space.newbool(cased)
- if (unicodedb.isupper(ch) or
- unicodedb.istitle(ch)):
- if previous_is_cased:
- return space.w_False
- previous_is_cased = cased = True
- elif unicodedb.islower(ch):
- if not previous_is_cased:
- return space.w_False
- previous_is_cased = cased = True
- else:
- previous_is_cased = False
+def unicode_isnumeric__RopeUnicode(space, w_self):
+ return w_self.isnumeric(space)
+def unicode_islower__RopeUnicode(space, w_self):
+ return w_self.islower(space)
+
+def unicode_isupper__RopeUnicode(space, w_self):
+ return w_self.isupper(space)
+
+def unicode_istitle__RopeUnicode(space, w_self):
+ return w_self.istitle(space)
def _contains(i, uni):
return unichr(i) in uni
def unicode_strip__RopeUnicode_None(space, w_self, w_chars):
- return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _isspace))
+ return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, unicodedb.isspace))
def unicode_strip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
return W_RopeUnicodeObject(rope.strip(w_self._node, True, True, _contains,
w_chars._node.flatten_unicode()))
@@ -398,7 +357,7 @@
unicode_from_string(space, w_chars))
def unicode_lstrip__RopeUnicode_None(space, w_self, w_chars):
- return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _isspace))
+ return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, unicodedb.isspace))
def unicode_lstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
return W_RopeUnicodeObject(rope.strip(w_self._node, True, False, _contains,
w_chars._node.flatten_unicode()))
@@ -407,7 +366,7 @@
unicode_from_string(space, w_chars))
def unicode_rstrip__RopeUnicode_None(space, w_self, w_chars):
- return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _isspace))
+ return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, unicodedb.isspace))
def unicode_rstrip__RopeUnicode_RopeUnicode(space, w_self, w_chars):
return W_RopeUnicodeObject(rope.strip(w_self._node, False, True, _contains,
w_chars._node.flatten_unicode()))
@@ -653,7 +612,7 @@
selfnode = w_self._node
maxsplit = space.int_w(w_maxsplit)
res_w = [W_RopeUnicodeObject(node)
- for node in rope.split_chars(selfnode, maxsplit, _isspace)]
+ for node in rope.split_chars(selfnode, maxsplit, unicodedb.isspace)]
return space.newlist(res_w)
def unicode_split__RopeUnicode_RopeUnicode_ANY(space, w_self, w_delim, w_maxsplit):
@@ -672,7 +631,7 @@
selfnode = w_self._node
maxsplit = space.int_w(w_maxsplit)
res_w = [W_RopeUnicodeObject(node)
- for node in rope.rsplit_chars(selfnode, maxsplit, _isspace)]
+ for node in rope.rsplit_chars(selfnode, maxsplit, unicodedb.isspace)]
return space.newlist(res_w)
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -15,20 +15,67 @@
from pypy.rlib.rstring import StringBuilder, split
from pypy.interpreter.buffer import StringBuffer
-from pypy.objspace.std.abstractstring import W_AbstractBaseStringObject, \
- is_generic
+from pypy.objspace.std.abstractstring import \
+ W_AbstractBaseStringObject, Mixin_BaseStringMethods
from pypy.objspace.std.formatting import mod_format
from pypy.objspace.std.stringtype import sliced, wrapstr, wrapchar, \
stringendswith, stringstartswith, joined2
+
+class Mixin_StringMethods(Mixin_BaseStringMethods):
+ __slots__ = ()
+
+
class W_AbstractStringObject(W_AbstractBaseStringObject):
__slots__ = ()
def unwrap(w_self, space):
return w_self.str_w(space)
+ def _isalnum(self, ch):
+ return ch.isalnum()
-class W_StringObject(W_AbstractStringObject):
+ def _isalpha(self, ch):
+ return ch.isalpha()
+
+ def _isdigit(self, ch):
+ return ch.isdigit()
+
+ def _islower(self, ch):
+ return ch.islower()
+
+ def _isspace(self, ch):
+ return ch.isspace()
+
+ def _isupper(self, ch):
+ return ch.isupper()
+
+ def _lower(self, ch):
+ if ch.isupper():
+ o = ord(ch) + 32
+ return chr(o)
+ else:
+ return ch
+
+ def _upper(self, ch):
+ if ch.islower():
+ o = ord(ch) - 32
+ return chr(o)
+ else:
+ return ch
+
+ def _swapcase(self, ch):
+ if ch.isupper():
+ o = ord(ch) + 32
+ return chr(o)
+ elif ch.islower():
+ o = ord(ch) - 32
+ return chr(o)
+ else:
+ return ch
+
+
+class W_StringObject(W_AbstractStringObject, Mixin_StringMethods):
from pypy.objspace.std.stringtype import str_typedef as typedef
_immutable_fields_ = ['_value']
@@ -46,96 +93,41 @@
from pypy.objspace.std.unicodetype import plain_str2unicode
return plain_str2unicode(space, w_self._value)
+
registerimplementation(W_StringObject)
W_StringObject.EMPTY = W_StringObject('')
W_StringObject.PREBUILT = [W_StringObject(chr(i)) for i in range(256)]
del i
-def _upper(ch):
- if ch.islower():
- o = ord(ch) - 32
- return chr(o)
- else:
- return ch
+def str_isalnum__String(space, w_self):
+ return w_self.isalnum(space)
-def _lower(ch):
- if ch.isupper():
- o = ord(ch) + 32
- return chr(o)
- else:
- return ch
+def str_isalpha__String(space, w_self):
+ return w_self.isalpha(space)
+
+def str_isdigit__String(space, w_self):
+ return w_self.isdigit(space)
def str_isspace__String(space, w_self):
- isspace = lambda c: c.isspace()
- return is_generic(space, w_self, isspace)
+ return w_self.isspace(space)
-def str_isdigit__String(space, w_self):
- isdigit = lambda c: c.isdigit()
- return is_generic(space, w_self, isdigit)
-
-def str_isalpha__String(space, w_self):
- isalpha = lambda c: c.isalpha()
- return is_generic(space, w_self, isalpha)
-
-def str_isalnum__String(space, w_self):
- isalnum = lambda c: c.isalnum()
- return is_generic(space, w_self, isalnum)
+def str_islower__String(space, w_self):
+ """Return True if all cased characters in S are lowercase and there is
+at least one cased character in S, False otherwise."""
+ return w_self.islower(space)
def str_isupper__String(space, w_self):
"""Return True if all cased characters in S are uppercase and there is
at least one cased character in S, False otherwise."""
- v = w_self._value
- if len(v) == 1:
- c = v[0]
- return space.newbool(c.isupper())
- cased = False
- for idx in range(len(v)):
- if v[idx].islower():
- return space.w_False
- elif not cased and v[idx].isupper():
- cased = True
- return space.newbool(cased)
-
-def str_islower__String(space, w_self):
- """Return True if all cased characters in S are lowercase and there is
-at least one cased character in S, False otherwise."""
- v = w_self._value
- if len(v) == 1:
- c = v[0]
- return space.newbool(c.islower())
- cased = False
- for idx in range(len(v)):
- if v[idx].isupper():
- return space.w_False
- elif not cased and v[idx].islower():
- cased = True
- return space.newbool(cased)
+ return w_self.isupper(space)
def str_istitle__String(space, w_self):
"""Return True if S is a titlecased string and there is at least one
character in S, i.e. uppercase characters may only follow uncased
characters and lowercase characters only cased ones. Return False
otherwise."""
- input = w_self._value
- cased = False
- previous_is_cased = False
-
- for pos in range(0, len(input)):
- ch = input[pos]
- if ch.isupper():
- if previous_is_cased:
- return space.w_False
- previous_is_cased = True
- cased = True
- elif ch.islower():
- if not previous_is_cased:
- return space.w_False
- cased = True
- else:
- previous_is_cased = False
-
- return space.newbool(cased)
+ return w_self.istitle(space)
def str_upper__String(space, w_self):
self = w_self._value
@@ -150,18 +142,10 @@
builder = StringBuilder(len(self))
for i in range(len(self)):
ch = self[i]
- if ch.isupper():
- o = ord(ch) + 32
- builder.append(chr(o))
- elif ch.islower():
- o = ord(ch) - 32
- builder.append(chr(o))
- else:
- builder.append(ch)
+ builder.append(w_self._swapcase(ch))
return space.wrap(builder.build())
-
def str_capitalize__String(space, w_self):
input = w_self._value
builder = StringBuilder(len(input))
@@ -191,10 +175,10 @@
for pos in range(len(input)):
ch = input[pos]
if not prev_letter.isalpha():
- ch = _upper(ch)
+ ch = w_self._upper(ch)
builder.append(ch)
else:
- ch = _lower(ch)
+ ch = w_self._lower(ch)
builder.append(ch)
prev_letter = ch
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -15,18 +15,61 @@
from pypy.rlib.rstring import UnicodeBuilder
from pypy.rlib.runicode import unicode_encode_unicode_escape
from pypy.module.unicodedata import unicodedb
-from pypy.tool.sourcetools import func_with_new_name
-from pypy.objspace.std.abstractstring import W_AbstractBaseStringObject
+from pypy.objspace.std.abstractstring import \
+ W_AbstractBaseStringObject, Mixin_BaseStringMethods
from pypy.objspace.std.formatting import mod_format
from pypy.objspace.std.stringtype import stringstartswith, stringendswith
-class W_AbstractUnicodeObject(W_AbstractBaseStringObject):
+
+class Mixin_UnicodeMethods(Mixin_BaseStringMethods):
+ __slows__ = ()
+
+ def isdecimal(w_self, space):
+ return w_self._all_true(space, w_self._isdecimal)
+
+
+class W_AbstractUnicodeObject(W_AbstractBaseStringObject, Mixin_UnicodeMethods):
__slots__ = ()
def unwrap(w_self, space):
return w_self.unicode_w(space)
+ def _isalnum(self, ch):
+ return unicodedb.isalnum(ord(ch))
+
+ def _isalpha(self, ch):
+ return unicodedb.isalpha(ord(ch))
+
+ def _isdigit(self, ch):
+ return unicodedb.isdigit(ord(ch))
+
+ def _isdecimal(self, ch):
+ return unicodedb.isdecimal(ord(ch))
+
+ def _islower(self, ch):
+ return unicodedb.islower(ord(ch))
+
+ def _isspace(self, ch):
+ return unicodedb.isspace(ord(ch))
+
+ def _isupper(self, ch):
+ return unicodedb.isupper(ord(ch))
+
+ def _lower(self, ch):
+ return unichr(unicodedb.tolower(ord(ch)))
+
+ def _upper(self, ch):
+ return unichr(unicodedb.toupper(ord(ch)))
+
+ def _swapcase(self, ch):
+ if unicodedb.islower(ch):
+ return unichr(unicodedb.toupper(ord(ch)))
+ elif unicodedb.isupper(ch):
+ return unichr(unicodedb.tolower(ord(ch)))
+ else:
+ return ch
+
class W_UnicodeObject(W_AbstractUnicodeObject):
from pypy.objspace.std.unicodetype import unicode_typedef as typedef
@@ -50,6 +93,7 @@
return w_self
return W_UnicodeObject(w_self._value)
+
W_UnicodeObject.EMPTY = W_UnicodeObject(u'')
registerimplementation(W_UnicodeObject)
@@ -60,6 +104,7 @@
raise operationerrfmt(space.w_TypeError,
"expected unicode, got '%s'",
space.type(w_unistr).getname(space))
+ # XXX remove direct use of _value
unistr = w_unistr._value
result = ['\0'] * len(unistr)
digits = [ '0', '1', '2', '3', '4',
@@ -287,63 +332,32 @@
def mul__ANY_Unicode(space, w_times, w_uni):
return mul__Unicode_ANY(space, w_uni, w_times)
-def _isspace(uchar):
- return unicodedb.isspace(ord(uchar))
-
def unicode_isspace__Unicode(space, w_self):
- return is_generic(space, w_self, unicodedb.isspace)
+ return w_self.isspace(space)
def unicode_isalpha__Unicode(space, w_self):
- return is_generic(space, w_self, unicodedb.isalpha)
+ return w_self.isalpha(space)
def unicode_isalnum__Unicode(space, w_self):
- return is_generic(space, w_self, unicodedb.isalnum)
+ return w_self.isalnum(space)
def unicode_isdecimal__Unicode(space, w_self):
- return is_generic(space, w_self, unicodedb.isdecimal)
+ return w_self.isdecimal(space)
def unicode_isdigit__Unicode(space, w_self):
- return is_generic(space, w_self, unicodedb.isdigit)
+ return w_self.isdigit(space)
def unicode_isnumeric__Unicode(space, w_self):
- return is_generic(space, w_self, unicodedb.isnumeric)
+ return w_self.isnumeric(space)
-def unicode_islower__Unicode(space, w_unicode):
- cased = False
- for uchar in w_unicode._value:
- if (unicodedb.isupper(ord(uchar)) or
- unicodedb.istitle(ord(uchar))):
- return space.w_False
- if not cased and unicodedb.islower(ord(uchar)):
- cased = True
- return space.newbool(cased)
+def unicode_islower__Unicode(space, w_self):
+ return w_self.islower(space)
-def unicode_isupper__Unicode(space, w_unicode):
- cased = False
- for uchar in w_unicode._value:
- if (unicodedb.islower(ord(uchar)) or
- unicodedb.istitle(ord(uchar))):
- return space.w_False
- if not cased and unicodedb.isupper(ord(uchar)):
- cased = True
- return space.newbool(cased)
+def unicode_isupper__Unicode(space, w_self):
+ return w_self.isupper(space)
-def unicode_istitle__Unicode(space, w_unicode):
- cased = False
- previous_is_cased = False
- for uchar in w_unicode._value:
- if (unicodedb.isupper(ord(uchar)) or
- unicodedb.istitle(ord(uchar))):
- if previous_is_cased:
- return space.w_False
- previous_is_cased = cased = True
- elif unicodedb.islower(ord(uchar)):
- if not previous_is_cased:
- return space.w_False
- previous_is_cased = cased = True
- else:
- previous_is_cased = False
- return space.newbool(cased)
+def unicode_istitle__Unicode(space, w_self):
+ return w_self.istitle(space)
def _strip(space, w_self, w_chars, left, right):
"internal function called by str_xstrip methods"
@@ -373,11 +387,11 @@
rpos = len(u_self)
if left:
- while lpos < rpos and _isspace(u_self[lpos]):
+ while lpos < rpos and w_self._isspace(u_self[lpos]):
lpos += 1
if right:
- while rpos > lpos and _isspace(u_self[rpos - 1]):
+ while rpos > lpos and w_self._isspace(u_self[rpos - 1]):
rpos -= 1
assert rpos >= 0
@@ -651,7 +665,7 @@
while True:
# find the beginning of the next word
while i < length:
- if not _isspace(value[i]):
+ if not w_self._isspace(value[i]):
break # found
i += 1
else:
@@ -662,7 +676,7 @@
j = length # take all the rest of the string
else:
j = i + 1
- while j < length and not _isspace(value[j]):
+ while j < length and not w_self._isspace(value[j]):
j += 1
maxsplit -= 1 # NB. if it's already < 0, it stays < 0
@@ -694,7 +708,7 @@
while True:
# starting from the end, find the end of the next word
while i >= 0:
- if not _isspace(value[i]):
+ if not w_self._isspace(value[i]):
break # found
i -= 1
else:
@@ -706,7 +720,7 @@
j = -1 # take all the rest of the string
else:
j = i - 1
- while j >= 0 and not _isspace(value[j]):
+ while j >= 0 and not w_self._isspace(value[j]):
j -= 1
maxsplit -= 1 # NB. if it's already < 0, it stays < 0
More information about the pypy-commit
mailing list