[pypy-commit] pypy default: Introduce a base class hierarchy for string objects.

Wed Mar 14 09:20:26 CET 2012

Author: aliles
Branch: 
Changeset: r53537:23f16a681b06
Date: 2012-03-12 23:44 -0700
http://bitbucket.org/pypy/pypy/changeset/23f16a681b06/

Log:	Introduce a base class hierarchy for string objects.

	Hierarchy to be used to pull up shared code between character
	sequence objects. Currently only the generic function specialization
	has been pulled up.

diff --git a/pypy/objspace/std/abstractstring.py b/pypy/objspace/std/abstractstring.py
new file mode 100644
--- /dev/null
+++ b/pypy/objspace/std/abstractstring.py
@@ -0,0 +1,49 @@
+from pypy.objspace.std.model import W_Object
+from pypy.rlib.objectmodel import specialize
+
+
+class W_AbstractBaseStringObject(W_Object):
+    __slots__ = ()
+
+    def __repr__(w_self):
+        """ representation for debugging purposes """
+        return "%s(%r)" % (w_self.__class__.__name__, w_self.raw_value())
+
+    def is_w(self, space, w_other):
+        if not isinstance(w_other, W_AbstractBaseStringObject):
+            return False
+        if self is w_other:
+            return True
+        if self.user_overridden_class or w_other.user_overridden_class:
+            return False
+        return self.unwrap(space) is w_other.unwrap(space)
+
+    def immutable_unique_id(w_self, space):
+        if w_self.user_overridden_class:
+            return None
+        return space.wrap(compute_unique_id(w_self.unwrap(space)))
+
+    def raw_value(w_self):
+        raise NotImplemented("method not implemented")
+
+    def unwrap(w_self, space):
+        raise NotImplemented("method not implemented")
+
+    def str_w(w_self, space):
+        raise NotImplemented("method not implemented")
+
+    def unicode_w(w_self, space):
+        raise NotImplemented("method not implemented")
+
+
+@specialize.arg(2)
+def is_generic(space, w_self, fun):
+    v = w_self._value
+    if len(v) == 0:
+        return space.w_False
+    if len(v) == 1:
+        return space.newbool(fun(v[0]))
+    for idx in range(len(v)):
+        if not fun(v[idx]):
+            return space.w_False
+    return space.w_True
diff --git a/pypy/objspace/std/ropeobject.py b/pypy/objspace/std/ropeobject.py
--- a/pypy/objspace/std/ropeobject.py
+++ b/pypy/objspace/std/ropeobject.py
@@ -28,24 +28,22 @@
             assert node.is_bytestring()
         w_self._node = node
 
-    def __repr__(w_self):
-        """ representation for debugging purposes """
-        return "%s(%r)" % (w_self.__class__.__name__, w_self._node)
+    def raw_value(w_self):
+        return w_self._node
 
-    def unwrap(w_self, space):
+    def str_w(w_self, space):
         return w_self._node.flatten_string()
-    str_w = unwrap
+
+    def unicode_w(w_self, space):
+        # XXX should this use the default encoding?
+        from pypy.objspace.std.unicodetype import plain_str2unicode
+        return plain_str2unicode(space, w_self._node.flatten_string())
 
     def create_if_subclassed(w_self):
         if type(w_self) is W_RopeObject:
             return w_self
         return W_RopeObject(w_self._node)
 
-    def unicode_w(w_self, space):
-        # XXX should this use the default encoding?
-        from pypy.objspace.std.unicodetype import plain_str2unicode
-        return plain_str2unicode(space, w_self._node.flatten_string())
-
 W_RopeObject.EMPTY = W_RopeObject(rope.LiteralStringNode.EMPTY)
 W_RopeObject.PREBUILT = [W_RopeObject(rope.LiteralStringNode.PREBUILT[i])
                              for i in range(256)]
diff --git a/pypy/objspace/std/ropeunicodeobject.py b/pypy/objspace/std/ropeunicodeobject.py
--- a/pypy/objspace/std/ropeunicodeobject.py
+++ b/pypy/objspace/std/ropeunicodeobject.py
@@ -84,25 +84,20 @@
     def __init__(w_self, node):
         w_self._node = node
 
-    def __repr__(w_self):
-        """ representation for debugging purposes """
-        return "%s(%r)" % (w_self.__class__.__name__, w_self._node)
-
-    def unwrap(w_self, space):
-        # for testing
-        return w_self._node.flatten_unicode()
+    def raw_value(w_self):
+        return w_self._node
 
     def str_w(w_self, space):
         return space.str_w(space.str(w_self))
 
+    def unicode_w(self, space):
+        return self._node.flatten_unicode()
+
     def create_if_subclassed(w_self):
         if type(w_self) is W_RopeUnicodeObject:
             return w_self
         return W_RopeUnicodeObject(w_self._node)
 
-    def unicode_w(self, space):
-        return self._node.flatten_unicode()
-
 W_RopeUnicodeObject.EMPTY = W_RopeUnicodeObject(rope.LiteralStringNode.EMPTY)
 
 registerimplementation(W_RopeUnicodeObject)
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -15,27 +15,17 @@
 from pypy.rlib.rstring import StringBuilder, split
 from pypy.interpreter.buffer import StringBuffer
 
+from pypy.objspace.std.abstractstring import W_AbstractBaseStringObject, \
+        is_generic
+from pypy.objspace.std.formatting import mod_format
 from pypy.objspace.std.stringtype import sliced, wrapstr, wrapchar, \
      stringendswith, stringstartswith, joined2
 
-from pypy.objspace.std.formatting import mod_format
-
-class W_AbstractStringObject(W_Object):
+class W_AbstractStringObject(W_AbstractBaseStringObject):
     __slots__ = ()
 
-    def is_w(self, space, w_other):
-        if not isinstance(w_other, W_AbstractStringObject):
-            return False
-        if self is w_other:
-            return True
-        if self.user_overridden_class or w_other.user_overridden_class:
-            return False
-        return space.str_w(self) is space.str_w(w_other)
-
-    def immutable_unique_id(self, space):
-        if self.user_overridden_class:
-            return None
-        return space.wrap(compute_unique_id(space.str_w(self)))
+    def unwrap(w_self, space):
+        return w_self.str_w(space)
 
 
 class W_StringObject(W_AbstractStringObject):
@@ -45,11 +35,7 @@
     def __init__(w_self, str):
         w_self._value = str
 
-    def __repr__(w_self):
-        """ representation for debugging purposes """
-        return "%s(%r)" % (w_self.__class__.__name__, w_self._value)
-
-    def unwrap(w_self, space):
+    def raw_value(w_self):
         return w_self._value
 
     def str_w(w_self, space):
@@ -66,24 +52,6 @@
 W_StringObject.PREBUILT = [W_StringObject(chr(i)) for i in range(256)]
 del i
 
-@specialize.arg(2)
-def _is_generic(space, w_self, fun):
-    v = w_self._value
-    if len(v) == 0:
-        return space.w_False
-    if len(v) == 1:
-        c = v[0]
-        return space.newbool(fun(c))
-    else:
-        return _is_generic_loop(space, v, fun)
-
-@specialize.arg(2)
-def _is_generic_loop(space, v, fun):
-    for idx in range(len(v)):
-        if not fun(v[idx]):
-            return space.w_False
-    return space.w_True
-
 def _upper(ch):
     if ch.islower():
         o = ord(ch) - 32
@@ -98,22 +66,21 @@
     else:
         return ch
 
-_isspace = lambda c: c.isspace()
-_isdigit = lambda c: c.isdigit()
-_isalpha = lambda c: c.isalpha()
-_isalnum = lambda c: c.isalnum()
-
 def str_isspace__String(space, w_self):
-    return _is_generic(space, w_self, _isspace)
+    isspace = lambda c: c.isspace()
+    return is_generic(space, w_self, isspace)
 
 def str_isdigit__String(space, w_self):
-    return _is_generic(space, w_self, _isdigit)
+    isdigit = lambda c: c.isdigit()
+    return is_generic(space, w_self, isdigit)
 
 def str_isalpha__String(space, w_self):
-    return _is_generic(space, w_self, _isalpha)
+    isalpha = lambda c: c.isalpha()
+    return is_generic(space, w_self, isalpha)
 
 def str_isalnum__String(space, w_self):
-    return _is_generic(space, w_self, _isalnum)
+    isalnum = lambda c: c.isalnum()
+    return is_generic(space, w_self, isalnum)
 
 def str_isupper__String(space, w_self):
     """Return True if all cased characters in S are uppercase and there is
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -17,25 +17,15 @@
 from pypy.module.unicodedata import unicodedb
 from pypy.tool.sourcetools import func_with_new_name
 
+from pypy.objspace.std.abstractstring import W_AbstractBaseStringObject
 from pypy.objspace.std.formatting import mod_format
 from pypy.objspace.std.stringtype import stringstartswith, stringendswith
 
-class W_AbstractUnicodeObject(W_Object):
+class W_AbstractUnicodeObject(W_AbstractBaseStringObject):
     __slots__ = ()
 
-    def is_w(self, space, w_other):
-        if not isinstance(w_other, W_AbstractUnicodeObject):
-            return False
-        if self is w_other:
-            return True
-        if self.user_overridden_class or w_other.user_overridden_class:
-            return False
-        return space.unicode_w(self) is space.unicode_w(w_other)
-
-    def immutable_unique_id(self, space):
-        if self.user_overridden_class:
-            return None
-        return space.wrap(compute_unique_id(space.unicode_w(self)))
+    def unwrap(w_self, space):
+        return w_self.unicode_w(space)
 
 
 class W_UnicodeObject(W_AbstractUnicodeObject):
@@ -46,25 +36,20 @@
         assert isinstance(unistr, unicode)
         w_self._value = unistr
 
-    def __repr__(w_self):
-        """ representation for debugging purposes """
-        return "%s(%r)" % (w_self.__class__.__name__, w_self._value)
-
-    def unwrap(w_self, space):
-        # for testing
+    def raw_value(w_self):
         return w_self._value
 
-    def create_if_subclassed(w_self):
-        if type(w_self) is W_UnicodeObject:
-            return w_self
-        return W_UnicodeObject(w_self._value)
-
     def str_w(self, space):
         return space.str_w(space.str(self))
 
     def unicode_w(self, space):
         return self._value
 
+    def create_if_subclassed(w_self):
+        if type(w_self) is W_UnicodeObject:
+            return w_self
+        return W_UnicodeObject(w_self._value)
+
 W_UnicodeObject.EMPTY = W_UnicodeObject(u'')
 
 registerimplementation(W_UnicodeObject)
@@ -305,23 +290,23 @@
 def _isspace(uchar):
     return unicodedb.isspace(ord(uchar))
 
-def make_generic(funcname):
-    def func(space, w_self):
-        v = w_self._value
-        if len(v) == 0:
-            return space.w_False
-        for idx in range(len(v)):
-            if not getattr(unicodedb, funcname)(ord(v[idx])):
-                return space.w_False
-        return space.w_True
-    return func_with_new_name(func, "unicode_%s__Unicode" % (funcname, ))
+def unicode_isspace__Unicode(space, w_self):
+    return is_generic(space, w_self, unicodedb.isspace)
 
-unicode_isspace__Unicode = make_generic("isspace")
-unicode_isalpha__Unicode = make_generic("isalpha")
-unicode_isalnum__Unicode = make_generic("isalnum")
-unicode_isdecimal__Unicode = make_generic("isdecimal")
-unicode_isdigit__Unicode = make_generic("isdigit")
-unicode_isnumeric__Unicode = make_generic("isnumeric")
+def unicode_isalpha__Unicode(space, w_self):
+    return is_generic(space, w_self, unicodedb.isalpha)
+
+def unicode_isalnum__Unicode(space, w_self):
+    return is_generic(space, w_self, unicodedb.isalnum)
+
+def unicode_isdecimal__Unicode(space, w_self):
+    return is_generic(space, w_self, unicodedb.isdecimal)
+
+def unicode_isdigit__Unicode(space, w_self):
+    return is_generic(space, w_self, unicodedb.isdigit)
+
+def unicode_isnumeric__Unicode(space, w_self):
+    return is_generic(space, w_self, unicodedb.isnumeric)
 
 def unicode_islower__Unicode(space, w_unicode):
     cased = False