[pypy-commit] pypy default: Change VStringPlainValue, refactoring and giving a long

arigo noreply at buildbot.pypy.org
Fri Nov 4 19:14:12 CET 2011


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r48760:23f5428f3b52
Date: 2011-11-04 18:07 +0100
http://bitbucket.org/pypy/pypy/changeset/23f5428f3b52/

Log:	Change VStringPlainValue, refactoring and giving a long explanation
	of the meaning of '_chars' and when it contains None values. It
	simplifies some code I did earlier today, and hopefully it makes
	vstring.py safe now. At worst it should now crash
	in an assert that tries to do one of now-forbidden operations.

diff --git a/pypy/jit/metainterp/optimizeopt/optimizer.py b/pypy/jit/metainterp/optimizeopt/optimizer.py
--- a/pypy/jit/metainterp/optimizeopt/optimizer.py
+++ b/pypy/jit/metainterp/optimizeopt/optimizer.py
@@ -247,7 +247,6 @@
 CONST_1      = ConstInt(1)
 CVAL_ZERO    = ConstantValue(CONST_0)
 CVAL_ZERO_FLOAT = ConstantValue(Const._new(0.0))
-CVAL_UNINITIALIZED_ZERO = ConstantValue(CONST_0)
 llhelper.CVAL_NULLREF = ConstantValue(llhelper.CONST_NULL)
 oohelper.CVAL_NULLREF = ConstantValue(oohelper.CONST_NULL)
 
diff --git a/pypy/jit/metainterp/optimizeopt/vstring.py b/pypy/jit/metainterp/optimizeopt/vstring.py
--- a/pypy/jit/metainterp/optimizeopt/vstring.py
+++ b/pypy/jit/metainterp/optimizeopt/vstring.py
@@ -106,46 +106,33 @@
         if not we_are_translated():
             op.name = 'FORCE'
         optforce.emit_operation(op)
-        self.string_copy_parts(optforce, box, CONST_0, self.mode)
+        self.initialize_forced_string(optforce, box, CONST_0, self.mode)
+
+    def initialize_forced_string(self, string_optimizer, targetbox,
+                                 offsetbox, mode):
+        return self.string_copy_parts(string_optimizer, targetbox,
+                                      offsetbox, mode)
 
 
 class VStringPlainValue(VAbstractStringValue):
     """A string built with newstr(const)."""
     _lengthbox = None     # cache only
 
-    # Warning: an issue with VStringPlainValue is that sometimes it is
-    # initialized unpredictably by some copystrcontent.  When this occurs
-    # we set self._chars to None.  Be careful to check for is_valid().
-
-    def is_valid(self):
-        return self._chars is not None
-
-    def _invalidate(self):
-        assert self.is_valid()
-        if self._lengthbox is None:
-            self._lengthbox = ConstInt(len(self._chars))
-        self._chars = None
-
-    def _really_force(self, optforce):
-        VAbstractStringValue._really_force(self, optforce)
-        assert self.box is not None
-        if self.is_valid():
-            for c in self._chars:
-                if c is optimizer.CVAL_UNINITIALIZED_ZERO:
-                    # the string has uninitialized null bytes in it, so
-                    # assume that it is forced for being further mutated
-                    # (e.g. by copystrcontent).  So it becomes invalid
-                    # as a VStringPlainValue: the _chars must not be used
-                    # any longer.
-                    self._invalidate()
-                    break
-
     def setup(self, size):
-        self._chars = [optimizer.CVAL_UNINITIALIZED_ZERO] * size
+        # in this list, None means: "it's probably uninitialized so far,
+        # but maybe it was actually filled."  So to handle this case,
+        # strgetitem cannot be virtual-ized and must be done as a residual
+        # operation.  By contrast, any non-None value means: we know it
+        # is initialized to this value; strsetitem() there makes no sense.
+        # Also, as long as self.is_virtual(), then we know that no-one else
+        # could have written to the string, so we know that in this case
+        # "None" corresponds to "really uninitialized".
+        self._chars = [None] * size
 
     def setup_slice(self, longerlist, start, stop):
         assert 0 <= start <= stop <= len(longerlist)
         self._chars = longerlist[start:stop]
+        # slice the 'longerlist', which may also contain Nones
 
     def getstrlen(self, _, mode):
         if self._lengthbox is None:
@@ -153,44 +140,66 @@
         return self._lengthbox
 
     def getitem(self, index):
-        return self._chars[index]
+        return self._chars[index]     # may return None!
 
     def setitem(self, index, charvalue):
         assert isinstance(charvalue, optimizer.OptValue)
+        assert self._chars[index] is None, (
+            "setitem() on an already-initialized location")
         self._chars[index] = charvalue
 
+    def is_completely_initialized(self):
+        for c in self._chars:
+            if c is None:
+                return False
+        return True
+
     @specialize.arg(1)
     def get_constant_string_spec(self, mode):
-        if not self.is_valid():
-            return None
         for c in self._chars:
-            if c is optimizer.CVAL_UNINITIALIZED_ZERO or not c.is_constant():
+            if c is None or not c.is_constant():
                 return None
         return mode.emptystr.join([mode.chr(c.box.getint())
                                    for c in self._chars])
 
     def string_copy_parts(self, string_optimizer, targetbox, offsetbox, mode):
-        if not self.is_valid():
+        if not self.is_virtual() and not self.is_completely_initialized():
             return VAbstractStringValue.string_copy_parts(
                 self, string_optimizer, targetbox, offsetbox, mode)
+        else:
+            return self.initialize_forced_string(string_optimizer, targetbox,
+                                                 offsetbox, mode)
+
+    def initialize_forced_string(self, string_optimizer, targetbox,
+                                 offsetbox, mode):
         for i in range(len(self._chars)):
             assert isinstance(targetbox, BoxPtr)   # ConstPtr never makes sense
-            charbox = self._chars[i].force_box(string_optimizer)
-            if not (isinstance(charbox, Const) and charbox.same_constant(CONST_0)):
-                string_optimizer.emit_operation(ResOperation(mode.STRSETITEM, [targetbox,
-                                                                               offsetbox,
-                                                                               charbox],
-                                                  None))
+            charvalue = self.getitem(i)
+            if charvalue is not None:
+                charbox = charvalue.force_box(string_optimizer)
+                if not (isinstance(charbox, Const) and
+                        charbox.same_constant(CONST_0)):
+                    op = ResOperation(mode.STRSETITEM, [targetbox,
+                                                        offsetbox,
+                                                        charbox],
+                                      None)
+                    string_optimizer.emit_operation(op)
             offsetbox = _int_add(string_optimizer, offsetbox, CONST_1)
         return offsetbox
 
     def get_args_for_fail(self, modifier):
         if self.box is None and not modifier.already_seen_virtual(self.keybox):
-            assert self.is_valid()
-            charboxes = [value.get_key_box() for value in self._chars]
+            charboxes = []
+            for value in self._chars:
+                if value is not None:
+                    box = value.get_key_box()
+                else:
+                    box = None
+                charboxes.append(box)
             modifier.register_virtual_fields(self.keybox, charboxes)
             for value in self._chars:
-                value.get_args_for_fail(modifier)
+                if value is not None:
+                    value.get_args_for_fail(modifier)
 
     def _make_virtual(self, modifier):
         return modifier.make_vstrplain(self.mode is mode_unicode)
@@ -405,8 +414,7 @@
     def optimize_STRSETITEM(self, op):
         value = self.getvalue(op.getarg(0))
         assert not value.is_constant() # strsetitem(ConstPtr) never makes sense
-        if (value.is_virtual() and isinstance(value, VStringPlainValue)
-            and value.is_valid()):
+        if value.is_virtual() and isinstance(value, VStringPlainValue):
             indexbox = self.get_constant_box(op.getarg(1))
             if indexbox is not None:
                 value.setitem(indexbox.getint(), self.getvalue(op.getarg(2)))
@@ -437,10 +445,11 @@
             value = value.vstr
             vindex = self.getvalue(fullindexbox)
         #
-        if (isinstance(value, VStringPlainValue)  # even if no longer virtual
-            and value.is_valid()):                # but make sure it is valid
+        if isinstance(value, VStringPlainValue):  # even if no longer virtual
             if vindex.is_constant():
-                return value.getitem(vindex.box.getint())
+                result = value.getitem(vindex.box.getint())
+                if result is not None:
+                    return result
         #
         resbox = _strgetitem(self, value.force_box(self), vindex.force_box(self), mode)
         return self.getvalue(resbox)
@@ -538,12 +547,12 @@
         vstart = self.getvalue(op.getarg(2))
         vstop = self.getvalue(op.getarg(3))
         #
-        if (isinstance(vstr, VStringPlainValue) and vstr.is_valid()
-            and vstart.is_constant() and vstop.is_constant()):
-            value = self.make_vstring_plain(op.result, op, mode)
-            value.setup_slice(vstr._chars, vstart.box.getint(),
-                              vstop.box.getint())
-            return True
+        #if (isinstance(vstr, VStringPlainValue) and vstart.is_constant()
+        #    and vstop.is_constant()):
+        #    value = self.make_vstring_plain(op.result, op, mode)
+        #    value.setup_slice(vstr._chars, vstart.box.getint(),
+        #                      vstop.box.getint())
+        #    return True
         #
         vstr.ensure_nonnull()
         lengthbox = _int_sub(self, vstop.force_box(self),
diff --git a/pypy/jit/metainterp/resume.py b/pypy/jit/metainterp/resume.py
--- a/pypy/jit/metainterp/resume.py
+++ b/pypy/jit/metainterp/resume.py
@@ -126,6 +126,7 @@
 UNASSIGNED = tag(-1<<13, TAGBOX)
 UNASSIGNEDVIRTUAL = tag(-1<<13, TAGVIRTUAL)
 NULLREF = tag(-1, TAGCONST)
+UNINITIALIZED = tag(-2, TAGCONST)   # used for uninitialized string characters
 
 
 class ResumeDataLoopMemo(object):
@@ -439,6 +440,8 @@
         self.storage.rd_pendingfields = rd_pendingfields
 
     def _gettagged(self, box):
+        if box is None:
+            return UNINITIALIZED
         if isinstance(box, Const):
             return self.memo.getconst(box)
         else:
@@ -572,7 +575,9 @@
         string = decoder.allocate_string(length)
         decoder.virtuals_cache[index] = string
         for i in range(length):
-            decoder.string_setitem(string, i, self.fieldnums[i])
+            charnum = self.fieldnums[i]
+            if not tagged_eq(charnum, UNINITIALIZED):
+                decoder.string_setitem(string, i, charnum)
         return string
 
     def debug_prints(self):
@@ -625,7 +630,9 @@
         string = decoder.allocate_unicode(length)
         decoder.virtuals_cache[index] = string
         for i in range(length):
-            decoder.unicode_setitem(string, i, self.fieldnums[i])
+            charnum = self.fieldnums[i]
+            if not tagged_eq(charnum, UNINITIALIZED):
+                decoder.unicode_setitem(string, i, charnum)
         return string
 
     def debug_prints(self):


More information about the pypy-commit mailing list