[pypy-commit] pypy stringbuilder-perf: Use unsigned numbers, needed on 32-bit in case a raw buffer straddles the 2**31 middle-of-memory limit

arigo noreply at buildbot.pypy.org
Mon Jun 9 17:03:32 CEST 2014


Author: Armin Rigo <arigo at tunes.org>
Branch: stringbuilder-perf
Changeset: r72001:e0d8eae14284
Date: 2014-06-09 17:02 +0200
http://bitbucket.org/pypy/pypy/changeset/e0d8eae14284/

Log:	Use unsigned numbers, needed on 32-bit in case a raw buffer
	straddles the 2**31 middle-of-memory limit

diff --git a/rpython/rtyper/lltypesystem/rbuilder.py b/rpython/rtyper/lltypesystem/rbuilder.py
--- a/rpython/rtyper/lltypesystem/rbuilder.py
+++ b/rpython/rtyper/lltypesystem/rbuilder.py
@@ -1,6 +1,6 @@
 from rpython.rlib import rgc, jit
 from rpython.rlib.objectmodel import enforceargs, specialize
-from rpython.rlib.rarithmetic import ovfcheck
+from rpython.rlib.rarithmetic import ovfcheck, r_uint, intmask
 from rpython.rlib.debug import ll_assert
 from rpython.rlib.rgc import must_be_light_finalizer
 from rpython.rtyper.rptr import PtrRepr
@@ -49,15 +49,21 @@
         #
         new_piece = lltype.malloc(STRINGPIECE)
         charsize = ll_builder.charsize
-        new_piece.piece_lgt = needed * charsize
+        try:
+            new_piece.piece_lgt = ovfcheck(needed * charsize)
+        except OverflowError:
+            raise MemoryError
         raw_ptr = lltype.malloc(rffi.CCHARP.TO, needed * charsize, flavor='raw')
         new_piece.raw_ptr = raw_ptr
         new_piece.prev_piece = ll_builder.extra_pieces
         ll_builder.extra_pieces = new_piece
-        ll_builder.current_ofs = rffi.cast(lltype.Signed, raw_ptr)
-        ll_builder.current_end = (rffi.cast(lltype.Signed, raw_ptr) +
-                                  needed * charsize)
-        ll_builder.total_size += needed
+        ll_builder.current_ofs = rffi.cast(lltype.Unsigned, raw_ptr)
+        ll_builder.current_end = (rffi.cast(lltype.Unsigned, raw_ptr) +
+                                  r_uint(needed * charsize))
+        try:
+            ll_builder.total_size = ovfcheck(ll_builder.total_size + needed)
+        except OverflowError:
+            raise MemoryError
         if ll_builder.current_buf:
             STRTYPE = lltype.typeOf(ll_builder.current_buf).TO
             ll_builder.initial_buf = ll_builder.current_buf
@@ -67,10 +73,10 @@
     def stringbuilder_append_overflow(ll_builder, ll_str):
         # First, the part that still fits in the current piece
         ofs = ll_builder.current_ofs
-        part1 = ll_builder.current_end - ofs     # in bytes, not (uni)chars
+        part1 = intmask(ll_builder.current_end - ofs) # in bytes, not (uni)chars
         # --- no GC! ---
         raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf)
-        rffi.c_memcpy(rffi.ptradd(raw, ofs),
+        rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)),
                       ll_str2raw(ll_str, 0),
                       part1)
         # --- end ---
@@ -79,11 +85,11 @@
         part2 = len(ll_str.chars) - part1        # in (uni)chars
         ll_assert(part2 > 0, "append_overflow: no overflow")
         ofs = stringbuilder_grow(ll_builder, part2)
-        ll_builder.current_ofs = ofs + part2 * ll_builder.charsize
+        ll_builder.current_ofs = ofs + r_uint(part2 * ll_builder.charsize)
         # --- no GC! ---
         ll_assert(not ll_builder.current_buf, "after grow(), current_buf!=NULL")
         raw = lltype.nullptr(rffi.CCHARP.TO)
-        rffi.c_memcpy(rffi.ptradd(raw, ofs),
+        rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)),
                       ll_str2raw(ll_str, part1),
                       part2 * ll_builder.charsize)
         # --- end ---
@@ -113,8 +119,8 @@
 
 STRINGBUILDER = lltype.GcStruct('stringbuilder',
     ('current_buf', lltype.Ptr(STR)),
-    ('current_ofs', lltype.Signed),
-    ('current_end', lltype.Signed),
+    ('current_ofs', lltype.Unsigned),
+    ('current_end', lltype.Unsigned),
     ('total_size', lltype.Signed),
     ('extra_pieces', lltype.Ptr(STRINGPIECE)),
     ('initial_buf', lltype.Ptr(STR)),
@@ -128,8 +134,8 @@
 
 UNICODEBUILDER = lltype.GcStruct('unicodebuilder',
     ('current_buf', lltype.Ptr(UNICODE)),
-    ('current_ofs', lltype.Signed),     # position measured in *bytes*
-    ('current_end', lltype.Signed),     # position measured in *bytes*
+    ('current_ofs', lltype.Unsigned),     # position measured in *bytes*
+    ('current_end', lltype.Unsigned),     # position measured in *bytes*
     ('total_size', lltype.Signed),
     ('extra_pieces', lltype.Ptr(STRINGPIECE)),
     ('initial_buf', lltype.Ptr(UNICODE)),
@@ -145,7 +151,7 @@
 def ll_baseofs(ll_str):
     STRTYPE = lltype.typeOf(ll_str).TO
     ofs = rffi.offsetof(STRTYPE, 'chars') + rffi.itemoffsetof(STRTYPE.chars, 0)
-    return llmemory.raw_malloc_usage(ofs)    # for direct run
+    return r_uint(llmemory.raw_malloc_usage(ofs))    # for direct run
 ll_baseofs._always_inline_ = True
 
 def ll_str2raw(ll_str, charoffset):
@@ -158,7 +164,7 @@
 ll_str2raw._always_inline_ = True
 
 def ll_rawsetitem(raw, byteoffset, char):
-    raw = rffi.ptradd(raw, byteoffset)
+    raw = rffi.ptradd(raw, intmask(byteoffset))
     if lltype.typeOf(char) == lltype.Char:
         raw[0] = char
     else:
@@ -204,14 +210,14 @@
     def _ll_append(ll_builder, ll_str):
         lgt = len(ll_str.chars) * ll_builder.charsize      # in bytes
         ofs = ll_builder.current_ofs
-        newofs = ofs + lgt
+        newofs = ofs + r_uint(lgt)
         if newofs > ll_builder.current_end:
             ll_builder.append_overflow(ll_builder, ll_str)
         else:
             ll_builder.current_ofs = newofs
             # --- no GC! ---
             raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf)
-            rffi.c_memcpy(rffi.ptradd(raw, ofs),
+            rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)),
                           ll_str2raw(ll_str, 0),
                           lgt)
             # --- end ---
@@ -298,7 +304,7 @@
     def _ll_append_slice(ll_builder, ll_str, start, end):
         lgt = (end - start) * ll_builder.charsize      # in bytes
         ofs = ll_builder.current_ofs
-        newofs = ofs + lgt
+        newofs = ofs + r_uint(lgt)
         if newofs > ll_builder.current_end:
             ll_str = rstr.LLHelpers.ll_stringslice_startstop(ll_str, start, end)
             ll_builder.append_overflow(ll_builder, ll_str)
@@ -306,7 +312,7 @@
             ll_builder.current_ofs = newofs
             # --- no GC! ---
             raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf)
-            rffi.c_memcpy(rffi.ptradd(raw, ofs),
+            rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)),
                           ll_str2raw(ll_str, start),
                           lgt)
             # --- end ---
@@ -327,11 +333,11 @@
                 return True
         if bool(ll_builder.current_buf):
             ofs = ll_builder.current_ofs
-            end = ofs + size * ll_builder.charsize
+            end = ofs + r_uint(size * ll_builder.charsize)
             if end <= ll_builder.current_end:
                 ll_builder.current_ofs = end
                 buf = ll_builder.current_buf
-                index = (ofs - ll_baseofs(buf)) // ll_builder.charsize
+                index = intmask(ofs - ll_baseofs(buf)) // ll_builder.charsize
                 if lltype.typeOf(buf).TO.chars.OF == lltype.Char:
                     rstr.copy_string_contents(ll_str, buf, start, index, size)
                 else:
@@ -353,7 +359,7 @@
     def _ll_append_multiple_char(ll_builder, char, times):
         lgt = times * ll_builder.charsize     # in bytes
         ofs = ll_builder.current_ofs
-        newofs = ofs + lgt
+        newofs = ofs + r_uint(lgt)
         if newofs > ll_builder.current_end:
             ll_str = rstr.LLHelpers.ll_char_mul(char, times)
             ll_builder.append_overflow(ll_builder, ll_str)
@@ -392,7 +398,7 @@
     def ll_append_charpsize(ll_builder, charp, size):
         lgt = size * ll_builder.charsize     # in bytes
         ofs = ll_builder.current_ofs
-        newofs = ofs + lgt
+        newofs = ofs + r_uint(lgt)
         if newofs > ll_builder.current_end:
             if ll_builder.charsize == 1:
                 ll_str = llstr(rffi.charpsize2str(charp, size))
@@ -403,7 +409,7 @@
             ll_builder.current_ofs = newofs
             # --- no GC! ---
             raw = rffi.cast(rffi.CCHARP, ll_builder.current_buf)
-            rffi.c_memcpy(rffi.ptradd(raw, ofs),
+            rffi.c_memcpy(rffi.ptradd(raw, intmask(ofs)),
                           rffi.cast(rffi.CCHARP, charp),
                           lgt)
             # --- end ---
@@ -411,7 +417,7 @@
     @staticmethod
     @always_inline
     def ll_getlength(ll_builder):
-        num_chars_missing_from_last_piece = (
+        num_chars_missing_from_last_piece = intmask(
             (ll_builder.current_end - ll_builder.current_ofs)
             // ll_builder.charsize)
         return ll_builder.total_size - num_chars_missing_from_last_piece
@@ -430,8 +436,8 @@
                 buf = rgc.ll_shrink_array(buf, final_size)
                 ll_builder.total_size = final_size
                 ll_builder.current_buf = buf
-                ll_builder.current_ofs = 0
-                ll_builder.current_end = 0
+                ll_builder.current_ofs = r_uint(0)
+                ll_builder.current_end = r_uint(0)
             return buf
         else:
             return BaseStringBuilderRepr._ll_build_extra(cls, ll_builder)
@@ -445,15 +451,15 @@
         ll_assert(bool(extra), "build() twice on a StringBuilder")
         ll_builder.extra_pieces = lltype.nullptr(STRINGPIECE)
         result = cls.mallocfn(final_size)
-        piece_lgt = ll_builder.current_ofs - rffi.cast(lltype.Signed, # in bytes
-                                                       extra.raw_ptr)
-        ll_assert(piece_lgt == extra.piece_lgt - (ll_builder.current_end -
-                                                  ll_builder.current_ofs),
+        piece_lgt = intmask(       # in bytes
+            ll_builder.current_ofs - rffi.cast(lltype.Unsigned, extra.raw_ptr))
+        ll_assert(piece_lgt == intmask(extra.piece_lgt -
+                            (ll_builder.current_end - ll_builder.current_ofs)),
                   "bogus last piece_lgt")
         ll_builder.total_size = final_size
         ll_builder.current_buf = result
-        ll_builder.current_ofs = 0
-        ll_builder.current_end = 0
+        ll_builder.current_ofs = r_uint(0)
+        ll_builder.current_end = r_uint(0)
 
         # --- no GC! ---
         dst = ll_str2raw(result, final_size)


More information about the pypy-commit mailing list