[pypy-commit] pypy faster-rstruct-2: 1) avoid to take an unneeded string slice to skip the chars after typed_read; 2) as it was written before, we did the out-of-bound checks *after* the call to typed_read, which means that we risked to read past the end of the buffer

antocuni pypy.commits at gmail.com
Mon May 8 14:44:50 EDT 2017


Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: faster-rstruct-2
Changeset: r91205:5155ad0ff2ca
Date: 2017-05-08 20:44 +0200
http://bitbucket.org/pypy/pypy/changeset/5155ad0ff2ca/

Log:	1) avoid to take an unneeded string slice to skip the chars after
	typed_read; 2) as it was written before, we did the out-of-bound
	checks *after* the call to typed_read, which means that we risked to
	read past the end of the buffer

diff --git a/pypy/module/struct/formatiterator.py b/pypy/module/struct/formatiterator.py
--- a/pypy/module/struct/formatiterator.py
+++ b/pypy/module/struct/formatiterator.py
@@ -140,13 +140,20 @@
         if self.pos != self.length:
             raise StructError("unpack str size too long for format")
 
+    def can_advance(self, count):
+        end = self.pos + count
+        return end <= self.length
+
+    def advance(self, count):
+        if not self.can_advance(count):
+            raise StructError("unpack str size too short for format")
+        self.pos += count
+
     def read(self, count):
-        end = self.pos + count
-        if end > self.length:
-            raise StructError("unpack str size too short for format")
-        s = self.buf.getslice(self.pos, end, 1, count)
-        self.pos = end
-        return s
+        curpos = self.pos
+        end = curpos + count
+        self.advance(count) # raise if we are out of bound
+        return self.buf.getslice(curpos, end, 1, count)
 
     @specialize.argtype(1)
     def appendobj(self, value):
diff --git a/rpython/rlib/rstruct/runpack.py b/rpython/rlib/rstruct/runpack.py
--- a/rpython/rlib/rstruct/runpack.py
+++ b/rpython/rlib/rstruct/runpack.py
@@ -16,14 +16,20 @@
         self.length = len(s)
         self.inputpos = 0
 
+    def can_advance(self, count):
+        end = self.inputpos + count
+        return end <= self.length
+
+    def advance(self, count):
+        if not self.can_advance(count):
+            raise StructError("unpack str size too short for format")
+        self.inputpos += count
+
     def read(self, count):
-        end = self.inputpos + count
-        if end > self.length:
-            raise StructError("unpack str size too short for format")
-        size = end - self.inputpos
-        s = self.inputbuf.getslice(self.inputpos, end, 1, size)
-        self.inputpos = end
-        return s
+        curpos = self.inputpos
+        end = curpos + count
+        self.advance(count) # raise if we are out of bound
+        return self.inputbuf.getslice(curpos, end, 1, count)
 
     def align(self, mask):
         self.inputpos = (self.inputpos + mask) & ~mask
@@ -46,8 +52,11 @@
         def get_buffer_and_pos(self):
             return self.mr.inputbuf, self.mr.inputpos
 
-        def skip(self, size):
-            self.read(size) # XXX, could avoid taking the slice
+        def can_advance(self, size):
+            return self.mr.can_advance(size)
+
+        def advance(self, size):
+            self.mr.advance(size)
     ReaderForPos.__name__ = 'ReaderForPos%d' % pos
     return ReaderForPos
 
diff --git a/rpython/rlib/rstruct/standardfmttable.py b/rpython/rlib/rstruct/standardfmttable.py
--- a/rpython/rlib/rstruct/standardfmttable.py
+++ b/rpython/rlib/rstruct/standardfmttable.py
@@ -149,11 +149,17 @@
             # buf.typed_read to raise CannotRead in case it is not aligned
             # *and* it is not supported.
             raise CannotRead
-        # we need to call skip *after* we typed_read(), because if it raises
-        # we do not want to skip
-        result = buf.typed_read(TYPE, pos)
-        fmtiter.skip(size)
-        return result
+        #
+        # typed_read does not do any bound check, so we must call it only if
+        # we are sure there are at least "size" bytes to read
+        if fmtiter.can_advance(size):
+            result = buf.typed_read(TYPE, pos)
+            fmtiter.advance(size)
+            return result
+        else:
+            # this will raise StructError
+            fmtiter.advance(size)
+            assert False, 'fmtiter.advance should have raised!'
     return do_unpack_fastpath
 
 @specialize.argtype(0)
diff --git a/rpython/rlib/rstruct/test/test_runpack.py b/rpython/rlib/rstruct/test/test_runpack.py
--- a/rpython/rlib/rstruct/test/test_runpack.py
+++ b/rpython/rlib/rstruct/test/test_runpack.py
@@ -1,6 +1,8 @@
+import pytest
 from rpython.rtyper.test.tool import BaseRtypingTest
 from rpython.rlib.rstruct.runpack import runpack
 from rpython.rlib.rstruct import standardfmttable
+from rpython.rlib.rstruct.error import StructError
 from rpython.rlib.rarithmetic import LONG_BIT
 import struct
 
@@ -22,6 +24,18 @@
         assert fn() == 124
         assert self.interpret(fn, []) == 124
 
+    def test_unpack_error(self):
+        data = '123' # 'i' expects 4 bytes, not 3
+        def fn():
+            try:
+                runpack('i', data)
+            except StructError:
+                return True
+            else:
+                return False
+        assert fn()
+        assert self.interpret(fn, [])
+
     def test_unpack_single(self):
         data = struct.pack('i', 123)
         def fn():


More information about the pypy-commit mailing list