[pypy-commit] pypy default: adapt the compare_digest impl from 3.3 to 2.7

pjenvey noreply at buildbot.pypy.org
Thu Sep 11 22:48:37 CEST 2014


Author: Philip Jenvey <pjenvey at underboss.org>
Branch: 
Changeset: r73479:9831468e1882
Date: 2014-09-11 12:20 -0700
http://bitbucket.org/pypy/pypy/changeset/9831468e1882/

Log:	adapt the compare_digest impl from 3.3 to 2.7

diff --git a/pypy/module/operator/interp_operator.py b/pypy/module/operator/interp_operator.py
--- a/pypy/module/operator/interp_operator.py
+++ b/pypy/module/operator/interp_operator.py
@@ -1,6 +1,4 @@
-from rpython.rlib.objectmodel import specialize
-
-from pypy.interpreter.error import OperationError, oefmt
+from pypy.interpreter.error import OperationError
 from pypy.interpreter.gateway import unwrap_spec
 
 
@@ -249,33 +247,3 @@
 @unwrap_spec(default=int)
 def _length_hint(space, w_iterable, default):
     return space.wrap(space.length_hint(w_iterable, default))
-
-def compare_digest(space, w_a, w_b):
-    if (
-        space.isinstance_w(w_a, space.w_unicode) and
-        space.isinstance_w(w_b, space.w_unicode)
-    ):
-        return space.wrap(tscmp(space.unicode_w(w_a), space.unicode_w(w_b)))
-    if (
-        space.isinstance_w(w_a, space.w_unicode) or
-        space.isinstance_w(w_b, space.w_unicode)
-    ):
-        raise oefmt(
-            space.w_TypeError,
-            "unsupported operand types(s) or combination of types: '%N' and '%N'",
-            w_a,
-            w_b,
-        )
-    else:
-        return space.wrap(tscmp(space.bufferstr_w(w_a), space.bufferstr_w(w_b)))
-
-
-@specialize.argtype(0, 1)
-def tscmp(a, b):
-    len_a = len(a)
-    len_b = len(b)
-    length = min(len(a), len(b))
-    res = len_a ^ len_b
-    for i in xrange(length):
-        res |= ord(a[i]) ^ ord(b[i])
-    return res == 0
diff --git a/pypy/module/operator/test/test_operator.py b/pypy/module/operator/test/test_operator.py
--- a/pypy/module/operator/test/test_operator.py
+++ b/pypy/module/operator/test/test_operator.py
@@ -335,14 +335,8 @@
         a, b = mybytes(b"foobar"), mybytes(b"foobaz")
         assert not operator._compare_digest(a, b)
 
-    def test_compare_digest_buffer(self):
+    def test_compare_digest_unicode(self):
         import operator
-        assert operator._compare_digest(b'asd', b'asd')
-        assert not operator._compare_digest(b'asd', b'qwe')
-        assert not operator._compare_digest(b'asd', b'asdq')
-
-    def test_compare_digest_ascii(self):
-        import operator
-        assert operator._compare_digest('asd', 'asd')
-        assert not operator._compare_digest('asd', 'qwe')
-        assert not operator._compare_digest('asd', 'asdq')
+        assert operator._compare_digest(u'asd', u'asd')
+        assert not operator._compare_digest(u'asd', u'qwe')
+        raises(TypeError, operator._compare_digest, u'asd', b'qwe')
diff --git a/pypy/module/operator/test/test_tscmp.py b/pypy/module/operator/test/test_tscmp.py
--- a/pypy/module/operator/test/test_tscmp.py
+++ b/pypy/module/operator/test/test_tscmp.py
@@ -1,14 +1,28 @@
-from pypy.module.operator.tscmp import pypy_tscmp
+from pypy.module.operator.tscmp import pypy_tscmp, pypy_tscmp_wide
 
 class TestTimingSafeCompare:
+    tostr = str
+    tscmp = staticmethod(pypy_tscmp)
+
     def test_tscmp_neq(self):
-        assert not pypy_tscmp('asd', 'qwe', 3, 3)
+        assert not self.tscmp(self.tostr('asd'), self.tostr('qwe'), 3, 3)
 
     def test_tscmp_eq(self):
-        assert pypy_tscmp('asd', 'asd', 3, 3)
+        assert self.tscmp(self.tostr('asd'), self.tostr('asd'), 3, 3)
 
     def test_tscmp_len(self):
-        assert pypy_tscmp('asdp', 'asdq', 3, 3)
+        assert self.tscmp(self.tostr('asdp'), self.tostr('asdq'), 3, 3)
 
     def test_tscmp_nlen(self):
-        assert not pypy_tscmp('asd', 'asd', 2, 3)
+        assert not self.tscmp(self.tostr('asd'), self.tostr('asd'), 2, 3)
+
+
+class TestTimingSafeCompareWide(TestTimingSafeCompare):
+    tostr = unicode
+    tscmp = staticmethod(pypy_tscmp_wide)
+
+    def test_tscmp_wide_nonascii(self):
+        a, b = u"\ud808\udf45", u"\ud808\udf45"
+        assert self.tscmp(a, b, len(a), len(b))
+        a, b = u"\ud808\udf45", u"\ud808\udf45 "
+        assert not self.tscmp(a, b, len(a), len(b))
diff --git a/pypy/module/operator/tscmp.c b/pypy/module/operator/tscmp.c
--- a/pypy/module/operator/tscmp.c
+++ b/pypy/module/operator/tscmp.c
@@ -1,21 +1,22 @@
-/* From CPython 3.3.5's operator.c
+/* Derived from CPython 3.3.5's operator.c::_tscmp
  */
 
 #include <stdlib.h>
+#include <wchar.h>
 #include "tscmp.h"
 
 int
-pypy_tscmp(const unsigned char *a, const unsigned char *b, long len_a, long len_b)
+pypy_tscmp(const char *a, const char *b, long len_a, long len_b)
 {
     /* The volatile type declarations make sure that the compiler has no
      * chance to optimize and fold the code in any way that may change
      * the timing.
      */
     volatile long length;
-    volatile const unsigned char *left;
-    volatile const unsigned char *right;
+    volatile const char *left;
+    volatile const char *right;
     long i;
-    unsigned char result;
+    char result;
 
     /* loop count depends on length of b */
     length = len_b;
@@ -26,7 +27,7 @@
      * volatile forces re-evaluation
      *  */
     if (len_a == length) {
-        left = *((volatile const unsigned char**)&a);
+        left = *((volatile const char**)&a);
         result = 0;
     }
     if (len_a != length) {
@@ -40,3 +41,40 @@
 
     return (result == 0);
 }
+
+int
+pypy_tscmp_wide(const wchar_t *a, const wchar_t *b, long len_a, long len_b)
+{
+    /* The volatile type declarations make sure that the compiler has no
+     * chance to optimize and fold the code in any way that may change
+     * the timing.
+     */
+    volatile long length;
+    volatile const wchar_t *left;
+    volatile const wchar_t *right;
+    long i;
+    wchar_t result;
+
+    /* loop count depends on length of b */
+    length = len_b;
+    left = NULL;
+    right = b;
+
+    /* don't use else here to keep the amount of CPU instructions constant,
+     * volatile forces re-evaluation
+     *  */
+    if (len_a == length) {
+        left = *((volatile const wchar_t**)&a);
+        result = 0;
+    }
+    if (len_a != length) {
+        left = b;
+        result = 1;
+    }
+
+    for (i=0; i < length; i++) {
+        result |= *left++ ^ *right++;
+    }
+
+    return (result == 0);
+}
diff --git a/pypy/module/operator/tscmp.h b/pypy/module/operator/tscmp.h
--- a/pypy/module/operator/tscmp.h
+++ b/pypy/module/operator/tscmp.h
@@ -1,1 +1,2 @@
-int pypy_tscmp(const unsigned char *a, const unsigned char *b, long len_a, long len_b);
+int pypy_tscmp(const char *, const char *, long, long);
+int pypy_tscmp_wide(const wchar_t *, const wchar_t *, long, long);
diff --git a/pypy/module/operator/tscmp.py b/pypy/module/operator/tscmp.py
--- a/pypy/module/operator/tscmp.py
+++ b/pypy/module/operator/tscmp.py
@@ -7,14 +7,14 @@
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.translator.tool.cbuild import ExternalCompilationInfo
 
-from pypy.interpreter.error import OperationError, oefmt
+from pypy.interpreter.error import oefmt
 
 cwd = py.path.local(__file__).dirpath()
 eci = ExternalCompilationInfo(
     includes=[cwd.join('tscmp.h')],
     include_dirs=[str(cwd)],
     separate_module_files=[cwd.join('tscmp.c')],
-    export_symbols=['pypy_tscmp'])
+    export_symbols=['pypy_tscmp', 'pypy_tscmp_wide'])
 
 
 def llexternal(*args, **kwargs):
@@ -23,30 +23,51 @@
     return rffi.llexternal(*args, **kwargs)
 
 
-pypy_tscmp = llexternal('pypy_tscmp',
-                        [rffi.CCHARP, rffi.CCHARP, rffi.LONG, rffi.LONG],
-                        rffi.INT)
+pypy_tscmp = llexternal(
+    'pypy_tscmp',
+    [rffi.CCHARP, rffi.CCHARP, rffi.LONG, rffi.LONG],
+    rffi.INT)
+pypy_tscmp_wide = llexternal(
+    'pypy_tscmp_wide',
+    [rffi.CWCHARP, rffi.CWCHARP, rffi.LONG, rffi.LONG],
+    rffi.INT)
 
 
 def compare_digest(space, w_a, w_b):
+    """compare_digest(a, b) -> bool
+
+    Return 'a == b'.  This function uses an approach designed to prevent
+    timing analysis, making it appropriate for cryptography.  a and b
+    must both be of the same type: either str (ASCII only), or any type
+    that supports the buffer protocol (e.g. bytes).
+
+    Note: If a and b are of different lengths, or if an error occurs, a
+    timing attack could theoretically reveal information about the types
+    and lengths of a and b--but not their values.
+    """
     if (space.isinstance_w(w_a, space.w_unicode) and
         space.isinstance_w(w_b, space.w_unicode)):
-        try:
-            w_a = space.call_method(w_a, 'encode', space.wrap('ascii'))
-            w_b = space.call_method(w_b, 'encode', space.wrap('ascii'))
-        except OperationError as e:
-            if not e.match(space, space.w_UnicodeEncodeError):
-                raise
-            raise oefmt(space.w_TypeError,
-                        "comparing strings with non-ASCII characters is not "
-                        "supported")
+        a = space.unicode_w(w_a)
+        b = space.unicode_w(w_b)
+        with rffi.scoped_nonmoving_unicodebuffer(a) as a_buf:
+            with rffi.scoped_nonmoving_unicodebuffer(b) as b_buf:
+                result = pypy_tscmp_wide(a_buf, b_buf, len(a), len(b))
+        return space.wrap(rffi.cast(lltype.Bool, result))
     return compare_digest_buffer(space, w_a, w_b)
 
 
 def compare_digest_buffer(space, w_a, w_b):
-    a = space.bufferstr_w(w_a)
-    b = space.bufferstr_w(w_b)
-    with rffi.scoped_nonmovingbuffer(a) as a_buffer:
-        with rffi.scoped_nonmovingbuffer(b) as b_buffer:
-            result = pypy_tscmp(a_buffer, b_buffer, len(a), len(b))
-            return space.wrap(rffi.cast(lltype.Bool, result))
+    try:
+        a_buf = w_a.buffer_w(space, space.BUF_SIMPLE)
+        b_buf = w_b.buffer_w(space, space.BUF_SIMPLE)
+    except TypeError:
+        raise oefmt(space.w_TypeError,
+                    "unsupported operand types(s) or combination of types: "
+                    "'%T' and '%T'", w_a, w_b)
+
+    a = a_buf.as_str()
+    b = b_buf.as_str()
+    with rffi.scoped_nonmovingbuffer(a) as a_buf:
+        with rffi.scoped_nonmovingbuffer(b) as b_buf:
+            result = pypy_tscmp(a_buf, b_buf, len(a), len(b))
+    return space.wrap(rffi.cast(lltype.Bool, result))


More information about the pypy-commit mailing list