[pypy-commit] cffi default: Add an optional 'size' argument to ffi.gc(). So far, it has no effect on CPython.

arigo pypy.commits at gmail.com
Wed Aug 2 08:37:23 EDT 2017


Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r2998:120347b84c08
Date: 2017-08-02 14:37 +0200
http://bitbucket.org/cffi/cffi/changeset/120347b84c08/

Log:	Add an optional 'size' argument to ffi.gc(). So far, it has no
	effect on CPython.

diff --git a/c/_cffi_backend.c b/c/_cffi_backend.c
--- a/c/_cffi_backend.c
+++ b/c/_cffi_backend.c
@@ -6708,10 +6708,12 @@
     CDataObject *cd;
     CDataObject *origobj;
     PyObject *destructor;
-    static char *keywords[] = {"cdata", "destructor", NULL};
-
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O:gc", keywords,
-                                     &CData_Type, &origobj, &destructor))
+    Py_ssize_t ignored;   /* for pypy */
+    static char *keywords[] = {"cdata", "destructor", "size", NULL};
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O!O|n:gc", keywords,
+                                     &CData_Type, &origobj, &destructor,
+                                     &ignored))
         return NULL;
 
     if (destructor == Py_None) {
diff --git a/c/ffi_obj.c b/c/ffi_obj.c
--- a/c/ffi_obj.c
+++ b/c/ffi_obj.c
@@ -705,7 +705,12 @@
 PyDoc_STRVAR(ffi_gc_doc,
 "Return a new cdata object that points to the same data.\n"
 "Later, when this new cdata object is garbage-collected,\n"
-"'destructor(old_cdata_object)' will be called.");
+"'destructor(old_cdata_object)' will be called.\n"
+"\n"
+"The optional 'size' gives an estimate of the size, used to\n"
+"trigger the garbage collection more eagerly.  So far only used\n"
+"on PyPy.  It tells the GC that the returned object keeps alive\n"
+"roughly 'size' bytes of external memory.");
 
 #define ffi_gc  b_gcp     /* ffi_gc() => b_gcp()
                              from _cffi_backend.c */
diff --git a/cffi/api.py b/cffi/api.py
--- a/cffi/api.py
+++ b/cffi/api.py
@@ -394,12 +394,17 @@
             replace_with = ' ' + replace_with
         return self._backend.getcname(cdecl, replace_with)
 
-    def gc(self, cdata, destructor):
+    def gc(self, cdata, destructor, size=0):
         """Return a new cdata object that points to the same
         data.  Later, when this new cdata object is garbage-collected,
         'destructor(old_cdata_object)' will be called.
+
+        The optional 'size' gives an estimate of the size, used to
+        trigger the garbage collection more eagerly.  So far only used
+        on PyPy.  It tells the GC that the returned object keeps alive
+        roughly 'size' bytes of external memory.
         """
-        return self._backend.gcp(cdata, destructor)
+        return self._backend.gcp(cdata, destructor, size)
 
     def _get_cached_btype(self, type):
         assert self._lock.acquire(False) is False
diff --git a/cffi/backend_ctypes.py b/cffi/backend_ctypes.py
--- a/cffi/backend_ctypes.py
+++ b/cffi/backend_ctypes.py
@@ -1002,7 +1002,7 @@
 
     _weakref_cache_ref = None
 
-    def gcp(self, cdata, destructor):
+    def gcp(self, cdata, destructor, size=0):
         if self._weakref_cache_ref is None:
             import weakref
             class MyRef(weakref.ref):
diff --git a/testing/cffi0/test_verify.py b/testing/cffi0/test_verify.py
--- a/testing/cffi0/test_verify.py
+++ b/testing/cffi0/test_verify.py
@@ -2454,3 +2454,61 @@
     assert (pt.x, pt.y) == (-9*500*999, 9*500*999)
     pt = lib.call2(lib.cb2)
     assert (pt.x, pt.y) == (99*500*999, -99*500*999)
+
+def test_ffi_gc_size_arg():
+    # with PyPy's GC, these calls to ffi.gc() would rapidly consume
+    # 40 GB of RAM without the third argument
+    ffi = FFI()
+    ffi.cdef("void *malloc(size_t); void free(void *);")
+    lib = ffi.verify(r"""
+        #include <stdlib.h>
+    """)
+    for i in range(2000):
+        p = lib.malloc(20*1024*1024)    # 20 MB
+        p1 = ffi.cast("char *", p)
+        for j in xrange(0, 20*1024*1024, 4096):
+            p1[j] = '!'
+        p = ffi.gc(p, lib.free, 20*1024*1024)
+        del p
+
+def test_ffi_gc_size_arg_2():
+    # a variant of the above: this "attack" works on cpython's cyclic gc too
+    # and I found no obvious way to prevent that.  So for now, this test
+    # is skipped on CPython, where it eats all the memory.
+    if '__pypy__' not in sys.builtin_module_names:
+        py.test.skip("find a way to tweak the cyclic GC of CPython")
+    ffi = FFI()
+    ffi.cdef("void *malloc(size_t); void free(void *);")
+    lib = ffi.verify(r"""
+        #include <stdlib.h>
+    """)
+    class X(object):
+        pass
+    for i in range(2000):
+        p = lib.malloc(50*1024*1024)    # 50 MB
+        p1 = ffi.cast("char *", p)
+        for j in xrange(0, 50*1024*1024, 4096):
+            p1[j] = '!'
+        p = ffi.gc(p, lib.free, 50*1024*1024)
+        x = X()
+        x.p = p
+        x.cyclic = x
+        del p, x
+
+def test_ffi_new_with_cycles():
+    # still another variant, with ffi.new()
+    if '__pypy__' not in sys.builtin_module_names:
+        py.test.skip("find a way to tweak the cyclic GC of CPython")
+    ffi = FFI()
+    ffi.cdef("")
+    lib = ffi.verify("")
+    class X(object):
+        pass
+    for i in range(2000):
+        p = ffi.new("char[]", 50*1024*1024)    # 50 MB
+        for j in xrange(0, 50*1024*1024, 4096):
+            p[j] = '!'
+        x = X()
+        x.p = p
+        x.cyclic = x
+        del p, x
diff --git a/testing/cffi1/test_verify1.py b/testing/cffi1/test_verify1.py
--- a/testing/cffi1/test_verify1.py
+++ b/testing/cffi1/test_verify1.py
@@ -2290,3 +2290,61 @@
         expected = "unsigned int"
     assert ffi.typeof("UINT_PTR") is ffi.typeof(expected)
     assert ffi.typeof("PTSTR") is ffi.typeof("wchar_t *")
+
+def test_gc_pypy_size_arg():
+    ffi = FFI()
+    ffi.cdef("void *malloc(size_t); void free(void *);")
+    lib = ffi.verify(r"""
+        #include <stdlib.h>
+    """)
+    for i in range(2000):
+        p = lib.malloc(20*1024*1024)    # 20 MB
+        p1 = ffi.cast("char *", p)
+        for j in xrange(0, 20*1024*1024, 4096):
+            p1[j] = '!'
+        p = ffi.gc(p, lib.free, 20*1024*1024)
+        del p
+        # with PyPy's GC, the above would rapidly consume 40 GB of RAM
+        # without the third argument to ffi.gc()
+
+def test_ffi_gc_size_arg_2():
+    # a variant of the above: this "attack" works on cpython's cyclic gc too
+    # and I found no obvious way to prevent that.  So for now, this test
+    # is skipped on CPython, where it eats all the memory.
+    if '__pypy__' not in sys.builtin_module_names:
+        py.test.skip("find a way to tweak the cyclic GC of CPython")
+    ffi = FFI()
+    ffi.cdef("void *malloc(size_t); void free(void *);")
+    lib = ffi.verify(r"""
+        #include <stdlib.h>
+    """)
+    class X(object):
+        pass
+    for i in range(2000):
+        p = lib.malloc(50*1024*1024)    # 50 MB
+        p1 = ffi.cast("char *", p)
+        for j in xrange(0, 50*1024*1024, 4096):
+            p1[j] = '!'
+        p = ffi.gc(p, lib.free, 50*1024*1024)
+        x = X()
+        x.p = p
+        x.cyclic = x
+        del p, x
+
+def test_ffi_new_with_cycles():
+    # still another variant, with ffi.new()
+    if '__pypy__' not in sys.builtin_module_names:
+        py.test.skip("find a way to tweak the cyclic GC of CPython")
+    ffi = FFI()
+    ffi.cdef("")
+    lib = ffi.verify("")
+    class X(object):
+        pass
+    for i in range(2000):
+        p = ffi.new("char[]", 50*1024*1024)    # 50 MB
+        for j in xrange(0, 50*1024*1024, 4096):
+            p[j] = '!'
+        x = X()
+        x.p = p
+        x.cyclic = x
+        del p, x


More information about the pypy-commit mailing list