[pypy-commit] pypy backend-vector-ops: Shave a giant yak adding a flavor to malloc, so C level calls posix_memalign

fijal noreply at buildbot.pypy.org
Fri Feb 17 18:07:23 CET 2012


Author: Maciej Fijalkowski <fijall at gmail.com>
Branch: backend-vector-ops
Changeset: r52591:de1abef70971
Date: 2012-02-17 19:06 +0200
http://bitbucket.org/pypy/pypy/changeset/de1abef70971/

Log:	Shave a giant yak adding a flavor to malloc, so C level calls
	posix_memalign

diff --git a/pypy/rpython/lltypesystem/lloperation.py b/pypy/rpython/lltypesystem/lloperation.py
--- a/pypy/rpython/lltypesystem/lloperation.py
+++ b/pypy/rpython/lltypesystem/lloperation.py
@@ -391,6 +391,7 @@
     'boehm_register_finalizer': LLOp(),
     'boehm_disappearing_link': LLOp(),
     'raw_malloc':           LLOp(),
+    'raw_malloc_align':     LLOp(),
     'raw_malloc_usage':     LLOp(sideeffects=False),
     'raw_free':             LLOp(),
     'raw_memclear':         LLOp(),
diff --git a/pypy/rpython/memory/gctransform/transform.py b/pypy/rpython/memory/gctransform/transform.py
--- a/pypy/rpython/memory/gctransform/transform.py
+++ b/pypy/rpython/memory/gctransform/transform.py
@@ -483,6 +483,15 @@
         return result
     mh.ll_malloc_varsize_no_length_zero = _ll_malloc_varsize_no_length_zero
 
+    def _ll_malloc_varsize_zero_align(length, size, itemsize, align):  # aligned, cleared raw varsize allocation
+        tot_size = _ll_compute_size(length, size, itemsize)  # presumably the total byte size; helper is outside this hunk
+        result = llop.raw_malloc_align(llmemory.Address, tot_size, align)
+        if not result:  # a false (null) address is reported as MemoryError
+            raise MemoryError()
+        llmemory.raw_memclear(result, tot_size)  # clear all tot_size bytes before returning
+        return result
+    mh.ll_malloc_varsize_zero_align = _ll_malloc_varsize_zero_align
+
     return mh
 
 class GCTransformer(BaseGCTransformer):
@@ -496,6 +505,7 @@
         ll_raw_malloc_varsize_no_length = mh.ll_malloc_varsize_no_length
         ll_raw_malloc_varsize = mh.ll_malloc_varsize
         ll_raw_malloc_varsize_no_length_zero  = mh.ll_malloc_varsize_no_length_zero
+        ll_raw_malloc_varsize_zero_align = mh.ll_malloc_varsize_zero_align
 
         stack_mh = mallocHelpers()
         stack_mh.allocate = lambda size: llop.stack_malloc(llmemory.Address, size)
@@ -513,6 +523,9 @@
 
             self.stack_malloc_fixedsize_ptr = self.inittime_helper(
                 ll_stack_malloc_fixedsize, [lltype.Signed], llmemory.Address)
+            self.raw_malloc_varsize_align_zero_ptr = self.inittime_helper(
+                ll_raw_malloc_varsize_zero_align, [lltype.Signed] * 4,
+                llmemory.Address)
 
     def gct_malloc(self, hop, add_flags=None):
         TYPE = hop.spaceop.result.concretetype.TO
@@ -601,16 +614,28 @@
                           [self.raw_malloc_memory_pressure_varsize_ptr,
                            v_length, c_item_size])
         if c_offset_to_length is None:
-            if flags.get('zero'):
-                fnptr = self.raw_malloc_varsize_no_length_zero_ptr
+            mpa = flags.get('memory_position_alignment')
+            if mpa is not None:
+                assert flags.get('zero')
+                fnptr = self.raw_malloc_varsize_align_zero_ptr
+                c_align = rmodel.inputconst(lltype.Signed, mpa)
+                v_raw = hop.genop("direct_call", [fnptr, v_length, c_const_size,
+                                                  c_item_size, c_align],
+                                  resulttype=llmemory.Address)
             else:
-                fnptr = self.raw_malloc_varsize_no_length_ptr
-            v_raw = hop.genop("direct_call",
-                               [fnptr, v_length, c_const_size, c_item_size],
-                               resulttype=llmemory.Address)
+                if flags.get('zero'):
+                    fnptr = self.raw_malloc_varsize_no_length_zero_ptr
+                else:
+                    fnptr = self.raw_malloc_varsize_no_length_ptr
+                v_raw = hop.genop("direct_call",  # emitted for both helpers, so v_raw is always bound
+                                  [fnptr, v_length, c_const_size,
+                                   c_item_size],
+                                  resulttype=llmemory.Address)
         else:
             if flags.get('zero'):
                 raise NotImplementedError("raw zero varsize malloc with length field")
+            if flags.get('memory_position_alignment'):
+                raise NotImplementedError('raw varsize alloc with length and alignment')
             v_raw = hop.genop("direct_call",
                                [self.raw_malloc_varsize_ptr, v_length,
                                 c_const_size, c_item_size, c_offset_to_length],
diff --git a/pypy/rpython/rbuiltin.py b/pypy/rpython/rbuiltin.py
--- a/pypy/rpython/rbuiltin.py
+++ b/pypy/rpython/rbuiltin.py
@@ -362,6 +362,10 @@
         flags['track_allocation'] = v_track_allocation.value
     if i_add_memory_pressure is not None:
         flags['add_memory_pressure'] = v_add_memory_pressure.value
+    mpa = hop.r_result.lowleveltype.TO._hints.get('memory_position_alignment',
+                                                  None)
+    if mpa is not None:
+        flags['memory_position_alignment'] = mpa
     vlist.append(hop.inputconst(lltype.Void, flags))
 
     assert 1 <= hop.nb_args <= 2
diff --git a/pypy/translator/c/src/mem.h b/pypy/translator/c/src/mem.h
--- a/pypy/translator/c/src/mem.h
+++ b/pypy/translator/c/src/mem.h
@@ -110,6 +110,14 @@
 		} 							\
 	}
 
+#define OP_RAW_MALLOC_ALIGN(size, align, r)  { /* r is NULL on failure, else zeroed memory */ \
+		if (posix_memalign(&r, align, size) != 0) { r = NULL; }	\
+		if (r != NULL) {					\
+			memset((void*)r, 0, size);			\
+			COUNT_MALLOC;					\
+		}							\
+	}
+
 #endif
 
 #define OP_RAW_FREE(p, r) PyObject_Free(p); COUNT_FREE;
diff --git a/pypy/translator/c/test/test_genc.py b/pypy/translator/c/test/test_genc.py
--- a/pypy/translator/c/test/test_genc.py
+++ b/pypy/translator/c/test/test_genc.py
@@ -13,6 +13,7 @@
 from pypy.translator.interactive import Translation
 from pypy.rlib.entrypoint import entrypoint
 from pypy.tool.nullpath import NullPyPathLocal
+from pypy.rpython.lltypesystem import lltype
 
 def compile(fn, argtypes, view=False, gcpolicy="ref", backendopt=True,
             annotatorpolicy=None):
@@ -462,11 +463,22 @@
     assert ' BarStruct ' in t.driver.cbuilder.c_source_filename.read()
     free(foo, flavor="raw")
 
+def test_malloc_aligned():  # compiles f to C and checks the aligned-malloc macro is used
+    T = lltype.Array(lltype.Signed, hints={'nolength': True,
+                                           'memory_position_alignment': 16})
+    
+    def f():  # raw-allocates and frees one zeroed array of T with 16 items
+        a = lltype.malloc(T, 16, flavor='raw', zero=True)
+        lltype.free(a, flavor='raw')
+
+    t = Translation(f, [], backend='c')
+    t.annotate()
+    t.compile_c()
+    assert 'OP_RAW_MALLOC_ALIGN' in t.driver.cbuilder.c_source_filename.read()  # macro name must appear in the generated C source
+
 def test_recursive_llhelper():
     from pypy.rpython.annlowlevel import llhelper
-    from pypy.rpython.lltypesystem import lltype
     from pypy.rlib.objectmodel import specialize
-    from pypy.rlib.nonconst import NonConstant
     FT = lltype.ForwardReference()
     FTPTR = lltype.Ptr(FT)
     STRUCT = lltype.Struct("foo", ("bar", FTPTR))
@@ -514,7 +526,6 @@
     assert fn(True)
 
 def test_inhibit_tail_call():
-    from pypy.rpython.lltypesystem import lltype
     def foobar_fn(n):
         return 42
     foobar_fn._dont_inline_ = True
diff --git a/pypy/translator/goal/targetvector.py b/pypy/translator/goal/targetvector.py
new file mode 100644
--- /dev/null
+++ b/pypy/translator/goal/targetvector.py
@@ -0,0 +1,51 @@
+
+from pypy.rpython.lltypesystem import lltype
+from pypy.rlib import jit
+
+TP = lltype.Array(lltype.Float, hints={'nolength': True,
+                                       'memory_position_alignment': 16})  # Float array type without a length field, alignment hint 16
+
+driver = jit.JitDriver(greens = [], reds = ['a', 'i', 'b', 'size'])  # no green variables; the red names match the jit_merge_point call in f
+
+def initialize(arr, size):  # stores float(i) into arr[i] for every i in range(size)
+    for i in range(size):
+        arr[i] = float(i)
+
+def sum(arr, size):  # NOTE(review): shadows the builtin sum inside this module
+    s = 0  # accumulator starts as the int 0
+    for i in range(size):
+        s += arr[i]
+    return s  # accumulated total of arr[0] .. arr[size - 1]
+
+def main(n, size):  # calls f(a, b, size) n times on two raw TP arrays of length size
+    a = lltype.malloc(TP, size, flavor='raw', zero=True)
+    b = lltype.malloc(TP, size, flavor='raw', zero=True)
+    initialize(a, size)
+    initialize(b, size)
+    for i in range(n):
+        f(a, b, size)
+    lltype.free(a, flavor='raw')  # both raw arrays are freed explicitly after the loop
+    lltype.free(b, flavor='raw')
+
+def f(a, b, size):  # writes b[i] = a[i] + a[i], handling two indices per loop iteration
+    i = 0
+    while i < size:
+        driver.jit_merge_point(a=a, i=i, size=size, b=b)
+        jit.assert_aligned(a, i)  # NOTE(review): presumably an alignment hint for a at index i -- confirm in pypy.rlib.jit
+        jit.assert_aligned(b, i)
+        b[i] = a[i] + a[i]
+        i += 1
+        b[i] = a[i] + a[i]  # NOTE(review): this index equals size when size is odd -- confirm callers pass even sizes
+        i += 1
+
+def entry_point(argv):  # argv[1] is passed to main as n, argv[2] as size (both via int())
+    main(int(argv[1]), int(argv[2]))
+    return 0  # always returns 0
+
+def jitpolicy(driver):  # ignores driver and returns None; NOTE(review): presumably selects a default JIT policy -- confirm
+    return None
+
+# _____ Define and setup target ___
+
+def target(*args):  # ignores its arguments and returns the pair (entry_point, None)
+    return entry_point, None


More information about the pypy-commit mailing list