[pypy-commit] pypy unroll-if-alt: merge heads

Mon Aug 1 23:16:24 CEST 2011

Author: Benjamin Peterson <benjamin at python.org>
Branch: unroll-if-alt
Changeset: r46171:5372e50138c5
Date: 2011-08-01 16:16 -0500
http://bitbucket.org/pypy/pypy/changeset/5372e50138c5/

Log:	merge heads

diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -154,18 +154,18 @@
     RegrTest('test_cmd.py'),
     RegrTest('test_cmd_line_script.py'),
     RegrTest('test_codeccallbacks.py', core=True),
-    RegrTest('test_codecencodings_cn.py'),
-    RegrTest('test_codecencodings_hk.py'),
-    RegrTest('test_codecencodings_jp.py'),
-    RegrTest('test_codecencodings_kr.py'),
-    RegrTest('test_codecencodings_tw.py'),
+    RegrTest('test_codecencodings_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecencodings_tw.py', usemodules='_multibytecodec'),
 
-    RegrTest('test_codecmaps_cn.py'),
-    RegrTest('test_codecmaps_hk.py'),
-    RegrTest('test_codecmaps_jp.py'),
-    RegrTest('test_codecmaps_kr.py'),
-    RegrTest('test_codecmaps_tw.py'),
-    RegrTest('test_codecs.py', core=True),
+    RegrTest('test_codecmaps_cn.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_hk.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_jp.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_kr.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecmaps_tw.py', usemodules='_multibytecodec'),
+    RegrTest('test_codecs.py', core=True, usemodules='_multibytecodec'),
     RegrTest('test_codeop.py', core=True),
     RegrTest('test_coercion.py', core=True),
     RegrTest('test_collections.py'),
@@ -314,7 +314,7 @@
     RegrTest('test_mmap.py'),
     RegrTest('test_module.py', core=True),
     RegrTest('test_modulefinder.py'),
-    RegrTest('test_multibytecodec.py'),
+    RegrTest('test_multibytecodec.py', usemodules='_multibytecodec'),
     RegrTest('test_multibytecodec_support.py', skip="not a test"),
     RegrTest('test_multifile.py'),
     RegrTest('test_multiprocessing.py', skip='FIXME leaves subprocesses'),
diff --git a/lib-python/modified-2.7/test/test_multibytecodec.py b/lib-python/modified-2.7/test/test_multibytecodec.py
--- a/lib-python/modified-2.7/test/test_multibytecodec.py
+++ b/lib-python/modified-2.7/test/test_multibytecodec.py
@@ -148,7 +148,8 @@
 class Test_StreamReader(unittest.TestCase):
     def test_bug1728403(self):
         try:
-            open(TESTFN, 'w').write('\xa1')
+            with open(TESTFN, 'w') as f:
+                f.write('\xa1')
             f = codecs.open(TESTFN, encoding='cp949')
             self.assertRaises(UnicodeDecodeError, f.read, 2)
         finally:
diff --git a/lib_pypy/_ctypes/structure.py b/lib_pypy/_ctypes/structure.py
--- a/lib_pypy/_ctypes/structure.py
+++ b/lib_pypy/_ctypes/structure.py
@@ -34,16 +34,18 @@
     for i, field in enumerate(all_fields):
         name = field[0]
         value = field[1]
+        is_bitfield = (len(field) == 3)
         fields[name] = Field(name,
                              self._ffistruct.fieldoffset(name),
                              self._ffistruct.fieldsize(name),
-                             value, i)
+                             value, i, is_bitfield)
 
     if anonymous_fields:
         resnames = []
         for i, field in enumerate(all_fields):
             name = field[0]
             value = field[1]
+            is_bitfield = (len(field) == 3)
             startpos = self._ffistruct.fieldoffset(name)
             if name in anonymous_fields:
                 for subname in value._names:
@@ -52,7 +54,7 @@
                     subvalue = value._fieldtypes[subname].ctype
                     fields[subname] = Field(subname,
                                             relpos, subvalue._sizeofinstances(),
-                                            subvalue, i)
+                                            subvalue, i, is_bitfield)
             else:
                 resnames.append(name)
         names = resnames
@@ -60,8 +62,8 @@
     self._fieldtypes = fields
 
 class Field(object):
-    def __init__(self, name, offset, size, ctype, num):
-        for k in ('name', 'offset', 'size', 'ctype', 'num'):
+    def __init__(self, name, offset, size, ctype, num, is_bitfield):
+        for k in ('name', 'offset', 'size', 'ctype', 'num', 'is_bitfield'):
             self.__dict__[k] = locals()[k]
 
     def __setattr__(self, name, value):
@@ -225,7 +227,7 @@
             field = self._fieldtypes[name]
         except KeyError:
             return _CData.__getattribute__(self, name)
-        if field.size >> 16:
+        if field.is_bitfield:
             # bitfield member, use direct access
             return self._buffer.__getattr__(name)
         else:
diff --git a/pypy/annotation/builtin.py b/pypy/annotation/builtin.py
--- a/pypy/annotation/builtin.py
+++ b/pypy/annotation/builtin.py
@@ -416,7 +416,8 @@
 from pypy.annotation.model import SomePtr
 from pypy.rpython.lltypesystem import lltype
 
-def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None):
+def malloc(s_T, s_n=None, s_flavor=None, s_zero=None, s_track_allocation=None,
+           s_add_memory_pressure=None):
     assert (s_n is None or s_n.knowntype == int
             or issubclass(s_n.knowntype, pypy.rlib.rarithmetic.base_int))
     assert s_T.is_constant()
@@ -432,6 +433,8 @@
     else:
         assert s_flavor.is_constant()
         assert s_track_allocation is None or s_track_allocation.is_constant()
+        assert (s_add_memory_pressure is None or
+                s_add_memory_pressure.is_constant())
         # not sure how to call malloc() for the example 'p' in the
         # presence of s_extraargs
         r = SomePtr(lltype.Ptr(s_T.const))
diff --git a/pypy/jit/backend/llgraph/llimpl.py b/pypy/jit/backend/llgraph/llimpl.py
--- a/pypy/jit/backend/llgraph/llimpl.py
+++ b/pypy/jit/backend/llgraph/llimpl.py
@@ -1071,6 +1071,8 @@
         return heaptracker.adr2int(llmemory.cast_ptr_to_adr(x))
     if TP == llmemory.Address:
         return heaptracker.adr2int(x)
+    if TP is lltype.SingleFloat:
+        return longlong.singlefloat2int(x)
     return lltype.cast_primitive(lltype.Signed, x)
 
 def cast_from_int(TYPE, x):
@@ -1086,6 +1088,9 @@
             x = llmemory.cast_int_to_adr(x)
         assert lltype.typeOf(x) == llmemory.Address
         return x
+    elif TYPE is lltype.SingleFloat:
+        assert lltype.typeOf(x) is lltype.Signed
+        return longlong.int2singlefloat(x)
     else:
         if lltype.typeOf(x) == llmemory.Address:
             x = heaptracker.adr2int(x)
@@ -1140,6 +1145,7 @@
     del _future_values[:]
 
 def set_future_value_int(index, value):
+    assert lltype.typeOf(value) is lltype.Signed
     set_future_value_ref(index, value)
 
 def set_future_value_float(index, value):
@@ -1488,6 +1494,7 @@
     'i': lltype.Signed,
     'f': lltype.Float,
     'L': lltype.SignedLongLong,
+    'S': lltype.SingleFloat,
     'v': lltype.Void,
     }
 
diff --git a/pypy/jit/backend/llgraph/runner.py b/pypy/jit/backend/llgraph/runner.py
--- a/pypy/jit/backend/llgraph/runner.py
+++ b/pypy/jit/backend/llgraph/runner.py
@@ -91,6 +91,7 @@
 class BaseCPU(model.AbstractCPU):
     supports_floats = True
     supports_longlong = llimpl.IS_32_BIT
+    supports_singlefloats = True
 
     def __init__(self, rtyper, stats=None, opts=None,
                  translate_support_code=False,
@@ -327,12 +328,16 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport.ffisupport import get_ffi_type_kind
+        from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
         arg_types = []
-        for arg in ffi_args:
-            kind = get_ffi_type_kind(arg)
-            if kind != history.VOID:
-                arg_types.append(kind)
-        reskind = get_ffi_type_kind(ffi_result)
+        try:
+            for arg in ffi_args:
+                kind = get_ffi_type_kind(self, arg)
+                if kind != history.VOID:
+                    arg_types.append(kind)
+            reskind = get_ffi_type_kind(self, ffi_result)
+        except UnsupportedKind:
+            return None
         return self.getdescr(0, reskind, extrainfo=extrainfo,
                              arg_types=''.join(arg_types))
 
diff --git a/pypy/jit/backend/llsupport/descr.py b/pypy/jit/backend/llsupport/descr.py
--- a/pypy/jit/backend/llsupport/descr.py
+++ b/pypy/jit/backend/llsupport/descr.py
@@ -303,6 +303,8 @@
                 c = 'f'
             elif c == 'f' and longlong.supports_longlong:
                 return 'longlong.getrealfloat(%s)' % (process('L'),)
+            elif c == 'S':
+                return 'longlong.int2singlefloat(%s)' % (process('i'),)
             arg = 'args_%s[%d]' % (c, seen[c])
             seen[c] += 1
             return arg
@@ -318,6 +320,8 @@
                 return lltype.Void
             elif arg == 'L':
                 return lltype.SignedLongLong
+            elif arg == 'S':
+                return lltype.SingleFloat
             else:
                 raise AssertionError(arg)
 
@@ -334,6 +338,8 @@
             result = 'rffi.cast(lltype.SignedLongLong, res)'
         elif self.get_return_type() == history.VOID:
             result = 'None'
+        elif self.get_return_type() == 'S':
+            result = 'longlong.singlefloat2int(res)'
         else:
             assert 0
         source = py.code.Source("""
@@ -344,14 +350,15 @@
         """ % locals())
         ARGS = [TYPE(arg) for arg in self.arg_classes]
         FUNC = lltype.FuncType(ARGS, RESULT)
-        d = locals().copy()
-        d.update(globals())
+        d = globals().copy()
+        d.update(locals())
         exec source.compile() in d
         self.call_stub = d['call_stub']
 
     def verify_types(self, args_i, args_r, args_f, return_type):
         assert self._return_type in return_type
-        assert self.arg_classes.count('i') == len(args_i or ())
+        assert (self.arg_classes.count('i') +
+                self.arg_classes.count('S')) == len(args_i or ())
         assert self.arg_classes.count('r') == len(args_r or ())
         assert (self.arg_classes.count('f') +
                 self.arg_classes.count('L')) == len(args_f or ())
@@ -428,23 +435,39 @@
     def get_result_size(self, translate_support_code):
         return 0
 
+_SingleFloatCallDescr = None   # built lazily
+
 def getCallDescrClass(RESULT):
     if RESULT is lltype.Void:
         return VoidCallDescr
     if RESULT is lltype.Float:
         return FloatCallDescr
+    if RESULT is lltype.SingleFloat:
+        global _SingleFloatCallDescr
+        if _SingleFloatCallDescr is None:
+            assert rffi.sizeof(rffi.UINT) == rffi.sizeof(RESULT)
+            class SingleFloatCallDescr(getCallDescrClass(rffi.UINT)):
+                _clsname = 'SingleFloatCallDescr'
+                _return_type = 'S'
+            _SingleFloatCallDescr = SingleFloatCallDescr
+        return _SingleFloatCallDescr
     if is_longlong(RESULT):
         return LongLongCallDescr
     return getDescrClass(RESULT, BaseIntCallDescr, GcPtrCallDescr,
                          NonGcPtrCallDescr, 'Call', 'get_result_size',
                          Ellipsis,  # <= floatattrname should not be used here
                          '_is_result_signed')
+getCallDescrClass._annspecialcase_ = 'specialize:memo'
 
 def get_call_descr(gccache, ARGS, RESULT, extrainfo=None):
     arg_classes = []
     for ARG in ARGS:
         kind = getkind(ARG)
-        if   kind == 'int': arg_classes.append('i')
+        if   kind == 'int':
+            if ARG is lltype.SingleFloat:
+                arg_classes.append('S')
+            else:
+                arg_classes.append('i')
         elif kind == 'ref': arg_classes.append('r')
         elif kind == 'float':
             if is_longlong(ARG):
@@ -476,6 +499,9 @@
             return GcPtrDescr
         else:
             return NonGcPtrDescr
+    if TYPE is lltype.SingleFloat:
+        assert rffi.sizeof(rffi.UINT) == rffi.sizeof(TYPE)
+        TYPE = rffi.UINT
     try:
         return _cache[nameprefix, TYPE]
     except KeyError:
diff --git a/pypy/jit/backend/llsupport/ffisupport.py b/pypy/jit/backend/llsupport/ffisupport.py
--- a/pypy/jit/backend/llsupport/ffisupport.py
+++ b/pypy/jit/backend/llsupport/ffisupport.py
@@ -1,19 +1,21 @@
 from pypy.rlib.rarithmetic import intmask
 from pypy.jit.metainterp import history
-from pypy.jit.backend.llsupport.descr import DynamicIntCallDescr, NonGcPtrCallDescr,\
-    FloatCallDescr, VoidCallDescr
+from pypy.rpython.lltypesystem import rffi
+from pypy.jit.backend.llsupport.descr import (
+    DynamicIntCallDescr, NonGcPtrCallDescr, FloatCallDescr, VoidCallDescr,
+    LongLongCallDescr, getCallDescrClass)
 
 class UnsupportedKind(Exception):
     pass
 
-def get_call_descr_dynamic(ffi_args, ffi_result, extrainfo=None):
+def get_call_descr_dynamic(cpu, ffi_args, ffi_result, extrainfo=None):
     """Get a call descr: the types of result and args are represented by
     rlib.libffi.types.*"""
     try:
-        reskind = get_ffi_type_kind(ffi_result)
-        argkinds = [get_ffi_type_kind(arg) for arg in ffi_args]
+        reskind = get_ffi_type_kind(cpu, ffi_result)
+        argkinds = [get_ffi_type_kind(cpu, arg) for arg in ffi_args]
     except UnsupportedKind:
-        return None # ??
+        return None
     arg_classes = ''.join(argkinds)
     if reskind == history.INT:
         size = intmask(ffi_result.c_size)
@@ -25,17 +27,26 @@
         return FloatCallDescr(arg_classes, extrainfo)
     elif reskind == history.VOID:
         return VoidCallDescr(arg_classes, extrainfo)
+    elif reskind == 'L':
+        return LongLongCallDescr(arg_classes, extrainfo)
+    elif reskind == 'S':
+        SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+        return SingleFloatCallDescr(arg_classes, extrainfo)
     assert False
 
-def get_ffi_type_kind(ffi_type):
+def get_ffi_type_kind(cpu, ffi_type):
     from pypy.rlib.libffi import types
     kind = types.getkind(ffi_type)
     if kind == 'i' or kind == 'u':
         return history.INT
-    elif kind == 'f':
+    elif cpu.supports_floats and kind == 'f':
         return history.FLOAT
     elif kind == 'v':
         return history.VOID
+    elif cpu.supports_longlong and (kind == 'I' or kind == 'U'):     # longlong
+        return 'L'
+    elif cpu.supports_singlefloats and kind == 's':    # singlefloat
+        return 'S'
     raise UnsupportedKind("Unsupported kind '%s'" % kind)
 
 def is_ffi_type_signed(ffi_type):
diff --git a/pypy/jit/backend/llsupport/llmodel.py b/pypy/jit/backend/llsupport/llmodel.py
--- a/pypy/jit/backend/llsupport/llmodel.py
+++ b/pypy/jit/backend/llsupport/llmodel.py
@@ -259,7 +259,7 @@
 
     def calldescrof_dynamic(self, ffi_args, ffi_result, extrainfo=None):
         from pypy.jit.backend.llsupport import ffisupport
-        return ffisupport.get_call_descr_dynamic(ffi_args, ffi_result,
+        return ffisupport.get_call_descr_dynamic(self, ffi_args, ffi_result,
                                                  extrainfo)
 
     def get_overflow_error(self):
@@ -509,7 +509,7 @@
     def bh_call_i(self, func, calldescr, args_i, args_r, args_f):
         assert isinstance(calldescr, BaseIntCallDescr)
         if not we_are_translated():
-            calldescr.verify_types(args_i, args_r, args_f, history.INT)
+            calldescr.verify_types(args_i, args_r, args_f, history.INT + 'S')
         return calldescr.call_stub(func, args_i, args_r, args_f)
 
     def bh_call_r(self, func, calldescr, args_i, args_r, args_f):
diff --git a/pypy/jit/backend/llsupport/test/test_descr.py b/pypy/jit/backend/llsupport/test/test_descr.py
--- a/pypy/jit/backend/llsupport/test/test_descr.py
+++ b/pypy/jit/backend/llsupport/test/test_descr.py
@@ -52,7 +52,8 @@
     S = lltype.GcStruct('S', ('x', lltype.Char),
                              ('y', lltype.Ptr(T)),
                              ('z', lltype.Ptr(U)),
-                             ('f', lltype.Float))
+                             ('f', lltype.Float),
+                             ('s', lltype.SingleFloat))
     assert getFieldDescrClass(lltype.Ptr(T)) is GcPtrFieldDescr
     assert getFieldDescrClass(lltype.Ptr(U)) is NonGcPtrFieldDescr
     cls = getFieldDescrClass(lltype.Char)
@@ -61,6 +62,10 @@
     clsf = getFieldDescrClass(lltype.Float)
     assert clsf != cls
     assert clsf == getFieldDescrClass(lltype.Float)
+    clss = getFieldDescrClass(lltype.SingleFloat)
+    assert clss not in (cls, clsf)
+    assert clss == getFieldDescrClass(lltype.SingleFloat)
+    assert clss == getFieldDescrClass(rffi.UINT)    # for now
     #
     c0 = GcCache(False)
     c1 = GcCache(True)
@@ -72,14 +77,17 @@
         descr_y = get_field_descr(c2, S, 'y')
         descr_z = get_field_descr(c2, S, 'z')
         descr_f = get_field_descr(c2, S, 'f')
+        descr_s = get_field_descr(c2, S, 's')
         assert descr_x.__class__ is cls
         assert descr_y.__class__ is GcPtrFieldDescr
         assert descr_z.__class__ is NonGcPtrFieldDescr
         assert descr_f.__class__ is clsf
+        assert descr_s.__class__ is clss
         assert descr_x.name == 'S.x'
         assert descr_y.name == 'S.y'
         assert descr_z.name == 'S.z'
         assert descr_f.name == 'S.f'
+        assert descr_s.name == 'S.s'
         if not tsc:
             assert descr_x.offset < descr_y.offset < descr_z.offset
             assert descr_x.sort_key() < descr_y.sort_key() < descr_z.sort_key()
@@ -87,23 +95,29 @@
             assert descr_y.get_field_size(False) == rffi.sizeof(lltype.Ptr(T))
             assert descr_z.get_field_size(False) == rffi.sizeof(lltype.Ptr(U))
             assert descr_f.get_field_size(False) == rffi.sizeof(lltype.Float)
+            assert descr_s.get_field_size(False) == rffi.sizeof(
+                                                            lltype.SingleFloat)
         else:
             assert isinstance(descr_x.offset, Symbolic)
             assert isinstance(descr_y.offset, Symbolic)
             assert isinstance(descr_z.offset, Symbolic)
             assert isinstance(descr_f.offset, Symbolic)
+            assert isinstance(descr_s.offset, Symbolic)
             assert isinstance(descr_x.get_field_size(True), Symbolic)
             assert isinstance(descr_y.get_field_size(True), Symbolic)
             assert isinstance(descr_z.get_field_size(True), Symbolic)
             assert isinstance(descr_f.get_field_size(True), Symbolic)
+            assert isinstance(descr_s.get_field_size(True), Symbolic)
         assert not descr_x.is_pointer_field()
         assert     descr_y.is_pointer_field()
         assert not descr_z.is_pointer_field()
         assert not descr_f.is_pointer_field()
+        assert not descr_s.is_pointer_field()
         assert not descr_x.is_float_field()
         assert not descr_y.is_float_field()
         assert not descr_z.is_float_field()
         assert     descr_f.is_float_field()
+        assert not descr_s.is_float_field()
 
 
 def test_get_field_descr_sign():
@@ -135,6 +149,7 @@
     A2 = lltype.GcArray(lltype.Ptr(T))
     A3 = lltype.GcArray(lltype.Ptr(U))
     A4 = lltype.GcArray(lltype.Float)
+    A5 = lltype.GcArray(lltype.SingleFloat)
     assert getArrayDescrClass(A2) is GcPtrArrayDescr
     assert getArrayDescrClass(A3) is NonGcPtrArrayDescr
     cls = getArrayDescrClass(A1)
@@ -143,25 +158,32 @@
     clsf = getArrayDescrClass(A4)
     assert clsf != cls
     assert clsf == getArrayDescrClass(lltype.GcArray(lltype.Float))
+    clss = getArrayDescrClass(A5)
+    assert clss not in (clsf, cls)
+    assert clss == getArrayDescrClass(lltype.GcArray(rffi.UINT))
     #
     c0 = GcCache(False)
     descr1 = get_array_descr(c0, A1)
     descr2 = get_array_descr(c0, A2)
     descr3 = get_array_descr(c0, A3)
     descr4 = get_array_descr(c0, A4)
+    descr5 = get_array_descr(c0, A5)
     assert descr1.__class__ is cls
     assert descr2.__class__ is GcPtrArrayDescr
     assert descr3.__class__ is NonGcPtrArrayDescr
     assert descr4.__class__ is clsf
+    assert descr5.__class__ is clss
     assert descr1 == get_array_descr(c0, lltype.GcArray(lltype.Char))
     assert not descr1.is_array_of_pointers()
     assert     descr2.is_array_of_pointers()
     assert not descr3.is_array_of_pointers()
     assert not descr4.is_array_of_pointers()
+    assert not descr5.is_array_of_pointers()
     assert not descr1.is_array_of_floats()
     assert not descr2.is_array_of_floats()
     assert not descr3.is_array_of_floats()
     assert     descr4.is_array_of_floats()
+    assert not descr5.is_array_of_floats()
     #
     def get_alignment(code):
         # Retrieve default alignment for the compiler/platform
@@ -170,27 +192,33 @@
     assert descr2.get_base_size(False) == get_alignment('p')
     assert descr3.get_base_size(False) == get_alignment('p')
     assert descr4.get_base_size(False) == get_alignment('d')
+    assert descr5.get_base_size(False) == get_alignment('f')
     assert descr1.get_ofs_length(False) == 0
     assert descr2.get_ofs_length(False) == 0
     assert descr3.get_ofs_length(False) == 0
     assert descr4.get_ofs_length(False) == 0
+    assert descr5.get_ofs_length(False) == 0
     assert descr1.get_item_size(False) == rffi.sizeof(lltype.Char)
     assert descr2.get_item_size(False) == rffi.sizeof(lltype.Ptr(T))
     assert descr3.get_item_size(False) == rffi.sizeof(lltype.Ptr(U))
     assert descr4.get_item_size(False) == rffi.sizeof(lltype.Float)
+    assert descr5.get_item_size(False) == rffi.sizeof(lltype.SingleFloat)
     #
     assert isinstance(descr1.get_base_size(True), Symbolic)
     assert isinstance(descr2.get_base_size(True), Symbolic)
     assert isinstance(descr3.get_base_size(True), Symbolic)
     assert isinstance(descr4.get_base_size(True), Symbolic)
+    assert isinstance(descr5.get_base_size(True), Symbolic)
     assert isinstance(descr1.get_ofs_length(True), Symbolic)
     assert isinstance(descr2.get_ofs_length(True), Symbolic)
     assert isinstance(descr3.get_ofs_length(True), Symbolic)
     assert isinstance(descr4.get_ofs_length(True), Symbolic)
+    assert isinstance(descr5.get_ofs_length(True), Symbolic)
     assert isinstance(descr1.get_item_size(True), Symbolic)
     assert isinstance(descr2.get_item_size(True), Symbolic)
     assert isinstance(descr3.get_item_size(True), Symbolic)
     assert isinstance(descr4.get_item_size(True), Symbolic)
+    assert isinstance(descr5.get_item_size(True), Symbolic)
     CA = rffi.CArray(lltype.Signed)
     descr = get_array_descr(c0, CA)
     assert not descr.is_array_of_floats()
@@ -210,6 +238,11 @@
     assert descr.is_array_of_floats()
     assert descr.get_base_size(False) == 0
     assert descr.get_ofs_length(False) == -1
+    CA = rffi.CArray(rffi.FLOAT)
+    descr = get_array_descr(c0, CA)
+    assert not descr.is_array_of_floats()
+    assert descr.get_base_size(False) == 0
+    assert descr.get_ofs_length(False) == -1
 
 
 def test_get_array_descr_sign():
@@ -257,6 +290,11 @@
     assert descr4.get_result_size(False) == rffi.sizeof(lltype.Float)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c0, [lltype.SingleFloat], lltype.SingleFloat)
+    assert descr5.get_result_size(False) == rffi.sizeof(lltype.SingleFloat)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_get_call_descr_not_translated_longlong():
     if sys.maxint > 2147483647:
@@ -286,6 +324,11 @@
     assert isinstance(descr4.get_result_size(True), Symbolic)
     assert descr4.get_return_type() == history.FLOAT
     assert descr4.arg_classes == "ff"
+    #
+    descr5 = get_call_descr(c1, [lltype.SingleFloat], lltype.SingleFloat)
+    assert isinstance(descr5.get_result_size(True), Symbolic)
+    assert descr5.get_return_type() == "S"
+    assert descr5.arg_classes == "S"
 
 def test_call_descr_extra_info():
     c1 = GcCache(True)
@@ -345,8 +388,11 @@
     #
     descr4f = get_call_descr(c0, [lltype.Char, lltype.Ptr(S)], lltype.Float)
     assert 'FloatCallDescr' in descr4f.repr_of_descr()
+    #
+    descr5f = get_call_descr(c0, [lltype.Char], lltype.SingleFloat)
+    assert 'SingleFloatCallDescr' in descr5f.repr_of_descr()
 
-def test_call_stubs():
+def test_call_stubs_1():
     c0 = GcCache(False)
     ARGS = [lltype.Char, lltype.Signed]
     RES = lltype.Char
@@ -360,6 +406,8 @@
     res = call_stub(rffi.cast(lltype.Signed, fnptr), [1, 2], None, None)
     assert res == ord('c')
 
+def test_call_stubs_2():
+    c0 = GcCache(False)
     ARRAY = lltype.GcArray(lltype.Signed)
     ARGS = [lltype.Float, lltype.Ptr(ARRAY)]
     RES = lltype.Float
@@ -375,3 +423,27 @@
     res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
                            [], [opaquea], [longlong.getfloatstorage(3.5)])
     assert longlong.getrealfloat(res) == 4.5
+
+def test_call_stubs_single_float():
+    from pypy.rlib.longlong2float import uint2singlefloat, singlefloat2uint
+    from pypy.rlib.rarithmetic import r_singlefloat, intmask
+    #
+    c0 = GcCache(False)
+    ARGS = [lltype.SingleFloat, lltype.SingleFloat, lltype.SingleFloat]
+    RES = lltype.SingleFloat
+
+    def f(a, b, c):
+        a = float(a)
+        b = float(b)
+        c = float(c)
+        x = a - (b / c)
+        return r_singlefloat(x)
+
+    fnptr = llhelper(lltype.Ptr(lltype.FuncType(ARGS, RES)), f)
+    descr2 = get_call_descr(c0, ARGS, RES)
+    a = intmask(singlefloat2uint(r_singlefloat(-10.0)))
+    b = intmask(singlefloat2uint(r_singlefloat(3.0)))
+    c = intmask(singlefloat2uint(r_singlefloat(2.0)))
+    res = descr2.call_stub(rffi.cast(lltype.Signed, fnptr),
+                           [a, b, c], [], [])
+    assert float(uint2singlefloat(rffi.r_uint(res))) == -11.5
diff --git a/pypy/jit/backend/llsupport/test/test_ffisupport.py b/pypy/jit/backend/llsupport/test/test_ffisupport.py
--- a/pypy/jit/backend/llsupport/test/test_ffisupport.py
+++ b/pypy/jit/backend/llsupport/test/test_ffisupport.py
@@ -1,24 +1,48 @@
 from pypy.rlib.libffi import types
-from pypy.jit.backend.llsupport.ffisupport import get_call_descr_dynamic, \
-    VoidCallDescr, DynamicIntCallDescr
-    
+from pypy.jit.backend.llsupport.ffisupport import *
+
+
+class FakeCPU:
+    def __init__(self, supports_floats=False, supports_longlong=False,
+                 supports_singlefloats=False):
+        self.supports_floats = supports_floats
+        self.supports_longlong = supports_longlong
+        self.supports_singlefloats = supports_singlefloats
+
+
 def test_call_descr_dynamic():
+    args = [types.sint, types.pointer]
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.sint)
+    assert isinstance(descr, DynamicIntCallDescr)
+    assert descr.arg_classes == 'ii'
 
     args = [types.sint, types.double, types.pointer]
-    descr = get_call_descr_dynamic(args, types.void)
+    descr = get_call_descr_dynamic(FakeCPU(), args, types.void)
+    assert descr is None    # missing floats
+    descr = get_call_descr_dynamic(FakeCPU(supports_floats=True),
+                                   args, types.void)
     assert isinstance(descr, VoidCallDescr)
     assert descr.arg_classes == 'ifi'
 
-    descr = get_call_descr_dynamic([], types.sint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.sint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == True
 
-    descr = get_call_descr_dynamic([], types.uint8)
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.uint8)
     assert isinstance(descr, DynamicIntCallDescr)
     assert descr.get_result_size(False) == 1
     assert descr.is_result_signed() == False
 
-    descr = get_call_descr_dynamic([], types.float)
-    assert descr is None # single floats are not supported so far
-    
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.slonglong)
+    assert descr is None   # missing longlongs
+    descr = get_call_descr_dynamic(FakeCPU(supports_longlong=True),
+                                   [], types.slonglong)
+    assert isinstance(descr, LongLongCallDescr)
+
+    descr = get_call_descr_dynamic(FakeCPU(), [], types.float)
+    assert descr is None   # missing singlefloats
+    descr = get_call_descr_dynamic(FakeCPU(supports_singlefloats=True),
+                                   [], types.float)
+    SingleFloatCallDescr = getCallDescrClass(rffi.FLOAT)
+    assert isinstance(descr, SingleFloatCallDescr)
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -8,6 +8,7 @@
     # ^^^ This is only useful on 32-bit platforms.  If True,
     # longlongs are supported by the JIT, but stored as doubles.
     # Boxes and Consts are BoxFloats and ConstFloats.
+    supports_singlefloats = False
 
     done_with_this_frame_void_v = -1
     done_with_this_frame_int_v = -1
diff --git a/pypy/jit/backend/test/calling_convention_test.py b/pypy/jit/backend/test/calling_convention_test.py
--- a/pypy/jit/backend/test/calling_convention_test.py
+++ b/pypy/jit/backend/test/calling_convention_test.py
@@ -290,3 +290,58 @@
                 assert abs(x - expected_result) < 0.0001
             finally:
                 del self.cpu.done_with_this_frame_float_v
+
+    def test_call_with_singlefloats(self):
+        cpu = self.cpu
+        if not cpu.supports_floats or not cpu.supports_singlefloats:
+            py.test.skip('requires floats and singlefloats')
+
+        import random
+        from pypy.rlib.libffi import types
+        from pypy.rlib.rarithmetic import r_singlefloat
+
+        def func(*args):
+            res = 0.0
+            for i, x in enumerate(args):
+                res += (i + 1.1) * float(x)
+            return res
+
+        F = lltype.Float
+        S = lltype.SingleFloat
+        I = lltype.Signed
+        floats = [random.random() - 0.5 for i in range(8)]
+        singlefloats = [r_singlefloat(random.random() - 0.5) for i in range(8)]
+        ints = [random.randrange(-99, 99) for i in range(8)]
+        for repeat in range(100):
+            args = []
+            argvalues = []
+            argslist = []
+            local_floats = list(floats)
+            local_singlefloats = list(singlefloats)
+            local_ints = list(ints)
+            for i in range(8):
+                case = random.randrange(0, 3)
+                if case == 0:
+                    args.append(F)
+                    arg = local_floats.pop()
+                    argslist.append(boxfloat(arg))
+                elif case == 1:
+                    args.append(S)
+                    arg = local_singlefloats.pop()
+                    argslist.append(BoxInt(longlong.singlefloat2int(arg)))
+                else:
+                    args.append(I)
+                    arg = local_ints.pop()
+                    argslist.append(BoxInt(arg))
+                argvalues.append(arg)
+            FUNC = self.FuncType(args, F)
+            FPTR = self.Ptr(FUNC)
+            func_ptr = llhelper(FPTR, func)
+            calldescr = cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+            funcbox = self.get_funcbox(cpu, func_ptr)
+
+            res = self.execute_operation(rop.CALL,
+                                         [funcbox] + argslist,
+                                         'float', descr=calldescr)
+            expected = func(*argvalues)
+            assert abs(res.getfloat() - expected) < 0.0001
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -2734,6 +2734,65 @@
                                      'float', descr=calldescr)
         assert res.getfloatstorage() == expected
 
+    def test_singlefloat_result_of_call_direct(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("singlefloat test")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        x = self.cpu.bh_call_i(self.get_funcbox(self.cpu, f).value,
+                               calldescr, [ivalue], None, None)
+        assert x == iexpected
+
+    def test_singlefloat_result_of_call_compiled(self):
+        if not self.cpu.supports_singlefloats:
+            py.test.skip("test of singlefloat result")
+        from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.rlib.rarithmetic import r_singlefloat
+        eci = ExternalCompilationInfo(
+            separate_module_sources=["""
+            float fn_test_result_of_call(float x)
+            {
+                return x / 2.0f;
+            }
+            """],
+            export_symbols=['fn_test_result_of_call'])
+        f = rffi.llexternal('fn_test_result_of_call', [lltype.SingleFloat],
+                            lltype.SingleFloat,
+                            compilation_info=eci, _nowrapper=True)
+        value = r_singlefloat(-42.5)
+        expected = r_singlefloat(-21.25)
+        assert f(value) == expected
+        #
+        FUNC = self.FuncType([lltype.SingleFloat], lltype.SingleFloat)
+        FPTR = self.Ptr(FUNC)
+        calldescr = self.cpu.calldescrof(FUNC, FUNC.ARGS, FUNC.RESULT)
+        funcbox = self.get_funcbox(self.cpu, f)
+        ivalue = longlong.singlefloat2int(value)
+        iexpected = longlong.singlefloat2int(expected)
+        res = self.execute_operation(rop.CALL, [funcbox, BoxInt(ivalue)],
+                                     'int', descr=calldescr)
+        assert res.value == iexpected
+
     def test_free_loop_and_bridges(self):
         from pypy.jit.backend.llsupport.llmodel import AbstractLLCPU
         if not isinstance(self.cpu, AbstractLLCPU):
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -56,7 +56,9 @@
         self.exc = exc
         self.is_guard_not_invalidated = is_guard_not_invalidated
 
-DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed))
+DEBUG_COUNTER = lltype.Struct('DEBUG_COUNTER', ('i', lltype.Signed),
+                              ('bridge', lltype.Signed), # 0 or 1
+                              ('number', lltype.Signed))
 
 class Assembler386(object):
     _regalloc = None
@@ -155,9 +157,12 @@
     def finish_once(self):
         if self._debug:
             debug_start('jit-backend-counts')
-            for i in range(len(self.loop_run_counters)):
-                struct = self.loop_run_counters[i]
-                debug_print(str(i) + ':' + str(struct.i))
+            for struct in self.loop_run_counters:
+                if struct.bridge:
+                    prefix = 'bridge '
+                else:
+                    prefix = 'loop '
+                debug_print(prefix + str(struct.number) + ':' + str(struct.i))
             debug_stop('jit-backend-counts')
 
     def _build_float_constants(self):
@@ -404,7 +409,7 @@
         self.setup(looptoken)
         self.currently_compiling_loop = looptoken
         if log:
-            self._register_counter()
+            self._register_counter(False, looptoken.number)
             operations = self._inject_debugging_code(looptoken, operations)
 
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
@@ -473,7 +478,7 @@
 
         self.setup(original_loop_token)
         if log:
-            self._register_counter()
+            self._register_counter(True, descr_number)
             operations = self._inject_debugging_code(faildescr, operations)
 
         arglocs = self.rebuild_faillocs_from_descr(failure_recovery)
@@ -570,7 +575,7 @@
         return self.mc.materialize(self.cpu.asmmemmgr, allblocks,
                                    self.cpu.gc_ll_descr.gcrootmap)
 
-    def _register_counter(self):
+    def _register_counter(self, bridge, number):
         if self._debug:
             # YYY very minor leak -- we need the counters to stay alive
             # forever, just because we want to report them at the end
@@ -578,6 +583,8 @@
             struct = lltype.malloc(DEBUG_COUNTER, flavor='raw',
                                    track_allocation=False)
             struct.i = 0
+            struct.bridge = int(bridge)
+            struct.number = number
             self.loop_run_counters.append(struct)
 
     def _find_failure_recovery_bytecode(self, faildescr):
@@ -1068,9 +1075,10 @@
                     self.implement_guard(guard_token, checkfalsecond)
         return genop_cmp_guard_float
 
-    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax):
+    def _emit_call(self, force_index, x, arglocs, start=0, tmp=eax,
+                   argtypes=None):
         if IS_X86_64:
-            return self._emit_call_64(force_index, x, arglocs, start)
+            return self._emit_call_64(force_index, x, arglocs, start, argtypes)
 
         p = 0
         n = len(arglocs)
@@ -1098,12 +1106,13 @@
         self.mc.CALL(x)
         self.mark_gc_roots(force_index)
 
-    def _emit_call_64(self, force_index, x, arglocs, start):
+    def _emit_call_64(self, force_index, x, arglocs, start, argtypes):
         src_locs = []
         dst_locs = []
         xmm_src_locs = []
         xmm_dst_locs = []
         pass_on_stack = []
+        singlefloats = None
 
         # In reverse order for use with pop()
         unused_gpr = [r9, r8, ecx, edx, esi, edi]
@@ -1123,6 +1132,11 @@
                     xmm_dst_locs.append(unused_xmm.pop())
                 else:
                     pass_on_stack.append(loc)
+            elif (argtypes is not None and argtypes[i-start] == 'S' and
+                  len(unused_xmm) > 0):
+                # Singlefloat argument
+                if singlefloats is None: singlefloats = []
+                singlefloats.append((loc, unused_xmm.pop()))
             else:
                 if len(unused_gpr) > 0:
                     src_locs.append(loc)
@@ -1150,9 +1164,15 @@
                 else:
                     self.mc.MOV_sr(i*WORD, loc.value)
 
-        # Handle register arguments
+        # Handle register arguments: first remap the xmm arguments
+        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs,
+                           X86_64_XMM_SCRATCH_REG)
+        # Load the singlefloat arguments from main regs or stack to xmm regs
+        if singlefloats is not None:
+            for src, dst in singlefloats:
+                self.mc.MOVD(dst, src)
+        # Finally remap the arguments in the main regs
         remap_frame_layout(self, src_locs, dst_locs, X86_64_SCRATCH_REG)
-        remap_frame_layout(self, xmm_src_locs, xmm_dst_locs, X86_64_XMM_SCRATCH_REG)
 
         self._regalloc.reserve_param(len(pass_on_stack))
         self.mc.CALL(x)
@@ -1267,6 +1287,20 @@
     def genop_cast_int_to_float(self, op, arglocs, resloc):
         self.mc.CVTSI2SD(resloc, arglocs[0])
 
+    def genop_cast_float_to_singlefloat(self, op, arglocs, resloc):
+        loc0, loctmp = arglocs
+        self.mc.CVTSD2SS(loctmp, loc0)
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loctmp, RegLoc)
+        self.mc.MOVD_rx(resloc.value, loctmp.value)
+
+    def genop_cast_singlefloat_to_float(self, op, arglocs, resloc):
+        loc0, = arglocs
+        assert isinstance(resloc, RegLoc)
+        assert isinstance(loc0, RegLoc)
+        self.mc.MOVD_xr(resloc.value, loc0.value)
+        self.mc.CVTSS2SD_xx(resloc.value, resloc.value)
+
     def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
         guard_opnum = guard_op.getopnum()
         self.mc.CMP(arglocs[0], imm0)
@@ -2025,7 +2059,8 @@
         else:
             tmp = eax
 
-        self._emit_call(force_index, x, arglocs, 3, tmp=tmp)
+        self._emit_call(force_index, x, arglocs, 3, tmp=tmp,
+                        argtypes=op.getdescr().get_arg_types())
 
         if IS_X86_32 and isinstance(resloc, StackLoc) and resloc.width == 8:
             # a float or a long long return
@@ -2037,7 +2072,19 @@
                 #     and this way is simpler also because the result loc
                 #     can just be always a stack location
             else:
-                self.mc.FSTP_b(resloc.value)   # float return
+                self.mc.FSTPL_b(resloc.value)   # float return
+        elif op.getdescr().get_return_type() == 'S':
+            # singlefloat return
+            assert resloc is eax
+            if IS_X86_32:
+                # must convert ST(0) to a 32-bit singlefloat and load it into EAX
+                # mess mess mess
+                self.mc.SUB_ri(esp.value, 4)
+                self.mc.FSTPS_s(0)
+                self.mc.POP_r(eax.value)
+            elif IS_X86_64:
+                # must copy from the lower 32 bits of XMM0 into eax
+                self.mc.MOVD_rx(eax.value, xmm0.value)
         elif size == WORD:
             assert resloc is eax or resloc is xmm0    # a full word
         elif size == 0:
@@ -2195,7 +2242,7 @@
         self._emit_call(fail_index, imm(asm_helper_adr), [eax, arglocs[1]], 0,
                         tmp=ecx)
         if IS_X86_32 and isinstance(result_loc, StackLoc) and result_loc.type == FLOAT:
-            self.mc.FSTP_b(result_loc.value)
+            self.mc.FSTPL_b(result_loc.value)
         #else: result_loc is already either eax or None, checked below
         self.mc.JMP_l8(0) # jump to done, patched later
         jmp_location = self.mc.get_relative_pos()
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -705,6 +705,17 @@
         self.Perform(op, [loc0], loc1)
         self.rm.possibly_free_var(op.getarg(0))
 
+    def consider_cast_float_to_singlefloat(self, op):
+        loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
+        loc1 = self.rm.force_allocate_reg(op.result)
+        self.xrm.possibly_free_var(op.getarg(0))
+        tmpxvar = TempBox()
+        loctmp = self.xrm.force_allocate_reg(tmpxvar)   # may be equal to loc0
+        self.xrm.possibly_free_var(tmpxvar)
+        self.Perform(op, [loc0, loctmp], loc1)
+
+    consider_cast_singlefloat_to_float = consider_cast_int_to_float
+
     def _consider_llong_binop_xx(self, op):
         # must force both arguments into xmm registers, because we don't
         # know if they will be suitably aligned.  Exception: if the second
diff --git a/pypy/jit/backend/x86/regloc.py b/pypy/jit/backend/x86/regloc.py
--- a/pypy/jit/backend/x86/regloc.py
+++ b/pypy/jit/backend/x86/regloc.py
@@ -521,6 +521,8 @@
     UCOMISD = _binaryop('UCOMISD')
     CVTSI2SD = _binaryop('CVTSI2SD')
     CVTTSD2SI = _binaryop('CVTTSD2SI')
+    CVTSD2SS = _binaryop('CVTSD2SS')
+    CVTSS2SD = _binaryop('CVTSS2SD')
     
     SQRTSD = _binaryop('SQRTSD')
 
@@ -534,6 +536,8 @@
     PXOR  = _binaryop('PXOR')
     PCMPEQD = _binaryop('PCMPEQD')
 
+    MOVD = _binaryop('MOVD')
+
     CALL = _relative_unaryop('CALL')
     JMP = _relative_unaryop('JMP')
 
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -19,6 +19,7 @@
 class AbstractX86CPU(AbstractLLCPU):
     debug = True
     supports_floats = True
+    supports_singlefloats = True
 
     BOOTSTRAP_TP = lltype.FuncType([], lltype.Signed)
     dont_keepalive_stuff = False # for tests
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -573,7 +573,8 @@
     BTS_jr = insn(rex_w, '\x0F\xAB', register(2,8), abs_, immediate(1))
 
     # x87 instructions
-    FSTP_b = insn('\xDD', orbyte(3<<3), stack_bp(1))
+    FSTPL_b = insn('\xDD', orbyte(3<<3), stack_bp(1)) # rffi.DOUBLE ('as' wants L??)
+    FSTPS_s = insn('\xD9', orbyte(3<<3), stack_sp(1)) # lltype.SingleFloat
 
     # ------------------------------ Random mess -----------------------
     RDTSC = insn('\x0F\x31')
@@ -590,8 +591,18 @@
     CVTTSD2SI_rx = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), register(2), '\xC0')
     CVTTSD2SI_rb = xmminsn('\xF2', rex_w, '\x0F\x2C', register(1, 8), stack_bp(2))
 
-    MOVD_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0')
-    MOVD_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xx = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSD2SS_xb = xmminsn('\xF2', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+    CVTSS2SD_xx = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), register(2), '\xC0')
+    CVTSS2SD_xb = xmminsn('\xF3', rex_nw, '\x0F\x5A',
+                          register(1, 8), stack_bp(2))
+
+    MOVD_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0')
+    MOVD_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    MOVD_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2))
 
     PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
 
diff --git a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
--- a/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
+++ b/pypy/jit/backend/x86/test/test_rx86_32_auto_encoding.py
@@ -36,6 +36,14 @@
 def hexdump(s):
     return ' '.join(["%02X" % ord(c) for c in s])
 
+def reduce_to_32bit(s):
+    if s[:2] != '%r':
+        return s
+    if s[2:].isdigit():
+        return s + 'd'
+    else:
+        return '%e' + s[2:]
+
 # ____________________________________________________________
 
 COUNT1 = 15
@@ -180,12 +188,14 @@
     ##        for m, extra in args:
     ##            if m in (i386.MODRM, i386.MODRM8) or all:
     ##                suffix = suffixes[sizes[m]] + suffix
-            if argmodes and not self.is_xmm_insn:
+            if (argmodes and not self.is_xmm_insn
+                         and not instrname.startswith('FSTP')):
                 suffix = suffixes[self.WORD]
             # Special case: On 64-bit CPUs, rx86 assumes 64-bit integer
             # operands when converting to/from floating point, so we need to
             # indicate that with a suffix
-            if (self.WORD == 8) and instrname.startswith('CVT'):
+            if (self.WORD == 8) and (instrname.startswith('CVT') and
+                                     'SI' in instrname):
                 suffix = suffixes[self.WORD]
 
             if instr_suffix is not None:
@@ -218,10 +228,10 @@
                 and ops[1].startswith('%r')):
                 # movq $xxx, %rax => movl $xxx, %eax
                 suffix = 'l'
-                if ops[1][2:].isdigit():
-                    ops[1] += 'd'
-                else:
-                    ops[1] = '%e' + ops[1][2:]
+                ops[1] = reduce_to_32bit(ops[1])
+            if instrname.lower() == 'movd':
+                ops[0] = reduce_to_32bit(ops[0])
+                ops[1] = reduce_to_32bit(ops[1])
             #
             op = '\t%s%s %s%s' % (instrname.lower(), suffix,
                                   ', '.join(ops), following)
diff --git a/pypy/jit/codewriter/assembler.py b/pypy/jit/codewriter/assembler.py
--- a/pypy/jit/codewriter/assembler.py
+++ b/pypy/jit/codewriter/assembler.py
@@ -76,6 +76,8 @@
                 TYPE = llmemory.Address
             if TYPE == llmemory.Address:
                 value = heaptracker.adr2int(value)
+            if TYPE is lltype.SingleFloat:
+                value = longlong.singlefloat2int(value)
             if not isinstance(value, (llmemory.AddressAsInt,
                                       ComputedIntSymbolic)):
                 value = lltype.cast_primitive(lltype.Signed, value)
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -9,7 +9,7 @@
 from pypy.objspace.flow.model import SpaceOperation, Variable, Constant, c_last_exception
 from pypy.rlib import objectmodel
 from pypy.rlib.jit import _we_are_jitted
-from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass
+from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rclass, rffi
 from pypy.rpython.rclass import IR_QUASIIMMUTABLE, IR_QUASIIMMUTABLE_ARRAY
 from pypy.translator.simplify import get_funcobj
 from pypy.translator.unsimplify import varoftype
@@ -785,7 +785,6 @@
             op2.result = op.result
             return op2
         elif toll:
-            from pypy.rpython.lltypesystem import rffi
             size, unsigned = rffi.size_and_sign(op.args[0].concretetype)
             if unsigned:
                 INTERMEDIATE = lltype.Unsigned
@@ -807,20 +806,27 @@
             return self.force_cast_without_longlong(op.args[0], op.result)
 
     def force_cast_without_longlong(self, v_arg, v_result):
-        from pypy.rpython.lltypesystem.rffi import size_and_sign, sizeof, FLOAT
-        #
-        if (v_result.concretetype in (FLOAT, lltype.Float) or
-            v_arg.concretetype in (FLOAT, lltype.Float)):
-            assert (v_result.concretetype == lltype.Float and
-                    v_arg.concretetype == lltype.Float), "xxx unsupported cast"
+        if v_result.concretetype == v_arg.concretetype:
             return
-        #
-        size2, unsigned2 = size_and_sign(v_result.concretetype)
-        assert size2 <= sizeof(lltype.Signed)
-        if size2 == sizeof(lltype.Signed):
+        if v_arg.concretetype == rffi.FLOAT:
+            assert v_result.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_singlefloat_to_float', [v_arg],
+                                  v_result)
+        if v_result.concretetype == rffi.FLOAT:
+            assert v_arg.concretetype == lltype.Float, "cast %s -> %s" % (
+                v_arg.concretetype, v_result.concretetype)
+            return SpaceOperation('cast_float_to_singlefloat', [v_arg],
+                                  v_result)
+        return self.force_cast_without_singlefloat(v_arg, v_result)
+
+    def force_cast_without_singlefloat(self, v_arg, v_result):
+        size2, unsigned2 = rffi.size_and_sign(v_result.concretetype)
+        assert size2 <= rffi.sizeof(lltype.Signed)
+        if size2 == rffi.sizeof(lltype.Signed):
             return     # the target type is LONG or ULONG
-        size1, unsigned1 = size_and_sign(v_arg.concretetype)
-        assert size1 <= sizeof(lltype.Signed)
+        size1, unsigned1 = rffi.size_and_sign(v_arg.concretetype)
+        assert size1 <= rffi.sizeof(lltype.Signed)
         #
         def bounds(size, unsigned):
             if unsigned:
@@ -849,7 +855,6 @@
         return result
 
     def rewrite_op_direct_ptradd(self, op):
-        from pypy.rpython.lltypesystem import rffi
         # xxx otherwise, not implemented:
         assert op.args[0].concretetype == rffi.CCHARP
         #
diff --git a/pypy/jit/codewriter/longlong.py b/pypy/jit/codewriter/longlong.py
--- a/pypy/jit/codewriter/longlong.py
+++ b/pypy/jit/codewriter/longlong.py
@@ -7,7 +7,8 @@
 """
 
 import sys
-from pypy.rpython.lltypesystem import lltype
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib import rarithmetic, longlong2float
 
 
 if sys.maxint > 2147483647:
@@ -31,8 +32,6 @@
     # ---------- 32-bit platform ----------
     # the type FloatStorage is r_longlong, and conversion is needed
 
-    from pypy.rlib import rarithmetic, longlong2float
-
     is_64_bit = False
     supports_longlong = True
     r_float_storage = rarithmetic.r_longlong
@@ -41,9 +40,19 @@
     getfloatstorage = longlong2float.float2longlong
     getrealfloat    = longlong2float.longlong2float
     gethash         = lambda xll: rarithmetic.intmask(xll - (xll >> 32))
-    is_longlong     = lambda TYPE: (TYPE == lltype.SignedLongLong or
-                                    TYPE == lltype.UnsignedLongLong)
+    is_longlong     = lambda TYPE: (TYPE is lltype.SignedLongLong or
+                                    TYPE is lltype.UnsignedLongLong)
 
     # -------------------------------------
 
 ZEROF = getfloatstorage(0.0)
+
+# ____________________________________________________________
+
+def int2singlefloat(x):
+    x = rffi.r_uint(x)
+    return longlong2float.uint2singlefloat(x)
+
+def singlefloat2int(x):
+    x = longlong2float.singlefloat2uint(x)
+    return rffi.cast(lltype.Signed, x)
diff --git a/pypy/jit/codewriter/policy.py b/pypy/jit/codewriter/policy.py
--- a/pypy/jit/codewriter/policy.py
+++ b/pypy/jit/codewriter/policy.py
@@ -12,6 +12,7 @@
         self.unsafe_loopy_graphs = set()
         self.supports_floats = False
         self.supports_longlong = False
+        self.supports_singlefloats = False
 
     def set_supports_floats(self, flag):
         self.supports_floats = flag
@@ -19,6 +20,9 @@
     def set_supports_longlong(self, flag):
         self.supports_longlong = flag
 
+    def set_supports_singlefloats(self, flag):
+        self.supports_singlefloats = flag
+
     def dump_unsafe_loops(self):
         f = udir.join("unsafe-loops.txt").open('w')
         strs = [str(graph) for graph in self.unsafe_loopy_graphs]
@@ -58,8 +62,9 @@
                     func, '_jit_unroll_safe_', False)
 
         unsupported = contains_unsupported_variable_type(graph,
-                                                         self.supports_floats,
-                                                         self.supports_longlong)
+                            self.supports_floats,
+                            self.supports_longlong,
+                            self.supports_singlefloats)
         res = see_function and not unsupported
         if res and contains_loop:
             self.unsafe_loopy_graphs.add(graph)
@@ -80,17 +85,24 @@
         return res
 
 def contains_unsupported_variable_type(graph, supports_floats,
-                                       supports_longlong):
+                                              supports_longlong,
+                                              supports_singlefloats):
     getkind = history.getkind
     try:
         for block in graph.iterblocks():
             for v in block.inputargs:
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
             for op in block.operations:
                 for v in op.args:
-                    getkind(v.concretetype, supports_floats, supports_longlong)
+                    getkind(v.concretetype, supports_floats,
+                                            supports_longlong,
+                                            supports_singlefloats)
                 v = op.result
-                getkind(v.concretetype, supports_floats, supports_longlong)
+                getkind(v.concretetype, supports_floats,
+                                        supports_longlong,
+                                        supports_singlefloats)
     except NotImplementedError, e:
         log.WARNING('%s, ignoring graph' % (e,))
         log.WARNING('  %s' % (graph,))
diff --git a/pypy/jit/codewriter/test/test_longlong.py b/pypy/jit/codewriter/test/test_longlong.py
--- a/pypy/jit/codewriter/test/test_longlong.py
+++ b/pypy/jit/codewriter/test/test_longlong.py
@@ -230,3 +230,18 @@
             assert list(op1.args[3]) == []
             assert list(op1.args[4]) == vlist
             assert op1.result == v_result
+
+
+##def test_singlefloat_constants():
+##    v_x = varoftype(TYPE)
+##    vlist = [v_x, const(rffi.cast(TYPE, 7))]
+##    v_result = varoftype(TYPE)
+##    op = SpaceOperation('llong_add', vlist, v_result)
+##    tr = Transformer(FakeCPU(), FakeBuiltinCallControl())
+##    op1 = tr.rewrite_operation(op)
+##    #
+##    assert op1.opname == 'residual_call_irf_f'
+##    assert list(op1.args[2]) == []
+##    assert list(op1.args[3]) == []
+##    assert list(op1.args[4]) == vlist
+##    assert op1.result == v_result
diff --git a/pypy/jit/codewriter/test/test_policy.py b/pypy/jit/codewriter/test/test_policy.py
--- a/pypy/jit/codewriter/test/test_policy.py
+++ b/pypy/jit/codewriter/test/test_policy.py
@@ -12,24 +12,30 @@
     graph = support.getgraph(f, [5])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert not contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                assert not contains_unsupported_variable_type(graph, sf,
+                                                              sll, ssf)
     #
     graph = support.getgraph(f, [5.5])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res is not sf
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res is not sf
     #
     graph = support.getgraph(f, [r_singlefloat(5.5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            assert contains_unsupported_variable_type(graph, sf, sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (not ssf)
     #
     graph = support.getgraph(f, [r_longlong(5)])
     for sf in [False, True]:
         for sll in [False, True]:
-            res = contains_unsupported_variable_type(graph, sf, sll)
-            assert res == (sys.maxint == 2147483647 and not sll)
+            for ssf in [False, True]:
+                res = contains_unsupported_variable_type(graph, sf, sll, ssf)
+                assert res == (sys.maxint == 2147483647 and not sll)
 
 
 def test_regular_function():
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -623,6 +623,19 @@
         x = float(a)
         return longlong.getfloatstorage(x)
 
+    @arguments("f", returns="i")
+    def bhimpl_cast_float_to_singlefloat(a):
+        from pypy.rlib.rarithmetic import r_singlefloat
+        a = longlong.getrealfloat(a)
+        a = r_singlefloat(a)
+        return longlong.singlefloat2int(a)
+
+    @arguments("i", returns="f")
+    def bhimpl_cast_singlefloat_to_float(a):
+        a = longlong.int2singlefloat(a)
+        a = float(a)
+        return longlong.getfloatstorage(a)
+
     # ----------
     # control flow operations
 
diff --git a/pypy/jit/metainterp/executor.py b/pypy/jit/metainterp/executor.py
--- a/pypy/jit/metainterp/executor.py
+++ b/pypy/jit/metainterp/executor.py
@@ -50,7 +50,7 @@
     func = argboxes[0].getint()
     # do the call using the correct function from the cpu
     rettype = descr.get_return_type()
-    if rettype == INT:
+    if rettype == INT or rettype == 'S':       # *S*ingle float
         try:
             result = cpu.bh_call_i(func, descr, args_i, args_r, args_f)
         except Exception, e:
@@ -64,7 +64,7 @@
             metainterp.execute_raised(e)
             result = NULL
         return BoxPtr(result)
-    if rettype == FLOAT or rettype == 'L':
+    if rettype == FLOAT or rettype == 'L':     # *L*ong long
         try:
             result = cpu.bh_call_f(func, descr, args_i, args_r, args_f)
         except Exception, e:
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -20,12 +20,16 @@
 
 FAILARGS_LIMIT = 1000
 
-def getkind(TYPE, supports_floats=True, supports_longlong=True):
+def getkind(TYPE, supports_floats=True,
+                  supports_longlong=True,
+                  supports_singlefloats=True):
     if TYPE is lltype.Void:
         return "void"
     elif isinstance(TYPE, lltype.Primitive):
         if TYPE is lltype.Float and supports_floats:
             return 'float'
+        if TYPE is lltype.SingleFloat and supports_singlefloats:
+            return 'int'     # singlefloats are stored in an int
         if TYPE in (lltype.Float, lltype.SingleFloat):
             raise NotImplementedError("type %s not supported" % TYPE)
         # XXX fix this for oo...
@@ -145,6 +149,7 @@
         """ Implement in call descr.
         Must return INT, REF, FLOAT, or 'v' for void.
         On 32-bit (hack) it can also be 'L' for longlongs.
+        Additionally it can be 'S' for singlefloats.
         """
         raise NotImplementedError
 
diff --git a/pypy/jit/metainterp/optimizeopt/fficall.py b/pypy/jit/metainterp/optimizeopt/fficall.py
--- a/pypy/jit/metainterp/optimizeopt/fficall.py
+++ b/pypy/jit/metainterp/optimizeopt/fficall.py
@@ -6,7 +6,6 @@
 from pypy.jit.metainterp.resoperation import rop, ResOperation
 from pypy.jit.metainterp.optimizeopt.util import make_dispatcher_method
 from pypy.jit.metainterp.optimizeopt.optimizer import Optimization
-from pypy.jit.backend.llsupport.ffisupport import UnsupportedKind
 
 
 class FuncInfo(object):
@@ -20,11 +19,8 @@
         self.funcval = funcval
         self.opargs = []
         argtypes, restype = self._get_signature(funcval)
-        try:
-            self.descr = cpu.calldescrof_dynamic(argtypes, restype)
-        except UnsupportedKind:
-            # e.g., I or U for long longs
-            self.descr = None
+        self.descr = cpu.calldescrof_dynamic(argtypes, restype)
+        # ^^^ may be None if unsupported
         self.prepare_op = prepare_op
         self.delayed_ops = []
 
@@ -184,7 +180,8 @@
     def do_call(self, op):
         funcval = self._get_funcval(op)
         funcinfo = self.funcinfo
-        if not funcinfo or funcinfo.funcval is not funcval:
+        if (not funcinfo or funcinfo.funcval is not funcval or
+            funcinfo.descr is None):
             return [op] # cannot optimize
         funcsymval = self.getvalue(op.getarg(2))
         arglist = [funcsymval.force_box()]
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -215,6 +215,7 @@
 
     for _opimpl in ['int_is_true', 'int_is_zero', 'int_neg', 'int_invert',
                     'cast_float_to_int', 'cast_int_to_float',
+                    'cast_float_to_singlefloat', 'cast_singlefloat_to_float',
                     'float_neg', 'float_abs',
                     ]:
         exec py.code.Source('''
@@ -1242,7 +1243,7 @@
         src_i = src_r = src_f = 0
         i = 1
         for kind in descr.get_arg_types():
-            if kind == history.INT:
+            if kind == history.INT or kind == 'S':        # single float
                 while True:
                     box = argboxes[src_i]
                     src_i += 1
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -408,6 +408,8 @@
     'FLOAT_ABS/1',
     'CAST_FLOAT_TO_INT/1',
     'CAST_INT_TO_FLOAT/1',
+    'CAST_FLOAT_TO_SINGLEFLOAT/1',
+    'CAST_SINGLEFLOAT_TO_FLOAT/1',
     #
     'INT_LT/2b',
     'INT_LE/2b',
diff --git a/pypy/jit/metainterp/test/test_fficall.py b/pypy/jit/metainterp/test/test_fficall.py
--- a/pypy/jit/metainterp/test/test_fficall.py
+++ b/pypy/jit/metainterp/test/test_fficall.py
@@ -3,7 +3,7 @@
 from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
 from pypy.rlib.jit import JitDriver, promote, dont_look_inside
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.libffi import ArgChain, longlong2float, float2longlong
+from pypy.rlib.libffi import ArgChain
 from pypy.rlib.libffi import IS_32_BIT
 from pypy.rlib.test.test_libffi import TestLibffiCall as _TestLibffiCall
 from pypy.rpython.lltypesystem import lltype, rffi
@@ -12,10 +12,11 @@
 from pypy.jit.metainterp.test.support import LLJitMixin
 
 class TestFfiCall(LLJitMixin, _TestLibffiCall):
+    supports_all = False     # supports_{floats,longlong,singlefloats}
 
     # ===> ../../../rlib/test/test_libffi.py
 
-    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
+    def call(self, funcspec, args, RESULT, is_struct=False, jitif=[]):
         """
         Call the function specified by funcspec in a loop, and let the jit to
         see and optimize it.
@@ -24,14 +25,7 @@
         lib, name, argtypes, restype = funcspec
         method_and_args = []
         for argval in args:
-            if type(argval) is r_singlefloat:
-                method_name = 'arg_singlefloat'
-                argval = float(argval)
-            elif IS_32_BIT and type(argval) in [r_longlong, r_ulonglong]:
-                method_name = 'arg_longlong'
-                argval = rffi.cast(rffi.LONGLONG, argval)
-                argval = longlong2float(argval)
-            elif isinstance(argval, tuple):
+            if isinstance(argval, tuple):
                 method_name, argval = argval
             else:
                 method_name = 'arg'
@@ -39,10 +33,20 @@
         method_and_args = unrolling_iterable(method_and_args)
         #
         reds = ['n', 'res', 'func']
-        if (RESULT in [rffi.FLOAT, rffi.DOUBLE] or
+        if (RESULT is rffi.DOUBLE or
             IS_32_BIT and RESULT in [rffi.LONGLONG, rffi.ULONGLONG]):
-            reds = ['n', 'func', 'res'] # floats must be *after* refs
+            reds = ['n', 'func', 'res'] # 'double' floats must be *after* refs
         driver = JitDriver(reds=reds, greens=[])
+        init_result = rffi.cast(RESULT, 0)
+        #
+        def g(func):
+            # a different function, which is marked as "dont_look_inside"
+            # in case it uses an unsupported argument
+            argchain = ArgChain()
+            # this loop is unrolled
+            for method_name, argval in method_and_args:
+                getattr(argchain, method_name)(argval)
+            return func.call(argchain, RESULT, is_struct=is_struct)
         #
         def f(n):
             func = lib.getpointer(name, argtypes, restype)
@@ -50,18 +54,44 @@
             while n < 10:
                 driver.jit_merge_point(n=n, res=res, func=func)
                 promote(func)
-                argchain = ArgChain()
-                # this loop is unrolled
-                for method_name, argval in method_and_args:
-                    getattr(argchain, method_name)(argval)
-                res = func.call(argchain, RESULT, is_struct=is_struct)
+                res = g(func)
                 n += 1
             return res
         #
-        res = self.meta_interp(f, [0], backendopt=True)
+        res = self.meta_interp(f, [0], backendopt=True,
+                               supports_floats       = self.supports_all,
+                               supports_longlong     = self.supports_all,
+                               supports_singlefloats = self.supports_all)
+        d = {'floats': self.supports_all,
+             'longlong': self.supports_all or not IS_32_BIT,
+             'singlefloats': self.supports_all,
+             'byval': False}
+        supported = all(d[check] for check in jitif)
+        if supported:
+            self.check_loops(
+                call_release_gil=1,   # a CALL_RELEASE_GIL, and no other CALLs
+                call=0,
+                call_may_force=0,
+                guard_no_exception=1,
+                guard_not_forced=1,
+                int_add=1,
+                int_lt=1,
+                guard_true=1,
+                jump=1)
+        else:
+            self.check_loops(
+                call_release_gil=0,   # no CALL_RELEASE_GIL
+                int_add=1,
+                int_lt=1,
+                guard_true=1,
+                jump=1)
         return res
 
     def test_byval_result(self):
         _TestLibffiCall.test_byval_result(self)
     test_byval_result.__doc__ = _TestLibffiCall.test_byval_result.__doc__
     test_byval_result.dont_track_allocations = True
+
+
+class TestFfiCallSupportAll(TestFfiCall):
+    supports_all = True     # supports_{floats,longlong,singlefloats}
diff --git a/pypy/jit/metainterp/test/test_float.py b/pypy/jit/metainterp/test/test_float.py
--- a/pypy/jit/metainterp/test/test_float.py
+++ b/pypy/jit/metainterp/test/test_float.py
@@ -36,6 +36,15 @@
         res = self.interp_operations(f, [x])
         assert res == -x
 
+    def test_singlefloat(self):
+        from pypy.rlib.rarithmetic import r_singlefloat
+        def f(a):
+            a = float(r_singlefloat(a))
+            a *= 4.25
+            return float(r_singlefloat(a))
+        res = self.interp_operations(f, [-2.0])
+        assert res == -8.5
+
 
 class TestOOtype(FloatTests, OOJitMixin):
     pass
diff --git a/pypy/jit/metainterp/test/test_warmspot.py b/pypy/jit/metainterp/test/test_warmspot.py
--- a/pypy/jit/metainterp/test/test_warmspot.py
+++ b/pypy/jit/metainterp/test/test_warmspot.py
@@ -303,6 +303,7 @@
         class FakeCPU(object):
             supports_floats = False
             supports_longlong = False
+            supports_singlefloats = False
             ts = llhelper
             translate_support_code = False
             stats = "stats"
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -1,13 +1,14 @@
 from pypy.rpython.test.test_llinterp import interpret
-from pypy.rpython.lltypesystem import lltype, llmemory, rstr
+from pypy.rpython.lltypesystem import lltype, llmemory, rstr, rffi
 from pypy.rpython.ootypesystem import ootype
 from pypy.rpython.annlowlevel import llhelper
-from pypy.jit.metainterp.warmstate import wrap, unwrap
+from pypy.jit.metainterp.warmstate import wrap, unwrap, specialize_value
 from pypy.jit.metainterp.warmstate import equal_whatever, hash_whatever
 from pypy.jit.metainterp.warmstate import WarmEnterState, JitCell
 from pypy.jit.metainterp.history import BoxInt, BoxFloat, BoxPtr
 from pypy.jit.metainterp.history import ConstInt, ConstFloat, ConstPtr
 from pypy.jit.codewriter import longlong
+from pypy.rlib.rarithmetic import r_singlefloat
 
 def boxfloat(x):
     return BoxFloat(longlong.getfloatstorage(x))
@@ -40,6 +41,28 @@
     assert _is(wrap(None, 42, in_const_box=True), ConstInt(42))
     assert _is(wrap(None, 42.5, in_const_box=True), constfloat(42.5))
     assert _is(wrap(None, p, in_const_box=True), ConstPtr(po))
+    if longlong.supports_longlong:
+        import sys
+        from pypy.rlib.rarithmetic import r_longlong, r_ulonglong
+        value = r_longlong(-sys.maxint*17)
+        assert _is(wrap(None, value), BoxFloat(value))
+        assert _is(wrap(None, value, in_const_box=True), ConstFloat(value))
+        value_unsigned = r_ulonglong(-sys.maxint*17)
+        assert _is(wrap(None, value_unsigned), BoxFloat(value))
+    sfval = r_singlefloat(42.5)
+    ival = longlong.singlefloat2int(sfval)
+    assert _is(wrap(None, sfval), BoxInt(ival))
+    assert _is(wrap(None, sfval, in_const_box=True), ConstInt(ival))
+
+def test_specialize_value():
+    assert specialize_value(lltype.Char, 0x41) == '\x41'
+    if longlong.supports_longlong:
+        import sys
+        value = longlong.r_float_storage(sys.maxint*17)
+        assert specialize_value(lltype.SignedLongLong, value) == sys.maxint*17
+    sfval = r_singlefloat(42.5)
+    ival = longlong.singlefloat2int(sfval)
+    assert specialize_value(rffi.FLOAT, ival) == sfval
 
 def test_hash_equal_whatever_lltype():
     s1 = rstr.mallocstr(2)
diff --git a/pypy/jit/metainterp/warmspot.py b/pypy/jit/metainterp/warmspot.py
--- a/pypy/jit/metainterp/warmspot.py
+++ b/pypy/jit/metainterp/warmspot.py
@@ -173,6 +173,7 @@
             policy = JitPolicy()
         policy.set_supports_floats(self.cpu.supports_floats)
         policy.set_supports_longlong(self.cpu.supports_longlong)
+        policy.set_supports_singlefloats(self.cpu.supports_singlefloats)
         graphs = self.codewriter.find_all_graphs(policy)
         policy.dump_unsafe_loops()
         self.check_access_directly_sanity(graphs)
@@ -283,7 +284,9 @@
         auto_inline_graphs(self.translator, graphs, 0.01)
 
     def build_cpu(self, CPUClass, translate_support_code=False,
-                  no_stats=False, **kwds):
+                  no_stats=False, supports_floats=True,
+                  supports_longlong=True, supports_singlefloats=True,
+                  **kwds):
         assert CPUClass is not None
         self.opt = history.Options(**kwds)
         if no_stats:
@@ -295,6 +298,9 @@
             self.annhelper = MixLevelHelperAnnotator(self.translator.rtyper)
         cpu = CPUClass(self.translator.rtyper, self.stats, self.opt,
                        translate_support_code, gcdescr=self.gcdescr)
+        if not supports_floats:       cpu.supports_floats       = False
+        if not supports_longlong:     cpu.supports_longlong     = False
+        if not supports_singlefloats: cpu.supports_singlefloats = False
         self.cpu = cpu
 
     def build_meta_interp(self, ProfilerClass):
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -25,9 +25,13 @@
         if isinstance(TYPE, lltype.Ptr) and TYPE.TO._gckind == 'raw':
             # non-gc pointer
             return rffi.cast(TYPE, x)
+        elif TYPE is lltype.SingleFloat:
+            return longlong.int2singlefloat(x)
         else:
             return lltype.cast_primitive(TYPE, x)
     elif INPUT is longlong.FLOATSTORAGE:
+        if longlong.is_longlong(TYPE):
+            return rffi.cast(TYPE, x)
         assert TYPE is lltype.Float
         return longlong.getrealfloat(x)
     else:
@@ -84,8 +88,12 @@
             return history.ConstObj(value)
         else:
             return history.BoxObj(value)
-    elif isinstance(value, float):
-        value = longlong.getfloatstorage(value)
+    elif (isinstance(value, float) or
+          longlong.is_longlong(lltype.typeOf(value))):
+        if isinstance(value, float):
+            value = longlong.getfloatstorage(value)
+        else:
+            value = rffi.cast(lltype.SignedLongLong, value)
         if in_const_box:
             return history.ConstFloat(value)
         else:
@@ -93,6 +101,8 @@
     elif isinstance(value, str) or isinstance(value, unicode):
         assert len(value) == 1     # must be a character
         value = ord(value)
+    elif lltype.typeOf(value) is lltype.SingleFloat:
+        value = longlong.singlefloat2int(value)
     else:
         value = intmask(value)
     if in_const_box:
diff --git a/pypy/module/_ffi/interp_ffi.py b/pypy/module/_ffi/interp_ffi.py
--- a/pypy/module/_ffi/interp_ffi.py
+++ b/pypy/module/_ffi/interp_ffi.py
@@ -222,9 +222,9 @@
                 w_arg = space.ord(w_arg)
                 argchain.arg(space.int_w(w_arg))
             elif w_argtype.is_double():
-                argchain.arg(space.float_w(w_arg))
+                self.arg_float(space, argchain, w_arg)
             elif w_argtype.is_singlefloat():
-                argchain.arg_singlefloat(space.float_w(w_arg))
+                self.arg_singlefloat(space, argchain, w_arg)
             elif w_argtype.is_struct():
                 # arg_raw directly takes value to put inside ll_args
                 w_arg = space.interp_w(W_StructureInstance, w_arg)
@@ -267,15 +267,26 @@
         else:
             return w_arg
 
-    @jit.dont_look_inside
+    def arg_float(self, space, argchain, w_arg):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether floats are supported
+        argchain.arg(space.float_w(w_arg))
+
     def arg_longlong(self, space, argchain, w_arg):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether longlongs are supported
         bigarg = space.bigint_w(w_arg)
         ullval = bigarg.ulonglongmask()
         llval = rffi.cast(rffi.LONGLONG, ullval)
-        # this is a hack: we store the 64 bits of the long long into the
-        # 64 bits of a float (i.e., a C double)
-        floatval = libffi.longlong2float(llval)
-        argchain.arg_longlong(floatval)
+        argchain.arg(llval)
+
+    def arg_singlefloat(self, space, argchain, w_arg):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether singlefloats are supported
+        from pypy.rlib.rarithmetic import r_singlefloat
+        fval = space.float_w(w_arg)
+        sfval = r_singlefloat(fval)
+        argchain.arg(sfval)
 
     def call(self, space, args_w):
         self = jit.promote(self)
@@ -296,8 +307,7 @@
             # note that we must check for longlong first, because either
             # is_signed or is_unsigned returns true anyway
             assert libffi.IS_32_BIT
-            reskind = libffi.types.getkind(self.func.restype) # XXX: remove the kind
-            return self._call_longlong(space, argchain, reskind)
+            return self._call_longlong(space, argchain)
         elif w_restype.is_signed():
             return self._call_int(space, argchain)
         elif w_restype.is_unsigned() or w_restype.is_pointer():
@@ -309,12 +319,9 @@
             intres = self.func.call(argchain, rffi.WCHAR_T)
             return space.wrap(unichr(intres))
         elif w_restype.is_double():
-            floatres = self.func.call(argchain, rffi.DOUBLE)
-            return space.wrap(floatres)
+            return self._call_float(space, argchain)
         elif w_restype.is_singlefloat():
-            # the result is a float, but widened to be inside a double
-            floatres = self.func.call(argchain, rffi.FLOAT)
-            return space.wrap(floatres)
+            return self._call_singlefloat(space, argchain)
         elif w_restype.is_struct():
             w_datashape = w_restype.w_datashape
             assert isinstance(w_datashape, W_Structure)
@@ -383,19 +390,32 @@
                                  space.wrap('Unsupported restype'))
         return space.wrap(intres)
 
-    @jit.dont_look_inside
-    def _call_longlong(self, space, argchain, reskind):
-        # this is a hack: we store the 64 bits of the long long into the 64
-        # bits of a float (i.e., a C double)
-        floatres = self.func.call(argchain, rffi.LONGLONG)
-        llres = libffi.float2longlong(floatres)
-        if reskind == 'I':
+    def _call_float(self, space, argchain):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether floats are supported
+        floatres = self.func.call(argchain, rffi.DOUBLE)
+        return space.wrap(floatres)
+
+    def _call_longlong(self, space, argchain):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether longlongs are supported
+        restype = self.func.restype
+        call = self.func.call
+        if restype is libffi.types.slonglong:
+            llres = call(argchain, rffi.LONGLONG)
             return space.wrap(llres)
-        elif reskind == 'U':
-            ullres = rffi.cast(rffi.ULONGLONG, llres)
+        elif restype is libffi.types.ulonglong:
+            ullres = call(argchain, rffi.ULONGLONG)
             return space.wrap(ullres)
         else:
-            assert False
+            raise OperationError(space.w_ValueError,
+                                 space.wrap('Unsupported longlong restype'))
+
+    def _call_singlefloat(self, space, argchain):
+        # a separate function, which can be seen by the jit or not,
+        # depending on whether singlefloats are supported
+        sfres = self.func.call(argchain, rffi.FLOAT)
+        return space.wrap(float(sfres))
 
     def getaddr(self, space):
         """
diff --git a/pypy/module/_multibytecodec/__init__.py b/pypy/module/_multibytecodec/__init__.py
--- a/pypy/module/_multibytecodec/__init__.py
+++ b/pypy/module/_multibytecodec/__init__.py
@@ -7,13 +7,14 @@
         # for compatibility this name is obscured, and should be called
         # via the _codecs_*.py modules written in lib_pypy.
         '__getcodec': 'interp_multibytecodec.getcodec',
+
+        'MultibyteIncrementalDecoder':
+            'interp_incremental.MultibyteIncrementalDecoder',
+        'MultibyteIncrementalEncoder':
+            'interp_incremental.MultibyteIncrementalEncoder',
     }
 
     appleveldefs = {
-        'MultibyteIncrementalEncoder':
-            'app_multibytecodec.MultibyteIncrementalEncoder',
-        'MultibyteIncrementalDecoder':
-            'app_multibytecodec.MultibyteIncrementalDecoder',
         'MultibyteStreamReader':
             'app_multibytecodec.MultibyteStreamReader',
         'MultibyteStreamWriter':
diff --git a/pypy/module/_multibytecodec/app_multibytecodec.py b/pypy/module/_multibytecodec/app_multibytecodec.py
--- a/pypy/module/_multibytecodec/app_multibytecodec.py
+++ b/pypy/module/_multibytecodec/app_multibytecodec.py
@@ -1,34 +1,47 @@
 # NOT_RPYTHON
 #
-# These classes are not supported so far.
-#
-# My theory is that they are not widely used on CPython either, because
-# I found two bugs just by looking at their .c source: they always call
-# encreset() after a piece of data, even though I think it's wrong ---
-# it should be called only once at the end; and mbiencoder_reset() calls
-# decreset() instead of encreset().
-#
+# The interface here may be a little bit on the lightweight side.
 
-class MultibyteIncrementalEncoder(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteIncrementalEncoder not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+from _multibytecodec import MultibyteIncrementalDecoder
+from _multibytecodec import MultibyteIncrementalEncoder
 
-class MultibyteIncrementalDecoder(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteIncrementalDecoder not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
 
-class MultibyteStreamReader(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteStreamReader not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+class MultibyteStreamReader(MultibyteIncrementalDecoder):
+    def __new__(cls, stream, errors=None):
+        self = MultibyteIncrementalDecoder.__new__(cls, errors)
+        self.stream = stream
+        return self
 
-class MultibyteStreamWriter(object):
-    def __init__(self, *args, **kwds):
-        raise LookupError(
-            "MultibyteStreamWriter not implemented; "
-            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+    def __read(self, read, size):
+        if size is None or size < 0:
+            return MultibyteIncrementalDecoder.decode(self, read(), True)
+        while True:
+            data = read(size)
+            final = not data
+            output = MultibyteIncrementalDecoder.decode(self, data, final)
+            if output or final:
+                return output
+            size = 1   # read 1 more byte and retry
+
+    def read(self, size=None):
+        return self.__read(self.stream.read, size)
+
+    def readline(self, size=None):
+        return self.__read(self.stream.readline, size)
+
+    def readlines(self, sizehint=None):
+        return self.__read(self.stream.read, sizehint).splitlines(True)
+
+
+class MultibyteStreamWriter(MultibyteIncrementalEncoder):
+    def __new__(cls, stream, errors=None):
+        self = MultibyteIncrementalEncoder.__new__(cls, errors)
+        self.stream = stream
+        return self
+
+    def write(self, data):
+        self.stream.write(MultibyteIncrementalEncoder.encode(self, data))
+
+    def writelines(self, lines):
+        for data in lines:
+            self.write(data)
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -52,11 +52,13 @@
     includes = ['src/cjkcodecs/multibytecodec.h'],
     include_dirs = [str(srcdir)],
     export_symbols = [
+        "pypy_cjk_dec_new",
         "pypy_cjk_dec_init", "pypy_cjk_dec_free", "pypy_cjk_dec_chunk",
         "pypy_cjk_dec_outbuf", "pypy_cjk_dec_outlen",
         "pypy_cjk_dec_inbuf_remaining", "pypy_cjk_dec_inbuf_consumed",
         "pypy_cjk_dec_replace_on_error",
 
+        "pypy_cjk_enc_new",
         "pypy_cjk_enc_init", "pypy_cjk_enc_free", "pypy_cjk_enc_chunk",
         "pypy_cjk_enc_reset", "pypy_cjk_enc_outbuf", "pypy_cjk_enc_outlen",
         "pypy_cjk_enc_inbuf_remaining", "pypy_cjk_enc_inbuf_consumed",
@@ -92,9 +94,11 @@
 # Decoding
 
 DECODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_dec_s', compilation_info=eci)
+pypy_cjk_dec_new = llexternal('pypy_cjk_dec_new',
+                              [MULTIBYTECODEC_P], DECODEBUF_P)
 pypy_cjk_dec_init = llexternal('pypy_cjk_dec_init',
-                               [MULTIBYTECODEC_P, rffi.CCHARP, rffi.SSIZE_T],
-                               DECODEBUF_P)
+                               [DECODEBUF_P, rffi.CCHARP, rffi.SSIZE_T],
+                               rffi.SSIZE_T)
 pypy_cjk_dec_free = llexternal('pypy_cjk_dec_free', [DECODEBUF_P],
                                lltype.Void)
 pypy_cjk_dec_chunk = llexternal('pypy_cjk_dec_chunk', [DECODEBUF_P],
@@ -113,25 +117,30 @@
                                            rffi.SSIZE_T)
 
 def decode(codec, stringdata, errors="strict", errorcb=None, namecb=None):
+    decodebuf = pypy_cjk_dec_new(codec)
+    if not decodebuf:
+        raise MemoryError
+    try:
+        return decodeex(decodebuf, stringdata, errors, errorcb, namecb)
+    finally:
+        pypy_cjk_dec_free(decodebuf)
+
+def decodeex(decodebuf, stringdata, errors="strict", errorcb=None, namecb=None,
+             ignore_error=0):
     inleft = len(stringdata)
     inbuf = rffi.get_nonmovingbuffer(stringdata)
     try:
-        decodebuf = pypy_cjk_dec_init(codec, inbuf, inleft)
-        if not decodebuf:
+        if pypy_cjk_dec_init(decodebuf, inbuf, inleft) < 0:
             raise MemoryError
-        try:
-            while True:
-                r = pypy_cjk_dec_chunk(decodebuf)
-                if r == 0:
-                    break
-                multibytecodec_decerror(decodebuf, r, errors,
-                                        errorcb, namecb, stringdata)
-            src = pypy_cjk_dec_outbuf(decodebuf)
-            length = pypy_cjk_dec_outlen(decodebuf)
-            return rffi.wcharpsize2unicode(src, length)
-        #
-        finally:
-            pypy_cjk_dec_free(decodebuf)
+        while True:
+            r = pypy_cjk_dec_chunk(decodebuf)
+            if r == 0 or r == ignore_error:
+                break
+            multibytecodec_decerror(decodebuf, r, errors,
+                                    errorcb, namecb, stringdata)
+        src = pypy_cjk_dec_outbuf(decodebuf)
+        length = pypy_cjk_dec_outlen(decodebuf)
+        return rffi.wcharpsize2unicode(src, length)
     #
     finally:
         rffi.free_nonmovingbuffer(stringdata, inbuf)
@@ -174,13 +183,15 @@
 # ____________________________________________________________
 # Encoding
 ENCODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_enc_s', compilation_info=eci)
+pypy_cjk_enc_new = llexternal('pypy_cjk_enc_new',
+                               [MULTIBYTECODEC_P], ENCODEBUF_P)
 pypy_cjk_enc_init = llexternal('pypy_cjk_enc_init',
-                               [MULTIBYTECODEC_P, rffi.CWCHARP, rffi.SSIZE_T],
-                               ENCODEBUF_P)
+                               [ENCODEBUF_P, rffi.CWCHARP, rffi.SSIZE_T],
+                               rffi.SSIZE_T)
 pypy_cjk_enc_free = llexternal('pypy_cjk_enc_free', [ENCODEBUF_P],
                                lltype.Void)
-pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk', [ENCODEBUF_P],
-                                rffi.SSIZE_T)
+pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk',
+                                [ENCODEBUF_P, rffi.SSIZE_T], rffi.SSIZE_T)
 pypy_cjk_enc_reset = llexternal('pypy_cjk_enc_reset', [ENCODEBUF_P],
                                 rffi.SSIZE_T)
 pypy_cjk_enc_outbuf = llexternal('pypy_cjk_enc_outbuf', [ENCODEBUF_P],
@@ -195,39 +206,52 @@
                                            [ENCODEBUF_P, rffi.CCHARP,
                                             rffi.SSIZE_T, rffi.SSIZE_T],
                                            rffi.SSIZE_T)
+pypy_cjk_enc_getcodec = llexternal('pypy_cjk_enc_getcodec',
+                                   [ENCODEBUF_P], MULTIBYTECODEC_P)
+MBENC_FLUSH = 1
+MBENC_RESET = 2
 
 def encode(codec, unicodedata, errors="strict", errorcb=None, namecb=None):
+    encodebuf = pypy_cjk_enc_new(codec)
+    if not encodebuf:
+        raise MemoryError
+    try:
+        return encodeex(encodebuf, unicodedata, errors, errorcb, namecb)
+    finally:
+        pypy_cjk_enc_free(encodebuf)
+
+def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None,
+             namecb=None, ignore_error=0):
     inleft = len(unicodedata)
     inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
     try:
-        encodebuf = pypy_cjk_enc_init(codec, inbuf, inleft)
-        if not encodebuf:
+        if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0:
             raise MemoryError
-        try:
-            while True:
-                r = pypy_cjk_enc_chunk(encodebuf)
-                if r == 0:
-                    break
-                multibytecodec_encerror(encodebuf, r, errors,
-                                        codec, errorcb, namecb, unicodedata)
-            while True:
-                r = pypy_cjk_enc_reset(encodebuf)
-                if r == 0:
-                    break
-                multibytecodec_encerror(encodebuf, r, errors,
-                                        codec, errorcb, namecb, unicodedata)
-            src = pypy_cjk_enc_outbuf(encodebuf)
-            length = pypy_cjk_enc_outlen(encodebuf)
-            return rffi.charpsize2str(src, length)
-        #
-        finally:
-            pypy_cjk_enc_free(encodebuf)
+        if ignore_error == 0:
+            flags = MBENC_FLUSH | MBENC_RESET
+        else:
+            flags = MBENC_RESET
+        while True:
+            r = pypy_cjk_enc_chunk(encodebuf, flags)
+            if r == 0 or r == ignore_error:
+                break
+            multibytecodec_encerror(encodebuf, r, errors,
+                                    errorcb, namecb, unicodedata)
+        while True:
+            r = pypy_cjk_enc_reset(encodebuf)
+            if r == 0:
+                break
+            multibytecodec_encerror(encodebuf, r, errors,
+                                    errorcb, namecb, unicodedata)
+        src = pypy_cjk_enc_outbuf(encodebuf)
+        length = pypy_cjk_enc_outlen(encodebuf)
+        return rffi.charpsize2str(src, length)
     #
     finally:
         rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf)
 
 def multibytecodec_encerror(encodebuf, e, errors,
-                            codec, errorcb, namecb, unicodedata):
+                            errorcb, namecb, unicodedata):
     if e > 0:
         reason = "illegal multibyte sequence"
         esize = e
@@ -248,6 +272,7 @@
     elif errors == "ignore":
         replace = ""
     elif errors == "replace":
+        codec = pypy_cjk_enc_getcodec(encodebuf)
         try:
             replace = encode(codec, u"?")
         except EncodeDecodeError:
diff --git a/pypy/module/_multibytecodec/interp_incremental.py b/pypy/module/_multibytecodec/interp_incremental.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/interp_incremental.py
@@ -0,0 +1,141 @@
+from pypy.rpython.lltypesystem import lltype
+from pypy.module._multibytecodec import c_codecs
+from pypy.module._multibytecodec.interp_multibytecodec import (
+    MultibyteCodec, wrap_unicodedecodeerror, wrap_runtimeerror,
+    wrap_unicodeencodeerror)
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.interpreter.typedef import TypeDef, GetSetProperty
+from pypy.module._codecs.interp_codecs import CodecState
+
+
+class MultibyteIncrementalBase(Wrappable):
+
+    def __init__(self, space, errors):
+        if errors is None:
+            errors = 'strict'
+        self.space = space
+        self.errors = errors
+        w_codec = space.getattr(space.wrap(self), space.wrap("codec"))
+        codec = space.interp_w(MultibyteCodec, w_codec)
+        self.codec = codec.codec
+        self.name = codec.name
+        self._initialize()
+
+    def __del__(self):
+        self._free()
+
+    def reset_w(self):
+        self._free()
+        self._initialize()
+
+    def fget_errors(self, space):
+        return space.wrap(self.errors)
+
+    def fset_errors(self, space, w_errors):
+        self.errors = space.str_w(w_errors)
+
+
+class MultibyteIncrementalDecoder(MultibyteIncrementalBase):
+
+    def _initialize(self):
+        self.decodebuf = c_codecs.pypy_cjk_dec_new(self.codec)
+        self.pending = ""
+
+    def _free(self):
+        self.pending = None
+        if self.decodebuf:
+            c_codecs.pypy_cjk_dec_free(self.decodebuf)
+            self.decodebuf = lltype.nullptr(c_codecs.DECODEBUF_P.TO)
+
+    @unwrap_spec(object=str, final=bool)
+    def decode_w(self, object, final=False):
+        space = self.space
+        state = space.fromcache(CodecState)
+        if len(self.pending) > 0:
+            object = self.pending + object
+        try:
+            output = c_codecs.decodeex(self.decodebuf, object, self.errors,
+                                       state.decode_error_handler, self.name,
+                                       get_ignore_error(final))
+        except c_codecs.EncodeDecodeError, e:
+            raise wrap_unicodedecodeerror(space, e, object, self.name)
+        except RuntimeError:
+            raise wrap_runtimeerror(space)
+        pos = c_codecs.pypy_cjk_dec_inbuf_consumed(self.decodebuf)
+        assert 0 <= pos <= len(object)
+        self.pending = object[pos:]
+        return space.wrap(output)
+
+
+ at unwrap_spec(errors="str_or_None")
+def mbidecoder_new(space, w_subtype, errors=None):
+    r = space.allocate_instance(MultibyteIncrementalDecoder, w_subtype)
+    r.__init__(space, errors)
+    return space.wrap(r)
+
+MultibyteIncrementalDecoder.typedef = TypeDef(
+    'MultibyteIncrementalDecoder',
+    __module__ = '_multibytecodec',
+    __new__ = interp2app(mbidecoder_new),
+    decode  = interp2app(MultibyteIncrementalDecoder.decode_w),
+    reset   = interp2app(MultibyteIncrementalDecoder.reset_w),
+    errors  = GetSetProperty(MultibyteIncrementalDecoder.fget_errors,
+                             MultibyteIncrementalDecoder.fset_errors),
+    )
+
+
+class MultibyteIncrementalEncoder(MultibyteIncrementalBase):
+
+    def _initialize(self):
+        self.encodebuf = c_codecs.pypy_cjk_enc_new(self.codec)
+        self.pending = u""
+
+    def _free(self):
+        self.pending = None
+        if self.encodebuf:
+            c_codecs.pypy_cjk_enc_free(self.encodebuf)
+            self.encodebuf = lltype.nullptr(c_codecs.ENCODEBUF_P.TO)
+
+    @unwrap_spec(object=unicode, final=bool)
+    def encode_w(self, object, final=False):
+        space = self.space
+        state = space.fromcache(CodecState)
+        if len(self.pending) > 0:
+            object = self.pending + object
+        try:
+            output = c_codecs.encodeex(self.encodebuf, object, self.errors,
+                                       state.encode_error_handler, self.name,
+                                       get_ignore_error(final))
+        except c_codecs.EncodeDecodeError, e:
+            raise wrap_unicodeencodeerror(space, e, object, self.name)
+        except RuntimeError:
+            raise wrap_runtimeerror(space)
+        pos = c_codecs.pypy_cjk_enc_inbuf_consumed(self.encodebuf)
+        assert 0 <= pos <= len(object)
+        self.pending = object[pos:]
+        return space.wrap(output)
+
+
+ at unwrap_spec(errors="str_or_None")
+def mbiencoder_new(space, w_subtype, errors=None):
+    r = space.allocate_instance(MultibyteIncrementalEncoder, w_subtype)
+    r.__init__(space, errors)
+    return space.wrap(r)
+
+MultibyteIncrementalEncoder.typedef = TypeDef(
+    'MultibyteIncrementalEncoder',
+    __module__ = '_multibytecodec',
+    __new__ = interp2app(mbiencoder_new),
+    encode  = interp2app(MultibyteIncrementalEncoder.encode_w),
+    reset   = interp2app(MultibyteIncrementalEncoder.reset_w),
+    errors  = GetSetProperty(MultibyteIncrementalEncoder.fget_errors,
+                             MultibyteIncrementalEncoder.fset_errors),
+    )
+
+
+def get_ignore_error(final):
+    if final:
+        return 0    # don't ignore any error
+    else:
+        return c_codecs.MBERR_TOOFEW
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
--- a/pypy/module/_multibytecodec/interp_multibytecodec.py
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -22,17 +22,9 @@
             output = c_codecs.decode(self.codec, input, errors,
                                      state.decode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
-            raise OperationError(
-                space.w_UnicodeDecodeError,
-                space.newtuple([
-                    space.wrap(self.name),
-                    space.wrap(input),
-                    space.wrap(e.start),
-                    space.wrap(e.end),
-                    space.wrap(e.reason)]))
+            raise wrap_unicodedecodeerror(space, e, input, self.name)
         except RuntimeError:
-            raise OperationError(space.w_RuntimeError,
-                                 space.wrap("internal codec error"))
+            raise wrap_runtimeerror(space)
         return space.newtuple([space.wrap(output),
                                space.wrap(len(input))])
 
@@ -46,17 +38,9 @@
             output = c_codecs.encode(self.codec, input, errors,
                                      state.encode_error_handler, self.name)
         except c_codecs.EncodeDecodeError, e:
-            raise OperationError(
-                space.w_UnicodeEncodeError,
-                space.newtuple([
-                    space.wrap(self.name),
-                    space.wrap(input),
-                    space.wrap(e.start),
-                    space.wrap(e.end),
-                    space.wrap(e.reason)]))
+            raise wrap_unicodeencodeerror(space, e, input, self.name)
         except RuntimeError:
-            raise OperationError(space.w_RuntimeError,
-                                 space.wrap("internal codec error"))
+            raise wrap_runtimeerror(space)
         return space.newtuple([space.wrap(output),
                                space.wrap(len(input))])
 
@@ -78,3 +62,28 @@
         raise OperationError(space.w_LookupError,
                              space.wrap("no such codec is supported."))
     return space.wrap(MultibyteCodec(name, codec))
+
+
+def wrap_unicodedecodeerror(space, e, input, name):
+    return OperationError(
+        space.w_UnicodeDecodeError,
+        space.newtuple([
+            space.wrap(name),
+            space.wrap(input),
+            space.wrap(e.start),
+            space.wrap(e.end),
+            space.wrap(e.reason)]))
+
+def wrap_unicodeencodeerror(space, e, input, name):
+    raise OperationError(
+        space.w_UnicodeEncodeError,
+        space.newtuple([
+            space.wrap(name),
+            space.wrap(input),
+            space.wrap(e.start),
+            space.wrap(e.end),
+            space.wrap(e.reason)]))
+
+def wrap_runtimeerror(space):
+    raise OperationError(space.w_RuntimeError,
+                         space.wrap("internal codec error"))
diff --git a/pypy/module/_multibytecodec/test/test_app_incremental.py b/pypy/module/_multibytecodec/test/test_app_incremental.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_app_incremental.py
@@ -0,0 +1,163 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestClasses:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_multibytecodec'])
+        cls.w_IncrementalHzDecoder = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteIncrementalDecoder
+
+            class IncrementalHzDecoder(MultibyteIncrementalDecoder):
+                codec = _codecs_cn.getcodec('hz')
+
+            return IncrementalHzDecoder
+        """)
+        cls.w_IncrementalHzEncoder = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteIncrementalEncoder
+
+            class IncrementalHzEncoder(MultibyteIncrementalEncoder):
+                codec = _codecs_cn.getcodec('hz')
+
+            return IncrementalHzEncoder
+        """)
+        cls.w_IncrementalBig5hkscsEncoder = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteIncrementalEncoder
+
+            class IncrementalBig5hkscsEncoder(MultibyteIncrementalEncoder):
+                codec = _codecs_cn.getcodec('big5hkscs')
+
+            return IncrementalBig5hkscsEncoder
+        """)
+
+    def test_decode_hz(self):
+        d = self.IncrementalHzDecoder()
+        r = d.decode("~{abcd~}")
+        assert r == u'\u5f95\u6c85'
+        r = d.decode("~{efgh~}")
+        assert r == u'\u5f50\u73b7'
+        for c, output in zip("!~{abcd~}xyz~{efgh",
+              [u'!',  # !
+               u'',   # ~
+               u'',   # {
+               u'',   # a
+               u'\u5f95',   # b
+               u'',   # c
+               u'\u6c85',   # d
+               u'',   # ~
+               u'',   # }
+               u'x',  # x
+               u'y',  # y
+               u'z',  # z
+               u'',   # ~
+               u'',   # {
+               u'',   # e
+               u'\u5f50',   # f
+               u'',   # g
+               u'\u73b7',   # h
+               ]):
+            r = d.decode(c)
+            assert r == output
+
+    def test_decode_hz_final(self):
+        d = self.IncrementalHzDecoder()
+        r = d.decode("~{", True)
+        assert r == u''
+        raises(UnicodeDecodeError, d.decode, "~", True)
+        raises(UnicodeDecodeError, d.decode, "~{a", True)
+
+    def test_decode_hz_reset(self):
+        d = self.IncrementalHzDecoder()
+        r = d.decode("ab")
+        assert r == u'ab'
+        r = d.decode("~{")
+        assert r == u''
+        r = d.decode("ab")
+        assert r == u'\u5f95'
+        r = d.decode("ab")
+        assert r == u'\u5f95'
+        d.reset()
+        r = d.decode("ab")
+        assert r == u'ab'
+
+    def test_decode_hz_error(self):
+        d = self.IncrementalHzDecoder()
+        raises(UnicodeDecodeError, d.decode, "~{abc", True)
+        d = self.IncrementalHzDecoder("ignore")
+        r = d.decode("~{abc", True)
+        assert r == u'\u5f95'
+        d = self.IncrementalHzDecoder()
+        d.errors = "replace"
+        r = d.decode("~{abc", True)
+        assert r == u'\u5f95\ufffd'
+
+    def test_decode_hz_buffer_grow(self):
+        d = self.IncrementalHzDecoder()
+        for i in range(13):
+            r = d.decode("a" * (2**i))
+            assert r == u"a" * (2**i)
+
+    def test_encode_hz(self):
+        e = self.IncrementalHzEncoder()
+        r = e.encode("abcd")
+        assert r == 'abcd'
+        r = e.encode(u"\u5f95\u6c85")
+        assert r == '~{abcd~}'
+        r = e.encode(u"\u5f50")
+        assert r == '~{ef~}'
+        r = e.encode(u"\u73b7")
+        assert r == '~{gh~}'
+
+    def test_encode_hz_final(self):
+        e = self.IncrementalHzEncoder()
+        r = e.encode(u"xyz\u5f95\u6c85", True)
+        assert r == 'xyz~{abcd~}'
+        # This is a bit hard to test, because the only way I can see that
+        # encoders can return MBERR_TOOFEW is with surrogates, which only
+        # occur with 2-byte unicode characters...  We will just have to
+        # trust that the logic works, because it is exactly the same one
+        # as in the decode case :-/
+
+    def test_encode_hz_reset(self):
+        # Same issue as with test_encode_hz_final
+        e = self.IncrementalHzEncoder()
+        r = e.encode(u"xyz\u5f95\u6c85", True)
+        assert r == 'xyz~{abcd~}'
+        e.reset()
+        r = e.encode(u"xyz\u5f95\u6c85")
+        assert r == 'xyz~{abcd~}'
+
+    def test_encode_hz_error(self):
+        e = self.IncrementalHzEncoder()
+        raises(UnicodeEncodeError, e.encode, u"\u4321", True)
+        e = self.IncrementalHzEncoder("ignore")
+        r = e.encode(u"xy\u4321z", True)
+        assert r == 'xyz'
+        e = self.IncrementalHzEncoder()
+        e.errors = "replace"
+        r = e.encode(u"xy\u4321z", True)
+        assert r == 'xy?z'
+
+    def test_encode_hz_buffer_grow(self):
+        e = self.IncrementalHzEncoder()
+        for i in range(13):
+            r = e.encode(u"a" * (2**i))
+            assert r == "a" * (2**i)
+
+    def test_encode_big5hkscs(self):
+        #e = self.IncrementalBig5hkscsEncoder()
+        #r = e.encode(u'\xca', True)
+        #assert r == '\x88f'
+        #r = e.encode(u'\xca', True)
+        #assert r == '\x88f'
+        #raises(UnicodeEncodeError, e.encode, u'\u0304', True)
+        #
+        e = self.IncrementalBig5hkscsEncoder()
+        r = e.encode(u'\xca')
+        assert r == ''
+        r = e.encode(u'\xca')
+        assert r == '\x88f'
+        r = e.encode(u'\u0304')
+        assert r == '\x88b'
diff --git a/pypy/module/_multibytecodec/test/test_app_stream.py b/pypy/module/_multibytecodec/test/test_app_stream.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_app_stream.py
@@ -0,0 +1,93 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestStreams:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_multibytecodec'])
+        cls.w_HzStreamReader = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteStreamReader
+
+            class HzStreamReader(MultibyteStreamReader):
+                codec = _codecs_cn.getcodec('hz')
+
+            return HzStreamReader
+        """)
+        cls.w_HzStreamWriter = cls.space.appexec([], """():
+            import _codecs_cn
+            from _multibytecodec import MultibyteStreamWriter
+
+            class HzStreamWriter(MultibyteStreamWriter):
+                codec = _codecs_cn.getcodec('hz')
+
+            return HzStreamWriter
+        """)
+        cls.w_ShiftJisx0213StreamWriter = cls.space.appexec([], """():
+            import _codecs_jp
+            from _multibytecodec import MultibyteStreamWriter
+
+            class ShiftJisx0213StreamWriter(MultibyteStreamWriter):
+                codec = _codecs_jp.getcodec('shift_jisx0213')
+
+            return ShiftJisx0213StreamWriter
+        """)
+
+    def test_reader(self):
+        class FakeFile:
+            def __init__(self, data):
+                self.data = data
+                self.pos = 0
+            def read(self, size):
+                res = self.data[self.pos : self.pos + size]
+                self.pos += size
+                return res
+        #
+        r = self.HzStreamReader(FakeFile("!~{abcd~}xyz~{efgh"))
+        for expected in u'!\u5f95\u6c85xyz\u5f50\u73b7':
+            c = r.read(1)
+            assert c == expected
+        c = r.read(1)
+        assert c == ''
+
+    def test_reader_replace(self):
+        class FakeFile:
+            def __init__(self, data):
+                self.data = data
+            def read(self):
+                return self.data
+        #
+        r = self.HzStreamReader(FakeFile("!~{a"), "replace")
+        c = r.read()
+        assert c == u'!\ufffd'
+        #
+        r = self.HzStreamReader(FakeFile("!~{a"))
+        r.errors = "replace"
+        assert r.errors == "replace"
+        c = r.read()
+        assert c == u'!\ufffd'
+
+    def test_writer(self):
+        class FakeFile:
+            def __init__(self):
+                self.output = []
+            def write(self, data):
+                self.output.append(data)
+        #
+        w = self.HzStreamWriter(FakeFile())
+        for input in u'!\u5f95\u6c85xyz\u5f50\u73b7':
+            w.write(input)
+        assert w.stream.output == ['!', '~{ab~}', '~{cd~}', 'x', 'y', 'z',
+                                   '~{ef~}', '~{gh~}']
+
+    def test_no_flush(self):
+        class FakeFile:
+            def __init__(self):
+                self.output = []
+            def write(self, data):
+                self.output.append(data)
+        #
+        w = self.ShiftJisx0213StreamWriter(FakeFile())
+        w.write(u'\u30ce')
+        w.write(u'\u304b')
+        w.write(u'\u309a')
+        assert w.stream.output == ['\x83m', '', '\x82\xf5']
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
--- a/pypy/module/_multibytecodec/test/test_c_codecs.py
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -2,6 +2,7 @@
 from pypy.module._multibytecodec.c_codecs import getcodec, codecs
 from pypy.module._multibytecodec.c_codecs import decode, encode
 from pypy.module._multibytecodec.c_codecs import EncodeDecodeError
+from pypy.module._multibytecodec import c_codecs
 
 
 def test_codecs_existence():
@@ -22,6 +23,52 @@
     c = getcodec("hz")
     u = decode(c, "~{abc}")
     assert u == u'\u5f95\u6cef'
+    u = decode(c, "~{")
+    assert u == u''
+
+def test_decodeex_hz():
+    c = getcodec("hz")
+    decodebuf = c_codecs.pypy_cjk_dec_new(c)
+    u = c_codecs.decodeex(decodebuf, "~{abcd~}")
+    assert u == u'\u5f95\u6c85'
+    u = c_codecs.decodeex(decodebuf, "~{efgh~}")
+    assert u == u'\u5f50\u73b7'
+    u = c_codecs.decodeex(decodebuf, "!~{abcd~}xyz~{efgh")
+    assert u == u'!\u5f95\u6c85xyz\u5f50\u73b7'
+    c_codecs.pypy_cjk_dec_free(decodebuf)
+
+def test_decodeex_hz_incomplete():
+    c = getcodec("hz")
+    decodebuf = c_codecs.pypy_cjk_dec_new(c)
+    buf = ''
+    for c, output in zip("!~{abcd~}xyz~{efgh",
+          [u'!',  # !
+           u'',   # ~
+           u'',   # {
+           u'',   # a
+           u'\u5f95',   # b
+           u'',   # c
+           u'\u6c85',   # d
+           u'',   # ~
+           u'',   # }
+           u'x',  # x
+           u'y',  # y
+           u'z',  # z
+           u'',   # ~
+           u'',   # {
+           u'',   # e
+           u'\u5f50',   # f
+           u'',   # g
+           u'\u73b7',   # h
+           ]):
+        buf += c
+        u = c_codecs.decodeex(decodebuf, buf,
+                              ignore_error = c_codecs.MBERR_TOOFEW)
+        assert u == output
+        incompletepos = c_codecs.pypy_cjk_dec_inbuf_consumed(decodebuf)
+        buf = buf[incompletepos:]
+    assert buf == ''
+    c_codecs.pypy_cjk_dec_free(decodebuf)
 
 def test_decode_hz_error():
     # error
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -268,7 +268,7 @@
             self.ll_buffer = rffi.cast(rffi.VOIDP, address)
         else:
             self.ll_buffer = lltype.malloc(rffi.VOIDP.TO, size, flavor='raw',
-                                           zero=True)
+                                           zero=True, add_memory_pressure=True)
             if tracker.DO_TRACING:
                 ll_buf = rffi.cast(lltype.Signed, self.ll_buffer)
                 tracker.trace_allocation(ll_buf, self)
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -227,7 +227,7 @@
                     self.allocated = size + some
                     new_buffer = lltype.malloc(mytype.arraytype,
                                                self.allocated, flavor='raw',
-                                               track_allocation=False)
+                                               add_memory_pressure=True)
                     for i in range(min(size, self.len)):
                         new_buffer[i] = self.buffer[i]
                 else:
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -477,7 +477,8 @@
         BaseArray.__init__(self)
         self.size = size
         self.storage = lltype.malloc(TP, size, zero=True,
-                                     flavor='raw', track_allocation=False)
+                                     flavor='raw', track_allocation=False,
+                                     add_memory_pressure=True)
         # XXX find out why test_zjit explodes with trackign of allocations
 
     def get_concrete(self):
@@ -506,7 +507,7 @@
             self._sliceloop2(start, stop, step, arr, self)
 
     def __del__(self):
-        lltype.free(self.storage, flavor='raw')
+        lltype.free(self.storage, flavor='raw', track_allocation=False)
 
 def new_numarray(space, w_size_or_iterable):
     l = space.listview(w_size_or_iterable)
diff --git a/pypy/module/pypyjit/test_pypy_c/test_array.py b/pypy/module/pypyjit/test_pypy_c/test_array.py
--- a/pypy/module/pypyjit/test_pypy_c/test_array.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_array.py
@@ -1,4 +1,4 @@
-import py
+import py, sys
 from pypy.module.pypyjit.test_pypy_c.test_00_model import BaseTestPyPyC
 
 class TestArray(BaseTestPyPyC):
@@ -88,6 +88,73 @@
             jump(p0, p1, p2, p3, p4, p5, p6, i28, i15, p9, i10, i11, descr=<Loop0>)
         """)
 
+    def test_array_of_doubles(self):
+        def main():
+            from array import array
+            img = array('d', [21.5]*1000)
+            i = 0
+            while i < 1000:
+                img[i] += 20.5
+                assert img[i] == 42.0
+                i += 1
+            return 123
+        #
+        log = self.run(main, [])
+        assert log.result == 123
+        loop, = log.loops_by_filename(self.filepath)
+        assert loop.match("""
+            i10 = int_lt(i6, 1000)
+            guard_true(i10, descr=...)
+            i11 = int_lt(i6, i7)
+            guard_true(i11, descr=...)
+            f13 = getarrayitem_raw(i8, i6, descr=<FloatArrayNoLengthDescr>)
+            f15 = float_add(f13, 20.500000)
+            setarrayitem_raw(i8, i6, f15, descr=<FloatArrayNoLengthDescr>)
+            f16 = getarrayitem_raw(i8, i6, descr=<FloatArrayNoLengthDescr>)
+            i18 = float_eq(f16, 42.000000)
+            guard_true(i18, descr=...)
+            i20 = int_add(i6, 1)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """)
+
+    def test_array_of_floats(self):
+        def main():
+            from array import array
+            img = array('f', [21.5]*1000)
+            i = 0
+            while i < 1000:
+                img[i] += 20.5
+                assert img[i] == 42.0
+                i += 1
+            return 321
+        #
+        log = self.run(main, [])
+        assert log.result == 321
+        loop, = log.loops_by_filename(self.filepath)
+        if sys.maxint == 2147483647:
+            arraydescr = 'UnsignedArrayNoLengthDescr'
+        else:
+            arraydescr = 'UINTArrayNoLengthDescr'
+        assert loop.match("""
+            i10 = int_lt(i6, 1000)
+            guard_true(i10, descr=...)
+            i11 = int_lt(i6, i7)
+            guard_true(i11, descr=...)
+            i13 = getarrayitem_raw(i8, i6, descr=<%s>)
+            f14 = cast_singlefloat_to_float(i13)
+            f16 = float_add(f14, 20.500000)
+            i17 = cast_float_to_singlefloat(f16)
+            setarrayitem_raw(i8, i6,i17, descr=<%s>)
+            i18 = getarrayitem_raw(i8, i6, descr=<%s>)
+            f19 = cast_singlefloat_to_float(i18)
+            i21 = float_eq(f19, 42.000000)
+            guard_true(i21, descr=...)
+            i23 = int_add(i6, 1)
+            --TICK--
+            jump(..., descr=<Loop0>)
+        """ % (arraydescr, arraydescr, arraydescr))
+
 
     def test_zeropadded(self):
         def main():
diff --git a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
--- a/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
+++ b/pypy/module/test_lib_pypy/ctypes_tests/test_structures.py
@@ -424,6 +424,15 @@
             sys.settrace(oldtrace)
             events = None
 
+    def test_large_fields(self):
+        # make sure that large fields are not "confused" with bitfields
+        # (because the bitfields use the higher bits of the "size" attribute)
+        Array = c_long * 8192
+        class X(Structure):
+            _fields_ = [('items', Array)]
+        obj = X()
+        assert isinstance(obj.items, Array)
+
 class TestPointerMember(BaseCTypesTestChecker):
 
     def test_1(self):
diff --git a/pypy/rlib/libffi.py b/pypy/rlib/libffi.py
--- a/pypy/rlib/libffi.py
+++ b/pypy/rlib/libffi.py
@@ -2,14 +2,13 @@
 
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rlib.objectmodel import specialize, enforceargs, we_are_translated
-from pypy.rlib.rarithmetic import intmask, r_uint, r_singlefloat
+from pypy.rlib.rarithmetic import intmask, r_uint, r_singlefloat, r_longlong
 from pypy.rlib import jit
 from pypy.rlib import clibffi
 from pypy.rlib.clibffi import get_libc_name, FUNCFLAG_CDECL, AbstractFuncPtr, \
     push_arg_as_ffiptr, c_ffi_call, FFI_TYPE_STRUCT
 from pypy.rlib.rdynload import dlopen, dlclose, dlsym, dlsym_byordinal
 from pypy.rlib.rdynload import DLLHANDLE
-from pypy.rlib.longlong2float import longlong2float, float2longlong
 
 class types(object):
     """
@@ -122,9 +121,10 @@
         elif TYPE is rffi.DOUBLE:
             cls = FloatArg
         elif TYPE is rffi.LONGLONG or TYPE is rffi.ULONGLONG:
-            raise TypeError, 'r_(u)longlong not supported by arg(), use arg_(u)longlong()'
+            cls = LongLongArg
+            val = rffi.cast(rffi.LONGLONG, val)
         elif TYPE is rffi.FLOAT:
-            raise TypeError, 'r_singlefloat not supported by arg(), use arg_singlefloat()'
+            cls = SingleFloatArg
         else:
             raise TypeError, 'Unsupported argument type: %s' % TYPE
         self._append(cls(val))
@@ -133,25 +133,6 @@
     def arg_raw(self, val):
         self._append(RawArg(val))
 
-    def arg_longlong(self, val):
-        """
-        Note: this is a hack. So far, the JIT does not support long longs, so
-        you must pass it as if it were a python Float (rffi.DOUBLE).  You can
-        use the convenience functions longlong2float and float2longlong to do
-        the conversions.  Note that if you use long longs, the call won't
-        be jitted at all.
-        """
-        assert IS_32_BIT      # use a normal integer on 64-bit platforms
-        self._append(LongLongArg(val))
-
-    def arg_singlefloat(self, val):
-        """
-        Note: you must pass a python Float (rffi.DOUBLE), not a r_singlefloat
-        (else the jit complains).  Note that if you use single floats, the
-        call won't be jitted at all.
-        """
-        self._append(SingleFloatArg(val))
-
     def _append(self, arg):
         if self.first is None:
             self.first = self.last = arg
@@ -196,25 +177,25 @@
         func._push_raw(self.ptrval, ll_args, i)
 
 class SingleFloatArg(AbstractArg):
-    """ An argument representing a C float (but holding a C double)
+    """ An argument representing a C float
     """
 
-    def __init__(self, floatval):
-        self.floatval = floatval
+    def __init__(self, singlefloatval):
+        self.singlefloatval = singlefloatval
 
     def push(self, func, ll_args, i):
-        func._push_single_float(self.floatval, ll_args, i)
+        func._push_singlefloat(self.singlefloatval, ll_args, i)
 
 
 class LongLongArg(AbstractArg):
-    """ An argument representing a C long long (but holding a C double)
+    """ An argument representing a C long long
     """
 
-    def __init__(self, floatval):
-        self.floatval = floatval
+    def __init__(self, longlongval):
+        self.longlongval = longlongval
 
     def push(self, func, ll_args, i):
-        func._push_longlong(self.floatval, ll_args, i)
+        func._push_longlong(self.longlongval, ll_args, i)
 
 
 # ======================================================================
@@ -274,15 +255,10 @@
         elif RESULT is rffi.DOUBLE:
             return self._do_call_float(self.funcsym, ll_args)
         elif RESULT is rffi.FLOAT:
-            # XXX: even if RESULT is FLOAT, we still return a DOUBLE, else the
-            # jit complains. Note that the jit is disabled in this case
-            return self._do_call_single_float(self.funcsym, ll_args)
+            return self._do_call_singlefloat(self.funcsym, ll_args)
         elif RESULT is rffi.LONGLONG or RESULT is rffi.ULONGLONG:
-            # XXX: even if RESULT is LONGLONG, we still return a DOUBLE, else the
-            # jit complains. Note that the jit is disabled in this case
-            # (it's not a typo, we really return a DOUBLE)
             assert IS_32_BIT
-            return self._do_call_longlong(self.funcsym, ll_args)
+            res = self._do_call_longlong(self.funcsym, ll_args)
         elif RESULT is lltype.Void:
             return self._do_call_void(self.funcsym, ll_args)
         else:
@@ -320,16 +296,15 @@
     def _push_float(self, value, ll_args, i):
         self._push_arg(value, ll_args, i)
 
-    @jit.dont_look_inside
-    def _push_single_float(self, value, ll_args, i):
-        self._push_arg(r_singlefloat(value), ll_args, i)
+    @jit.oopspec('libffi_push_singlefloat(self, value, ll_args, i)')
+    @enforceargs(None, r_singlefloat, None, int) # fix the annotation for tests
+    def _push_singlefloat(self, value, ll_args, i):
+        self._push_arg(value, ll_args, i)
 
-    @jit.dont_look_inside
-    def _push_longlong(self, floatval, ll_args, i):
-        """
-        Takes a longlong represented as a python Float. It's a hack for the
-        jit, else we could not see the whole libffi module at all"""  
-        self._push_arg(float2longlong(floatval), ll_args, i)
+    @jit.oopspec('libffi_push_longlong(self, value, ll_args, i)')
+    @enforceargs(None, r_longlong, None, int) # fix the annotation for tests
+    def _push_longlong(self, value, ll_args, i):
+        self._push_arg(value, ll_args, i)
 
     @jit.oopspec('libffi_call_int(self, funcsym, ll_args)')
     def _do_call_int(self, funcsym, ll_args):
@@ -339,20 +314,18 @@
     def _do_call_float(self, funcsym, ll_args):
         return self._do_call(funcsym, ll_args, rffi.DOUBLE)
 
-    @jit.dont_look_inside
-    def _do_call_single_float(self, funcsym, ll_args):
-        single_res = self._do_call(funcsym, ll_args, rffi.FLOAT)
-        return float(single_res)
+    @jit.oopspec('libffi_call_singlefloat(self, funcsym, ll_args)')
+    def _do_call_singlefloat(self, funcsym, ll_args):
+        return self._do_call(funcsym, ll_args, rffi.FLOAT)
 
     @jit.dont_look_inside
     def _do_call_raw(self, funcsym, ll_args):
         # same as _do_call_int, but marked as jit.dont_look_inside
         return self._do_call(funcsym, ll_args, rffi.LONG)
 
-    @jit.dont_look_inside
+    @jit.oopspec('libffi_call_longlong(self, funcsym, ll_args)')
     def _do_call_longlong(self, funcsym, ll_args):
-        llres = self._do_call(funcsym, ll_args, rffi.LONGLONG)
-        return longlong2float(llres)
+        return self._do_call(funcsym, ll_args, rffi.LONGLONG)
 
     @jit.oopspec('libffi_call_void(self, funcsym, ll_args)')
     def _do_call_void(self, funcsym, ll_args):
diff --git a/pypy/rlib/longlong2float.py b/pypy/rlib/longlong2float.py
--- a/pypy/rlib/longlong2float.py
+++ b/pypy/rlib/longlong2float.py
@@ -11,6 +11,8 @@
 # -------- implement longlong2float and float2longlong --------
 DOUBLE_ARRAY_PTR = lltype.Ptr(lltype.Array(rffi.DOUBLE))
 LONGLONG_ARRAY_PTR = lltype.Ptr(lltype.Array(rffi.LONGLONG))
+UINT_ARRAY_PTR = lltype.Ptr(lltype.Array(rffi.UINT))
+FLOAT_ARRAY_PTR = lltype.Ptr(lltype.Array(rffi.FLOAT))
 
 # these definitions are used only in tests, when not translated
 def longlong2float_emulator(llval):
@@ -29,6 +31,22 @@
     lltype.free(d_array, flavor='raw')
     return llval
 
+def uint2singlefloat_emulator(ival):
+    f_array = lltype.malloc(FLOAT_ARRAY_PTR.TO, 1, flavor='raw')
+    i_array = rffi.cast(UINT_ARRAY_PTR, f_array)
+    i_array[0] = ival
+    singlefloatval = f_array[0]
+    lltype.free(f_array, flavor='raw')
+    return singlefloatval
+
+def singlefloat2uint_emulator(singlefloatval):
+    f_array = lltype.malloc(FLOAT_ARRAY_PTR.TO, 1, flavor='raw')
+    i_array = rffi.cast(UINT_ARRAY_PTR, f_array)
+    f_array[0] = singlefloatval
+    ival = i_array[0]
+    lltype.free(f_array, flavor='raw')
+    return ival
+
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 eci = ExternalCompilationInfo(includes=['string.h', 'assert.h'],
                               post_include_bits=["""
@@ -44,6 +62,18 @@
     memcpy(&ll, &x, 8);
     return ll;
 }
+static float pypy__uint2singlefloat(unsigned int x) {
+    float ff;
+    assert(sizeof(float) == 4 && sizeof(unsigned int) == 4);
+    memcpy(&ff, &x, 4);
+    return ff;
+}
+static unsigned int pypy__singlefloat2uint(float x) {
+    unsigned int ii;
+    assert(sizeof(float) == 4 && sizeof(unsigned int) == 4);
+    memcpy(&ii, &x, 4);
+    return ii;
+}
 """])
 
 longlong2float = rffi.llexternal(
@@ -55,3 +85,13 @@
     "pypy__float2longlong", [rffi.DOUBLE], rffi.LONGLONG,
     _callable=float2longlong_emulator, compilation_info=eci,
     _nowrapper=True, elidable_function=True)
+
+uint2singlefloat = rffi.llexternal(
+    "pypy__uint2singlefloat", [rffi.UINT], rffi.FLOAT,
+    _callable=uint2singlefloat_emulator, compilation_info=eci,
+    _nowrapper=True, elidable_function=True)
+
+singlefloat2uint = rffi.llexternal(
+    "pypy__singlefloat2uint", [rffi.FLOAT], rffi.UINT,
+    _callable=singlefloat2uint_emulator, compilation_info=eci,
+    _nowrapper=True, elidable_function=True)
diff --git a/pypy/rlib/rarithmetic.py b/pypy/rlib/rarithmetic.py
--- a/pypy/rlib/rarithmetic.py
+++ b/pypy/rlib/rarithmetic.py
@@ -71,9 +71,8 @@
     return int(n)
 
 def longlongmask(n):
-    if isinstance(n, int):
-        n = long(n)
-    assert isinstance(n, long)
+    assert isinstance(n, (int, long))
+    n = long(n)
     n &= LONGLONG_MASK
     if n >= LONGLONG_TEST:
         n -= 2*LONGLONG_TEST
diff --git a/pypy/rlib/test/test_libffi.py b/pypy/rlib/test/test_libffi.py
--- a/pypy/rlib/test/test_libffi.py
+++ b/pypy/rlib/test/test_libffi.py
@@ -5,7 +5,7 @@
 from pypy.rlib.rarithmetic import r_singlefloat, r_longlong, r_ulonglong
 from pypy.rlib.test.test_clibffi import BaseFfiTest, get_libm_name, make_struct_ffitype_e
 from pypy.rlib.libffi import CDLL, Func, get_libc_name, ArgChain, types
-from pypy.rlib.libffi import longlong2float, float2longlong, IS_32_BIT
+from pypy.rlib.libffi import IS_32_BIT
 
 class TestLibffiMisc(BaseFfiTest):
 
@@ -52,19 +52,6 @@
         del lib
         assert not ALLOCATED
 
-    def test_longlong_as_float(self):
-        from pypy.translator.c.test.test_genc import compile
-        maxint64 = r_longlong(9223372036854775807)
-        def fn(x):
-            d = longlong2float(x)
-            ll = float2longlong(d)
-            return ll
-        assert fn(maxint64) == maxint64
-        #
-        fn2 = compile(fn, [r_longlong])
-        res = fn2(maxint64)
-        assert res == maxint64
-
 class TestLibffiCall(BaseFfiTest):
     """
     Test various kind of calls through libffi.
@@ -111,7 +98,7 @@
     def get_libfoo(self):
         return self.CDLL(self.libfoo_name)
 
-    def call(self, funcspec, args, RESULT, init_result=0, is_struct=False):
+    def call(self, funcspec, args, RESULT, is_struct=False, jitif=[]):
         """
         Call the specified function after constructing and ArgChain with the
         arguments in ``args``.
@@ -128,14 +115,7 @@
         func = lib.getpointer(name, argtypes, restype)
         chain = ArgChain()
         for arg in args:
-            if isinstance(arg, r_singlefloat):
-                chain.arg_singlefloat(float(arg))
-            elif IS_32_BIT and isinstance(arg, r_longlong):
-                chain.arg_longlong(longlong2float(arg))
-            elif IS_32_BIT and isinstance(arg, r_ulonglong):
-                arg = rffi.cast(rffi.LONGLONG, arg)
-                chain.arg_longlong(longlong2float(arg))
-            elif isinstance(arg, tuple):
+            if isinstance(arg, tuple):
                 methname, arg = arg
                 meth = getattr(chain, methname)
                 meth(arg)
@@ -143,13 +123,19 @@
                 chain.arg(arg)
         return func.call(chain, RESULT, is_struct=is_struct)
 
-    def check_loops(self, *args, **kwds):
+    # ------------------------------------------------------------------------
+
+    def test_very_simple(self):
         """
-        Ignored here, but does something in the JIT tests
+            int diff_xy(int x, long y)
+            {
+                return x - y;
+            }
         """
-        pass
-
-    # ------------------------------------------------------------------------
+        libfoo = self.get_libfoo() 
+        func = (libfoo, 'diff_xy', [types.sint, types.slong], types.sint)
+        res = self.call(func, [50, 8], lltype.Signed)
+        assert res == 42
 
     def test_simple(self):
         """
@@ -160,23 +146,14 @@
         """
         libfoo = self.get_libfoo() 
         func = (libfoo, 'sum_xy', [types.sint, types.double], types.sint)
-        res = self.call(func, [38, 4.2], rffi.LONG)
+        res = self.call(func, [38, 4.2], lltype.Signed, jitif=["floats"])
         assert res == 42
-        self.check_loops({
-                'call_release_gil': 1,
-                'guard_no_exception': 1,
-                'guard_not_forced': 1,
-                'int_add': 1,
-                'int_lt': 1,
-                'guard_true': 1,
-                'jump': 1})
 
     def test_float_result(self):
         libm = self.get_libm()
         func = (libm, 'pow', [types.double, types.double], types.double)
-        res = self.call(func, [2.0, 3.0], rffi.DOUBLE, init_result=0.0)
+        res = self.call(func, [2.0, 3.0], rffi.DOUBLE, jitif=["floats"])
         assert res == 8.0
-        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_result(self):
         """
@@ -189,7 +166,6 @@
         func = (libfoo, 'cast_to_uchar_and_ovf', [types.sint], types.uchar)
         res = self.call(func, [0], rffi.UCHAR)
         assert res == 200
-        self.check_loops(call_release_gil=1, guard_no_exception=1, guard_not_forced=1)
 
     def test_cast_argument(self):
         """
@@ -271,8 +247,7 @@
         libfoo = self.get_libfoo()
         func = (libfoo, 'get_pointer_to_b', [], types.pointer)
         LONGP = lltype.Ptr(rffi.CArray(rffi.LONG))
-        null = lltype.nullptr(LONGP.TO)
-        res = self.call(func, [], LONGP, init_result=null)
+        res = self.call(func, [], LONGP)
         assert res[0] == 20
 
     def test_void_result(self):
@@ -287,7 +262,7 @@
         #
         initval = self.call(get_dummy, [], rffi.LONG)
         #
-        res = self.call(set_dummy, [initval+1], lltype.Void, init_result=None)
+        res = self.call(set_dummy, [initval+1], lltype.Void)
         assert res is None
         #
         res = self.call(get_dummy, [], rffi.LONG)
@@ -305,9 +280,9 @@
         func = (libfoo, 'sum_xy_float', [types.float, types.float], types.float)
         x = r_singlefloat(12.34)
         y = r_singlefloat(56.78)
-        res = self.call(func, [x, y], rffi.FLOAT, init_result=0.0)
+        res = self.call(func, [x, y], rffi.FLOAT, jitif=["singlefloats"])
         expected = c_float(c_float(12.34).value + c_float(56.78).value).value
-        assert res == expected
+        assert float(res) == expected
 
     def test_slonglong_args(self):
         """
@@ -325,16 +300,10 @@
         if IS_32_BIT:
             x = r_longlong(maxint32+1)
             y = r_longlong(maxint32+2)
-            zero = longlong2float(r_longlong(0))
         else:
             x = maxint32+1
             y = maxint32+2
-            zero = 0
-        res = self.call(func, [x, y], rffi.LONGLONG, init_result=zero)
-        if IS_32_BIT:
-            # obscure, on 32bit it's really a long long, so it returns a
-            # DOUBLE because of the JIT hack
-            res = float2longlong(res)
+        res = self.call(func, [x, y], rffi.LONGLONG, jitif=["longlong"])
         expected = maxint32*2 + 3
         assert res == expected
 
@@ -354,12 +323,7 @@
                 types.ulonglong)
         x = r_ulonglong(maxint64+1)
         y = r_ulonglong(2)
-        res = self.call(func, [x, y], rffi.ULONGLONG, init_result=0)
-        if IS_32_BIT:
-            # obscure, on 32bit it's really a long long, so it returns a
-            # DOUBLE because of the JIT hack
-            res = float2longlong(res)
-            res = rffi.cast(rffi.ULONGLONG, res)
+        res = self.call(func, [x, y], rffi.ULONGLONG, jitif=["longlong"])
         expected = maxint64 + 3
         assert res == expected
 
@@ -406,7 +370,8 @@
         buf[0] = 30
         buf[1] = 12
         adr = rffi.cast(rffi.VOIDP, buf)
-        res = self.call(sum_point, [('arg_raw', adr)], rffi.LONG, init_result=0)
+        res = self.call(sum_point, [('arg_raw', adr)], rffi.LONG,
+                        jitif=["byval"])
         assert res == 42
         # check that we still have the ownership on the buffer
         assert buf[0] == 30
@@ -431,8 +396,8 @@
         make_point = (libfoo, 'make_point', [types.slong, types.slong], ffi_point)
         #
         PTR = lltype.Ptr(rffi.CArray(rffi.LONG))
-        p = self.call(make_point, [12, 34], PTR, init_result=lltype.nullptr(PTR.TO),
-                      is_struct=True)
+        p = self.call(make_point, [12, 34], PTR, is_struct=True,
+                      jitif=["byval"])
         assert p[0] == 12
         assert p[1] == 34
         lltype.free(p, flavor='raw')
diff --git a/pypy/rlib/test/test_longlong2float.py b/pypy/rlib/test/test_longlong2float.py
--- a/pypy/rlib/test/test_longlong2float.py
+++ b/pypy/rlib/test/test_longlong2float.py
@@ -1,5 +1,7 @@
 from pypy.translator.c.test.test_genc import compile
 from pypy.rlib.longlong2float import longlong2float, float2longlong
+from pypy.rlib.longlong2float import uint2singlefloat, singlefloat2uint
+from pypy.rlib.rarithmetic import r_singlefloat
 
 
 def fn(f1):
@@ -28,3 +30,23 @@
     for x in enum_floats():
         res = fn2(x)
         assert repr(res) == repr(x)
+
+# ____________________________________________________________
+
+def fnsingle(f1):
+    sf1 = r_singlefloat(f1)
+    ii = singlefloat2uint(sf1)
+    sf2 = uint2singlefloat(ii)
+    f2 = float(sf2)
+    return f2
+
+def test_int_as_singlefloat():
+    for x in enum_floats():
+        res = fnsingle(x)
+        assert repr(res) == repr(float(r_singlefloat(x)))
+
+def test_compiled_single():
+    fn2 = compile(fnsingle, [float])
+    for x in enum_floats():
+        res = fn2(x)
+        assert repr(res) == repr(float(r_singlefloat(x)))
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -27,7 +27,11 @@
 from pypy.rpython import raddress
 from pypy.translator.platform import platform
 from array import array
-from thread import _local as tlsobject
+try:
+    from thread import _local as tlsobject
+except ImportError:
+    class tlsobject(object):
+        pass
 
 # ____________________________________________________________
 
@@ -688,6 +692,8 @@
                     res = ctypes.cast(res, ctypes.c_void_p).value
                     if res is None:
                         return 0
+                if T.TO.RESULT == lltype.SingleFloat:
+                    res = res.value     # baaaah, cannot return a c_float()
                 return res
 
             def callback(*cargs):
diff --git a/pypy/rpython/lltypesystem/lltype.py b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -1,7 +1,7 @@
 import py
 from pypy.rlib.rarithmetic import (r_int, r_uint, intmask, r_singlefloat,
                                    r_ulonglong, r_longlong, r_longfloat,
-                                   base_int, normalizedinttype)
+                                   base_int, normalizedinttype, longlongmask)
 from pypy.rlib.objectmodel import Symbolic
 from pypy.tool.uid import Hashable
 from pypy.tool.identity_dict import identity_dict
@@ -654,6 +654,9 @@
 
 _numbertypes = {int: Number("Signed", int, intmask)}
 _numbertypes[r_int] = _numbertypes[int]
+if r_longlong is not r_int:
+    _numbertypes[r_longlong] = Number("SignedLongLong", r_longlong,
+                                      longlongmask)
 
 def build_number(name, type):
     try:
@@ -1936,7 +1939,7 @@
 
 
 def malloc(T, n=None, flavor='gc', immortal=False, zero=False,
-           track_allocation=True):
+           track_allocation=True, add_memory_pressure=False):
     assert flavor in ('gc', 'raw')
     if zero or immortal:
         initialization = 'example'
diff --git a/pypy/rpython/memory/gc/minimark.py b/pypy/rpython/memory/gc/minimark.py
--- a/pypy/rpython/memory/gc/minimark.py
+++ b/pypy/rpython/memory/gc/minimark.py
@@ -729,7 +729,7 @@
             if self.max_heap_size < self.next_major_collection_threshold:
                 self.next_major_collection_threshold = self.max_heap_size
 
-    def raw_malloc_varsize_hint(self, sizehint):
+    def raw_malloc_memory_pressure(self, sizehint):
         self.next_major_collection_threshold -= sizehint
         if self.next_major_collection_threshold < 0:
             # cannot trigger a full collection now, but we can ensure
diff --git a/pypy/rpython/memory/gc/test/test_minimark.py b/pypy/rpython/memory/gc/test/test_minimark.py
--- a/pypy/rpython/memory/gc/test/test_minimark.py
+++ b/pypy/rpython/memory/gc/test/test_minimark.py
@@ -34,6 +34,7 @@
                     growth_rate_max=1.5)
     gc.min_heap_size = 100.0
     gc.max_heap_size = 300.0
+    gc.next_major_collection_initial = 0.0
     gc.next_major_collection_threshold = 0.0
     # first, we don't grow past min_heap_size
     for i in range(5):
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -386,15 +386,15 @@
         else:
             self.malloc_varsize_nonmovable_ptr = None
 
-        if getattr(GCClass, 'raw_malloc_varsize_hint', False):
-            def raw_malloc_varsize_hint(length, itemsize):
+        if getattr(GCClass, 'raw_malloc_memory_pressure', False):
+            def raw_malloc_memory_pressure(length, itemsize):
                 totalmem = length * itemsize
                 if totalmem > 0:
-                    gcdata.gc.raw_malloc_varsize_hint(totalmem)
+                    gcdata.gc.raw_malloc_memory_pressure(totalmem)
                 #else: probably an overflow -- the following rawmalloc
                 #      will fail then
-            self.raw_malloc_varsize_hint_ptr = getfn(
-                raw_malloc_varsize_hint,
+            self.raw_malloc_memory_pressure_ptr = getfn(
+                raw_malloc_memory_pressure,
                 [annmodel.SomeInteger(), annmodel.SomeInteger()],
                 annmodel.s_None, minimal_transform = False)
 
diff --git a/pypy/rpython/memory/gctransform/transform.py b/pypy/rpython/memory/gctransform/transform.py
--- a/pypy/rpython/memory/gctransform/transform.py
+++ b/pypy/rpython/memory/gctransform/transform.py
@@ -590,15 +590,10 @@
 
     def gct_fv_raw_malloc_varsize(self, hop, flags, TYPE, v_length, c_const_size, c_item_size,
                                                                     c_offset_to_length):
-        track_allocation = flags.get('track_allocation', True)
-        if not track_allocation:
-            # idea: raw mallocs with track_allocation=False correspond
-            # generally to raw mallocs of stuff that we store in GC objects.
-            # So we tell the GC about such raw mallocs, so that it can
-            # adjust its total size estimate.
-            if hasattr(self, 'raw_malloc_varsize_hint_ptr'):
+        if flags.get('add_memory_pressure', False):
+            if hasattr(self, 'raw_malloc_memory_pressure_ptr'):
                 hop.genop("direct_call",
-                          [self.raw_malloc_varsize_hint_ptr,
+                          [self.raw_malloc_memory_pressure_ptr,
                            v_length, c_item_size])
         if c_offset_to_length is None:
             if flags.get('zero'):
@@ -615,7 +610,7 @@
                                [self.raw_malloc_varsize_ptr, v_length,
                                 c_const_size, c_item_size, c_offset_to_length],
                                resulttype=llmemory.Address)
-        if track_allocation:
+        if flags.get('track_allocation', True):
             hop.genop("track_alloc_start", [v_raw])
         return v_raw
 
diff --git a/pypy/rpython/rbuiltin.py b/pypy/rpython/rbuiltin.py
--- a/pypy/rpython/rbuiltin.py
+++ b/pypy/rpython/rbuiltin.py
@@ -345,14 +345,17 @@
 BUILTIN_TYPER[object.__init__] = rtype_object__init__
 # annotation of low-level types
 
-def rtype_malloc(hop, i_flavor=None, i_zero=None, i_track_allocation=None):
+def rtype_malloc(hop, i_flavor=None, i_zero=None, i_track_allocation=None,
+                 i_add_memory_pressure=None):
     assert hop.args_s[0].is_constant()
     vlist = [hop.inputarg(lltype.Void, arg=0)]
     opname = 'malloc'
-    v_flavor, v_zero, v_track_allocation = parse_kwds(hop,
+    v_flavor, v_zero, v_track_allocation, v_add_memory_pressure = parse_kwds(
+        hop,
         (i_flavor, lltype.Void),
         (i_zero, None),
-        (i_track_allocation, None))
+        (i_track_allocation, None),
+        (i_add_memory_pressure, None))
 
     flags = {'flavor': 'gc'}
     if v_flavor is not None:
@@ -361,8 +364,11 @@
         flags['zero'] = v_zero.value
     if i_track_allocation is not None:
         flags['track_allocation'] = v_track_allocation.value
+    if i_add_memory_pressure is not None:
+        flags['add_memory_pressure'] = v_add_memory_pressure.value
     vlist.append(hop.inputconst(lltype.Void, flags))
-        
+
+    assert 1 <= hop.nb_args <= 2
     if hop.nb_args == 2:
         vlist.append(hop.inputarg(lltype.Signed, arg=1))
         opname += '_varsize'
diff --git a/pypy/tool/jitlogparser/parser.py b/pypy/tool/jitlogparser/parser.py
--- a/pypy/tool/jitlogparser/parser.py
+++ b/pypy/tool/jitlogparser/parser.py
@@ -328,6 +328,8 @@
         if op.is_guard() and bridges.get('loop-' + str(op.guard_no), None):
             res.append(op)
             i = 0
+            if hasattr(op.bridge, 'force_asm'):
+                op.bridge.force_asm()
             ops = op.bridge.operations
         else:
             res.append(op)
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.c
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -3,31 +3,38 @@
 #include "src/cjkcodecs/multibytecodec.h"
 
 
-struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
-                                         char *inbuf, Py_ssize_t inlen)
+struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec)
 {
   struct pypy_cjk_dec_s *d = malloc(sizeof(struct pypy_cjk_dec_s));
   if (!d)
     return NULL;
   if (codec->decinit != NULL && codec->decinit(&d->state, codec->config) != 0)
-    goto errorexit;
+    {
+      free(d);
+      return NULL;
+    }
+  d->codec = codec;
+  d->outbuf_start = NULL;
+  return d;
+}
 
-  d->codec = codec;
+Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d,
+                             char *inbuf, Py_ssize_t inlen)
+{
   d->inbuf_start = inbuf;
   d->inbuf = inbuf;
   d->inbuf_end = inbuf + inlen;
-  d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?
-                     malloc(inlen * sizeof(Py_UNICODE)) :
-                     NULL);
-  if (!d->outbuf_start)
-    goto errorexit;
+  if (d->outbuf_start == NULL)
+    {
+      d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?
+                         malloc(inlen * sizeof(Py_UNICODE)) :
+                         NULL);
+      if (d->outbuf_start == NULL)
+        return -1;
+      d->outbuf_end = d->outbuf_start + inlen;
+    }
   d->outbuf = d->outbuf_start;
-  d->outbuf_end = d->outbuf_start + inlen;
-  return d;
-
- errorexit:
-  free(d);
-  return NULL;
+  return 0;
 }
 
 void pypy_cjk_dec_free(struct pypy_cjk_dec_s *d)
@@ -112,34 +119,40 @@
 
 /************************************************************/
 
-struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
-                                         Py_UNICODE *inbuf, Py_ssize_t inlen)
+struct pypy_cjk_enc_s *pypy_cjk_enc_new(const MultibyteCodec *codec)
 {
-  Py_ssize_t outlen;
   struct pypy_cjk_enc_s *d = malloc(sizeof(struct pypy_cjk_enc_s));
   if (!d)
     return NULL;
   if (codec->encinit != NULL && codec->encinit(&d->state, codec->config) != 0)
-    goto errorexit;
+    {
+      free(d);
+      return NULL;
+    }
+  d->codec = codec;
+  d->outbuf_start = NULL;
+  return d;
+}
 
-  d->codec = codec;
+Py_ssize_t pypy_cjk_enc_init(struct pypy_cjk_enc_s *d,
+                             Py_UNICODE *inbuf, Py_ssize_t inlen)
+{
+  Py_ssize_t outlen;
   d->inbuf_start = inbuf;
   d->inbuf = inbuf;
   d->inbuf_end = inbuf + inlen;
-
-  if (inlen > (PY_SSIZE_T_MAX - 16) / 2)
-    goto errorexit;
-  outlen = inlen * 2 + 16;
-  d->outbuf_start = malloc(outlen);
-  if (!d->outbuf_start)
-    goto errorexit;
+  if (d->outbuf_start == NULL)
+    {
+      if (inlen > (PY_SSIZE_T_MAX - 16) / 2)
+        return -1;
+      outlen = inlen * 2 + 16;
+      d->outbuf_start = malloc(outlen);
+      if (d->outbuf_start == NULL)
+        return -1;
+      d->outbuf_end = d->outbuf_start + outlen;
+    }
   d->outbuf = d->outbuf_start;
-  d->outbuf_end = d->outbuf_start + outlen;
-  return d;
-
- errorexit:
-  free(d);
-  return NULL;
+  return 0;
 }
 
 void pypy_cjk_enc_free(struct pypy_cjk_enc_s *d)
@@ -167,11 +180,8 @@
   return 0;
 }
 
-#define MBENC_RESET     MBENC_MAX<<1
-
-Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *d)
+Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *d, Py_ssize_t flags)
 {
-  int flags = MBENC_FLUSH | MBENC_RESET;   /* XXX always, for now */
   while (1)
     {
       Py_ssize_t r;
@@ -242,3 +252,8 @@
   d->inbuf = d->inbuf_start + in_offset;
   return 0;
 }
+
+const MultibyteCodec *pypy_cjk_enc_getcodec(struct pypy_cjk_enc_s *d)
+{
+  return d->codec;
+}
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
--- a/pypy/translator/c/src/cjkcodecs/multibytecodec.h
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -94,8 +94,9 @@
   Py_UNICODE *outbuf_start, *outbuf, *outbuf_end;
 };
 
-struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
-                                         char *inbuf, Py_ssize_t inlen);
+struct pypy_cjk_dec_s *pypy_cjk_dec_new(const MultibyteCodec *codec);
+Py_ssize_t pypy_cjk_dec_init(struct pypy_cjk_dec_s *d,
+                             char *inbuf, Py_ssize_t inlen);
 void pypy_cjk_dec_free(struct pypy_cjk_dec_s *);
 Py_ssize_t pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *);
 Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *);
@@ -112,10 +113,11 @@
   unsigned char *outbuf_start, *outbuf, *outbuf_end;
 };
 
-struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
-                                         Py_UNICODE *inbuf, Py_ssize_t inlen);
+struct pypy_cjk_enc_s *pypy_cjk_enc_new(const MultibyteCodec *codec);
+Py_ssize_t pypy_cjk_enc_init(struct pypy_cjk_enc_s *d,
+                             Py_UNICODE *inbuf, Py_ssize_t inlen);
 void pypy_cjk_enc_free(struct pypy_cjk_enc_s *);
-Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *);
+Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *, Py_ssize_t);
 Py_ssize_t pypy_cjk_enc_reset(struct pypy_cjk_enc_s *);
 char *pypy_cjk_enc_outbuf(struct pypy_cjk_enc_s *);
 Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);
@@ -123,6 +125,7 @@
 Py_ssize_t pypy_cjk_enc_inbuf_consumed(struct pypy_cjk_enc_s* d);
 Py_ssize_t pypy_cjk_enc_replace_on_error(struct pypy_cjk_enc_s* d,
                                          char *, Py_ssize_t, Py_ssize_t);
+const MultibyteCodec *pypy_cjk_enc_getcodec(struct pypy_cjk_enc_s *);
 
 /* list of codecs defined in the .c files */
 
diff --git a/pypy/translator/c/test/test_newgc.py b/pypy/translator/c/test/test_newgc.py
--- a/pypy/translator/c/test/test_newgc.py
+++ b/pypy/translator/c/test/test_newgc.py
@@ -1396,9 +1396,10 @@
         class A:
             def __init__(self, n):
                 self.buf = lltype.malloc(ARRAY, n, flavor='raw',
-                                         track_allocation=False)
+                                         add_memory_pressure=True)
             def __del__(self):
-                lltype.free(self.buf, flavor='raw', track_allocation=False)
+                lltype.free(self.buf, flavor='raw')
+        A(6)
         def f():
             # allocate a total of ~77GB, but if the automatic gc'ing works,
             # it should never need more than a few MBs at once