[pypy-commit] pypy dynamic-specialized-tuple: Merged default in.

Mon Mar 19 21:55:51 CET 2012

Author: Alex Gaynor <alex.gaynor at gmail.com>
Branch: dynamic-specialized-tuple
Changeset: r53818:62a901668a67
Date: 2012-03-19 15:45 -0500
http://bitbucket.org/pypy/pypy/changeset/62a901668a67/

Log:	Merged default in.

diff --git a/lib_pypy/numpypy/core/numeric.py b/lib_pypy/numpypy/core/numeric.py
--- a/lib_pypy/numpypy/core/numeric.py
+++ b/lib_pypy/numpypy/core/numeric.py
@@ -6,7 +6,7 @@
 import _numpypy as multiarray # ARGH
 from numpypy.core.arrayprint import array2string
 
-
+newaxis = None
 
 def asanyarray(a, dtype=None, order=None, maskna=None, ownmaskna=False):
     """
@@ -319,4 +319,4 @@
 False_ = bool_(False)
 True_ = bool_(True)
 e = math.e
-pi = math.pi
\ No newline at end of file
+pi = math.pi
diff --git a/pypy/__init__.py b/pypy/__init__.py
--- a/pypy/__init__.py
+++ b/pypy/__init__.py
@@ -1,1 +1,16 @@
 # Empty
+
+# XXX Should be empty again, soon.
+# XXX hack for win64:
+# This patch must stay here until the END OF STAGE 1
+# When all tests work, this branch will be merged
+# and the branch stage 2 is started, where we remove this patch.
+import sys
+if hasattr(sys, "maxsize"):
+    if sys.maxint != sys.maxsize:
+        sys.maxint = sys.maxsize
+        import warnings
+        warnings.warn("""\n
+---> This win64 port is now in stage 1: sys.maxint was modified.
+---> When pypy/__init__.py becomes empty again, we have reached stage 2.
+""")
diff --git a/pypy/annotation/classdef.py b/pypy/annotation/classdef.py
--- a/pypy/annotation/classdef.py
+++ b/pypy/annotation/classdef.py
@@ -148,7 +148,6 @@
                 "the attribute here; the list of read locations is:\n" +
                 '\n'.join([str(loc[0]) for loc in self.read_locations]))
 
-
 class ClassDef(object):
     "Wraps a user class."
 
diff --git a/pypy/bin/rpython b/pypy/bin/rpython
new file mode 100755
--- /dev/null
+++ b/pypy/bin/rpython
@@ -0,0 +1,18 @@
+#!/usr/bin/env pypy
+
+"""RPython translation usage:
+
+rpython <translation options> target <targetoptions>
+
+run with --help for more information
+"""
+
+import sys
+from pypy.translator.goal.translate import main
+
+# no implicit targets
+if len(sys.argv) == 1:
+    print __doc__
+    sys.exit(1)
+
+main()
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -1336,7 +1336,7 @@
         if not self.is_true(self.isinstance(w_obj, self.w_str)):
             raise OperationError(self.w_TypeError,
                                  self.wrap('argument must be a string'))
-        return self.str_w(w_obj)
+        return self.str_w(w_obj)            
 
     def unicode_w(self, w_obj):
         return w_obj.unicode_w(self)
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -47,6 +47,11 @@
 
     def async(self, space):
         "Check if this is an exception that should better not be caught."
+        if not space.full_exceptions:
+            # flow objspace does not support such exceptions and more
+            # importantly, raises KeyboardInterrupt if you try to access
+            # space.w_KeyboardInterrupt
+            return False
         return (self.match(space, space.w_SystemExit) or
                 self.match(space, space.w_KeyboardInterrupt))
 
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -901,15 +901,17 @@
 
     def __init__(self, source, filename=None, modname='__builtin__'):
         # HAAACK (but a good one)
+        self.filename = filename
+        self.source = str(py.code.Source(source).deindent())
+        self.modname = modname
         if filename is None:
             f = sys._getframe(1)
             filename = '<%s:%d>' % (f.f_code.co_filename, f.f_lineno)
+        if not os.path.exists(filename):
+            # make source code available for tracebacks
+            lines = [x + "\n" for x in source.split("\n")]
+            py.std.linecache.cache[filename] = (1, None, lines, filename)
         self.filename = filename
-        self.source = str(py.code.Source(source).deindent())
-        self.modname = modname
-        # make source code available for tracebacks
-        lines = [x + "\n" for x in source.split("\n")]
-        py.std.linecache.cache[filename] = (1, None, lines, filename)
 
     def __repr__(self):
         return "<ApplevelClass filename=%r>" % (self.filename,)
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -794,7 +794,7 @@
     def test_tuple_constants(self):
         ns = {}
         exec "x = (1, 0); y = (1L, 0L)" in ns
-        assert isinstance(ns["x"][0], (int, long))
+        assert isinstance(ns["x"][0], int)
         assert isinstance(ns["y"][0], long)
 
     def test_division_folding(self):
diff --git a/pypy/jit/backend/llsupport/regalloc.py b/pypy/jit/backend/llsupport/regalloc.py
--- a/pypy/jit/backend/llsupport/regalloc.py
+++ b/pypy/jit/backend/llsupport/regalloc.py
@@ -321,7 +321,7 @@
         except KeyError:
             pass   # 'var' is already not in a register
 
-    def loc(self, box):
+    def loc(self, box, must_exist=False):
         """ Return the location of 'box'.
         """
         self._check_type(box)
@@ -332,6 +332,8 @@
         except KeyError:
             if box in self.bindings_to_frame_reg:
                 return self.frame_reg
+            if must_exist:
+                return self.frame_manager.bindings[box]
             return self.frame_manager.loc(box)
 
     def return_constant(self, v, forbidden_vars=[], selected_reg=None):
@@ -360,7 +362,7 @@
         self._check_type(v)
         if isinstance(v, Const):
             return self.return_constant(v, forbidden_vars, selected_reg)
-        prev_loc = self.loc(v)
+        prev_loc = self.loc(v, must_exist=True)
         if prev_loc is self.frame_reg and selected_reg is None:
             return prev_loc
         loc = self.force_allocate_reg(v, forbidden_vars, selected_reg,
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -11,6 +11,7 @@
 from pypy.jit.tool.oparser import parse
 from pypy.rpython.lltypesystem.rclass import OBJECT, OBJECT_VTABLE
 from pypy.jit.metainterp.optimizeopt.util import equaloplists
+from pypy.rlib.rarithmetic import is_valid_int
 
 def test_boehm():
     gc_ll_descr = GcLLDescr_boehm(None, None, None)
@@ -103,7 +104,7 @@
         gcrootmap.put(retaddr, shapeaddr)
         assert gcrootmap._gcmap[0] == retaddr
         assert gcrootmap._gcmap[1] == shapeaddr
-        p = rffi.cast(rffi.LONGP, gcrootmap.gcmapstart())
+        p = rffi.cast(rffi.SIGNEDP, gcrootmap.gcmapstart())
         assert p[0] == retaddr
         assert (gcrootmap.gcmapend() ==
                 gcrootmap.gcmapstart() + rffi.sizeof(lltype.Signed) * 2)
@@ -419,9 +420,9 @@
         assert newops[0].getarg(1) == v_value
         assert newops[0].result is None
         wbdescr = newops[0].getdescr()
-        assert isinstance(wbdescr.jit_wb_if_flag, int)
-        assert isinstance(wbdescr.jit_wb_if_flag_byteofs, int)
-        assert isinstance(wbdescr.jit_wb_if_flag_singlebyte, int)
+        assert is_valid_int(wbdescr.jit_wb_if_flag)
+        assert is_valid_int(wbdescr.jit_wb_if_flag_byteofs)
+        assert is_valid_int(wbdescr.jit_wb_if_flag_singlebyte)
 
     def test_get_rid_of_debug_merge_point(self):
         operations = [
diff --git a/pypy/jit/backend/llsupport/test/test_regalloc.py b/pypy/jit/backend/llsupport/test/test_regalloc.py
--- a/pypy/jit/backend/llsupport/test/test_regalloc.py
+++ b/pypy/jit/backend/llsupport/test/test_regalloc.py
@@ -1,4 +1,4 @@
-
+import py
 from pypy.jit.metainterp.history import BoxInt, ConstInt, BoxFloat, INT, FLOAT
 from pypy.jit.backend.llsupport.regalloc import FrameManager
 from pypy.jit.backend.llsupport.regalloc import RegisterManager as BaseRegMan
@@ -236,6 +236,16 @@
         assert isinstance(loc, FakeFramePos)
         assert len(asm.moves) == 1
 
+    def test_bogus_make_sure_var_in_reg(self):
+        b0, = newboxes(0)
+        longevity = {b0: (0, 1)}
+        fm = TFrameManager()
+        asm = MockAsm()
+        rm = RegisterManager(longevity, frame_manager=fm, assembler=asm)
+        rm.next_instruction()
+        # invalid call to make_sure_var_in_reg(): box unknown so far
+        py.test.raises(KeyError, rm.make_sure_var_in_reg, b0)
+
     def test_return_constant(self):
         asm = MockAsm()
         boxes, longevity = boxes_and_longevity(5)
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -16,9 +16,11 @@
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rpython.llinterp import LLException
 from pypy.jit.codewriter import heaptracker, longlong
+from pypy.rlib import longlong2float
 from pypy.rlib.rarithmetic import intmask, is_valid_int
 from pypy.jit.backend.detect_cpu import autodetect_main_model_and_size
 
+
 def boxfloat(x):
     return BoxFloat(longlong.getfloatstorage(x))
 
@@ -1496,13 +1498,30 @@
                                                c_nest, c_nest], 'void')
 
     def test_read_timestamp(self):
+        if sys.platform == 'win32':
+            # windows quite often is very inexact (like the old Intel 8259 PIC),
+            # so we stretch the time a little bit.
+            # On my virtual Parallels machine in a 2GHz Core i7 Mac Mini,
+            # the test starts working at delay == 21670 and stops at 20600000.
+            # We take the geometric mean value.
+            from math import log, exp
+            delay_min = 21670
+            delay_max = 20600000
+            delay = int(exp((log(delay_min)+log(delay_max))/2))
+            def wait_a_bit():
+                for i in xrange(delay): pass
+        else:
+            def wait_a_bit():
+                pass
         if longlong.is_64_bit:
             got1 = self.execute_operation(rop.READ_TIMESTAMP, [], 'int')
+            wait_a_bit()
             got2 = self.execute_operation(rop.READ_TIMESTAMP, [], 'int')
             res1 = got1.getint()
             res2 = got2.getint()
         else:
             got1 = self.execute_operation(rop.READ_TIMESTAMP, [], 'float')
+            wait_a_bit()
             got2 = self.execute_operation(rop.READ_TIMESTAMP, [], 'float')
             res1 = got1.getlonglong()
             res2 = got2.getlonglong()
@@ -1598,6 +1617,12 @@
                                      [BoxPtr(x)], 'int').value
         assert res == -19
 
+    def test_convert_float_bytes(self):
+        t = 'int' if longlong.is_64_bit else 'float'
+        res = self.execute_operation(rop.CONVERT_FLOAT_BYTES_TO_LONGLONG,
+                                     [boxfloat(2.5)], t).value
+        assert res == longlong2float.float2longlong(2.5)
+
     def test_ooops_non_gc(self):
         x = lltype.malloc(lltype.Struct('x'), flavor='raw')
         v = heaptracker.adr2int(llmemory.cast_ptr_to_adr(x))
diff --git a/pypy/jit/backend/test/support.py b/pypy/jit/backend/test/support.py
--- a/pypy/jit/backend/test/support.py
+++ b/pypy/jit/backend/test/support.py
@@ -3,6 +3,7 @@
 from pypy.rlib.debug import debug_print
 from pypy.translator.translator import TranslationContext, graphof
 from pypy.jit.metainterp.optimizeopt import ALL_OPTS_NAMES
+from pypy.rlib.rarithmetic import is_valid_int
 
 class BaseCompiledMixin(object):
 
@@ -24,7 +25,7 @@
         from pypy.annotation import model as annmodel
 
         for arg in args:
-            assert isinstance(arg, int)
+            assert is_valid_int(arg)
 
         self.pre_translation_hook()
         t = self._get_TranslationContext()
diff --git a/pypy/jit/backend/test/test_random.py b/pypy/jit/backend/test/test_random.py
--- a/pypy/jit/backend/test/test_random.py
+++ b/pypy/jit/backend/test/test_random.py
@@ -449,6 +449,7 @@
 
 OPERATIONS.append(CastFloatToIntOperation(rop.CAST_FLOAT_TO_INT))
 OPERATIONS.append(CastIntToFloatOperation(rop.CAST_INT_TO_FLOAT))
+OPERATIONS.append(CastFloatToIntOperation(rop.CONVERT_FLOAT_BYTES_TO_LONGLONG))
 
 OperationBuilder.OPERATIONS = OPERATIONS
 
@@ -502,11 +503,11 @@
     else:
         assert 0, "unknown backend %r" % pytest.config.option.backend
 
-# ____________________________________________________________    
+# ____________________________________________________________
 
 class RandomLoop(object):
     dont_generate_more = False
-    
+
     def __init__(self, cpu, builder_factory, r, startvars=None):
         self.cpu = cpu
         if startvars is None:
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -606,7 +606,7 @@
         else:
             assert token
             struct.number = compute_unique_id(token)
-        self.loop_run_counters.append(struct)            
+        self.loop_run_counters.append(struct)
         return struct
 
     def _find_failure_recovery_bytecode(self, faildescr):
@@ -665,7 +665,7 @@
                ResOperation(rop.SETFIELD_RAW, [c_adr, box2],
                             None, descr=self.debug_counter_descr)]
         operations.extend(ops)
-        
+
     @specialize.argtype(1)
     def _inject_debugging_code(self, looptoken, operations, tp, number):
         if self._debug:
@@ -836,8 +836,8 @@
             self.mc.MOVSD_sx(0, loc.value)
         elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
             # XXX evil trick
-            self.mc.PUSH_b(get_ebp_ofs(loc.position))
-            self.mc.PUSH_b(get_ebp_ofs(loc.position + 1))
+            self.mc.PUSH_b(loc.value + 4)
+            self.mc.PUSH_b(loc.value)
         else:
             self.mc.PUSH(loc)
 
@@ -847,8 +847,8 @@
             self.mc.ADD_ri(esp.value, 8)   # = size of doubles
         elif WORD == 4 and isinstance(loc, StackLoc) and loc.get_width() == 8:
             # XXX evil trick
-            self.mc.POP_b(get_ebp_ofs(loc.position + 1))
-            self.mc.POP_b(get_ebp_ofs(loc.position))
+            self.mc.POP_b(loc.value)
+            self.mc.POP_b(loc.value + 4)
         else:
             self.mc.POP(loc)
 
@@ -1242,6 +1242,15 @@
         self.mc.MOVD_xr(resloc.value, loc0.value)
         self.mc.CVTSS2SD_xx(resloc.value, resloc.value)
 
+    def genop_convert_float_bytes_to_longlong(self, op, arglocs, resloc):
+        loc0, = arglocs
+        if longlong.is_64_bit:
+            assert isinstance(resloc, RegLoc)
+            assert isinstance(loc0, RegLoc)
+            self.mc.MOVD(resloc, loc0)
+        else:
+            self.mov(loc0, resloc)
+
     def genop_guard_int_is_true(self, op, guard_op, guard_token, arglocs, resloc):
         guard_opnum = guard_op.getopnum()
         self.mc.CMP(arglocs[0], imm0)
@@ -1954,8 +1963,6 @@
             mc.PUSH_r(ebx.value)
         elif IS_X86_64:
             mc.MOV_rr(edi.value, ebx.value)
-            # XXX: Correct to only align the stack on 64-bit?
-            mc.AND_ri(esp.value, -16)
         else:
             raise AssertionError("Shouldn't happen")
 
@@ -2117,9 +2124,12 @@
         # First, we need to save away the registers listed in
         # 'save_registers' that are not callee-save.  XXX We assume that
         # the XMM registers won't be modified.  We store them in
-        # [ESP+4], [ESP+8], etc., leaving enough room in [ESP] for the
-        # single argument to closestack_addr below.
-        p = WORD
+        # [ESP+4], [ESP+8], etc.; on x86-32 we leave enough room in [ESP]
+        # for the single argument to closestack_addr below.
+        if IS_X86_32:
+            p = WORD
+        elif IS_X86_64:
+            p = 0
         for reg in self._regalloc.rm.save_around_call_regs:
             if reg in save_registers:
                 self.mc.MOV_sr(p, reg.value)
@@ -2174,7 +2184,10 @@
         #
         self._emit_call(-1, imm(self.releasegil_addr), args)
         # Finally, restore the registers saved above.
-        p = WORD
+        if IS_X86_32:
+            p = WORD
+        elif IS_X86_64:
+            p = 0
         for reg in self._regalloc.rm.save_around_call_regs:
             if reg in save_registers:
                 self.mc.MOV_rs(reg.value, p)
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -766,6 +766,18 @@
 
     consider_cast_singlefloat_to_float = consider_cast_int_to_float
 
+    def consider_convert_float_bytes_to_longlong(self, op):
+        if longlong.is_64_bit:
+            loc0 = self.xrm.make_sure_var_in_reg(op.getarg(0))
+            loc1 = self.rm.force_allocate_reg(op.result)
+            self.Perform(op, [loc0], loc1)
+            self.xrm.possibly_free_var(op.getarg(0))
+        else:
+            loc0 = self.xrm.loc(op.getarg(0))
+            loc1 = self.xrm.force_allocate_reg(op.result)
+            self.Perform(op, [loc0], loc1)
+            self.xrm.possibly_free_var(op.getarg(0))
+
     def _consider_llong_binop_xx(self, op):
         # must force both arguments into xmm registers, because we don't
         # know if they will be suitably aligned.  Exception: if the second
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -601,9 +601,10 @@
     CVTSS2SD_xb = xmminsn('\xF3', rex_nw, '\x0F\x5A',
                           register(1, 8), stack_bp(2))
 
-    MOVD_rx = xmminsn('\x66', rex_nw, '\x0F\x7E', register(2, 8), register(1), '\xC0')
-    MOVD_xr = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), register(2), '\xC0')
-    MOVD_xb = xmminsn('\x66', rex_nw, '\x0F\x6E', register(1, 8), stack_bp(2))
+    # These work on machine sized registers.
+    MOVD_rx = xmminsn('\x66', rex_w, '\x0F\x7E', register(2, 8), register(1), '\xC0')
+    MOVD_xr = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), register(2), '\xC0')
+    MOVD_xb = xmminsn('\x66', rex_w, '\x0F\x6E', register(1, 8), stack_bp(2))
 
     PSRAD_xi = xmminsn('\x66', rex_nw, '\x0F\x72', register(1), '\xE0', immediate(2, 'b'))
 
diff --git a/pypy/jit/backend/x86/support.py b/pypy/jit/backend/x86/support.py
--- a/pypy/jit/backend/x86/support.py
+++ b/pypy/jit/backend/x86/support.py
@@ -36,15 +36,15 @@
 
 # ____________________________________________________________
 
-if sys.platform == 'win32':
-    ensure_sse2_floats = lambda : None
-    # XXX check for SSE2 on win32 too
+if WORD == 4:
+    extra = ['-DPYPY_X86_CHECK_SSE2']
 else:
-    if WORD == 4:
-        extra = ['-DPYPY_X86_CHECK_SSE2']
-    else:
-        extra = []
-    ensure_sse2_floats = rffi.llexternal_use_eci(ExternalCompilationInfo(
-        compile_extra = ['-msse2', '-mfpmath=sse',
-                         '-DPYPY_CPU_HAS_STANDARD_PRECISION'] + extra,
-        ))
+    extra = []
+
+if sys.platform != 'win32':
+    extra = ['-msse2', '-mfpmath=sse',
+             '-DPYPY_CPU_HAS_STANDARD_PRECISION'] + extra
+
+ensure_sse2_floats = rffi.llexternal_use_eci(ExternalCompilationInfo(
+    compile_extra = extra,
+))
diff --git a/pypy/jit/backend/x86/test/test_zmath.py b/pypy/jit/backend/x86/test/test_zmath.py
--- a/pypy/jit/backend/x86/test/test_zmath.py
+++ b/pypy/jit/backend/x86/test/test_zmath.py
@@ -6,6 +6,8 @@
 from pypy.translator.c.test.test_genc import compile
 from pypy.jit.backend.x86.support import ensure_sse2_floats
 from pypy.rlib import rfloat
+from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.debug import debug_print
 
 
 def get_test_case((fnname, args, expected)):
@@ -16,16 +18,32 @@
     expect_valueerror = (expected == ValueError)
     expect_overflowerror = (expected == OverflowError)
     check = test_direct.get_tester(expected)
+    unroll_args = unrolling_iterable(args)
     #
     def testfn():
+        debug_print('calling', fnname, 'with arguments:')
+        for arg in unroll_args:
+            debug_print('\t', arg)
         try:
             got = fn(*args)
         except ValueError:
-            return expect_valueerror
+            if expect_valueerror:
+                return True
+            else:
+                debug_print('unexpected ValueError!')
+                return False
         except OverflowError:
-            return expect_overflowerror
+            if expect_overflowerror:
+                return True
+            else:
+                debug_print('unexpected OverflowError!')
+                return False
         else:
-            return check(got)
+            if check(got):
+                return True
+            else:
+                debug_print('unexpected result:', got)
+                return False
     #
     testfn.func_name = 'test_' + fnname
     return testfn
diff --git a/pypy/jit/codewriter/jtransform.py b/pypy/jit/codewriter/jtransform.py
--- a/pypy/jit/codewriter/jtransform.py
+++ b/pypy/jit/codewriter/jtransform.py
@@ -309,6 +309,11 @@
         op1 = SpaceOperation('-live-', [], None)
         return [op, op1]
 
+    def _noop_rewrite(self, op):
+        return op
+
+    rewrite_op_convert_float_bytes_to_longlong = _noop_rewrite
+
     # ----------
     # Various kinds of calls
 
diff --git a/pypy/jit/codewriter/test/test_flatten.py b/pypy/jit/codewriter/test/test_flatten.py
--- a/pypy/jit/codewriter/test/test_flatten.py
+++ b/pypy/jit/codewriter/test/test_flatten.py
@@ -968,6 +968,15 @@
             int_return %i2
         """, transform=True)
 
+    def test_convert_float_bytes_to_int(self):
+        from pypy.rlib.longlong2float import float2longlong
+        def f(x):
+            return float2longlong(x)
+        self.encoding_test(f, [25.0], """
+            convert_float_bytes_to_longlong %f0 -> %i0
+            int_return %i0
+        """)
+
 
 def check_force_cast(FROM, TO, operations, value):
     """Check that the test is correctly written..."""
diff --git a/pypy/jit/metainterp/blackhole.py b/pypy/jit/metainterp/blackhole.py
--- a/pypy/jit/metainterp/blackhole.py
+++ b/pypy/jit/metainterp/blackhole.py
@@ -1,15 +1,16 @@
+from pypy.jit.codewriter import heaptracker, longlong
+from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr
+from pypy.jit.metainterp.compile import ResumeAtPositionDescr
+from pypy.jit.metainterp.jitexc import JitException, get_llexception, reraise
+from pypy.rlib import longlong2float
+from pypy.rlib.debug import debug_start, debug_stop, ll_assert, make_sure_not_resized
+from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.rarithmetic import intmask, LONG_BIT, r_uint, ovfcheck
+from pypy.rlib.rtimer import read_timestamp
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.rtimer import read_timestamp
-from pypy.rlib.rarithmetic import intmask, LONG_BIT, r_uint, ovfcheck
-from pypy.rlib.objectmodel import we_are_translated
-from pypy.rlib.debug import debug_start, debug_stop, ll_assert
-from pypy.rlib.debug import make_sure_not_resized
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass
 from pypy.rpython.lltypesystem.lloperation import llop
-from pypy.jit.codewriter.jitcode import JitCode, SwitchDictDescr
-from pypy.jit.codewriter import heaptracker, longlong
-from pypy.jit.metainterp.jitexc import JitException, get_llexception, reraise
-from pypy.jit.metainterp.compile import ResumeAtPositionDescr
+
 
 def arguments(*argtypes, **kwds):
     resulttype = kwds.pop('returns', None)
@@ -20,6 +21,9 @@
         return function
     return decorate
 
+LONGLONG_TYPECODE = 'i' if longlong.is_64_bit else 'f'
+
+
 class LeaveFrame(JitException):
     pass
 
@@ -663,6 +667,11 @@
         a = float(a)
         return longlong.getfloatstorage(a)
 
+    @arguments("f", returns=LONGLONG_TYPECODE)
+    def bhimpl_convert_float_bytes_to_longlong(a):
+        a = longlong.getrealfloat(a)
+        return longlong2float.float2longlong(a)
+
     # ----------
     # control flow operations
 
@@ -1309,7 +1318,7 @@
     def bhimpl_copyunicodecontent(cpu, src, dst, srcstart, dststart, length):
         cpu.bh_copyunicodecontent(src, dst, srcstart, dststart, length)
 
-    @arguments(returns=(longlong.is_64_bit and "i" or "f"))
+    @arguments(returns=LONGLONG_TYPECODE)
     def bhimpl_ll_read_timestamp():
         return read_timestamp()
 
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -223,6 +223,7 @@
                     'cast_float_to_singlefloat', 'cast_singlefloat_to_float',
                     'float_neg', 'float_abs',
                     'cast_ptr_to_int', 'cast_int_to_ptr',
+                    'convert_float_bytes_to_longlong',
                     ]:
         exec py.code.Source('''
             @arguments("box")
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -419,6 +419,7 @@
     'CAST_INT_TO_FLOAT/1',          # need some messy code in the backend
     'CAST_FLOAT_TO_SINGLEFLOAT/1',
     'CAST_SINGLEFLOAT_TO_FLOAT/1',
+    'CONVERT_FLOAT_BYTES_TO_LONGLONG/1',
     #
     'INT_LT/2b',
     'INT_LE/2b',
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -3,6 +3,7 @@
 import py
 
 from pypy import conftest
+from pypy.jit.codewriter import longlong
 from pypy.jit.codewriter.policy import JitPolicy, StopAtXPolicy
 from pypy.jit.metainterp import pyjitpl, history
 from pypy.jit.metainterp.optimizeopt import ALL_OPTS_DICT
@@ -14,6 +15,7 @@
     loop_invariant, elidable, promote, jit_debug, assert_green,
     AssertGreenFailed, unroll_safe, current_trace_length, look_inside_iff,
     isconstant, isvirtual, promote_string, set_param, record_known_class)
+from pypy.rlib.longlong2float import float2longlong
 from pypy.rlib.rarithmetic import ovfcheck, is_valid_int
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rpython.ootypesystem import ootype
@@ -3793,6 +3795,16 @@
         res = self.interp_operations(g, [1])
         assert res == 3
 
+    def test_float2longlong(self):
+        def f(n):
+            return float2longlong(n)
+
+        for x in [2.5, float("nan"), -2.5, float("inf")]:
+            # There are tests elsewhere to verify the correctness of this.
+            expected = float2longlong(x)
+            res = self.interp_operations(f, [x])
+            assert longlong.getfloatstorage(res) == expected
+
 
 class TestLLtype(BaseLLtypeTests, LLJitMixin):
     def test_tagged(self):
diff --git a/pypy/jit/tl/tlc.py b/pypy/jit/tl/tlc.py
--- a/pypy/jit/tl/tlc.py
+++ b/pypy/jit/tl/tlc.py
@@ -6,6 +6,8 @@
 from pypy.jit.tl.tlopcode import *
 from pypy.jit.tl import tlopcode
 from pypy.rlib.jit import JitDriver, elidable
+from pypy.rlib.rarithmetic import is_valid_int
+
 
 class Obj(object):
 
@@ -219,7 +221,7 @@
 class Frame(object):
 
     def __init__(self, args, pc):
-        assert isinstance(pc, int)
+        assert is_valid_int(pc)
         self.args  = args
         self.pc    = pc
         self.stack = []
@@ -239,7 +241,7 @@
         return interp_eval(code, pc, args, pool).int_o()
 
     def interp_eval(code, pc, args, pool):
-        assert isinstance(pc, int)
+        assert is_valid_int(pc)
         frame = Frame(args, pc)
         pc = frame.pc
 
diff --git a/pypy/module/_ssl/test/test_ssl.py b/pypy/module/_ssl/test/test_ssl.py
--- a/pypy/module/_ssl/test/test_ssl.py
+++ b/pypy/module/_ssl/test/test_ssl.py
@@ -1,7 +1,6 @@
 from pypy.conftest import gettestobjspace
 import os
 import py
-from pypy.rlib.rarithmetic import is_valid_int
 
 
 class AppTestSSL:
@@ -31,7 +30,6 @@
         assert isinstance(_ssl.SSL_ERROR_EOF, int)
         assert isinstance(_ssl.SSL_ERROR_INVALID_ERROR_CODE, int)
 
-        assert is_valid_int(_ssl.OPENSSL_VERSION_NUMBER)
         assert isinstance(_ssl.OPENSSL_VERSION_INFO, tuple)
         assert len(_ssl.OPENSSL_VERSION_INFO) == 5
         assert isinstance(_ssl.OPENSSL_VERSION, str)
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -824,6 +824,8 @@
     pypy_decls.append("#ifdef __cplusplus")
     pypy_decls.append("extern \"C\" {")
     pypy_decls.append("#endif\n")
+    pypy_decls.append('#define Signed   long           /* xxx temporary fix */\n')
+    pypy_decls.append('#define Unsigned unsigned long  /* xxx temporary fix */\n')
 
     for decl in FORWARD_DECLS:
         pypy_decls.append("%s;" % (decl,))
@@ -855,6 +857,8 @@
             typ = 'PyObject*'
         pypy_decls.append('PyAPI_DATA(%s) %s;' % (typ, name))
 
+    pypy_decls.append('#undef Signed    /* xxx temporary fix */\n')
+    pypy_decls.append('#undef Unsigned  /* xxx temporary fix */\n')
     pypy_decls.append("#ifdef __cplusplus")
     pypy_decls.append("}")
     pypy_decls.append("#endif")
diff --git a/pypy/module/cpyext/pystate.py b/pypy/module/cpyext/pystate.py
--- a/pypy/module/cpyext/pystate.py
+++ b/pypy/module/cpyext/pystate.py
@@ -10,7 +10,7 @@
     [('next', PyInterpreterState)],
     PyInterpreterStateStruct)
 PyThreadState = lltype.Ptr(cpython_struct(
-    "PyThreadState", 
+    "PyThreadState",
     [('interp', PyInterpreterState),
      ('dict', PyObject),
      ]))
@@ -19,12 +19,15 @@
 def PyEval_SaveThread(space):
     """Release the global interpreter lock (if it has been created and thread
     support is enabled) and reset the thread state to NULL, returning the
-    previous thread state (which is not NULL except in PyPy).  If the lock has been created,
+    previous thread state.  If the lock has been created,
     the current thread must have acquired it.  (This function is available even
     when thread support is disabled at compile time.)"""
+    state = space.fromcache(InterpreterState)
     if rffi.aroundstate.before:
         rffi.aroundstate.before()
-    return lltype.nullptr(PyThreadState.TO)
+    tstate = state.swap_thread_state(
+        space, lltype.nullptr(PyThreadState.TO))
+    return tstate
 
 @cpython_api([PyThreadState], lltype.Void)
 def PyEval_RestoreThread(space, tstate):
@@ -35,6 +38,8 @@
     when thread support is disabled at compile time.)"""
     if rffi.aroundstate.after:
         rffi.aroundstate.after()
+    state = space.fromcache(InterpreterState)
+    state.swap_thread_state(space, tstate)
 
 @cpython_api([], lltype.Void)
 def PyEval_InitThreads(space):
@@ -67,28 +72,91 @@
                                   dealloc=ThreadState_dealloc)
 
 from pypy.interpreter.executioncontext import ExecutionContext
+
+# Keep track of the ThreadStateCapsule for a particular execution context.  The
+# default is for new execution contexts not to have one; it is allocated on the
+# first cpyext-based request for it.
 ExecutionContext.cpyext_threadstate = ThreadStateCapsule(None)
 
+# Also keep track of whether it has been initialized yet or not (None is a valid
+# PyThreadState for an execution context to have, when the GIL has been
+# released, so a check against that can't be used to determine the need for
+# initialization).
+ExecutionContext.cpyext_initialized_threadstate = False
+
+def cleanup_cpyext_state(self):
+    try:
+        del self.cpyext_threadstate
+    except AttributeError:
+        pass
+    self.cpyext_initialized_threadstate = False
+ExecutionContext.cleanup_cpyext_state = cleanup_cpyext_state
+
 class InterpreterState(object):
     def __init__(self, space):
         self.interpreter_state = lltype.malloc(
             PyInterpreterState.TO, flavor='raw', zero=True, immortal=True)
 
     def new_thread_state(self, space):
+        """
+        Create a new ThreadStateCapsule to hold the PyThreadState for a
+        particular execution context.
+
+        :param space: A space.
+
+        :returns: A new ThreadStateCapsule holding a newly allocated
+            PyThreadState and referring to this interpreter state.
+        """
         capsule = ThreadStateCapsule(space)
         ts = capsule.memory
         ts.c_interp = self.interpreter_state
         ts.c_dict = make_ref(space, space.newdict())
         return capsule
 
+
     def get_thread_state(self, space):
+        """
+        Get the current PyThreadState for the current execution context.
+
+        :param space: A space.
+
+        :returns: The current PyThreadState for the current execution context,
+            or None if it does not have one.
+        """
         ec = space.getexecutioncontext()
         return self._get_thread_state(space, ec).memory
 
+
+    def swap_thread_state(self, space, tstate):
+        """
+        Replace the current thread state of the current execution context with a
+        new thread state.
+
+        :param space: The space.
+
+        :param tstate: The new PyThreadState for the current execution context.
+
+        :returns: The old thread state for the current execution context, either
+            None or a PyThreadState.
+        """
+        ec = space.getexecutioncontext()
+        capsule = self._get_thread_state(space, ec)
+        old_tstate = capsule.memory
+        capsule.memory = tstate
+        return old_tstate
+
     def _get_thread_state(self, space, ec):
-        if ec.cpyext_threadstate.memory == lltype.nullptr(PyThreadState.TO):
+        """
+        Get the ThreadStateCapsule for the given execution context, possibly
+        creating a new one if it does not already have one.
+
+        :param space: The space.
+        :param ec: The ExecutionContext of which to get the thread state.
+        :returns: The ThreadStateCapsule for the given execution context.
+        """
+        if not ec.cpyext_initialized_threadstate:
             ec.cpyext_threadstate = self.new_thread_state(space)
-
+            ec.cpyext_initialized_threadstate = True
         return ec.cpyext_threadstate
 
 @cpython_api([], PyThreadState, error=CANNOT_FAIL)
@@ -105,13 +173,8 @@
 def PyThreadState_Swap(space, tstate):
     """Swap the current thread state with the thread state given by the argument
     tstate, which may be NULL.  The global interpreter lock must be held."""
-    # All cpyext calls release and acquire the GIL, so this function has no
-    # side-effects
-    if tstate:
-        return lltype.nullptr(PyThreadState.TO)
-    else:
-        state = space.fromcache(InterpreterState)
-        return state.get_thread_state(space)
+    state = space.fromcache(InterpreterState)
+    return state.swap_thread_state(space, tstate)
 
 @cpython_api([PyThreadState], lltype.Void)
 def PyEval_AcquireThread(space, tstate):
diff --git a/pypy/module/cpyext/src/getargs.c b/pypy/module/cpyext/src/getargs.c
--- a/pypy/module/cpyext/src/getargs.c
+++ b/pypy/module/cpyext/src/getargs.c
@@ -23,16 +23,33 @@
 #define FLAG_COMPAT 1
 #define FLAG_SIZE_T 2
 
+typedef int (*destr_t)(PyObject *, void *);
+
+
+/* Keep track of "objects" that have been allocated or initialized and
+   which will need to be deallocated or cleaned up somehow if overall
+   parsing fails.
+*/
+typedef struct {
+  void *item;
+  destr_t destructor;
+} freelistentry_t;
+
+typedef struct {
+  int first_available;
+  freelistentry_t *entries;
+} freelist_t;
+
 
 /* Forward */
 static int vgetargs1(PyObject *, const char *, va_list *, int);
 static void seterror(int, const char *, int *, const char *, const char *);
 static char *convertitem(PyObject *, const char **, va_list *, int, int *, 
-                         char *, size_t, PyObject **);
+                         char *, size_t, freelist_t *);
 static char *converttuple(PyObject *, const char **, va_list *, int,
-			  int *, char *, size_t, int, PyObject **);
+			  int *, char *, size_t, int, freelist_t *);
 static char *convertsimple(PyObject *, const char **, va_list *, int, char *,
-			   size_t, PyObject **);
+			   size_t, freelist_t *);
 static Py_ssize_t convertbuffer(PyObject *, void **p, char **);
 static int getbuffer(PyObject *, Py_buffer *, char**);
 
@@ -129,57 +146,56 @@
 
 /* Handle cleanup of allocated memory in case of exception */
 
-static void
-cleanup_ptr(void *ptr)
+static int
+cleanup_ptr(PyObject *self, void *ptr)
 {
-	PyMem_FREE(ptr);
-}
-
-static void
-cleanup_buffer(void *ptr)
-{
-	PyBuffer_Release((Py_buffer *) ptr);
+    if (ptr) {
+        PyMem_FREE(ptr);
+    }
+    return 0;
 }
 
 static int
-addcleanup(void *ptr, PyObject **freelist, void (*destr)(void *))
+cleanup_buffer(PyObject *self, void *ptr)
 {
-	PyObject *cobj;
-	if (!*freelist) {
-		*freelist = PyList_New(0);
-		if (!*freelist) {
-			destr(ptr);
-			return -1;
-		}
-	}
-	cobj = PyCObject_FromVoidPtr(ptr, destr);
-	if (!cobj) {
-		destr(ptr);
-		return -1;
-	}
-	if (PyList_Append(*freelist, cobj)) {
-		Py_DECREF(cobj);
-		return -1;
-	}
-        Py_DECREF(cobj);
-	return 0;
+    Py_buffer *buf = (Py_buffer *)ptr;
+    if (buf) {
+        PyBuffer_Release(buf);
+    }
+    return 0;
 }
 
 static int
-cleanreturn(int retval, PyObject *freelist)
+addcleanup(void *ptr, freelist_t *freelist, destr_t destructor)
 {
-	if (freelist && retval != 0) {
-		/* We were successful, reset the destructors so that they
-		   don't get called. */
-		Py_ssize_t len = PyList_GET_SIZE(freelist), i;
-		for (i = 0; i < len; i++)
-			((PyCObject *) PyList_GET_ITEM(freelist, i))
-				->destructor = NULL;
-	}
-	Py_XDECREF(freelist);
-	return retval;
+    int index;
+
+    index = freelist->first_available;
+    freelist->first_available += 1;
+
+    freelist->entries[index].item = ptr;
+    freelist->entries[index].destructor = destructor;
+
+    return 0;
 }
 
+static int
+cleanreturn(int retval, freelist_t *freelist)
+{
+    int index;
+
+    if (retval == 0) {
+      /* A failure occurred, therefore execute all of the cleanup
+	 functions.
+      */
+      for (index = 0; index < freelist->first_available; ++index) {
+          freelist->entries[index].destructor(NULL,
+                                              freelist->entries[index].item);
+      }
+    }
+    PyMem_Free(freelist->entries);
+    return retval;
+}
 
 static int
 vgetargs1(PyObject *args, const char *format, va_list *p_va, int flags)
@@ -195,7 +211,7 @@
 	const char *formatsave = format;
 	Py_ssize_t i, len;
 	char *msg;
-	PyObject *freelist = NULL;
+	freelist_t freelist = {0, NULL};
 	int compat = flags & FLAG_COMPAT;
 
 	assert(compat || (args != (PyObject*)NULL));
@@ -251,16 +267,18 @@
 	
 	format = formatsave;
 	
+	freelist.entries = PyMem_New(freelistentry_t, max);
+
 	if (compat) {
 		if (max == 0) {
 			if (args == NULL)
-				return 1;
+			    return cleanreturn(1, &freelist);
 			PyOS_snprintf(msgbuf, sizeof(msgbuf),
 				      "%.200s%s takes no arguments",
 				      fname==NULL ? "function" : fname,
 				      fname==NULL ? "" : "()");
 			PyErr_SetString(PyExc_TypeError, msgbuf);
-			return 0;
+			return cleanreturn(0, &freelist);
 		}
 		else if (min == 1 && max == 1) {
 			if (args == NULL) {
@@ -269,26 +287,26 @@
 					      fname==NULL ? "function" : fname,
 					      fname==NULL ? "" : "()");
 				PyErr_SetString(PyExc_TypeError, msgbuf);
-				return 0;
+				return cleanreturn(0, &freelist);
 			}
 			msg = convertitem(args, &format, p_va, flags, levels, 
 					  msgbuf, sizeof(msgbuf), &freelist);
 			if (msg == NULL)
-				return cleanreturn(1, freelist);
+				return cleanreturn(1, &freelist);
 			seterror(levels[0], msg, levels+1, fname, message);
-			return cleanreturn(0, freelist);
+			return cleanreturn(0, &freelist);
 		}
 		else {
 			PyErr_SetString(PyExc_SystemError,
 			    "old style getargs format uses new features");
-			return 0;
+			return cleanreturn(0, &freelist);
 		}
 	}
 	
 	if (!PyTuple_Check(args)) {
 		PyErr_SetString(PyExc_SystemError,
 		    "new style getargs format but argument is not a tuple");
-		return 0;
+		return cleanreturn(0, &freelist);
 	}
 	
 	len = PyTuple_GET_SIZE(args);
@@ -308,7 +326,7 @@
 			message = msgbuf;
 		}
 		PyErr_SetString(PyExc_TypeError, message);
-		return 0;
+		return cleanreturn(0, &freelist);
 	}
 	
 	for (i = 0; i < len; i++) {
@@ -319,7 +337,7 @@
 				  sizeof(msgbuf), &freelist);
 		if (msg) {
 			seterror(i+1, msg, levels, fname, message);
-			return cleanreturn(0, freelist);
+			return cleanreturn(0, &freelist);
 		}
 	}
 
@@ -328,10 +346,10 @@
 	    *format != '|' && *format != ':' && *format != ';') {
 		PyErr_Format(PyExc_SystemError,
 			     "bad format string: %.200s", formatsave);
-		return cleanreturn(0, freelist);
+		return cleanreturn(0, &freelist);
 	}
 	
-	return cleanreturn(1, freelist);
+	return cleanreturn(1, &freelist);
 }
 
 
@@ -395,7 +413,7 @@
 static char *
 converttuple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
              int *levels, char *msgbuf, size_t bufsize, int toplevel, 
-             PyObject **freelist)
+             freelist_t *freelist)
 {
 	int level = 0;
 	int n = 0;
@@ -472,7 +490,7 @@
 
 static char *
 convertitem(PyObject *arg, const char **p_format, va_list *p_va, int flags,
-            int *levels, char *msgbuf, size_t bufsize, PyObject **freelist)
+            int *levels, char *msgbuf, size_t bufsize, freelist_t *freelist)
 {
 	char *msg;
 	const char *format = *p_format;
@@ -539,7 +557,7 @@
 
 static char *
 convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags,
-              char *msgbuf, size_t bufsize, PyObject **freelist)
+              char *msgbuf, size_t bufsize, freelist_t *freelist)
 {
 	/* For # codes */
 #define FETCH_SIZE	int *q=NULL;Py_ssize_t *q2=NULL;\
@@ -1501,7 +1519,9 @@
 	const char *fname, *msg, *custom_msg, *keyword;
 	int min = INT_MAX;
 	int i, len, nargs, nkeywords;
-	PyObject *freelist = NULL, *current_arg;
+	PyObject *current_arg;
+	freelist_t freelist = {0, NULL};
+
 
 	assert(args != NULL && PyTuple_Check(args));
 	assert(keywords == NULL || PyDict_Check(keywords));
@@ -1525,6 +1545,8 @@
 	for (len=0; kwlist[len]; len++)
 		continue;
 
+	freelist.entries = PyMem_New(freelistentry_t, len);
+
 	nargs = PyTuple_GET_SIZE(args);
 	nkeywords = (keywords == NULL) ? 0 : PyDict_Size(keywords);
 	if (nargs + nkeywords > len) {
@@ -1535,7 +1557,7 @@
 			     len,
 			     (len == 1) ? "" : "s",
 			     nargs + nkeywords);
-		return 0;
+		return cleanreturn(0, &freelist);
 	}
 
 	/* convert tuple args and keyword args in same loop, using kwlist to drive process */
@@ -1549,7 +1571,7 @@
 			PyErr_Format(PyExc_RuntimeError,
 				     "More keyword list entries (%d) than "
 				     "format specifiers (%d)", len, i);
-			return cleanreturn(0, freelist);
+			return cleanreturn(0, &freelist);
 		}
 		current_arg = NULL;
 		if (nkeywords) {
@@ -1563,11 +1585,11 @@
 					     "Argument given by name ('%s') "
 					     "and position (%d)",
 					     keyword, i+1);
-				return cleanreturn(0, freelist);
+				return cleanreturn(0, &freelist);
 			}
 		}
 		else if (nkeywords && PyErr_Occurred())
-			return cleanreturn(0, freelist);
+			return cleanreturn(0, &freelist);
 		else if (i < nargs)
 			current_arg = PyTuple_GET_ITEM(args, i);
 			
@@ -1576,7 +1598,7 @@
 				levels, msgbuf, sizeof(msgbuf), &freelist);
 			if (msg) {
 				seterror(i+1, msg, levels, fname, custom_msg);
-				return cleanreturn(0, freelist);
+				return cleanreturn(0, &freelist);
 			}
 			continue;
 		}
@@ -1585,14 +1607,14 @@
 			PyErr_Format(PyExc_TypeError, "Required argument "
 				     "'%s' (pos %d) not found",
 				     keyword, i+1);
-			return cleanreturn(0, freelist);
+			return cleanreturn(0, &freelist);
 		}
 		/* current code reports success when all required args
 		 * fulfilled and no keyword args left, with no further
 		 * validation. XXX Maybe skip this in debug build ?
 		 */
 		if (!nkeywords)
-			return cleanreturn(1, freelist);
+			return cleanreturn(1, &freelist);
 
 		/* We are into optional args, skip thru to any remaining
 		 * keyword args */
@@ -1600,7 +1622,7 @@
 		if (msg) {
 			PyErr_Format(PyExc_RuntimeError, "%s: '%s'", msg,
 				     format);
-			return cleanreturn(0, freelist);
+			return cleanreturn(0, &freelist);
 		}
 	}
 
@@ -1608,7 +1630,7 @@
 		PyErr_Format(PyExc_RuntimeError,
 			"more argument specifiers than keyword list entries "
 			"(remaining format:'%s')", format);
-		return cleanreturn(0, freelist);
+		return cleanreturn(0, &freelist);
 	}
 
 	/* make sure there are no extraneous keyword arguments */
@@ -1621,7 +1643,7 @@
 			if (!PyString_Check(key)) {
                             PyErr_SetString(PyExc_TypeError, 
 					        "keywords must be strings");
-				return cleanreturn(0, freelist);
+				return cleanreturn(0, &freelist);
 			}
 			ks = PyString_AsString(key);
 			for (i = 0; i < len; i++) {
@@ -1635,12 +1657,12 @@
 					     "'%s' is an invalid keyword "
 					     "argument for this function",
 					     ks);
-				return cleanreturn(0, freelist);
+				return cleanreturn(0, &freelist);
 			}
 		}
 	}
 
-	return cleanreturn(1, freelist);
+	return cleanreturn(1, &freelist);
 }
 
 
diff --git a/pypy/module/cpyext/stringobject.py b/pypy/module/cpyext/stringobject.py
--- a/pypy/module/cpyext/stringobject.py
+++ b/pypy/module/cpyext/stringobject.py
@@ -130,6 +130,11 @@
 
 @cpython_api([PyObject], rffi.CCHARP, error=0)
 def PyString_AsString(space, ref):
+    if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_str:
+        pass    # typecheck returned "ok" without forcing 'ref' at all
+    elif not PyString_Check(space, ref):   # otherwise, use the alternate way
+        raise OperationError(space.w_TypeError, space.wrap(
+            "PyString_AsString only support strings"))
     ref_str = rffi.cast(PyStringObject, ref)
     if not ref_str.c_buffer:
         # copy string buffer
diff --git a/pypy/module/cpyext/test/test_cpyext.py b/pypy/module/cpyext/test/test_cpyext.py
--- a/pypy/module/cpyext/test/test_cpyext.py
+++ b/pypy/module/cpyext/test/test_cpyext.py
@@ -106,10 +106,7 @@
             del obj
         import gc; gc.collect()
 
-        try:
-            del space.getexecutioncontext().cpyext_threadstate
-        except AttributeError:
-            pass
+        space.getexecutioncontext().cleanup_cpyext_state()
 
         for w_obj in state.non_heaptypes_w:
             Py_DecRef(space, w_obj)
diff --git a/pypy/module/cpyext/test/test_longobject.py b/pypy/module/cpyext/test/test_longobject.py
--- a/pypy/module/cpyext/test/test_longobject.py
+++ b/pypy/module/cpyext/test/test_longobject.py
@@ -101,9 +101,9 @@
                                   space.wrap((2, 7)))):
             py.test.skip("unsupported before Python 2.7")
 
-        assert api._PyLong_Sign(space.wrap(0L)) == 0
-        assert api._PyLong_Sign(space.wrap(2L)) == 1
-        assert api._PyLong_Sign(space.wrap(-2L)) == -1
+        assert api._PyLong_Sign(space.wraplong(0L)) == 0
+        assert api._PyLong_Sign(space.wraplong(2L)) == 1
+        assert api._PyLong_Sign(space.wraplong(-2L)) == -1
 
         assert api._PyLong_NumBits(space.wrap(0)) == 0
         assert api._PyLong_NumBits(space.wrap(1)) == 1
diff --git a/pypy/module/cpyext/test/test_number.py b/pypy/module/cpyext/test/test_number.py
--- a/pypy/module/cpyext/test/test_number.py
+++ b/pypy/module/cpyext/test/test_number.py
@@ -6,12 +6,12 @@
 class TestIterator(BaseApiTest):
     def test_check(self, space, api):
         assert api.PyIndex_Check(space.wrap(12))
-        assert api.PyIndex_Check(space.wrap(-12L))
+        assert api.PyIndex_Check(space.wraplong(-12L))
         assert not api.PyIndex_Check(space.wrap(12.1))
         assert not api.PyIndex_Check(space.wrap('12'))
 
         assert api.PyNumber_Check(space.wrap(12))
-        assert api.PyNumber_Check(space.wrap(-12L))
+        assert api.PyNumber_Check(space.wraplong(-12L))
         assert api.PyNumber_Check(space.wrap(12.1))
         assert not api.PyNumber_Check(space.wrap('12'))
         assert not api.PyNumber_Check(space.wrap(1+3j))
@@ -21,7 +21,7 @@
         assert api.PyLong_CheckExact(w_l)
 
     def test_number_int(self, space, api):
-        w_l = api.PyNumber_Int(space.wrap(123L))
+        w_l = api.PyNumber_Int(space.wraplong(123L))
         assert api.PyInt_CheckExact(w_l)
         w_l = api.PyNumber_Int(space.wrap(2 << 65))
         assert api.PyLong_CheckExact(w_l)
@@ -29,7 +29,7 @@
         assert api.PyInt_CheckExact(w_l)
 
     def test_number_index(self, space, api):
-        w_l = api.PyNumber_Index(space.wrap(123L))
+        w_l = api.PyNumber_Index(space.wraplong(123L))
         assert api.PyLong_CheckExact(w_l)
         w_l = api.PyNumber_Index(space.wrap(42.3))
         assert w_l is None
diff --git a/pypy/module/cpyext/test/test_pystate.py b/pypy/module/cpyext/test/test_pystate.py
--- a/pypy/module/cpyext/test/test_pystate.py
+++ b/pypy/module/cpyext/test/test_pystate.py
@@ -3,6 +3,10 @@
 from pypy.rpython.lltypesystem.lltype import nullptr
 from pypy.module.cpyext.pystate import PyInterpreterState, PyThreadState
 from pypy.module.cpyext.pyobject import from_ref
+from pypy.rpython.lltypesystem import lltype
+from pypy.module.cpyext.test.test_cpyext import LeakCheckingTest, freeze_refcnts
+from pypy.module.cpyext.pystate import PyThreadState_Get, PyInterpreterState_Head
+from pypy.tool import leakfinder
 
 class AppTestThreads(AppTestCpythonExtensionBase):
     def test_allow_threads(self):
@@ -21,6 +25,93 @@
         # Should compile at least
         module.test()
 
+
+    def test_thread_state_get(self):
+        module = self.import_extension('foo', [
+                ("get", "METH_NOARGS",
+                 """
+                     PyThreadState *tstate = PyThreadState_Get();
+                     if (tstate == NULL) {
+                         return PyLong_FromLong(0);
+                     }
+                     if (tstate->interp != PyInterpreterState_Head()) {
+                         return PyLong_FromLong(1);
+                     }
+                     if (tstate->interp->next != NULL) {
+                         return PyLong_FromLong(2);
+                     }
+                     return PyLong_FromLong(3);
+                 """),
+                ])
+        assert module.get() == 3
+
+    def test_basic_threadstate_dance(self):
+        module = self.import_extension('foo', [
+                ("dance", "METH_NOARGS",
+                 """
+                     PyThreadState *old_tstate, *new_tstate;
+
+                     old_tstate = PyThreadState_Swap(NULL);
+                     if (old_tstate == NULL) {
+                         return PyLong_FromLong(0);
+                     }
+
+                     new_tstate = PyThreadState_Get();
+                     if (new_tstate != NULL) {
+                         return PyLong_FromLong(1);
+                     }
+
+                     new_tstate = PyThreadState_Swap(old_tstate);
+                     if (new_tstate != NULL) {
+                         return PyLong_FromLong(2);
+                     }
+
+                     new_tstate = PyThreadState_Get();
+                     if (new_tstate != old_tstate) {
+                         return PyLong_FromLong(3);
+                     }
+
+                     return PyLong_FromLong(4);
+                 """),
+                ])
+        assert module.dance() == 4
+
+    def test_threadstate_dict(self):
+        module = self.import_extension('foo', [
+                ("getdict", "METH_NOARGS",
+                 """
+                 PyObject *dict = PyThreadState_GetDict();
+                 Py_INCREF(dict);
+                 return dict;
+                 """),
+                ])
+        assert isinstance(module.getdict(), dict)
+
+    def test_savethread(self):
+        module = self.import_extension('foo', [
+                ("bounce", "METH_NOARGS",
+                 """
+                 PyThreadState *tstate = PyEval_SaveThread();
+                 if (tstate == NULL) {
+                     return PyLong_FromLong(0);
+                 }
+
+                 if (PyThreadState_Get() != NULL) {
+                     return PyLong_FromLong(1);
+                 }
+
+                 PyEval_RestoreThread(tstate);
+
+                 if (PyThreadState_Get() != tstate) {
+                     return PyLong_FromLong(2);
+                 }
+
+                 return PyLong_FromLong(3);
+                                  """),
+                ])
+
+
+
 class TestInterpreterState(BaseApiTest):
     def test_interpreter_head(self, space, api):
         state = api.PyInterpreterState_Head()
@@ -29,31 +120,3 @@
     def test_interpreter_next(self, space, api):
         state = api.PyInterpreterState_Head()
         assert nullptr(PyInterpreterState.TO) == api.PyInterpreterState_Next(state)
-
-class TestThreadState(BaseApiTest):
-    def test_thread_state_get(self, space, api):
-        ts = api.PyThreadState_Get()
-        assert ts != nullptr(PyThreadState.TO)
-
-    def test_thread_state_interp(self, space, api):
-        ts = api.PyThreadState_Get()
-        assert ts.c_interp == api.PyInterpreterState_Head()
-        assert ts.c_interp.c_next == nullptr(PyInterpreterState.TO)
-
-    def test_basic_threadstate_dance(self, space, api):
-        # Let extension modules call these functions,
-        # Not sure of the semantics in pypy though.
-        # (cpyext always acquires and releases the GIL around calls)
-        tstate = api.PyThreadState_Swap(None)
-        assert tstate is not None
-        assert not api.PyThreadState_Swap(tstate)
-
-        api.PyEval_AcquireThread(tstate)
-        api.PyEval_ReleaseThread(tstate)
-
-    def test_threadstate_dict(self, space, api):
-        ts = api.PyThreadState_Get()
-        ref = ts.c_dict
-        assert ref == api.PyThreadState_GetDict()
-        w_obj = from_ref(space, ref)
-        assert space.isinstance_w(w_obj, space.w_dict)
diff --git a/pypy/module/math/test/test_direct.py b/pypy/module/math/test/test_direct.py
--- a/pypy/module/math/test/test_direct.py
+++ b/pypy/module/math/test/test_direct.py
@@ -59,6 +59,9 @@
         ('copysign', (1.5, -0.0), -1.5),
         ('copysign', (1.5, INFINITY), 1.5),
         ('copysign', (1.5, -INFINITY), -1.5),
+        ]
+    if sys.platform != 'win32':    # all NaNs seem to be negative there...?
+        IRREGCASES += [
         ('copysign', (1.5, NAN), 1.5),
         ('copysign', (1.75, -NAN), -1.75),      # special case for -NAN here
         ]
diff --git a/pypy/module/micronumpy/__init__.py b/pypy/module/micronumpy/__init__.py
--- a/pypy/module/micronumpy/__init__.py
+++ b/pypy/module/micronumpy/__init__.py
@@ -37,26 +37,44 @@
         'True_': 'types.Bool.True',
         'False_': 'types.Bool.False',
 
+        'typeinfo': 'interp_dtype.get_dtype_cache(space).w_typeinfo',
+
         'generic': 'interp_boxes.W_GenericBox',
         'number': 'interp_boxes.W_NumberBox',
         'integer': 'interp_boxes.W_IntegerBox',
         'signedinteger': 'interp_boxes.W_SignedIntegerBox',
         'unsignedinteger': 'interp_boxes.W_UnsignedIntegerBox',
         'bool_': 'interp_boxes.W_BoolBox',
+        'bool8': 'interp_boxes.W_BoolBox',
         'int8': 'interp_boxes.W_Int8Box',
+        'byte': 'interp_boxes.W_Int8Box',
         'uint8': 'interp_boxes.W_UInt8Box',
+        'ubyte': 'interp_boxes.W_UInt8Box',
         'int16': 'interp_boxes.W_Int16Box',
+        'short': 'interp_boxes.W_Int16Box',
         'uint16': 'interp_boxes.W_UInt16Box',
+        'ushort': 'interp_boxes.W_UInt16Box',
         'int32': 'interp_boxes.W_Int32Box',
+        'intc': 'interp_boxes.W_Int32Box',
         'uint32': 'interp_boxes.W_UInt32Box',
+        'uintc': 'interp_boxes.W_UInt32Box',
         'int64': 'interp_boxes.W_Int64Box',
         'uint64': 'interp_boxes.W_UInt64Box',
+        'longlong': 'interp_boxes.W_LongLongBox',
+        'ulonglong': 'interp_boxes.W_ULongLongBox',
         'int_': 'interp_boxes.W_LongBox',
         'inexact': 'interp_boxes.W_InexactBox',
         'floating': 'interp_boxes.W_FloatingBox',
         'float_': 'interp_boxes.W_Float64Box',
         'float32': 'interp_boxes.W_Float32Box',
         'float64': 'interp_boxes.W_Float64Box',
+        'intp': 'types.IntP.BoxType',
+        'uintp': 'types.UIntP.BoxType',
+        'flexible': 'interp_boxes.W_FlexibleBox',
+        'character': 'interp_boxes.W_CharacterBox',
+        'str_': 'interp_boxes.W_StringBox',
+        'unicode_': 'interp_boxes.W_UnicodeBox',
+        'void': 'interp_boxes.W_VoidBox',
     }
 
     # ufuncs
@@ -67,6 +85,7 @@
         ("arccos", "arccos"),
         ("arcsin", "arcsin"),
         ("arctan", "arctan"),
+        ("arctan2", "arctan2"),
         ("arccosh", "arccosh"),
         ("arcsinh", "arcsinh"),
         ("arctanh", "arctanh"),
@@ -77,7 +96,10 @@
         ("true_divide", "true_divide"),
         ("equal", "equal"),
         ("exp", "exp"),
+        ("exp2", "exp2"),
+        ("expm1", "expm1"),
         ("fabs", "fabs"),
+        ("fmod", "fmod"),
         ("floor", "floor"),
         ("ceil", "ceil"),
         ("greater", "greater"),
@@ -92,8 +114,10 @@
         ("radians", "radians"),
         ("degrees", "degrees"),
         ("deg2rad", "radians"),
+        ("rad2deg", "degrees"),
         ("reciprocal", "reciprocal"),
         ("sign", "sign"),
+        ("signbit", "signbit"),
         ("sin", "sin"),
         ("sinh", "sinh"),
         ("subtract", "subtract"),
@@ -106,6 +130,9 @@
         ('bitwise_not', 'invert'),
         ('isnan', 'isnan'),
         ('isinf', 'isinf'),
+        ('isneginf', 'isneginf'),
+        ('isposinf', 'isposinf'),
+        ('isfinite', 'isfinite'),
         ('logical_and', 'logical_and'),
         ('logical_xor', 'logical_xor'),
         ('logical_not', 'logical_not'),
@@ -116,6 +143,8 @@
         ('log1p', 'log1p'),
         ('power', 'power'),
         ('floor_divide', 'floor_divide'),
+        ('logaddexp', 'logaddexp'),
+        ('logaddexp2', 'logaddexp2'),
     ]:
         interpleveldefs[exposed] = "interp_ufuncs.get(space).%s" % impl
 
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -33,7 +33,7 @@
     pass
 
 SINGLE_ARG_FUNCTIONS = ["sum", "prod", "max", "min", "all", "any",
-                        "unegative", "flat"]
+                        "unegative", "flat", "tostring"]
 TWO_ARG_FUNCTIONS = ["dot", 'take']
 
 class FakeSpace(object):
@@ -51,6 +51,8 @@
     w_long = "long"
     w_tuple = 'tuple'
     w_slice = "slice"
+    w_str = "str"
+    w_unicode = "unicode"
 
     def __init__(self):
         """NOT_RPYTHON"""
@@ -91,8 +93,12 @@
             return BoolObject(obj)
         elif isinstance(obj, int):
             return IntObject(obj)
+        elif isinstance(obj, long):
+            return LongObject(obj)
         elif isinstance(obj, W_Root):
             return obj
+        elif isinstance(obj, str):
+            return StringObject(obj)
         raise NotImplementedError
 
     def newlist(self, items):
@@ -120,6 +126,11 @@
             return int(w_obj.floatval)
         raise NotImplementedError
 
+    def str_w(self, w_obj):
+        if isinstance(w_obj, StringObject):
+            return w_obj.v
+        raise NotImplementedError
+
     def int(self, w_obj):
         if isinstance(w_obj, IntObject):
             return w_obj
@@ -151,7 +162,13 @@
         return instantiate(klass)
 
     def newtuple(self, list_w):
-        raise ValueError
+        return ListObject(list_w)
+
+    def newdict(self):
+        return {}
+
+    def setitem(self, dict, item, value):
+        dict[item] = value
 
     def len_w(self, w_obj):
         if isinstance(w_obj, ListObject):
@@ -178,6 +195,11 @@
     def __init__(self, intval):
         self.intval = intval
 
+class LongObject(W_Root):
+    tp = FakeSpace.w_long
+    def __init__(self, intval):
+        self.intval = intval
+
 class ListObject(W_Root):
     tp = FakeSpace.w_list
     def __init__(self, items):
@@ -190,6 +212,11 @@
         self.stop = stop
         self.step = step
 
+class StringObject(W_Root):
+    tp = FakeSpace.w_str
+    def __init__(self, v):
+        self.v = v
+
 class InterpreterState(object):
     def __init__(self, code):
         self.code = code
@@ -407,6 +434,9 @@
                 w_res = neg.call(interp.space, [arr])
             elif self.name == "flat":
                 w_res = arr.descr_get_flatiter(interp.space)
+            elif self.name == "tostring":
+                arr.descr_tostring(interp.space)
+                w_res = None
             else:
                 assert False # unreachable code
         elif self.name in TWO_ARG_FUNCTIONS:
diff --git a/pypy/module/micronumpy/interp_boxes.py b/pypy/module/micronumpy/interp_boxes.py
--- a/pypy/module/micronumpy/interp_boxes.py
+++ b/pypy/module/micronumpy/interp_boxes.py
@@ -1,24 +1,25 @@
 from pypy.interpreter.baseobjspace import Wrappable
-from pypy.interpreter.error import operationerrfmt
+from pypy.interpreter.error import operationerrfmt, OperationError
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef
 from pypy.objspace.std.floattype import float_typedef
+from pypy.objspace.std.stringtype import str_typedef
+from pypy.objspace.std.unicodetype import unicode_typedef, unicode_from_object
 from pypy.objspace.std.inttype import int_typedef
 from pypy.rlib.rarithmetic import LONG_BIT
 from pypy.tool.sourcetools import func_with_new_name
 
-
 MIXIN_32 = (int_typedef,) if LONG_BIT == 32 else ()
 MIXIN_64 = (int_typedef,) if LONG_BIT == 64 else ()
 
 def new_dtype_getter(name):
-    def get_dtype(space):
+    def _get_dtype(space):
         from pypy.module.micronumpy.interp_dtype import get_dtype_cache
         return getattr(get_dtype_cache(space), "w_%sdtype" % name)
     def new(space, w_subtype, w_value):
-        dtype = get_dtype(space)
+        dtype = _get_dtype(space)
         return dtype.itemtype.coerce_subtype(space, w_subtype, w_value)
-    return func_with_new_name(new, name + "_box_new"), staticmethod(get_dtype)
+    return func_with_new_name(new, name + "_box_new"), staticmethod(_get_dtype)
 
 class PrimitiveBox(object):
     _mixin_ = True
@@ -37,6 +38,9 @@
             w_subtype.getname(space, '?')
         )
 
+    def get_dtype(self, space):
+        return self._get_dtype(space)
+
     def descr_str(self, space):
         return space.wrap(self.get_dtype(space).itemtype.str_format(self))
 
@@ -44,12 +48,12 @@
         return space.format(self.item(space), w_spec)
 
     def descr_int(self, space):
-        box = self.convert_to(W_LongBox.get_dtype(space))
+        box = self.convert_to(W_LongBox._get_dtype(space))
         assert isinstance(box, W_LongBox)
         return space.wrap(box.value)
 
     def descr_float(self, space):
-        box = self.convert_to(W_Float64Box.get_dtype(space))
+        box = self.convert_to(W_Float64Box._get_dtype(space))
         assert isinstance(box, W_Float64Box)
         return space.wrap(box.value)
 
@@ -130,7 +134,7 @@
 
 
 class W_BoolBox(W_GenericBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("bool")
+    descr__new__, _get_dtype = new_dtype_getter("bool")
 
 class W_NumberBox(W_GenericBox):
     _attrs_ = ()
@@ -146,34 +150,40 @@
     pass
 
 class W_Int8Box(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("int8")
+    descr__new__, _get_dtype = new_dtype_getter("int8")
 
 class W_UInt8Box(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("uint8")
+    descr__new__, _get_dtype = new_dtype_getter("uint8")
 
 class W_Int16Box(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("int16")
+    descr__new__, _get_dtype = new_dtype_getter("int16")
 
 class W_UInt16Box(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("uint16")
+    descr__new__, _get_dtype = new_dtype_getter("uint16")
 
 class W_Int32Box(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("int32")
+    descr__new__, _get_dtype = new_dtype_getter("int32")
 
 class W_UInt32Box(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("uint32")
+    descr__new__, _get_dtype = new_dtype_getter("uint32")
 
 class W_LongBox(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("long")
+    descr__new__, _get_dtype = new_dtype_getter("long")
 
 class W_ULongBox(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("ulong")
+    descr__new__, _get_dtype = new_dtype_getter("ulong")
 
 class W_Int64Box(W_SignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("int64")
+    descr__new__, _get_dtype = new_dtype_getter("int64")
+
+class W_LongLongBox(W_SignedIntegerBox, PrimitiveBox):
+    descr__new__, _get_dtype = new_dtype_getter('longlong')
 
 class W_UInt64Box(W_UnsignedIntegerBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("uint64")
+    descr__new__, _get_dtype = new_dtype_getter("uint64")
+
+class W_ULongLongBox(W_SignedIntegerBox, PrimitiveBox):
+    descr__new__, _get_dtype = new_dtype_getter('ulonglong')
 
 class W_InexactBox(W_NumberBox):
     _attrs_ = ()
@@ -182,16 +192,71 @@
     _attrs_ = ()
 
 class W_Float32Box(W_FloatingBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("float32")
+    descr__new__, _get_dtype = new_dtype_getter("float32")
 
 class W_Float64Box(W_FloatingBox, PrimitiveBox):
-    descr__new__, get_dtype = new_dtype_getter("float64")
+    descr__new__, _get_dtype = new_dtype_getter("float64")
 
 
+class W_FlexibleBox(W_GenericBox):
+    def __init__(self, arr, ofs, dtype):
+        self.arr = arr # we have to keep array alive
+        self.ofs = ofs
+        self.dtype = dtype
+
+    def get_dtype(self, space):
+        return self.arr.dtype
+
 @unwrap_spec(self=W_GenericBox)
 def descr_index(space, self):
     return space.index(self.item(space))
 
+class W_VoidBox(W_FlexibleBox):
+    @unwrap_spec(item=str)
+    def descr_getitem(self, space, item):
+        try:
+            ofs, dtype = self.dtype.fields[item]
+        except KeyError:
+            raise OperationError(space.w_IndexError,
+                                 space.wrap("Field %s does not exist" % item))
+        return dtype.itemtype.read(self.arr, 1, self.ofs, ofs, dtype)
+
+    @unwrap_spec(item=str)
+    def descr_setitem(self, space, item, w_value):
+        try:
+            ofs, dtype = self.dtype.fields[item]
+        except KeyError:
+            raise OperationError(space.w_IndexError,
+                                 space.wrap("Field %s does not exist" % item))
+        dtype.itemtype.store(self.arr, 1, self.ofs, ofs,
+                             dtype.coerce(space, w_value))
+
+class W_CharacterBox(W_FlexibleBox):
+    pass
+
+class W_StringBox(W_CharacterBox):
+    def descr__new__string_box(space, w_subtype, w_arg):
+        from pypy.module.micronumpy.interp_numarray import W_NDimArray
+        from pypy.module.micronumpy.interp_dtype import new_string_dtype
+
+        arg = space.str_w(space.str(w_arg))
+        arr = W_NDimArray([1], new_string_dtype(space, len(arg)))
+        for i in range(len(arg)):
+            arr.storage[i] = arg[i]
+        return W_StringBox(arr, 0, arr.dtype)
+
+
+class W_UnicodeBox(W_CharacterBox):
+    def descr__new__unicode_box(space, w_subtype, w_arg):
+        from pypy.module.micronumpy.interp_numarray import W_NDimArray
+        from pypy.module.micronumpy.interp_dtype import new_unicode_dtype
+
+        arg = space.unicode_w(unicode_from_object(space, w_arg))
+        arr = W_NDimArray([1], new_unicode_dtype(space, len(arg)))
+        # XXX not this way, we need store
+        #for i in range(len(arg)):
+        #    arr.storage[i] = arg[i]
+        return W_UnicodeBox(arr, 0, arr.dtype)
 
 W_GenericBox.typedef = TypeDef("generic",
     __module__ = "numpypy",
@@ -348,3 +413,28 @@
     __new__ = interp2app(W_Float64Box.descr__new__.im_func),
 )
 
+
+W_FlexibleBox.typedef = TypeDef("flexible", W_GenericBox.typedef,
+    __module__ = "numpypy",
+)
+
+W_VoidBox.typedef = TypeDef("void", W_FlexibleBox.typedef,
+    __module__ = "numpypy",
+    __getitem__ = interp2app(W_VoidBox.descr_getitem),
+    __setitem__ = interp2app(W_VoidBox.descr_setitem),
+)
+
+W_CharacterBox.typedef = TypeDef("character", W_FlexibleBox.typedef,
+    __module__ = "numpypy",
+)
+
+W_StringBox.typedef = TypeDef("string_", (str_typedef, W_CharacterBox.typedef),
+    __module__ = "numpypy",
+    __new__ = interp2app(W_StringBox.descr__new__string_box.im_func),
+)
+
+W_UnicodeBox.typedef = TypeDef("unicode_", (unicode_typedef, W_CharacterBox.typedef),
+    __module__ = "numpypy",
+    __new__ = interp2app(W_UnicodeBox.descr__new__unicode_box.im_func),
+)
+                                          
diff --git a/pypy/module/micronumpy/interp_dtype.py b/pypy/module/micronumpy/interp_dtype.py
--- a/pypy/module/micronumpy/interp_dtype.py
+++ b/pypy/module/micronumpy/interp_dtype.py
@@ -1,26 +1,29 @@
+
+import sys
 from pypy.interpreter.baseobjspace import Wrappable
 from pypy.interpreter.error import OperationError
-from pypy.interpreter.gateway import interp2app
+from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import (TypeDef, GetSetProperty,
     interp_attrproperty, interp_attrproperty_w)
-from pypy.module.micronumpy import types, signature, interp_boxes
+from pypy.module.micronumpy import types, interp_boxes
 from pypy.rlib.objectmodel import specialize
-from pypy.rlib.rarithmetic import LONG_BIT
-from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rlib.rarithmetic import LONG_BIT, r_longlong, r_ulonglong
 
 
 UNSIGNEDLTR = "u"
 SIGNEDLTR = "i"
 BOOLLTR = "b"
 FLOATINGLTR = "f"
-
-
-VOID_STORAGE = lltype.Array(lltype.Char, hints={'nolength': True, 'render_as_void': True})
+VOIDLTR = 'V'
+STRINGLTR = 'S'
+UNICODELTR = 'U'
 
 class W_Dtype(Wrappable):
     _immutable_fields_ = ["itemtype", "num", "kind"]
 
-    def __init__(self, itemtype, num, kind, name, char, w_box_type, alternate_constructors=[], aliases=[]):
+    def __init__(self, itemtype, num, kind, name, char, w_box_type,
+                 alternate_constructors=[], aliases=[],
+                 fields=None, fieldnames=None, native=True):
         self.itemtype = itemtype
         self.num = num
         self.kind = kind
@@ -29,53 +32,28 @@
         self.w_box_type = w_box_type
         self.alternate_constructors = alternate_constructors
         self.aliases = aliases
-
-    def malloc(self, length):
-        # XXX find out why test_zjit explodes with tracking of allocations
-        return lltype.malloc(VOID_STORAGE, self.itemtype.get_element_size() * length,
-            zero=True, flavor="raw",
-            track_allocation=False, add_memory_pressure=True
-        )
+        self.fields = fields
+        self.fieldnames = fieldnames
+        self.native = native
 
     @specialize.argtype(1)
     def box(self, value):
         return self.itemtype.box(value)
 
     def coerce(self, space, w_item):
-        return self.itemtype.coerce(space, w_item)
+        return self.itemtype.coerce(space, self, w_item)
 
-    def getitem(self, storage, i):
-        return self.itemtype.read(storage, self.itemtype.get_element_size(), i, 0)
+    def getitem(self, arr, i):
+        return self.itemtype.read(arr, 1, i, 0)
 
-    def getitem_bool(self, storage, i):
-        isize = self.itemtype.get_element_size()
-        return self.itemtype.read_bool(storage, isize, i, 0)
+    def getitem_bool(self, arr, i):
+        return self.itemtype.read_bool(arr, 1, i, 0)
 
-    def setitem(self, storage, i, box):
-        self.itemtype.store(storage, self.itemtype.get_element_size(), i, 0, box)
+    def setitem(self, arr, i, box):
+        self.itemtype.store(arr, 1, i, 0, box)
 
     def fill(self, storage, box, start, stop):
-        self.itemtype.fill(storage, self.itemtype.get_element_size(), box, start, stop, 0)
-
-    def descr__new__(space, w_subtype, w_dtype):
-        cache = get_dtype_cache(space)
-
-        if space.is_w(w_dtype, space.w_None):
-            return cache.w_float64dtype
-        elif space.isinstance_w(w_dtype, w_subtype):
-            return w_dtype
-        elif space.isinstance_w(w_dtype, space.w_str):
-            name = space.str_w(w_dtype)
-            for dtype in cache.builtin_dtypes:
-                if dtype.name == name or dtype.char == name or name in dtype.aliases:
-                    return dtype
-        else:
-            for dtype in cache.builtin_dtypes:
-                if w_dtype in dtype.alternate_constructors:
-                    return dtype
-                if w_dtype is dtype.w_box_type:
-                    return dtype
-        raise OperationError(space.w_TypeError, space.wrap("data type not understood"))
+        self.itemtype.fill(storage, self.get_size(), box, start, stop, 0)
 
     def descr_str(self, space):
         return space.wrap(self.name)
@@ -86,6 +64,14 @@
     def descr_get_itemsize(self, space):
         return space.wrap(self.itemtype.get_element_size())
 
+    def descr_get_byteorder(self, space):
+        if self.native:
+            return space.wrap('=')
+        return space.wrap(nonnative_byteorder_prefix)
+
+    def descr_get_alignment(self, space):
+        return space.wrap(self.itemtype.alignment)
+
     def descr_get_shape(self, space):
         return space.newtuple([])
 
@@ -99,31 +85,193 @@
     def descr_ne(self, space, w_other):
         return space.wrap(not self.eq(space, w_other))
 
+    def descr_get_fields(self, space):
+        if self.fields is None:
+            return space.w_None
+        w_d = space.newdict()
+        for name, (offset, subdtype) in self.fields.iteritems():
+            space.setitem(w_d, space.wrap(name), space.newtuple([subdtype,
+                                                                 space.wrap(offset)]))
+        return w_d
+
+    def descr_get_names(self, space):
+        if self.fieldnames is None:
+            return space.w_None
+        return space.newtuple([space.wrap(name) for name in self.fieldnames])
+
+    @unwrap_spec(item=str)
+    def descr_getitem(self, space, item):
+        if self.fields is None:
+            raise OperationError(space.w_KeyError, space.wrap("There are no keys in dtypes %s" % self.name))
+        try:
+            return self.fields[item][1]
+        except KeyError:
+            raise OperationError(space.w_KeyError, space.wrap("Field named %s not found" % item))
+
     def is_int_type(self):
         return (self.kind == SIGNEDLTR or self.kind == UNSIGNEDLTR or
                 self.kind == BOOLLTR)
 
+    def is_signed(self):
+        return self.kind == SIGNEDLTR
+
     def is_bool_type(self):
         return self.kind == BOOLLTR
 
+    def is_record_type(self):
+        return self.fields is not None
+
+    def __repr__(self):
+        if self.fields is not None:
+            return '<DType %r>' % self.fields
+        return '<DType %r>' % self.itemtype
+
+    def get_size(self):
+        return self.itemtype.get_element_size()
+
+def dtype_from_list(space, w_lst):
+    lst_w = space.listview(w_lst)
+    fields = {}
+    offset = 0
+    ofs_and_items = []
+    fieldnames = []
+    for w_elem in lst_w:
+        w_fldname, w_flddesc = space.fixedview(w_elem, 2)
+        subdtype = descr__new__(space, space.gettypefor(W_Dtype), w_flddesc)
+        fldname = space.str_w(w_fldname)
+        if fldname in fields:
+            raise OperationError(space.w_ValueError, space.wrap("two fields with the same name"))
+        assert isinstance(subdtype, W_Dtype)
+        fields[fldname] = (offset, subdtype)
+        ofs_and_items.append((offset, subdtype.itemtype))
+        offset += subdtype.itemtype.get_element_size()
+        fieldnames.append(fldname)
+    itemtype = types.RecordType(ofs_and_items, offset)
+    return W_Dtype(itemtype, 20, VOIDLTR, "void" + str(8 * itemtype.get_element_size()),
+                   "V", space.gettypefor(interp_boxes.W_VoidBox), fields=fields,
+                   fieldnames=fieldnames)
+
+def dtype_from_dict(space, w_dict):
+    raise OperationError(space.w_NotImplementedError, space.wrap(
+        "dtype from dict"))
+
+def variable_dtype(space, name):
+    if name[0] in '<>=':
+        name = name[1:]
+    char = name[0]
+    if len(name) == 1:
+        size = 0
+    else:
+        try:
+            size = int(name[1:])
+        except ValueError:
+            raise OperationError(space.w_TypeError, space.wrap("data type not understood"))
+    if char == 'S':
+        itemtype = types.StringType(size)
+        basename = 'string'
+        num = 18
+        w_box_type = space.gettypefor(interp_boxes.W_StringBox)
+    elif char == 'V':
+        num = 20
+        basename = 'void'
+        w_box_type = space.gettypefor(interp_boxes.W_VoidBox)
+        raise OperationError(space.w_NotImplementedError, space.wrap(
+            "pure void dtype"))
+    else:
+        assert char == 'U'
+        basename = 'unicode'
+        itemtype = types.UnicodeType(size)
+        num = 19
+        w_box_type = space.gettypefor(interp_boxes.W_UnicodeBox)
+    return W_Dtype(itemtype, num, char,
+                   basename + str(8 * itemtype.get_element_size()),
+                   char, w_box_type)
+
+def dtype_from_spec(space, name):
+        raise OperationError(space.w_NotImplementedError, space.wrap(
+            "dtype from spec"))    
+
+def descr__new__(space, w_subtype, w_dtype):
+    cache = get_dtype_cache(space)
+
+    if space.is_w(w_dtype, space.w_None):
+        return cache.w_float64dtype
+    elif space.isinstance_w(w_dtype, w_subtype):
+        return w_dtype
+    elif space.isinstance_w(w_dtype, space.w_str):
+        name = space.str_w(w_dtype)
+        if ',' in name:
+            return dtype_from_spec(space, name)
+        try:
+            return cache.dtypes_by_name[name]
+        except KeyError:
+            pass
+        if name[0] in 'VSU' or name[0] in '<>=' and name[1] in 'VSU':
+            return variable_dtype(space, name)
+    elif space.isinstance_w(w_dtype, space.w_list):
+        return dtype_from_list(space, w_dtype)
+    elif space.isinstance_w(w_dtype, space.w_dict):
+        return dtype_from_dict(space, w_dtype)
+    else:
+        for dtype in cache.builtin_dtypes:
+            if w_dtype in dtype.alternate_constructors:
+                return dtype
+            if w_dtype is dtype.w_box_type:
+                return dtype
+    raise OperationError(space.w_TypeError, space.wrap("data type not understood"))
+
 W_Dtype.typedef = TypeDef("dtype",
     __module__ = "numpypy",
-    __new__ = interp2app(W_Dtype.descr__new__.im_func),
+    __new__ = interp2app(descr__new__),
 
     __str__= interp2app(W_Dtype.descr_str),
     __repr__ = interp2app(W_Dtype.descr_repr),
     __eq__ = interp2app(W_Dtype.descr_eq),
     __ne__ = interp2app(W_Dtype.descr_ne),
+    __getitem__ = interp2app(W_Dtype.descr_getitem),
 
     num = interp_attrproperty("num", cls=W_Dtype),
     kind = interp_attrproperty("kind", cls=W_Dtype),
+    char = interp_attrproperty("char", cls=W_Dtype),
     type = interp_attrproperty_w("w_box_type", cls=W_Dtype),
+    byteorder = GetSetProperty(W_Dtype.descr_get_byteorder),
     itemsize = GetSetProperty(W_Dtype.descr_get_itemsize),
+    alignment = GetSetProperty(W_Dtype.descr_get_alignment),
     shape = GetSetProperty(W_Dtype.descr_get_shape),
     name = interp_attrproperty('name', cls=W_Dtype),
+    fields = GetSetProperty(W_Dtype.descr_get_fields),
+    names = GetSetProperty(W_Dtype.descr_get_names),
 )
 W_Dtype.typedef.acceptable_as_base_class = False
 
+if sys.byteorder == 'little':
+    byteorder_prefix = '<'
+    nonnative_byteorder_prefix = '>'
+else:
+    byteorder_prefix = '>'
+    nonnative_byteorder_prefix = '<'
+
+def new_string_dtype(space, size):
+    return W_Dtype(
+        types.StringType(size),
+        num=18,
+        kind=STRINGLTR,
+        name='string',
+        char='S' + str(size),
+        w_box_type = space.gettypefor(interp_boxes.W_StringBox),
+    )
+
+def new_unicode_dtype(space, size):
+    return W_Dtype(
+        types.UnicodeType(size),
+        num=19,
+        kind=UNICODELTR,
+        name='unicode',
+        char='U' + str(size),
+        w_box_type = space.gettypefor(interp_boxes.W_UnicodeBox),
+    )
+
+
 class DtypeCache(object):
     def __init__(self, space):
         self.w_booldtype = W_Dtype(
@@ -211,7 +359,6 @@
             name="int64",
             char="q",
             w_box_type=space.gettypefor(interp_boxes.W_Int64Box),
-            alternate_constructors=[space.w_long],
         )
         self.w_uint64dtype = W_Dtype(
             types.UInt64(),
@@ -239,18 +386,149 @@
             alternate_constructors=[space.w_float],
             aliases=["float"],
         )
-
+        self.w_longlongdtype = W_Dtype(
+            types.Int64(),
+            num=9,
+            kind=SIGNEDLTR,
+            name='int64',
+            char='q',
+            w_box_type = space.gettypefor(interp_boxes.W_LongLongBox),
+            alternate_constructors=[space.w_long],
+        )
+        self.w_ulonglongdtype = W_Dtype(
+            types.UInt64(),
+            num=10,
+            kind=UNSIGNEDLTR,
+            name='uint64',
+            char='Q',
+            w_box_type = space.gettypefor(interp_boxes.W_ULongLongBox),
+        )
+        self.w_stringdtype = W_Dtype(
+            types.StringType(1),
+            num=18,
+            kind=STRINGLTR,
+            name='string',
+            char='S',
+            w_box_type = space.gettypefor(interp_boxes.W_StringBox),
+            alternate_constructors=[space.w_str],
+        )
+        self.w_unicodedtype = W_Dtype(
+            types.UnicodeType(1),
+            num=19,
+            kind=UNICODELTR,
+            name='unicode',
+            char='U',
+            w_box_type = space.gettypefor(interp_boxes.W_UnicodeBox),
+            alternate_constructors=[space.w_unicode],
+        )
+        self.w_voiddtype = W_Dtype(
+            types.VoidType(0),
+            num=20,
+            kind=VOIDLTR,
+            name='void',
+            char='V',
+            w_box_type = space.gettypefor(interp_boxes.W_VoidBox),
+            #alternate_constructors=[space.w_buffer],
+            # XXX no buffer in space
+        )
         self.builtin_dtypes = [
             self.w_booldtype, self.w_int8dtype, self.w_uint8dtype,
             self.w_int16dtype, self.w_uint16dtype, self.w_int32dtype,
             self.w_uint32dtype, self.w_longdtype, self.w_ulongdtype,
-            self.w_int64dtype, self.w_uint64dtype, self.w_float32dtype,
-            self.w_float64dtype
+            self.w_longlongdtype, self.w_ulonglongdtype,
+            self.w_float32dtype,
+            self.w_float64dtype, self.w_stringdtype, self.w_unicodedtype,
+            self.w_voiddtype,
         ]
         self.dtypes_by_num_bytes = sorted(
             (dtype.itemtype.get_element_size(), dtype)
             for dtype in self.builtin_dtypes
         )
+        self.dtypes_by_name = {}
+        for dtype in self.builtin_dtypes:
+            self.dtypes_by_name[dtype.name] = dtype
+            can_name = dtype.kind + str(dtype.itemtype.get_element_size())
+            self.dtypes_by_name[can_name] = dtype
+            self.dtypes_by_name[byteorder_prefix + can_name] = dtype
+            self.dtypes_by_name['=' + can_name] = dtype
+            new_name = nonnative_byteorder_prefix + can_name
+            itemtypename = dtype.itemtype.__class__.__name__
+            itemtype = getattr(types, 'NonNative' + itemtypename)()
+            self.dtypes_by_name[new_name] = W_Dtype(
+                itemtype,
+                dtype.num, dtype.kind, new_name, dtype.char, dtype.w_box_type,
+                native=False)
+            for alias in dtype.aliases:
+                self.dtypes_by_name[alias] = dtype
+            self.dtypes_by_name[dtype.char] = dtype
+
+        typeinfo_full = {
+            'LONGLONG': self.w_int64dtype,
+            'SHORT': self.w_int16dtype,
+            'VOID': self.w_voiddtype,
+            #'LONGDOUBLE':,
+            'UBYTE': self.w_uint8dtype,
+            'UINTP': self.w_ulongdtype,
+            'ULONG': self.w_ulongdtype,
+            'LONG': self.w_longdtype,
+            'UNICODE': self.w_unicodedtype,
+            #'OBJECT',
+            'ULONGLONG': self.w_ulonglongdtype,
+            'STRING': self.w_stringdtype,
+            #'CDOUBLE',
+            #'DATETIME',
+            'UINT': self.w_uint32dtype,
+            'INTP': self.w_longdtype,
+            #'HALF',
+            'BYTE': self.w_int8dtype,
+            #'CFLOAT': ,
+            #'TIMEDELTA',
+            'INT': self.w_int32dtype,
+            'DOUBLE': self.w_float64dtype,
+            'USHORT': self.w_uint16dtype,
+            'FLOAT': self.w_float32dtype,
+            'BOOL': self.w_booldtype,
+            #, 'CLONGDOUBLE']
+        }
+        typeinfo_partial = {
+            'Generic': interp_boxes.W_GenericBox,
+            'Character': interp_boxes.W_CharacterBox,
+            'Flexible': interp_boxes.W_FlexibleBox,
+            'Inexact': interp_boxes.W_InexactBox,
+            'Integer': interp_boxes.W_IntegerBox,
+            'SignedInteger': interp_boxes.W_SignedIntegerBox,
+            'UnsignedInteger': interp_boxes.W_UnsignedIntegerBox,
+            #'ComplexFloating',
+            'Number': interp_boxes.W_NumberBox,
+            'Floating': interp_boxes.W_FloatingBox
+        }
+        w_typeinfo = space.newdict()
+        for k, v in typeinfo_partial.iteritems():
+            space.setitem(w_typeinfo, space.wrap(k), space.gettypefor(v))
+        for k, dtype in typeinfo_full.iteritems():
+            itemsize = dtype.itemtype.get_element_size()
+            items_w = [space.wrap(dtype.char),
+                       space.wrap(dtype.num),
+                       space.wrap(itemsize * 8), # in case of changing
+                       # number of bits per byte in the future
+                       space.wrap(itemsize or 1)]
+            if dtype.is_int_type():
+                if dtype.kind == BOOLLTR:
+                    w_maxobj = space.wrap(1)
+                    w_minobj = space.wrap(0)
+                elif dtype.is_signed():
+                    w_maxobj = space.wrap(r_longlong((1 << (itemsize*8 - 1))
+                                          - 1))
+                    w_minobj = space.wrap(r_longlong(-1) << (itemsize*8 - 1))
+                else:
+                    w_maxobj = space.wrap(r_ulonglong(1 << (itemsize*8)) - 1)
+                    w_minobj = space.wrap(0)
+                items_w = items_w + [w_maxobj, w_minobj]
+            items_w = items_w + [dtype.w_box_type]
+                       
+            w_tuple = space.newtuple(items_w)
+            space.setitem(w_typeinfo, space.wrap(k), w_tuple)
+        self.w_typeinfo = w_typeinfo
 
 def get_dtype_cache(space):
     return space.fromcache(DtypeCache)
diff --git a/pypy/module/micronumpy/interp_iter.py b/pypy/module/micronumpy/interp_iter.py
--- a/pypy/module/micronumpy/interp_iter.py
+++ b/pypy/module/micronumpy/interp_iter.py
@@ -2,7 +2,7 @@
 from pypy.rlib import jit
 from pypy.rlib.objectmodel import instantiate
 from pypy.module.micronumpy.strides import calculate_broadcast_strides,\
-     calculate_slice_strides, calculate_dot_strides
+     calculate_slice_strides, calculate_dot_strides, enumerate_chunks
 
 """ This is a mini-tutorial on iterators, strides, and
 memory layout. It assumes you are familiar with the terms, see
@@ -42,28 +42,81 @@
 we can go faster.
 All the calculations happen in next()
 
-next_step_x() tries to do the iteration for a number of steps at once,
+next_skip_x() tries to do the iteration for a number of steps at once,
 but then we cannot gaurentee that we only overflow one single shape 
 dimension, perhaps we could overflow times in one big step.
 """
 
 # structures to describe slicing
 
-class Chunk(object):
+class BaseChunk(object):
+    pass
+
+class RecordChunk(BaseChunk):
+    def __init__(self, name):
+        self.name = name
+
+    def apply(self, arr):
+        from pypy.module.micronumpy.interp_numarray import W_NDimSlice
+
+        arr = arr.get_concrete()
+        ofs, subdtype = arr.dtype.fields[self.name]
+        # strides backstrides are identical, ofs only changes start
+        return W_NDimSlice(arr.start + ofs, arr.strides[:], arr.backstrides[:],
+                           arr.shape[:], arr, subdtype)
+
+class Chunks(BaseChunk):
+    def __init__(self, l):
+        self.l = l
+
+    @jit.unroll_safe
+    def extend_shape(self, old_shape):
+        shape = []
+        i = -1
+        for i, c in enumerate_chunks(self.l):
+            if c.step != 0:
+                shape.append(c.lgt)
+        s = i + 1
+        assert s >= 0
+        return shape[:] + old_shape[s:]
+
+    def apply(self, arr):
+        from pypy.module.micronumpy.interp_numarray import W_NDimSlice,\
+             VirtualSlice, ConcreteArray
+
+        shape = self.extend_shape(arr.shape)
+        if not isinstance(arr, ConcreteArray):
+            return VirtualSlice(arr, self, shape)
+        r = calculate_slice_strides(arr.shape, arr.start, arr.strides,
+                                    arr.backstrides, self.l)
+        _, start, strides, backstrides = r
+        return W_NDimSlice(start, strides[:], backstrides[:],
+                           shape[:], arr)
+
+
+class Chunk(BaseChunk):
+    axis_step = 1
+
     def __init__(self, start, stop, step, lgt):
         self.start = start
         self.stop = stop
         self.step = step
         self.lgt = lgt
 
-    def extend_shape(self, shape):
-        if self.step != 0:
-            shape.append(self.lgt)
-
     def __repr__(self):
         return 'Chunk(%d, %d, %d, %d)' % (self.start, self.stop, self.step,
                                           self.lgt)
 
+class NewAxisChunk(Chunk):
+    start = 0
+    stop = 1
+    step = 1
+    lgt = 1
+    axis_step = 0
+
+    def __init__(self):
+        pass
+
 class BaseTransform(object):
     pass
 
@@ -95,17 +148,19 @@
         raise NotImplementedError
 
 class ArrayIterator(BaseIterator):
-    def __init__(self, size):
+    def __init__(self, size, element_size):
         self.offset = 0
         self.size = size
+        self.element_size = element_size
 
     def next(self, shapelen):
         return self.next_skip_x(1)
 
-    def next_skip_x(self, ofs):
+    def next_skip_x(self, x):
         arr = instantiate(ArrayIterator)
         arr.size = self.size
-        arr.offset = self.offset + ofs
+        arr.offset = self.offset + x * self.element_size
+        arr.element_size = self.element_size
         return arr
 
     def next_no_increase(self, shapelen):
@@ -152,7 +207,7 @@
         elif isinstance(t, ViewTransform):
             r = calculate_slice_strides(self.res_shape, self.offset,
                                         self.strides,
-                                        self.backstrides, t.chunks)
+                                        self.backstrides, t.chunks.l)
             return ViewIterator(r[1], r[2], r[3], r[0])
 
     @jit.unroll_safe
diff --git a/pypy/module/micronumpy/interp_numarray.py b/pypy/module/micronumpy/interp_numarray.py
--- a/pypy/module/micronumpy/interp_numarray.py
+++ b/pypy/module/micronumpy/interp_numarray.py
@@ -7,10 +7,10 @@
 from pypy.module.micronumpy.appbridge import get_appbridge_cache
 from pypy.module.micronumpy.dot import multidim_dot, match_dot_shapes
 from pypy.module.micronumpy.interp_iter import (ArrayIterator,
-    SkipLastAxisIterator, Chunk, ViewIterator)
-from pypy.module.micronumpy.strides import (calculate_slice_strides,
-    shape_agreement, find_shape_and_elems, get_shape_from_iterable,
-    calc_new_strides, to_coords)
+    SkipLastAxisIterator, Chunk, ViewIterator, Chunks, RecordChunk,
+    NewAxisChunk)
+from pypy.module.micronumpy.strides import (shape_agreement,
+    find_shape_and_elems, get_shape_from_iterable, calc_new_strides, to_coords)
 from pypy.rlib import jit
 from pypy.rlib.rstring import StringBuilder
 from pypy.rpython.lltypesystem import lltype, rffi
@@ -47,7 +47,7 @@
 )
 flat_set_driver = jit.JitDriver(
     greens=['shapelen', 'base'],
-    reds=['step', 'ai', 'lngth', 'arr', 'basei'],
+    reds=['step', 'lngth', 'ri', 'arr', 'basei'],
     name='numpy_flatset',
 )
 
@@ -79,8 +79,8 @@
         dtype = space.interp_w(interp_dtype.W_Dtype,
             space.call_function(space.gettypefor(interp_dtype.W_Dtype), w_dtype)
         )
-        size, shape = _find_size_and_shape(space, w_size)
-        return space.wrap(W_NDimArray(size, shape[:], dtype=dtype))
+        shape = _find_shape(space, w_size)
+        return space.wrap(W_NDimArray(shape[:], dtype=dtype))
 
     def _unaryop_impl(ufunc_name):
         def impl(self, space):
@@ -225,8 +225,7 @@
             return scalar_w(space, dtype, space.wrap(0))
         # Do the dims match?
         out_shape, other_critical_dim = match_dot_shapes(space, self, other)
-        out_size = support.product(out_shape)
-        result = W_NDimArray(out_size, out_shape, dtype)
+        result = W_NDimArray(out_shape, dtype)
         # This is the place to add fpypy and blas
         return multidim_dot(space, self.get_concrete(),
                             other.get_concrete(), result, dtype,
@@ -245,7 +244,7 @@
         return space.wrap(self.find_dtype().itemtype.get_element_size())
 
     def descr_get_nbytes(self, space):
-        return space.wrap(self.size * self.find_dtype().itemtype.get_element_size())
+        return space.wrap(self.size)
 
     @jit.unroll_safe
     def descr_get_shape(self, space):
@@ -253,13 +252,16 @@
 
     def descr_set_shape(self, space, w_iterable):
         new_shape = get_shape_from_iterable(space,
-                            self.size, w_iterable)
+                            support.product(self.shape), w_iterable)
         if isinstance(self, Scalar):
             return
         self.get_concrete().setshape(space, new_shape)
 
     def descr_get_size(self, space):
-        return space.wrap(self.size)
+        return space.wrap(self.get_size())
+
+    def get_size(self):
+        return self.size // self.find_dtype().get_size()
 
     def descr_copy(self, space):
         return self.copy(space)
@@ -279,7 +281,7 @@
 
     def empty_copy(self, space, dtype):
         shape = self.shape
-        return W_NDimArray(support.product(shape), shape[:], dtype, 'C')
+        return W_NDimArray(shape[:], dtype, 'C')
 
     def descr_len(self, space):
         if len(self.shape):
@@ -320,7 +322,16 @@
         """ The result of getitem/setitem is a single item if w_idx
         is a list of scalars that match the size of shape
         """
+        if space.isinstance_w(w_idx, space.w_str):
+            return False
         shape_len = len(self.shape)
+        if space.isinstance_w(w_idx, space.w_tuple):
+            for w_item in space.fixedview(w_idx):
+                if (space.isinstance_w(w_item, space.w_slice) or
+                    space.is_w(w_item, space.w_None)):
+                    return False
+        elif space.is_w(w_idx, space.w_None):
+            return False
         if shape_len == 0:
             raise OperationError(space.w_IndexError, space.wrap(
                 "0-d arrays can't be indexed"))
@@ -336,43 +347,55 @@
         if lgt > shape_len:
             raise OperationError(space.w_IndexError,
                                  space.wrap("invalid index"))
-        if lgt < shape_len:
-            return False
-        for w_item in space.fixedview(w_idx):
-            if space.isinstance_w(w_item, space.w_slice):
-                return False
-        return True
+        return lgt == shape_len
 
     @jit.unroll_safe
     def _prepare_slice_args(self, space, w_idx):
+        if space.isinstance_w(w_idx, space.w_str):
+            idx = space.str_w(w_idx)
+            dtype = self.find_dtype()
+            if not dtype.is_record_type() or idx not in dtype.fields:
+                raise OperationError(space.w_ValueError, space.wrap(
+                    "field named %s not defined" % idx))
+            return RecordChunk(idx)
         if (space.isinstance_w(w_idx, space.w_int) or
             space.isinstance_w(w_idx, space.w_slice)):
-            return [Chunk(*space.decode_index4(w_idx, self.shape[0]))]
-        return [Chunk(*space.decode_index4(w_item, self.shape[i])) for i, w_item in
-                enumerate(space.fixedview(w_idx))]
+            return Chunks([Chunk(*space.decode_index4(w_idx, self.shape[0]))])
+        elif space.is_w(w_idx, space.w_None):
+            return Chunks([NewAxisChunk()])
+        result = []
+        i = 0
+        for w_item in space.fixedview(w_idx):
+            if space.is_w(w_item, space.w_None):
+                result.append(NewAxisChunk())
+            else:
+                result.append(Chunk(*space.decode_index4(w_item,
+                                                         self.shape[i])))
+                i += 1
+        return Chunks(result)
 
-    def count_all_true(self, arr):
-        sig = arr.find_sig()
-        frame = sig.create_frame(arr)
-        shapelen = len(arr.shape)
+    def count_all_true(self):
+        sig = self.find_sig()
+        frame = sig.create_frame(self)
+        shapelen = len(self.shape)
         s = 0
         iter = None
         while not frame.done():
-            count_driver.jit_merge_point(arr=arr, frame=frame, iter=iter, s=s,
+            count_driver.jit_merge_point(arr=self, frame=frame, iter=iter, s=s,
                                          shapelen=shapelen)
             iter = frame.get_final_iter()
-            s += arr.dtype.getitem_bool(arr.storage, iter.offset)
+            s += self.dtype.getitem_bool(self, iter.offset)
             frame.next(shapelen)
         return s
 
     def getitem_filter(self, space, arr):
         concr = arr.get_concrete()
-        if concr.size > self.size:
+        if concr.get_size() > self.get_size():
             raise OperationError(space.w_IndexError,
                                  space.wrap("index out of range for array"))
-        size = self.count_all_true(concr)
-        res = W_NDimArray(size, [size], self.find_dtype())
-        ri = ArrayIterator(size)
+        size = concr.count_all_true()
+        res = W_NDimArray([size], self.find_dtype())
+        ri = res.create_iter()
         shapelen = len(self.shape)
         argi = concr.create_iter()
         sig = self.find_sig()
@@ -382,7 +405,7 @@
             filter_driver.jit_merge_point(concr=concr, argi=argi, ri=ri,
                                           frame=frame, v=v, res=res, sig=sig,
                                           shapelen=shapelen, self=self)
-            if concr.dtype.getitem_bool(concr.storage, argi.offset):
+            if concr.dtype.getitem_bool(concr, argi.offset):
                 v = sig.eval(frame, self)
                 res.setitem(ri.offset, v)
                 ri = ri.next(1)
@@ -392,23 +415,6 @@
             frame.next(shapelen)
         return res
 
-    def setitem_filter(self, space, idx, val):
-        size = self.count_all_true(idx)
-        arr = SliceArray([size], self.dtype, self, val)
-        sig = arr.find_sig()
-        shapelen = len(self.shape)
-        frame = sig.create_frame(arr)
-        idxi = idx.create_iter()
-        while not frame.done():
-            filter_set_driver.jit_merge_point(idx=idx, idxi=idxi, sig=sig,
-                                              frame=frame, arr=arr,
-                                              shapelen=shapelen)
-            if idx.dtype.getitem_bool(idx.storage, idxi.offset):
-                sig.eval(frame, arr)
-                frame.next_from_second(1)
-            frame.next_first(shapelen)
-            idxi = idxi.next(shapelen)
-
     def descr_getitem(self, space, w_idx):
         if (isinstance(w_idx, BaseArray) and w_idx.shape == self.shape and
             w_idx.find_dtype().is_bool_type()):
@@ -418,7 +424,24 @@
             item = concrete._index_of_single_item(space, w_idx)
             return concrete.getitem(item)
         chunks = self._prepare_slice_args(space, w_idx)
-        return self.create_slice(chunks)
+        return chunks.apply(self)
+
+    def setitem_filter(self, space, idx, val):
+        size = idx.count_all_true()
+        arr = SliceArray([size], self.dtype, self, val)
+        sig = arr.find_sig()
+        shapelen = len(self.shape)
+        frame = sig.create_frame(arr)
+        idxi = idx.create_iter()
+        while not frame.done():
+            filter_set_driver.jit_merge_point(idx=idx, idxi=idxi, sig=sig,
+                                              frame=frame, arr=arr,
+                                              shapelen=shapelen)
+            if idx.dtype.getitem_bool(idx, idxi.offset):
+                sig.eval(frame, arr)
+                frame.next_from_second(1)
+            frame.next_first(shapelen)
+            idxi = idxi.next(shapelen)
 
     def descr_setitem(self, space, w_idx, w_value):
         self.invalidated()
@@ -436,26 +459,9 @@
         if not isinstance(w_value, BaseArray):
             w_value = convert_to_array(space, w_value)
         chunks = self._prepare_slice_args(space, w_idx)
-        view = self.create_slice(chunks).get_concrete()
+        view = chunks.apply(self).get_concrete()
         view.setslice(space, w_value)
 
-    @jit.unroll_safe
-    def create_slice(self, chunks):
-        shape = []
-        i = -1
-        for i, chunk in enumerate(chunks):
-            chunk.extend_shape(shape)
-        s = i + 1
-        assert s >= 0
-        shape += self.shape[s:]
-        if not isinstance(self, ConcreteArray):
-            return VirtualSlice(self, chunks, shape)
-        r = calculate_slice_strides(self.shape, self.start, self.strides,
-                                    self.backstrides, chunks)
-        _, start, strides, backstrides = r
-        return W_NDimSlice(start, strides[:], backstrides[:],
-                           shape[:], self)
-
     def descr_reshape(self, space, args_w):
         """reshape(...)
         a.reshape(shape)
@@ -472,13 +478,16 @@
             w_shape = args_w[0]
         else:
             w_shape = space.newtuple(args_w)
-        new_shape = get_shape_from_iterable(space, self.size, w_shape)
+        new_shape = get_shape_from_iterable(space, support.product(self.shape),
+                                            w_shape)
         return self.reshape(space, new_shape)
 
     def reshape(self, space, new_shape):
         concrete = self.get_concrete()
         # Since we got to here, prod(new_shape) == self.size
-        new_strides = calc_new_strides(new_shape, concrete.shape,
+        new_strides = None
+        if self.size > 0:
+            new_strides = calc_new_strides(new_shape, concrete.shape,
                                      concrete.strides, concrete.order)
         if new_strides:
             # We can create a view, strides somehow match up.
@@ -508,7 +517,7 @@
     def descr_mean(self, space, w_axis=None):
         if space.is_w(w_axis, space.w_None):
             w_axis = space.wrap(-1)
-            w_denom = space.wrap(self.size)
+            w_denom = space.wrap(support.product(self.shape))
         else:
             dim = space.int_w(w_axis)
             w_denom = space.wrap(self.shape[dim])
@@ -527,7 +536,7 @@
         concr.fill(space, w_value)
 
     def descr_nonzero(self, space):
-        if self.size > 1:
+        if self.get_size() > 1:
             raise OperationError(space.w_ValueError, space.wrap(
                 "The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()"))
         concr = self.get_concrete_or_scalar()
@@ -606,8 +615,7 @@
                                  space.wrap("axis unsupported for take"))
         index_i = index.create_iter()
         res_shape = index.shape
-        size = support.product(res_shape)
-        res = W_NDimArray(size, res_shape[:], concr.dtype, concr.order)
+        res = W_NDimArray(res_shape[:], concr.dtype, concr.order)
         res_i = res.create_iter()
         shapelen = len(index.shape)
         sig = concr.find_sig()
@@ -646,6 +654,11 @@
         raise OperationError(space.w_NotImplementedError, space.wrap(
             "non-int arg not supported"))
 
+    def descr_tostring(self, space):
+        ra = ToStringArray(self)
+        loop.compute(ra)
+        return space.wrap(ra.s.build())
+
     def compute_first_step(self, sig, frame):
         pass
 
@@ -667,8 +680,7 @@
     """
     Intermediate class representing a literal.
     """
-    size = 1
-    _attrs_ = ["dtype", "value", "shape"]
+    _attrs_ = ["dtype", "value", "shape", "size"]
 
     def __init__(self, dtype, value):
         self.shape = []
@@ -676,6 +688,7 @@
         self.dtype = dtype
         assert isinstance(value, interp_boxes.W_GenericBox)
         self.value = value
+        self.size = dtype.get_size()
 
     def find_dtype(self):
         return self.dtype
@@ -693,8 +706,7 @@
         return self
 
     def reshape(self, space, new_shape):
-        size = support.product(new_shape)
-        res = W_NDimArray(size, new_shape, self.dtype, 'C')
+        res = W_NDimArray(new_shape, self.dtype, 'C')
         res.setitem(0, self.value)
         return res
 
@@ -707,6 +719,7 @@
         self.forced_result = None
         self.res_dtype = res_dtype
         self.name = name
+        self.size = support.product(self.shape) * res_dtype.get_size()
 
     def _del_sources(self):
         # Function for deleting references to source arrays,
@@ -714,7 +727,7 @@
         raise NotImplementedError
 
     def compute(self):
-        ra = ResultArray(self, self.size, self.shape, self.res_dtype)
+        ra = ResultArray(self, self.shape, self.res_dtype)
         loop.compute(ra)
         return ra.left
 
@@ -742,7 +755,6 @@
     def __init__(self, child, chunks, shape):
         self.child = child
         self.chunks = chunks
-        self.size = support.product(shape)
         VirtualArray.__init__(self, 'slice', shape, child.find_dtype())
 
     def create_sig(self):
@@ -754,7 +766,7 @@
     def force_if_needed(self):
         if self.forced_result is None:
             concr = self.child.get_concrete()
-            self.forced_result = concr.create_slice(self.chunks)
+            self.forced_result = self.chunks.apply(concr)
 
     def _del_sources(self):
         self.child = None
@@ -787,7 +799,6 @@
         self.left = left
         self.right = right
         self.calc_dtype = calc_dtype
-        self.size = support.product(self.shape)
 
     def _del_sources(self):
         self.left = None
@@ -815,15 +826,30 @@
                                self.left.create_sig(), self.right.create_sig())
 
 class ResultArray(Call2):
-    def __init__(self, child, size, shape, dtype, res=None, order='C'):
+    def __init__(self, child, shape, dtype, res=None, order='C'):
         if res is None:
-            res = W_NDimArray(size, shape, dtype, order)
+            res = W_NDimArray(shape, dtype, order)
         Call2.__init__(self, None, 'assign', shape, dtype, dtype, res, child)
 
     def create_sig(self):
         return signature.ResultSignature(self.res_dtype, self.left.create_sig(),
                                          self.right.create_sig())
 
+class ToStringArray(Call1):
+    def __init__(self, child):
+        dtype = child.find_dtype()
+        self.item_size = dtype.itemtype.get_element_size()
+        self.s = StringBuilder(child.size * self.item_size)
+        Call1.__init__(self, None, 'tostring', child.shape, dtype, dtype,
+                       child)
+        self.res = W_NDimArray([1], dtype, 'C')
+        self.res_casted = rffi.cast(rffi.CArrayPtr(lltype.Char),
+                                    self.res.storage)
+
+    def create_sig(self):
+        return signature.ToStringSignature(self.calc_dtype,
+                                           self.values.create_sig())
+
 def done_if_true(dtype, val):
     return dtype.itemtype.bool(val)
 
@@ -895,13 +921,13 @@
     """
     _immutable_fields_ = ['storage']
 
-    def __init__(self, size, shape, dtype, order='C', parent=None):
-        self.size = size
+    def __init__(self, shape, dtype, order='C', parent=None):
         self.parent = parent
+        self.size = support.product(shape) * dtype.get_size()
         if parent is not None:
             self.storage = parent.storage
         else:
-            self.storage = dtype.malloc(size)
+            self.storage = dtype.itemtype.malloc(self.size)
         self.order = order
         self.dtype = dtype
         if self.strides is None:
@@ -920,13 +946,14 @@
         return self.dtype
 
     def getitem(self, item):
-        return self.dtype.getitem(self.storage, item)
+        return self.dtype.getitem(self, item)
 
     def setitem(self, item, value):
         self.invalidated()
-        self.dtype.setitem(self.storage, item, value)
+        self.dtype.setitem(self, item, value)
 
     def calc_strides(self, shape):
+        dtype = self.find_dtype()
         strides = []
         backstrides = []
         s = 1
@@ -934,8 +961,8 @@
         if self.order == 'C':
             shape_rev.reverse()
         for sh in shape_rev:
-            strides.append(s)
-            backstrides.append(s * (sh - 1))
+            strides.append(s * dtype.get_size())
+            backstrides.append(s * (sh - 1) * dtype.get_size())
             s *= sh
         if self.order == 'C':
             strides.reverse()
@@ -983,9 +1010,9 @@
         shapelen = len(self.shape)
         if shapelen == 1:
             rffi.c_memcpy(
-                rffi.ptradd(self.storage, self.start * itemsize),
-                rffi.ptradd(w_value.storage, w_value.start * itemsize),
-                self.size * itemsize
+                rffi.ptradd(self.storage, self.start),
+                rffi.ptradd(w_value.storage, w_value.start),
+                self.size
             )
         else:
             dest = SkipLastAxisIterator(self)
@@ -1000,7 +1027,7 @@
                 dest.next()
 
     def copy(self, space):
-        array = W_NDimArray(self.size, self.shape[:], self.dtype, self.order)
+        array = W_NDimArray(self.shape[:], self.dtype, self.order)
         array.setslice(space, self)
         return array
 
@@ -1014,14 +1041,15 @@
 
 
 class W_NDimSlice(ViewArray):
-    def __init__(self, start, strides, backstrides, shape, parent):
+    def __init__(self, start, strides, backstrides, shape, parent, dtype=None):
         assert isinstance(parent, ConcreteArray)
         if isinstance(parent, W_NDimSlice):
             parent = parent.parent
         self.strides = strides
         self.backstrides = backstrides
-        ViewArray.__init__(self, support.product(shape), shape, parent.dtype,
-                           parent.order, parent)
+        if dtype is None:
+            dtype = parent.dtype
+        ViewArray.__init__(self, shape, dtype, parent.order, parent)
         self.start = start
 
     def create_iter(self, transforms=None):
@@ -1031,18 +1059,19 @@
     def setshape(self, space, new_shape):
         if len(self.shape) < 1:
             return
-        elif len(self.shape) < 2:
+        elif len(self.shape) < 2 or self.size < 1:
             # TODO: this code could be refactored into calc_strides
             # but then calc_strides would have to accept a stepping factor
             strides = []
             backstrides = []
-            s = self.strides[0]
+            dtype = self.find_dtype()
+            s = self.strides[0] // dtype.get_size()
             if self.order == 'C':
                 new_shape.reverse()
             for sh in new_shape:
-                strides.append(s)
-                backstrides.append(s * (sh - 1))
-                s *= sh
+                strides.append(s * dtype.get_size())
+                backstrides.append(s * (sh - 1) * dtype.get_size())
+                s *= max(1, sh)
             if self.order == 'C':
                 strides.reverse()
                 backstrides.reverse()
@@ -1069,14 +1098,16 @@
     """
     def setitem(self, item, value):
         self.invalidated()
-        self.dtype.setitem(self.storage, item, value)
+        self.dtype.setitem(self, item, value)
 
     def setshape(self, space, new_shape):
         self.shape = new_shape
         self.calc_strides(new_shape)
 
     def create_iter(self, transforms=None):
-        return ArrayIterator(self.size).apply_transformations(self, transforms)
+        esize = self.find_dtype().get_size()
+        return ArrayIterator(self.size, esize).apply_transformations(self,
+                                                                     transforms)
 
     def create_sig(self):
         return signature.ArraySignature(self.dtype)
@@ -1084,18 +1115,13 @@
     def __del__(self):
         lltype.free(self.storage, flavor='raw', track_allocation=False)
 
-def _find_size_and_shape(space, w_size):
+def _find_shape(space, w_size):
     if space.isinstance_w(w_size, space.w_int):
-        size = space.int_w(w_size)
-        shape = [size]
-    else:
-        size = 1
-        shape = []
-        for w_item in space.fixedview(w_size):
-            item = space.int_w(w_item)
-            size *= item
-            shape.append(item)
-    return size, shape
+        return [space.int_w(w_size)]
+    shape = []
+    for w_item in space.fixedview(w_size):
+        shape.append(space.int_w(w_item))
+    return shape
 
 @unwrap_spec(subok=bool, copy=bool, ownmaskna=bool)
 def array(space, w_item_or_iterable, w_dtype=None, w_order=None,
@@ -1129,28 +1155,28 @@
         if copy:
             return w_item_or_iterable.copy(space)
         return w_item_or_iterable
-    shape, elems_w = find_shape_and_elems(space, w_item_or_iterable)
+    if w_dtype is None or space.is_w(w_dtype, space.w_None):
+        dtype = None
+    else:
+        dtype = space.interp_w(interp_dtype.W_Dtype,
+           space.call_function(space.gettypefor(interp_dtype.W_Dtype), w_dtype))
+    shape, elems_w = find_shape_and_elems(space, w_item_or_iterable, dtype)
     # they come back in C order
-    size = len(elems_w)
-    if w_dtype is None or space.is_w(w_dtype, space.w_None):
-        w_dtype = None
+    if dtype is None:
         for w_elem in elems_w:
-            w_dtype = interp_ufuncs.find_dtype_for_scalar(space, w_elem,
-                                                          w_dtype)
-            if w_dtype is interp_dtype.get_dtype_cache(space).w_float64dtype:
+            dtype = interp_ufuncs.find_dtype_for_scalar(space, w_elem,
+                                                        dtype)
+            if dtype is interp_dtype.get_dtype_cache(space).w_float64dtype:
                 break
-    if w_dtype is None:
-        w_dtype = space.w_None
-    dtype = space.interp_w(interp_dtype.W_Dtype,
-        space.call_function(space.gettypefor(interp_dtype.W_Dtype), w_dtype)
-    )
-    arr = W_NDimArray(size, shape[:], dtype=dtype, order=order)
+        if dtype is None:
+            dtype = interp_dtype.get_dtype_cache(space).w_float64dtype
+    arr = W_NDimArray(shape[:], dtype=dtype, order=order)
     shapelen = len(shape)
-    arr_iter = ArrayIterator(arr.size)
+    arr_iter = arr.create_iter()
     # XXX we might want to have a jitdriver here
     for i in range(len(elems_w)):
         w_elem = elems_w[i]
-        dtype.setitem(arr.storage, arr_iter.offset,
+        dtype.setitem(arr, arr_iter.offset,
                       dtype.coerce(space, w_elem))
         arr_iter = arr_iter.next(shapelen)
     return arr
@@ -1159,22 +1185,22 @@
     dtype = space.interp_w(interp_dtype.W_Dtype,
         space.call_function(space.gettypefor(interp_dtype.W_Dtype), w_dtype)
     )
-    size, shape = _find_size_and_shape(space, w_size)
+    shape = _find_shape(space, w_size)
     if not shape:
         return scalar_w(space, dtype, space.wrap(0))
-    return space.wrap(W_NDimArray(size, shape[:], dtype=dtype))
+    return space.wrap(W_NDimArray(shape[:], dtype=dtype))
 
 def ones(space, w_size, w_dtype=None):
     dtype = space.interp_w(interp_dtype.W_Dtype,
         space.call_function(space.gettypefor(interp_dtype.W_Dtype), w_dtype)
     )
 
-    size, shape = _find_size_and_shape(space, w_size)
+    shape = _find_shape(space, w_size)
     if not shape:
         return scalar_w(space, dtype, space.wrap(1))
-    arr = W_NDimArray(size, shape[:], dtype=dtype)
+    arr = W_NDimArray(shape[:], dtype=dtype)
     one = dtype.box(1)
-    arr.dtype.fill(arr.storage, one, 0, size)
+    arr.dtype.fill(arr.storage, one, 0, arr.size)
     return space.wrap(arr)
 
 @unwrap_spec(arr=BaseArray, skipna=bool, keepdims=bool)
@@ -1222,13 +1248,13 @@
                     "array dimensions must agree except for axis being concatenated"))
             elif i == axis:
                 shape[i] += axis_size
-    res = W_NDimArray(support.product(shape), shape, dtype, 'C')
+    res = W_NDimArray(shape, dtype, 'C')
     chunks = [Chunk(0, i, 1, i) for i in shape]
     axis_start = 0
     for arr in args_w:
         chunks[axis] = Chunk(axis_start, axis_start + arr.shape[axis], 1,
                              arr.shape[axis])
-        res.create_slice(chunks).setslice(space, arr)
+        Chunks(chunks).apply(res).setslice(space, arr)
         axis_start += arr.shape[axis]
     return res
 
@@ -1316,6 +1342,7 @@
     std = interp2app(BaseArray.descr_std),
 
     fill = interp2app(BaseArray.descr_fill),
+    tostring = interp2app(BaseArray.descr_tostring),
 
     copy = interp2app(BaseArray.descr_copy),
     flatten = interp2app(BaseArray.descr_flatten),
@@ -1338,7 +1365,7 @@
         self.iter = sig.create_frame(arr).get_final_iter()
         self.base = arr
         self.index = 0
-        ViewArray.__init__(self, arr.size, [arr.size], arr.dtype, arr.order,
+        ViewArray.__init__(self, [arr.get_size()], arr.dtype, arr.order,
                            arr)
 
     def descr_next(self, space):
@@ -1353,7 +1380,7 @@
         return self
 
     def descr_len(self, space):
-        return space.wrap(self.size)
+        return space.wrap(self.get_size())
 
     def descr_index(self, space):
         return space.wrap(self.index)
@@ -1371,28 +1398,26 @@
             raise OperationError(space.w_IndexError,
                                  space.wrap('unsupported iterator index'))
         base = self.base
-        start, stop, step, lngth = space.decode_index4(w_idx, base.size)
+        start, stop, step, lngth = space.decode_index4(w_idx, base.get_size())
         # setslice would have been better, but flat[u:v] for arbitrary
         # shapes of array a cannot be represented as a[x1:x2, y1:y2]
         basei = ViewIterator(base.start, base.strides,
-                               base.backstrides,base.shape)
+                             base.backstrides, base.shape)
         shapelen = len(base.shape)
         basei = basei.next_skip_x(shapelen, start)
         if lngth <2:
             return base.getitem(basei.offset)
-        ri = ArrayIterator(lngth)
-        res = W_NDimArray(lngth, [lngth], base.dtype,
-                                    base.order)
+        res = W_NDimArray([lngth], base.dtype, base.order)
+        ri = res.create_iter()
         while not ri.done():
             flat_get_driver.jit_merge_point(shapelen=shapelen,
                                              base=base,
                                              basei=basei,
                                              step=step,
                                              res=res,
-                                             ri=ri,
-                                            )
+                                             ri=ri)
             w_val = base.getitem(basei.offset)
-            res.setitem(ri.offset,w_val)
+            res.setitem(ri.offset, w_val)
             basei = basei.next_skip_x(shapelen, step)
             ri = ri.next(shapelen)
         return res
@@ -1403,27 +1428,28 @@
             raise OperationError(space.w_IndexError,
                                  space.wrap('unsupported iterator index'))
         base = self.base
-        start, stop, step, lngth = space.decode_index4(w_idx, base.size)
+        start, stop, step, lngth = space.decode_index4(w_idx, base.get_size())
         arr = convert_to_array(space, w_value)
-        ai = 0
+        ri = arr.create_iter()
         basei = ViewIterator(base.start, base.strides,
-                               base.backstrides,base.shape)
+                             base.backstrides, base.shape)
         shapelen = len(base.shape)
         basei = basei.next_skip_x(shapelen, start)
         while lngth > 0:
             flat_set_driver.jit_merge_point(shapelen=shapelen,
-                                             basei=basei,
-                                             base=base,
-                                             step=step,
-                                             arr=arr,
-                                             ai=ai,
-                                             lngth=lngth,
-                                            )
-            v = arr.getitem(ai).convert_to(base.dtype)
+                                            basei=basei,
+                                            base=base,
+                                            step=step,
+                                            arr=arr,
+                                            lngth=lngth,
+                                            ri=ri)
+            v = arr.getitem(ri.offset).convert_to(base.dtype)
             base.setitem(basei.offset, v)
             # need to repeat input values until all assignments are done
-            ai = (ai + 1) % arr.size
             basei = basei.next_skip_x(shapelen, step)
+            ri = ri.next(shapelen)
+            # WTF is numpy thinking?
+            ri.offset %= arr.size
             lngth -= 1
 
     def create_sig(self):
@@ -1431,9 +1457,9 @@
 
     def create_iter(self, transforms=None):
         return ViewIterator(self.base.start, self.base.strides,
-                    self.base.backstrides,
-                    self.base.shape).apply_transformations(self.base,
-                                                           transforms)
+                            self.base.backstrides,
+                            self.base.shape).apply_transformations(self.base,
+                                                                   transforms)
 
     def descr_base(self, space):
         return space.wrap(self.base)
diff --git a/pypy/module/micronumpy/interp_support.py b/pypy/module/micronumpy/interp_support.py
--- a/pypy/module/micronumpy/interp_support.py
+++ b/pypy/module/micronumpy/interp_support.py
@@ -51,9 +51,11 @@
         raise OperationError(space.w_ValueError, space.wrap(
             "string is smaller than requested size"))
 
-    a = W_NDimArray(num_items, [num_items], dtype=dtype)
-    for i, val in enumerate(items):
-        a.dtype.setitem(a.storage, i, val)
+    a = W_NDimArray([num_items], dtype=dtype)
+    ai = a.create_iter()
+    for val in items:
+        a.dtype.setitem(a, ai.offset, val)
+        ai = ai.next(1)
     
     return space.wrap(a)
 
@@ -61,6 +63,7 @@
     from pypy.module.micronumpy.interp_numarray import W_NDimArray
     
     itemsize = dtype.itemtype.get_element_size()
+    assert itemsize >= 0
     if count == -1:
         count = length / itemsize
     if length % itemsize != 0:
@@ -71,20 +74,23 @@
         raise OperationError(space.w_ValueError, space.wrap(
             "string is smaller than requested size"))
         
-    a = W_NDimArray(count, [count], dtype=dtype)
-    fromstring_loop(a, count, dtype, itemsize, s)
+    a = W_NDimArray([count], dtype=dtype)
+    fromstring_loop(a, dtype, itemsize, s)
     return space.wrap(a)
 
-fromstring_driver = jit.JitDriver(greens=[], reds=['count', 'i', 'itemsize',
-                                                   'dtype', 's', 'a'])
+fromstring_driver = jit.JitDriver(greens=[], reds=['i', 'itemsize',
+                                                   'dtype', 'ai', 's', 'a'])
 
-def fromstring_loop(a, count, dtype, itemsize, s):
+def fromstring_loop(a, dtype, itemsize, s):
     i = 0
-    while i < count:
-        fromstring_driver.jit_merge_point(a=a, count=count, dtype=dtype,
-                                          itemsize=itemsize, s=s, i=i)
+    ai = a.create_iter()
+    while not ai.done():
+        fromstring_driver.jit_merge_point(a=a, dtype=dtype,
+                                          itemsize=itemsize, s=s, i=i,
+                                          ai=ai)
         val = dtype.itemtype.runpack_str(s[i*itemsize:i*itemsize + itemsize])
-        a.dtype.setitem(a.storage, i, val)
+        a.dtype.setitem(a, ai.offset, val)
+        ai = ai.next(1)
         i += 1
 
 @unwrap_spec(s=str, count=int, sep=str)
diff --git a/pypy/module/micronumpy/interp_ufuncs.py b/pypy/module/micronumpy/interp_ufuncs.py
--- a/pypy/module/micronumpy/interp_ufuncs.py
+++ b/pypy/module/micronumpy/interp_ufuncs.py
@@ -156,7 +156,7 @@
             shape = obj.shape[:dim] + [1] + obj.shape[dim + 1:]
         else:
             shape = obj.shape[:dim] + obj.shape[dim + 1:]
-        result = W_NDimArray(support.product(shape), shape, dtype)
+        result = W_NDimArray(shape, dtype)
         arr = AxisReduce(self.func, self.name, self.identity, obj.shape, dtype,
                          result, obj, dim)
         loop.compute(arr)
@@ -404,6 +404,9 @@
             ("greater_equal", "ge", 2, {"comparison_func": True}),
             ("isnan", "isnan", 1, {"bool_result": True}),
             ("isinf", "isinf", 1, {"bool_result": True}),
+            ("isneginf", "isneginf", 1, {"bool_result": True}),
+            ("isposinf", "isposinf", 1, {"bool_result": True}),
+            ("isfinite", "isfinite", 1, {"bool_result": True}),
 
             ('logical_and', 'logical_and', 2, {'comparison_func': True,
                                                'identity': 1}),
@@ -421,12 +424,16 @@
             ("negative", "neg", 1),
             ("absolute", "abs", 1),
             ("sign", "sign", 1, {"promote_bools": True}),
+            ("signbit", "signbit", 1, {"bool_result": True}),
             ("reciprocal", "reciprocal", 1),
 
             ("fabs", "fabs", 1, {"promote_to_float": True}),
+            ("fmod", "fmod", 2, {"promote_to_float": True}),
             ("floor", "floor", 1, {"promote_to_float": True}),
             ("ceil", "ceil", 1, {"promote_to_float": True}),
             ("exp", "exp", 1, {"promote_to_float": True}),
+            ("exp2", "exp2", 1, {"promote_to_float": True}),
+            ("expm1", "expm1", 1, {"promote_to_float": True}),
 
             ('sqrt', 'sqrt', 1, {'promote_to_float': True}),
 
@@ -436,6 +443,7 @@
             ("arcsin", "arcsin", 1, {"promote_to_float": True}),
             ("arccos", "arccos", 1, {"promote_to_float": True}),
             ("arctan", "arctan", 1, {"promote_to_float": True}),
+            ("arctan2", "arctan2", 2, {"promote_to_float": True}),
             ("sinh", "sinh", 1, {"promote_to_float": True}),
             ("cosh", "cosh", 1, {"promote_to_float": True}),
             ("tanh", "tanh", 1, {"promote_to_float": True}),
@@ -450,6 +458,8 @@
             ("log2", "log2", 1, {"promote_to_float": True}),
             ("log10", "log10", 1, {"promote_to_float": True}),
             ("log1p", "log1p", 1, {"promote_to_float": True}),
+            ("logaddexp", "logaddexp", 2, {"promote_to_float": True}),
+            ("logaddexp2", "logaddexp2", 2, {"promote_to_float": True}),
         ]:
             self.add_ufunc(space, *ufunc_def)
 
diff --git a/pypy/module/micronumpy/signature.py b/pypy/module/micronumpy/signature.py
--- a/pypy/module/micronumpy/signature.py
+++ b/pypy/module/micronumpy/signature.py
@@ -4,6 +4,7 @@
      ViewTransform, BroadcastTransform
 from pypy.tool.pairtype import extendabletype
 from pypy.module.micronumpy.loop import ComputationDone
+from pypy.rlib import jit
 
 """ Signature specifies both the numpy expression that has been constructed
 and the assembler to be compiled. This is a very important observation -
@@ -142,11 +143,10 @@
         from pypy.module.micronumpy.interp_numarray import ConcreteArray
         concr = arr.get_concrete()
         assert isinstance(concr, ConcreteArray)
-        storage = concr.storage
         if self.iter_no >= len(iterlist):
             iterlist.append(concr.create_iter(transforms))
         if self.array_no >= len(arraylist):
-            arraylist.append(storage)
+            arraylist.append(concr)
 
     def eval(self, frame, arr):
         iter = frame.iterators[self.iter_no]
@@ -318,6 +318,20 @@
         offset = frame.get_final_iter().offset
         arr.left.setitem(offset, self.right.eval(frame, arr.right))
 
+class ToStringSignature(Call1):
+    def __init__(self, dtype, child):
+        Call1.__init__(self, None, 'tostring', dtype, child)
+
+    @jit.unroll_safe
+    def eval(self, frame, arr):
+        from pypy.module.micronumpy.interp_numarray import ToStringArray
+
+        assert isinstance(arr, ToStringArray)
+        arr.res.setitem(0, self.child.eval(frame, arr.values).convert_to(
+            self.dtype))
+        for i in range(arr.item_size):
+            arr.s.append(arr.res_casted[i])
+
 class BroadcastLeft(Call2):
     def _invent_numbering(self, cache, allnumbers):
         self.left._invent_numbering(new_cache(), allnumbers)
diff --git a/pypy/module/micronumpy/strides.py b/pypy/module/micronumpy/strides.py
--- a/pypy/module/micronumpy/strides.py
+++ b/pypy/module/micronumpy/strides.py
@@ -1,6 +1,14 @@
 from pypy.rlib import jit
 from pypy.interpreter.error import OperationError
 
+def enumerate_chunks(chunks):
+    result = []
+    i = -1
+    for chunk in chunks:
+        i += chunk.axis_step
+        result.append((i, chunk))
+    return result
+
 @jit.look_inside_iff(lambda shape, start, strides, backstrides, chunks:
     jit.isconstant(len(chunks))
 )
@@ -10,7 +18,7 @@
     rstart = start
     rshape = []
     i = -1
-    for i, chunk in enumerate(chunks):
+    for i, chunk in enumerate_chunks(chunks):
         if chunk.step != 0:
             rstrides.append(strides[i] * chunk.step)
             rbackstrides.append(strides[i] * (chunk.lgt - 1) * chunk.step)
@@ -38,22 +46,31 @@
     rbackstrides = [0] * (len(res_shape) - len(orig_shape)) + rbackstrides
     return rstrides, rbackstrides
 
-def find_shape_and_elems(space, w_iterable):
+def is_single_elem(space, w_elem, is_rec_type):
+    if (is_rec_type and space.isinstance_w(w_elem, space.w_tuple)):
+        return True
+    if space.issequence_w(w_elem):
+        return False
+    return True
+
+def find_shape_and_elems(space, w_iterable, dtype):
     shape = [space.len_w(w_iterable)]
     batch = space.listview(w_iterable)
+    is_rec_type = dtype is not None and dtype.is_record_type()
     while True:
         new_batch = []
         if not batch:
             return shape, []
-        if not space.issequence_w(batch[0]):
-            for elem in batch:
-                if space.issequence_w(elem):
+        if is_single_elem(space, batch[0], is_rec_type):
+            for w_elem in batch:
+                if not is_single_elem(space, w_elem, is_rec_type):
                     raise OperationError(space.w_ValueError, space.wrap(
                         "setting an array element with a sequence"))
             return shape, batch
         size = space.len_w(batch[0])
         for w_elem in batch:
-            if not space.issequence_w(w_elem) or space.len_w(w_elem) != size:
+            if (is_single_elem(space, w_elem, is_rec_type) or
+                space.len_w(w_elem) != size):
                 raise OperationError(space.w_ValueError, space.wrap(
                     "setting an array element with a sequence"))
             new_batch += space.listview(w_elem)
diff --git a/pypy/module/micronumpy/test/test_base.py b/pypy/module/micronumpy/test/test_base.py
--- a/pypy/module/micronumpy/test/test_base.py
+++ b/pypy/module/micronumpy/test/test_base.py
@@ -4,6 +4,8 @@
 from pypy.module.micronumpy.interp_ufuncs import (find_binop_result_dtype,
         find_unaryop_result_dtype)
 from pypy.module.micronumpy.interp_boxes import W_Float64Box
+from pypy.module.micronumpy.interp_dtype import nonnative_byteorder_prefix,\
+     byteorder_prefix
 from pypy.conftest import option
 import sys
 
@@ -15,14 +17,16 @@
                 sys.modules['numpypy'] = numpy
                 sys.modules['_numpypy'] = numpy
         cls.space = gettestobjspace(usemodules=['micronumpy'])
+        cls.w_non_native_prefix = cls.space.wrap(nonnative_byteorder_prefix)
+        cls.w_native_prefix = cls.space.wrap(byteorder_prefix)
 
 class TestSignature(object):
     def test_binop_signature(self, space):
         float64_dtype = get_dtype_cache(space).w_float64dtype
         bool_dtype = get_dtype_cache(space).w_booldtype
 
-        ar = W_NDimArray(10, [10], dtype=float64_dtype)
-        ar2 = W_NDimArray(10, [10], dtype=float64_dtype)
+        ar = W_NDimArray([10], dtype=float64_dtype)
+        ar2 = W_NDimArray([10], dtype=float64_dtype)
         v1 = ar.descr_add(space, ar)
         v2 = ar.descr_add(space, Scalar(float64_dtype, W_Float64Box(2.0)))
         sig1 = v1.find_sig()
@@ -40,7 +44,7 @@
         v4 = ar.descr_add(space, ar)
         assert v1.find_sig() is v4.find_sig()
 
-        bool_ar = W_NDimArray(10, [10], dtype=bool_dtype)
+        bool_ar = W_NDimArray([10], dtype=bool_dtype)
         v5 = ar.descr_add(space, bool_ar)
         assert v5.find_sig() is not v1.find_sig()
         assert v5.find_sig() is not v2.find_sig()
@@ -57,7 +61,7 @@
     def test_slice_signature(self, space):
         float64_dtype = get_dtype_cache(space).w_float64dtype
 
-        ar = W_NDimArray(10, [10], dtype=float64_dtype)
+        ar = W_NDimArray([10], dtype=float64_dtype)
         v1 = ar.descr_getitem(space, space.wrap(slice(1, 3, 1)))
         v2 = ar.descr_getitem(space, space.wrap(slice(4, 6, 1)))
         assert v1.find_sig() is v2.find_sig()
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -1,5 +1,7 @@
+import py
+from pypy.conftest import option
 from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
-
+from pypy.interpreter.gateway import interp2app
 
 class AppTestDtypes(BaseNumpyAppTest):
     def test_dtype(self):
@@ -12,7 +14,10 @@
         assert dtype(d) is d
         assert dtype(None) is dtype(float)
         assert dtype('int8').name == 'int8'
+        assert dtype(int).fields is None
+        assert dtype(int).names is None
         raises(TypeError, dtype, 1042)
+        raises(KeyError, 'dtype(int)["asdasd"]')
 
     def test_dtype_eq(self):
         from _numpypy import dtype
@@ -53,13 +58,13 @@
             assert a[i] is True_
 
     def test_copy_array_with_dtype(self):
-        from _numpypy import array, False_, True_, int64
+        from _numpypy import array, False_, longlong
 
         a = array([0, 1, 2, 3], dtype=long)
         # int on 64-bit, long in 32-bit
-        assert isinstance(a[0], int64)
+        assert isinstance(a[0], longlong)
         b = a.copy()
-        assert isinstance(b[0], int64)
+        assert isinstance(b[0], longlong)
 
         a = array([0, 1, 2, 3], dtype=bool)
         assert a[0] is False_
@@ -81,17 +86,17 @@
             assert a[i] is True_
 
     def test_zeros_long(self):
-        from _numpypy import zeros, int64
+        from _numpypy import zeros, longlong
         a = zeros(10, dtype=long)
         for i in range(10):
-            assert isinstance(a[i], int64)
+            assert isinstance(a[i], longlong)
             assert a[1] == 0
 
     def test_ones_long(self):
-        from _numpypy import ones, int64
+        from _numpypy import ones, longlong
         a = ones(10, dtype=long)
         for i in range(10):
-            assert isinstance(a[i], int64)
+            assert isinstance(a[i], longlong)
             assert a[1] == 1
 
     def test_overflow(self):
@@ -181,17 +186,18 @@
         assert dtype("float") is dtype(float)
 
 
-class AppTestTypes(BaseNumpyAppTest):
+class AppTestTypes(BaseNumpyAppTest):    
     def test_abstract_types(self):
         import _numpypy as numpy
         raises(TypeError, numpy.generic, 0)
         raises(TypeError, numpy.number, 0)
         raises(TypeError, numpy.integer, 0)
         exc = raises(TypeError, numpy.signedinteger, 0)
-        assert str(exc.value) == "cannot create 'signedinteger' instances"
+        assert 'cannot create' in str(exc.value)
+        assert 'signedinteger' in str(exc.value)
         exc = raises(TypeError, numpy.unsignedinteger, 0)
-        assert str(exc.value) == "cannot create 'unsignedinteger' instances"
-
+        assert 'cannot create' in str(exc.value)
+        assert 'unsignedinteger' in str(exc.value)
         raises(TypeError, numpy.floating, 0)
         raises(TypeError, numpy.inexact, 0)
 
@@ -404,10 +410,29 @@
             assert issubclass(int64, int)
             assert int_ is int64
 
+    def test_various_types(self):
+        import _numpypy as numpy
+        import sys
+        
+        assert numpy.int16 is numpy.short
+        assert numpy.int8 is numpy.byte
+        assert numpy.bool_ is numpy.bool8
+        if sys.maxint == (1 << 63) - 1:
+            assert numpy.intp is numpy.int64
+        else:
+            assert numpy.intp is numpy.int32
+
+    def test_mro(self):
+        import _numpypy as numpy
+        
+        assert numpy.int16.__mro__ == (numpy.int16, numpy.signedinteger,
+                                       numpy.integer, numpy.number,
+                                       numpy.generic, object)
+        assert numpy.bool_.__mro__ == (numpy.bool_, numpy.generic, object)
+
     def test_operators(self):
         from operator import truediv
         from _numpypy import float64, int_, True_, False_
-
         assert 5 / int_(2) == int_(2)
         assert truediv(int_(3), int_(2)) == float64(1.5)
         assert truediv(3, int_(2)) == float64(1.5)
@@ -427,9 +452,115 @@
         assert int_(3) ^ int_(5) == int_(6)
         assert True_ ^ False_ is True_
         assert 5 ^ int_(3) == int_(6)
-
         assert +int_(3) == int_(3)
         assert ~int_(3) == int_(-4)
-
         raises(TypeError, lambda: float64(3) & 1)
 
+    def test_alternate_constructs(self):
+        from _numpypy import dtype
+        nnp = self.non_native_prefix
+        byteorder = self.native_prefix
+        assert dtype('i8') == dtype(byteorder + 'i8') == dtype('=i8') # XXX should be equal == dtype(long)
+        assert dtype(nnp + 'i8') != dtype('i8')
+        assert dtype(nnp + 'i8').byteorder == nnp
+        assert dtype('=i8').byteorder == '='
+        assert dtype(byteorder + 'i8').byteorder == '='
+
+    def test_alignment(self):
+        from _numpypy import dtype
+        assert dtype('i4').alignment == 4
+
+    def test_typeinfo(self):
+        from _numpypy import typeinfo, void, number, int64, bool_
+        assert typeinfo['Number'] == number
+        assert typeinfo['LONGLONG'] == ('q', 9, 64, 8, 9223372036854775807L, -9223372036854775808L, int64)
+        assert typeinfo['VOID'] == ('V', 20, 0, 1, void)
+        assert typeinfo['BOOL'] == ('?', 0, 8, 1, 1, 0, bool_)
+
+class AppTestStrUnicodeDtypes(BaseNumpyAppTest):
+    def test_str_unicode(self):
+        from _numpypy import str_, unicode_, character, flexible, generic
+        
+        assert str_.mro() == [str_, str, basestring, character, flexible, generic, object]
+        assert unicode_.mro() == [unicode_, unicode, basestring, character, flexible, generic, object]
+
+    def test_str_dtype(self):
+        from _numpypy import dtype, str_
+
+        raises(TypeError, "dtype('Sx')")
+        d = dtype('S8')
+        assert d.itemsize == 8
+        assert dtype(str) == dtype('S')
+        assert d.kind == 'S'
+        assert d.type is str_
+        assert d.name == "string64"
+        assert d.num == 18
+
+    def test_unicode_dtype(self):
+        from _numpypy import dtype, unicode_
+
+        raises(TypeError, "dtype('Ux')")
+        d = dtype('U8')
+        assert d.itemsize == 8 * 4
+        assert dtype(unicode) == dtype('U')
+        assert d.kind == 'U'
+        assert d.type is unicode_
+        assert d.name == "unicode256"
+        assert d.num == 19
+
+    def test_string_boxes(self):
+        from _numpypy import str_
+        assert isinstance(str_(3), str_)
+
+    def test_unicode_boxes(self):
+        from _numpypy import unicode_
+        assert isinstance(unicode_(3), unicode)
+
+class AppTestRecordDtypes(BaseNumpyAppTest):
+    def test_create(self):
+        from _numpypy import dtype, void
+
+        raises(ValueError, "dtype([('x', int), ('x', float)])")
+        d = dtype([("x", "int32"), ("y", "int32"), ("z", "int32"), ("value", float)])
+        assert d.fields['x'] == (dtype('int32'), 0)
+        assert d.fields['value'] == (dtype(float), 12)
+        assert d['x'] == dtype('int32')
+        assert d.name == "void160"
+        assert d.num == 20
+        assert d.itemsize == 20
+        assert d.kind == 'V'
+        assert d.type is void
+        assert d.char == 'V'
+        assert d.names == ("x", "y", "z", "value")
+        raises(KeyError, 'd["xyz"]')
+        raises(KeyError, 'd.fields["xyz"]')
+
+    def test_create_from_dict(self):
+        skip("not yet")
+        from _numpypy import dtype
+        d = dtype({'names': ['a', 'b', 'c'],
+                   })
+        
+class AppTestNotDirect(BaseNumpyAppTest):
+    def setup_class(cls):
+        BaseNumpyAppTest.setup_class.im_func(cls)
+        def check_non_native(w_obj, w_obj2):
+            assert w_obj.storage[0] == w_obj2.storage[1]
+            assert w_obj.storage[1] == w_obj2.storage[0]
+            if w_obj.storage[0] == '\x00':
+                assert w_obj2.storage[1] == '\x00'
+                assert w_obj2.storage[0] == '\x01'
+            else:
+                assert w_obj2.storage[1] == '\x01'
+                assert w_obj2.storage[0] == '\x00'
+        cls.w_check_non_native = cls.space.wrap(interp2app(check_non_native))
+        if option.runappdirect:
+            py.test.skip("not a direct test")
+
+    def test_non_native(self):
+        from _numpypy import array
+        a = array([1, 2, 3], dtype=self.non_native_prefix + 'i2')
+        assert a[0] == 1
+        assert (a + a)[1] == 4
+        self.check_non_native(a, array([1, 2, 3], 'i2'))
+
diff --git a/pypy/module/micronumpy/test/test_numarray.py b/pypy/module/micronumpy/test/test_numarray.py
--- a/pypy/module/micronumpy/test/test_numarray.py
+++ b/pypy/module/micronumpy/test/test_numarray.py
@@ -5,15 +5,23 @@
 from pypy.interpreter.error import OperationError
 from pypy.module.micronumpy import signature
 from pypy.module.micronumpy.appbridge import get_appbridge_cache
-from pypy.module.micronumpy.interp_iter import Chunk
+from pypy.module.micronumpy.interp_iter import Chunk, Chunks
 from pypy.module.micronumpy.interp_numarray import W_NDimArray, shape_agreement
 from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
 
 
 class MockDtype(object):
-    def malloc(self, size):
-        return None
+    class itemtype(object):
+        @staticmethod
+        def malloc(size):
+            return None
 
+    def get_size(self):
+        return 1
+
+
+def create_slice(a, chunks):
+    return Chunks(chunks).apply(a)
 
 class TestNumArrayDirect(object):
     def newslice(self, *args):
@@ -29,116 +37,116 @@
         return self.space.newtuple(args_w)
 
     def test_strides_f(self):
-        a = W_NDimArray(100, [10, 5, 3], MockDtype(), 'F')
+        a = W_NDimArray([10, 5, 3], MockDtype(), 'F')
         assert a.strides == [1, 10, 50]
         assert a.backstrides == [9, 40, 100]
 
     def test_strides_c(self):
-        a = W_NDimArray(100, [10, 5, 3], MockDtype(), 'C')
+        a = W_NDimArray([10, 5, 3], MockDtype(), 'C')
         assert a.strides == [15, 3, 1]
         assert a.backstrides == [135, 12, 2]
 
     def test_create_slice_f(self):
-        a = W_NDimArray(10 * 5 * 3, [10, 5, 3], MockDtype(), 'F')
-        s = a.create_slice([Chunk(3, 0, 0, 1)])
+        a = W_NDimArray([10, 5, 3], MockDtype(), 'F')
+        s = create_slice(a, [Chunk(3, 0, 0, 1)])
         assert s.start == 3
         assert s.strides == [10, 50]
         assert s.backstrides == [40, 100]
-        s = a.create_slice([Chunk(1, 9, 2, 4)])
+        s = create_slice(a, [Chunk(1, 9, 2, 4)])
         assert s.start == 1
         assert s.strides == [2, 10, 50]
         assert s.backstrides == [6, 40, 100]
-        s = a.create_slice([Chunk(1, 5, 3, 2), Chunk(1, 2, 1, 1), Chunk(1, 0, 0, 1)])
+        s = create_slice(a, [Chunk(1, 5, 3, 2), Chunk(1, 2, 1, 1), Chunk(1, 0, 0, 1)])
         assert s.shape == [2, 1]
         assert s.strides == [3, 10]
         assert s.backstrides == [3, 0]
-        s = a.create_slice([Chunk(0, 10, 1, 10), Chunk(2, 0, 0, 1)])
+        s = create_slice(a, [Chunk(0, 10, 1, 10), Chunk(2, 0, 0, 1)])
         assert s.start == 20
         assert s.shape == [10, 3]
 
     def test_create_slice_c(self):
-        a = W_NDimArray(10 * 5 * 3, [10, 5, 3], MockDtype(), 'C')
-        s = a.create_slice([Chunk(3, 0, 0, 1)])
+        a = W_NDimArray([10, 5, 3], MockDtype(), 'C')
+        s = create_slice(a, [Chunk(3, 0, 0, 1)])
         assert s.start == 45
         assert s.strides == [3, 1]
         assert s.backstrides == [12, 2]
-        s = a.create_slice([Chunk(1, 9, 2, 4)])
+        s = create_slice(a, [Chunk(1, 9, 2, 4)])
         assert s.start == 15
         assert s.strides == [30, 3, 1]
         assert s.backstrides == [90, 12, 2]
-        s = a.create_slice([Chunk(1, 5, 3, 2), Chunk(1, 2, 1, 1),
+        s = create_slice(a, [Chunk(1, 5, 3, 2), Chunk(1, 2, 1, 1),
                             Chunk(1, 0, 0, 1)])
         assert s.start == 19
         assert s.shape == [2, 1]
         assert s.strides == [45, 3]
         assert s.backstrides == [45, 0]
-        s = a.create_slice([Chunk(0, 10, 1, 10), Chunk(2, 0, 0, 1)])
+        s = create_slice(a, [Chunk(0, 10, 1, 10), Chunk(2, 0, 0, 1)])
         assert s.start == 6
         assert s.shape == [10, 3]
 
     def test_slice_of_slice_f(self):
-        a = W_NDimArray(10 * 5 * 3, [10, 5, 3], MockDtype(), 'F')
-        s = a.create_slice([Chunk(5, 0, 0, 1)])
+        a = W_NDimArray([10, 5, 3], MockDtype(), 'F')
+        s = create_slice(a, [Chunk(5, 0, 0, 1)])
         assert s.start == 5
-        s2 = s.create_slice([Chunk(3, 0, 0, 1)])
+        s2 = create_slice(s, [Chunk(3, 0, 0, 1)])
         assert s2.shape == [3]
         assert s2.strides == [50]
         assert s2.parent is a
         assert s2.backstrides == [100]
         assert s2.start == 35
-        s = a.create_slice([Chunk(1, 5, 3, 2)])
-        s2 = s.create_slice([Chunk(0, 2, 1, 2), Chunk(2, 0, 0, 1)])
+        s = create_slice(a, [Chunk(1, 5, 3, 2)])
+        s2 = create_slice(s, [Chunk(0, 2, 1, 2), Chunk(2, 0, 0, 1)])
         assert s2.shape == [2, 3]
         assert s2.strides == [3, 50]
         assert s2.backstrides == [3, 100]
         assert s2.start == 1 * 15 + 2 * 3
 
     def test_slice_of_slice_c(self):
-        a = W_NDimArray(10 * 5 * 3, [10, 5, 3], MockDtype(), order='C')
-        s = a.create_slice([Chunk(5, 0, 0, 1)])
+        a = W_NDimArray([10, 5, 3], MockDtype(), order='C')
+        s = create_slice(a, [Chunk(5, 0, 0, 1)])
         assert s.start == 15 * 5
-        s2 = s.create_slice([Chunk(3, 0, 0, 1)])
+        s2 = create_slice(s, [Chunk(3, 0, 0, 1)])
         assert s2.shape == [3]
         assert s2.strides == [1]
         assert s2.parent is a
         assert s2.backstrides == [2]
         assert s2.start == 5 * 15 + 3 * 3
-        s = a.create_slice([Chunk(1, 5, 3, 2)])
-        s2 = s.create_slice([Chunk(0, 2, 1, 2), Chunk(2, 0, 0, 1)])
+        s = create_slice(a, [Chunk(1, 5, 3, 2)])
+        s2 = create_slice(s, [Chunk(0, 2, 1, 2), Chunk(2, 0, 0, 1)])
         assert s2.shape == [2, 3]
         assert s2.strides == [45, 1]
         assert s2.backstrides == [45, 2]
         assert s2.start == 1 * 15 + 2 * 3
 
     def test_negative_step_f(self):
-        a = W_NDimArray(10 * 5 * 3, [10, 5, 3], MockDtype(), 'F')
-        s = a.create_slice([Chunk(9, -1, -2, 5)])
+        a = W_NDimArray([10, 5, 3], MockDtype(), 'F')
+        s = create_slice(a, [Chunk(9, -1, -2, 5)])
         assert s.start == 9
         assert s.strides == [-2, 10, 50]
         assert s.backstrides == [-8, 40, 100]
 
     def test_negative_step_c(self):
-        a = W_NDimArray(10 * 5 * 3, [10, 5, 3], MockDtype(), order='C')
-        s = a.create_slice([Chunk(9, -1, -2, 5)])
+        a = W_NDimArray([10, 5, 3], MockDtype(), order='C')
+        s = create_slice(a, [Chunk(9, -1, -2, 5)])
         assert s.start == 135
         assert s.strides == [-30, 3, 1]
         assert s.backstrides == [-120, 12, 2]
 
     def test_index_of_single_item_f(self):
-        a = W_NDimArray(10 * 5 * 3, [10, 5, 3], MockDtype(), 'F')
+        a = W_NDimArray([10, 5, 3], MockDtype(), 'F')
         r = a._index_of_single_item(self.space, self.newtuple(1, 2, 2))
         assert r == 1 + 2 * 10 + 2 * 50
-        s = a.create_slice([Chunk(0, 10, 1, 10), Chunk(2, 0, 0, 1)])
+        s = create_slice(a, [Chunk(0, 10, 1, 10), Chunk(2, 0, 0, 1)])
         r = s._index_of_single_item(self.space, self.newtuple(1, 0))
         assert r == a._index_of_single_item(self.space, self.newtuple(1, 2, 0))
         r = s._index_of_single_item(self.space, self.newtuple(1, 1))
         assert r == a._index_of_single_item(self.space, self.newtuple(1, 2, 1))
 
     def test_index_of_single_item_c(self):
-        a = W_NDimArray(10 * 5 * 3, [10, 5, 3], MockDtype(), 'C')
+        a = W_NDimArray([10, 5, 3], MockDtype(), 'C')
         r = a._index_of_single_item(self.space, self.newtuple(1, 2, 2))
         assert r == 1 * 3 * 5 + 2 * 3 + 2
-        s = a.create_slice([Chunk(0, 10, 1, 10), Chunk(2, 0, 0, 1)])
+        s = create_slice(a, [Chunk(0, 10, 1, 10), Chunk(2, 0, 0, 1)])
         r = s._index_of_single_item(self.space, self.newtuple(1, 0))
         assert r == a._index_of_single_item(self.space, self.newtuple(1, 2, 0))
         r = s._index_of_single_item(self.space, self.newtuple(1, 1))
@@ -374,6 +382,58 @@
         assert a[1] == 0.
         assert a[3] == 0.
 
+    def test_newaxis(self):
+        from _numpypy import array
+        from numpypy.core.numeric import newaxis
+        a = array(range(5))
+        b = array([range(5)])
+        assert (a[newaxis] == b).all()
+
+    def test_newaxis_slice(self):
+        from _numpypy import array
+        from numpypy.core.numeric import newaxis
+
+        a = array(range(5))
+        b = array(range(1,5))
+        c = array([range(1,5)])
+        d = array([[x] for x in range(1,5)])
+
+        assert (a[1:] == b).all()
+        assert (a[1:,newaxis] == d).all()
+        assert (a[newaxis,1:] == c).all()
+
+    def test_newaxis_assign(self):
+        from _numpypy import array
+        from numpypy.core.numeric import newaxis
+
+        a = array(range(5))
+        a[newaxis,1] = [2]
+        assert a[1] == 2
+
+    def test_newaxis_virtual(self):
+        from _numpypy import array
+        from numpypy.core.numeric import newaxis
+
+        a = array(range(5))
+        b = (a + a)[newaxis]
+        c = array([[0, 2, 4, 6, 8]])
+        assert (b == c).all()
+
+    def test_newaxis_then_slice(self):
+        from _numpypy import array
+        from numpypy.core.numeric import newaxis
+        a = array(range(5))
+        b = a[newaxis]
+        assert b.shape == (1, 5)
+        assert (b[0,1:] == a[1:]).all()
+
+    def test_slice_then_newaxis(self):
+        from _numpypy import array
+        from numpypy.core.numeric import newaxis
+        a = array(range(5))
+        b = a[2:]
+        assert (b[newaxis] == [[2, 3, 4]]).all()
+
     def test_scalar(self):
         from _numpypy import array, dtype
         a = array(3)
@@ -434,6 +494,8 @@
         a = zeros((4, 2, 3))
         a.shape = (12, 2)
         (a + a).reshape(2, 12) # assert did not explode
+        a = array([[[[]]]])
+        assert a.reshape((0,)).shape == (0,)
 
     def test_slice_reshape(self):
         from _numpypy import zeros, arange
@@ -1087,7 +1149,7 @@
         assert array([True, False]).dtype is dtype(bool)
         assert array([True, 1]).dtype is dtype(int)
         assert array([1, 2, 3]).dtype is dtype(int)
-        assert array([1L, 2, 3]).dtype is dtype(long)
+        #assert array([1L, 2, 3]).dtype is dtype(long)
         assert array([1.2, True]).dtype is dtype(float)
         assert array([1.2, 5]).dtype is dtype(float)
         assert array([]).dtype is dtype(float)
@@ -1594,6 +1656,7 @@
         a = arange(12).reshape(3,4)
         b = a.T.flat
         b[6::2] = [-1, -2]
+        print a == [[0, 1, -1, 3], [4, 5, 6, -1], [8, 9, -2, 11]]
         assert (a == [[0, 1, -1, 3], [4, 5, 6, -1], [8, 9, -2, 11]]).all()
         b[0:2] = [[[100]]]
         assert(a[0,0] == 100)
@@ -1868,6 +1931,12 @@
         #5 bytes is larger than 3 bytes
         raises(ValueError, fromstring, "\x01\x02\x03", count=5, dtype=uint8)
 
+    def test_tostring(self):
+        from _numpypy import array
+        assert array([1, 2, 3], 'i2').tostring() == '\x01\x00\x02\x00\x03\x00'
+        assert array([1, 2, 3], 'i2')[::2].tostring() == '\x01\x00\x03\x00'
+        assert array([1, 2, 3], '<i2')[::2].tostring() == '\x01\x00\x03\x00'
+        assert array([1, 2, 3], '>i2')[::2].tostring() == '\x00\x01\x00\x03'
 
 class AppTestRanges(BaseNumpyAppTest):
     def test_arange(self):
@@ -1913,3 +1982,57 @@
         cache = get_appbridge_cache(cls.space)
         cache.w_array_repr = cls.old_array_repr
         cache.w_array_str = cls.old_array_str
+
+class AppTestRecordDtype(BaseNumpyAppTest):
+    def test_zeros(self):
+        from _numpypy import zeros
+        a = zeros(2, dtype=[('x', int), ('y', float)])
+        raises(IndexError, 'a[0]["xyz"]')
+        assert a[0]['x'] == 0
+        assert a[0]['y'] == 0
+        raises(ValueError, "a[0] = (1, 2, 3)")
+        a[0]['x'] = 13
+        assert a[0]['x'] == 13
+        a[1] = (1, 2)
+        assert a[1]['y'] == 2
+        b = zeros(2, dtype=[('x', int), ('y', float)])
+        b[1] = a[1]
+        assert a[1]['y'] == 2
+
+    def test_views(self):
+        from _numpypy import array
+        a = array([(1, 2), (3, 4)], dtype=[('x', int), ('y', float)])
+        raises(ValueError, 'array([1])["x"]')
+        raises(ValueError, 'a["z"]')
+        assert a['x'][1] == 3
+        assert a['y'][1] == 4
+        a['x'][0] = 15
+        assert a['x'][0] == 15
+        b = a['x'] + a['y']
+        assert (b == [15+2, 3+4]).all()
+        assert b.dtype == float
+
+    def test_assign_tuple(self):
+        from _numpypy import zeros
+        a = zeros((2, 3), dtype=[('x', int), ('y', float)])
+        a[1, 2] = (1, 2)
+        assert a['x'][1, 2] == 1
+        assert a['y'][1, 2] == 2
+
+    def test_creation_and_repr(self):
+        from _numpypy import array
+        a = array([(1, 2), (3, 4)], dtype=[('x', int), ('y', float)])
+        assert repr(a[0]) == '(1, 2.0)'
+
+    def test_nested_dtype(self):
+        from _numpypy import zeros
+        a = [('x', int), ('y', float)]
+        b = [('x', int), ('y', a)]
+        arr = zeros(3, dtype=b)
+        arr[1]['x'] = 15
+        assert arr[1]['x'] == 15
+        arr[1]['y']['y'] = 3.5
+        assert arr[1]['y']['y'] == 3.5
+        assert arr[1]['y']['x'] == 0.0
+        assert arr[1]['x'] == 15
+        
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -113,14 +113,37 @@
 
         assert (divide(array([-10]), array([2])) == array([-5])).all()
 
+    def test_true_divide(self):
+        from _numpypy import array, true_divide
+
+        a = array([0, 1, 2, 3, 4, 1, -1])
+        b = array([4, 4, 4, 4, 4, 0,  0])
+        c = true_divide(a, b)
+        assert (c == [0.0, 0.25, 0.5, 0.75, 1.0, float('inf'), float('-inf')]).all()
+
+        assert math.isnan(true_divide(0, 0))
+
     def test_fabs(self):
         from _numpypy import array, fabs
-        from math import fabs as math_fabs
+        from math import fabs as math_fabs, isnan
 
         a = array([-5.0, -0.0, 1.0])
         b = fabs(a)
         for i in range(3):
             assert b[i] == math_fabs(a[i])
+        assert fabs(float('inf')) == float('inf')
+        assert fabs(float('-inf')) == float('inf')
+        assert isnan(fabs(float('nan')))
+
+    def test_fmod(self):
+        from _numpypy import fmod
+        import math
+
+        assert fmod(-1e-100, 1e100) == -1e-100
+        assert fmod(3, float('inf')) == 3
+        assert (fmod([-3, -2, -1, 1, 2, 3], 2) == [-1,  0, -1,  1,  0,  1]).all()
+        for v in [float('inf'), float('-inf'), float('nan'), float('-nan')]:
+            assert math.isnan(fmod(v, 2))
 
     def test_minimum(self):
         from _numpypy import array, minimum
@@ -172,6 +195,15 @@
         assert a[0] == 1
         assert a[1] == 0
 
+    def test_signbit(self):
+        from _numpypy import signbit, copysign
+        import struct
+
+        assert (signbit([0, 0.0, 1, 1.0, float('inf'), float('nan')]) ==
+            [False, False, False, False, False, False]).all()
+        assert (signbit([-0, -0.0, -1, -1.0, float('-inf'), -float('nan'), float('-nan')]) ==
+            [False,  True,  True,  True,  True,  True, True]).all()
+
     def test_reciporocal(self):
         from _numpypy import array, reciprocal
 
@@ -231,13 +263,46 @@
         a = array([-5.0, -0.0, 0.0, 12345678.0, float("inf"),
                    -float('inf'), -12343424.0])
         b = exp(a)
-        for i in range(4):
+        for i in range(len(a)):
             try:
                 res = math.exp(a[i])
             except OverflowError:
                 res = float('inf')
             assert b[i] == res
 
+    def test_exp2(self):
+        import math
+        from _numpypy import array, exp2
+
+        a = array([-5.0, -0.0, 0.0, 2, 12345678.0, float("inf"),
+                   -float('inf'), -12343424.0])
+        b = exp2(a)
+        for i in range(len(a)):
+            try:
+                res = 2 ** a[i]
+            except OverflowError:
+                res = float('inf')
+            assert b[i] == res
+
+        assert exp2(3) == 8
+        assert math.isnan(exp2(float("nan")))
+
+    def test_expm1(self):
+        import math
+        from _numpypy import array, expm1
+
+        a = array([-5.0, -0.0, 0.0, 12345678.0, float("inf"),
+                   -float('inf'), -12343424.0])
+        b = expm1(a)
+        for i in range(4):
+            try:
+                res = math.exp(a[i]) - 1
+            except OverflowError:
+                res = float('inf')
+            assert b[i] == res
+
+        assert expm1(1e-50) == 1e-50
+
     def test_sin(self):
         import math
         from _numpypy import array, sin
@@ -310,6 +375,21 @@
         b = arctan(a)
         assert math.isnan(b[0])
 
+    def test_arctan2(self):
+        import math
+        from _numpypy import array, arctan2
+
+        # From the numpy documentation
+        assert (
+            arctan2(
+                [0.,  0.,           1.,          -1., float('inf'),  float('inf')],
+                [0., -0., float('inf'), float('inf'), float('inf'), float('-inf')]) ==
+            [0.,  math.pi,  0., -0.,  math.pi/4, 3*math.pi/4]).all()
+
+        a = array([float('nan')])
+        b = arctan2(a, 0)
+        assert math.isnan(b[0])
+
     def test_sinh(self):
         import math
         from _numpypy import array, sinh
@@ -415,6 +495,19 @@
         for i in range(len(a)):
             assert b[i] == math.degrees(a[i])
 
+    def test_rad2deg(self):
+        import math
+        from _numpypy import rad2deg, array
+        a = array([
+            -181, -180, -179,
+            181, 180, 179,
+            359, 360, 361,
+            400, -1, 0, 1,
+            float('inf'), float('-inf')])
+        b = rad2deg(a)
+        for i in range(len(a)):
+            assert b[i] == math.degrees(a[i])
+
     def test_reduce_errors(self):
         from _numpypy import sin, add
 
@@ -510,6 +603,26 @@
         assert (isinf(array([0.2, float('inf'), float('nan')])) == [False, True, False]).all()
         assert isinf(array([0.2])).dtype.kind == 'b'
 
+    def test_isposinf_isneginf(self):
+        from _numpypy import isneginf, isposinf
+        assert isposinf(float('inf'))
+        assert not isposinf(float('-inf'))
+        assert not isposinf(float('nan'))
+        assert not isposinf(0)
+        assert not isposinf(0.0)
+        assert isneginf(float('-inf'))
+        assert not isneginf(float('inf'))
+        assert not isneginf(float('nan'))
+        assert not isneginf(0)
+        assert not isneginf(0.0)
+
+    def test_isfinite(self):
+        from _numpypy import isfinite
+        assert (isfinite([0, 0.0, 1e50, -1e-50]) ==
+            [True, True, True, True]).all()
+        assert (isfinite([float('-inf'), float('inf'), float('-nan'), float('nan')]) ==
+            [False, False, False, False]).all()
+
     def test_logical_ops(self):
         from _numpypy import logical_and, logical_or, logical_xor, logical_not
 
@@ -544,7 +657,7 @@
         assert log1p(float('inf')) == float('inf')
         assert (log1p([0, 1e-50, math.e - 1]) == [0, 1e-50, 1]).all()
 
-    def test_power(self):
+    def test_power_float(self):
         import math
         from _numpypy import power, array
         a = array([1., 2., 3.])
@@ -558,9 +671,94 @@
         for i in range(len(a)):
             assert c[i] == a[i] ** b[i]
 
+        assert power(2, float('inf')) == float('inf')
+        assert power(float('inf'), float('inf')) == float('inf')
+        assert power(12345.0, 12345.0) == float('inf')
+        assert power(-12345.0, 12345.0) == float('-inf')
+        assert power(-12345.0, 12346.0) == float('inf')
+        assert math.isnan(power(-1, 1.1))
+        assert math.isnan(power(-1, -1.1))
+        assert power(-2.0, -1) == -0.5
+        assert power(-2.0, -2) == 0.25
+        assert power(12345.0, -12345.0) == 0
+        assert power(float('-inf'), 2) == float('inf')
+        assert power(float('-inf'), 2.5) == float('inf')
+        assert power(float('-inf'), 3) == float('-inf')
+
+    def test_power_int(self):
+        import math
+        from _numpypy import power, array
+        a = array([1, 2, 3])
+        b = power(a, 3)
+        for i in range(len(a)):
+            assert b[i] == a[i] ** 3
+
+        a = array([1, 2, 3])
+        b = array([1, 2, 3])
+        c = power(a, b)
+        for i in range(len(a)):
+            assert c[i] == a[i] ** b[i]
+
+        # assert power(12345, 12345) == -9223372036854775808
+        # assert power(-12345, 12345) == -9223372036854775808
+        # assert power(-12345, 12346) == -9223372036854775808
+        assert power(2, 0) == 1
+        assert power(2, -1) == 0
+        assert power(2, -2) == 0
+        assert power(-2, -1) == 0
+        assert power(-2, -2) == 0
+        assert power(12345, -12345) == 0
+
     def test_floordiv(self):
         from _numpypy import floor_divide, array
         a = array([1., 2., 3., 4., 5., 6., 6.01])
         b = floor_divide(a, 2.5)
         for i in range(len(a)):
             assert b[i] == a[i] // 2.5
+
+    def test_logaddexp(self):
+        import math
+        from _numpypy import logaddexp
+
+        # From the numpy documentation
+        prob1 = math.log(1e-50)
+        prob2 = math.log(2.5e-50)
+        prob12 = logaddexp(prob1, prob2)
+        assert math.fabs(-113.87649168120691 - prob12) < 0.000000000001
+
+        assert logaddexp(0, 0) == math.log(2)
+        assert logaddexp(float('-inf'), 0) == 0
+        assert logaddexp(12345678, 12345678) == float('inf')
+
+        assert math.isnan(logaddexp(float('nan'), 1))
+        assert math.isnan(logaddexp(1, float('nan')))
+        assert math.isnan(logaddexp(float('nan'), float('inf')))
+        assert math.isnan(logaddexp(float('inf'), float('nan')))
+        assert logaddexp(float('-inf'), float('-inf')) == float('-inf')
+        assert logaddexp(float('-inf'), float('inf')) == float('inf')
+        assert logaddexp(float('inf'), float('-inf')) == float('inf')
+        assert logaddexp(float('inf'), float('inf')) == float('inf')
+
+    def test_logaddexp2(self):
+        import math
+        from _numpypy import logaddexp2
+        log2 = math.log(2)
+
+        # From the numpy documentation
+        prob1 = math.log(1e-50) / log2
+        prob2 = math.log(2.5e-50) / log2
+        prob12 = logaddexp2(prob1, prob2)
+        assert math.fabs(-164.28904982231052 - prob12) < 0.000000000001
+
+        assert logaddexp2(0, 0) == 1
+        assert logaddexp2(float('-inf'), 0) == 0
+        assert logaddexp2(12345678, 12345678) == float('inf')
+
+        assert math.isnan(logaddexp2(float('nan'), 1))
+        assert math.isnan(logaddexp2(1, float('nan')))
+        assert math.isnan(logaddexp2(float('nan'), float('inf')))
+        assert math.isnan(logaddexp2(float('inf'), float('nan')))
+        assert logaddexp2(float('-inf'), float('-inf')) == float('-inf')
+        assert logaddexp2(float('-inf'), float('inf')) == float('inf')
+        assert logaddexp2(float('inf'), float('-inf')) == float('inf')
+        assert logaddexp2(float('inf'), float('inf')) == float('inf')
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -1,15 +1,20 @@
 import functools
 import math
+import struct
 
 from pypy.interpreter.error import OperationError
 from pypy.module.micronumpy import interp_boxes
 from pypy.objspace.std.floatobject import float2string
 from pypy.rlib import rfloat, libffi, clibffi
-from pypy.rlib.objectmodel import specialize
-from pypy.rlib.rarithmetic import LONG_BIT, widen
+from pypy.rlib.objectmodel import specialize, we_are_translated
+from pypy.rlib.rarithmetic import widen, byteswap
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rlib.rstruct.runpack import runpack
+from pypy.tool.sourcetools import func_with_new_name
+from pypy.rlib import jit
 
+VOID_STORAGE = lltype.Array(lltype.Char, hints={'nolength': True,
+                                                'render_as_void': True})
 degToRad = math.pi / 180.0
 log2 = math.log(2)
 
@@ -59,9 +64,20 @@
     return dispatcher
 
 class BaseType(object):
+    _attrs_ = ()
+    
     def _unimplemented_ufunc(self, *args):
         raise NotImplementedError
 
+    def malloc(self, size):
+        # XXX find out why test_zjit explodes with tracking of allocations
+        return lltype.malloc(VOID_STORAGE, size,
+                             zero=True, flavor="raw",
+                             track_allocation=False, add_memory_pressure=True)
+
+    def __repr__(self):
+        return self.__class__.__name__
+
 class Primitive(object):
     _mixin_ = True
 
@@ -76,7 +92,7 @@
         assert isinstance(box, self.BoxType)
         return box.value
 
-    def coerce(self, space, w_item):
+    def coerce(self, space, dtype, w_item):
         if isinstance(w_item, self.BoxType):
             return w_item
         return self.coerce_subtype(space, space.gettypefor(self.BoxType), w_item)
@@ -97,32 +113,41 @@
     def default_fromstring(self, space):
         raise NotImplementedError
 
-    def read(self, storage, width, i, offset):
-        return self.box(libffi.array_getitem(clibffi.cast_type_to_ffitype(self.T),
-            width, storage, i, offset
-        ))
+    def _read(self, storage, width, i, offset):
+        if we_are_translated():
+            return libffi.array_getitem(clibffi.cast_type_to_ffitype(self.T),
+                                        width, storage, i, offset)
+        else:
+            return libffi.array_getitem_T(self.T, width, storage, i, offset)
 
-    def read_bool(self, storage, width, i, offset):
-        return bool(self.for_computation(
-            libffi.array_getitem(clibffi.cast_type_to_ffitype(self.T),
-                                 width, storage, i, offset)))
+    def read(self, arr, width, i, offset, dtype=None):
+        return self.box(self._read(arr.storage, width, i, offset))
 
-    def store(self, storage, width, i, offset, box):
-        value = self.unbox(box)
-        libffi.array_setitem(clibffi.cast_type_to_ffitype(self.T),
-            width, storage, i, offset, value
-        )
+    def read_bool(self, arr, width, i, offset):
+        return bool(self.for_computation(self._read(arr.storage, width, i, offset)))
+
+    def _write(self, storage, width, i, offset, value):
+        if we_are_translated():
+            libffi.array_setitem(clibffi.cast_type_to_ffitype(self.T),
+                                 width, storage, i, offset, value)
+        else:
+            libffi.array_setitem_T(self.T, width, storage, i, offset, value)
+        
+
+    def store(self, arr, width, i, offset, box):
+        self._write(arr.storage, width, i, offset, self.unbox(box))
 
     def fill(self, storage, width, box, start, stop, offset):
         value = self.unbox(box)
-        for i in xrange(start, stop):
-            libffi.array_setitem(clibffi.cast_type_to_ffitype(self.T),
-                width, storage, i, offset, value
-            )
+        for i in xrange(start, stop, width):
+            self._write(storage, 1, i, offset, value)
 
     def runpack_str(self, s):
         return self.box(runpack(self.format_code, s))
 
+    def pack_str(self, box):
+        return struct.pack(self.format_code, self.unbox(box))
+
     @simple_binary_op
     def add(self, v1, v2):
         return v1 + v2
@@ -155,6 +180,14 @@
     def isinf(self, v):
         return False
 
+    @raw_unary_op
+    def isneginf(self, v):
+        return False
+
+    @raw_unary_op
+    def isposinf(self, v):
+        return False
+
     @raw_binary_op
     def eq(self, v1, v2):
         return v1 == v2
@@ -206,8 +239,31 @@
     def min(self, v1, v2):
         return min(v1, v2)
 
+class NonNativePrimitive(Primitive):
+    _mixin_ = True
+    
+    def _read(self, storage, width, i, offset):
+        if we_are_translated():
+            res = libffi.array_getitem(clibffi.cast_type_to_ffitype(self.T),
+                                        width, storage, i, offset)
+        else:
+            res = libffi.array_getitem_T(self.T, width, storage, i, offset)
+        return byteswap(res)
+
+    def _write(self, storage, width, i, offset, value):
+        value = byteswap(value)
+        if we_are_translated():
+            libffi.array_setitem(clibffi.cast_type_to_ffitype(self.T),
+                                 width, storage, i, offset, value)
+        else:
+            libffi.array_setitem_T(self.T, width, storage, i, offset, value)
+
+    def pack_str(self, box):
+        return struct.pack(self.format_code, byteswap(self.unbox(box)))
 
 class Bool(BaseType, Primitive):
+    _attrs_ = ()
+
     T = lltype.Bool
     BoxType = interp_boxes.W_BoolBox
     format_code = "?"
@@ -234,8 +290,7 @@
         return space.wrap(self.unbox(w_item))
 
     def str_format(self, box):
-        value = self.unbox(box)
-        return "True" if value else "False"
+        return "True" if self.unbox(box) else "False"
 
     def for_computation(self, v):
         return int(v)
@@ -259,15 +314,18 @@
     def invert(self, v):
         return ~v
 
+NonNativeBool = Bool
+
 class Integer(Primitive):
     _mixin_ = True
 
+    def _base_coerce(self, space, w_item):
+        return self.box(space.int_w(space.call_function(space.w_int, w_item)))
     def _coerce(self, space, w_item):
-        return self.box(space.int_w(space.call_function(space.w_int, w_item)))
+        return self._base_coerce(space, w_item)
 
     def str_format(self, box):
-        value = self.unbox(box)
-        return str(self.for_computation(value))
+        return str(self.for_computation(self.unbox(box)))
 
     def for_computation(self, v):
         return widen(v)
@@ -293,6 +351,8 @@
 
     @simple_binary_op
     def pow(self, v1, v2):
+        if v2 < 0:
+            return 0
         res = 1
         while v2 > 0:
             if v2 & 1:
@@ -337,68 +397,153 @@
     def invert(self, v):
         return ~v
 
+class NonNativeInteger(NonNativePrimitive, Integer):
+    _mixin_ = True
+
 class Int8(BaseType, Integer):
+    _attrs_ = ()
+
     T = rffi.SIGNEDCHAR
     BoxType = interp_boxes.W_Int8Box
     format_code = "b"
+NonNativeInt8 = Int8
 
 class UInt8(BaseType, Integer):
+    _attrs_ = ()
+
     T = rffi.UCHAR
     BoxType = interp_boxes.W_UInt8Box
     format_code = "B"
+NonNativeUInt8 = UInt8
 
 class Int16(BaseType, Integer):
+    _attrs_ = ()
+
+    T = rffi.SHORT
+    BoxType = interp_boxes.W_Int16Box
+    format_code = "h"
+
+class NonNativeInt16(BaseType, NonNativeInteger):
+    _attrs_ = ()
+
     T = rffi.SHORT
     BoxType = interp_boxes.W_Int16Box
     format_code = "h"
 
 class UInt16(BaseType, Integer):
+    _attrs_ = ()
+
+    T = rffi.USHORT
+    BoxType = interp_boxes.W_UInt16Box
+    format_code = "H"
+
+class NonNativeUInt16(BaseType, NonNativeInteger):
+    _attrs_ = ()
+
     T = rffi.USHORT
     BoxType = interp_boxes.W_UInt16Box
     format_code = "H"
 
 class Int32(BaseType, Integer):
+    _attrs_ = ()
+
+    T = rffi.INT
+    BoxType = interp_boxes.W_Int32Box
+    format_code = "i"
+
+class NonNativeInt32(BaseType, NonNativeInteger):
+    _attrs_ = ()
+
     T = rffi.INT
     BoxType = interp_boxes.W_Int32Box
     format_code = "i"
 
 class UInt32(BaseType, Integer):
+    _attrs_ = ()
+
+    T = rffi.UINT
+    BoxType = interp_boxes.W_UInt32Box
+    format_code = "I"
+
+class NonNativeUInt32(BaseType, NonNativeInteger):
+    _attrs_ = ()
+
     T = rffi.UINT
     BoxType = interp_boxes.W_UInt32Box
     format_code = "I"
 
 class Long(BaseType, Integer):
+    _attrs_ = ()
+
+    T = rffi.LONG
+    BoxType = interp_boxes.W_LongBox
+    format_code = "l"
+
+class NonNativeLong(BaseType, NonNativeInteger):
+    _attrs_ = ()
+
     T = rffi.LONG
     BoxType = interp_boxes.W_LongBox
     format_code = "l"
 
 class ULong(BaseType, Integer):
+    _attrs_ = ()
+
+    T = rffi.ULONG
+    BoxType = interp_boxes.W_ULongBox
+    format_code = "L"
+
+class NonNativeULong(BaseType, NonNativeInteger):
+    _attrs_ = ()
+
     T = rffi.ULONG
     BoxType = interp_boxes.W_ULongBox
     format_code = "L"
 
 class Int64(BaseType, Integer):
+    _attrs_ = ()
+
     T = rffi.LONGLONG
     BoxType = interp_boxes.W_Int64Box
     format_code = "q"
 
+class NonNativeInt64(BaseType, NonNativeInteger):
+    _attrs_ = ()
+
+    T = rffi.LONGLONG
+    BoxType = interp_boxes.W_Int64Box
+    format_code = "q"    
+
+def _uint64_coerce(self, space, w_item):
+    try:
+        return self._base_coerce(space, w_item)
+    except OperationError, e:
+        if not e.match(space, space.w_OverflowError):
+            raise
+    bigint = space.bigint_w(w_item)
+    try:
+        value = bigint.toulonglong()
+    except OverflowError:
+        raise OperationError(space.w_OverflowError, space.w_None)
+    return self.box(value)
+
 class UInt64(BaseType, Integer):
+    _attrs_ = ()
+
     T = rffi.ULONGLONG
     BoxType = interp_boxes.W_UInt64Box
     format_code = "Q"
 
-    def _coerce(self, space, w_item):
-        try:
-            return Integer._coerce(self, space, w_item)
-        except OperationError, e:
-            if not e.match(space, space.w_OverflowError):
-                raise
-        bigint = space.bigint_w(w_item)
-        try:
-            value = bigint.toulonglong()
-        except OverflowError:
-            raise OperationError(space.w_OverflowError, space.w_None)
-        return self.box(value)
+    _coerce = func_with_new_name(_uint64_coerce, '_coerce')
+
+class NonNativeUInt64(BaseType, NonNativeInteger):
+    _attrs_ = ()
+
+    T = rffi.ULONGLONG
+    BoxType = interp_boxes.W_UInt64Box
+    format_code = "Q"
+
+    _coerce = func_with_new_name(_uint64_coerce, '_coerce')
 
 class Float(Primitive):
     _mixin_ = True
@@ -407,8 +552,8 @@
         return self.box(space.float_w(space.call_function(space.w_float, w_item)))
 
     def str_format(self, box):
-        value = self.unbox(box)
-        return float2string(self.for_computation(value), "g", rfloat.DTSF_STR_PRECISION)
+        return float2string(self.for_computation(self.unbox(box)), "g",
+                            rfloat.DTSF_STR_PRECISION)
 
     def for_computation(self, v):
         return float(v)
@@ -440,7 +585,15 @@
 
     @simple_binary_op
     def pow(self, v1, v2):
-        return math.pow(v1, v2)
+        try:
+            return math.pow(v1, v2)
+        except ValueError:
+            return rfloat.NAN
+        except OverflowError:
+            if math.modf(v2)[0] == 0 and math.modf(v2 / 2)[0] != 0:
+                # Odd integer powers result in the same sign as the base
+                return rfloat.copysign(rfloat.INFINITY, v1)
+            return rfloat.INFINITY
 
     @simple_binary_op
     def copysign(self, v1, v2):
@@ -452,10 +605,21 @@
             return 0.0
         return rfloat.copysign(1.0, v)
 
+    @raw_unary_op
+    def signbit(self, v):
+        return rfloat.copysign(1.0, v) < 0.0
+
     @simple_unary_op
     def fabs(self, v):
         return math.fabs(v)
 
+    @simple_binary_op
+    def fmod(self, v1, v2):
+        try:
+            return math.fmod(v1, v2)
+        except ValueError:
+            return rfloat.NAN
+
     @simple_unary_op
     def reciprocal(self, v):
         if v == 0.0:
@@ -478,6 +642,20 @@
             return rfloat.INFINITY
 
     @simple_unary_op
+    def exp2(self, v):
+        try:
+            return math.pow(2, v)
+        except OverflowError:
+            return rfloat.INFINITY
+
+    @simple_unary_op
+    def expm1(self, v):
+        try:
+            return rfloat.expm1(v)
+        except OverflowError:
+            return rfloat.INFINITY
+
+    @simple_unary_op
     def sin(self, v):
         return math.sin(v)
 
@@ -505,6 +683,10 @@
     def arctan(self, v):
         return math.atan(v)
 
+    @simple_binary_op
+    def arctan2(self, v1, v2):
+        return math.atan2(v1, v2)
+
     @simple_unary_op
     def sinh(self, v):
         return math.sinh(v)
@@ -550,6 +732,18 @@
     def isinf(self, v):
         return rfloat.isinf(v)
 
+    @raw_unary_op
+    def isneginf(self, v):
+        return rfloat.isinf(v) and v < 0
+
+    @raw_unary_op
+    def isposinf(self, v):
+        return rfloat.isinf(v) and v > 0
+
+    @raw_unary_op
+    def isfinite(self, v):
+        return not (rfloat.isinf(v) or rfloat.isnan(v))
+
     @simple_unary_op
     def radians(self, v):
         return v * degToRad
@@ -601,13 +795,200 @@
         except ValueError:
             return rfloat.NAN
 
+    @simple_binary_op
+    def logaddexp(self, v1, v2):
+        try:
+            v1e = math.exp(v1)
+        except OverflowError:
+            v1e = rfloat.INFINITY
+        try:
+            v2e = math.exp(v2)
+        except OverflowError:
+            v2e = rfloat.INFINITY
+
+        v12e = v1e + v2e
+        try:
+            return math.log(v12e)
+        except ValueError:
+            if v12e == 0.0:
+                # CPython raises ValueError here, so we have to check
+                # the value to find the correct numpy return value
+                return -rfloat.INFINITY
+            return rfloat.NAN
+
+    @simple_binary_op
+    def logaddexp2(self, v1, v2):
+        try:
+            v1e = math.pow(2, v1)
+        except OverflowError:
+            v1e = rfloat.INFINITY
+        try:
+            v2e = math.pow(2, v2)
+        except OverflowError:
+            v2e = rfloat.INFINITY
+
+        v12e = v1e + v2e
+        try:
+            return math.log(v12e) / log2
+        except ValueError:
+            if v12e == 0.0:
+                # CPython raises ValueError here, so we have to check
+                # the value to find the correct numpy return value
+                return -rfloat.INFINITY
+            return rfloat.NAN
+
+class NonNativeFloat(NonNativePrimitive, Float):
+    _mixin_ = True
+
+    def _read(self, storage, width, i, offset):
+        if we_are_translated():
+            res = libffi.array_getitem(clibffi.cast_type_to_ffitype(self.T),
+                                        width, storage, i, offset)
+        else:
+            res = libffi.array_getitem_T(self.T, width, storage, i, offset)
+        #return byteswap(res)
+        return res
+
+    def _write(self, storage, width, i, offset, value):
+        #value = byteswap(value) XXX
+        if we_are_translated():
+            libffi.array_setitem(clibffi.cast_type_to_ffitype(self.T),
+                                 width, storage, i, offset, value)
+        else:
+            libffi.array_setitem_T(self.T, width, storage, i, offset, value)
+
+    def pack_str(self, box):
+        # XXX byteswap
+        return struct.pack(self.format_code, self.unbox(box))
+
 
 class Float32(BaseType, Float):
+    _attrs_ = ()
+
     T = rffi.FLOAT
     BoxType = interp_boxes.W_Float32Box
     format_code = "f"
 
+class NonNativeFloat32(BaseType, NonNativeFloat):
+    _attrs_ = ()
+
+    T = rffi.FLOAT
+    BoxType = interp_boxes.W_Float32Box
+    format_code = "f"    
+
 class Float64(BaseType, Float):
+    _attrs_ = ()
+
     T = rffi.DOUBLE
     BoxType = interp_boxes.W_Float64Box
     format_code = "d"
+
+class NonNativeFloat64(BaseType, NonNativeFloat):
+    _attrs_ = ()
+
+    T = rffi.DOUBLE
+    BoxType = interp_boxes.W_Float64Box
+    format_code = "d"
+
+class BaseStringType(object):
+    _mixin_ = True
+    
+    def __init__(self, size=0):
+        self.size = size
+
+    def get_element_size(self):
+        return self.size * rffi.sizeof(self.T)
+
+class StringType(BaseType, BaseStringType):
+    T = lltype.Char
+
+class VoidType(BaseType, BaseStringType):
+    T = lltype.Char
+
+NonNativeVoidType = VoidType
+NonNativeStringType = StringType
+
+class UnicodeType(BaseType, BaseStringType):
+    T = lltype.UniChar
+
+NonNativeUnicodeType = UnicodeType
+
+class RecordType(BaseType):
+
+    T = lltype.Char
+
+    def __init__(self, offsets_and_fields, size):
+        self.offsets_and_fields = offsets_and_fields
+        self.size = size
+
+    def get_element_size(self):
+        return self.size
+    
+    def read(self, arr, width, i, offset, dtype=None):
+        if dtype is None:
+            dtype = arr.dtype
+        return interp_boxes.W_VoidBox(arr, i + offset, dtype)
+
+    @jit.unroll_safe
+    def coerce(self, space, dtype, w_item): 
+        from pypy.module.micronumpy.interp_numarray import W_NDimArray
+
+        if isinstance(w_item, interp_boxes.W_VoidBox):
+            return w_item
+        # we treat every sequence as sequence, no special support
+        # for arrays
+        if not space.issequence_w(w_item):
+            raise OperationError(space.w_TypeError, space.wrap(
+                "expected sequence"))
+        if len(self.offsets_and_fields) != space.int_w(space.len(w_item)):
+            raise OperationError(space.w_ValueError, space.wrap(
+                "wrong length"))
+        items_w = space.fixedview(w_item)
+        # XXX optimize it out one day, but for now we just allocate an
+        #     array
+        arr = W_NDimArray([1], dtype)
+        for i in range(len(items_w)):
+            subdtype = dtype.fields[dtype.fieldnames[i]][1]
+            ofs, itemtype = self.offsets_and_fields[i]
+            w_item = items_w[i]
+            w_box = itemtype.coerce(space, subdtype, w_item)
+            itemtype.store(arr, 1, 0, ofs, w_box)
+        return interp_boxes.W_VoidBox(arr, 0, arr.dtype)
+
+    @jit.unroll_safe
+    def store(self, arr, _, i, ofs, box):
+        assert isinstance(box, interp_boxes.W_VoidBox)
+        for k in range(self.get_element_size()):
+            arr.storage[k + i] = box.arr.storage[k + box.ofs]
+
+    @jit.unroll_safe
+    def str_format(self, box):
+        assert isinstance(box, interp_boxes.W_VoidBox)
+        pieces = ["("]
+        first = True
+        for ofs, tp in self.offsets_and_fields:
+            if first:
+                first = False
+            else:
+                pieces.append(", ")
+            pieces.append(tp.str_format(tp.read(box.arr, 1, box.ofs, ofs)))
+        pieces.append(")")
+        return "".join(pieces)
+
+for tp in [Int32, Int64]:
+    if tp.T == lltype.Signed:
+        IntP = tp
+        break
+for tp in [UInt32, UInt64]:
+    if tp.T == lltype.Unsigned:
+        UIntP = tp
+        break
+del tp
+
+def _setup():
+    # compute alignment
+    for tp in globals().values():
+        if isinstance(tp, type) and hasattr(tp, 'T'):
+            tp.alignment = clibffi.cast_type_to_ffitype(tp.T).c_alignment
+_setup()
+del _setup
diff --git a/pypy/module/test_lib_pypy/numpypy/core/test_numeric.py b/pypy/module/test_lib_pypy/numpypy/core/test_numeric.py
--- a/pypy/module/test_lib_pypy/numpypy/core/test_numeric.py
+++ b/pypy/module/test_lib_pypy/numpypy/core/test_numeric.py
@@ -34,7 +34,7 @@
         assert repr(a) == "array([], dtype=float64)"
         a = zeros(1001)
         assert repr(a) == "array([ 0.,  0.,  0., ...,  0.,  0.,  0.])"
-        a = array(range(5), long)
+        a = array(range(5), int)
         if a.dtype.itemsize == int_size:
             assert repr(a) == "array([0, 1, 2, 3, 4])"
         else:
diff --git a/pypy/objspace/fake/objspace.py b/pypy/objspace/fake/objspace.py
--- a/pypy/objspace/fake/objspace.py
+++ b/pypy/objspace/fake/objspace.py
@@ -207,6 +207,11 @@
         is_arguments(args)
         return w_some_obj()
 
+    def get_and_call_function(space, w_descr, w_obj, *args_w):
+        args = argument.Arguments(space, list(args_w))
+        w_impl = space.get(w_descr, w_obj)
+        return space.call_args(w_impl, args)
+
     def gettypefor(self, cls):
         return self.gettypeobject(cls.typedef)
 
diff --git a/pypy/objspace/flow/model.py b/pypy/objspace/flow/model.py
--- a/pypy/objspace/flow/model.py
+++ b/pypy/objspace/flow/model.py
@@ -8,7 +8,7 @@
 from pypy.tool.descriptor import roproperty
 from pypy.tool.sourcetools import PY_IDENTIFIER, nice_repr_for_func
 from pypy.tool.identity_dict import identity_dict
-from pypy.rlib.rarithmetic import is_valid_int
+from pypy.rlib.rarithmetic import is_valid_int, r_longlong, r_ulonglong
 
 
 """
@@ -546,6 +546,8 @@
                     for n in cases[:len(cases)-has_default]:
                         if is_valid_int(n):
                             continue
+                        if isinstance(n, (r_longlong, r_ulonglong)):
+                            continue
                         if isinstance(n, (str, unicode)) and len(n) == 1:
                             continue
                         assert n != 'default', (
diff --git a/pypy/objspace/flow/objspace.py b/pypy/objspace/flow/objspace.py
--- a/pypy/objspace/flow/objspace.py
+++ b/pypy/objspace/flow/objspace.py
@@ -117,7 +117,7 @@
         else:
             return Constant(tuple(content))
 
-    def newlist(self, args_w):
+    def newlist(self, args_w, sizehint=None):
         if self.concrete_mode:
             content = [self.unwrap(w_arg) for w_arg in args_w]
             return Constant(content)
diff --git a/pypy/objspace/flow/test/test_objspace.py b/pypy/objspace/flow/test/test_objspace.py
--- a/pypy/objspace/flow/test/test_objspace.py
+++ b/pypy/objspace/flow/test/test_objspace.py
@@ -849,16 +849,25 @@
                         c.co_filename, c.co_name, c.co_firstlineno,
                         c.co_lnotab)
 
+    def patch_opcodes(self, *opcodes):
+        flow_meth_names = flowcontext.FlowSpaceFrame.opcode_method_names
+        pyframe_meth_names = PyFrame.opcode_method_names
+        for name in opcodes:
+            num = bytecode_spec.opmap[name]
+            setattr(self, 'old_' + name, flow_meth_names[num])
+            flow_meth_names[num] = pyframe_meth_names[num]
+
+    def unpatch_opcodes(self, *opcodes):
+        flow_meth_names = flowcontext.FlowSpaceFrame.opcode_method_names
+        for name in opcodes:
+            num = bytecode_spec.opmap[name]
+            flow_meth_names[num] = getattr(self, 'old_' + name)
+
     def test_callmethod_opcode(self):
         """ Tests code generated by pypy-c compiled with CALL_METHOD
         bytecode
         """
-        flow_meth_names = flowcontext.FlowSpaceFrame.opcode_method_names
-        pyframe_meth_names = PyFrame.opcode_method_names
-        for name in ['CALL_METHOD', 'LOOKUP_METHOD']:
-            num = bytecode_spec.opmap[name]
-            locals()['old_' + name] = flow_meth_names[num]
-            flow_meth_names[num] = pyframe_meth_names[num]
+        self.patch_opcodes('CALL_METHOD', 'LOOKUP_METHOD')
         try:
             class X:
                 def m(self):
@@ -878,9 +887,29 @@
             assert all_ops['simple_call'] == 2
             assert all_ops['getattr'] == 1
         finally:
-            for name in ['CALL_METHOD', 'LOOKUP_METHOD']:
-                num = bytecode_spec.opmap[name]
-                flow_meth_names[num] = locals()['old_' + name]
+            self.unpatch_opcodes('CALL_METHOD', 'LOOKUP_METHOD')
+
+    def test_build_list_from_arg_opcode(self):
+        """ Tests code generated by pypy-c compiled with BUILD_LIST_FROM_ARG
+        bytecode
+        """
+        self.patch_opcodes('BUILD_LIST_FROM_ARG')
+        try:
+            def f():
+                return [i for i in "abc"]
+
+            # this code is generated by pypy-c when compiling above f
+            pypy_code = 'd\x01\x00\xcb\x00\x00D]\x0c\x00}\x00\x00|\x00\x00^\x02\x00q\x07\x00S'
+            new_c = self.monkey_patch_code(f.func_code, 3, 67, pypy_code, (),
+                                           ('i',))
+            f2 = new.function(new_c, locals(), 'f')
+
+            graph = self.codetest(f2)
+            all_ops = self.all_operations(graph)
+            assert all_ops == {'newlist': 1, 'getattr': 1, 'simple_call': 1,
+                               'iter': 1, 'next': 1}
+        finally:
+            self.unpatch_opcodes('BUILD_LIST_FROM_ARG')
 
     def test_dont_capture_RuntimeError(self):
         class Foo:
diff --git a/pypy/objspace/std/complexobject.py b/pypy/objspace/std/complexobject.py
--- a/pypy/objspace/std/complexobject.py
+++ b/pypy/objspace/std/complexobject.py
@@ -9,6 +9,7 @@
 from pypy.rlib.rfloat import (
     formatd, DTSF_STR_PRECISION, isinf, isnan, copysign)
 from pypy.rlib import jit
+from pypy.rlib.rarithmetic import intmask
 
 import math
 
@@ -173,7 +174,7 @@
 def hash__Complex(space, w_value):
     hashreal = _hash_float(space, w_value.realval)
     hashimg = _hash_float(space, w_value.imagval)
-    combined = hashreal + 1000003 * hashimg
+    combined = intmask(hashreal + 1000003 * hashimg)
     return space.newint(combined)
 
 def add__Complex_Complex(space, w_complex1, w_complex2):
diff --git a/pypy/objspace/std/intobject.py b/pypy/objspace/std/intobject.py
--- a/pypy/objspace/std/intobject.py
+++ b/pypy/objspace/std/intobject.py
@@ -6,7 +6,7 @@
 from pypy.objspace.std.noneobject import W_NoneObject
 from pypy.objspace.std.register_all import register_all
 from pypy.rlib import jit
-from pypy.rlib.rarithmetic import ovfcheck, LONG_BIT, r_uint
+from pypy.rlib.rarithmetic import ovfcheck, LONG_BIT, r_uint, is_valid_int
 from pypy.rlib.rbigint import rbigint
 
 """
@@ -42,6 +42,7 @@
     from pypy.objspace.std.inttype import int_typedef as typedef
 
     def __init__(w_self, intval):
+        assert is_valid_int(intval)
         w_self.intval = intval
 
     def __repr__(w_self):
diff --git a/pypy/objspace/std/iterobject.py b/pypy/objspace/std/iterobject.py
--- a/pypy/objspace/std/iterobject.py
+++ b/pypy/objspace/std/iterobject.py
@@ -22,7 +22,7 @@
         index = self.index
         w_length = space.len(self.w_seq)
         w_len = space.sub(w_length, space.wrap(index))
-        if space.is_true(space.lt(w_len,space.wrap(0))):
+        if space.is_true(space.lt(w_len, space.wrap(0))):
             w_len = space.wrap(0)
         return w_len
 
@@ -30,8 +30,8 @@
     """Sequence iterator implementation for general sequences."""
 
 class W_FastListIterObject(W_AbstractSeqIterObject):
-    """Sequence iterator specialized for lists, accessing
-    directly their RPython-level list of wrapped objects.
+    """Sequence iterator specialized for lists, accessing directly their
+    RPython-level list of wrapped objects.
     """
 
 class W_ReverseSeqIterObject(W_Object):
diff --git a/pypy/objspace/std/objspace.py b/pypy/objspace/std/objspace.py
--- a/pypy/objspace/std/objspace.py
+++ b/pypy/objspace/std/objspace.py
@@ -9,7 +9,7 @@
 from pypy.objspace.descroperation import DescrOperation, raiseattrerror
 from pypy.rlib.objectmodel import instantiate, r_dict, specialize, is_annotation_constant
 from pypy.rlib.debug import make_sure_not_resized
-from pypy.rlib.rarithmetic import base_int, widen, maxint
+from pypy.rlib.rarithmetic import base_int, widen, maxint, is_valid_int
 from pypy.rlib.objectmodel import we_are_translated
 from pypy.rlib import jit
 
@@ -165,10 +165,6 @@
                 return self.newbool(x)
             else:
                 return self.newint(x)
-        # this is an inlined 'is_valid_int' which cannot be used
-        # due to the special annotation nature of 'wrap'.
-        if isinstance(x, long) and (-maxint - 1 <= x <= maxint):
-            return self.newint(x)
         if isinstance(x, str):
             return wrapstr(self, x)
         if isinstance(x, unicode):
@@ -199,6 +195,11 @@
         "NOT_RPYTHON"
         # _____ this code is here to support testing only _____
 
+        # we might get there in non-translated versions if 'x' is
+        # a long that fits the correct range.
+        if is_valid_int(x):
+            return self.newint(x)
+
         # wrap() of a container works on CPython, but the code is
         # not RPython.  Don't use -- it is kept around mostly for tests.
         # Use instead newdict(), newlist(), newtuple().
@@ -217,17 +218,7 @@
         # The following cases are even stranger.
         # Really really only for tests.
         if type(x) is long:
-            if self.config.objspace.std.withsmalllong:
-                from pypy.rlib.rarithmetic import r_longlong
-                try:
-                    rx = r_longlong(x)
-                except OverflowError:
-                    pass
-                else:
-                    from pypy.objspace.std.smalllongobject import \
-                                                   W_SmallLongObject
-                    return W_SmallLongObject(rx)
-            return W_LongObject.fromlong(x)
+            return self.wraplong(x)
         if isinstance(x, slice):
             return W_SliceObject(self.wrap(x.start),
                                  self.wrap(x.stop),
@@ -268,6 +259,20 @@
             return w_result
         return None
 
+    def wraplong(self, x):
+        "NOT_RPYTHON"
+        if self.config.objspace.std.withsmalllong:
+            from pypy.rlib.rarithmetic import r_longlong
+            try:
+                rx = r_longlong(x)
+            except OverflowError:
+                pass
+            else:
+                from pypy.objspace.std.smalllongobject import \
+                                               W_SmallLongObject
+                return W_SmallLongObject(rx)
+        return W_LongObject.fromlong(x)
+
     def unwrap(self, w_obj):
         """NOT_RPYTHON"""
         if isinstance(w_obj, Wrappable):
diff --git a/pypy/objspace/std/test/test_intobject.py b/pypy/objspace/std/test/test_intobject.py
--- a/pypy/objspace/std/test/test_intobject.py
+++ b/pypy/objspace/std/test/test_intobject.py
@@ -2,7 +2,7 @@
 import sys
 from pypy.objspace.std import intobject as iobj
 from pypy.objspace.std.multimethod import FailedToImplement
-from pypy.rlib.rarithmetic import r_uint
+from pypy.rlib.rarithmetic import r_uint, is_valid_int
 from pypy.rlib.rbigint import rbigint
 
 
@@ -15,7 +15,7 @@
         while 1:
             ires = x << n
             lres = l << n
-            if type(ires) is long or lres != ires:
+            if not is_valid_int(ires) or lres != ires:
                 return n
             n += 1
 
diff --git a/pypy/objspace/std/test/test_listobject.py b/pypy/objspace/std/test/test_listobject.py
--- a/pypy/objspace/std/test/test_listobject.py
+++ b/pypy/objspace/std/test/test_listobject.py
@@ -3,6 +3,7 @@
 from pypy.objspace.std.listobject import W_ListObject, SizeListStrategy,\
      IntegerListStrategy, ObjectListStrategy
 from pypy.interpreter.error import OperationError
+from pypy.rlib.rarithmetic import is_valid_int
 
 from pypy.conftest import gettestobjspace, option
 
@@ -242,7 +243,7 @@
                 self.space.raises_w(self.space.w_IndexError,
                                     self.space.setitem, w_list, w(key), w(42))
             else:
-                if isinstance(value, int):   # non-slicing
+                if is_valid_int(value):   # non-slicing
                     if random.random() < 0.25:   # deleting
                         self.space.delitem(w_list, w(key))
                         del expected[key]
diff --git a/pypy/objspace/std/test/test_smallintobject.py b/pypy/objspace/std/test/test_smallintobject.py
--- a/pypy/objspace/std/test/test_smallintobject.py
+++ b/pypy/objspace/std/test/test_smallintobject.py
@@ -64,7 +64,7 @@
                 f1 = wrapint(self.space, x)
                 f2 = wrapint(self.space, y)
                 result = self.space.unwrap(self.space.add(f1, f2))
-                assert result == x+y and type(result) == type(x+y)
+                assert result == x+y
 
     def test_sub(self):
         for x in [1, 100, sys.maxint // 2 - 50,
@@ -74,15 +74,16 @@
                 f1 = wrapint(self.space, x)
                 f2 = wrapint(self.space, y)
                 result = self.space.unwrap(self.space.sub(f1, f2))
-                assert result == x-y and type(result) == type(x-y)
-
+                assert result == x-y
+                
     def test_mul(self):
         for x in [0, 1, 100, sys.maxint // 2 - 50, sys.maxint - 1000]:
             for y in [0, 1, 100, sys.maxint // 2 - 50, sys.maxint - 1000]:
                 f1 = wrapint(self.space, x)
                 f2 = wrapint(self.space, y)
                 result = self.space.unwrap(self.space.mul(f1, f2))
-                assert result == x*y and type(result) == type(x*y)
+                assert result == x*y
+                
 
     def test_div(self):
         for i in range(10):
diff --git a/pypy/objspace/std/test/test_stringobject.py b/pypy/objspace/std/test/test_stringobject.py
--- a/pypy/objspace/std/test/test_stringobject.py
+++ b/pypy/objspace/std/test/test_stringobject.py
@@ -508,6 +508,8 @@
         # Need default encoding utf-8, but sys.setdefaultencoding
         # is removed after startup.
         import sys
+        if not hasattr(sys, 'setdefaultencoding'):
+            skip("sys.setdefaultencoding() not available")
         old_encoding = sys.getdefaultencoding()
         # Duplicate unittest.test_support.CleanImport logic because it won't
         # import.
diff --git a/pypy/objspace/std/test/test_userobject.py b/pypy/objspace/std/test/test_userobject.py
--- a/pypy/objspace/std/test/test_userobject.py
+++ b/pypy/objspace/std/test/test_userobject.py
@@ -10,10 +10,10 @@
         from pypy import conftest
         cls.space = conftest.gettestobjspace(**cls.OPTIONS)
         cls.w_runappdirect = cls.space.wrap(bool(conftest.option.runappdirect))
-
-    def w_rand(self):
-        import random
-        return random.randrange(0, 5)
+        def rand(space):
+            import random
+            return space.wrap(random.randrange(0, 5))
+        cls.w_rand = cls.space.wrap(gateway.interp2app(rand))
 
     def test_emptyclass(self):
         class empty(object): pass
diff --git a/pypy/rlib/bitmanipulation.py b/pypy/rlib/bitmanipulation.py
--- a/pypy/rlib/bitmanipulation.py
+++ b/pypy/rlib/bitmanipulation.py
@@ -1,5 +1,6 @@
 from pypy.rlib import unroll
 
+
 class BitSplitter(dict):
     def __getitem__(self, lengths):
         if isinstance(lengths, int):
diff --git a/pypy/rlib/clibffi.py b/pypy/rlib/clibffi.py
--- a/pypy/rlib/clibffi.py
+++ b/pypy/rlib/clibffi.py
@@ -233,6 +233,7 @@
     (rffi.LONGLONG, _signed_type_for(rffi.LONGLONG)),
     (lltype.UniChar, _unsigned_type_for(lltype.UniChar)),
     (lltype.Bool, _unsigned_type_for(lltype.Bool)),
+    (lltype.Char, _signed_type_for(lltype.Char)),
     ]
 
 __float_type_map = [
diff --git a/pypy/rlib/libffi.py b/pypy/rlib/libffi.py
--- a/pypy/rlib/libffi.py
+++ b/pypy/rlib/libffi.py
@@ -35,6 +35,7 @@
         cls.ulong = clibffi.cast_type_to_ffitype(rffi.ULONG)
         cls.slonglong = clibffi.cast_type_to_ffitype(rffi.LONGLONG)
         cls.ulonglong = clibffi.cast_type_to_ffitype(rffi.ULONGLONG)
+        cls.signed = clibffi.cast_type_to_ffitype(rffi.SIGNED)
         cls.wchar_t = clibffi.cast_type_to_ffitype(lltype.UniChar)
         del cls._import
 
@@ -79,16 +80,20 @@
 
 types._import()
 
+# this was '_fits_into_long', which is not adequate, because long is
+# not necessary the type where we compute with. Actually meant is
+# the type 'Signed'.
+
 @specialize.arg(0)
-def _fits_into_long(TYPE):
+def _fits_into_signed(TYPE):
     if isinstance(TYPE, lltype.Ptr):
-        return True # pointers always fits into longs
+        return True # pointers always fits into Signeds
     if not isinstance(TYPE, lltype.Primitive):
         return False
     if TYPE is lltype.Void or TYPE is rffi.FLOAT or TYPE is rffi.DOUBLE:
         return False
     sz = rffi.sizeof(TYPE)
-    return sz <= rffi.sizeof(rffi.LONG)
+    return sz <= rffi.sizeof(rffi.SIGNED)
 
 
 # ======================================================================
@@ -115,9 +120,9 @@
     def arg(self, val):
         TYPE = lltype.typeOf(val)
         _check_type(TYPE)
-        if _fits_into_long(TYPE):
+        if _fits_into_signed(TYPE):
             cls = IntArg
-            val = rffi.cast(rffi.LONG, val)
+            val = rffi.cast(rffi.SIGNED, val)
         elif TYPE is rffi.DOUBLE:
             cls = FloatArg
         elif TYPE is rffi.LONGLONG or TYPE is rffi.ULONGLONG:
@@ -250,7 +255,7 @@
         if is_struct:
             assert types.is_struct(self.restype)
             res = self._do_call_raw(self.funcsym, ll_args)
-        elif _fits_into_long(RESULT):
+        elif _fits_into_signed(RESULT):
             assert not types.is_struct(self.restype)
             res = self._do_call_int(self.funcsym, ll_args)
         elif RESULT is rffi.DOUBLE:
@@ -309,7 +314,7 @@
 
     @jit.oopspec('libffi_call_int(self, funcsym, ll_args)')
     def _do_call_int(self, funcsym, ll_args):
-        return self._do_call(funcsym, ll_args, rffi.LONG)
+        return self._do_call(funcsym, ll_args, rffi.SIGNED)
 
     @jit.oopspec('libffi_call_float(self, funcsym, ll_args)')
     def _do_call_float(self, funcsym, ll_args):
@@ -322,7 +327,7 @@
     @jit.dont_look_inside
     def _do_call_raw(self, funcsym, ll_args):
         # same as _do_call_int, but marked as jit.dont_look_inside
-        return self._do_call(funcsym, ll_args, rffi.LONG)
+        return self._do_call(funcsym, ll_args, rffi.SIGNED)
 
     @jit.oopspec('libffi_call_longlong(self, funcsym, ll_args)')
     def _do_call_longlong(self, funcsym, ll_args):
@@ -360,7 +365,7 @@
             TP = lltype.Ptr(rffi.CArray(RESULT))
             buf = rffi.cast(TP, ll_result)
             if types.is_struct(self.restype):
-                assert RESULT == rffi.LONG
+                assert RESULT == rffi.SIGNED
                 # for structs, we directly return the buffer and transfer the
                 # ownership
                 res = rffi.cast(RESULT, buf)
@@ -424,6 +429,11 @@
             return rffi.cast(rffi.CArrayPtr(TYPE), addr)[0]
     assert False
 
+def array_getitem_T(TYPE, width, addr, index, offset):
+    addr = rffi.ptradd(addr, index * width)
+    addr = rffi.ptradd(addr, offset)
+    return rffi.cast(rffi.CArrayPtr(TYPE), addr)[0]
+
 @specialize.call_location()
 @jit.oopspec("libffi_array_setitem(ffitype, width, addr, index, offset, value)")
 def array_setitem(ffitype, width, addr, index, offset, value):
@@ -434,3 +444,8 @@
             rffi.cast(rffi.CArrayPtr(TYPE), addr)[0] = value
             return
     assert False
+
+def array_setitem_T(TYPE, width, addr, index, offset, value):
+    addr = rffi.ptradd(addr, index * width)
+    addr = rffi.ptradd(addr, offset)
+    rffi.cast(rffi.CArrayPtr(TYPE), addr)[0] = value
diff --git a/pypy/rlib/longlong2float.py b/pypy/rlib/longlong2float.py
--- a/pypy/rlib/longlong2float.py
+++ b/pypy/rlib/longlong2float.py
@@ -5,7 +5,12 @@
 long long to a float and back to a long long.  There are corner cases
 in which it does not work.
 """
+
+from pypy.annotation import model as annmodel
+from pypy.rlib.rarithmetic import r_int64
 from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.rpython.extregistry import ExtRegistryEntry
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
 
 # -------- implement longlong2float and float2longlong --------
@@ -16,38 +21,33 @@
 
 # these definitions are used only in tests, when not translated
 def longlong2float_emulator(llval):
-    d_array = lltype.malloc(DOUBLE_ARRAY_PTR.TO, 1, flavor='raw')
-    ll_array = rffi.cast(LONGLONG_ARRAY_PTR, d_array)
-    ll_array[0] = llval
-    floatval = d_array[0]
-    lltype.free(d_array, flavor='raw')
-    return floatval
+    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 1) as d_array:
+        ll_array = rffi.cast(LONGLONG_ARRAY_PTR, d_array)
+        ll_array[0] = llval
+        floatval = d_array[0]
+        return floatval
 
-def float2longlong_emulator(floatval):
-    d_array = lltype.malloc(DOUBLE_ARRAY_PTR.TO, 1, flavor='raw')
-    ll_array = rffi.cast(LONGLONG_ARRAY_PTR, d_array)
-    d_array[0] = floatval
-    llval = ll_array[0]
-    lltype.free(d_array, flavor='raw')
-    return llval
+def float2longlong(floatval):
+    with lltype.scoped_alloc(DOUBLE_ARRAY_PTR.TO, 1) as d_array:
+        ll_array = rffi.cast(LONGLONG_ARRAY_PTR, d_array)
+        d_array[0] = floatval
+        llval = ll_array[0]
+        return llval
 
 def uint2singlefloat_emulator(ival):
-    f_array = lltype.malloc(FLOAT_ARRAY_PTR.TO, 1, flavor='raw')
-    i_array = rffi.cast(UINT_ARRAY_PTR, f_array)
-    i_array[0] = ival
-    singlefloatval = f_array[0]
-    lltype.free(f_array, flavor='raw')
-    return singlefloatval
+    with lltype.scoped_alloc(FLOAT_ARRAY_PTR.TO, 1) as f_array:
+        i_array = rffi.cast(UINT_ARRAY_PTR, f_array)
+        i_array[0] = ival
+        singlefloatval = f_array[0]
+        return singlefloatval
 
 def singlefloat2uint_emulator(singlefloatval):
-    f_array = lltype.malloc(FLOAT_ARRAY_PTR.TO, 1, flavor='raw')
-    i_array = rffi.cast(UINT_ARRAY_PTR, f_array)
-    f_array[0] = singlefloatval
-    ival = i_array[0]
-    lltype.free(f_array, flavor='raw')
-    return ival
+    with lltype.scoped_alloc(FLOAT_ARRAY_PTR.TO, 1) as f_array:
+        i_array = rffi.cast(UINT_ARRAY_PTR, f_array)
+        f_array[0] = singlefloatval
+        ival = i_array[0]
+        return ival
 
-from pypy.translator.tool.cbuild import ExternalCompilationInfo
 eci = ExternalCompilationInfo(includes=['string.h', 'assert.h'],
                               post_include_bits=["""
 static double pypy__longlong2float(long long x) {
@@ -56,12 +56,6 @@
     memcpy(&dd, &x, 8);
     return dd;
 }
-static long long pypy__float2longlong(double x) {
-    long long ll;
-    assert(sizeof(double) == 8 && sizeof(long long) == 8);
-    memcpy(&ll, &x, 8);
-    return ll;
-}
 static float pypy__uint2singlefloat(unsigned int x) {
     float ff;
     assert(sizeof(float) == 4 && sizeof(unsigned int) == 4);
@@ -82,12 +76,6 @@
     _nowrapper=True, elidable_function=True, sandboxsafe=True,
     oo_primitive="pypy__longlong2float")
 
-float2longlong = rffi.llexternal(
-    "pypy__float2longlong", [rffi.DOUBLE], rffi.LONGLONG,
-    _callable=float2longlong_emulator, compilation_info=eci,
-    _nowrapper=True, elidable_function=True, sandboxsafe=True,
-    oo_primitive="pypy__float2longlong")
-
 uint2singlefloat = rffi.llexternal(
     "pypy__uint2singlefloat", [rffi.UINT], rffi.FLOAT,
     _callable=uint2singlefloat_emulator, compilation_info=eci,
@@ -99,3 +87,15 @@
     _callable=singlefloat2uint_emulator, compilation_info=eci,
     _nowrapper=True, elidable_function=True, sandboxsafe=True,
     oo_primitive="pypy__singlefloat2uint")
+
+
+class Float2LongLongEntry(ExtRegistryEntry):
+    _about_ = float2longlong
+
+    def compute_result_annotation(self, s_float):
+        assert annmodel.SomeFloat().contains(s_float)
+        return annmodel.SomeInteger(knowntype=r_int64)
+
+    def specialize_call(self, hop):
+        [v_float] = hop.inputargs(lltype.Float)
+        return hop.genop("convert_float_bytes_to_longlong", [v_float], resulttype=hop.r_result)
diff --git a/pypy/rlib/rarithmetic.py b/pypy/rlib/rarithmetic.py
--- a/pypy/rlib/rarithmetic.py
+++ b/pypy/rlib/rarithmetic.py
@@ -92,7 +92,10 @@
 We therefore can no longer use the int type as it is, but need
 to use long everywhere.
 """
-    
+
+# XXX returning int(n) should not be necessary and should be simply n.
+# XXX TODO: replace all int(n) by long(n) and fix everything that breaks.
+# XXX       Then relax it and replace int(n) by n.
 def intmask(n):
     if isinstance(n, objectmodel.Symbolic):
         return n        # assume Symbolics don't overflow
@@ -137,8 +140,11 @@
 maxint = int(LONG_TEST - 1)
 
 def is_valid_int(r):
-    return isinstance(r, (int, long)) and (
+    if objectmodel.we_are_translated():
+        return isinstance(r, int)
+    return type(r) in (int, long, bool) and (
         -maxint - 1 <= r <= maxint)
+is_valid_int._annspecialcase_ = 'specialize:argtype(0)'
 
 def ovfcheck(r):
     "NOT_RPYTHON"
@@ -463,9 +469,6 @@
 r_longlong = build_int('r_longlong', True, 64)
 r_ulonglong = build_int('r_ulonglong', False, 64)
 
-r_long = build_int('r_long', True, 32)
-r_ulong = build_int('r_ulong', False, 32)
-
 longlongmax = r_longlong(LONGLONG_TEST - 1)
 
 if r_longlong is not r_int:
@@ -474,10 +477,7 @@
     r_int64 = int
 
 # needed for ll_os_stat.time_t_to_FILE_TIME in the 64 bit case
-if r_long is not r_int:
-    r_uint32 = r_ulong
-else:
-    r_uint32 = r_uint
+r_uint32 = build_int('r_uint32', False, 32)
 
 # needed for ll_time.time_sleep_llimpl
 maxint32 = int((1 << 31) -1)
@@ -569,3 +569,37 @@
     if not objectmodel.we_are_translated():
         assert n <= p
     return llop.int_between(lltype.Bool, n, m, p)
+
+ at objectmodel.specialize.ll()
+def byteswap(arg):
+    """ Convert little->big endian and the opposite
+    """
+    from pypy.rpython.lltypesystem import lltype, rffi
+    
+    T = lltype.typeOf(arg)
+    # XXX we cannot do arithmetics on small ints
+    if isinstance(arg, base_int):
+        arg = widen(arg)
+    if rffi.sizeof(T) == 1:
+        res = arg
+    elif rffi.sizeof(T) == 2:
+        a, b = arg & 0xFF, arg & 0xFF00
+        res = (a << 8) | (b >> 8)
+    elif rffi.sizeof(T) == 4:
+        FF = r_uint(0xFF)
+        arg = r_uint(arg)
+        a, b, c, d = (arg & FF, arg & (FF << 8), arg & (FF << 16),
+                      arg & (FF << 24))
+        res = (a << 24) | (b << 8) | (c >> 8) | (d >> 24)
+    elif rffi.sizeof(T) == 8:
+        FF = r_ulonglong(0xFF)
+        arg = r_ulonglong(arg)
+        a, b, c, d = (arg & FF, arg & (FF << 8), arg & (FF << 16),
+                      arg & (FF << 24))
+        e, f, g, h = (arg & (FF << 32), arg & (FF << 40), arg & (FF << 48),
+                      arg & (FF << 56))
+        res = ((a << 56) | (b << 40) | (c << 24) | (d << 8) | (e >> 8) |
+               (f >> 24) | (g >> 40) | (h >> 56))
+    else:
+        assert False # unreachable code
+    return rffi.cast(T, res)
diff --git a/pypy/rlib/rfloat.py b/pypy/rlib/rfloat.py
--- a/pypy/rlib/rfloat.py
+++ b/pypy/rlib/rfloat.py
@@ -1,11 +1,13 @@
 """Float constants"""
 
 import math
+
+from pypy.annotation.model import SomeString
+from pypy.rlib import objectmodel
+from pypy.rpython.extfunc import register_external
 from pypy.rpython.tool import rffi_platform
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
-from pypy.rlib import objectmodel
-from pypy.rpython.extfunc import register_external
-from pypy.annotation.model import SomeString
+
 
 USE_SHORT_FLOAT_REPR = True # XXX make it a translation option?
 
@@ -74,7 +76,7 @@
         while i < len(s) and s[i] in '0123456789':
             after_point += s[i]
             i += 1
-            
+
         if i == len(s):
             return sign, before_point, after_point, exponent
 
@@ -91,7 +93,7 @@
 
     if i == len(s):
         raise ValueError
-    
+
     while i < len(s) and s[i] in '0123456789':
         exponent += s[i]
         i += 1
diff --git a/pypy/rlib/rmmap.py b/pypy/rlib/rmmap.py
--- a/pypy/rlib/rmmap.py
+++ b/pypy/rlib/rmmap.py
@@ -226,6 +226,7 @@
             # XXX should be propagate the real type, allowing
             # for 2*sys.maxint?
             high = high_ref[0]
+            high = rffi.cast(lltype.Signed, high)
             # low might just happen to have the value INVALID_FILE_SIZE
             # so we need to check the last error also
             INVALID_FILE_SIZE = -1
@@ -548,7 +549,7 @@
             FILE_BEGIN = 0
             high_ref = lltype.malloc(PLONG.TO, 1, flavor='raw')
             try:
-                high_ref[0] = newsize_high
+                high_ref[0] = rffi.cast(LONG, newsize_high)
                 SetFilePointer(self.file_handle, newsize_low, high_ref,
                                FILE_BEGIN)
             finally:
@@ -710,7 +711,9 @@
     free = c_munmap_safe
 
 elif _MS_WINDOWS:
-    def mmap(fileno, length, tagname="", access=_ACCESS_DEFAULT, offset=0):
+    def mmap(fileno, length, flags=0, tagname="", access=_ACCESS_DEFAULT, offset=0):
+        # XXX flags is or-ed into access by now.
+        
         # check size boundaries
         _check_map_size(length)
         map_size = length
@@ -792,6 +795,7 @@
             offset_hi = 0
             offset_lo = offset
 
+        flProtect |= flags
         m.map_handle = CreateFileMapping(m.file_handle, NULL, flProtect,
                                          size_hi, size_lo, m.tagname)
 
@@ -809,6 +813,11 @@
         m.map_handle = INVALID_HANDLE
         raise winerror
 
+    class Hint:
+        pos = -0x4fff0000   # for reproducible results
+    hint = Hint()
+    # XXX this has no effect on windows
+
     def alloc(map_size):
         """Allocate memory.  This is intended to be used by the JIT,
         so the memory has the executable bit set.
diff --git a/pypy/rlib/rstruct/nativefmttable.py b/pypy/rlib/rstruct/nativefmttable.py
--- a/pypy/rlib/rstruct/nativefmttable.py
+++ b/pypy/rlib/rstruct/nativefmttable.py
@@ -3,14 +3,17 @@
 The table 'native_fmttable' is also used by pypy.module.array.interp_array.
 """
 import struct
-from pypy.rlib import jit
+
+from pypy.rlib import jit, longlong2float
+from pypy.rlib.objectmodel import specialize
+from pypy.rlib.rarithmetic import r_singlefloat, widen
 from pypy.rlib.rstruct import standardfmttable as std
 from pypy.rlib.rstruct.error import StructError
+from pypy.rlib.unroll import unrolling_iterable
+from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rpython.tool import rffi_platform
-from pypy.rpython.lltypesystem import lltype, rffi
-from pypy.rlib.rarithmetic import r_singlefloat
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
-from pypy.rlib.objectmodel import specialize
+
 
 native_is_bigendian = struct.pack("=i", 1) == struct.pack(">i", 1)
 
@@ -23,18 +26,24 @@
 
 # ____________________________________________________________
 
+
 double_buf = lltype.malloc(rffi.DOUBLEP.TO, 1, flavor='raw', immortal=True)
 float_buf = lltype.malloc(rffi.FLOATP.TO, 1, flavor='raw', immortal=True)
 
- at jit.dont_look_inside
-def double_to_ccharp(doubleval):
-    double_buf[0] = doubleval
-    return rffi.cast(rffi.CCHARP, double_buf)
+range_8_unroll = unrolling_iterable(list(reversed(range(8))))
+range_4_unroll = unrolling_iterable(list(reversed(range(4))))
 
 def pack_double(fmtiter):
     doubleval = fmtiter.accept_float_arg()
-    p = double_to_ccharp(doubleval)
-    fmtiter.result.append_charpsize(p, rffi.sizeof(rffi.DOUBLE))
+    value = longlong2float.float2longlong(doubleval)
+    if fmtiter.bigendian:
+        for i in range_8_unroll:
+            x = (value >> (8*i)) & 0xff
+            fmtiter.result.append(chr(x))
+    else:
+        for i in range_8_unroll:
+            fmtiter.result.append(chr(value & 0xff))
+            value >>= 8
 
 @specialize.argtype(0)
 def unpack_double(fmtiter):
@@ -45,16 +54,19 @@
     doubleval = double_buf[0]
     fmtiter.appendobj(doubleval)
 
- at jit.dont_look_inside
-def float_to_ccharp(floatval):
-    float_buf[0] = floatval
-    return rffi.cast(rffi.CCHARP, float_buf)
-
 def pack_float(fmtiter):
     doubleval = fmtiter.accept_float_arg()
     floatval = r_singlefloat(doubleval)
-    p = float_to_ccharp(floatval)
-    fmtiter.result.append_charpsize(p, rffi.sizeof(rffi.FLOAT))
+    value = longlong2float.singlefloat2uint(floatval)
+    value = widen(value)
+    if fmtiter.bigendian:
+        for i in range_4_unroll:
+            x = (value >> (8*i)) & 0xff
+            fmtiter.result.append(chr(x))
+    else:
+        for i in range_4_unroll:
+            fmtiter.result.append(chr(value & 0xff))
+            value >>= 8
 
 @specialize.argtype(0)
 def unpack_float(fmtiter):
diff --git a/pypy/rlib/rstruct/runpack.py b/pypy/rlib/rstruct/runpack.py
--- a/pypy/rlib/rstruct/runpack.py
+++ b/pypy/rlib/rstruct/runpack.py
@@ -4,11 +4,10 @@
 """
 
 import py
-from struct import pack, unpack
+from struct import unpack
 from pypy.rlib.rstruct.formatiterator import FormatIterator
 from pypy.rlib.rstruct.error import StructError
 from pypy.rlib.rstruct.nativefmttable import native_is_bigendian
-from pypy.rpython.extregistry import ExtRegistryEntry
 
 class MasterReader(object):
     def __init__(self, s):
diff --git a/pypy/rlib/rstruct/standardfmttable.py b/pypy/rlib/rstruct/standardfmttable.py
--- a/pypy/rlib/rstruct/standardfmttable.py
+++ b/pypy/rlib/rstruct/standardfmttable.py
@@ -6,11 +6,12 @@
 # values when packing.
 
 import struct
+
+from pypy.rlib.objectmodel import specialize
+from pypy.rlib.rarithmetic import r_uint, r_longlong, r_ulonglong
+from pypy.rlib.rstruct import ieee
 from pypy.rlib.rstruct.error import StructError, StructOverflowError
-from pypy.rlib.rstruct import ieee
 from pypy.rlib.unroll import unrolling_iterable
-from pypy.rlib.rarithmetic import r_uint, r_longlong, r_ulonglong
-from pypy.rlib.objectmodel import specialize
 
 # In the CPython struct module, pack() unconsistently accepts inputs
 # that are out-of-range or floats instead of ints.  Should we emulate
diff --git a/pypy/rlib/rzlib.py b/pypy/rlib/rzlib.py
--- a/pypy/rlib/rzlib.py
+++ b/pypy/rlib/rzlib.py
@@ -1,6 +1,7 @@
 from __future__ import with_statement
 import sys
 
+from pypy.rlib import rgc
 from pypy.rlib.rstring import StringBuilder
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.rpython.tool import rffi_platform
@@ -236,6 +237,7 @@
     compress data.
     """
     stream = lltype.malloc(z_stream, flavor='raw', zero=True)
+    rgc.add_memory_pressure(rffi.sizeof(z_stream))
     err = _deflateInit2(stream, level, method, wbits, memLevel, strategy)
     if err == Z_OK:
         return stream
@@ -264,6 +266,7 @@
     decompress data.
     """
     stream = lltype.malloc(z_stream, flavor='raw', zero=True)
+    rgc.add_memory_pressure(rffi.sizeof(z_stream))
     err = _inflateInit2(stream, wbits)
     if err == Z_OK:
         return stream
diff --git a/pypy/rlib/test/test_libffi.py b/pypy/rlib/test/test_libffi.py
--- a/pypy/rlib/test/test_libffi.py
+++ b/pypy/rlib/test/test_libffi.py
@@ -34,8 +34,8 @@
         # .arg() only supports integers and floats
         chain = ArgChain()
         x = lltype.malloc(lltype.GcStruct('xxx'))
-        y = lltype.malloc(lltype.GcArray(rffi.LONG), 3)
-        z = lltype.malloc(lltype.Array(rffi.LONG), 4, flavor='raw')
+        y = lltype.malloc(lltype.GcArray(rffi.SIGNED), 3)
+        z = lltype.malloc(lltype.Array(rffi.SIGNED), 4, flavor='raw')
         py.test.raises(TypeError, "chain.arg(x)")
         py.test.raises(TypeError, "chain.arg(y)")
         py.test.raises(TypeError, "chain.arg(z)")
@@ -100,6 +100,7 @@
     def setup_class(cls):
         from pypy.tool.udir import udir
         from pypy.translator.tool.cbuild import ExternalCompilationInfo
+        from pypy.translator.tool.cbuild import STANDARD_DEFINES
         from pypy.translator.platform import platform
 
         BaseFfiTest.setup_class()
@@ -120,7 +121,7 @@
                     for match in re.finditer(" ([a-z_]+)\(", meth.__doc__):
                         exports.append(match.group(1))
         #
-        c_file.write(py.code.Source('\n'.join(snippets)))
+        c_file.write(STANDARD_DEFINES + str(py.code.Source('\n'.join(snippets))))
         eci = ExternalCompilationInfo(export_symbols=exports)
         cls.libfoo_name = str(platform.compile([c_file], eci, 'x',
                                                standalone=False))
@@ -157,13 +158,13 @@
 
     def test_very_simple(self):
         """
-            int diff_xy(int x, long y)
+            int diff_xy(int x, Signed y)
             {
                 return x - y;
             }
         """
         libfoo = self.get_libfoo()
-        func = (libfoo, 'diff_xy', [types.sint, types.slong], types.sint)
+        func = (libfoo, 'diff_xy', [types.sint, types.signed], types.sint)
         res = self.call(func, [50, 8], lltype.Signed)
         assert res == 42
 
@@ -206,7 +207,7 @@
         """
         libfoo = self.get_libfoo()
         func = (libfoo, 'many_args', [types.uchar, types.sint], types.sint)
-        res = self.call(func, [chr(20), 22], rffi.LONG)
+        res = self.call(func, [chr(20), 22], rffi.SIGNED)
         assert res == 42
 
     def test_char_args(self):
@@ -235,9 +236,9 @@
 
     def test_pointer_as_argument(self):
         """#include <stdlib.h>
-            long inc(long* x)
+            Signed inc(Signed* x)
             {
-                long oldval;
+                Signed oldval;
                 if (x == NULL)
                     return -1;
                 oldval = *x;
@@ -246,15 +247,14 @@
             }
         """
         libfoo = self.get_libfoo()
-        func = (libfoo, 'inc', [types.pointer], types.slong)
-        LONGP = lltype.Ptr(rffi.CArray(rffi.LONG))
-        null = lltype.nullptr(LONGP.TO)
-        res = self.call(func, [null], rffi.LONG)
+        func = (libfoo, 'inc', [types.pointer], types.signed)
+        null = lltype.nullptr(rffi.SIGNEDP.TO)
+        res = self.call(func, [null], rffi.SIGNED)
         assert res == -1
         #
-        ptr_result = lltype.malloc(LONGP.TO, 1, flavor='raw')
+        ptr_result = lltype.malloc(rffi.SIGNEDP.TO, 1, flavor='raw')
         ptr_result[0] = 41
-        res = self.call(func, [ptr_result], rffi.LONG)
+        res = self.call(func, [ptr_result], rffi.SIGNED)
         if self.__class__ is TestLibffiCall:
             # the function was called only once
             assert res == 41
@@ -274,21 +274,20 @@
     def test_return_pointer(self):
         """
             struct pair {
-                long a;
-                long b;
+                Signed a;
+                Signed b;
             };
 
             struct pair my_static_pair = {10, 20};
 
-            long* get_pointer_to_b()
+            Signed* get_pointer_to_b()
             {
                 return &my_static_pair.b;
             }
         """
         libfoo = self.get_libfoo()
         func = (libfoo, 'get_pointer_to_b', [], types.pointer)
-        LONGP = lltype.Ptr(rffi.CArray(rffi.LONG))
-        res = self.call(func, [], LONGP)
+        res = self.call(func, [], rffi.SIGNEDP)
         assert res[0] == 20
 
     def test_void_result(self):
@@ -301,12 +300,12 @@
         set_dummy = (libfoo, 'set_dummy', [types.sint], types.void)
         get_dummy = (libfoo, 'get_dummy', [], types.sint)
         #
-        initval = self.call(get_dummy, [], rffi.LONG)
+        initval = self.call(get_dummy, [], rffi.SIGNED)
         #
         res = self.call(set_dummy, [initval+1], lltype.Void)
         assert res is None
         #
-        res = self.call(get_dummy, [], rffi.LONG)
+        res = self.call(get_dummy, [], rffi.SIGNED)
         assert res == initval+1
 
     def test_single_float_args(self):
@@ -386,32 +385,32 @@
             else:
                 assert False, 'Did not raise'
 
-        my_raises("self.call(func, [38], rffi.LONG)") # one less
-        my_raises("self.call(func, [38, 12.3, 42], rffi.LONG)") # one more
+        my_raises("self.call(func, [38], rffi.SIGNED)") # one less
+        my_raises("self.call(func, [38, 12.3, 42], rffi.SIGNED)") # one more
 
 
     def test_byval_argument(self):
         """
             struct Point {
-                long x;
-                long y;
+                Signed x;
+                Signed y;
             };
 
-            long sum_point(struct Point p) {
+            Signed sum_point(struct Point p) {
                 return p.x + p.y;
             }
         """
         libfoo = CDLL(self.libfoo_name)
-        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.signed, types.signed])
         ffi_point = ffi_point_struct.ffistruct
-        sum_point = (libfoo, 'sum_point', [ffi_point], types.slong)
+        sum_point = (libfoo, 'sum_point', [ffi_point], types.signed)
         #
-        ARRAY = rffi.CArray(rffi.LONG)
+        ARRAY = rffi.CArray(rffi.SIGNED)
         buf = lltype.malloc(ARRAY, 2, flavor='raw')
         buf[0] = 30
         buf[1] = 12
         adr = rffi.cast(rffi.VOIDP, buf)
-        res = self.call(sum_point, [('arg_raw', adr)], rffi.LONG,
+        res = self.call(sum_point, [('arg_raw', adr)], rffi.SIGNED,
                         jitif=["byval"])
         assert res == 42
         # check that we still have the ownership on the buffer
@@ -422,7 +421,7 @@
 
     def test_byval_result(self):
         """
-            struct Point make_point(long x, long y) {
+            struct Point make_point(Signed x, Signed y) {
                 struct Point p;
                 p.x = x;
                 p.y = y;
@@ -430,13 +429,13 @@
             }
         """
         libfoo = CDLL(self.libfoo_name)
-        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.slong, types.slong])
+        ffi_point_struct = make_struct_ffitype_e(0, 0, [types.signed, types.signed])
         ffi_point = ffi_point_struct.ffistruct
 
         libfoo = CDLL(self.libfoo_name)
-        make_point = (libfoo, 'make_point', [types.slong, types.slong], ffi_point)
+        make_point = (libfoo, 'make_point', [types.signed, types.signed], ffi_point)
         #
-        PTR = lltype.Ptr(rffi.CArray(rffi.LONG))
+        PTR = lltype.Ptr(rffi.CArray(rffi.SIGNED))
         p = self.call(make_point, [12, 34], PTR, is_struct=True,
                       jitif=["byval"])
         assert p[0] == 12
diff --git a/pypy/rlib/test/test_rarithmetic.py b/pypy/rlib/test/test_rarithmetic.py
--- a/pypy/rlib/test/test_rarithmetic.py
+++ b/pypy/rlib/test/test_rarithmetic.py
@@ -330,6 +330,15 @@
             return a == b
         py.test.raises(MissingRTypeOperation, "self.interpret(f, [42.0])")
 
+    def test_is_valid_int(self):
+        def f(x):
+            return (is_valid_int(x)     * 4 +
+                    is_valid_int(x > 0) * 2 +
+                    is_valid_int(x + 0.5))
+        assert f(123) == 4 + 2
+        res = self.interpret(f, [123])
+        assert res == 4 + 2
+
 class TestLLtype(BaseTestRarithmetic, LLRtypeMixin):
     pass
 
@@ -374,3 +383,9 @@
     assert not int_between(1, 2, 2)
     assert not int_between(1, 1, 1)
 
+def test_byteswap():
+    from pypy.rpython.lltypesystem import rffi
+    
+    assert byteswap(rffi.cast(rffi.USHORT, 0x0102)) == 0x0201
+    assert byteswap(rffi.cast(rffi.INT, 0x01020304)) == 0x04030201
+    assert byteswap(rffi.cast(rffi.ULONGLONG, 0x0102030405060708L)) == 0x0807060504030201L
diff --git a/pypy/rpython/llinterp.py b/pypy/rpython/llinterp.py
--- a/pypy/rpython/llinterp.py
+++ b/pypy/rpython/llinterp.py
@@ -1,6 +1,6 @@
 from pypy.objspace.flow.model import FunctionGraph, Constant, Variable, c_last_exception
 from pypy.rlib.rarithmetic import intmask, r_uint, ovfcheck, r_longlong
-from pypy.rlib.rarithmetic import r_ulonglong
+from pypy.rlib.rarithmetic import r_ulonglong, is_valid_int
 from pypy.rpython.lltypesystem import lltype, llmemory, lloperation, llheap
 from pypy.rpython.lltypesystem import rclass
 from pypy.rpython.ootypesystem import ootype
@@ -1027,22 +1027,22 @@
     # Overflow-detecting variants
 
     def op_int_neg_ovf(self, x):
-        assert type(x) is int
+        assert is_valid_int(x)
         try:
             return ovfcheck(-x)
         except OverflowError:
             self.make_llexception()
 
     def op_int_abs_ovf(self, x):
-        assert type(x) is int
+        assert is_valid_int(x)
         try:
             return ovfcheck(abs(x))
         except OverflowError:
             self.make_llexception()
 
     def op_int_lshift_ovf(self, x, y):
-        assert isinstance(x, int)
-        assert isinstance(y, int)
+        assert is_valid_int(x)
+        assert is_valid_int(y)
         try:
             return ovfcheck(x << y)
         except OverflowError:
@@ -1066,7 +1066,9 @@
                 return r
                 '''%locals()
         elif operator == '%':
-            code = '''r = %(checkfn)s(x %% y)
+            ## overflow check on % does not work with emulated int
+            code = '''%(checkfn)s(x // y)
+                r = x %% y
                 if x^y < 0 and x%%y != 0:
                     r -= y
                 return r
@@ -1083,15 +1085,15 @@
                 self.make_llexception()
         """ % locals()).compile() in globals(), d
 
-    _makefunc2('op_int_add_ovf', '+', '(int, llmemory.AddressOffset)')
-    _makefunc2('op_int_mul_ovf', '*', '(int, llmemory.AddressOffset)', 'int')
-    _makefunc2('op_int_sub_ovf',          '-',  'int')
-    _makefunc2('op_int_floordiv_ovf',     '//', 'int')  # XXX negative args
-    _makefunc2('op_int_floordiv_zer',     '//', 'int')  # can get off-by-one
-    _makefunc2('op_int_floordiv_ovf_zer', '//', 'int')  # (see op_int_floordiv)
-    _makefunc2('op_int_mod_ovf',          '%',  'int')
-    _makefunc2('op_int_mod_zer',          '%',  'int')
-    _makefunc2('op_int_mod_ovf_zer',      '%',  'int')
+    _makefunc2('op_int_add_ovf', '+', '(int, long, llmemory.AddressOffset)')
+    _makefunc2('op_int_mul_ovf', '*', '(int, long, llmemory.AddressOffset)', '(int, long)')
+    _makefunc2('op_int_sub_ovf',          '-',  '(int, long)')
+    _makefunc2('op_int_floordiv_ovf',     '//', '(int, long)')  # XXX negative args
+    _makefunc2('op_int_floordiv_zer',     '//', '(int, long)')  # can get off-by-one
+    _makefunc2('op_int_floordiv_ovf_zer', '//', '(int, long)')  # (see op_int_floordiv)
+    _makefunc2('op_int_mod_ovf',          '%',  '(int, long)')
+    _makefunc2('op_int_mod_zer',          '%',  '(int, long)')
+    _makefunc2('op_int_mod_ovf_zer',      '%',  '(int, long)')
 
     _makefunc2('op_uint_floordiv_zer',    '//', 'r_uint')
     _makefunc2('op_uint_mod_zer',         '%',  'r_uint')
@@ -1113,7 +1115,7 @@
             x = x.default
         # if type(x) is a subclass of Symbolic, bool(x) will usually raise
         # a TypeError -- unless __nonzero__ has been explicitly overridden.
-        assert isinstance(x, (int, Symbolic))
+        assert is_valid_int(x) or isinstance(x, Symbolic)
         return bool(x)
 
     # hack for jit.codegen.llgraph
@@ -1135,7 +1137,7 @@
 
     def op_oonewarray(self, ARRAY, length):
         assert isinstance(ARRAY, ootype.Array)
-        assert isinstance(length, int)
+        assert is_valid_int(length)
         return ootype.oonewarray(ARRAY, length)
 
     def op_runtimenew(self, class_):
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -84,14 +84,14 @@
             else:
                 PIECESIZE = 0x08000000
         PIECES = 10
-        flags = 0
+        flags = (0,)
         if _LINUX:
             flags = (rmmap.MAP_PRIVATE|rmmap.MAP_ANONYMOUS|rmmap.MAP_NORESERVE,
                      rmmap.PROT_READ|rmmap.PROT_WRITE)
         if _MS_WINDOWS:
-            flags = rmmap.MEM_RESERVE
+            flags = (rmmap.MEM_RESERVE,)
             # XXX seems not to work
-        m = rmmap.mmap(-1, PIECES * PIECESIZE, flags)
+        m = rmmap.mmap(-1, PIECES * PIECESIZE, *flags)
         m.close = lambda : None    # leak instead of giving a spurious
                                    # error at CPython's shutdown
         m._ll2ctypes_pieces = []
diff --git a/pypy/rpython/lltypesystem/llmemory.py b/pypy/rpython/lltypesystem/llmemory.py
--- a/pypy/rpython/lltypesystem/llmemory.py
+++ b/pypy/rpython/lltypesystem/llmemory.py
@@ -8,6 +8,8 @@
 from pypy.rlib.objectmodel import Symbolic, specialize
 from pypy.rpython.lltypesystem import lltype
 from pypy.tool.uid import uid
+from pypy.rlib.rarithmetic import is_valid_int
+
 
 class AddressOffset(Symbolic):
 
@@ -28,7 +30,7 @@
     def __ge__(self, other):
         if self is other:
             return True
-        elif (isinstance(other, (int, long)) and other == 0 and
+        elif (is_valid_int(other) and other == 0 and
             self.known_nonneg()):
             return True
         else:
@@ -58,7 +60,7 @@
         return "<ItemOffset %r %r>" % (self.TYPE, self.repeat)
 
     def __mul__(self, other):
-        if not isinstance(other, int):
+        if not is_valid_int(other):
             return NotImplemented
         return ItemOffset(self.TYPE, self.repeat * other)
 
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -18,6 +18,7 @@
 from pypy.rlib.rstring import StringBuilder, UnicodeBuilder, assert_str0
 from pypy.rlib import jit
 from pypy.rpython.lltypesystem import llmemory
+from pypy.rlib.rarithmetic import maxint, LONG_BIT
 import os, sys
 
 class CConstant(Symbolic):
@@ -649,8 +650,9 @@
 # float *
 FLOATP = lltype.Ptr(lltype.Array(FLOAT, hints={'nolength': True}))
 
-# SIGNED *
-SIGNEDP = lltype.Ptr(lltype.Array(lltype.Signed, hints={'nolength': True}))
+# Signed, Signed *
+SIGNED = lltype.Signed
+SIGNEDP = lltype.Ptr(lltype.Array(SIGNED, hints={'nolength': True}))
 
 # various type mapping
 
@@ -900,7 +902,7 @@
             size = llmemory.sizeof(tp)    # a symbolic result in this case
         return size
     if isinstance(tp, lltype.Ptr) or tp is llmemory.Address:
-        tp = ULONG     # XXX!
+        tp = lltype.Signed
     if tp is lltype.Char or tp is lltype.Bool:
         return 1
     if tp is lltype.UniChar:
@@ -911,7 +913,7 @@
         return 4
     assert isinstance(tp, lltype.Number)
     if tp is lltype.Signed:
-        return ULONG._type.BITS/8
+        return LONG_BIT/8
     return tp._type.BITS/8
 sizeof._annspecialcase_ = 'specialize:memo'
 
@@ -931,11 +933,11 @@
 offsetof._annspecialcase_ = 'specialize:memo'
 
 # check that we have a sane configuration
-assert sys.maxint == (1 << (8 * sizeof(lltype.Signed) - 1)) - 1, (
+assert maxint == (1 << (8 * sizeof(lltype.Signed) - 1)) - 1, (
     "Mixed configuration of the word size of the machine:\n\t"
     "the underlying Python was compiled with maxint=%d,\n\t"
     "but the C compiler says that 'long' is %d bytes" % (
-    sys.maxint, sizeof(lltype.Signed)))
+    maxint, sizeof(lltype.Signed)))
 
 # ********************** some helpers *******************
 
diff --git a/pypy/rpython/lltypesystem/test/test_rffi.py b/pypy/rpython/lltypesystem/test/test_rffi.py
--- a/pypy/rpython/lltypesystem/test/test_rffi.py
+++ b/pypy/rpython/lltypesystem/test/test_rffi.py
@@ -180,7 +180,7 @@
             struct.c_three = cast(INT, 5)
             result = z(struct)
             lltype.free(struct, flavor='raw')
-            return cast(LONG, result)
+            return cast(SIGNED, result)
     
         fn = self.compile(f, [], backendopt=False)
         assert fn() == 8
@@ -377,7 +377,7 @@
         h_source = py.code.Source("""
         #ifndef _CALLBACK_H
         #define _CALLBACK_H
-        extern long eating_callback(long arg, long(*call)(long));
+        extern Signed eating_callback(Signed arg, Signed(*call)(Signed));
         #endif /* _CALLBACK_H */
         """)
         
@@ -385,9 +385,9 @@
         h_include.write(h_source)
 
         c_source = py.code.Source("""
-        long eating_callback(long arg, long(*call)(long))
+        Signed eating_callback(Signed arg, Signed(*call)(Signed))
         {
-            long res = call(arg);
+            Signed res = call(arg);
             if (res == -1)
               return -1;
             return res;
@@ -399,8 +399,8 @@
                                       separate_module_sources=[c_source],
                                       export_symbols=['eating_callback'])
 
-        args = [LONG, CCallback([LONG], LONG)]
-        eating_callback = llexternal('eating_callback', args, LONG,
+        args = [SIGNED, CCallback([SIGNED], SIGNED)]
+        eating_callback = llexternal('eating_callback', args, SIGNED,
                                      compilation_info=eci)
 
         return eating_callback
@@ -554,13 +554,13 @@
             p = make(X, c_one=cast(INT, 3))
             res = p.c_one
             lltype.free(p, flavor='raw')
-            return cast(LONG, res)
+            return cast(SIGNED, res)
         assert f() == 3
         assert interpret(f, []) == 3
     
     def test_structcopy(self):
-        X2 = lltype.Struct('X2', ('x', LONG))
-        X1 = lltype.Struct('X1', ('a', LONG), ('x2', X2), ('p', lltype.Ptr(X2)))
+        X2 = lltype.Struct('X2', ('x', SIGNED))
+        X1 = lltype.Struct('X1', ('a', SIGNED), ('x2', X2), ('p', lltype.Ptr(X2)))
         def f():
             p2 = make(X2, x=123)
             p1 = make(X1, a=5, p=p2)
@@ -620,7 +620,7 @@
         eci = ExternalCompilationInfo(includes=['string.h'])
         strlen = llexternal('strlen', [CCHARP], SIZE_T, compilation_info=eci)
         def f():
-            return cast(LONG, strlen("Xxx"))
+            return cast(SIGNED, strlen("Xxx"))
         assert interpret(f, [], backendopt=True) == 3
     
     def test_stringpolicy3(self):
@@ -643,7 +643,7 @@
             ll_str = str2charp("Xxx")
             res2 = strlen(ll_str)
             lltype.free(ll_str, flavor='raw')
-            return cast(LONG, res1*10 + res2)
+            return cast(SIGNED, res1*10 + res2)
     
         assert interpret(f, [], backendopt=True) == 43    
     
diff --git a/pypy/rpython/memory/gc/test/test_direct.py b/pypy/rpython/memory/gc/test/test_direct.py
--- a/pypy/rpython/memory/gc/test/test_direct.py
+++ b/pypy/rpython/memory/gc/test/test_direct.py
@@ -9,7 +9,7 @@
 import py
 from pypy.rpython.lltypesystem import lltype, llmemory
 from pypy.rpython.memory.gctypelayout import TypeLayoutBuilder
-from pypy.rlib.rarithmetic import LONG_BIT
+from pypy.rlib.rarithmetic import LONG_BIT, is_valid_int
 
 WORD = LONG_BIT // 8
 
@@ -286,7 +286,7 @@
         p = self.malloc(S)
         hash = self.gc.identityhash(p)
         print hash
-        assert isinstance(hash, (int, long))
+        assert is_valid_int(hash)
         assert hash == self.gc.identityhash(p)
         self.stackroots.append(p)
         for i in range(6):
@@ -299,7 +299,7 @@
         self.gc.collect()
         hash = self.gc.identityhash(self.stackroots[-1])
         print hash
-        assert isinstance(hash, (int, long))
+        assert is_valid_int(hash)
         for i in range(6):
             self.gc.collect()
             assert hash == self.gc.identityhash(self.stackroots[-1])
@@ -311,7 +311,7 @@
             self.gc.collect()
         hash = self.gc.identityhash(self.stackroots[-1])
         print hash
-        assert isinstance(hash, (int, long))
+        assert is_valid_int(hash)
         for i in range(2):
             self.gc.collect()
             assert hash == self.gc.identityhash(self.stackroots[-1])
@@ -319,7 +319,7 @@
         # (4) p is a prebuilt object
         hash = self.gc.identityhash(p_const)
         print hash
-        assert isinstance(hash, (int, long))
+        assert is_valid_int(hash)
         assert hash == self.gc.identityhash(p_const)
         # (5) p is actually moving (for the markcompact gc)
         p0 = self.malloc(S)
diff --git a/pypy/rpython/module/ll_os.py b/pypy/rpython/module/ll_os.py
--- a/pypy/rpython/module/ll_os.py
+++ b/pypy/rpython/module/ll_os.py
@@ -237,7 +237,7 @@
     def extdef_for_os_function_returning_int(self, name, **kwds):
         c_func = self.llexternal(name, [], rffi.INT, **kwds)
         def c_func_llimpl():
-            res = rffi.cast(rffi.LONG, c_func())
+            res = rffi.cast(rffi.SIGNED, c_func())
             if res == -1:
                 raise OSError(rposix.get_errno(), "%s failed" % name)
             return res
@@ -249,7 +249,7 @@
     def extdef_for_os_function_accepting_int(self, name, **kwds):
         c_func = self.llexternal(name, [rffi.INT], rffi.INT, **kwds)
         def c_func_llimpl(arg):
-            res = rffi.cast(rffi.LONG, c_func(arg))
+            res = rffi.cast(rffi.SIGNED, c_func(arg))
             if res == -1:
                 raise OSError(rposix.get_errno(), "%s failed" % name)
         
@@ -261,7 +261,7 @@
     def extdef_for_os_function_accepting_2int(self, name, **kwds):
         c_func = self.llexternal(name, [rffi.INT, rffi.INT], rffi.INT, **kwds)
         def c_func_llimpl(arg, arg2):
-            res = rffi.cast(rffi.LONG, c_func(arg, arg2))
+            res = rffi.cast(rffi.SIGNED, c_func(arg, arg2))
             if res == -1:
                 raise OSError(rposix.get_errno(), "%s failed" % name)
         
@@ -273,7 +273,7 @@
     def extdef_for_os_function_accepting_0int(self, name, **kwds):
         c_func = self.llexternal(name, [], rffi.INT, **kwds)
         def c_func_llimpl():
-            res = rffi.cast(rffi.LONG, c_func())
+            res = rffi.cast(rffi.SIGNED, c_func())
             if res == -1:
                 raise OSError(rposix.get_errno(), "%s failed" % name)
         
@@ -285,7 +285,7 @@
     def extdef_for_os_function_int_to_int(self, name, **kwds):
         c_func = self.llexternal(name, [rffi.INT], rffi.INT, **kwds)
         def c_func_llimpl(arg):
-            res = rffi.cast(rffi.LONG, c_func(arg))
+            res = rffi.cast(rffi.SIGNED, c_func(arg))
             if res == -1:
                 raise OSError(rposix.get_errno(), "%s failed" % name)
             return res
@@ -438,9 +438,13 @@
         UTIMBUFP = lltype.Ptr(self.UTIMBUF)
         os_utime = self.llexternal('utime', [rffi.CCHARP, UTIMBUFP], rffi.INT)
 
+        if not _WIN32:
+            includes = ['sys/time.h']
+        else:
+            includes = ['time.h']
         class CConfig:
             _compilation_info_ = ExternalCompilationInfo(
-                includes=['sys/time.h']
+                includes=includes
             )
             HAVE_UTIMES = platform.Has('utimes')
         config = platform.configure(CConfig)
@@ -450,9 +454,14 @@
 
         if config['HAVE_UTIMES']:
             class CConfig:
-                _compilation_info_ = ExternalCompilationInfo(
-                    includes = ['sys/time.h']
-                )
+                if not _WIN32:
+                    _compilation_info_ = ExternalCompilationInfo(
+                    includes = includes
+                    )
+                else:
+                    _compilation_info_ = ExternalCompilationInfo(
+                        includes = ['time.h']
+                    )
                 TIMEVAL = platform.Struct('struct timeval', [('tv_sec', rffi.LONG),
                                                              ('tv_usec', rffi.LONG)])
             config = platform.configure(CConfig)
@@ -557,10 +566,10 @@
                 # The fields of a FILETIME structure are the hi and lo parts
                 # of a 64-bit value expressed in 100 nanosecond units
                 # (of course).
-                result = (pkernel.c_dwHighDateTime*429.4967296 +
-                          pkernel.c_dwLowDateTime*1E-7,
-                          puser.c_dwHighDateTime*429.4967296 +
-                          puser.c_dwLowDateTime*1E-7,
+                result = (rffi.cast(lltype.Signed, pkernel.c_dwHighDateTime) * 429.4967296 +
+                          rffi.cast(lltype.Signed, pkernel.c_dwLowDateTime) * 1E-7,
+                          rffi.cast(lltype.Signed, puser.c_dwHighDateTime) * 429.4967296 +
+                          rffi.cast(lltype.Signed, puser.c_dwLowDateTime) * 1E-7,
                           0, 0, 0)
                 lltype.free(puser,   flavor='raw')
                 lltype.free(pkernel, flavor='raw')
@@ -755,7 +764,7 @@
         if self.GETPGRP_HAVE_ARG:
             c_func = self.llexternal(name, [rffi.INT], rffi.INT)
             def c_func_llimpl():
-                res = rffi.cast(rffi.LONG, c_func(0))
+                res = rffi.cast(rffi.SIGNED, c_func(0))
                 if res == -1:
                     raise OSError(rposix.get_errno(), "%s failed" % name)
                 return res
@@ -773,7 +782,7 @@
         if self.SETPGRP_HAVE_ARG:
             c_func = self.llexternal(name, [rffi.INT, rffi.INT], rffi.INT)
             def c_func_llimpl():
-                res = rffi.cast(rffi.LONG, c_func(0, 0))
+                res = rffi.cast(rffi.SIGNED, c_func(0, 0))
                 if res == -1:
                     raise OSError(rposix.get_errno(), "%s failed" % name)
 
@@ -818,7 +827,7 @@
                                   [traits.CCHARP, rffi.INT, rffi.MODE_T],
                                   rffi.INT)
         def os_open_llimpl(path, flags, mode):
-            result = rffi.cast(rffi.LONG, os_open(path, flags, mode))
+            result = rffi.cast(lltype.Signed, os_open(path, flags, mode))
             if result == -1:
                 raise OSError(rposix.get_errno(), "os_open failed")
             return result
@@ -1009,7 +1018,7 @@
             os_fsync = self.llexternal('_commit', [rffi.INT], rffi.INT)
 
         def fsync_llimpl(fd):
-            res = rffi.cast(rffi.LONG, os_fsync(rffi.cast(rffi.INT, fd)))
+            res = rffi.cast(rffi.SIGNED, os_fsync(rffi.cast(rffi.INT, fd)))
             if res < 0:
                 raise OSError(rposix.get_errno(), "fsync failed")
         return extdef([int], s_None,
@@ -1021,7 +1030,7 @@
         os_fdatasync = self.llexternal('fdatasync', [rffi.INT], rffi.INT)
 
         def fdatasync_llimpl(fd):
-            res = rffi.cast(rffi.LONG, os_fdatasync(rffi.cast(rffi.INT, fd)))
+            res = rffi.cast(rffi.SIGNED, os_fdatasync(rffi.cast(rffi.INT, fd)))
             if res < 0:
                 raise OSError(rposix.get_errno(), "fdatasync failed")
         return extdef([int], s_None,
@@ -1033,7 +1042,7 @@
         os_fchdir = self.llexternal('fchdir', [rffi.INT], rffi.INT)
 
         def fchdir_llimpl(fd):
-            res = rffi.cast(rffi.LONG, os_fchdir(rffi.cast(rffi.INT, fd)))
+            res = rffi.cast(rffi.SIGNED, os_fchdir(rffi.cast(rffi.INT, fd)))
             if res < 0:
                 raise OSError(rposix.get_errno(), "fchdir failed")
         return extdef([int], s_None,
@@ -1312,7 +1321,9 @@
                 result = os__cwait(status_p, pid, options)
                 # shift the status left a byte so this is more
                 # like the POSIX waitpid
-                status_p[0] <<= 8
+                tmp = rffi.cast(rffi.SIGNED, status_p[0])
+                tmp <<= 8
+                status_p[0] = rffi.cast(rffi.INT, tmp)
                 return result
         else:
             # Posix
@@ -1343,7 +1354,7 @@
         os_isatty = self.llexternal(underscore_on_windows+'isatty', [rffi.INT], rffi.INT)
 
         def isatty_llimpl(fd):
-            res = rffi.cast(rffi.LONG, os_isatty(rffi.cast(rffi.INT, fd)))
+            res = rffi.cast(lltype.Signed, os_isatty(rffi.cast(rffi.INT, fd)))
             return res != 0
 
         return extdef([int], bool, llimpl=isatty_llimpl,
diff --git a/pypy/rpython/module/ll_time.py b/pypy/rpython/module/ll_time.py
--- a/pypy/rpython/module/ll_time.py
+++ b/pypy/rpython/module/ll_time.py
@@ -9,7 +9,7 @@
 from pypy.rpython.lltypesystem import lltype
 from pypy.rpython.extfunc import BaseLazyRegistering, registering, extdef
 from pypy.rlib import rposix
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rarithmetic import intmask, maxint32
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
 if sys.platform == 'win32':
@@ -177,7 +177,7 @@
     @registering(time.sleep)
     def register_time_sleep(self):
         if sys.platform == 'win32':
-            MAX = sys.maxint
+            MAX = maxint32
             Sleep = self.llexternal('Sleep', [rffi.ULONG], lltype.Void)
             def time_sleep_llimpl(secs):
                 millisecs = secs * 1000.0
diff --git a/pypy/rpython/ootypesystem/ootype.py b/pypy/rpython/ootypesystem/ootype.py
--- a/pypy/rpython/ootypesystem/ootype.py
+++ b/pypy/rpython/ootypesystem/ootype.py
@@ -1295,6 +1295,8 @@
         for meth in self.overloadings:
             ARGS = meth._TYPE.ARGS
             if ARGS in signatures:
+                # XXX Conflict on 'Signed' vs 'SignedLongLong' on win64.
+                # XXX note that this partially works if this error is ignored.
                 raise TypeError, 'Bad overloading'
             signatures.add(ARGS)
 
diff --git a/pypy/rpython/test/test_rbuiltin.py b/pypy/rpython/test/test_rbuiltin.py
--- a/pypy/rpython/test/test_rbuiltin.py
+++ b/pypy/rpython/test/test_rbuiltin.py
@@ -5,7 +5,7 @@
 from pypy.rlib.debug import llinterpcall
 from pypy.rpython.lltypesystem import lltype
 from pypy.tool import udir
-from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.rarithmetic import intmask, is_valid_int
 from pypy.rlib.rarithmetic import r_int, r_uint, r_longlong, r_ulonglong
 from pypy.annotation.builtin import *
 from pypy.rpython.test.tool import BaseRtypingTest, LLRtypeMixin, OORtypeMixin
@@ -567,7 +567,7 @@
         if r_longlong is not r_int:
             assert isinstance(res, r_longlong)
         else:
-            assert isinstance(res, int)
+            assert is_valid_int(res)
         #
         def llfn(v):
             return rffi.cast(rffi.ULONGLONG, v)
diff --git a/pypy/rpython/test/test_rptr.py b/pypy/rpython/test/test_rptr.py
--- a/pypy/rpython/test/test_rptr.py
+++ b/pypy/rpython/test/test_rptr.py
@@ -5,6 +5,7 @@
 from pypy.rpython.lltypesystem import llmemory
 from pypy.rpython.rtyper import RPythonTyper
 from pypy.annotation import model as annmodel
+from pypy.rlib.rarithmetic import is_valid_int
 
 
 # ____________________________________________________________
@@ -188,7 +189,7 @@
             return llmemory.cast_adr_to_int(a, "forced")
 
     res = interpret(fn, [2])
-    assert type(res) is int
+    assert is_valid_int(res)
     assert res == cast_ptr_to_int(p)
     #
     res = interpret(fn, [4])
@@ -196,7 +197,7 @@
     assert llmemory.cast_int_to_adr(res) == llmemory.cast_ptr_to_adr(p)
     #
     res = interpret(fn, [6])
-    assert type(res) is int
+    assert is_valid_int(res)
     from pypy.rpython.lltypesystem import rffi
     assert res == rffi.cast(Signed, p)
 
diff --git a/pypy/rpython/tool/rffi_platform.py b/pypy/rpython/tool/rffi_platform.py
--- a/pypy/rpython/tool/rffi_platform.py
+++ b/pypy/rpython/tool/rffi_platform.py
@@ -710,9 +710,13 @@
 PYPY_EXTERNAL_DIR = py.path.local(pypydir).join('..', '..')
 # XXX make this configurable
 if sys.platform == 'win32':
-    libdir = py.path.local('c:/buildslave/support') # on the bigboard buildbot
-    if libdir.check():
-        PYPY_EXTERNAL_DIR = libdir
+    for libdir in [
+        py.path.local('c:/buildslave/support'), # on the bigboard buildbot
+        py.path.local('d:/myslave'), # on the snakepit buildbot
+        ]:
+        if libdir.check():
+            PYPY_EXTERNAL_DIR = libdir
+            break
 
 def configure_external_library(name, eci, configurations,
                                symbol=None, _cache={}):
@@ -790,9 +794,15 @@
     if platform is None:
         from pypy.translator.platform import platform
     if sys.platform == 'win32':
-        library_dir = 'Release'
-        libraries = ['gc']
-        includes=['gc.h']
+        import platform as host_platform # just to ask for the arch. Confusion-alert!
+        if host_platform.architecture()[0] == '32bit':
+            library_dir = 'Release'
+            libraries = ['gc']
+            includes=['gc.h']
+        else:
+            library_dir = ''
+            libraries = ['gc64_dll']
+            includes = ['gc.h']
     else:
         library_dir = ''
         libraries = ['gc', 'dl']
diff --git a/pypy/translator/c/src/asm.h b/pypy/translator/c/src/asm.h
--- a/pypy/translator/c/src/asm.h
+++ b/pypy/translator/c/src/asm.h
@@ -14,4 +14,8 @@
 #  include "src/asm_ppc.h"
 #endif
 
+#if defined(MS_WINDOWS) && defined(_MSC_VER)
+#  include "src/asm_msvc.h"
+#endif
+
 #endif /* _PYPY_ASM_H */
diff --git a/pypy/translator/c/src/asm_msvc.h b/pypy/translator/c/src/asm_msvc.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/asm_msvc.h
@@ -0,0 +1,31 @@
+
+#ifdef PYPY_X86_CHECK_SSE2
+#define PYPY_X86_CHECK_SSE2_DEFINED
+extern void pypy_x86_check_sse2(void);
+#endif
+
+
+/* implementations */
+
+#ifndef PYPY_NOT_MAIN_FILE
+#ifdef PYPY_X86_CHECK_SSE2
+#include <intrin.h>
+void pypy_x86_check_sse2(void)
+{
+    int features;
+    int CPUInfo[4];
+    CPUInfo[3] = 0;
+    __cpuid(CPUInfo, 1);
+    features = CPUInfo[3];
+
+    //Check bits 25 and 26, this indicates SSE2 support
+    if (((features & (1 << 25)) == 0) || ((features & (1 << 26)) == 0))
+    {
+        fprintf(stderr, "Old CPU with no SSE2 support, cannot continue.\n"
+                        "You need to re-translate with "
+                        "'--jit-backend=x86-without-sse2'\n");
+        abort();
+    }
+}
+#endif
+#endif
diff --git a/pypy/translator/c/src/float.h b/pypy/translator/c/src/float.h
--- a/pypy/translator/c/src/float.h
+++ b/pypy/translator/c/src/float.h
@@ -27,7 +27,7 @@
 #define OP_FLOAT_SUB(x,y,r)     r = x - y
 #define OP_FLOAT_MUL(x,y,r)     r = x * y
 #define OP_FLOAT_TRUEDIV(x,y,r) r = x / y
-#define OP_FLOAT_POW(x,y,r)     r = pow(x, y) 
+#define OP_FLOAT_POW(x,y,r)     r = pow(x, y)
 
 /*** conversions ***/
 
@@ -42,5 +42,6 @@
 #ifdef HAVE_LONG_LONG
 #define OP_CAST_FLOAT_TO_LONGLONG(x,r) r = (long long)(x)
 #define OP_CAST_FLOAT_TO_ULONGLONG(x,r) r = (unsigned long long)(x)
+#define OP_CONVERT_FLOAT_BYTES_TO_LONGLONG(x,r) memcpy(&r, &x, sizeof(double))
 #endif
 
diff --git a/pypy/translator/c/test/test_newgc.py b/pypy/translator/c/test/test_newgc.py
--- a/pypy/translator/c/test/test_newgc.py
+++ b/pypy/translator/c/test/test_newgc.py
@@ -676,8 +676,8 @@
             gc.collect()
             p_a1 = rffi.cast(rffi.VOIDPP, ll_args[0])[0]
             p_a2 = rffi.cast(rffi.VOIDPP, ll_args[1])[0]
-            a1 = rffi.cast(rffi.LONGP, p_a1)[0]
-            a2 = rffi.cast(rffi.LONGP, p_a2)[0]
+            a1 = rffi.cast(rffi.SIGNEDP, p_a1)[0]
+            a2 = rffi.cast(rffi.SIGNEDP, p_a2)[0]
             res = rffi.cast(rffi.INTP, ll_res)
             if a1 > a2:
                 res[0] = rffi.cast(rffi.INT, 1)
@@ -693,7 +693,7 @@
             ptr = CallbackFuncPtr([ffi_type_pointer, ffi_type_pointer],
                                   ffi_type_sint, callback)
 
-            TP = rffi.CArray(rffi.LONG)
+            TP = rffi.CArray(lltype.Signed)
             to_sort = lltype.malloc(TP, 4, flavor='raw')
             to_sort[0] = 4
             to_sort[1] = 3
@@ -701,7 +701,7 @@
             to_sort[3] = 2
             qsort.push_arg(rffi.cast(rffi.VOIDP, to_sort))
             qsort.push_arg(rffi.cast(rffi.SIZE_T, 4))
-            qsort.push_arg(rffi.cast(rffi.SIZE_T, rffi.sizeof(rffi.LONG)))
+            qsort.push_arg(rffi.cast(rffi.SIZE_T, rffi.sizeof(lltype.Signed)))
             qsort.push_arg(rffi.cast(rffi.VOIDP, ptr.ll_closure))
             qsort.call(lltype.Void)
             result = [to_sort[i] for i in range(4)] == [1,2,3,4]
@@ -1221,7 +1221,7 @@
         def f():
             from pypy.rpython.lltypesystem import lltype, rffi
             alist = [A() for i in range(50000)]
-            idarray = lltype.malloc(rffi.LONGP.TO, len(alist), flavor='raw')
+            idarray = lltype.malloc(rffi.SIGNEDP.TO, len(alist), flavor='raw')
             # Compute the id of all elements of the list.  The goal is
             # to not allocate memory, so that if the GC needs memory to
             # remember the ids, it will trigger some collections itself
diff --git a/pypy/translator/cli/sdk.py b/pypy/translator/cli/sdk.py
--- a/pypy/translator/cli/sdk.py
+++ b/pypy/translator/cli/sdk.py
@@ -103,6 +103,11 @@
         mono_bin = find_mono_on_windows()
         if mono_bin is not None:
             SDK.ILASM = os.path.join(mono_bin, 'ilasm2.bat')
+        # XXX the failing tests are boring, and the SDK is usually installed
+        # on windows. I do not care right now, because the Linux buildbots
+        # don't test this at all...
+        if platform.architecture()[0] == '64bit':
+            py.test.skip('mono on 64bit is not well enough supported')
     else:
         SDK = MonoSDK
     return SDK
diff --git a/pypy/translator/oosupport/test_template/overflow.py b/pypy/translator/oosupport/test_template/overflow.py
--- a/pypy/translator/oosupport/test_template/overflow.py
+++ b/pypy/translator/oosupport/test_template/overflow.py
@@ -3,9 +3,12 @@
 
 class BaseTestOverflow:
 
-    def check(self, fn, args):
+    def check(self, fn, args, expected=None):
         res1 = self.interpret(fn, args)
-        res2 = fn(*args)
+        if expected is not None:
+            res2 = expected
+        else:
+            res2 = fn(*args)
         assert res1 == res2
 
     def test_add(self):
@@ -63,7 +66,9 @@
                 return ovfcheck(x % y)
             except OverflowError:
                 return 42
-        self.check(fn, [-sys.maxint-1, -1])
+        # force the expected result to be 42, because direct run of ovfcheck()
+        # cannot detect this particular overflow case
+        self.check(fn, [-sys.maxint-1, -1], expected=42)
 
     def test_div(self):
         def fn(x, y):
diff --git a/pytest.py b/pytest.py
--- a/pytest.py
+++ b/pytest.py
@@ -4,6 +4,20 @@
 """
 __all__ = ['main']
 
+# XXX hack for win64:
+# This patch must stay here until the END OF STAGE 1
+# When all tests work, this branch will be merged
+# and the branch stage 2 is started, where we remove this patch.
+import sys
+if hasattr(sys, "maxsize"):
+    if sys.maxint != sys.maxsize:
+        sys.maxint = sys.maxsize
+        import warnings
+        warnings.warn("""\n
+---> This win64 port is now in stage 1: sys.maxint was modified.
+---> When pypy/__init__.py becomes empty again, we have reached stage 2.
+""")
+
 from _pytest.core import main, UsageError, _preloadplugins
 from _pytest import core as cmdline
 from _pytest import __version__