[pypy-commit] pypy default: merge

Fri Oct 16 11:38:33 CEST 2015

Author: fijal
Branch: 
Changeset: r80265:f377aa5469e9
Date: 2015-10-16 11:38 +0200
http://bitbucket.org/pypy/pypy/changeset/f377aa5469e9/

Log:	merge

diff too long, truncating to 2000 out of 15271 lines

diff --git a/.gitignore b/.gitignore
--- a/.gitignore
+++ b/.gitignore
@@ -1,9 +1,14 @@
 .hg
 .svn
 
+# VIM
+*.swp
+*.swo
+
 *.pyc
 *.pyo
 *~
+__pycache__/
 
 bin/pypy-c
 include/*.h
@@ -22,4 +27,6 @@
 pypy/translator/goal/pypy-c
 pypy/translator/goal/target*-c
 release/
+!pypy/tool/release/
 rpython/_cache/
+__pycache__/
diff --git a/lib_pypy/cffi/ffiplatform.py b/lib_pypy/cffi/ffiplatform.py
--- a/lib_pypy/cffi/ffiplatform.py
+++ b/lib_pypy/cffi/ffiplatform.py
@@ -14,17 +14,7 @@
 LIST_OF_FILE_NAMES = ['sources', 'include_dirs', 'library_dirs',
                       'extra_objects', 'depends']
 
-def _hack_at_distutils():
-    # Windows-only workaround for some configurations: see
-    # https://bugs.python.org/issue23246 (Python 2.7.9)
-    if sys.platform == "win32":
-        try:
-            import setuptools    # for side-effects, patches distutils
-        except ImportError:
-            pass
-
 def get_extension(srcfilename, modname, sources=(), **kwds):
-    _hack_at_distutils()   # *before* the following import
     from distutils.core import Extension
     allsources = [srcfilename]
     allsources.extend(sources)
@@ -47,7 +37,6 @@
 
 def _build(tmpdir, ext):
     # XXX compact but horrible :-(
-    _hack_at_distutils()
     from distutils.core import Distribution
     import distutils.errors
     #
diff --git a/lib_pypy/cffi/verifier.py b/lib_pypy/cffi/verifier.py
--- a/lib_pypy/cffi/verifier.py
+++ b/lib_pypy/cffi/verifier.py
@@ -22,6 +22,16 @@
                 s = s.encode('ascii')
             super(NativeIO, self).write(s)
 
+def _hack_at_distutils():
+    # Windows-only workaround for some configurations: see
+    # https://bugs.python.org/issue23246 (Python 2.7 with 
+    # a specific MS compiler suite download)
+    if sys.platform == "win32":
+        try:
+            import setuptools    # for side-effects, patches distutils
+        except ImportError:
+            pass
+
 
 class Verifier(object):
 
@@ -112,6 +122,7 @@
         return basename
 
     def get_extension(self):
+        _hack_at_distutils() # backward compatibility hack
         if not self._has_source:
             with self.ffi._lock:
                 if not self._has_source:
diff --git a/pypy/doc/index-of-release-notes.rst b/pypy/doc/index-of-release-notes.rst
--- a/pypy/doc/index-of-release-notes.rst
+++ b/pypy/doc/index-of-release-notes.rst
@@ -6,6 +6,7 @@
 
 .. toctree::
 
+   release-15.11.0.rst
    release-2.6.1.rst
    release-2.6.0.rst
    release-2.5.1.rst
diff --git a/pypy/doc/index-of-whatsnew.rst b/pypy/doc/index-of-whatsnew.rst
--- a/pypy/doc/index-of-whatsnew.rst
+++ b/pypy/doc/index-of-whatsnew.rst
@@ -7,6 +7,7 @@
 .. toctree::
 
    whatsnew-head.rst
+   whatsnew-15.11.0.rst
    whatsnew-2.6.1.rst
    whatsnew-2.6.0.rst
    whatsnew-2.5.1.rst
diff --git a/pypy/doc/release-15.11.0.rst b/pypy/doc/release-15.11.0.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/release-15.11.0.rst
@@ -0,0 +1,191 @@
+============
+PyPy 15.11.0
+============
+
+We're pleased and proud to unleash PyPy 15.11, a major update of the PyPy
+python2.7.10 compatible interpreter with a Just In Time compiler.
+We have improved `warmup time and memory overhead used for tracing`_, added
+`vectorization`_ for numpy and general loops where possible on x86 hardware,
+refactored rough edges in rpython, and increased functionality of numpy.
+
+You can download the PyPy 15.11 release here:
+
+    http://pypy.org/download.html
+
+We would like to thank our donors for the continued support of the PyPy
+project.
+
+We would also like to thank our contributors (7 new ones since PyPy 2.6.0) and 
+encourage new people to join the project. PyPy has many
+layers and we need help with all of them: `PyPy`_ and `RPython`_ documentation
+improvements, tweaking popular `modules`_ to run on pypy, or general `help`_ 
+with making RPython's JIT even better. 
+
+
+Vectorization
+=============
+
+Richard Plangger began work in March and continued over a Google Summer of Code
+to add a vectorization step to the trace optimizer. The step recognizes common
+constructs and emits SIMD code where possible, much as any modern compiler does.
+This vectorization happens while tracing running code,  so it is actually easier
+at run-time to determine the
+availability of possible vectorization than it is for ahead-of-time compilers.
+
+Availability of SIMD hardware is detected at run time, without needing to
+precompile various code paths into the executable.
+
+Internal Refactoring and Warmup Time Improvement
+================================================
+
+Maciej Fijalkowski and Armin Rigo refactored internals of rpython that now allow
+PyPy to more efficiently use `guards`_ in jitted code. They also rewrote unrolling,
+leading to a warmup time improvement of 20% or so at the cost of a minor
+regression in jitted code speed.
+
+Numpy
+=====
+
+Our implementation of numpy continues to improve. ndarray and the numeric dtypes
+are very close to feature-complete; record, string and unicode dtypes are mostly
+supported.  We have reimplemented numpy linalg, random and fft as cffi-1.0
+modules that call out to the same underlying libraries that upstream numpy uses.
+Please try it out, especially using the new vectorization (via --jit vec=1 on the
+command line) and let us know what is missing for your code.
+
+CFFI
+====
+
+While not applicable only to PyPy, `cffi`_ is arguably our most significant
+contribution to the python ecosystem. Armin Rigo continued improving it,
+and PyPy reaps the benefits of cffi-1.3: improved management of object
+lifetimes, __stdcall on Win32, ffi.memmove(), ...
+
+.. _`warmup time and memory overhead used for tracing`: http://morepypy.blogspot.com/2015/10
+.. _`vectorization`: http://pypyvecopt.blogspot.co.at/
+.. _`guards`: http://rpython.readthedocs.org/en/latest/glossary.html
+
+.. _`PyPy`: http://doc.pypy.org 
+.. _`RPython`: https://rpython.readthedocs.org
+.. _`cffi`: https://cffi.readthedocs.org
+.. _`modules`: http://doc.pypy.org/en/latest/project-ideas.html#make-more-python-modules-pypy-friendly
+.. _`help`: http://doc.pypy.org/en/latest/project-ideas.html
+
+What is PyPy?
+=============
+
+PyPy is a very compliant Python interpreter, almost a drop-in replacement for
+CPython 2.7. It's fast (`pypy and cpython 2.7.x`_ performance comparison)
+due to its integrated tracing JIT compiler.
+
+This release supports **x86** machines on most common operating systems
+(Linux 32/64, Mac OS X 64, Windows 32, OpenBSD_, freebsd_),
+as well as newer **ARM** hardware (ARMv6 or ARMv7, with VFPv3) running Linux.
+
+We also welcome developers of other
+`dynamic languages`_ to see what RPython can do for them.
+
+.. _`pypy and cpython 2.7.x`: http://speed.pypy.org
+.. _OpenBSD: http://cvsweb.openbsd.org/cgi-bin/cvsweb/ports/lang/pypy
+.. _freebsd: https://svnweb.freebsd.org/ports/head/lang/pypy/
+.. _`dynamic languages`: http://pypyjs.org
+
+Highlights (since 2.6.1 release two months ago)
+===============================================
+
+* Bug Fixes
+
+  * Applied OPENBSD downstream fixes
+
+  * Fix a crash on non-linux when running more than 20 threads
+
+  * In cffi, ffi.new_handle() is more cpython compliant
+
+  * Accept unicode in functions inside the _curses cffi backend exactly like cpython
+
+  * Fix a segfault in itertools.islice()
+
+  * Use gcrootfinder=shadowstack by default, asmgcc on linux only
+
+  * Fix ndarray.copy() for upstream compatibility when copying non-contiguous arrays
+
+  * Fix assumption that lltype.UniChar is unsigned
+
+  * Fix a subtle bug with stacklets on shadowstack
+
+  * Improve support for the cpython capi in cpyext (our capi compatibility
+    layer). Fixing these issues inspired some thought about cpyext in general,
+    stay tuned for more improvements
+
+  * When loading dynamic libraries, in case of a certain loading error, retry
+    loading the library assuming it is actually a linker script, like on Arch
+    and Gentoo
+
+  * Issues reported with our previous release were resolved_ after reports from users on
+    our issue tracker at https://bitbucket.org/pypy/pypy/issues or on IRC at
+    #pypy
+
+* New features:
+
+  * Add an optimization pass to vectorize loops using x86 SIMD intrinsics.
+
+  * Support __stdcall on Windows in CFFI
+
+  * Improve debug logging when using PYPYLOG=???
+
+  * Deal with platforms with no RAND_egd() in OpenSSL
+
+  * Enable building _vmprof in translation on OS/X by default
+
+* Numpy:
+
+  * Add support for ndarray.ctypes
+
+  * Fast path for mixing numpy scalars and floats
+
+  * Add support for creating Fortran-ordered ndarrays
+
+  * Fix casting failures in linalg (by extending ufunc casting)
+
+  * Recognize and disallow (for now) pickling of ndarrays with objects
+    embedded in them
+
+* Performance improvements and refactorings:
+
+  * Reuse hashed keys across dictionaries and sets
+
+  * Refactor JIT internals to improve warmup time by 20% or so at the cost of a
+    minor regression in JIT speed
+
+  * Recognize patterns of common sequences in the JIT backends and optimize them
+
+  * Make the garbage collector more incremental over external_malloc() calls
+
+  * Share guard resume data where possible which reduces memory usage
+
+  * Fast path for zip(list, list)
+
+  * Reduce the number of checks in the JIT for lst[a:]
+
+  * Move the non-optimizable part of callbacks outside the JIT
+
+  * Factor in field immutability when invalidating heap information
+
+  * Unroll itertools.izip_longest() with two sequences
+
+  * Minor optimizations after analyzing output from `vmprof`_ and trace logs
+
+  * Remove many class attributes in rpython classes
+
+  * Handle getfield_gc_pure* and getfield_gc_* uniformly in heap.py
+
+.. _`vmprof`: https://vmprof.readthedocs.org
+.. _resolved: http://doc.pypy.org/en/latest/whatsnew-15.11.0.html
+
+Please try it out and let us know what you think. We welcome feedback,
+we know you are using PyPy, please tell us about it!
+
+Cheers
+
+The PyPy Team
+
diff --git a/pypy/doc/whatsnew-15.11.0.rst b/pypy/doc/whatsnew-15.11.0.rst
new file mode 100644
--- /dev/null
+++ b/pypy/doc/whatsnew-15.11.0.rst
@@ -0,0 +1,87 @@
+========================
+What's new in PyPy 15.11
+========================
+
+.. this is a revision shortly after release-2.6.1
+.. startrev: 07769be4057b
+
+.. branch: keys_with_hash
+Improve the performance of dict.update() and a bunch of methods from
+sets, by reusing the hash value stored in one dict when inspecting
+or changing another dict with that key.
+
+.. branch: optresult-unroll 
+A major refactoring of the ResOperations that kills Box. Also rewrote
+unrolling to enable future enhancements.  Should improve warmup time
+by 20% or so.
+
+.. branch: optimize-cond-call
+Optimize common sequences of operations like
+``int_lt/cond_call`` in the JIT backends
+
+.. branch: missing_openssl_include
+Fix for missing headers in OpenBSD, already applied in downstream ports
+
+.. branch: gc-more-incremental
+Remove a source of non-incremental-ness in the GC: now
+external_malloc() no longer runs gc_step_until(). If there
+is a currently-running major collection, we do only so many steps
+before returning. This number of steps depends on the size of the
+allocated object. It is controlled by tracking the general progress
+of these major collection steps and the size of old objects that
+keep adding up between them.
+
+.. branch: remember-tracing-counts
+Reenable jithooks
+
+.. branch: detect_egd2
+
+.. branch: shadowstack-no-move-2
+Issue #2141: fix a crash on Windows and OS/X and ARM when running
+at least 20 threads.
+
+.. branch: numpy-ctypes
+
+Add support for ndarray.ctypes property.
+
+.. branch: share-guard-info
+
+Share guard resume data between consecutive guards that have only
+pure operations and guards in between.
+
+.. branch: issue-2148
+
+Fix performance regression on operations mixing numpy scalars and Python 
+floats, cf. issue #2148.
+
+.. branch: cffi-stdcall
+Win32: support '__stdcall' in CFFI.
+
+.. branch: callfamily
+
+Refactorings of annotation and rtyping of function calls.
+
+.. branch: fortran-order
+
+Allow creation of fortran-ordered ndarrays
+
+.. branch: type_system-cleanup
+
+Remove some remnants of the old ootypesystem vs lltypesystem dichotomy.
+
+.. branch: cffi-handle-lifetime
+
+ffi.new_handle() returns handles that work more like CPython's: they
+remain valid as long as the target exists (unlike the previous
+version, where handles become invalid *before* the __del__ is called).
+
+.. branch: ufunc-casting
+
+allow automatic casting in ufuncs (and frompypyfunc) to cast the
+arguments to the allowed function type declarations, fixes various
+failures in linalg cffi functions
+
+.. branch: vecopt
+.. branch: vecopt-merge
+
+A new optimization pass to emit vectorized loops
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -1,82 +1,8 @@
-=======================
-What's new in PyPy 2.6+
-=======================
+=========================
+What's new in PyPy 15.11+
+=========================
 
-.. this is a revision shortly after release-2.6.1
-.. startrev: 07769be4057b
+.. this is a revision shortly after release-15.11.0
+.. startrev: d924723d483b
 
-.. branch: keys_with_hash
-Improve the performance of dict.update() and a bunch of methods from
-sets, by reusing the hash value stored in one dict when inspecting
-or changing another dict with that key.
 
-.. branch: optresult-unroll 
-A major refactoring of the ResOperations that kills Box. Also rewrote
-unrolling to enable future enhancements.  Should improve warmup time
-by 20% or so.
-
-.. branch: optimize-cond-call
-Optimize common sequences of operations like
-``int_lt/cond_call`` in the JIT backends
-
-.. branch: missing_openssl_include
-Fix for missing headers in OpenBSD, already applied in downstream ports
-
-.. branch: gc-more-incremental
-Remove a source of non-incremental-ness in the GC: now
-external_malloc() no longer runs gc_step_until() any more. If there
-is a currently-running major collection, we do only so many steps
-before returning. This number of steps depends on the size of the
-allocated object. It is controlled by tracking the general progress
-of these major collection steps and the size of old objects that
-keep adding up between them.
-
-.. branch: remember-tracing-counts
-Reenable jithooks
-
-.. branch: detect_egd2
-
-.. branch: shadowstack-no-move-2
-Issue #2141: fix a crash on Windows and OS/X and ARM when running
-at least 20 threads.
-
-.. branch: numpy-ctypes
-
-Add support for ndarray.ctypes property.
-
-.. branch: share-guard-info
-
-Share guard resume data between consecutive guards that have only
-pure operations and guards in between.
-
-.. branch: issue-2148
-
-Fix performance regression on operations mixing numpy scalars and Python 
-floats, cf. issue #2148.
-
-.. branch: cffi-stdcall
-Win32: support '__stdcall' in CFFI.
-
-.. branch: callfamily
-
-Refactorings of annotation and rtyping of function calls.
-
-.. branch: fortran-order
-
-Allow creation of fortran-ordered ndarrays
-
-.. branch: type_system-cleanup
-
-Remove some remnants of the old ootypesystem vs lltypesystem dichotomy.
-
-.. branch: cffi-handle-lifetime
-
-ffi.new_handle() returns handles that work more like CPython's: they
-remain valid as long as the target exists (unlike the previous
-version, where handles become invalid *before* the __del__ is called).
-
-.. branch: ufunc-casting
-
-allow automatic casting in ufuncs (and frompypyfunc) to cast the
-arguments to the allowed function type declarations, fixes various
-failures in linalg cffi functions
diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py
--- a/pypy/module/cpyext/__init__.py
+++ b/pypy/module/cpyext/__init__.py
@@ -66,6 +66,7 @@
 import pypy.module.cpyext.codecs
 import pypy.module.cpyext.pyfile
 import pypy.module.cpyext.pystrtod
+import pypy.module.cpyext.pytraceback
 
 # now that all rffi_platform.Struct types are registered, configure them
 api.configure_types()
diff --git a/pypy/module/cpyext/include/Python.h b/pypy/module/cpyext/include/Python.h
--- a/pypy/module/cpyext/include/Python.h
+++ b/pypy/module/cpyext/include/Python.h
@@ -126,6 +126,7 @@
 #include "fileobject.h"
 #include "pysignals.h"
 #include "pythread.h"
+#include "traceback.h"
 
 /* Missing definitions */
 #include "missing.h"
diff --git a/pypy/module/cpyext/include/frameobject.h b/pypy/module/cpyext/include/frameobject.h
--- a/pypy/module/cpyext/include/frameobject.h
+++ b/pypy/module/cpyext/include/frameobject.h
@@ -4,7 +4,7 @@
 extern "C" {
 #endif
 
-typedef struct {
+typedef struct _frame {
     PyObject_HEAD
     PyCodeObject *f_code;
     PyObject *f_globals;
diff --git a/pypy/module/cpyext/include/traceback.h b/pypy/module/cpyext/include/traceback.h
--- a/pypy/module/cpyext/include/traceback.h
+++ b/pypy/module/cpyext/include/traceback.h
@@ -4,7 +4,15 @@
 extern "C" {
 #endif
 
-typedef PyObject PyTracebackObject;
+struct _frame;
+
+typedef struct _traceback {
+        PyObject_HEAD
+        struct _traceback *tb_next;
+        struct _frame *tb_frame;
+        int tb_lasti;
+        int tb_lineno;
+} PyTracebackObject;
 
 #ifdef __cplusplus
 }
diff --git a/pypy/module/cpyext/pytraceback.py b/pypy/module/cpyext/pytraceback.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/pytraceback.py
@@ -0,0 +1,50 @@
+from rpython.rtyper.lltypesystem import rffi, lltype
+from pypy.module.cpyext.api import (
+    PyObjectFields, generic_cpy_call, CONST_STRING, CANNOT_FAIL, Py_ssize_t,
+    cpython_api, bootstrap_function, cpython_struct, build_type_checkers)
+from pypy.module.cpyext.pyobject import (
+    PyObject, make_ref, from_ref, Py_DecRef, make_typedescr, borrow_from)
+from pypy.module.cpyext.frameobject import PyFrameObject
+from rpython.rlib.unroll import unrolling_iterable
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.pytraceback import PyTraceback
+from pypy.interpreter import pycode
+
+
+PyTracebackObjectStruct = lltype.ForwardReference()
+PyTracebackObject = lltype.Ptr(PyTracebackObjectStruct)
+PyTracebackObjectFields = PyObjectFields + (
+    ("tb_next", PyTracebackObject),
+    ("tb_frame", PyFrameObject),
+    ("tb_lasti", rffi.INT),
+    ("tb_lineno", rffi.INT),
+)
+cpython_struct("PyTracebackObject", PyTracebackObjectFields, PyTracebackObjectStruct)
+
+@bootstrap_function
+def init_traceback(space):
+    make_typedescr(PyTraceback.typedef,
+                   basestruct=PyTracebackObject.TO,
+                   attach=traceback_attach,
+                   dealloc=traceback_dealloc)
+
+
+def traceback_attach(space, py_obj, w_obj):
+    py_traceback = rffi.cast(PyTracebackObject, py_obj)
+    traceback = space.interp_w(PyTraceback, w_obj)
+    if traceback.next is None:
+        w_next_traceback = None
+    else:
+        w_next_traceback = space.wrap(traceback.next)
+    py_traceback.c_tb_next = rffi.cast(PyTracebackObject, make_ref(space, w_next_traceback))
+    py_traceback.c_tb_frame = rffi.cast(PyFrameObject, make_ref(space, space.wrap(traceback.frame)))
+    rffi.setintfield(py_traceback, 'c_tb_lasti', traceback.lasti)
+    rffi.setintfield(py_traceback, 'c_tb_lineno',traceback.get_lineno())
+
+@cpython_api([PyObject], lltype.Void, external=False)
+def traceback_dealloc(space, py_obj):
+    py_traceback = rffi.cast(PyTracebackObject, py_obj)
+    Py_DecRef(space, rffi.cast(PyObject, py_traceback.c_tb_next))
+    Py_DecRef(space, rffi.cast(PyObject, py_traceback.c_tb_frame))
+    from pypy.module.cpyext.object import PyObject_dealloc
+    PyObject_dealloc(space, py_obj)
diff --git a/pypy/module/cpyext/slotdefs.py b/pypy/module/cpyext/slotdefs.py
--- a/pypy/module/cpyext/slotdefs.py
+++ b/pypy/module/cpyext/slotdefs.py
@@ -61,6 +61,30 @@
     args_w = space.fixedview(w_args)
     return generic_cpy_call(space, func_binary, w_self, args_w[0])
 
+def wrap_binaryfunc_l(space, w_self, w_args, func):
+    func_binary = rffi.cast(binaryfunc, func)
+    check_num_args(space, w_args, 1)
+    args_w = space.fixedview(w_args)
+
+    if not space.is_true(space.issubtype(space.type(args_w[0]),
+                                         space.type(w_self))):
+        raise OperationError(space.w_NotImplementedError, space.wrap(
+            "NotImplemented"))
+ 
+    return generic_cpy_call(space, func_binary, w_self, args_w[0])
+
+def wrap_binaryfunc_r(space, w_self, w_args, func):
+    func_binary = rffi.cast(binaryfunc, func)
+    check_num_args(space, w_args, 1)
+    args_w = space.fixedview(w_args)
+
+    if not space.is_true(space.issubtype(space.type(args_w[0]),
+                                         space.type(w_self))):
+        raise OperationError(space.w_NotImplementedError, space.wrap(
+            "NotImplemented"))
+
+    return generic_cpy_call(space, func_binary, args_w[0], w_self)
+
 def wrap_inquirypred(space, w_self, w_args, func):
     func_inquiry = rffi.cast(inquiry, func)
     check_num_args(space, w_args, 0)
diff --git a/pypy/module/cpyext/test/test_traceback.py b/pypy/module/cpyext/test/test_traceback.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/cpyext/test/test_traceback.py
@@ -0,0 +1,40 @@
+from rpython.rtyper.lltypesystem import lltype, rffi
+from pypy.module.cpyext.test.test_api import BaseApiTest
+from pypy.module.cpyext.pyobject import PyObject, make_ref, from_ref
+from pypy.module.cpyext.pytraceback import PyTracebackObject
+from pypy.interpreter.pytraceback import PyTraceback
+from pypy.interpreter.pyframe import PyFrame
+
+class TestPyTracebackObject(BaseApiTest):
+    def test_traceback(self, space, api):
+        w_traceback = space.appexec([], """():
+            import sys
+            try:
+                1/0
+            except:
+                return sys.exc_info()[2]
+        """)
+        py_obj = make_ref(space, w_traceback)
+        py_traceback = rffi.cast(PyTracebackObject, py_obj)
+        assert (from_ref(space, rffi.cast(PyObject, py_traceback.c_ob_type)) is
+                space.gettypeobject(PyTraceback.typedef))
+
+        traceback = space.interp_w(PyTraceback, w_traceback)
+        assert traceback.lasti == py_traceback.c_tb_lasti
+        assert traceback.get_lineno() == py_traceback.c_tb_lineno
+        assert space.eq_w(space.getattr(w_traceback, space.wrap("tb_lasti")),
+                          space.wrap(py_traceback.c_tb_lasti))
+        assert space.is_w(space.getattr(w_traceback, space.wrap("tb_frame")),
+                          from_ref(space, rffi.cast(PyObject,
+                                                    py_traceback.c_tb_frame)))
+
+        while not space.is_w(w_traceback, space.w_None):
+            assert space.is_w(
+                w_traceback,
+                from_ref(space, rffi.cast(PyObject, py_traceback)))
+            w_traceback = space.getattr(w_traceback, space.wrap("tb_next"))
+            py_traceback = py_traceback.c_tb_next
+
+        assert lltype.normalizeptr(py_traceback) is None
+
+        api.Py_DecRef(py_obj)
diff --git a/pypy/module/cpyext/test/test_typeobject.py b/pypy/module/cpyext/test/test_typeobject.py
--- a/pypy/module/cpyext/test/test_typeobject.py
+++ b/pypy/module/cpyext/test/test_typeobject.py
@@ -589,6 +589,48 @@
         assert bool(module.newInt(-1))
         raises(ValueError, bool, module.newInt(-42))
 
+    def test_binaryfunc(self):
+        module = self.import_extension('foo', [
+            ("new_obj", "METH_NOARGS",
+             """
+                FooObject *fooObj;
+
+                Foo_Type.tp_as_number = &foo_as_number;
+                foo_as_number.nb_add = foo_nb_add_call;
+                if (PyType_Ready(&Foo_Type) < 0) return NULL;
+                fooObj = PyObject_New(FooObject, &Foo_Type);
+                if (!fooObj) {
+                    return NULL;
+                }
+
+                return (PyObject *)fooObj;
+             """)],
+            """
+            typedef struct
+            {
+                PyObject_HEAD
+            } FooObject;
+
+            static PyObject * 
+            foo_nb_add_call(PyObject *self, PyObject *other)
+            {
+                return PyInt_FromLong(42); 
+            }
+
+            PyTypeObject Foo_Type = {
+                PyObject_HEAD_INIT(0)
+                /*ob_size*/             0,
+                /*tp_name*/             "Foo",
+                /*tp_basicsize*/        sizeof(FooObject),
+            };
+            static PyNumberMethods foo_as_number;
+            """)
+        a = module.new_obj()
+        b = module.new_obj() 
+        c = 3
+        assert (a + b) == 42 
+        raises(NotImplementedError, "b + c")
+
     def test_tp_new_in_subclass_of_type(self):
         skip("BROKEN")
         module = self.import_module(name='foo3')
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -2,6 +2,7 @@
 It should not be imported by the module itself
 """
 import re
+import py
 from pypy.interpreter import special
 from pypy.interpreter.baseobjspace import InternalSpaceCache, W_Root, ObjSpace
 from pypy.interpreter.error import OperationError
@@ -12,6 +13,10 @@
 from pypy.module.micronumpy.ndarray import W_NDimArray
 from pypy.module.micronumpy.ctors import array
 from pypy.module.micronumpy.descriptor import get_dtype_cache
+from pypy.interpreter.miscutils import ThreadLocals, make_weak_value_dictionary
+from pypy.interpreter.executioncontext import (ExecutionContext, ActionFlag,
+    UserDelAction)
+from pypy.interpreter.pyframe import PyFrame
 
 
 class BogusBytecode(Exception):
@@ -32,12 +37,11 @@
 class BadToken(Exception):
     pass
 
-
 SINGLE_ARG_FUNCTIONS = ["sum", "prod", "max", "min", "all", "any",
                         "unegative", "flat", "tostring", "count_nonzero",
                         "argsort", "cumsum", "logical_xor_reduce"]
-TWO_ARG_FUNCTIONS = ["dot", 'take', 'searchsorted']
-TWO_ARG_FUNCTIONS_OR_NONE = ['view', 'astype']
+TWO_ARG_FUNCTIONS = ["dot", 'take', 'searchsorted', 'multiply']
+TWO_ARG_FUNCTIONS_OR_NONE = ['view', 'astype', 'reshape']
 THREE_ARG_FUNCTIONS = ['where']
 
 class W_TypeObject(W_Root):
@@ -57,6 +61,10 @@
     w_OverflowError = W_TypeObject("OverflowError")
     w_NotImplementedError = W_TypeObject("NotImplementedError")
     w_AttributeError = W_TypeObject("AttributeError")
+    w_StopIteration = W_TypeObject("StopIteration")
+    w_KeyError = W_TypeObject("KeyError")
+    w_SystemExit = W_TypeObject("SystemExit")
+    w_KeyboardInterrupt = W_TypeObject("KeyboardInterrupt")
     w_None = None
 
     w_bool = W_TypeObject("bool")
@@ -72,13 +80,26 @@
     w_dict = W_TypeObject("dict")
     w_object = W_TypeObject("object")
     w_buffer = W_TypeObject("buffer")
+    w_type = W_TypeObject("type")
 
-    def __init__(self):
+    def __init__(self, config=None):
         """NOT_RPYTHON"""
         self.fromcache = InternalSpaceCache(self).getorbuild
         self.w_Ellipsis = special.Ellipsis()
         self.w_NotImplemented = special.NotImplemented()
 
+        if config is None:
+            from pypy.config.pypyoption import get_pypy_config
+            config = get_pypy_config(translating=False)
+        self.config = config
+
+        self.interned_strings = make_weak_value_dictionary(self, str, W_Root)
+        self.builtin = DictObject({})
+        self.FrameClass = PyFrame
+        self.threadlocals = ThreadLocals()
+        self.actionflag = ActionFlag()    # changed by the signal module
+        self.check_signal_action = None   # changed by the signal module
+
     def _freeze_(self):
         return True
 
@@ -89,12 +110,17 @@
         return isinstance(w_obj, ListObject) or isinstance(w_obj, W_NDimArray)
 
     def len(self, w_obj):
-        assert isinstance(w_obj, ListObject)
-        return self.wrap(len(w_obj.items))
+        if isinstance(w_obj, ListObject):
+            return self.wrap(len(w_obj.items))
+        elif isinstance(w_obj, DictObject):
+            return self.wrap(len(w_obj.items))
+        raise NotImplementedError
 
     def getattr(self, w_obj, w_attr):
         assert isinstance(w_attr, StringObject)
-        return w_obj.getdictvalue(self, w_attr.v)
+        if isinstance(w_obj, DictObject):
+            return w_obj.getdictvalue(self, w_attr)
+        return None
 
     def isinstance_w(self, w_obj, w_tp):
         try:
@@ -102,6 +128,22 @@
         except AttributeError:
             return False
 
+    def iter(self, w_iter):
+        if isinstance(w_iter, ListObject):
+            raise NotImplementedError
+            #return IterObject(space, w_iter.items)
+        elif isinstance(w_iter, DictObject):
+            return IterDictObject(self, w_iter)
+
+    def next(self, w_iter):
+        return w_iter.next()
+
+    def contains(self, w_iter, w_key):
+        if isinstance(w_iter, DictObject):
+            return self.wrap(w_key in w_iter.items)
+
+        raise NotImplementedError
+
     def decode_index4(self, w_idx, size):
         if isinstance(w_idx, IntObject):
             return (self.int_w(w_idx), 0, 0, 1)
@@ -123,6 +165,10 @@
                 lgt = (stop - start - 1) / step + 1
             return (start, stop, step, lgt)
 
+    def unicode_from_object(self, w_item):
+        # XXX
+        return StringObject("")
+
     @specialize.argtype(1)
     def wrap(self, obj):
         if isinstance(obj, float):
@@ -145,7 +191,55 @@
     def newcomplex(self, r, i):
         return ComplexObject(r, i)
 
+    def newfloat(self, f):
+        return self.float(f)
+
+    def le(self, w_obj1, w_obj2):
+        assert isinstance(w_obj1, boxes.W_GenericBox) 
+        assert isinstance(w_obj2, boxes.W_GenericBox) 
+        return w_obj1.descr_le(self, w_obj2)
+
+    def lt(self, w_obj1, w_obj2):
+        assert isinstance(w_obj1, boxes.W_GenericBox) 
+        assert isinstance(w_obj2, boxes.W_GenericBox) 
+        return w_obj1.descr_lt(self, w_obj2)
+
+    def ge(self, w_obj1, w_obj2):
+        assert isinstance(w_obj1, boxes.W_GenericBox) 
+        assert isinstance(w_obj2, boxes.W_GenericBox) 
+        return w_obj1.descr_ge(self, w_obj2)
+
+    def add(self, w_obj1, w_obj2):
+        assert isinstance(w_obj1, boxes.W_GenericBox) 
+        assert isinstance(w_obj2, boxes.W_GenericBox) 
+        return w_obj1.descr_add(self, w_obj2)
+
+    def sub(self, w_obj1, w_obj2):
+        return self.wrap(1)
+
+    def mul(self, w_obj1, w_obj2):
+        assert isinstance(w_obj1, boxes.W_GenericBox) 
+        assert isinstance(w_obj2, boxes.W_GenericBox) 
+        return w_obj1.descr_mul(self, w_obj2)
+
+    def pow(self, w_obj1, w_obj2, _):
+        return self.wrap(1)
+
+    def neg(self, w_obj1):
+        return self.wrap(0)
+
+    def repr(self, w_obj1):
+        return self.wrap('fake')
+
     def getitem(self, obj, index):
+        if isinstance(obj, DictObject):
+            w_dict = obj.getdict(self)
+            if w_dict is not None:
+                try:
+                    return w_dict[index]
+                except KeyError, e:
+                    raise OperationError(self.w_KeyError, self.wrap("key error"))
+
         assert isinstance(obj, ListObject)
         assert isinstance(index, IntObject)
         return obj.items[index.intval]
@@ -191,12 +285,24 @@
             return w_obj.v
         raise NotImplementedError
 
+    def unicode_w(self, w_obj):
+        # XXX
+        if isinstance(w_obj, StringObject):
+            return unicode(w_obj.v)
+        raise NotImplementedError
+
     def int(self, w_obj):
         if isinstance(w_obj, IntObject):
             return w_obj
         assert isinstance(w_obj, boxes.W_GenericBox)
         return self.int(w_obj.descr_int(self))
 
+    def long(self, w_obj):
+        if isinstance(w_obj, LongObject):
+            return w_obj
+        assert isinstance(w_obj, boxes.W_GenericBox)
+        return self.int(w_obj.descr_long(self))
+
     def str(self, w_obj):
         if isinstance(w_obj, StringObject):
             return w_obj
@@ -240,9 +346,29 @@
     def gettypefor(self, w_obj):
         return W_TypeObject(w_obj.typedef.name)
 
-    def call_function(self, tp, w_dtype):
+    def call_function(self, tp, w_dtype, *args):
+        if tp is self.w_float:
+            if isinstance(w_dtype, boxes.W_Float64Box):
+                return FloatObject(float(w_dtype.value))
+            if isinstance(w_dtype, boxes.W_Float32Box):
+                return FloatObject(float(w_dtype.value))
+            if isinstance(w_dtype, boxes.W_Int64Box):
+                return FloatObject(float(int(w_dtype.value)))
+            if isinstance(w_dtype, boxes.W_Int32Box):
+                return FloatObject(float(int(w_dtype.value)))
+            if isinstance(w_dtype, boxes.W_Int16Box):
+                return FloatObject(float(int(w_dtype.value)))
+            if isinstance(w_dtype, boxes.W_Int8Box):
+                return FloatObject(float(int(w_dtype.value)))
+            if isinstance(w_dtype, IntObject):
+                return FloatObject(float(w_dtype.intval))
+        if tp is self.w_int:
+            if isinstance(w_dtype, FloatObject):
+                return IntObject(int(w_dtype.floatval))
+
         return w_dtype
 
+    @specialize.arg(2)
     def call_method(self, w_obj, s, *args):
         # XXX even the hacks have hacks
         return getattr(w_obj, 'descr_' + s)(self, *args)
@@ -258,21 +384,21 @@
     def newtuple(self, list_w):
         return ListObject(list_w)
 
-    def newdict(self):
-        return {}
+    def newdict(self, module=True):
+        return DictObject({})
 
-    def setitem(self, dict, item, value):
-        dict[item] = value
+    def newint(self, i):
+        if isinstance(i, IntObject):
+            return i
+        return IntObject(i)
 
-    def len_w(self, w_obj):
-        if isinstance(w_obj, ListObject):
-            return len(w_obj.items)
-        # XXX array probably
-        assert False
+    def setitem(self, obj, index, value):
+        obj.items[index] = value
 
     def exception_match(self, w_exc_type, w_check_class):
-        # Good enough for now
-        raise NotImplementedError
+        assert isinstance(w_exc_type, W_TypeObject)
+        assert isinstance(w_check_class, W_TypeObject)
+        return w_exc_type.name == w_check_class.name
 
 class FloatObject(W_Root):
     tp = FakeSpace.w_float
@@ -283,6 +409,9 @@
     tp = FakeSpace.w_bool
     def __init__(self, boolval):
         self.intval = boolval
+FakeSpace.w_True = BoolObject(True)
+FakeSpace.w_False = BoolObject(False)
+
 
 class IntObject(W_Root):
     tp = FakeSpace.w_int
@@ -299,6 +428,33 @@
     def __init__(self, items):
         self.items = items
 
+class DictObject(W_Root):
+    tp = FakeSpace.w_dict
+    def __init__(self, items):
+        self.items = items
+
+    def getdict(self, space):
+        return self.items
+
+    def getdictvalue(self, space, key):
+        return self.items[key]
+
+class IterDictObject(W_Root):
+    def __init__(self, space, w_dict):
+        self.space = space
+        self.items = w_dict.items.items()
+        self.i = 0
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        space = self.space
+        if self.i >= len(self.items):
+            raise OperationError(space.w_StopIteration, space.wrap("stop iteration"))
+        self.i += 1
+        return self.items[self.i-1][0]
+
 class SliceObject(W_Root):
     tp = FakeSpace.w_slice
     def __init__(self, start, stop, step):
@@ -414,6 +570,15 @@
                 w_rhs = IntObject(int(w_rhs.floatval))
             assert isinstance(w_lhs, W_NDimArray)
             w_res = w_lhs.descr_getitem(interp.space, w_rhs)
+            if isinstance(w_rhs, IntObject):
+                if isinstance(w_res, boxes.W_Float64Box):
+                    print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value)
+                if isinstance(w_res, boxes.W_Float32Box):
+                    print "access", w_lhs, "[", w_rhs.intval, "] => ", float(w_res.value)
+                if isinstance(w_res, boxes.W_Int64Box):
+                    print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value)
+                if isinstance(w_res, boxes.W_Int32Box):
+                    print "access", w_lhs, "[", w_rhs.intval, "] => ", int(w_res.value)
         else:
             raise NotImplementedError
         if (not isinstance(w_res, W_NDimArray) and
@@ -425,9 +590,22 @@
     def __repr__(self):
         return '(%r %s %r)' % (self.lhs, self.name, self.rhs)
 
-class FloatConstant(Node):
+class NumberConstant(Node):
     def __init__(self, v):
-        self.v = float(v)
+        if isinstance(v, int):
+            self.v = v
+        elif isinstance(v, float):
+            self.v = v
+        else:
+            assert isinstance(v, str)
+            assert len(v) > 0
+            c = v[-1]
+            if c == 'f':
+                self.v = float(v[:-1])
+            elif c == 'i':
+                self.v = int(v[:-1])
+            else:
+                self.v = float(v)
 
     def __repr__(self):
         return "Const(%s)" % self.v
@@ -519,8 +697,24 @@
     def execute(self, interp):
         if self.v == 'int':
             dtype = get_dtype_cache(interp.space).w_int64dtype
+        elif self.v == 'int8':
+            dtype = get_dtype_cache(interp.space).w_int8dtype
+        elif self.v == 'int16':
+            dtype = get_dtype_cache(interp.space).w_int16dtype
+        elif self.v == 'int32':
+            dtype = get_dtype_cache(interp.space).w_int32dtype
+        elif self.v == 'uint':
+            dtype = get_dtype_cache(interp.space).w_uint64dtype
+        elif self.v == 'uint8':
+            dtype = get_dtype_cache(interp.space).w_uint8dtype
+        elif self.v == 'uint16':
+            dtype = get_dtype_cache(interp.space).w_uint16dtype
+        elif self.v == 'uint32':
+            dtype = get_dtype_cache(interp.space).w_uint32dtype
         elif self.v == 'float':
             dtype = get_dtype_cache(interp.space).w_float64dtype
+        elif self.v == 'float32':
+            dtype = get_dtype_cache(interp.space).w_float32dtype
         else:
             raise BadToken('unknown v to dtype "%s"' % self.v)
         return dtype
@@ -556,8 +750,13 @@
                 raise ArgumentMismatch
             if self.name == "sum":
                 if len(self.args)>1:
-                    w_res = arr.descr_sum(interp.space,
+                    var = self.args[1]
+                    if isinstance(var, DtypeClass):
+                        w_res = arr.descr_sum(interp.space, None, var.execute(interp))
+                    else:
+                        w_res = arr.descr_sum(interp.space,
                                           self.args[1].execute(interp))
+
                 else:
                     w_res = arr.descr_sum(interp.space)
             elif self.name == "prod":
@@ -577,10 +776,10 @@
                 w_res = logical_xor.reduce(interp.space, arr, None)
             elif self.name == "unegative":
                 neg = ufuncs.get(interp.space).negative
-                w_res = neg.call(interp.space, [arr], None, None, None)
+                w_res = neg.call(interp.space, [arr], None, 'unsafe', None)
             elif self.name == "cos":
                 cos = ufuncs.get(interp.space).cos
-                w_res = cos.call(interp.space, [arr], None, None, None)
+                w_res = cos.call(interp.space, [arr], None, 'unsafe', None)
             elif self.name == "flat":
                 w_res = arr.descr_get_flatiter(interp.space)
             elif self.name == "argsort":
@@ -598,6 +797,8 @@
                 raise ArgumentNotAnArray
             if self.name == "dot":
                 w_res = arr.descr_dot(interp.space, arg)
+            elif self.name == 'multiply':
+                w_res = arr.descr_mul(interp.space, arg)
             elif self.name == 'take':
                 w_res = arr.descr_take(interp.space, arg)
             elif self.name == "searchsorted":
@@ -617,7 +818,7 @@
             if self.name == "where":
                 w_res = where(interp.space, arr, arg1, arg2)
             else:
-                assert False
+                assert False # unreachable code
         elif self.name in TWO_ARG_FUNCTIONS_OR_NONE:
             if len(self.args) != 2:
                 raise ArgumentMismatch
@@ -626,6 +827,11 @@
                 w_res = arr.descr_view(interp.space, arg)
             elif self.name == 'astype':
                 w_res = arr.descr_astype(interp.space, arg)
+            elif self.name == 'reshape':
+                w_arg = self.args[1]
+                assert isinstance(w_arg, ArrayConstant)
+                order = -1 
+                w_res = arr.reshape(interp.space, w_arg.wrap(interp.space), order)
             else:
                 assert False
         else:
@@ -645,7 +851,7 @@
         return W_NDimArray.new_scalar(interp.space, dtype, w_res)
 
 _REGEXES = [
-    ('-?[\d\.]+', 'number'),
+    ('-?[\d\.]+(i|f)?', 'number'),
     ('\[', 'array_left'),
     (':', 'colon'),
     ('\w+', 'identifier'),
@@ -719,7 +925,7 @@
             start = 0
         else:
             if tokens.get(0).name != 'colon':
-                return FloatConstant(start_tok.v)
+                return NumberConstant(start_tok.v)
             start = int(start_tok.v)
             tokens.pop()
         if not tokens.get(0).name in ['colon', 'number']:
@@ -751,8 +957,30 @@
                     stack.append(ArrayClass())
                 elif token.v.strip(' ') == 'int':
                     stack.append(DtypeClass('int'))
+                elif token.v.strip(' ') == 'int8':
+                    stack.append(DtypeClass('int8'))
+                elif token.v.strip(' ') == 'int16':
+                    stack.append(DtypeClass('int16'))
+                elif token.v.strip(' ') == 'int32':
+                    stack.append(DtypeClass('int32'))
+                elif token.v.strip(' ') == 'int64':
+                    stack.append(DtypeClass('int'))
+                elif token.v.strip(' ') == 'uint':
+                    stack.append(DtypeClass('uint'))
+                elif token.v.strip(' ') == 'uint8':
+                    stack.append(DtypeClass('uint8'))
+                elif token.v.strip(' ') == 'uint16':
+                    stack.append(DtypeClass('uint16'))
+                elif token.v.strip(' ') == 'uint32':
+                    stack.append(DtypeClass('uint32'))
+                elif token.v.strip(' ') == 'uint64':
+                    stack.append(DtypeClass('uint'))
                 elif token.v.strip(' ') == 'float':
                     stack.append(DtypeClass('float'))
+                elif token.v.strip(' ') == 'float32':
+                    stack.append(DtypeClass('float32'))
+                elif token.v.strip(' ') == 'float64':
+                    stack.append(DtypeClass('float'))
                 else:
                     stack.append(Variable(token.v.strip(' ')))
             elif token.name == 'array_left':
@@ -805,7 +1033,7 @@
         while True:
             token = tokens.pop()
             if token.name == 'number':
-                elems.append(FloatConstant(token.v))
+                elems.append(NumberConstant(token.v))
             elif token.name == 'array_left':
                 elems.append(ArrayConstant(self.parse_array_const(tokens)))
             elif token.name == 'paren_left':
diff --git a/pypy/module/micronumpy/concrete.py b/pypy/module/micronumpy/concrete.py
--- a/pypy/module/micronumpy/concrete.py
+++ b/pypy/module/micronumpy/concrete.py
@@ -70,7 +70,10 @@
 
     @jit.unroll_safe
     def setslice(self, space, arr):
-        if len(arr.get_shape()) >  len(self.get_shape()):
+        if arr.get_size() == 1:
+            # we can always set self[:] = scalar
+            pass
+        elif len(arr.get_shape()) >  len(self.get_shape()):
             # record arrays get one extra dimension
             if not self.dtype.is_record() or \
                     len(arr.get_shape()) > len(self.get_shape()) + 1:
diff --git a/pypy/module/micronumpy/ctors.py b/pypy/module/micronumpy/ctors.py
--- a/pypy/module/micronumpy/ctors.py
+++ b/pypy/module/micronumpy/ctors.py
@@ -86,6 +86,9 @@
 
 def _array(space, w_object, w_dtype=None, copy=True, w_order=None, subok=False):
 
+    # numpy testing calls array(type(array([]))) and expects a ValueError
+    if space.isinstance_w(w_object, space.w_type):
+        raise oefmt(space.w_ValueError, "cannot create ndarray from type instance")
     # for anything that isn't already an array, try __array__ method first
     if not isinstance(w_object, W_NDimArray):
         w_array = try_array_method(space, w_object, w_dtype)
diff --git a/pypy/module/micronumpy/flatiter.py b/pypy/module/micronumpy/flatiter.py
--- a/pypy/module/micronumpy/flatiter.py
+++ b/pypy/module/micronumpy/flatiter.py
@@ -97,7 +97,7 @@
         finally:
             self.iter.reset(self.state, mutate=True)
 
-    def descr___array_wrap__(self, space, obj):
+    def descr___array_wrap__(self, space, obj, w_context=None):
         return obj
 
 W_FlatIterator.typedef = TypeDef("numpy.flatiter",
diff --git a/pypy/module/micronumpy/iterators.py b/pypy/module/micronumpy/iterators.py
--- a/pypy/module/micronumpy/iterators.py
+++ b/pypy/module/micronumpy/iterators.py
@@ -83,6 +83,12 @@
         self._indices = indices
         self.offset = offset
 
+    def same(self, other):
+        if self.offset == other.offset and \
+           self.index == other.index and \
+           self._indices == other._indices:
+            return self.iterator.same_shape(other.iterator)
+        return False
 
 class ArrayIter(object):
     _immutable_fields_ = ['contiguous', 'array', 'size', 'ndim_m1', 'shape_m1[*]',
@@ -100,6 +106,7 @@
         self.array = array
         self.size = size
         self.ndim_m1 = len(shape) - 1
+        #
         self.shape_m1 = [s - 1 for s in shape]
         self.strides = strides
         self.backstrides = backstrides
@@ -113,6 +120,17 @@
                 factors[ndim-i-1] = factors[ndim-i] * shape[ndim-i]
         self.factors = factors
 
+    def same_shape(self, other):
+        """ True if both iterators are contiguous with the same dtype and layout """
+        if not self.contiguous or not other.contiguous:
+            return False
+        # both are contiguous from here on; compare dtype and geometry
+        return (self.array.dtype is other.array.dtype and
+                self.shape_m1 == other.shape_m1 and
+                self.strides == other.strides and
+                self.backstrides == other.backstrides and
+                self.factors == other.factors)
+
     @jit.unroll_safe
     def reset(self, state=None, mutate=False):
         index = 0
@@ -138,9 +156,13 @@
         indices = state._indices
         offset = state.offset
         if self.contiguous:
-            offset += self.array.dtype.elsize
+            elsize = self.array.dtype.elsize
+            jit.promote(elsize)
+            offset += elsize
         elif self.ndim_m1 == 0:
-            offset += self.strides[0]
+            stride = self.strides[0]
+            jit.promote(stride)
+            offset += stride
         else:
             for i in xrange(self.ndim_m1, -1, -1):
                 idx = indices[i]
@@ -192,7 +214,7 @@
         return state.index >= self.size
 
     def getitem(self, state):
-        assert state.iterator is self
+        # assert state.iterator is self
         return self.array.getitem(state.offset)
 
     def getitem_bool(self, state):
@@ -203,7 +225,6 @@
         assert state.iterator is self
         self.array.setitem(state.offset, elem)
 
-
 def AxisIter(array, shape, axis):
     strides = array.get_strides()
     backstrides = array.get_backstrides()
diff --git a/pypy/module/micronumpy/loop.py b/pypy/module/micronumpy/loop.py
--- a/pypy/module/micronumpy/loop.py
+++ b/pypy/module/micronumpy/loop.py
@@ -2,6 +2,7 @@
 operations. This is the place to look for all the computations that iterate
 over all the array elements.
 """
+import py
 from pypy.interpreter.error import OperationError
 from rpython.rlib import jit
 from rpython.rlib.rstring import StringBuilder
@@ -13,11 +14,6 @@
 from pypy.interpreter.argument import Arguments
 
 
-call2_driver = jit.JitDriver(
-    name='numpy_call2',
-    greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'],
-    reds='auto')
-
 def call2(space, shape, func, calc_dtype, w_lhs, w_rhs, out):
     if w_lhs.get_size() == 1:
         w_left = w_lhs.get_scalar_value().convert_to(space, calc_dtype)
@@ -38,24 +34,104 @@
     out_iter, out_state = out.create_iter(shape)
     shapelen = len(shape)
     res_dtype = out.get_dtype()
-    while not out_iter.done(out_state):
-        call2_driver.jit_merge_point(shapelen=shapelen, func=func,
-                                     calc_dtype=calc_dtype, res_dtype=res_dtype)
-        if left_iter:
-            w_left = left_iter.getitem(left_state).convert_to(space, calc_dtype)
-            left_state = left_iter.next(left_state)
-        if right_iter:
-            w_right = right_iter.getitem(right_state).convert_to(space, calc_dtype)
-            right_state = right_iter.next(right_state)
-        out_iter.setitem(out_state, func(calc_dtype, w_left, w_right).convert_to(
-            space, res_dtype))
-        out_state = out_iter.next(out_state)
-    return out
+    call2_func = try_to_share_iterators_call2(left_iter, right_iter,
+            left_state, right_state, out_state)
+    params = (space, shapelen, func, calc_dtype, res_dtype, out,
+              w_left, w_right, left_iter, right_iter, out_iter,
+              left_state, right_state, out_state)
+    return call2_func(*params)
+
+def try_to_share_iterators_call2(left_iter, right_iter, left_state, right_state, out_state):
+    # these are all possible iterator sharing combinations
+    # left == right == out
+    # left == right
+    # left == out
+    # right == out
+    right_out_equal = False
+    if right_iter:
+        # rhs is not a scalar
+        if out_state.same(right_state):
+            right_out_equal = True
+    #
+    if not left_iter:
+        # lhs is a scalar
+        if right_out_equal:
+            return call2_advance_out_left
+        else:
+            # worst case, nothing can be shared and lhs is a scalar
+            return call2_advance_out_left_right
+    else:
+        # lhs is NOT a scalar
+        if out_state.same(left_state):
+            # (2) out and left are the same -> remove left
+            if right_out_equal:
+                # the best case
+                return call2_advance_out
+            else:
+                return call2_advance_out_right
+        else:
+            if right_out_equal:
+                # right and out are equal, only advance left and out
+                return call2_advance_out_left
+            else:
+                if right_iter and right_state.same(left_state):
+                    # left and right are equal, but still need to advance out
+                    return call2_advance_out_left_eq_right
+                else:
+                    # worst case, nothing can be shared
+                    return call2_advance_out_left_right
+
+    assert 0, "logical problem with the selection of the call2 case"
+
+def generate_call2_cases(name, left_state, right_state):
+    call2_driver = jit.JitDriver(name='numpy_call2_' + name,
+        greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'],
+        reds='auto', vectorize=True)
+    #
+    advance_left_state = left_state == "left_state"
+    advance_right_state = right_state == "right_state"
+    code = """
+    def method(space, shapelen, func, calc_dtype, res_dtype, out,
+               w_left, w_right, left_iter, right_iter, out_iter,
+               left_state, right_state, out_state):
+        while not out_iter.done(out_state):
+            call2_driver.jit_merge_point(shapelen=shapelen, func=func,
+                    calc_dtype=calc_dtype, res_dtype=res_dtype)
+            if left_iter:
+                w_left = left_iter.getitem({left_state}).convert_to(space, calc_dtype)
+            if right_iter:
+                w_right = right_iter.getitem({right_state}).convert_to(space, calc_dtype)
+            w_out = func(calc_dtype, w_left, w_right)
+            out_iter.setitem(out_state, w_out.convert_to(space, res_dtype))
+            out_state = out_iter.next(out_state)
+            if advance_left_state and left_iter:
+                left_state = left_iter.next(left_state)
+            if advance_right_state and right_iter:
+                right_state = right_iter.next(right_state)
+            #
+            # if not set to None, the values will be loop carried
+            # (for the var,var case), forcing the vectorization to unpack
+            # the vector registers at the end of the loop
+            if left_iter:
+                w_left = None
+            if right_iter:
+                w_right = None
+        return out
+    """
+    exec(py.code.Source(code.format(left_state=left_state,right_state=right_state)).compile(), locals())
+    method.__name__ = "call2_" + name
+    return method
+
+call2_advance_out = generate_call2_cases("inc_out", "out_state", "out_state")
+call2_advance_out_left = generate_call2_cases("inc_out_left", "left_state", "out_state")
+call2_advance_out_right = generate_call2_cases("inc_out_right", "out_state", "right_state")
+call2_advance_out_left_eq_right = generate_call2_cases("inc_out_left_eq_right", "left_state", "left_state")
+call2_advance_out_left_right = generate_call2_cases("inc_out_left_right", "left_state", "right_state")
 
 call1_driver = jit.JitDriver(
     name='numpy_call1',
-    greens=['shapelen', 'func', 'calc_dtype', 'res_dtype'],
-    reds='auto')
+    greens=['shapelen', 'share_iterator', 'func', 'calc_dtype', 'res_dtype'],
+    reds='auto', vectorize=True)
 
 def call1(space, shape, func, calc_dtype, w_obj, w_ret):
     obj_iter, obj_state = w_obj.create_iter(shape)
@@ -63,13 +139,24 @@
     out_iter, out_state = w_ret.create_iter(shape)
     shapelen = len(shape)
     res_dtype = w_ret.get_dtype()
+    share_iterator = out_state.same(obj_state)
     while not out_iter.done(out_state):
         call1_driver.jit_merge_point(shapelen=shapelen, func=func,
+                                     share_iterator=share_iterator,
                                      calc_dtype=calc_dtype, res_dtype=res_dtype)
-        elem = obj_iter.getitem(obj_state).convert_to(space, calc_dtype)
+        if share_iterator:
+            # use out state as param to getitem
+            elem = obj_iter.getitem(out_state).convert_to(space, calc_dtype)
+        else:
+            elem = obj_iter.getitem(obj_state).convert_to(space, calc_dtype)
         out_iter.setitem(out_state, func(calc_dtype, elem).convert_to(space, res_dtype))
-        out_state = out_iter.next(out_state)
-        obj_state = obj_iter.next(obj_state)
+        if share_iterator:
+            # only advance out, they share the same iteration space
+            out_state = out_iter.next(out_state)
+        else:
+            out_state = out_iter.next(out_state)
+            obj_state = obj_iter.next(obj_state)
+        elem = None
     return w_ret
 
 call_many_to_one_driver = jit.JitDriver(
@@ -145,7 +232,7 @@
             vals[i] = in_dtypes[i].coerce(space, in_iters[i].getitem(in_states[i]))
         w_arglist = space.newlist(vals)
         w_outvals = space.call_args(func, Arguments.frompacked(space, w_arglist))
-        # w_outvals should be a tuple, but func can return a single value as well
+        # w_outvals should be a tuple, but func can return a single value as well 
         if space.isinstance_w(w_outvals, space.w_tuple):
             batch = space.listview(w_outvals)
             for i in range(len(batch)):
@@ -161,7 +248,7 @@
 
 setslice_driver = jit.JitDriver(name='numpy_setslice',
                                 greens = ['shapelen', 'dtype'],
-                                reds = 'auto')
+                                reds = 'auto', vectorize=True)
 
 def setslice(space, shape, target, source):
     if not shape:
@@ -239,7 +326,8 @@
 
 reduce_flat_driver = jit.JitDriver(
     name='numpy_reduce_flat',
-    greens = ['shapelen', 'func', 'done_func', 'calc_dtype'], reds = 'auto')
+    greens = ['shapelen', 'func', 'done_func', 'calc_dtype'], reds = 'auto',
+    vectorize = True)
 
 def reduce_flat(space, func, w_arr, calc_dtype, done_func, identity):
     obj_iter, obj_state = w_arr.create_iter()
@@ -260,10 +348,10 @@
         obj_state = obj_iter.next(obj_state)
     return cur_value
 
-
 reduce_driver = jit.JitDriver(
     name='numpy_reduce',
-    greens=['shapelen', 'func', 'dtype'], reds='auto')
+    greens=['shapelen', 'func', 'dtype'], reds='auto',
+    vectorize=True)
 
 def reduce(space, func, w_arr, axis_flags, dtype, out, identity):
     out_iter, out_state = out.create_iter()
@@ -298,7 +386,7 @@
 accumulate_flat_driver = jit.JitDriver(
     name='numpy_accumulate_flat',
     greens=['shapelen', 'func', 'dtype', 'out_dtype'],
-    reds='auto')
+    reds='auto', vectorize=True)
 
 def accumulate_flat(space, func, w_arr, calc_dtype, w_out, identity):
     arr_iter, arr_state = w_arr.create_iter()
@@ -325,7 +413,9 @@
 
 accumulate_driver = jit.JitDriver(
     name='numpy_accumulate',
-    greens=['shapelen', 'func', 'calc_dtype'], reds='auto')
+    greens=['shapelen', 'func', 'calc_dtype'],
+    reds='auto',
+    vectorize=True)
 
 
 def accumulate(space, func, w_arr, axis, calc_dtype, w_out, identity):
@@ -375,7 +465,8 @@
 
 where_driver = jit.JitDriver(name='numpy_where',
                              greens = ['shapelen', 'dtype', 'arr_dtype'],
-                             reds = 'auto')
+                             reds = 'auto',
+                             vectorize=True)
 
 def where(space, out, shape, arr, x, y, dtype):
     out_iter, out_state = out.create_iter(shape)
@@ -416,7 +507,6 @@
             state = x_state
     return out
 
-
 def _new_argmin_argmax(op_name):
     arg_driver = jit.JitDriver(name='numpy_' + op_name,
                                greens = ['shapelen', 'dtype'],
@@ -481,7 +571,8 @@
 
 dot_driver = jit.JitDriver(name = 'numpy_dot',
                            greens = ['dtype'],
-                           reds = 'auto')
+                           reds = 'auto',
+                           vectorize=True)
 
 def multidim_dot(space, left, right, result, dtype, right_critical_dim):
     ''' assumes left, right are concrete arrays
@@ -524,8 +615,8 @@
                 lval = left_impl.getitem(i1).convert_to(space, dtype)
                 rval = right_impl.getitem(i2).convert_to(space, dtype)
                 oval = dtype.itemtype.add(oval, dtype.itemtype.mul(lval, rval))
-                i1 += s1
-                i2 += s2
+                i1 += jit.promote(s1)
+                i2 += jit.promote(s2)
             outi.setitem(outs, oval)
             outs = outi.next(outs)
             rights = righti.next(rights)
@@ -535,7 +626,8 @@
 
 count_all_true_driver = jit.JitDriver(name = 'numpy_count',
                                       greens = ['shapelen', 'dtype'],
-                                      reds = 'auto')
+                                      reds = 'auto',
+                                      vectorize=True)
 
 def count_all_true_concrete(impl):
     s = 0
@@ -556,7 +648,8 @@
 
 nonzero_driver = jit.JitDriver(name = 'numpy_nonzero',
                                greens = ['shapelen', 'dims', 'dtype'],
-                               reds = 'auto')
+                               reds = 'auto',
+                               vectorize=True)
 
 def nonzero(res, arr, box):
     res_iter, res_state = res.create_iter()
@@ -578,7 +671,8 @@
 getitem_filter_driver = jit.JitDriver(name = 'numpy_getitem_bool',
                                       greens = ['shapelen', 'arr_dtype',
                                                 'index_dtype'],
-                                      reds = 'auto')
+                                      reds = 'auto',
+                                      vectorize=True)
 
 def getitem_filter(res, arr, index):
     res_iter, res_state = res.create_iter()
@@ -606,7 +700,8 @@
 setitem_filter_driver = jit.JitDriver(name = 'numpy_setitem_bool',
                                       greens = ['shapelen', 'arr_dtype',
                                                 'index_dtype'],
-                                      reds = 'auto')
+                                      reds = 'auto',
+                                      vectorize=True)
 
 def setitem_filter(space, arr, index, value):
     arr_iter, arr_state = arr.create_iter()
@@ -635,7 +730,8 @@
 
 flatiter_getitem_driver = jit.JitDriver(name = 'numpy_flatiter_getitem',
                                         greens = ['dtype'],
-                                        reds = 'auto')
+                                        reds = 'auto',
+                                        vectorize=True)
 
 def flatiter_getitem(res, base_iter, base_state, step):
     ri, rs = res.create_iter()
@@ -649,7 +745,8 @@
 
 flatiter_setitem_driver = jit.JitDriver(name = 'numpy_flatiter_setitem',
                                         greens = ['dtype'],
-                                        reds = 'auto')
+                                        reds = 'auto',
+                                        vectorize=True)
 
 def flatiter_setitem(space, dtype, val, arr_iter, arr_state, step, length):
     val_iter, val_state = val.create_iter()
@@ -758,7 +855,8 @@
 
 byteswap_driver = jit.JitDriver(name='numpy_byteswap_driver',
                                 greens = ['dtype'],
-                                reds = 'auto')
+                                reds = 'auto',
+                                vectorize=True)
 
 def byteswap(from_, to):
     dtype = from_.dtype
@@ -773,7 +871,8 @@
 
 choose_driver = jit.JitDriver(name='numpy_choose_driver',
                               greens = ['shapelen', 'mode', 'dtype'],
-                              reds = 'auto')
+                              reds = 'auto',
+                              vectorize=True)
 
 def choose(space, arr, choices, shape, dtype, out, mode):
     shapelen = len(shape)
@@ -807,7 +906,8 @@
 
 clip_driver = jit.JitDriver(name='numpy_clip_driver',
                             greens = ['shapelen', 'dtype'],
-                            reds = 'auto')
+                            reds = 'auto',
+                            vectorize=True)
 
 def clip(space, arr, shape, min, max, out):
     assert min or max
@@ -842,7 +942,8 @@
 
 round_driver = jit.JitDriver(name='numpy_round_driver',
                              greens = ['shapelen', 'dtype'],
-                             reds = 'auto')
+                             reds = 'auto',
+                             vectorize=True)
 
 def round(space, arr, dtype, shape, decimals, out):
     arr_iter, arr_state = arr.create_iter(shape)
diff --git a/pypy/module/micronumpy/strides.py b/pypy/module/micronumpy/strides.py
--- a/pypy/module/micronumpy/strides.py
+++ b/pypy/module/micronumpy/strides.py
@@ -7,6 +7,7 @@
 # structures to describe slicing
 
 class BaseChunk(object):
+    _attrs_ = ['step','out_dim']
     pass
 
 
diff --git a/pypy/module/micronumpy/test/test_compile.py b/pypy/module/micronumpy/test/test_compile.py
--- a/pypy/module/micronumpy/test/test_compile.py
+++ b/pypy/module/micronumpy/test/test_compile.py
@@ -1,6 +1,6 @@
 import py
 from pypy.module.micronumpy.compile import (numpy_compile, Assignment,
-    ArrayConstant, FloatConstant, Operator, Variable, RangeConstant, Execute,
+    ArrayConstant, NumberConstant, Operator, Variable, RangeConstant, Execute,
     FunctionCall, FakeSpace, W_NDimArray)
 
 
@@ -25,30 +25,30 @@
         interp = self.compile(code)
         assert isinstance(interp.code.statements[0].expr, ArrayConstant)
         st = interp.code.statements[0]
-        assert st.expr.items == [FloatConstant(1), FloatConstant(2),
-                                 FloatConstant(3)]
+        assert st.expr.items == [NumberConstant(1), NumberConstant(2),
+                                 NumberConstant(3)]
 
     def test_array_literal2(self):
         code = "a = [[1],[2],[3]]"
         interp = self.compile(code)
         assert isinstance(interp.code.statements[0].expr, ArrayConstant)
         st = interp.code.statements[0]
-        assert st.expr.items == [ArrayConstant([FloatConstant(1)]),
-                                 ArrayConstant([FloatConstant(2)]),
-                                 ArrayConstant([FloatConstant(3)])]
+        assert st.expr.items == [ArrayConstant([NumberConstant(1)]),
+                                 ArrayConstant([NumberConstant(2)]),
+                                 ArrayConstant([NumberConstant(3)])]
 
     def test_expr_1(self):
         code = "b = a + 1"
         interp = self.compile(code)
         assert (interp.code.statements[0].expr ==
-                Operator(Variable("a"), "+", FloatConstant(1)))
+                Operator(Variable("a"), "+", NumberConstant(1)))
 
     def test_expr_2(self):
         code = "b = a + b - 3"
         interp = self.compile(code)
         assert (interp.code.statements[0].expr ==
                 Operator(Operator(Variable("a"), "+", Variable("b")), "-",
-                         FloatConstant(3)))
+                         NumberConstant(3)))
 
     def test_expr_3(self):
         # an equivalent of range
@@ -60,13 +60,13 @@
         code = "3 + a"
         interp = self.compile(code)
         assert interp.code.statements[0] == Execute(
-            Operator(FloatConstant(3), "+", Variable("a")))
+            Operator(NumberConstant(3), "+", Variable("a")))
 
     def test_array_access(self):
         code = "a -> 3"
         interp = self.compile(code)
         assert interp.code.statements[0] == Execute(
-            Operator(Variable("a"), "->", FloatConstant(3)))
+            Operator(Variable("a"), "->", NumberConstant(3)))
 
     def test_function_call(self):
         code = "sum(a)"
@@ -81,7 +81,7 @@
         """
         interp = self.compile(code)
         assert interp.code.statements[0] == Assignment(
-            'a', Operator(Variable('b'), "+", FloatConstant(3)))
+            'a', Operator(Variable('b'), "+", NumberConstant(3)))
 
 
 class TestRunner(object):
@@ -272,6 +272,14 @@
         """)
         assert interp.results[0].value == 3
 
+    def test_any(self):
+        interp = self.run("""
+        a = [0,0,0,0,0.1,0,0,0,0]
+        b = any(a)
+        b -> 0
+        """)
+        assert interp.results[0].value == 1
+
     def test_where(self):
         interp = self.run('''
         a = [1, 0, 3, 0]
diff --git a/pypy/module/micronumpy/test/test_ndarray.py b/pypy/module/micronumpy/test/test_ndarray.py
--- a/pypy/module/micronumpy/test/test_ndarray.py
+++ b/pypy/module/micronumpy/test/test_ndarray.py
@@ -292,6 +292,8 @@
         a = np.array('123', dtype='intp')
         assert a == 123
         assert a.dtype == np.intp
+        # required for numpy test suite
+        raises(ValueError, np.array, type(a))
 
     def test_array_copy(self):
         from numpy import array
diff --git a/pypy/module/micronumpy/test/test_ufuncs.py b/pypy/module/micronumpy/test/test_ufuncs.py
--- a/pypy/module/micronumpy/test/test_ufuncs.py
+++ b/pypy/module/micronumpy/test/test_ufuncs.py
@@ -319,6 +319,28 @@
             assert out0.dtype in (int, complex) 
             assert (out0 == in0 * 2).all()
 
+    def test_frompyfunc_scalar(self):
+        import sys
+        import numpy as np
+        if '__pypy__' not in sys.builtin_module_names:
+            skip('PyPy only frompyfunc extension')
+
+        def summer(in0):
+            out = np.empty(1, in0.dtype)
+            out[0] = in0.sum()
+            return out
+
+        pysummer = np.frompyfunc([summer, summer], 1, 1,
+                            dtypes=[np.dtype(int), np.dtype(int),
+                                np.dtype(complex), np.dtype(complex)],
+                            stack_inputs=False, signature='(m,m)->()',
+                          )
+        for d in [np.dtype(float), np.dtype('uint8'), np.dtype('complex64')]:
+            in0 = np.arange(4, dtype=d).reshape(1, 2, 2)
+            out0 = pysummer(in0)
+            assert out0 == in0.sum()
+            assert out0.dtype in (int, complex)
+
     def test_ufunc_kwargs(self):
         from numpy import ufunc, frompyfunc, arange, dtype
         def adder(a, b):
diff --git a/pypy/module/micronumpy/test/test_zjit.py b/pypy/module/micronumpy/test/test_zjit.py
--- a/pypy/module/micronumpy/test/test_zjit.py
+++ b/pypy/module/micronumpy/test/test_zjit.py
@@ -4,17 +4,37 @@
 
 import py
 from rpython.jit.metainterp.test.support import LLJitMixin
+from rpython.jit.backend.x86.test.test_basic import Jit386Mixin
 from rpython.jit.metainterp.warmspot import reset_jit, get_stats
+from rpython.jit.metainterp.jitprof import Profiler
+from rpython.jit.metainterp import counter
+from rpython.rlib.jit import Counters
+from rpython.rlib.rarithmetic import intmask
 from pypy.module.micronumpy import boxes
 from pypy.module.micronumpy.compile import FakeSpace, Parser, InterpreterState
 from pypy.module.micronumpy.base import W_NDimArray
+from rpython.jit.backend.detect_cpu import getcpuclass
 
-py.test.skip('move these to pypyjit/test_pypy_c/test_micronumpy')
+CPU = getcpuclass()
+if not CPU.vector_extension:
+    py.test.skip("this cpu %s has no implemented vector backend" % CPU)
+
+def get_profiler():
+    from rpython.jit.metainterp import pyjitpl
+    return pyjitpl._warmrunnerdesc.metainterp_sd.profiler
 
 class TestNumpyJit(LLJitMixin):
+    enable_opts = "intbounds:rewrite:virtualize:string:earlyforce:pure:heap:unroll"
     graph = None
     interp = None
 
+    def setup_method(self, method):
+        if not self.CPUClass.vector_extension:
+            py.test.skip("needs vector extension to run (for now)")
+
+    def assert_float_equal(self, f1, f2, delta=0.0001):
+        assert abs(f1-f2) < delta
+
     def setup_class(cls):
         default = """
         a = [1,2,3,4]
@@ -52,12 +72,29 @@
                 w_res = i.getitem(s)
             if isinstance(w_res, boxes.W_Float64Box):
                 return w_res.value
+            if isinstance(w_res, boxes.W_Float32Box):
+                return float(w_res.value)
             elif isinstance(w_res, boxes.W_Int64Box):
                 return float(w_res.value)
+            elif isinstance(w_res, boxes.W_Int32Box):
+                return float(int(w_res.value))
+            elif isinstance(w_res, boxes.W_Int16Box):
+                return float(int(w_res.value))
+            elif isinstance(w_res, boxes.W_Int8Box):
+                return float(int(w_res.value))
+            elif isinstance(w_res, boxes.W_UInt64Box):
+                return float(intmask(w_res.value))
+            elif isinstance(w_res, boxes.W_UInt32Box):
+                return float(intmask(w_res.value))
+            elif isinstance(w_res, boxes.W_UInt16Box):
+                return float(intmask(w_res.value))
+            elif isinstance(w_res, boxes.W_UInt8Box):
+                return float(intmask(w_res.value))
             elif isinstance(w_res, boxes.W_LongBox):
                 return float(w_res.value)
             elif isinstance(w_res, boxes.W_BoolBox):
                 return float(w_res.value)
+            print "ERROR: did not implement return type for interpreter"
             raise TypeError(w_res)
 
         if self.graph is None:
@@ -65,122 +102,354 @@
                                              listops=True,
                                              listcomp=True,
                                              backendopt=True,
-                                             graph_and_interp_only=True)
+                                             graph_and_interp_only=True,
+                                             ProfilerClass=Profiler,
+                                             vec=True)
             self.__class__.interp = interp
             self.__class__.graph = graph
 
+    def check_vectorized(self, expected_tried, expected_success):
+        profiler = get_profiler()
+        tried = profiler.get_counter(Counters.OPT_VECTORIZE_TRY)
+        success = profiler.get_counter(Counters.OPT_VECTORIZED)
+        assert tried >= success
+        assert tried == expected_tried
+        assert success == expected_success
+
     def run(self, name):
         self.compile_graph()
+        profiler = get_profiler()
+        profiler.start()
         reset_jit()
         i = self.code_mapping[name]
         retval = self.interp.eval_graph(self.graph, [i])
         return retval
 
-    def define_add():
+    def define_float32_copy():
+        return """
+        a = astype(|30|, float32)
+        x1 = a -> 7
+        x2 = a -> 8
+        x3 = a -> 9
+        x4 = a -> 10
+        r = x1 + x2 + x3 + x4
+        r
+        """
+    def test_float32_copy(self):
+        result = self.run("float32_copy")
+        assert int(result) == 7+8+9+10
+        self.check_vectorized(1, 1)
+
+    def define_int32_copy():
+        return """
+        a = astype(|30|, int32)
+        x1 = a -> 7
+        x2 = a -> 8
+        x3 = a -> 9
+        x4 = a -> 10
+        x1 + x2 + x3 + x4
+        """
+    def test_int32_copy(self):
+        result = self.run("int32_copy")
+        assert int(result) == 7+8+9+10
+        self.check_vectorized(1, 1)
+
+    def define_float32_add():
+        return """
+        a = astype(|30|, float32)
+        b = a + a
+        b -> 15
+        """
+    def test_float32_add(self):
+        result = self.run("float32_add")
+        self.assert_float_equal(result, 15.0 + 15.0)
+        self.check_vectorized(2, 2)
+
+    def define_float_add():
         return """
         a = |30|
         b = a + a