[pypy-commit] pypy numpypy-complex2: merge default into branch

mattip noreply@buildbot.pypy.org
Tue Sep 25 08:23:27 CEST 2012


Author: mattip <matti.picus@gmail.com>
Branch: numpypy-complex2
Changeset: r57543:a0e9cfa41b77
Date: 2012-09-25 08:19 +0200
http://bitbucket.org/pypy/pypy/changeset/a0e9cfa41b77/

Log:	merge default into branch

diff too long, truncating to 2000 out of 5978 lines

diff --git a/lib-python/2.7/test/test_csv.py b/lib-python/2.7/test/test_csv.py
--- a/lib-python/2.7/test/test_csv.py
+++ b/lib-python/2.7/test/test_csv.py
@@ -20,7 +20,8 @@
     """
     def _test_arg_valid(self, ctor, arg):
         self.assertRaises(TypeError, ctor)
-        self.assertRaises(TypeError, ctor, None)
+        # PyPy gets an AttributeError instead of a TypeError
+        self.assertRaises((TypeError, AttributeError), ctor, None)
         self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
         self.assertRaises(TypeError, ctor, arg, delimiter = 0)
         self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
@@ -59,7 +60,8 @@
         self.assertRaises((TypeError, AttributeError), setattr, obj.dialect,
                           'delimiter', ':')
         self.assertRaises(AttributeError, delattr, obj.dialect, 'quoting')
-        self.assertRaises(AttributeError, setattr, obj.dialect,
+        # PyPy gets a TypeError instead of an AttributeError
+        self.assertRaises((AttributeError, TypeError), setattr, obj.dialect,
                           'quoting', None)
 
     def test_reader_attrs(self):
@@ -133,7 +135,8 @@
             os.unlink(name)
 
     def test_write_arg_valid(self):
-        self.assertRaises(csv.Error, self._write_test, None, '')
+        # PyPy gets a TypeError instead of a csv.Error for "not a sequence"
+        self.assertRaises((csv.Error, TypeError), self._write_test, None, '')
         self._write_test((), '')
         self._write_test([None], '""')
         self.assertRaises(csv.Error, self._write_test,
diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -183,7 +183,7 @@
     RegrTest('test_cpickle.py', core=True),
     RegrTest('test_cprofile.py'), 
     RegrTest('test_crypt.py', usemodules='crypt', skip=skip_win32),
-    RegrTest('test_csv.py'),
+    RegrTest('test_csv.py', usemodules='_csv'),
 
     RegrTest('test_curses.py', skip="unsupported extension module"),
     RegrTest('test_datetime.py'),
diff --git a/lib_pypy/_csv.py b/lib_pypy/_csv.py
--- a/lib_pypy/_csv.py
+++ b/lib_pypy/_csv.py
@@ -363,9 +363,7 @@
                             (self.dialect.delimiter, self.dialect.quotechar))
 
         elif self.state == self.EAT_CRNL:
-            if c in '\r\n':
-                pass
-            else:
+            if c not in '\r\n':
                 raise Error("new-line character seen in unquoted field - "
                             "do you need to open the file "
                             "in universal-newline mode?")
diff --git a/lib_pypy/_ctypes/pointer.py b/lib_pypy/_ctypes/pointer.py
--- a/lib_pypy/_ctypes/pointer.py
+++ b/lib_pypy/_ctypes/pointer.py
@@ -81,7 +81,9 @@
         addr = self._buffer[0]
         if addr == 0:
             raise ValueError("NULL pointer access")
-        return self._type_.from_address(addr)
+        instance = self._type_.from_address(addr)
+        instance.__dict__['_base'] = self
+        return instance
 
     def setcontents(self, value):
         if not isinstance(value, self._type_):
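
The extra '_base' reference stored above keeps the pointer object (and the
memory it owns) alive for as long as the dereferenced result is in use, which
matches CPython's ctypes behaviour.  A minimal sketch of the situation this
guards against, using CPython's ctypes purely for illustration:

    import ctypes

    class S(ctypes.Structure):
        _fields_ = [("x", ctypes.c_int)]

    def make_contents():
        p = ctypes.pointer(S(42))
        # p.contents must keep a back-reference to p; otherwise the memory
        # owned by p could be freed as soon as p goes out of scope here.
        return p.contents

    s = make_contents()
    assert s.x == 42     # safe only thanks to the back-reference
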
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -34,7 +34,7 @@
      "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
      "_bisect", "binascii", "_multiprocessing", '_warnings',
      "_collections", "_multibytecodec", "micronumpy", "_ffi",
-     "_continuation", "_cffi_backend"]
+     "_continuation", "_cffi_backend", "_csv"]
 ))
 
 translation_modules = default_modules.copy()
diff --git a/pypy/doc/jit/pyjitpl5.rst b/pypy/doc/jit/pyjitpl5.rst
--- a/pypy/doc/jit/pyjitpl5.rst
+++ b/pypy/doc/jit/pyjitpl5.rst
@@ -149,7 +149,7 @@
 
 A *virtual* value is an array, struct, or RPython level instance that is created
 during the loop and does not escape from it via calls or longevity past the
-loop.  Since it is only used by the JIT, it be "optimized out"; the value
+loop.  Since it is only used by the JIT, it can be "optimized out"; the value
 doesn't have to be allocated at all and its fields can be stored as first class
 values instead of dereferencing them in memory.  Virtuals allow temporary objects
 in the interpreter to be unwrapped.  For example, a W_IntObject in PyPy can
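
A purely illustrative Python loop showing the kind of temporary object the
paragraph above describes: 'Box' (a stand-in for something like W_IntObject)
is created and dropped inside every iteration and never escapes, so a tracing
JIT can treat it as virtual and keep only the unwrapped integer:

    class Box(object):               # stand-in for e.g. W_IntObject
        def __init__(self, value):
            self.value = value

    def loop(n):
        total = 0
        i = 0
        while i < n:
            tmp = Box(i)             # allocated inside the loop...
            total += tmp.value       # ...only its field is ever needed...
            i += 1                   # ...and it never escapes the iteration
        return total
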
diff --git a/pypy/doc/project-ideas.rst b/pypy/doc/project-ideas.rst
--- a/pypy/doc/project-ideas.rst
+++ b/pypy/doc/project-ideas.rst
@@ -21,7 +21,7 @@
 -------------------------
 
 PyPy's implementation of the Python ``long`` type is slower than CPython's.
-Find out why and optimize them.
+Find out why and optimize them.  **UPDATE:** this was done (thanks stian).
 
 Make bytearray type fast
 ------------------------
@@ -103,13 +103,32 @@
 
 * A concurrent garbage collector (a lot of work)
 
-STM, a.k.a. "remove the GIL"
-----------------------------
+STM (Software Transactional Memory)
+-----------------------------------
 
-Removing the GIL --- or more precisely, a GIL-less thread-less solution ---
-is `now work in progress.`__  Contributions welcome.
+This is work in progress.  Besides the main development path, whose goal is
+to make a (relatively fast) version of pypy which includes STM, there are
+independent topics that can already be experimented with on the existing,
+JIT-less pypy-stm version:
+  
+* What kind of conflicts do we get in real use cases?  And, sometimes,
+  which data structures would be more appropriate?  For example, a dict
+  implemented as a hash table will suffer "stm collisions" in all threads
+  whenever one thread writes anything to it; but there could be other
+  implementations.
 
-.. __: http://pypy.org/tmdonate.html
+* More generally, there is the idea that we would need some kind of
+  "debugger"-like tool to "debug" things that are not bugs, but stm
+  conflicts.  What would this tool look like to the end Python
+  programmers?  Like a profiler?  Or like a debugger with breakpoints
+  on aborted transactions?
+
+* Find good ways to have libraries use threads and atomics internally,
+  without exposing threads to the user.  Right now there is a rough draft
+  in ``lib_pypy/transaction.py``, but much better is possible.  For example
+  we could probably have an iterator-like concept that allows each loop
+  iteration to run in parallel.
+
 
 Introduce new benchmarks
 ------------------------
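
For the STM section above, the kind of API hinted at by the last bullet point
might look roughly like the sketch below.  The add()/run() names follow the
rough draft in lib_pypy/transaction.py mentioned there, so treat the exact
interface as an assumption rather than something settled:

    import transaction      # lib_pypy/transaction.py (draft, may change)

    results = []

    def process(item):
        # each call runs as its own transaction, potentially in parallel
        results.append(item * 2)

    def process_all(items):
        for item in items:
            transaction.add(process, item)   # schedule one transaction per item
        transaction.run()                    # execute them, retrying on conflicts
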
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -18,6 +18,8 @@
 .. branch: numpypy_count_nonzero
 .. branch: numpy-refactor
 Remove numpy lazy evaluation and simplify everything
+.. branch: numpy-fancy-indexing
+Support for array[array-of-ints] in numpy
 .. branch: even-more-jit-hooks
 Implement better JIT hooks
 .. branch: virtual-arguments
diff --git a/pypy/interpreter/astcompiler/assemble.py b/pypy/interpreter/astcompiler/assemble.py
--- a/pypy/interpreter/astcompiler/assemble.py
+++ b/pypy/interpreter/astcompiler/assemble.py
@@ -65,24 +65,44 @@
         self.marked = False
         self.have_return = False
 
-    def _post_order(self, blocks):
-        if self.marked:
-            return
-        self.marked = True
-        if self.next_block is not None:
-            self.next_block._post_order(blocks)
-        for instr in self.instructions:
-            if instr.has_jump:
-                instr.jump[0]._post_order(blocks)
-        blocks.append(self)
-        self.marked = True
+    def _post_order_see(self, stack, nextblock):
+        if nextblock.marked == 0:
+            nextblock.marked = 1
+            stack.append(nextblock)
 
     def post_order(self):
-        """Return this block and its children in post order."""
-        blocks = []
-        self._post_order(blocks)
-        blocks.reverse()
-        return blocks
+        """Return this block and its children in post order.
+        This means that the graph of blocks is first cleaned up to
+        ignore back-edges, thus turning it into a DAG.  Then the DAG
+        is linearized.  For example:
+
+                   A --> B -\           =>     [A, D, B, C]
+                     \-> D ---> C
+        """
+        resultblocks = []
+        stack = [self]
+        self.marked = 1
+        while stack:
+            current = stack[-1]
+            if current.marked == 1:
+                current.marked = 2
+                if current.next_block is not None:
+                    self._post_order_see(stack, current.next_block)
+            else:
+                i = current.marked - 2
+                assert i >= 0
+                while i < len(current.instructions):
+                    instr = current.instructions[i]
+                    i += 1
+                    if instr.has_jump:
+                        current.marked = i + 2
+                        self._post_order_see(stack, instr.jump[0])
+                        break
+                else:
+                    resultblocks.append(current)
+                    stack.pop()
+        resultblocks.reverse()
+        return resultblocks
 
     def code_size(self):
         """Return the encoded size of all the instructions in this block."""
@@ -353,20 +373,26 @@
     def _stacksize(self, blocks):
         """Compute co_stacksize."""
         for block in blocks:
-            block.marked = False
-            block.initial_depth = -1000
-        return self._recursive_stack_depth_walk(blocks[0], 0, 0)
+            block.initial_depth = 0
+        # Assumes that it is sufficient to walk the blocks in 'post-order'.
+        # This means we ignore all back-edges, but apart from that, we only
+        # look into a block when all the previous blocks have been done.
+        self._max_depth = 0
+        for block in blocks:
+            self._do_stack_depth_walk(block)
+        return self._max_depth
 
-    def _recursive_stack_depth_walk(self, block, depth, max_depth):
-        if block.marked or block.initial_depth >= depth:
-            return max_depth
-        block.marked = True
-        block.initial_depth = depth
+    def _next_stack_depth_walk(self, nextblock, depth):
+        if depth > nextblock.initial_depth:
+            nextblock.initial_depth = depth
+
+    def _do_stack_depth_walk(self, block):
+        depth = block.initial_depth
         done = False
         for instr in block.instructions:
             depth += _opcode_stack_effect(instr.opcode, instr.arg)
-            if depth >= max_depth:
-                max_depth = depth
+            if depth >= self._max_depth:
+                self._max_depth = depth
             if instr.has_jump:
                 target_depth = depth
                 jump_op = instr.opcode
@@ -376,20 +402,15 @@
                       jump_op == ops.SETUP_EXCEPT or
                       jump_op == ops.SETUP_WITH):
                     target_depth += 3
-                    if target_depth > max_depth:
-                        max_depth = target_depth
-                max_depth = self._recursive_stack_depth_walk(instr.jump[0],
-                                                             target_depth,
-                                                             max_depth)
+                    if target_depth > self._max_depth:
+                        self._max_depth = target_depth
+                self._next_stack_depth_walk(instr.jump[0], target_depth)
                 if jump_op == ops.JUMP_ABSOLUTE or jump_op == ops.JUMP_FORWARD:
                     # Nothing more can occur.
                     done = True
                     break
         if block.next_block and not done:
-            max_depth = self._recursive_stack_depth_walk(block.next_block,
-                                                         depth, max_depth)
-        block.marked = False
-        return max_depth
+            max_depth = self._next_stack_depth_walk(block.next_block, depth)
 
     def _build_lnotab(self, blocks):
         """Build the line number table for tracebacks and tracing."""
diff --git a/pypy/interpreter/astcompiler/test/test_compiler.py b/pypy/interpreter/astcompiler/test/test_compiler.py
--- a/pypy/interpreter/astcompiler/test/test_compiler.py
+++ b/pypy/interpreter/astcompiler/test/test_compiler.py
@@ -778,6 +778,10 @@
             raise AssertionError("attribute not removed")"""
         yield self.st, test, "X.__name__", "X"
 
+    def test_lots_of_loops(self):
+        source = "for x in y: pass\n" * 1000
+        compile_with_astcompiler(source, 'exec', self.space)
+
 
 class AppTestCompiler:
 
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -590,10 +590,6 @@
                 w_exc = self.getitem(w_dic, w_name)
                 exc_types_w[name] = w_exc
                 setattr(self, "w_" + excname, w_exc)
-        # Make a prebuilt recursion error
-        w_msg = self.wrap("maximum recursion depth exceeded")
-        self.prebuilt_recursion_error = OperationError(self.w_RuntimeError,
-                                                       w_msg)
         return exc_types_w
 
     def install_mixedmodule(self, mixedname, installed_builtin_modules):
@@ -834,7 +830,8 @@
         return isinstance(obj, RequiredClass)
 
     def unpackiterable(self, w_iterable, expected_length=-1):
-        """Unpack an iterable object into a real (interpreter-level) list.
+        """Unpack an iterable into a real (interpreter-level) list.
+
         Raise an OperationError(w_ValueError) if the length is wrong."""
         w_iterator = self.iter(w_iterable)
         if expected_length == -1:
@@ -854,12 +851,10 @@
     def iteriterable(self, w_iterable):
         return W_InterpIterable(self, w_iterable)
 
-    @jit.dont_look_inside
     def _unpackiterable_unknown_length(self, w_iterator, w_iterable):
-        # Unpack a variable-size list of unknown length.
-        # The JIT does not look inside this function because it
-        # contains a loop (made explicit with the decorator above).
-        #
+        """Unpack an iterable of unknown length into an interp-level
+        list.
+        """
         # If we can guess the expected length we can preallocate.
         try:
             lgt_estimate = self.len_w(w_iterable)
diff --git a/pypy/interpreter/error.py b/pypy/interpreter/error.py
--- a/pypy/interpreter/error.py
+++ b/pypy/interpreter/error.py
@@ -21,9 +21,7 @@
     _application_traceback = None
 
     def __init__(self, w_type, w_value, tb=None):
-        if not we_are_translated() and w_type is None:
-            from pypy.tool.error import FlowingError
-            raise FlowingError(w_value)
+        assert w_type is not None
         self.setup(w_type)
         self._w_value = w_value
         self._application_traceback = tb
@@ -327,9 +325,7 @@
                 self.xstrings = strings
                 for i, attr in entries:
                     setattr(self, attr, args[i])
-                if not we_are_translated() and w_type is None:
-                    from pypy.tool.error import FlowingError
-                    raise FlowingError(self._compute_value())
+                assert w_type is not None
             def _compute_value(self):
                 lst = [None] * (len(formats) + len(formats) + 1)
                 for i, attr in entries:
@@ -393,7 +389,7 @@
         return OperationError(exc, w_error)
 
 def wrap_oserror2(space, e, w_filename=None, exception_name='w_OSError',
-                  w_exception_class=None): 
+                  w_exception_class=None):
     assert isinstance(e, OSError)
 
     if _WINDOWS and isinstance(e, WindowsError):
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -153,17 +153,13 @@
             self._trace(frame, 'exception', None, operationerr)
         #operationerr.print_detailed_traceback(self.space)
 
-    def _convert_exc(self, operr):
-        # Only for the flow object space
-        return operr
-
     def sys_exc_info(self): # attn: the result is not the wrapped sys.exc_info() !!!
         """Implements sys.exc_info().
         Return an OperationError instance or None."""
         frame = self.gettopframe_nohidden()
         while frame:
             if frame.last_exception is not None:
-                return self._convert_exc(frame.last_exception)
+                return frame.last_exception
             frame = self.getnextframe_nohidden(frame)
         return None
 
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -651,7 +651,8 @@
             raise OperationError(space.w_MemoryError, space.w_None)
         except rstackovf.StackOverflow, e:
             rstackovf.check_stack_overflow()
-            raise space.prebuilt_recursion_error
+            raise OperationError(space.w_RuntimeError,
+                                space.wrap("maximum recursion depth exceeded"))
         except RuntimeError:   # not on top of py.py
             raise OperationError(space.w_RuntimeError, space.w_None)
 
diff --git a/pypy/interpreter/pyopcode.py b/pypy/interpreter/pyopcode.py
--- a/pypy/interpreter/pyopcode.py
+++ b/pypy/interpreter/pyopcode.py
@@ -90,10 +90,6 @@
             next_instr = self.dispatch_bytecode(co_code, next_instr, ec)
         except OperationError, operr:
             next_instr = self.handle_operation_error(ec, operr)
-        except Reraise:
-            operr = self.last_exception
-            next_instr = self.handle_operation_error(ec, operr,
-                                                     attach_tb=False)
         except RaiseWithExplicitTraceback, e:
             next_instr = self.handle_operation_error(ec, e.operr,
                                                      attach_tb=False)
@@ -104,9 +100,11 @@
             next_instr = self.handle_asynchronous_error(ec,
                 self.space.w_MemoryError)
         except rstackovf.StackOverflow, e:
+            # Note that this case catches AttributeError!
             rstackovf.check_stack_overflow()
-            w_err = self.space.prebuilt_recursion_error
-            next_instr = self.handle_operation_error(ec, w_err)
+            next_instr = self.handle_asynchronous_error(ec,
+                self.space.w_RuntimeError,
+                self.space.wrap("maximum recursion depth exceeded"))
         return next_instr
 
     def handle_asynchronous_error(self, ec, w_type, w_value=None):
@@ -540,7 +538,7 @@
             ec = self.space.getexecutioncontext()
             while frame:
                 if frame.last_exception is not None:
-                    operror = ec._convert_exc(frame.last_exception)
+                    operror = frame.last_exception
                     break
                 frame = frame.f_backref()
             else:
@@ -548,7 +546,7 @@
                     space.wrap("raise: no active exception to re-raise"))
             # re-raise, no new traceback obj will be attached
             self.last_exception = operror
-            raise Reraise
+            raise RaiseWithExplicitTraceback(operror)
 
         w_value = w_traceback = space.w_None
         if nbargs >= 3:
@@ -966,6 +964,7 @@
                       isinstance(unroller, SApplicationException))
         if is_app_exc:
             operr = unroller.operr
+            self.last_exception = operr
             w_traceback = self.space.wrap(operr.get_traceback())
             w_suppress = self.call_contextmanager_exit_function(
                 w_exitfunc,
@@ -1165,10 +1164,8 @@
 class Yield(ExitFrame):
     """Raised when exiting a frame via a 'yield' statement."""
 
-class Reraise(Exception):
-    """Raised at interp-level by a bare 'raise' statement."""
 class RaiseWithExplicitTraceback(Exception):
-    """Raised at interp-level by a 3-arguments 'raise' statement."""
+    """Raised at interp-level by a 0- or 3-arguments 'raise' statement."""
     def __init__(self, operr):
         self.operr = operr
 
@@ -1225,12 +1222,6 @@
     def nomoreblocks(self):
         raise RaiseWithExplicitTraceback(self.operr)
 
-    def state_unpack_variables(self, space):
-        return [self.operr.w_type, self.operr.get_w_value(space)]
-    def state_pack_variables(space, w_type, w_value):
-        return SApplicationException(OperationError(w_type, w_value))
-    state_pack_variables = staticmethod(state_pack_variables)
-
 class SBreakLoop(SuspendedUnroller):
     """Signals a 'break' statement."""
     _immutable_ = True
diff --git a/pypy/interpreter/test/test_syntax.py b/pypy/interpreter/test/test_syntax.py
--- a/pypy/interpreter/test/test_syntax.py
+++ b/pypy/interpreter/test/test_syntax.py
@@ -1,3 +1,4 @@
+from __future__ import with_statement
 import py
 from pypy.conftest import gettestobjspace
 
@@ -567,6 +568,24 @@
         import types
         assert isinstance(acontextfact.exit_params[2], types.TracebackType)
 
+    def test_with_reraise_exception(self):
+        class Context:
+            def __enter__(self):
+                self.calls = []
+            def __exit__(self, exc_type, exc_value, exc_tb):
+                self.calls.append('exit')
+                raise
+
+        c = Context()
+        try:
+            with c:
+                1 / 0
+        except ZeroDivisionError:
+            pass
+        else:
+            raise AssertionError('Should have reraised initial exception')
+        assert c.calls == ['exit']
+
     def test_with_break(self):
 
         s = """
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -994,6 +994,7 @@
                              ('p', lltype.Ptr(TP)))
         a_box, A = self.alloc_array_of(ITEM, 15)
         s_box, S = self.alloc_instance(TP)
+        vsdescr = self.cpu.interiorfielddescrof(A, 'vs')
         kdescr = self.cpu.interiorfielddescrof(A, 'k')
         pdescr = self.cpu.interiorfielddescrof(A, 'p')
         self.execute_operation(rop.SETINTERIORFIELD_GC, [a_box, BoxInt(3),
@@ -1045,6 +1046,13 @@
         r = self.execute_operation(rop.GETINTERIORFIELD_GC, [a_box, BoxInt(3)],
                                    'ref', descr=pdescr)
         assert r.getref_base() == s_box.getref_base()
+        #
+        # test a corner case that used to fail on x86
+        i4 = BoxInt(4)
+        self.execute_operation(rop.SETINTERIORFIELD_GC, [a_box, i4, i4],
+                               'void', descr=vsdescr)
+        r = self.cpu.bh_getinteriorfield_gc_i(a_box.getref_base(), 4, vsdescr)
+        assert r == 4
 
     def test_string_basic(self):
         s_box = self.alloc_string("hello\xfe")
diff --git a/pypy/jit/backend/test/test_random.py b/pypy/jit/backend/test/test_random.py
--- a/pypy/jit/backend/test/test_random.py
+++ b/pypy/jit/backend/test/test_random.py
@@ -465,6 +465,16 @@
 
 # ____________________________________________________________
 
+def do_assert(condition, error_message):
+    if condition:
+        return
+    seed = pytest.config.option.randomseed
+    message = "%s\nPython: %s\nRandom seed: %r" % (
+        error_message,
+        sys.executable,
+        seed)
+    raise AssertionError(message)
+
 def Random():
     import random
     seed = pytest.config.option.randomseed
@@ -544,6 +554,7 @@
         self.startvars = startvars
         self.prebuilt_ptr_consts = []
         self.r = r
+        self.subloops = []
         self.build_random_loop(cpu, builder_factory, r, startvars, allow_delay)
 
     def build_random_loop(self, cpu, builder_factory, r, startvars, allow_delay):
@@ -668,13 +679,15 @@
 
         arguments = [box.value for box in self.loop.inputargs]
         fail = cpu.execute_token(self.runjitcelltoken(), *arguments)
-        assert fail is self.should_fail_by.getdescr()
+        do_assert(fail is self.should_fail_by.getdescr(),
+                  "Got %r, expected %r" % (fail,
+                                           self.should_fail_by.getdescr()))
         for i, v in enumerate(self.get_fail_args()):
             if isinstance(v, (BoxFloat, ConstFloat)):
                 value = cpu.get_latest_value_float(i)
             else:
                 value = cpu.get_latest_value_int(i)
-            assert value == self.expected[v], (
+            do_assert(value == self.expected[v],
                 "Got %r, expected %r for value #%d" % (value,
                                                        self.expected[v],
                                                        i)
@@ -683,9 +696,11 @@
         if (self.guard_op is not None and
             self.guard_op.is_guard_exception()):
             if self.guard_op.getopnum() == rop.GUARD_NO_EXCEPTION:
-                assert exc
+                do_assert(exc,
+                          "grab_exc_value() should not be %r" % (exc,))
         else:
-            assert not exc
+            do_assert(not exc,
+                      "unexpected grab_exc_value(): %r" % (exc,))
 
     def build_bridge(self):
         def exc_handling(guard_op):
@@ -710,6 +725,7 @@
             return False
         # generate the branch: a sequence of operations that ends in a FINISH
         subloop = DummyLoop([])
+        self.subloops.append(subloop)   # keep around for debugging
         if guard_op.is_guard_exception():
             subloop.operations.append(exc_handling(guard_op))
         bridge_builder = self.builder.fork(self.builder.cpu, subloop,
@@ -746,9 +762,6 @@
             args = [x.clonebox() for x in subset]
             rl = RandomLoop(self.builder.cpu, self.builder.fork,
                                      r, args)
-            dump(rl.loop)
-            self.cpu.compile_loop(rl.loop.inputargs, rl.loop.operations,
-                                  rl.loop._jitcelltoken)
             # done
             self.should_fail_by = rl.should_fail_by
             self.expected = rl.expected
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -1003,14 +1003,18 @@
         # If 'index_loc' is not an immediate, then we need a 'temp_loc' that
         # is a register whose value will be destroyed.  It's fine to destroy
         # the same register as 'index_loc', but not the other ones.
-        self.rm.possibly_free_var(box_index)
         if not isinstance(index_loc, ImmedLoc):
+            # ...that is, except in a corner case where 'index_loc' would be
+            # in the same register as 'value_loc'...
+            if index_loc is not value_loc:
+                self.rm.possibly_free_var(box_index)
             tempvar = TempBox()
             temp_loc = self.rm.force_allocate_reg(tempvar, [box_base,
                                                             box_value])
             self.rm.possibly_free_var(tempvar)
         else:
             temp_loc = None
+        self.rm.possibly_free_var(box_index)
         self.rm.possibly_free_var(box_base)
         self.possibly_free_var(box_value)
         self.PerformDiscard(op, [base_loc, ofs, itemsize, fieldsize,
diff --git a/pypy/jit/backend/x86/rx86.py b/pypy/jit/backend/x86/rx86.py
--- a/pypy/jit/backend/x86/rx86.py
+++ b/pypy/jit/backend/x86/rx86.py
@@ -576,7 +576,7 @@
     J_il8 = insn(immediate(1, 'o'), '\x70', immediate(2, 'b'))
     J_il = insn('\x0F', immediate(1,'o'), '\x80', relative(2))
 
-    SET_ir = insn(rex_w, '\x0F', immediate(1,'o'),'\x90', byte_register(2), '\xC0')
+    SET_ir = insn(rex_fw, '\x0F', immediate(1,'o'),'\x90', byte_register(2), '\xC0')
 
     # The 64-bit version of this, CQO, is defined in X86_64_CodeBuilder
     CDQ = insn(rex_nw, '\x99')
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -106,7 +106,8 @@
 
 def compile_loop(metainterp, greenkey, start,
                  inputargs, jumpargs,
-                 resume_at_jump_descr, full_preamble_needed=True):
+                 resume_at_jump_descr, full_preamble_needed=True,
+                 try_disabling_unroll=False):
     """Try to compile a new procedure by closing the current history back
     to the first operation.
     """
@@ -116,6 +117,13 @@
     jitdriver_sd = metainterp.jitdriver_sd
     history = metainterp.history
 
+    enable_opts = jitdriver_sd.warmstate.enable_opts
+    if try_disabling_unroll:
+        if 'unroll' not in enable_opts:
+            return None
+        enable_opts = enable_opts.copy()
+        del enable_opts['unroll']
+
     jitcell_token = make_jitcell_token(jitdriver_sd)
     part = create_empty_loop(metainterp)
     part.inputargs = inputargs[:]
@@ -126,7 +134,7 @@
                       [ResOperation(rop.LABEL, jumpargs, None, descr=jitcell_token)]
 
     try:
-        optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+        optimize_trace(metainterp_sd, part, enable_opts)
     except InvalidLoop:
         return None
     target_token = part.operations[0].getdescr()
@@ -153,7 +161,7 @@
         jumpargs = part.operations[-1].getarglist()
 
         try:
-            optimize_trace(metainterp_sd, part, jitdriver_sd.warmstate.enable_opts)
+            optimize_trace(metainterp_sd, part, enable_opts)
         except InvalidLoop:
             return None
             
diff --git a/pypy/jit/metainterp/pyjitpl.py b/pypy/jit/metainterp/pyjitpl.py
--- a/pypy/jit/metainterp/pyjitpl.py
+++ b/pypy/jit/metainterp/pyjitpl.py
@@ -2039,8 +2039,9 @@
                     memmgr = self.staticdata.warmrunnerdesc.memory_manager
                     if memmgr:
                         if self.cancel_count > memmgr.max_unroll_loops:
-                            self.staticdata.log('cancelled too many times!')
-                            raise SwitchToBlackhole(Counters.ABORT_BAD_LOOP)
+                            self.compile_loop_or_abort(original_boxes,
+                                                       live_arg_boxes,
+                                                       start, resumedescr)
                 self.staticdata.log('cancelled, tracing more...')
 
         # Otherwise, no loop found so far, so continue tracing.
@@ -2140,7 +2141,8 @@
                 return None
         return token
 
-    def compile_loop(self, original_boxes, live_arg_boxes, start, resume_at_jump_descr):
+    def compile_loop(self, original_boxes, live_arg_boxes, start,
+                     resume_at_jump_descr, try_disabling_unroll=False):
         num_green_args = self.jitdriver_sd.num_green_args
         greenkey = original_boxes[:num_green_args]
         if not self.partial_trace:
@@ -2156,7 +2158,8 @@
             target_token = compile.compile_loop(self, greenkey, start,
                                                 original_boxes[num_green_args:],
                                                 live_arg_boxes[num_green_args:],
-                                                resume_at_jump_descr)
+                                                resume_at_jump_descr,
+                                     try_disabling_unroll=try_disabling_unroll)
             if target_token is not None:
                 assert isinstance(target_token, TargetToken)
                 self.jitdriver_sd.warmstate.attach_procedure_to_interp(greenkey, target_token.targeting_jitcell_token)
@@ -2168,6 +2171,18 @@
             jitcell_token = target_token.targeting_jitcell_token
             self.raise_continue_running_normally(live_arg_boxes, jitcell_token)
 
+    def compile_loop_or_abort(self, original_boxes, live_arg_boxes,
+                              start, resume_at_jump_descr):
+        """Called after we aborted more than 'max_unroll_loops' times.
+        As a last attempt, try to compile the loop with unrolling disabled.
+        """
+        if not self.partial_trace:
+            self.compile_loop(original_boxes, live_arg_boxes, start,
+                              resume_at_jump_descr, try_disabling_unroll=True)
+        #
+        self.staticdata.log('cancelled too many times!')
+        raise SwitchToBlackhole(Counters.ABORT_BAD_LOOP)
+
     def compile_trace(self, live_arg_boxes, resume_at_jump_descr):
         num_green_args = self.jitdriver_sd.num_green_args
         greenkey = live_arg_boxes[:num_green_args]
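
The control flow added above amounts to: once a loop has been cancelled more
than max_unroll_loops times, make one last compilation attempt with the
'unroll' optimization removed, and only then abort to the blackhole
interpreter.  A stripped-down, self-contained sketch of that retry pattern
(all names here are simplified stand-ins, not the real JIT interfaces):

    class InvalidLoop(Exception):
        pass

    def optimize_trace(trace, enable_opts):
        # stand-in for the real optimizer: pretend unrolling chokes here
        if 'unroll' in enable_opts and trace.get('hard_to_unroll'):
            raise InvalidLoop
        return ('compiled', sorted(enable_opts))

    def compile_loop(trace, enable_opts, try_disabling_unroll=False):
        if try_disabling_unroll:
            if 'unroll' not in enable_opts:
                return None                    # nothing left to try
            enable_opts = enable_opts.copy()
            del enable_opts['unroll']          # retry with unrolling disabled
        try:
            return optimize_trace(trace, enable_opts)
        except InvalidLoop:
            return None

    def compile_loop_or_abort(trace, enable_opts):
        # last attempt before giving up and switching to the blackhole interp
        result = compile_loop(trace, enable_opts, try_disabling_unroll=True)
        if result is None:
            raise Exception('cancelled too many times!')
        return result

    opts = {'unroll': None, 'intbounds': None}
    assert compile_loop({'hard_to_unroll': True}, opts) is None
    assert compile_loop_or_abort({'hard_to_unroll': True}, opts) == \
           ('compiled', ['intbounds'])
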
diff --git a/pypy/jit/metainterp/test/test_ajit.py b/pypy/jit/metainterp/test/test_ajit.py
--- a/pypy/jit/metainterp/test/test_ajit.py
+++ b/pypy/jit/metainterp/test/test_ajit.py
@@ -2734,6 +2734,35 @@
         finally:
             optimizeopt.optimize_trace = old_optimize_trace
 
+    def test_max_unroll_loops_retry_without_unroll(self):
+        from pypy.jit.metainterp.optimize import InvalidLoop
+        from pypy.jit.metainterp import optimizeopt
+        myjitdriver = JitDriver(greens = [], reds = ['n', 'i'])
+        #
+        def f(n, limit):
+            set_param(myjitdriver, 'threshold', 5)
+            set_param(myjitdriver, 'max_unroll_loops', limit)
+            i = 0
+            while i < n:
+                myjitdriver.jit_merge_point(n=n, i=i)
+                print i
+                i += 1
+            return i
+        #
+        seen = []
+        def my_optimize_trace(metainterp_sd, loop, enable_opts, *args, **kwds):
+            seen.append('unroll' in enable_opts)
+            raise InvalidLoop
+        old_optimize_trace = optimizeopt.optimize_trace
+        optimizeopt.optimize_trace = my_optimize_trace
+        try:
+            res = self.meta_interp(f, [23, 4])
+            assert res == 23
+            assert False in seen
+            assert True in seen
+        finally:
+            optimizeopt.optimize_trace = old_optimize_trace
+
     def test_retrace_limit_with_extra_guards(self):
         myjitdriver = JitDriver(greens = [], reds = ['n', 'i', 'sa', 'a',
                                                      'node'])
diff --git a/pypy/module/_cffi_backend/__init__.py b/pypy/module/_cffi_backend/__init__.py
--- a/pypy/module/_cffi_backend/__init__.py
+++ b/pypy/module/_cffi_backend/__init__.py
@@ -1,11 +1,13 @@
 from pypy.interpreter.mixedmodule import MixedModule
+from pypy.rlib import rdynload
+
 
 class Module(MixedModule):
 
     appleveldefs = {
         }
     interpleveldefs = {
-        '__version__': 'space.wrap("0.3")',
+        '__version__': 'space.wrap("0.4")',
 
         'nonstandard_integer_types': 'misc.nonstandard_integer_types',
 
@@ -27,7 +29,8 @@
         'alignof': 'func.alignof',
         'sizeof': 'func.sizeof',
         'typeof': 'func.typeof',
-        'offsetof': 'func.offsetof',
+        'typeoffsetof': 'func.typeoffsetof',
+        'rawaddressof': 'func.rawaddressof',
         '_getfields': 'func._getfields',
         'getcname': 'func.getcname',
         '_get_types': 'func._get_types',
@@ -41,3 +44,12 @@
         'FFI_DEFAULT_ABI': 'ctypefunc._get_abi(space, "FFI_DEFAULT_ABI")',
         'FFI_CDECL': 'ctypefunc._get_abi(space,"FFI_DEFAULT_ABI")',#win32 name
         }
+
+for _name in ["RTLD_LAZY", "RTLD_NOW", "RTLD_GLOBAL", "RTLD_LOCAL",
+              "RTLD_NODELETE", "RTLD_NOLOAD", "RTLD_DEEPBIND"]:
+    if getattr(rdynload.cConfig, _name) is not None:
+        Module.interpleveldefs[_name] = 'space.wrap(%d)' % (
+            getattr(rdynload.cConfig, _name),)
+
+for _name in ["RTLD_LAZY", "RTLD_NOW", "RTLD_GLOBAL", "RTLD_LOCAL"]:
+    Module.interpleveldefs.setdefault(_name, 'space.wrap(0)')
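
With the RTLD_* constants exported (and the common ones defaulted to 0 where a
platform lacks them), dlopen flags can be combined at application level in the
usual way.  A small usage sketch against the _cffi_backend module; the library
lookup via ctypes.util is only for the example:

    import _cffi_backend
    import ctypes.util

    path = ctypes.util.find_library('m')
    flags = _cffi_backend.RTLD_NOW | _cffi_backend.RTLD_GLOBAL
    lib = _cffi_backend.load_library(path, flags)
    print lib        # <clibrary '...'>
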
diff --git a/pypy/module/_cffi_backend/ctypeobj.py b/pypy/module/_cffi_backend/ctypeobj.py
--- a/pypy/module/_cffi_backend/ctypeobj.py
+++ b/pypy/module/_cffi_backend/ctypeobj.py
@@ -134,14 +134,22 @@
                               "ctype '%s' is of unknown alignment",
                               self.name)
 
-    def offsetof(self, fieldname):
+    def typeoffsetof(self, fieldname):
         space = self.space
-        raise OperationError(space.w_TypeError,
-                             space.wrap("not a struct or union ctype"))
+        if fieldname is None:
+            msg = "expected a struct or union ctype"
+        else:
+            msg = "expected a struct or union ctype, or a pointer to one"
+        raise OperationError(space.w_TypeError, space.wrap(msg))
 
     def _getfields(self):
         return None
 
+    def rawaddressof(self, cdata, offset):
+        space = self.space
+        raise OperationError(space.w_TypeError,
+                             space.wrap("expected a pointer ctype"))
+
     def call(self, funcaddr, args_w):
         space = self.space
         raise operationerrfmt(space.w_TypeError,
diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -52,19 +52,25 @@
         if (isinstance(ob, cdataobj.W_CData) and
                isinstance(ob.ctype, ctypeptr.W_CTypePtrOrArray)):
             value = rffi.cast(lltype.Signed, ob._cdata)
-            value = r_ulonglong(value)
+            value = self._cast_result(value)
         elif space.isinstance_w(w_ob, space.w_str):
             value = self.cast_str(w_ob)
-            value = r_ulonglong(value)
+            value = self._cast_result(value)
         elif space.isinstance_w(w_ob, space.w_unicode):
             value = self.cast_unicode(w_ob)
-            value = r_ulonglong(value)
+            value = self._cast_result(value)
         else:
-            value = misc.as_unsigned_long_long(space, w_ob, strict=False)
+            value = self._cast_generic(w_ob)
         w_cdata = cdataobj.W_CDataMem(space, self.size, self)
         w_cdata.write_raw_integer_data(value)
         return w_cdata
 
+    def _cast_result(self, intvalue):
+        return r_ulonglong(intvalue)
+
+    def _cast_generic(self, w_ob):
+        return misc.as_unsigned_long_long(self.space, w_ob, strict=False)
+
     def _overflow(self, w_ob):
         space = self.space
         s = space.str_w(space.str(w_ob))
@@ -163,13 +169,9 @@
             self.vrangemax = (r_ulonglong(1) << sh) - 1
 
     def int(self, cdata):
-        if self.value_fits_long:
-            # this case is to handle enums, but also serves as a slight
-            # performance improvement for some other primitive types
-            value = misc.read_raw_long_data(cdata, self.size)
-            return self.space.wrap(value)
-        else:
-            return self.convert_to_object(cdata)
+        # enums: really call convert_to_object() just below,
+        # and not the one overridden in W_CTypeEnum.
+        return W_CTypePrimitiveSigned.convert_to_object(self, cdata)
 
     def convert_to_object(self, cdata):
         if self.value_fits_long:
@@ -203,8 +205,11 @@
         W_CTypePrimitive.__init__(self, *args)
         self.value_fits_long = self.size < rffi.sizeof(lltype.Signed)
         if self.size < rffi.sizeof(lltype.SignedLongLong):
-            sh = self.size * 8
-            self.vrangemax = (r_ulonglong(1) << sh) - 1
+            self.vrangemax = self._compute_vrange_max()
+
+    def _compute_vrange_max(self):
+        sh = self.size * 8
+        return (r_ulonglong(1) << sh) - 1
 
     def int(self, cdata):
         return self.convert_to_object(cdata)
@@ -231,6 +236,23 @@
         return self
 
 
+class W_CTypePrimitiveBool(W_CTypePrimitiveUnsigned):
+    _attrs_ = []
+
+    def _compute_vrange_max(self):
+        return r_ulonglong(1)
+
+    def _cast_result(self, intvalue):
+        return r_ulonglong(intvalue != 0)
+
+    def _cast_generic(self, w_ob):
+        return misc.object_as_bool(self.space, w_ob)
+
+    def string(self, cdataobj, maxlen):
+        # bypass the method 'string' implemented in W_CTypePrimitive
+        return W_CType.string(self, cdataobj, maxlen)
+
+
 class W_CTypePrimitiveFloat(W_CTypePrimitive):
     _attrs_ = []
 
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -70,7 +70,8 @@
             for i in range(len(lst_w)):
                 ctitem.convert_from_object(cdata, lst_w[i])
                 cdata = rffi.ptradd(cdata, ctitem.size)
-        elif isinstance(self.ctitem, ctypeprim.W_CTypePrimitiveChar):
+        elif (self.ctitem.is_primitive_integer and
+              self.ctitem.size == rffi.sizeof(lltype.Char)):
             try:
                 s = space.str_w(w_ob)
             except OperationError, e:
@@ -274,18 +275,26 @@
             return True
         else:
             set_mustfree_flag(cdata, False)
-            try:
-                self.convert_from_object(cdata, w_ob)
-            except OperationError:
-                if (self.is_struct_ptr and isinstance(ob, cdataobj.W_CData)
-                    and ob.ctype is self.ctitem):
-                    # special case to make the life of verifier.py easier:
-                    # if the formal argument type is 'struct foo *' but
-                    # we pass a 'struct foo', then get a pointer to it
-                    rffi.cast(rffi.CCHARPP, cdata)[0] = ob._cdata
-                else:
-                    raise
+            self.convert_from_object(cdata, w_ob)
             return False
 
     def getcfield(self, attr):
         return self.ctitem.getcfield(attr)
+
+    def typeoffsetof(self, fieldname):
+        if fieldname is None:
+            return W_CTypePtrBase.typeoffsetof(self, fieldname)
+        else:
+            return self.ctitem.typeoffsetof(fieldname)
+
+    def rawaddressof(self, cdata, offset):
+        from pypy.module._cffi_backend.ctypestruct import W_CTypeStructOrUnion
+        space = self.space
+        ctype2 = cdata.ctype
+        if (isinstance(ctype2, W_CTypeStructOrUnion) or
+            (isinstance(ctype2, W_CTypePtrOrArray) and ctype2.is_struct_ptr)):
+            ptrdata = rffi.ptradd(cdata._cdata, offset)
+            return cdataobj.W_CData(space, ptrdata, self)
+        else:
+            raise OperationError(space.w_TypeError,
+                     space.wrap("expected a 'cdata struct-or-union' object"))
diff --git a/pypy/module/_cffi_backend/ctypestruct.py b/pypy/module/_cffi_backend/ctypestruct.py
--- a/pypy/module/_cffi_backend/ctypestruct.py
+++ b/pypy/module/_cffi_backend/ctypestruct.py
@@ -61,14 +61,19 @@
         keepalive_until_here(ob)
         return ob
 
-    def offsetof(self, fieldname):
+    def typeoffsetof(self, fieldname):
+        if fieldname is None:
+            return (self, 0)
         self.check_complete()
+        space = self.space
         try:
             cfield = self.fields_dict[fieldname]
         except KeyError:
-            space = self.space
             raise OperationError(space.w_KeyError, space.wrap(fieldname))
-        return cfield.offset
+        if cfield.bitshift >= 0:
+            raise OperationError(space.w_TypeError,
+                                 space.wrap("not supported for bitfields"))
+        return (cfield.ctype, cfield.offset)
 
     def _copy_from_same(self, cdata, w_ob):
         space = self.space
diff --git a/pypy/module/_cffi_backend/func.py b/pypy/module/_cffi_backend/func.py
--- a/pypy/module/_cffi_backend/func.py
+++ b/pypy/module/_cffi_backend/func.py
@@ -53,15 +53,19 @@
     align = ctype.alignof()
     return space.wrap(align)
 
-@unwrap_spec(ctype=ctypeobj.W_CType, fieldname=str)
-def offsetof(space, ctype, fieldname):
-    ofs = ctype.offsetof(fieldname)
-    return space.wrap(ofs)
+@unwrap_spec(ctype=ctypeobj.W_CType, fieldname="str_or_None")
+def typeoffsetof(space, ctype, fieldname):
+    ctype, offset = ctype.typeoffsetof(fieldname)
+    return space.newtuple([space.wrap(ctype), space.wrap(offset)])
 
 @unwrap_spec(ctype=ctypeobj.W_CType)
 def _getfields(space, ctype):
     return ctype._getfields()
 
+@unwrap_spec(ctype=ctypeobj.W_CType, cdata=cdataobj.W_CData, offset=int)
+def rawaddressof(space, ctype, cdata, offset=0):
+    return ctype.rawaddressof(cdata, offset)
+
 # ____________________________________________________________
 
 @unwrap_spec(ctype=ctypeobj.W_CType, replace_with=str)
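
Together, typeoffsetof() and rawaddressof() are enough to rebuild the old
offsetof() and to take the raw address of a field.  A sketch of how the two
compose at application level; the struct and field names are only examples:

    from _cffi_backend import (new_primitive_type, new_struct_type,
                               new_pointer_type, complete_struct_or_union,
                               newp, typeoffsetof, rawaddressof)

    BInt = new_primitive_type("int")
    BStruct = new_struct_type("point")
    BStructPtr = new_pointer_type(BStruct)
    complete_struct_or_union(BStruct, [('x', BInt, -1), ('y', BInt, -1)])

    def offsetof(BType, fieldname):
        # the old offsetof() is just the second item of typeoffsetof()
        return typeoffsetof(BType, fieldname)[1]

    p = newp(BStructPtr)
    p.y = 42
    BFieldType, offset = typeoffsetof(BStruct, 'y')            # (BInt, offset)
    q = rawaddressof(new_pointer_type(BFieldType), p[0], offset)   # an 'int *'
    assert q[0] == 42
    assert offsetof(BStruct, 'x') == 0
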
diff --git a/pypy/module/_cffi_backend/libraryobj.py b/pypy/module/_cffi_backend/libraryobj.py
--- a/pypy/module/_cffi_backend/libraryobj.py
+++ b/pypy/module/_cffi_backend/libraryobj.py
@@ -5,7 +5,6 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rlib.rdynload import DLLHANDLE, dlopen, dlsym, dlclose, DLOpenError
-from pypy.rlib.rdynload import RTLD_GLOBAL
 
 from pypy.module._cffi_backend.cdataobj import W_CData
 from pypy.module._cffi_backend.ctypeobj import W_CType
@@ -15,20 +14,16 @@
     _immutable_ = True
     handle = rffi.cast(DLLHANDLE, 0)
 
-    def __init__(self, space, filename, is_global):
+    def __init__(self, space, filename, flags):
         self.space = space
-        if is_global and RTLD_GLOBAL is not None:
-            mode = RTLD_GLOBAL
-        else:
-            mode = -1     # default value, corresponds to RTLD_LOCAL
         with rffi.scoped_str2charp(filename) as ll_libname:
             if filename is None:
                 filename = "<None>"
             try:
-                self.handle = dlopen(ll_libname, mode)
+                self.handle = dlopen(ll_libname, flags)
             except DLOpenError, e:
                 raise operationerrfmt(space.w_OSError,
-                                      "cannot load '%s': %s",
+                                      "cannot load library %s: %s",
                                       filename, e.msg)
         self.name = filename
 
@@ -100,7 +95,7 @@
 W_Library.acceptable_as_base_class = False
 
 
-@unwrap_spec(filename="str_or_None", is_global=int)
-def load_library(space, filename, is_global=0):
-    lib = W_Library(space, filename, is_global)
+@unwrap_spec(filename="str_or_None", flags=int)
+def load_library(space, filename, flags=0):
+    lib = W_Library(space, filename, flags)
     return space.wrap(lib)
diff --git a/pypy/module/_cffi_backend/misc.py b/pypy/module/_cffi_backend/misc.py
--- a/pypy/module/_cffi_backend/misc.py
+++ b/pypy/module/_cffi_backend/misc.py
@@ -3,7 +3,9 @@
 from pypy.rpython.lltypesystem import lltype, llmemory, rffi
 from pypy.rlib.rarithmetic import r_ulonglong
 from pypy.rlib.unroll import unrolling_iterable
+from pypy.rlib.objectmodel import keepalive_until_here
 from pypy.rlib import jit
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
 
 # ____________________________________________________________
 
@@ -121,6 +123,14 @@
 
 # ____________________________________________________________
 
+def _is_a_float(space, w_ob):
+    from pypy.module._cffi_backend.cdataobj import W_CData
+    from pypy.module._cffi_backend.ctypeprim import W_CTypePrimitiveFloat
+    ob = space.interpclass_w(w_ob)
+    if isinstance(ob, W_CData):
+        return isinstance(ob.ctype, W_CTypePrimitiveFloat)
+    return space.isinstance_w(w_ob, space.w_float)
+
 def as_long_long(space, w_ob):
     # (possibly) convert and cast a Python object to a long long.
     # This version accepts a Python int too, and does convertions from
@@ -132,7 +142,7 @@
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
-        if space.isinstance_w(w_ob, space.w_float):
+        if _is_a_float(space, w_ob):
             raise
         bigint = space.bigint_w(space.int(w_ob))
     try:
@@ -155,7 +165,7 @@
     except OperationError, e:
         if not e.match(space, space.w_TypeError):
             raise
-        if strict and space.isinstance_w(w_ob, space.w_float):
+        if strict and _is_a_float(space, w_ob):
             raise
         bigint = space.bigint_w(space.int(w_ob))
     if strict:
@@ -173,6 +183,61 @@
 
 # ____________________________________________________________
 
+class _NotStandardObject(Exception):
+    pass
+
+def _standard_object_as_bool(space, w_ob):
+    if space.isinstance_w(w_ob, space.w_int):
+        return space.int_w(w_ob) != 0
+    if space.isinstance_w(w_ob, space.w_long):
+        return space.bigint_w(w_ob).tobool()
+    if space.isinstance_w(w_ob, space.w_float):
+        return space.float_w(w_ob) != 0.0
+    raise _NotStandardObject
+
+# hackish, but the most straightforward way to know if a LONGDOUBLE object
+# contains the value 0 or not.
+eci = ExternalCompilationInfo(post_include_bits=["""
+#define pypy__is_nonnull_longdouble(x)  ((x) != 0.0)
+"""])
+is_nonnull_longdouble = rffi.llexternal(
+    "pypy__is_nonnull_longdouble", [rffi.LONGDOUBLE], lltype.Bool,
+    compilation_info=eci, _nowrapper=True, elidable_function=True,
+    sandboxsafe=True)
+
+def object_as_bool(space, w_ob):
+    # convert and cast a Python object to a boolean.  Accept an integer
+    # or a float object, up to a CData 'long double'.
+    try:
+        return _standard_object_as_bool(space, w_ob)
+    except _NotStandardObject:
+        pass
+    #
+    from pypy.module._cffi_backend.cdataobj import W_CData
+    from pypy.module._cffi_backend.ctypeprim import W_CTypePrimitiveFloat
+    from pypy.module._cffi_backend.ctypeprim import W_CTypePrimitiveLongDouble
+    ob = space.interpclass_w(w_ob)
+    is_cdata = isinstance(ob, W_CData)
+    if is_cdata and isinstance(ob.ctype, W_CTypePrimitiveFloat):
+        if isinstance(ob.ctype, W_CTypePrimitiveLongDouble):
+            result = is_nonnull_longdouble(read_raw_longdouble_data(ob._cdata))
+        else:
+            result = read_raw_float_data(ob._cdata, ob.ctype.size) != 0.0
+        keepalive_until_here(ob)
+        return result
+    #
+    if not is_cdata and space.lookup(w_ob, '__float__') is not None:
+        w_io = space.float(w_ob)
+    else:
+        w_io = space.int(w_ob)
+    try:
+        return _standard_object_as_bool(space, w_io)
+    except _NotStandardObject:
+        raise OperationError(space.w_TypeError,
+                             space.wrap("integer/float expected"))
+
+# ____________________________________________________________
+
 def _raw_memcopy(source, dest, size):
     if jit.isconstant(size):
         # for the JIT: first handle the case where 'size' is known to be
diff --git a/pypy/module/_cffi_backend/newtype.py b/pypy/module/_cffi_backend/newtype.py
--- a/pypy/module/_cffi_backend/newtype.py
+++ b/pypy/module/_cffi_backend/newtype.py
@@ -38,6 +38,7 @@
 eptype("float",  rffi.FLOAT,  ctypeprim.W_CTypePrimitiveFloat)
 eptype("double", rffi.DOUBLE, ctypeprim.W_CTypePrimitiveFloat)
 eptype("long double", rffi.LONGDOUBLE, ctypeprim.W_CTypePrimitiveLongDouble)
+eptype("_Bool",  lltype.Bool,          ctypeprim.W_CTypePrimitiveBool)
 
 @unwrap_spec(name=str)
 def new_primitive_type(space, name):
diff --git a/pypy/module/_cffi_backend/test/_backend_test_c.py b/pypy/module/_cffi_backend/test/_backend_test_c.py
--- a/pypy/module/_cffi_backend/test/_backend_test_c.py
+++ b/pypy/module/_cffi_backend/test/_backend_test_c.py
@@ -42,19 +42,34 @@
     return sizeof(BPtr)
 
 
-def find_and_load_library(name, is_global=0):
+def find_and_load_library(name, flags=RTLD_NOW):
     import ctypes.util
     if name is None:
         path = None
     else:
         path = ctypes.util.find_library(name)
-    return load_library(path, is_global)
+    return load_library(path, flags)
 
 def test_load_library():
     x = find_and_load_library('c')
     assert repr(x).startswith("<clibrary '")
-    x = find_and_load_library('c', 1)
+    x = find_and_load_library('c', RTLD_NOW | RTLD_GLOBAL)
     assert repr(x).startswith("<clibrary '")
+    x = find_and_load_library('c', RTLD_LAZY)
+    assert repr(x).startswith("<clibrary '")
+
+def test_all_rtld_symbols():
+    import sys
+    FFI_DEFAULT_ABI        # these symbols must be defined
+    FFI_CDECL
+    RTLD_LAZY
+    RTLD_NOW
+    RTLD_GLOBAL
+    RTLD_LOCAL
+    if sys.platform.startswith("linux"):
+        RTLD_NODELETE
+        RTLD_NOLOAD
+        RTLD_DEEPBIND
 
 def test_nonstandard_integer_types():
     d = nonstandard_integer_types()
@@ -741,6 +756,8 @@
     assert repr(s.a1).startswith("<cdata 'int[5]' 0x")
 
 def test_offsetof():
+    def offsetof(BType, fieldname):
+        return typeoffsetof(BType, fieldname)[1]
     BInt = new_primitive_type("int")
     BStruct = new_struct_type("foo")
     py.test.raises(TypeError, offsetof, BInt, "abc")
@@ -749,6 +766,7 @@
     assert offsetof(BStruct, 'abc') == 0
     assert offsetof(BStruct, 'def') == size_of_int()
     py.test.raises(KeyError, offsetof, BStruct, "ghi")
+    assert offsetof(new_pointer_type(BStruct), "def") == size_of_int()
 
 def test_function_type():
     BInt = new_primitive_type("int")
@@ -888,11 +906,8 @@
     BFunc20 = new_function_type((BStructPtr,), BShort, False)
     f = cast(BFunc20, _testfunc(20))
     x = newp(BStructPtr, {'a1': b'A', 'a2': -4042})
-    # test the exception that allows us to pass a 'struct foo' where the
-    # function really expects a 'struct foo *'.
-    res = f(x[0])
-    assert res == -4042 + ord(b'A')
-    assert res == f(x)
+    # can't pass a 'struct foo'
+    py.test.raises(TypeError, f, x[0])
 
 def test_call_function_21():
     BInt = new_primitive_type("int")
@@ -2076,3 +2091,116 @@
     assert not isinstance(nullchr, CType)
     assert not isinstance(chrref, CType)
     assert isinstance(BChar, CType)
+
+def test_no_cdata_float():
+    BInt = new_primitive_type("int")
+    BIntP = new_pointer_type(BInt)
+    BUInt = new_primitive_type("unsigned int")
+    BUIntP = new_pointer_type(BUInt)
+    BFloat = new_primitive_type("float")
+    py.test.raises(TypeError, newp, BIntP, cast(BFloat, 0.0))
+    py.test.raises(TypeError, newp, BUIntP, cast(BFloat, 0.0))
+
+def test_bool():
+    BBool = new_primitive_type("_Bool")
+    BBoolP = new_pointer_type(BBool)
+    assert int(cast(BBool, False)) == 0
+    assert int(cast(BBool, True)) == 1
+    assert bool(cast(BBool, False)) is True    # warning!
+    assert int(cast(BBool, 3)) == 1
+    assert int(cast(BBool, long(3))) == 1
+    assert int(cast(BBool, long(10)**4000)) == 1
+    assert int(cast(BBool, -0.1)) == 1
+    assert int(cast(BBool, -0.0)) == 0
+    assert int(cast(BBool, '\x00')) == 0
+    assert int(cast(BBool, '\xff')) == 1
+    assert newp(BBoolP, False)[0] == 0
+    assert newp(BBoolP, True)[0] == 1
+    assert newp(BBoolP, 0)[0] == 0
+    assert newp(BBoolP, 1)[0] == 1
+    py.test.raises(TypeError, newp, BBoolP, 1.0)
+    py.test.raises(TypeError, newp, BBoolP, '\x00')
+    py.test.raises(OverflowError, newp, BBoolP, 2)
+    py.test.raises(OverflowError, newp, BBoolP, -1)
+    BCharP = new_pointer_type(new_primitive_type("char"))
+    p = newp(BCharP, 'X')
+    q = cast(BBoolP, p)
+    assert q[0] == ord('X')
+    py.test.raises(TypeError, string, cast(BBool, False))
+    BDouble = new_primitive_type("double")
+    assert int(cast(BBool, cast(BDouble, 0.1))) == 1
+    assert int(cast(BBool, cast(BDouble, 0.0))) == 0
+
+def test_typeoffsetof():
+    BChar = new_primitive_type("char")
+    BStruct = new_struct_type("foo")
+    BStructPtr = new_pointer_type(BStruct)
+    complete_struct_or_union(BStruct, [('a1', BChar, -1),
+                                       ('a2', BChar, -1),
+                                       ('a3', BChar, -1)])
+    py.test.raises(TypeError, typeoffsetof, BStructPtr, None)
+    assert typeoffsetof(BStruct, None) == (BStruct, 0)
+    assert typeoffsetof(BStructPtr, 'a1') == (BChar, 0)
+    assert typeoffsetof(BStruct, 'a1') == (BChar, 0)
+    assert typeoffsetof(BStructPtr, 'a2') == (BChar, 1)
+    assert typeoffsetof(BStruct, 'a3') == (BChar, 2)
+    py.test.raises(KeyError, typeoffsetof, BStructPtr, 'a4')
+    py.test.raises(KeyError, typeoffsetof, BStruct, 'a5')
+
+def test_typeoffsetof_no_bitfield():
+    BInt = new_primitive_type("int")
+    BStruct = new_struct_type("foo")
+    complete_struct_or_union(BStruct, [('a1', BInt, 4)])
+    py.test.raises(TypeError, typeoffsetof, BStruct, 'a1')
+
+def test_rawaddressof():
+    BChar = new_primitive_type("char")
+    BCharP = new_pointer_type(BChar)
+    BStruct = new_struct_type("foo")
+    BStructPtr = new_pointer_type(BStruct)
+    complete_struct_or_union(BStruct, [('a1', BChar, -1),
+                                       ('a2', BChar, -1),
+                                       ('a3', BChar, -1)])
+    p = newp(BStructPtr)
+    assert repr(p) == "<cdata 'struct foo *' owning 3 bytes>"
+    s = p[0]
+    assert repr(s) == "<cdata 'struct foo' owning 3 bytes>"
+    a = rawaddressof(BStructPtr, s)
+    assert repr(a).startswith("<cdata 'struct foo *' 0x")
+    py.test.raises(TypeError, rawaddressof, BStruct, s)
+    b = rawaddressof(BCharP, s)
+    assert b == cast(BCharP, p)
+    c = rawaddressof(BStructPtr, a)
+    assert c == a
+    py.test.raises(TypeError, rawaddressof, BStructPtr, cast(BChar, '?'))
+    #
+    d = rawaddressof(BCharP, s, 1)
+    assert d == cast(BCharP, p) + 1
+
+def test_newp_signed_unsigned_char():
+    BCharArray = new_array_type(
+        new_pointer_type(new_primitive_type("char")), None)
+    p = newp(BCharArray, b"foo")
+    assert len(p) == 4
+    assert list(p) == [b"f", b"o", b"o", b"\x00"]
+    #
+    BUCharArray = new_array_type(
+        new_pointer_type(new_primitive_type("unsigned char")), None)
+    p = newp(BUCharArray, b"fo\xff")
+    assert len(p) == 4
+    assert list(p) == [ord("f"), ord("o"), 0xff, 0]
+    #
+    BSCharArray = new_array_type(
+        new_pointer_type(new_primitive_type("signed char")), None)
+    p = newp(BSCharArray, b"fo\xff")
+    assert len(p) == 4
+    assert list(p) == [ord("f"), ord("o"), -1, 0]
+
+def test_newp_from_bytearray_doesnt_work():
+    BCharArray = new_array_type(
+        new_pointer_type(new_primitive_type("char")), None)
+    py.test.raises(TypeError, newp, BCharArray, bytearray(b"foo"))
+    p = newp(BCharArray, 4)
+    buffer(p)[:] = bytearray(b"foo\x00")
+    assert len(p) == 4
+    assert list(p) == [b"f", b"o", b"o", b"\x00"]
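
In terms of the public cffi API, the backend-level typeoffsetof() and rawaddressof() exercised above roughly correspond to ffi.offsetof() and ffi.addressof(); a minimal sketch, assuming a cffi release that provides both helpers:

    import cffi

    ffi = cffi.FFI()
    ffi.cdef("struct foo { char a1, a2, a3; };")
    s = ffi.new("struct foo *")
    assert ffi.offsetof("struct foo", "a2") == 1          # cf. typeoffsetof
    p = ffi.addressof(s[0])                               # cf. rawaddressof
    # both casts point at the same memory, so the cdata pointers compare equal
    assert ffi.cast("char *", p) == ffi.cast("char *", s)
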
diff --git a/pypy/module/_cffi_backend/test/test_c.py b/pypy/module/_cffi_backend/test/test_c.py
--- a/pypy/module/_cffi_backend/test/test_c.py
+++ b/pypy/module/_cffi_backend/test/test_c.py
@@ -22,7 +22,6 @@
 from pypy.tool.udir import udir
 from pypy.conftest import gettestobjspace, option
 from pypy.interpreter import gateway
-from pypy.module._cffi_backend.test import _backend_test_c
 from pypy.module._cffi_backend import Module
 from pypy.translator.platform import host
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
@@ -87,20 +86,24 @@
 
 all_names = ', '.join(Module.interpleveldefs.keys())
 
+backend_test_c = py.path.local(__file__).join('..', '_backend_test_c.py')
+
 lst = []
-for name, value in _backend_test_c.__dict__.items():
-    if name.startswith('test_'):
-        lst.append(value)
-lst.sort(key=lambda func: func.func_code.co_firstlineno)
+with backend_test_c.open('r') as f:
+    for line in f:
+        if line.startswith('def test_'):
+            line = line[4:]
+            line = line[:line.index('():')]
+            lst.append(line)
 
 tmpdir = udir.join('test_c').ensure(dir=1)
 
 tmpname = tmpdir.join('_test_c.py')
 with tmpname.open('w') as f:
     for func in lst:
-        print >> f, 'def %s(self):' % (func.__name__,)
+        print >> f, 'def %s(self):' % (func,)
         print >> f, '    import _all_test_c'
-        print >> f, '    _all_test_c.%s()' % (func.__name__,)
+        print >> f, '    _all_test_c.%s()' % (func,)
 
 tmpname2 = tmpdir.join('_all_test_c.py')
 with tmpname2.open('w') as f:
@@ -110,7 +113,7 @@
     print >> f, '    class test:'
     print >> f, '        raises = staticmethod(raises)'
     print >> f, '        skip = staticmethod(skip)'
-    print >> f, py.path.local(__file__).join('..', '_backend_test_c.py').read()
+    print >> f, backend_test_c.read()
 
 
 mod = tmpname.pyimport()
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -405,7 +405,6 @@
          "ascii_encode",
          "latin_1_encode",
          "utf_7_encode",
-         "utf_8_encode",
          "utf_16_encode",
          "utf_16_be_encode",
          "utf_16_le_encode",
@@ -422,7 +421,6 @@
          "ascii_decode",
          "latin_1_decode",
          "utf_7_decode",
-         "utf_8_decode",
          "utf_16_decode",
          "utf_16_be_decode",
          "utf_16_le_decode",
@@ -437,6 +435,30 @@
     make_encoder_wrapper('mbcs_encode')
     make_decoder_wrapper('mbcs_decode')
 
+# utf-8 functions are not regular, because we have to pass
+# "allow_surrogates=True"
+@unwrap_spec(uni=unicode, errors='str_or_None')
+def utf_8_encode(space, uni, errors="strict"):
+    if errors is None:
+        errors = 'strict'
+    state = space.fromcache(CodecState)
+    result = runicode.unicode_encode_utf_8(
+        uni, len(uni), errors, state.encode_error_handler,
+        allow_surrogates=True)
+    return space.newtuple([space.wrap(result), space.wrap(len(uni))])
+
+@unwrap_spec(string='bufferstr', errors='str_or_None')
+def utf_8_decode(space, string, errors="strict", w_final=False):
+    if errors is None:
+        errors = 'strict'
+    final = space.is_true(w_final)
+    state = space.fromcache(CodecState)
+    result, consumed = runicode.str_decode_utf_8(
+        string, len(string), errors,
+        final, state.decode_error_handler,
+        allow_surrogates=True)
+    return space.newtuple([space.wrap(result), space.wrap(consumed)])
+
 @unwrap_spec(data=str, errors='str_or_None', byteorder=int)
 def utf_16_ex_decode(space, data, errors='strict', byteorder=0, w_final=False):
     if errors is None:
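
The special-casing above exists because these wrappers must pass allow_surrogates=True explicitly, so as to match CPython 2.7's utf-8 codec, which lets lone surrogates round-trip instead of rejecting them. A rough app-level illustration of the behaviour being preserved (assuming CPython 2.7 semantics):

    import codecs

    lone = u'\udc80'                          # a lone low surrogate
    data, n = codecs.utf_8_encode(lone)       # accepted, not rejected
    assert (data, n) == ('\xed\xb2\x80', 1)
    text, consumed = codecs.utf_8_decode(data)
    assert (text, consumed) == (lone, 3)
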
diff --git a/pypy/module/_csv/__init__.py b/pypy/module/_csv/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/__init__.py
@@ -0,0 +1,87 @@
+from pypy.interpreter.mixedmodule import MixedModule
+
+
+class Module(MixedModule):
+    """CSV parsing and writing.
+
+This module provides classes that assist in the reading and writing
+of Comma Separated Value (CSV) files, and implements the interface
+described by PEP 305.  Although many CSV files are simple to parse,
+the format is not formally defined by a stable specification and
+is subtle enough that parsing lines of a CSV file with something
+like line.split(\",\") is bound to fail.  The module supports three
+basic APIs: reading, writing, and registration of dialects.
+
+
+DIALECT REGISTRATION:
+
+Readers and writers support a dialect argument, which is a convenient
+handle on a group of settings.  When the dialect argument is a string,
+it identifies one of the dialects previously registered with the module.
+If it is a class or instance, the attributes of the argument are used as
+the settings for the reader or writer:
+
+    class excel:
+        delimiter = ','
+        quotechar = '\"'
+        escapechar = None
+        doublequote = True
+        skipinitialspace = False
+        lineterminator = '\\r\\n'
+        quoting = QUOTE_MINIMAL
+
+SETTINGS:
+
+    * quotechar - specifies a one-character string to use as the 
+        quoting character.  It defaults to '\"'.
+    * delimiter - specifies a one-character string to use as the 
+        field separator.  It defaults to ','.
+    * skipinitialspace - specifies how to interpret whitespace which
+        immediately follows a delimiter.  It defaults to False, which
+        means that whitespace immediately following a delimiter is part
+        of the following field.
+    * lineterminator -  specifies the character sequence which should 
+        terminate rows.
+    * quoting - controls when quotes should be generated by the writer.
+        It can take on any of the following module constants:
+
+        csv.QUOTE_MINIMAL means only when required, for example, when a
+            field contains either the quotechar or the delimiter
+        csv.QUOTE_ALL means that quotes are always placed around fields.
+        csv.QUOTE_NONNUMERIC means that quotes are always placed around
+            fields which do not parse as integers or floating point
+            numbers.
+        csv.QUOTE_NONE means that quotes are never placed around fields.
+    * escapechar - specifies a one-character string used to escape 
+        the delimiter when quoting is set to QUOTE_NONE.
+    * doublequote - controls the handling of quotes inside fields.  When
+        True, two consecutive quotes are interpreted as one during read,
+        and when writing, each quote character embedded in the data is
+        written as two quotes.
+"""
+
+    appleveldefs = {
+        'register_dialect':   'app_csv.register_dialect',
+        'unregister_dialect': 'app_csv.unregister_dialect',
+        'get_dialect':        'app_csv.get_dialect',
+        'list_dialects':      'app_csv.list_dialects',
+        '_dialects':          'app_csv._dialects',
+
+        'Error':              'app_csv.Error',
+        }
+
+    interpleveldefs = {
+        '__version__':      'space.wrap("1.0")',
+
+        'QUOTE_MINIMAL':    'space.wrap(interp_csv.QUOTE_MINIMAL)',
+        'QUOTE_ALL':        'space.wrap(interp_csv.QUOTE_ALL)',
+        'QUOTE_NONNUMERIC': 'space.wrap(interp_csv.QUOTE_NONNUMERIC)',
+        'QUOTE_NONE':       'space.wrap(interp_csv.QUOTE_NONE)',
+
+        'Dialect': 'interp_csv.W_Dialect',
+
+        'reader': 'interp_reader.csv_reader',
+        'field_size_limit': 'interp_reader.csv_field_size_limit',
+
+        'writer': 'interp_writer.csv_writer',
+        }
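
As a usage sketch of the dialect registration described in the docstring above, via the app-level csv module that wraps this one:

    import csv

    # register a dialect under a name, then refer to it by that name
    csv.register_dialect('semicolons', delimiter=';',
                         quoting=csv.QUOTE_MINIMAL)
    rows = list(csv.reader(['a;"b;c";d'], dialect='semicolons'))
    assert rows == [['a', 'b;c', 'd']]
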
diff --git a/pypy/module/_csv/app_csv.py b/pypy/module/_csv/app_csv.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/app_csv.py
@@ -0,0 +1,33 @@
+import _csv
+
+class Error(Exception):
+    pass
+
+
+_dialects = {}
+
+def register_dialect(name, dialect=None, **kwargs):
+    """Create a mapping from a string name to a dialect class."""
+    if not isinstance(name, basestring):
+        raise TypeError("dialect name must be a string or unicode")
+
+    dialect = _csv.Dialect(dialect, **kwargs)
+    _dialects[name] = dialect
+
+def unregister_dialect(name):
+    """Delete the name/dialect mapping associated with a string name."""
+    try:
+        del _dialects[name]
+    except KeyError:
+        raise Error("unknown dialect")
+
+def get_dialect(name):
+    """Return the dialect instance associated with name."""
+    try:
+        return _dialects[name]
+    except KeyError:
+        raise Error("unknown dialect")
+
+def list_dialects():
+    """Return a list of all know dialect names."""
+    return list(_dialects)
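
A short sketch of how the registry functions above behave, again through the app-level csv module:

    import csv

    csv.register_dialect('pipes', delimiter='|')
    assert 'pipes' in csv.list_dialects()
    assert csv.get_dialect('pipes').delimiter == '|'
    csv.unregister_dialect('pipes')
    try:
        csv.get_dialect('pipes')
    except csv.Error:
        pass              # unknown names raise Error, as implemented above
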
diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/interp_csv.py
@@ -0,0 +1,175 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.typedef import TypeDef, interp_attrproperty
+from pypy.interpreter.typedef import GetSetProperty
+from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped
+
+
+QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE = range(4)
+
+
+class W_Dialect(Wrappable):
+    _immutable_fields_ = [
+        "dialect",
+        "delimiter",
+        "doublequote",
+        "escapechar",
+        "lineterminator",
+        "quotechar",
+        "quoting",
+        "skipinitialspace",
+        "strict",
+        ]
+
+def _fetch(space, w_dialect, name):
+    return space.findattr(w_dialect, space.wrap(name))
+
+def _get_bool(space, w_src, default):
+    if w_src is None:
+        return default
+    return space.is_true(w_src)
+
+def _get_int(space, w_src, default):
+    if w_src is None:
+        return default
+    return space.int_w(w_src)
+
+def _get_str(space, w_src, default):
+    if w_src is None:
+        return default
+    return space.str_w(w_src)
+
+def _get_char(space, w_src, default, name):
+    if w_src is None:
+        return default
+    if space.is_w(w_src, space.w_None):
+        return '\0'
+    src = space.str_w(w_src)
+    if len(src) == 1:
+        return src[0]
+    if len(src) == 0:
+        return '\0'
+    raise operationerrfmt(space.w_TypeError,
+                          '"%s" must be a 1-character string', name)
+
+def _build_dialect(space, w_dialect, w_delimiter, w_doublequote,
+                   w_escapechar, w_lineterminator, w_quotechar, w_quoting,
+                   w_skipinitialspace, w_strict):
+    if w_dialect is not None:
+        if space.isinstance_w(w_dialect, space.w_basestring):
+            w_module = space.getbuiltinmodule('_csv')
+            w_dialect = space.call_method(w_module, 'get_dialect', w_dialect)
+
+        dialect = space.interpclass_w(w_dialect)
+        if (isinstance(dialect, W_Dialect) and
+            w_delimiter is None and
+            w_doublequote is None and
+            w_escapechar is None and
+            w_lineterminator is None and
+            w_quotechar is None and
+            w_quoting is None and
+            w_skipinitialspace is None and
+            w_strict is None):
+            return dialect
+
+        if w_delimiter is None:
+            w_delimiter = _fetch(space, w_dialect, 'delimiter')
+        if w_doublequote is None:
+            w_doublequote = _fetch(space, w_dialect, 'doublequote')
+        if w_escapechar is None:
+            w_escapechar = _fetch(space, w_dialect, 'escapechar')
+        if w_lineterminator is None:
+            w_lineterminator = _fetch(space, w_dialect, 'lineterminator')
+        if w_quotechar is None:
+            w_quotechar = _fetch(space, w_dialect, 'quotechar')
+        if w_quoting is None:
+            w_quoting = _fetch(space, w_dialect, 'quoting')
+        if w_skipinitialspace is None:
+            w_skipinitialspace = _fetch(space, w_dialect, 'skipinitialspace')
+        if w_strict is None:
+            w_strict = _fetch(space, w_dialect, 'strict')
+
+    dialect = W_Dialect()
+    dialect.delimiter = _get_char(space, w_delimiter, ',', 'delimiter')
+    dialect.doublequote = _get_bool(space, w_doublequote, True)
+    dialect.escapechar = _get_char(space, w_escapechar, '\0', 'escapechar')
+    dialect.lineterminator = _get_str(space, w_lineterminator, '\r\n')
+    dialect.quotechar = _get_char(space, w_quotechar, '"', 'quotechar')
+    tmp_quoting = _get_int(space, w_quoting, QUOTE_MINIMAL)
+    dialect.skipinitialspace = _get_bool(space, w_skipinitialspace, False)
+    dialect.strict = _get_bool(space, w_strict, False)
+
+    # validate options
+    if not (0 <= tmp_quoting < 4):
+        raise OperationError(space.w_TypeError,
+                             space.wrap('bad "quoting" value'))
+
+    if dialect.delimiter == '\0':
+        raise OperationError(space.w_TypeError,
+                             space.wrap('delimiter must be set'))
+
+    if space.is_w(w_quotechar, space.w_None) and w_quoting is None:
+        tmp_quoting = QUOTE_NONE
+    if tmp_quoting != QUOTE_NONE and dialect.quotechar == '\0':
+        raise OperationError(space.w_TypeError,
+                        space.wrap('quotechar must be set if quoting enabled'))
+    dialect.quoting = tmp_quoting
+    return dialect
+
+def W_Dialect___new__(space, w_subtype, w_dialect = NoneNotWrapped,
+                      w_delimiter        = NoneNotWrapped,
+                      w_doublequote      = NoneNotWrapped,
+                      w_escapechar       = NoneNotWrapped,
+                      w_lineterminator   = NoneNotWrapped,
+                      w_quotechar        = NoneNotWrapped,
+                      w_quoting          = NoneNotWrapped,
+                      w_skipinitialspace = NoneNotWrapped,
+                      w_strict           = NoneNotWrapped,
+                      ):
+    dialect = _build_dialect(space, w_dialect, w_delimiter, w_doublequote,
+                             w_escapechar, w_lineterminator, w_quotechar,
+                             w_quoting, w_skipinitialspace, w_strict)
+    if space.is_w(w_subtype, space.gettypeobject(W_Dialect.typedef)):
+        return space.wrap(dialect)
+    else:
+        subdialect = space.allocate_instance(W_Dialect, w_subtype)
+        subdialect.delimiter        = dialect.delimiter
+        subdialect.doublequote      = dialect.doublequote
+        subdialect.escapechar       = dialect.escapechar
+        subdialect.lineterminator   = dialect.lineterminator
+        subdialect.quotechar        = dialect.quotechar
+        subdialect.quoting          = dialect.quoting
+        subdialect.skipinitialspace = dialect.skipinitialspace
+        subdialect.strict           = dialect.strict
+        return space.wrap(subdialect)
+
+
+def _get_escapechar(space, dialect):
+    if dialect.escapechar == '\0':
+        return space.w_None
+    return space.wrap(dialect.escapechar)
+
+def _get_quotechar(space, dialect):
+    if dialect.quotechar == '\0':
+        return space.w_None
+    return space.wrap(dialect.quotechar)
+
+
+W_Dialect.typedef = TypeDef(
+        'Dialect',
+        __module__ = '_csv',
+        __new__ = interp2app(W_Dialect___new__),
+
+        delimiter        = interp_attrproperty('delimiter', W_Dialect),
+        doublequote      = interp_attrproperty('doublequote', W_Dialect),
+        escapechar       = GetSetProperty(_get_escapechar, cls=W_Dialect),
+        lineterminator   = interp_attrproperty('lineterminator', W_Dialect),
+        quotechar        = GetSetProperty(_get_quotechar, cls=W_Dialect),
+        quoting          = interp_attrproperty('quoting', W_Dialect),
+        skipinitialspace = interp_attrproperty('skipinitialspace', W_Dialect),
+        strict           = interp_attrproperty('strict', W_Dialect),
+
+        __doc__ = """CSV dialect
+
+The Dialect type records CSV parsing and generation options.
+""")
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/interp_reader.py
@@ -0,0 +1,263 @@
+from pypy.rlib.rstring import StringBuilder
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError
+from pypy.interpreter.gateway import NoneNotWrapped, unwrap_spec
+from pypy.interpreter.typedef import TypeDef, interp2app
+from pypy.interpreter.typedef import interp_attrproperty_w, interp_attrproperty
+from pypy.module._csv.interp_csv import _build_dialect
+from pypy.module._csv.interp_csv import (QUOTE_MINIMAL, QUOTE_ALL,
+                                         QUOTE_NONNUMERIC, QUOTE_NONE)
+
+(START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
+ IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
+ EAT_CRNL) = range(8)
+
+
+class W_Reader(Wrappable):
+
+    def __init__(self, space, dialect, w_iter):
+        self.space = space
+        self.dialect = dialect
+        self.w_iter = w_iter
+        self.line_num = 0
+
+    def iter_w(self):
+        return self.space.wrap(self)
+
+    def error(self, msg):
+        space = self.space
+        msg = 'line %d: %s' % (self.line_num, msg)
+        w_module = space.getbuiltinmodule('_csv')
+        w_error = space.getattr(w_module, space.wrap('Error'))
+        raise OperationError(w_error, space.wrap(msg))
+    error._dont_inline_ = True
+
+    def add_char(self, field_builder, c):
+        assert field_builder is not None
+        if field_builder.getlength() >= field_limit.limit:
+            raise self.error("field larger than field limit")
+        field_builder.append(c)
+
+    def save_field(self, field_builder):
+        field = field_builder.build()
+        if self.numeric_field:
+            from pypy.objspace.std.strutil import ParseStringError
+            from pypy.objspace.std.strutil import string_to_float
+            self.numeric_field = False
+            try:
+                ff = string_to_float(field)
+            except ParseStringError, e:
+                raise OperationError(self.space.w_ValueError,
+                                     self.space.wrap(e.msg))
+            w_obj = self.space.wrap(ff)
+        else:
+            w_obj = self.space.wrap(field)
+        self.fields_w.append(w_obj)
+
+    def next_w(self):
+        space = self.space
+        dialect = self.dialect
+        self.fields_w = []
+        self.numeric_field = False
+        field_builder = None  # valid iff state not in [START_RECORD, EAT_CRNL]
+        state = START_RECORD
+        #
+        while True:
+            try:
+                w_line = space.next(self.w_iter)
+            except OperationError, e:
+                if e.match(space, space.w_StopIteration):
+                    if field_builder is not None:
+                        raise self.error("newline inside string")
+                raise
+            self.line_num += 1
+            line = space.str_w(w_line)
+            for c in line:
+                if c == '\0':
+                    raise self.error("line contains NULL byte")
+
+                if state == START_RECORD:
+                    if c == '\n' or c == '\r':
+                        state = EAT_CRNL
+                        continue
+                    # normal character - handle as START_FIELD
+                    state = START_FIELD
+                    # fall-through to the next case
+
+                if state == START_FIELD:
+                    field_builder = StringBuilder(64)
+                    # expecting field
+                    if c == '\n' or c == '\r':
+                        # save empty field
+                        self.save_field(field_builder)
+                        state = EAT_CRNL
+                    elif (c == dialect.quotechar and
+                              dialect.quoting != QUOTE_NONE):
+                        # start quoted field
+                        state = IN_QUOTED_FIELD
+                    elif c == dialect.escapechar:
+                        # possible escaped character
+                        state = ESCAPED_CHAR
+                    elif c == ' ' and dialect.skipinitialspace:
+                        # ignore space at start of field
+                        pass
+                    elif c == dialect.delimiter:
+                        # save empty field
+                        self.save_field(field_builder)
+                    else:
+                        # begin new unquoted field
+                        if dialect.quoting == QUOTE_NONNUMERIC:
+                            self.numeric_field = True
+                        self.add_char(field_builder, c)
+                        state = IN_FIELD
+
+                elif state == ESCAPED_CHAR:
+                    self.add_char(field_builder, c)
+                    state = IN_FIELD
+
+                elif state == IN_FIELD:
+                    # in unquoted field
+                    if c == '\n' or c == '\r':
+                        # end of line
+                        self.save_field(field_builder)
+                        state = EAT_CRNL
+                    elif c == dialect.escapechar:
+                        # possible escaped character
+                        state = ESCAPED_CHAR
+                    elif c == dialect.delimiter:
+                        # save field - wait for new field
+                        self.save_field(field_builder)
+                        state = START_FIELD
+                    else:
+                        # normal character - save in field
+                        self.add_char(field_builder, c)
+
+                elif state == IN_QUOTED_FIELD:
+                    # in quoted field
+                    if c == dialect.escapechar:
+                        # Possible escape character
+                        state = ESCAPE_IN_QUOTED_FIELD
+                    elif (c == dialect.quotechar and
+                              dialect.quoting != QUOTE_NONE):
+                        if dialect.doublequote:
+                            # doublequote; " represented by ""
+                            state = QUOTE_IN_QUOTED_FIELD
+                        else:
+                            # end of quote part of field
+                            state = IN_FIELD
+                    else:
+                        # normal character - save in field
+                        self.add_char(field_builder, c)
+
+                elif state == ESCAPE_IN_QUOTED_FIELD:
+                    self.add_char(field_builder, c)
+                    state = IN_QUOTED_FIELD
+
+                elif state == QUOTE_IN_QUOTED_FIELD:
+                    # doublequote - seen a quote in a quoted field
+                    if (dialect.quoting != QUOTE_NONE and
+                            c == dialect.quotechar):
+                        # save "" as "
+                        self.add_char(field_builder, c)
+                        state = IN_QUOTED_FIELD
+                    elif c == dialect.delimiter:
+                        # save field - wait for new field
+                        self.save_field(field_builder)
+                        state = START_FIELD
+                    elif c == '\n' or c == '\r':
+                        # end of line
+                        self.save_field(field_builder)
+                        state = EAT_CRNL
+                    elif not dialect.strict:
+                        self.add_char(field_builder, c)


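For reference, the behaviour the reader state machine above implements - collapsing doubled quotes inside quoted fields, and converting unquoted fields under QUOTE_NONNUMERIC - can be sketched with CPython's csv module, which this reader reimplements:

    import csv

    # doublequote handling: "" inside a quoted field collapses to one quote
    assert list(csv.reader(['a,"say ""hi""",c'])) == [['a', 'say "hi"', 'c']]
    # QUOTE_NONNUMERIC sends unquoted fields through the numeric_field path
    rows = list(csv.reader(['1,"x",2.5'], quoting=csv.QUOTE_NONNUMERIC))
    assert rows == [[1.0, 'x', 2.5]]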