[pypy-commit] pypy arm-backend-2: merge up to d05a7437ee20

bivab noreply at buildbot.pypy.org
Fri Jul 1 16:15:58 CEST 2011

Author: David Schneider <david.schneider at picle.org>
Branch: arm-backend-2
Changeset: r45230:04530d3561b1
Date: 2011-07-01 15:16 +0200

Log:	merge up to d05a7437ee20

diff --git a/lib-python/conftest.py b/lib-python/conftest.py
--- a/lib-python/conftest.py
+++ b/lib-python/conftest.py
@@ -61,7 +61,7 @@
                                  usemodules = '',
         self.basename = basename 
-        self._usemodules = usemodules.split()
+        self._usemodules = usemodules.split() + ['signal']
         self._compiler = compiler 
         self.core = core
         self.skip = skip
@@ -154,17 +154,17 @@
     RegrTest('test_codeccallbacks.py', core=True),
-    RegrTest('test_codecencodings_cn.py', skip="encodings not available"),
-    RegrTest('test_codecencodings_hk.py', skip="encodings not available"),
-    RegrTest('test_codecencodings_jp.py', skip="encodings not available"),
-    RegrTest('test_codecencodings_kr.py', skip="encodings not available"),
-    RegrTest('test_codecencodings_tw.py', skip="encodings not available"),
+    RegrTest('test_codecencodings_cn.py'),
+    RegrTest('test_codecencodings_hk.py'),
+    RegrTest('test_codecencodings_jp.py'),
+    RegrTest('test_codecencodings_kr.py'),
+    RegrTest('test_codecencodings_tw.py'),
-    RegrTest('test_codecmaps_cn.py', skip="encodings not available"),
-    RegrTest('test_codecmaps_hk.py', skip="encodings not available"),
-    RegrTest('test_codecmaps_jp.py', skip="encodings not available"),
-    RegrTest('test_codecmaps_kr.py', skip="encodings not available"),
-    RegrTest('test_codecmaps_tw.py', skip="encodings not available"),
+    RegrTest('test_codecmaps_cn.py'),
+    RegrTest('test_codecmaps_hk.py'),
+    RegrTest('test_codecmaps_jp.py'),
+    RegrTest('test_codecmaps_kr.py'),
+    RegrTest('test_codecmaps_tw.py'),
     RegrTest('test_codecs.py', core=True),
     RegrTest('test_codeop.py', core=True),
     RegrTest('test_coercion.py', core=True),
@@ -314,7 +314,7 @@
     RegrTest('test_module.py', core=True),
-    RegrTest('test_multibytecodec.py', skip="unsupported codecs"),
+    RegrTest('test_multibytecodec.py'),
     RegrTest('test_multibytecodec_support.py', skip="not a test"),
     RegrTest('test_multiprocessing.py', skip='FIXME leaves subprocesses'),
@@ -400,7 +400,7 @@
     RegrTest('test_softspace.py', core=True),
     RegrTest('test_sort.py', core=True),
-    RegrTest('test_ssl.py'),
+    RegrTest('test_ssl.py', usemodules='_ssl _socket select'),
     RegrTest('test_str.py', core=True),
diff --git a/lib-python/modified-2.7/site.py b/lib-python/modified-2.7/site.py
--- a/lib-python/modified-2.7/site.py
+++ b/lib-python/modified-2.7/site.py
@@ -454,10 +454,10 @@
     __builtin__.copyright = _Printer("copyright", sys.copyright)
     __builtin__.credits = _Printer(
-        "PyPy is maintained by the PyPy developers: http://codespeak.net/pypy")
+        "PyPy is maintained by the PyPy developers: http://pypy.org/")
     __builtin__.license = _Printer(
-        "See http://codespeak.net/svn/pypy/dist/LICENSE")
+        "See https://bitbucket.org/pypy/pypy/src/default/LICENSE")
diff --git a/lib-python/modified-2.7/test/test_codecs.py b/lib-python/modified-2.7/test/test_codecs.py
deleted file mode 100644
--- a/lib-python/modified-2.7/test/test_codecs.py
+++ /dev/null
@@ -1,1615 +0,0 @@
-from test import test_support
-import unittest
-import codecs
-import sys, StringIO, _testcapi
-class Queue(object):
-    """
-    queue: write bytes at one end, read bytes from the other end
-    """
-    def __init__(self):
-        self._buffer = ""
-    def write(self, chars):
-        self._buffer += chars
-    def read(self, size=-1):
-        if size<0:
-            s = self._buffer
-            self._buffer = ""
-            return s
-        else:
-            s = self._buffer[:size]
-            self._buffer = self._buffer[size:]
-            return s
-class ReadTest(unittest.TestCase):
-    def check_partial(self, input, partialresults):
-        # get a StreamReader for the encoding and feed the bytestring version
-        # of input to the reader byte by byte. Read everything available from
-        # the StreamReader and check that the results equal the appropriate
-        # entries from partialresults.
-        q = Queue()
-        r = codecs.getreader(self.encoding)(q)
-        result = u""
-        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
-            q.write(c)
-            result += r.read()
-            self.assertEqual(result, partialresult)
-        # check that there's nothing left in the buffers
-        self.assertEqual(r.read(), u"")
-        self.assertEqual(r.bytebuffer, "")
-        self.assertEqual(r.charbuffer, u"")
-        # do the check again, this time using a incremental decoder
-        d = codecs.getincrementaldecoder(self.encoding)()
-        result = u""
-        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
-            result += d.decode(c)
-            self.assertEqual(result, partialresult)
-        # check that there's nothing left in the buffers
-        self.assertEqual(d.decode("", True), u"")
-        self.assertEqual(d.buffer, "")
-        # Check whether the reset method works properly
-        d.reset()
-        result = u""
-        for (c, partialresult) in zip(input.encode(self.encoding), partialresults):
-            result += d.decode(c)
-            self.assertEqual(result, partialresult)
-        # check that there's nothing left in the buffers
-        self.assertEqual(d.decode("", True), u"")
-        self.assertEqual(d.buffer, "")
-        # check iterdecode()
-        encoded = input.encode(self.encoding)
-        self.assertEqual(
-            input,
-            u"".join(codecs.iterdecode(encoded, self.encoding))
-        )
-    def test_readline(self):
-        def getreader(input):
-            stream = StringIO.StringIO(input.encode(self.encoding))
-            return codecs.getreader(self.encoding)(stream)
-        def readalllines(input, keepends=True, size=None):
-            reader = getreader(input)
-            lines = []
-            while True:
-                line = reader.readline(size=size, keepends=keepends)
-                if not line:
-                    break
-                lines.append(line)
-            return "|".join(lines)
-        s = u"foo\nbar\r\nbaz\rspam\u2028eggs"
-        sexpected = u"foo\n|bar\r\n|baz\r|spam\u2028|eggs"
-        sexpectednoends = u"foo|bar|baz|spam|eggs"
-        self.assertEqual(readalllines(s, True), sexpected)
-        self.assertEqual(readalllines(s, False), sexpectednoends)
-        self.assertEqual(readalllines(s, True, 10), sexpected)
-        self.assertEqual(readalllines(s, False, 10), sexpectednoends)
-        # Test long lines (multiple calls to read() in readline())
-        vw = []
-        vwo = []
-        for (i, lineend) in enumerate(u"\n \r\n \r \u2028".split()):
-            vw.append((i*200)*u"\3042" + lineend)
-            vwo.append((i*200)*u"\3042")
-        self.assertEqual(readalllines("".join(vw), True), "".join(vw))
-        self.assertEqual(readalllines("".join(vw), False),"".join(vwo))
-        # Test lines where the first read might end with \r, so the
-        # reader has to look ahead whether this is a lone \r or a \r\n
-        for size in xrange(80):
-            for lineend in u"\n \r\n \r \u2028".split():
-                s = 10*(size*u"a" + lineend + u"xxx\n")
-                reader = getreader(s)
-                for i in xrange(10):
-                    self.assertEqual(
-                        reader.readline(keepends=True),
-                        size*u"a" + lineend,
-                    )
-                reader = getreader(s)
-                for i in xrange(10):
-                    self.assertEqual(
-                        reader.readline(keepends=False),
-                        size*u"a",
-                    )
-    def test_bug1175396(self):
-        s = [
-            '<%!--===================================================\r\n',
-            '    BLOG index page: show recent articles,\r\n',
-            '    today\'s articles, or articles of a specific date.\r\n',
-            '========================================================--%>\r\n',
-            '<%@inputencoding="ISO-8859-1"%>\r\n',
-            '<%@pagetemplate=TEMPLATE.y%>\r\n',
-            '<%@import=import frog.util, frog%>\r\n',
-            '<%@import=import frog.objects%>\r\n',
-            '<%@import=from frog.storageerrors import StorageError%>\r\n',
-            '<%\r\n',
-            '\r\n',
-            'import logging\r\n',
-            'log=logging.getLogger("Snakelets.logger")\r\n',
-            '\r\n',
-            '\r\n',
-            'user=self.SessionCtx.user\r\n',
-            'storageEngine=self.SessionCtx.storageEngine\r\n',
-            '\r\n',
-            '\r\n',
-            'def readArticlesFromDate(date, count=None):\r\n',
-            '    entryids=storageEngine.listBlogEntries(date)\r\n',
-            '    entryids.reverse() # descending\r\n',
-            '    if count:\r\n',
-            '        entryids=entryids[:count]\r\n',
-            '    try:\r\n',
-            '        return [ frog.objects.BlogEntry.load(storageEngine, date, Id) for Id in entryids ]\r\n',
-            '    except StorageError,x:\r\n',
-            '        log.error("Error loading articles: "+str(x))\r\n',
-            '        self.abort("cannot load articles")\r\n',
-            '\r\n',
-            'showdate=None\r\n',
-            '\r\n',
-            'arg=self.Request.getArg()\r\n',
-            'if arg=="today":\r\n',
-            '    #-------------------- TODAY\'S ARTICLES\r\n',
-            '    self.write("<h2>Today\'s articles</h2>")\r\n',
-            '    showdate = frog.util.isodatestr() \r\n',
-            '    entries = readArticlesFromDate(showdate)\r\n',
-            'elif arg=="active":\r\n',
-            '    #-------------------- ACTIVE ARTICLES redirect\r\n',
-            '    self.Yredirect("active.y")\r\n',
-            'elif arg=="login":\r\n',
-            '    #-------------------- LOGIN PAGE redirect\r\n',
-            '    self.Yredirect("login.y")\r\n',
-            'elif arg=="date":\r\n',
-            '    #-------------------- ARTICLES OF A SPECIFIC DATE\r\n',
-            '    showdate = self.Request.getParameter("date")\r\n',
-            '    self.write("<h2>Articles written on %s</h2>"% frog.util.mediumdatestr(showdate))\r\n',
-            '    entries = readArticlesFromDate(showdate)\r\n',
-            'else:\r\n',
-            '    #-------------------- RECENT ARTICLES\r\n',
-            '    self.write("<h2>Recent articles</h2>")\r\n',
-            '    dates=storageEngine.listBlogEntryDates()\r\n',
-            '    if dates:\r\n',
-            '        entries=[]\r\n',
-            '        SHOWAMOUNT=10\r\n',
-            '        for showdate in dates:\r\n',
-            '            entries.extend( readArticlesFromDate(showdate, SHOWAMOUNT-len(entries)) )\r\n',
-            '            if len(entries)>=SHOWAMOUNT:\r\n',
-            '                break\r\n',
-            '                \r\n',
-        ]
-        stream = StringIO.StringIO("".join(s).encode(self.encoding))
-        reader = codecs.getreader(self.encoding)(stream)
-        for (i, line) in enumerate(reader):
-            self.assertEqual(line, s[i])
-    def test_readlinequeue(self):
-        q = Queue()
-        writer = codecs.getwriter(self.encoding)(q)
-        reader = codecs.getreader(self.encoding)(q)
-        # No lineends
-        writer.write(u"foo\r")
-        self.assertEqual(reader.readline(keepends=False), u"foo")
-        writer.write(u"\nbar\r")
-        self.assertEqual(reader.readline(keepends=False), u"")
-        self.assertEqual(reader.readline(keepends=False), u"bar")
-        writer.write(u"baz")
-        self.assertEqual(reader.readline(keepends=False), u"baz")
-        self.assertEqual(reader.readline(keepends=False), u"")
-        # Lineends
-        writer.write(u"foo\r")
-        self.assertEqual(reader.readline(keepends=True), u"foo\r")
-        writer.write(u"\nbar\r")
-        self.assertEqual(reader.readline(keepends=True), u"\n")
-        self.assertEqual(reader.readline(keepends=True), u"bar\r")
-        writer.write(u"baz")
-        self.assertEqual(reader.readline(keepends=True), u"baz")
-        self.assertEqual(reader.readline(keepends=True), u"")
-        writer.write(u"foo\r\n")
-        self.assertEqual(reader.readline(keepends=True), u"foo\r\n")
-    def test_bug1098990_a(self):
-        s1 = u"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy\r\n"
-        s2 = u"offending line: ladfj askldfj klasdj fskla dfzaskdj fasklfj laskd fjasklfzzzzaa%whereisthis!!!\r\n"
-        s3 = u"next line.\r\n"
-        s = (s1+s2+s3).encode(self.encoding)
-        stream = StringIO.StringIO(s)
-        reader = codecs.getreader(self.encoding)(stream)
-        self.assertEqual(reader.readline(), s1)
-        self.assertEqual(reader.readline(), s2)
-        self.assertEqual(reader.readline(), s3)
-        self.assertEqual(reader.readline(), u"")
-    def test_bug1098990_b(self):
-        s1 = u"aaaaaaaaaaaaaaaaaaaaaaaa\r\n"
-        s2 = u"bbbbbbbbbbbbbbbbbbbbbbbb\r\n"
-        s3 = u"stillokay:bbbbxx\r\n"
-        s4 = u"broken!!!!badbad\r\n"
-        s5 = u"againokay.\r\n"
-        s = (s1+s2+s3+s4+s5).encode(self.encoding)
-        stream = StringIO.StringIO(s)
-        reader = codecs.getreader(self.encoding)(stream)
-        self.assertEqual(reader.readline(), s1)
-        self.assertEqual(reader.readline(), s2)
-        self.assertEqual(reader.readline(), s3)
-        self.assertEqual(reader.readline(), s4)
-        self.assertEqual(reader.readline(), s5)
-        self.assertEqual(reader.readline(), u"")
-class UTF32Test(ReadTest):
-    encoding = "utf-32"
-    spamle = ('\xff\xfe\x00\x00'
-              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00'
-              's\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m\x00\x00\x00')
-    spambe = ('\x00\x00\xfe\xff'
-              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m'
-              '\x00\x00\x00s\x00\x00\x00p\x00\x00\x00a\x00\x00\x00m')
-    def test_only_one_bom(self):
-        _,_,reader,writer = codecs.lookup(self.encoding)
-        # encode some stream
-        s = StringIO.StringIO()
-        f = writer(s)
-        f.write(u"spam")
-        f.write(u"spam")
-        d = s.getvalue()
-        # check whether there is exactly one BOM in it
-        self.assertTrue(d == self.spamle or d == self.spambe)
-        # try to read it back
-        s = StringIO.StringIO(d)
-        f = reader(s)
-        self.assertEqual(f.read(), u"spamspam")
-    def test_badbom(self):
-        s = StringIO.StringIO(4*"\xff")
-        f = codecs.getreader(self.encoding)(s)
-        self.assertRaises(UnicodeError, f.read)
-        s = StringIO.StringIO(8*"\xff")
-        f = codecs.getreader(self.encoding)(s)
-        self.assertRaises(UnicodeError, f.read)
-    def test_partial(self):
-        self.check_partial(
-            u"\x00\xff\u0100\uffff",
-            [
-                u"", # first byte of BOM read
-                u"", # second byte of BOM read
-                u"", # third byte of BOM read
-                u"", # fourth byte of BOM read => byteorder known
-                u"",
-                u"",
-                u"",
-                u"\x00",
-                u"\x00",
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100\uffff",
-            ]
-        )
-    def test_handlers(self):
-        self.assertEqual((u'\ufffd', 1),
-                         codecs.utf_32_decode('\x01', 'replace', True))
-        self.assertEqual((u'', 1),
-                         codecs.utf_32_decode('\x01', 'ignore', True))
-    def test_errors(self):
-        self.assertRaises(UnicodeDecodeError, codecs.utf_32_decode,
-                          "\xff", "strict", True)
-    def test_issue8941(self):
-        # Issue #8941: insufficient result allocation when decoding into
-        # surrogate pairs on UCS-2 builds.
-        encoded_le = '\xff\xfe\x00\x00' + '\x00\x00\x01\x00' * 1024
-        self.assertEqual(u'\U00010000' * 1024,
-                         codecs.utf_32_decode(encoded_le)[0])
-        encoded_be = '\x00\x00\xfe\xff' + '\x00\x01\x00\x00' * 1024
-        self.assertEqual(u'\U00010000' * 1024,
-                         codecs.utf_32_decode(encoded_be)[0])
-class UTF32LETest(ReadTest):
-    encoding = "utf-32-le"
-    def test_partial(self):
-        self.check_partial(
-            u"\x00\xff\u0100\uffff",
-            [
-                u"",
-                u"",
-                u"",
-                u"\x00",
-                u"\x00",
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100\uffff",
-            ]
-        )
-    def test_simple(self):
-        self.assertEqual(u"\U00010203".encode(self.encoding), "\x03\x02\x01\x00")
-    def test_errors(self):
-        self.assertRaises(UnicodeDecodeError, codecs.utf_32_le_decode,
-                          "\xff", "strict", True)
-    def test_issue8941(self):
-        # Issue #8941: insufficient result allocation when decoding into
-        # surrogate pairs on UCS-2 builds.
-        encoded = '\x00\x00\x01\x00' * 1024
-        self.assertEqual(u'\U00010000' * 1024,
-                         codecs.utf_32_le_decode(encoded)[0])
-class UTF32BETest(ReadTest):
-    encoding = "utf-32-be"
-    def test_partial(self):
-        self.check_partial(
-            u"\x00\xff\u0100\uffff",
-            [
-                u"",
-                u"",
-                u"",
-                u"\x00",
-                u"\x00",
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100\uffff",
-            ]
-        )
-    def test_simple(self):
-        self.assertEqual(u"\U00010203".encode(self.encoding), "\x00\x01\x02\x03")
-    def test_errors(self):
-        self.assertRaises(UnicodeDecodeError, codecs.utf_32_be_decode,
-                          "\xff", "strict", True)
-    def test_issue8941(self):
-        # Issue #8941: insufficient result allocation when decoding into
-        # surrogate pairs on UCS-2 builds.
-        encoded = '\x00\x01\x00\x00' * 1024
-        self.assertEqual(u'\U00010000' * 1024,
-                         codecs.utf_32_be_decode(encoded)[0])
-class UTF16Test(ReadTest):
-    encoding = "utf-16"
-    spamle = '\xff\xfes\x00p\x00a\x00m\x00s\x00p\x00a\x00m\x00'
-    spambe = '\xfe\xff\x00s\x00p\x00a\x00m\x00s\x00p\x00a\x00m'
-    def test_only_one_bom(self):
-        _,_,reader,writer = codecs.lookup(self.encoding)
-        # encode some stream
-        s = StringIO.StringIO()
-        f = writer(s)
-        f.write(u"spam")
-        f.write(u"spam")
-        d = s.getvalue()
-        # check whether there is exactly one BOM in it
-        self.assertTrue(d == self.spamle or d == self.spambe)
-        # try to read it back
-        s = StringIO.StringIO(d)
-        f = reader(s)
-        self.assertEqual(f.read(), u"spamspam")
-    def test_badbom(self):
-        s = StringIO.StringIO("\xff\xff")
-        f = codecs.getreader(self.encoding)(s)
-        self.assertRaises(UnicodeError, f.read)
-        s = StringIO.StringIO("\xff\xff\xff\xff")
-        f = codecs.getreader(self.encoding)(s)
-        self.assertRaises(UnicodeError, f.read)
-    def test_partial(self):
-        self.check_partial(
-            u"\x00\xff\u0100\uffff",
-            [
-                u"", # first byte of BOM read
-                u"", # second byte of BOM read => byteorder known
-                u"",
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100\uffff",
-            ]
-        )
-    def test_handlers(self):
-        self.assertEqual((u'\ufffd', 1),
-                         codecs.utf_16_decode('\x01', 'replace', True))
-        self.assertEqual((u'', 1),
-                         codecs.utf_16_decode('\x01', 'ignore', True))
-    def test_errors(self):
-        self.assertRaises(UnicodeDecodeError, codecs.utf_16_decode, "\xff", "strict", True)
-    def test_bug691291(self):
-        # Files are always opened in binary mode, even if no binary mode was
-        # specified.  This means that no automatic conversion of '\n' is done
-        # on reading and writing.
-        s1 = u'Hello\r\nworld\r\n'
-        s = s1.encode(self.encoding)
-        try:
-            with open(test_support.TESTFN, 'wb') as fp:
-                fp.write(s)
-            with codecs.open(test_support.TESTFN, 'U', encoding=self.encoding) as reader:
-                self.assertEqual(reader.read(), s1)
-        finally:
-            test_support.unlink(test_support.TESTFN)
-class UTF16LETest(ReadTest):
-    encoding = "utf-16-le"
-    def test_partial(self):
-        self.check_partial(
-            u"\x00\xff\u0100\uffff",
-            [
-                u"",
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100\uffff",
-            ]
-        )
-    def test_errors(self):
-        self.assertRaises(UnicodeDecodeError, codecs.utf_16_le_decode, "\xff", "strict", True)
-class UTF16BETest(ReadTest):
-    encoding = "utf-16-be"
-    def test_partial(self):
-        self.check_partial(
-            u"\x00\xff\u0100\uffff",
-            [
-                u"",
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100",
-                u"\x00\xff\u0100\uffff",
-            ]
-        )
-    def test_errors(self):
-        self.assertRaises(UnicodeDecodeError, codecs.utf_16_be_decode, "\xff", "strict", True)
-class UTF8Test(ReadTest):
-    encoding = "utf-8"
-    def test_partial(self):
-        self.check_partial(
-            u"\x00\xff\u07ff\u0800\uffff",
-            [
-                u"\x00",
-                u"\x00",
-                u"\x00\xff",
-                u"\x00\xff",
-                u"\x00\xff\u07ff",
-                u"\x00\xff\u07ff",
-                u"\x00\xff\u07ff",
-                u"\x00\xff\u07ff\u0800",
-                u"\x00\xff\u07ff\u0800",
-                u"\x00\xff\u07ff\u0800",
-                u"\x00\xff\u07ff\u0800\uffff",
-            ]
-        )
-class UTF7Test(ReadTest):
-    encoding = "utf-7"
-    def test_partial(self):
-        self.check_partial(
-            u"a+-b",
-            [
-                u"a",
-                u"a",
-                u"a+",
-                u"a+-",
-                u"a+-b",
-            ]
-        )
-class UTF16ExTest(unittest.TestCase):
-    def test_errors(self):
-        self.assertRaises(UnicodeDecodeError, codecs.utf_16_ex_decode, "\xff", "strict", 0, True)
-    def test_bad_args(self):
-        self.assertRaises(TypeError, codecs.utf_16_ex_decode)
-class ReadBufferTest(unittest.TestCase):
-    def test_array(self):
-        import array
-        self.assertEqual(
-            codecs.readbuffer_encode(array.array("c", "spam")),
-            ("spam", 4)
-        )
-    def test_empty(self):
-        self.assertEqual(codecs.readbuffer_encode(""), ("", 0))
-    def test_bad_args(self):
-        self.assertRaises(TypeError, codecs.readbuffer_encode)
-        self.assertRaises(TypeError, codecs.readbuffer_encode, 42)
-class CharBufferTest(unittest.TestCase):
-    def test_string(self):
-        self.assertEqual(codecs.charbuffer_encode("spam"), ("spam", 4))
-    def test_empty(self):
-        self.assertEqual(codecs.charbuffer_encode(""), ("", 0))
-    def test_bad_args(self):
-        self.assertRaises(TypeError, codecs.charbuffer_encode)
-        self.assertRaises(TypeError, codecs.charbuffer_encode, 42)
-class UTF8SigTest(ReadTest):
-    encoding = "utf-8-sig"
-    def test_partial(self):
-        self.check_partial(
-            u"\ufeff\x00\xff\u07ff\u0800\uffff",
-            [
-                u"",
-                u"",
-                u"", # First BOM has been read and skipped
-                u"",
-                u"",
-                u"\ufeff", # Second BOM has been read and emitted
-                u"\ufeff\x00", # "\x00" read and emitted
-                u"\ufeff\x00", # First byte of encoded u"\xff" read
-                u"\ufeff\x00\xff", # Second byte of encoded u"\xff" read
-                u"\ufeff\x00\xff", # First byte of encoded u"\u07ff" read
-                u"\ufeff\x00\xff\u07ff", # Second byte of encoded u"\u07ff" read
-                u"\ufeff\x00\xff\u07ff",
-                u"\ufeff\x00\xff\u07ff",
-                u"\ufeff\x00\xff\u07ff\u0800",
-                u"\ufeff\x00\xff\u07ff\u0800",
-                u"\ufeff\x00\xff\u07ff\u0800",
-                u"\ufeff\x00\xff\u07ff\u0800\uffff",
-            ]
-        )
-    def test_bug1601501(self):
-        # SF bug #1601501: check that the codec works with a buffer
-        unicode("\xef\xbb\xbf", "utf-8-sig")
-    def test_bom(self):
-        d = codecs.getincrementaldecoder("utf-8-sig")()
-        s = u"spam"
-        self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
-    def test_stream_bom(self):
-        unistring = u"ABC\u00A1\u2200XYZ"
-        bytestring = codecs.BOM_UTF8 + "ABC\xC2\xA1\xE2\x88\x80XYZ"
-        reader = codecs.getreader("utf-8-sig")
-        for sizehint in [None] + range(1, 11) + \
-                        [64, 128, 256, 512, 1024]:
-            istream = reader(StringIO.StringIO(bytestring))
-            ostream = StringIO.StringIO()
-            while 1:
-                if sizehint is not None:
-                    data = istream.read(sizehint)
-                else:
-                    data = istream.read()
-                if not data:
-                    break
-                ostream.write(data)
-            got = ostream.getvalue()
-            self.assertEqual(got, unistring)
-    def test_stream_bare(self):
-        unistring = u"ABC\u00A1\u2200XYZ"
-        bytestring = "ABC\xC2\xA1\xE2\x88\x80XYZ"
-        reader = codecs.getreader("utf-8-sig")
-        for sizehint in [None] + range(1, 11) + \
-                        [64, 128, 256, 512, 1024]:
-            istream = reader(StringIO.StringIO(bytestring))
-            ostream = StringIO.StringIO()
-            while 1:
-                if sizehint is not None:
-                    data = istream.read(sizehint)
-                else:
-                    data = istream.read()
-                if not data:
-                    break
-                ostream.write(data)
-            got = ostream.getvalue()
-            self.assertEqual(got, unistring)
-class EscapeDecodeTest(unittest.TestCase):
-    def test_empty(self):
-        self.assertEqual(codecs.escape_decode(""), ("", 0))
-class RecodingTest(unittest.TestCase):
-    def test_recoding(self):
-        f = StringIO.StringIO()
-        f2 = codecs.EncodedFile(f, "unicode_internal", "utf-8")
-        f2.write(u"a")
-        f2.close()
-        # Python used to crash on this at exit because of a refcount
-        # bug in _codecsmodule.c
-# From RFC 3492
-punycode_testcases = [
-    # A Arabic (Egyptian):
-    (u"\u0644\u064A\u0647\u0645\u0627\u0628\u062A\u0643\u0644"
-     u"\u0645\u0648\u0634\u0639\u0631\u0628\u064A\u061F",
-     "egbpdaj6bu4bxfgehfvwxn"),
-    # B Chinese (simplified):
-    (u"\u4ED6\u4EEC\u4E3A\u4EC0\u4E48\u4E0D\u8BF4\u4E2D\u6587",
-     "ihqwcrb4cv8a8dqg056pqjye"),
-    # C Chinese (traditional):
-    (u"\u4ED6\u5011\u7232\u4EC0\u9EBD\u4E0D\u8AAA\u4E2D\u6587",
-     "ihqwctvzc91f659drss3x8bo0yb"),
-    # D Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky
-    (u"\u0050\u0072\u006F\u010D\u0070\u0072\u006F\u0073\u0074"
-     u"\u011B\u006E\u0065\u006D\u006C\u0075\u0076\u00ED\u010D"
-     u"\u0065\u0073\u006B\u0079",
-     "Proprostnemluvesky-uyb24dma41a"),
-    # E Hebrew:
-    (u"\u05DC\u05DE\u05D4\u05D4\u05DD\u05E4\u05E9\u05D5\u05D8"
-     u"\u05DC\u05D0\u05DE\u05D3\u05D1\u05E8\u05D9\u05DD\u05E2"
-     u"\u05D1\u05E8\u05D9\u05EA",
-     "4dbcagdahymbxekheh6e0a7fei0b"),
-    # F Hindi (Devanagari):
-    (u"\u092F\u0939\u0932\u094B\u0917\u0939\u093F\u0928\u094D"
-    u"\u0926\u0940\u0915\u094D\u092F\u094B\u0902\u0928\u0939"
-    u"\u0940\u0902\u092C\u094B\u0932\u0938\u0915\u0924\u0947"
-    u"\u0939\u0948\u0902",
-    "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),
-    #(G) Japanese (kanji and hiragana):
-    (u"\u306A\u305C\u307F\u3093\u306A\u65E5\u672C\u8A9E\u3092"
-    u"\u8A71\u3057\u3066\u304F\u308C\u306A\u3044\u306E\u304B",
-     "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),
-    # (H) Korean (Hangul syllables):
-    (u"\uC138\uACC4\uC758\uBAA8\uB4E0\uC0AC\uB78C\uB4E4\uC774"
-     u"\uD55C\uAD6D\uC5B4\uB97C\uC774\uD574\uD55C\uB2E4\uBA74"
-     u"\uC5BC\uB9C8\uB098\uC88B\uC744\uAE4C",
-     "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5j"
-     "psd879ccm6fea98c"),
-    # (I) Russian (Cyrillic):
-    (u"\u043F\u043E\u0447\u0435\u043C\u0443\u0436\u0435\u043E"
-     u"\u043D\u0438\u043D\u0435\u0433\u043E\u0432\u043E\u0440"
-     u"\u044F\u0442\u043F\u043E\u0440\u0443\u0441\u0441\u043A"
-     u"\u0438",
-     "b1abfaaepdrnnbgefbaDotcwatmq2g4l"),
-    # (J) Spanish: Porqu<eacute>nopuedensimplementehablarenEspa<ntilde>ol
-    (u"\u0050\u006F\u0072\u0071\u0075\u00E9\u006E\u006F\u0070"
-     u"\u0075\u0065\u0064\u0065\u006E\u0073\u0069\u006D\u0070"
-     u"\u006C\u0065\u006D\u0065\u006E\u0074\u0065\u0068\u0061"
-     u"\u0062\u006C\u0061\u0072\u0065\u006E\u0045\u0073\u0070"
-     u"\u0061\u00F1\u006F\u006C",
-     "PorqunopuedensimplementehablarenEspaol-fmd56a"),
-    # (K) Vietnamese:
-    #  T<adotbelow>isaoh<odotbelow>kh<ocirc>ngth<ecirchookabove>ch\
-    #   <ihookabove>n<oacute>iti<ecircacute>ngVi<ecircdotbelow>t
-    (u"\u0054\u1EA1\u0069\u0073\u0061\u006F\u0068\u1ECD\u006B"
-     u"\u0068\u00F4\u006E\u0067\u0074\u0068\u1EC3\u0063\u0068"
-     u"\u1EC9\u006E\u00F3\u0069\u0074\u0069\u1EBF\u006E\u0067"
-     u"\u0056\u0069\u1EC7\u0074",
-     "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),
-    #(L) 3<nen>B<gumi><kinpachi><sensei>
-    (u"\u0033\u5E74\u0042\u7D44\u91D1\u516B\u5148\u751F",
-     "3B-ww4c5e180e575a65lsy2b"),
-    # (M) <amuro><namie>-with-SUPER-MONKEYS
-    (u"\u5B89\u5BA4\u5948\u7F8E\u6075\u002D\u0077\u0069\u0074"
-     u"\u0068\u002D\u0053\u0055\u0050\u0045\u0052\u002D\u004D"
-     u"\u004F\u004E\u004B\u0045\u0059\u0053",
-     "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),
-    # (N) Hello-Another-Way-<sorezore><no><basho>
-    (u"\u0048\u0065\u006C\u006C\u006F\u002D\u0041\u006E\u006F"
-     u"\u0074\u0068\u0065\u0072\u002D\u0057\u0061\u0079\u002D"
-     u"\u305D\u308C\u305E\u308C\u306E\u5834\u6240",
-     "Hello-Another-Way--fc4qua05auwb3674vfr0b"),
-    # (O) <hitotsu><yane><no><shita>2
-    (u"\u3072\u3068\u3064\u5C4B\u6839\u306E\u4E0B\u0032",
-     "2-u9tlzr9756bt3uc0v"),
-    # (P) Maji<de>Koi<suru>5<byou><mae>
-    (u"\u004D\u0061\u006A\u0069\u3067\u004B\u006F\u0069\u3059"
-     u"\u308B\u0035\u79D2\u524D",
-     "MajiKoi5-783gue6qz075azm5e"),
-     # (Q) <pafii>de<runba>
-    (u"\u30D1\u30D5\u30A3\u30FC\u0064\u0065\u30EB\u30F3\u30D0",
-     "de-jg4avhby1noc0d"),
-    # (R) <sono><supiido><de>
-    (u"\u305D\u306E\u30B9\u30D4\u30FC\u30C9\u3067",
-     "d9juau41awczczp"),
-    # (S) -> $1.00 <-
-    (u"\u002D\u003E\u0020\u0024\u0031\u002E\u0030\u0030\u0020"
-     u"\u003C\u002D",
-     "-> $1.00 <--")
-    ]
-for i in punycode_testcases:
-    if len(i)!=2:
-        print repr(i)
-class PunycodeTest(unittest.TestCase):
-    def test_encode(self):
-        for uni, puny in punycode_testcases:
-            # Need to convert both strings to lower case, since
-            # some of the extended encodings use upper case, but our
-            # code produces only lower case. Converting just puny to
-            # lower is also insufficient, since some of the input characters
-            # are upper case.
-            self.assertEqual(uni.encode("punycode").lower(), puny.lower())
-    def test_decode(self):
-        for uni, puny in punycode_testcases:
-            self.assertEqual(uni, puny.decode("punycode"))
-class UnicodeInternalTest(unittest.TestCase):
-    def test_bug1251300(self):
-        # Decoding with unicode_internal used to not correctly handle "code
-        # points" above 0x10ffff on UCS-4 builds.
-        if sys.maxunicode > 0xffff:
-            ok = [
-                ("\x00\x10\xff\xff", u"\U0010ffff"),
-                ("\x00\x00\x01\x01", u"\U00000101"),
-                ("", u""),
-            ]
-            not_ok = [
-                "\x7f\xff\xff\xff",
-                "\x80\x00\x00\x00",
-                "\x81\x00\x00\x00",
-                "\x00",
-                "\x00\x00\x00\x00\x00",
-            ]
-            for internal, uni in ok:
-                if sys.byteorder == "little":
-                    internal = "".join(reversed(internal))
-                self.assertEqual(uni, internal.decode("unicode_internal"))
-            for internal in not_ok:
-                if sys.byteorder == "little":
-                    internal = "".join(reversed(internal))
-                self.assertRaises(UnicodeDecodeError, internal.decode,
-                    "unicode_internal")
-    def test_decode_error_attributes(self):
-        if sys.maxunicode > 0xffff:
-            try:
-                "\x00\x00\x00\x00\x00\x11\x11\x00".decode("unicode_internal")
-            except UnicodeDecodeError, ex:
-                self.assertEqual("unicode_internal", ex.encoding)
-                self.assertEqual("\x00\x00\x00\x00\x00\x11\x11\x00", ex.object)
-                self.assertEqual(4, ex.start)
-                self.assertEqual(8, ex.end)
-            else:
-                self.fail()
-    def test_decode_callback(self):
-        if sys.maxunicode > 0xffff:
-            codecs.register_error("UnicodeInternalTest", codecs.ignore_errors)
-            decoder = codecs.getdecoder("unicode_internal")
-            ab = u"ab".encode("unicode_internal")
-            ignored = decoder("%s\x22\x22\x22\x22%s" % (ab[:4], ab[4:]),
-                "UnicodeInternalTest")
-            self.assertEqual((u"ab", 12), ignored)
-    def test_encode_length(self):
-        # Issue 3739
-        encoder = codecs.getencoder("unicode_internal")
-        self.assertEqual(encoder(u"a")[1], 1)
-        self.assertEqual(encoder(u"\xe9\u0142")[1], 2)
-        encoder = codecs.getencoder("string-escape")
-        self.assertEqual(encoder(r'\x00')[1], 4)
-# From http://www.gnu.org/software/libidn/draft-josefsson-idn-test-vectors.html
-nameprep_tests = [
-    # 3.1 Map to nothing.
-    ('foo\xc2\xad\xcd\x8f\xe1\xa0\x86\xe1\xa0\x8bbar'
-     '\xe2\x80\x8b\xe2\x81\xa0baz\xef\xb8\x80\xef\xb8\x88\xef'
-     '\xb8\x8f\xef\xbb\xbf',
-     'foobarbaz'),
-    # 3.2 Case folding ASCII U+0043 U+0041 U+0046 U+0045.
-    ('CAFE',
-     'cafe'),
-    # 3.3 Case folding 8bit U+00DF (german sharp s).
-    # The original test case is bogus; it says \xc3\xdf
-    ('\xc3\x9f',
-     'ss'),
-    # 3.4 Case folding U+0130 (turkish capital I with dot).
-    ('\xc4\xb0',
-     'i\xcc\x87'),
-    # 3.5 Case folding multibyte U+0143 U+037A.
-    ('\xc5\x83\xcd\xba',
-     '\xc5\x84 \xce\xb9'),
-    # 3.6 Case folding U+2121 U+33C6 U+1D7BB.
-    # XXX: skip this as it fails in UCS-2 mode
-    #('\xe2\x84\xa1\xe3\x8f\x86\xf0\x9d\x9e\xbb',
-    # 'telc\xe2\x88\x95kg\xcf\x83'),
-    (None, None),
-    # 3.7 Normalization of U+006a U+030c U+00A0 U+00AA.
-    ('j\xcc\x8c\xc2\xa0\xc2\xaa',
-     '\xc7\xb0 a'),
-    # 3.8 Case folding U+1FB7 and normalization.
-    ('\xe1\xbe\xb7',
-     '\xe1\xbe\xb6\xce\xb9'),
-    # 3.9 Self-reverting case folding U+01F0 and normalization.
-    # The original test case is bogus, it says `\xc7\xf0'
-    ('\xc7\xb0',
-     '\xc7\xb0'),
-    # 3.10 Self-reverting case folding U+0390 and normalization.
-    ('\xce\x90',
-     '\xce\x90'),
-    # 3.11 Self-reverting case folding U+03B0 and normalization.
-    ('\xce\xb0',
-     '\xce\xb0'),
-    # 3.12 Self-reverting case folding U+1E96 and normalization.
-    ('\xe1\xba\x96',
-     '\xe1\xba\x96'),
-    # 3.13 Self-reverting case folding U+1F56 and normalization.
-    ('\xe1\xbd\x96',
-     '\xe1\xbd\x96'),
-    # 3.14 ASCII space character U+0020.
-    (' ',
-     ' '),
-    # 3.15 Non-ASCII 8bit space character U+00A0.
-    ('\xc2\xa0',
-     ' '),
-    # 3.16 Non-ASCII multibyte space character U+1680.
-    ('\xe1\x9a\x80',
-     None),
-    # 3.17 Non-ASCII multibyte space character U+2000.
-    ('\xe2\x80\x80',
-     ' '),
-    # 3.18 Zero Width Space U+200b.
-    ('\xe2\x80\x8b',
-     ''),
-    # 3.19 Non-ASCII multibyte space character U+3000.
-    ('\xe3\x80\x80',
-     ' '),
-    # 3.20 ASCII control characters U+0010 U+007F.
-    ('\x10\x7f',
-     '\x10\x7f'),
-    # 3.21 Non-ASCII 8bit control character U+0085.
-    ('\xc2\x85',
-     None),
-    # 3.22 Non-ASCII multibyte control character U+180E.
-    ('\xe1\xa0\x8e',
-     None),
-    # 3.23 Zero Width No-Break Space U+FEFF.
-    ('\xef\xbb\xbf',
-     ''),
-    # 3.24 Non-ASCII control character U+1D175.
-    ('\xf0\x9d\x85\xb5',
-     None),
-    # 3.25 Plane 0 private use character U+F123.
-    ('\xef\x84\xa3',
-     None),
-    # 3.26 Plane 15 private use character U+F1234.
-    ('\xf3\xb1\x88\xb4',
-     None),
-    # 3.27 Plane 16 private use character U+10F234.
-    ('\xf4\x8f\x88\xb4',
-     None),
-    # 3.28 Non-character code point U+8FFFE.
-    ('\xf2\x8f\xbf\xbe',
-     None),
-    # 3.29 Non-character code point U+10FFFF.
-    ('\xf4\x8f\xbf\xbf',
-     None),
-    # 3.30 Surrogate code U+DF42.
-    ('\xed\xbd\x82',
-     None),
-    # 3.31 Non-plain text character U+FFFD.
-    ('\xef\xbf\xbd',
-     None),
-    # 3.32 Ideographic description character U+2FF5.
-    ('\xe2\xbf\xb5',
-     None),
-    # 3.33 Display property character U+0341.
-    ('\xcd\x81',
-     '\xcc\x81'),
-    # 3.34 Left-to-right mark U+200E.
-    ('\xe2\x80\x8e',
-     None),
-    # 3.35 Deprecated U+202A.
-    ('\xe2\x80\xaa',
-     None),
-    # 3.36 Language tagging character U+E0001.
-    ('\xf3\xa0\x80\x81',
-     None),
-    # 3.37 Language tagging character U+E0042.
-    ('\xf3\xa0\x81\x82',
-     None),
-    # 3.38 Bidi: RandALCat character U+05BE and LCat characters.
-    ('foo\xd6\xbebar',
-     None),
-    # 3.39 Bidi: RandALCat character U+FD50 and LCat characters.
-    ('foo\xef\xb5\x90bar',
-     None),
-    # 3.40 Bidi: RandALCat character U+FB38 and LCat characters.
-    ('foo\xef\xb9\xb6bar',
-     'foo \xd9\x8ebar'),
-    # 3.41 Bidi: RandALCat without trailing RandALCat U+0627 U+0031.
-    ('\xd8\xa71',
-     None),
-    # 3.42 Bidi: RandALCat character U+0627 U+0031 U+0628.
-    ('\xd8\xa71\xd8\xa8',
-     '\xd8\xa71\xd8\xa8'),
-    # 3.43 Unassigned code point U+E0002.
-    # Skip this test as we allow unassigned
-    #('\xf3\xa0\x80\x82',
-    # None),
-    (None, None),
-    # 3.44 Larger test (shrinking).
-    # Original test case reads \xc3\xdf
-    ('X\xc2\xad\xc3\x9f\xc4\xb0\xe2\x84\xa1j\xcc\x8c\xc2\xa0\xc2'
-     '\xaa\xce\xb0\xe2\x80\x80',
-     'xssi\xcc\x87tel\xc7\xb0 a\xce\xb0 '),
-    # 3.45 Larger test (expanding).
-    # Original test case reads \xc3\x9f
-    ('X\xc3\x9f\xe3\x8c\x96\xc4\xb0\xe2\x84\xa1\xe2\x92\x9f\xe3\x8c'
-     '\x80',
-     'xss\xe3\x82\xad\xe3\x83\xad\xe3\x83\xa1\xe3\x83\xbc\xe3'
-     '\x83\x88\xe3\x83\xabi\xcc\x87tel\x28d\x29\xe3\x82'
-     '\xa2\xe3\x83\x91\xe3\x83\xbc\xe3\x83\x88')
-    ]
-class NameprepTest(unittest.TestCase):
-    def test_nameprep(self):
-        from encodings.idna import nameprep
-        for pos, (orig, prepped) in enumerate(nameprep_tests):
-            if orig is None:
-                # Skipped
-                continue
-            # The Unicode strings are given in UTF-8
-            orig = unicode(orig, "utf-8")
-            if prepped is None:
-                # Input contains prohibited characters
-                self.assertRaises(UnicodeError, nameprep, orig)
-            else:
-                prepped = unicode(prepped, "utf-8")
-                try:
-                    self.assertEqual(nameprep(orig), prepped)
-                except Exception,e:
-                    raise test_support.TestFailed("Test 3.%d: %s" % (pos+1, str(e)))
-class IDNACodecTest(unittest.TestCase):
-    def test_builtin_decode(self):
-        self.assertEqual(unicode("python.org", "idna"), u"python.org")
-        self.assertEqual(unicode("python.org.", "idna"), u"python.org.")
-        self.assertEqual(unicode("xn--pythn-mua.org", "idna"), u"pyth\xf6n.org")
-        self.assertEqual(unicode("xn--pythn-mua.org.", "idna"), u"pyth\xf6n.org.")
-    def test_builtin_encode(self):
-        self.assertEqual(u"python.org".encode("idna"), "python.org")
-        self.assertEqual("python.org.".encode("idna"), "python.org.")
-        self.assertEqual(u"pyth\xf6n.org".encode("idna"), "xn--pythn-mua.org")
-        self.assertEqual(u"pyth\xf6n.org.".encode("idna"), "xn--pythn-mua.org.")
-    def test_stream(self):
-        import StringIO
-        r = codecs.getreader("idna")(StringIO.StringIO("abc"))
-        r.read(3)
-        self.assertEqual(r.read(), u"")
-    def test_incremental_decode(self):
-        self.assertEqual(
-            "".join(codecs.iterdecode("python.org", "idna")),
-            u"python.org"
-        )
-        self.assertEqual(
-            "".join(codecs.iterdecode("python.org.", "idna")),
-            u"python.org."
-        )
-        self.assertEqual(
-            "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
-            u"pyth\xf6n.org."
-        )
-        self.assertEqual(
-            "".join(codecs.iterdecode("xn--pythn-mua.org.", "idna")),
-            u"pyth\xf6n.org."
-        )
-        decoder = codecs.getincrementaldecoder("idna")()
-        self.assertEqual(decoder.decode("xn--xam", ), u"")
-        self.assertEqual(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
-        self.assertEqual(decoder.decode(u"rg"), u"")
-        self.assertEqual(decoder.decode(u"", True), u"org")
-        decoder.reset()
-        self.assertEqual(decoder.decode("xn--xam", ), u"")
-        self.assertEqual(decoder.decode("ple-9ta.o", ), u"\xe4xample.")
-        self.assertEqual(decoder.decode("rg."), u"org.")
-        self.assertEqual(decoder.decode("", True), u"")
-    def test_incremental_encode(self):
-        self.assertEqual(
-            "".join(codecs.iterencode(u"python.org", "idna")),
-            "python.org"
-        )
-        self.assertEqual(
-            "".join(codecs.iterencode(u"python.org.", "idna")),
-            "python.org."
-        )
-        self.assertEqual(
-            "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
-            "xn--pythn-mua.org."
-        )
-        self.assertEqual(
-            "".join(codecs.iterencode(u"pyth\xf6n.org.", "idna")),
-            "xn--pythn-mua.org."
-        )
-        encoder = codecs.getincrementalencoder("idna")()
-        self.assertEqual(encoder.encode(u"\xe4x"), "")
-        self.assertEqual(encoder.encode(u"ample.org"), "xn--xample-9ta.")
-        self.assertEqual(encoder.encode(u"", True), "org")
-        encoder.reset()
-        self.assertEqual(encoder.encode(u"\xe4x"), "")
-        self.assertEqual(encoder.encode(u"ample.org."), "xn--xample-9ta.org.")
-        self.assertEqual(encoder.encode(u"", True), "")
-class CodecsModuleTest(unittest.TestCase):
-    def test_decode(self):
-        self.assertEqual(codecs.decode('\xe4\xf6\xfc', 'latin-1'),
-                          u'\xe4\xf6\xfc')
-        self.assertRaises(TypeError, codecs.decode)
-        self.assertEqual(codecs.decode('abc'), u'abc')
-        self.assertRaises(UnicodeDecodeError, codecs.decode, '\xff', 'ascii')
-    def test_encode(self):
-        self.assertEqual(codecs.encode(u'\xe4\xf6\xfc', 'latin-1'),
-                          '\xe4\xf6\xfc')
-        self.assertRaises(TypeError, codecs.encode)
-        self.assertRaises(LookupError, codecs.encode, "foo", "__spam__")
-        self.assertEqual(codecs.encode(u'abc'), 'abc')
-        self.assertRaises(UnicodeEncodeError, codecs.encode, u'\xffff', 'ascii')
-    def test_register(self):
-        self.assertRaises(TypeError, codecs.register)
-        self.assertRaises(TypeError, codecs.register, 42)
-    def test_lookup(self):
-        self.assertRaises(TypeError, codecs.lookup)
-        self.assertRaises(LookupError, codecs.lookup, "__spam__")
-        self.assertRaises(LookupError, codecs.lookup, " ")
-    def test_getencoder(self):
-        self.assertRaises(TypeError, codecs.getencoder)
-        self.assertRaises(LookupError, codecs.getencoder, "__spam__")
-    def test_getdecoder(self):
-        self.assertRaises(TypeError, codecs.getdecoder)
-        self.assertRaises(LookupError, codecs.getdecoder, "__spam__")
-    def test_getreader(self):
-        self.assertRaises(TypeError, codecs.getreader)
-        self.assertRaises(LookupError, codecs.getreader, "__spam__")
-    def test_getwriter(self):
-        self.assertRaises(TypeError, codecs.getwriter)
-        self.assertRaises(LookupError, codecs.getwriter, "__spam__")
-class StreamReaderTest(unittest.TestCase):
-    def setUp(self):
-        self.reader = codecs.getreader('utf-8')
-        self.stream = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
-    def test_readlines(self):
-        f = self.reader(self.stream)
-        self.assertEqual(f.readlines(), [u'\ud55c\n', u'\uae00'])
-class EncodedFileTest(unittest.TestCase):
-    def test_basic(self):
-        f = StringIO.StringIO('\xed\x95\x9c\n\xea\xb8\x80')
-        ef = codecs.EncodedFile(f, 'utf-16-le', 'utf-8')
-        self.assertEqual(ef.read(), '\\\xd5\n\x00\x00\xae')
-        f = StringIO.StringIO()
-        ef = codecs.EncodedFile(f, 'utf-8', 'latin1')
-        ef.write('\xc3\xbc')
-        self.assertEqual(f.getvalue(), '\xfc')
-class Str2StrTest(unittest.TestCase):
-    def test_read(self):
-        sin = "\x80".encode("base64_codec")
-        reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
-        sout = reader.read()
-        self.assertEqual(sout, "\x80")
-        self.assertIsInstance(sout, str)
-    def test_readline(self):
-        sin = "\x80".encode("base64_codec")
-        reader = codecs.getreader("base64_codec")(StringIO.StringIO(sin))
-        sout = reader.readline()
-        self.assertEqual(sout, "\x80")
-        self.assertIsInstance(sout, str)
-all_unicode_encodings = [
-    "ascii",
-    "base64_codec",
-    ## "big5",
-    ## "big5hkscs",
-    "charmap",
-    "cp037",
-    "cp1006",
-    "cp1026",
-    "cp1140",
-    "cp1250",
-    "cp1251",
-    "cp1252",
-    "cp1253",
-    "cp1254",
-    "cp1255",
-    "cp1256",
-    "cp1257",
-    "cp1258",
-    "cp424",
-    "cp437",
-    "cp500",
-    "cp720",
-    "cp737",
-    "cp775",
-    "cp850",
-    "cp852",
-    "cp855",
-    "cp856",
-    "cp857",
-    "cp858",
-    "cp860",
-    "cp861",
-    "cp862",
-    "cp863",
-    "cp864",
-    "cp865",
-    "cp866",
-    "cp869",
-    "cp874",
-    "cp875",
-    ## "cp932",
-    ## "cp949",
-    ## "cp950",
-    ## "euc_jis_2004",
-    ## "euc_jisx0213",
-    ## "euc_jp",
-    ## "euc_kr",
-    ## "gb18030",
-    ## "gb2312",
-    ## "gbk",
-    "hex_codec",
-    "hp_roman8",
-    ## "hz",
-    "idna",
-    ## "iso2022_jp",
-    ## "iso2022_jp_1",
-    ## "iso2022_jp_2",
-    ## "iso2022_jp_2004",
-    ## "iso2022_jp_3",
-    ## "iso2022_jp_ext",
-    ## "iso2022_kr",
-    "iso8859_1",
-    "iso8859_10",
-    "iso8859_11",
-    "iso8859_13",
-    "iso8859_14",
-    "iso8859_15",
-    "iso8859_16",
-    "iso8859_2",
-    "iso8859_3",
-    "iso8859_4",
-    "iso8859_5",
-    "iso8859_6",
-    "iso8859_7",
-    "iso8859_8",
-    "iso8859_9",
-    ## "johab",
-    "koi8_r",
-    "koi8_u",
-    "latin_1",
-    "mac_cyrillic",
-    "mac_greek",
-    "mac_iceland",
-    "mac_latin2",
-    "mac_roman",
-    "mac_turkish",
-    "palmos",
-    "ptcp154",
-    "punycode",
-    "raw_unicode_escape",
-    "rot_13",
-    ## "shift_jis",
-    ## "shift_jis_2004",
-    ## "shift_jisx0213",
-    "tis_620",
-    "unicode_escape",
-    "unicode_internal",
-    "utf_16",
-    "utf_16_be",
-    "utf_16_le",
-    "utf_7",
-    "utf_8",
-if hasattr(codecs, "mbcs_encode"):
-    all_unicode_encodings.append("mbcs")
-# The following encodings work only with str, not unicode
-all_string_encodings = [
-    "quopri_codec",
-    "string_escape",
-    "uu_codec",
-# The following encoding is not tested, because it's not supposed
-# to work:
-#    "undefined"
-# The following encodings don't work in stateful mode
-broken_unicode_with_streams = [
-    "base64_codec",
-    "hex_codec",
-    "punycode",
-    "unicode_internal"
-broken_incremental_coders = broken_unicode_with_streams[:]
-# The following encodings only support "strict" mode
-only_strict_mode = [
-    "idna",
-    "zlib_codec",
-    "bz2_codec",
-    import bz2
-except ImportError:
-    pass
-    all_unicode_encodings.append("bz2_codec")
-    broken_unicode_with_streams.append("bz2_codec")
-    import zlib
-except ImportError:
-    pass
-    all_unicode_encodings.append("zlib_codec")
-    broken_unicode_with_streams.append("zlib_codec")
-class BasicUnicodeTest(unittest.TestCase):
-    def test_basics(self):
-        s = u"abc123" # all codecs should be able to encode these
-        for encoding in all_unicode_encodings:
-            name = codecs.lookup(encoding).name
-            if encoding.endswith("_codec"):
-                name += "_codec"
-            elif encoding == "latin_1":
-                name = "latin_1"
-            self.assertEqual(encoding.replace("_", "-"), name.replace("_", "-"))
-            (bytes, size) = codecs.getencoder(encoding)(s)
-            self.assertEqual(size, len(s), "%r != %r (encoding=%r)" % (size, len(s), encoding))
-            (chars, size) = codecs.getdecoder(encoding)(bytes)
-            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
-            if encoding not in broken_unicode_with_streams:
-                # check stream reader/writer
-                q = Queue()
-                writer = codecs.getwriter(encoding)(q)
-                encodedresult = ""
-                for c in s:
-                    writer.write(c)
-                    encodedresult += q.read()
-                q = Queue()
-                reader = codecs.getreader(encoding)(q)
-                decodedresult = u""
-                for c in encodedresult:
-                    q.write(c)
-                    decodedresult += reader.read()
-                self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
-            if encoding not in broken_incremental_coders:
-                # check incremental decoder/encoder (fetched via the Python
-                # and C API) and iterencode()/iterdecode()
-                try:
-                    encoder = codecs.getincrementalencoder(encoding)()
-                    cencoder = _testcapi.codec_incrementalencoder(encoding)
-                except LookupError: # no IncrementalEncoder
-                    pass
-                else:
-                    # check incremental decoder/encoder
-                    encodedresult = ""
-                    for c in s:
-                        encodedresult += encoder.encode(c)
-                    encodedresult += encoder.encode(u"", True)
-                    decoder = codecs.getincrementaldecoder(encoding)()
-                    decodedresult = u""
-                    for c in encodedresult:
-                        decodedresult += decoder.decode(c)
-                    decodedresult += decoder.decode("", True)
-                    self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
-                    # check C API
-                    encodedresult = ""
-                    for c in s:
-                        encodedresult += cencoder.encode(c)
-                    encodedresult += cencoder.encode(u"", True)
-                    cdecoder = _testcapi.codec_incrementaldecoder(encoding)
-                    decodedresult = u""
-                    for c in encodedresult:
-                        decodedresult += cdecoder.decode(c)
-                    decodedresult += cdecoder.decode("", True)
-                    self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
-                    # check iterencode()/iterdecode()
-                    result = u"".join(codecs.iterdecode(codecs.iterencode(s, encoding), encoding))
-                    self.assertEqual(result, s, "%r != %r (encoding=%r)" % (result, s, encoding))
-                    # check iterencode()/iterdecode() with empty string
-                    result = u"".join(codecs.iterdecode(codecs.iterencode(u"", encoding), encoding))
-                    self.assertEqual(result, u"")
-                if encoding not in only_strict_mode:
-                    # check incremental decoder/encoder with errors argument
-                    try:
-                        encoder = codecs.getincrementalencoder(encoding)("ignore")
-                        cencoder = _testcapi.codec_incrementalencoder(encoding, "ignore")
-                    except LookupError: # no IncrementalEncoder
-                        pass
-                    else:
-                        encodedresult = "".join(encoder.encode(c) for c in s)
-                        decoder = codecs.getincrementaldecoder(encoding)("ignore")
-                        decodedresult = u"".join(decoder.decode(c) for c in encodedresult)
-                        self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
-                        encodedresult = "".join(cencoder.encode(c) for c in s)
-                        cdecoder = _testcapi.codec_incrementaldecoder(encoding, "ignore")
-                        decodedresult = u"".join(cdecoder.decode(c) for c in encodedresult)
-                        self.assertEqual(decodedresult, s, "%r != %r (encoding=%r)" % (decodedresult, s, encoding))
-    def test_seek(self):
-        # all codecs should be able to encode these
-        s = u"%s\n%s\n" % (100*u"abc123", 100*u"def456")
-        for encoding in all_unicode_encodings:
-            if encoding == "idna": # FIXME: See SF bug #1163178
-                continue
-            if encoding in broken_unicode_with_streams:
-                continue
-            reader = codecs.getreader(encoding)(StringIO.StringIO(s.encode(encoding)))
-            for t in xrange(5):
-                # Test that calling seek resets the internal codec state and buffers
-                reader.seek(0, 0)
-                line = reader.readline()
-                self.assertEqual(s[:len(line)], line)
-    def test_bad_decode_args(self):
-        for encoding in all_unicode_encodings:
-            decoder = codecs.getdecoder(encoding)
-            self.assertRaises(TypeError, decoder)
-            if encoding not in ("idna", "punycode"):
-                self.assertRaises(TypeError, decoder, 42)
-    def test_bad_encode_args(self):
-        for encoding in all_unicode_encodings:
-            encoder = codecs.getencoder(encoding)
-            self.assertRaises(TypeError, encoder)
-    def test_encoding_map_type_initialized(self):
-        from encodings import cp1140
-        # This used to crash, we are only verifying there's no crash.
-        table_type = type(cp1140.encoding_table)
-        self.assertEqual(table_type, table_type)
-class BasicStrTest(unittest.TestCase):
-    def test_basics(self):
-        s = "abc123"
-        for encoding in all_string_encodings:
-            (bytes, size) = codecs.getencoder(encoding)(s)
-            self.assertEqual(size, len(s))
-            (chars, size) = codecs.getdecoder(encoding)(bytes)
-            self.assertEqual(chars, s, "%r != %r (encoding=%r)" % (chars, s, encoding))
-class CharmapTest(unittest.TestCase):
-    def test_decode_with_string_map(self):
-        self.assertEqual(
-            codecs.charmap_decode("\x00\x01\x02", "strict", u"abc"),
-            (u"abc", 3)
-        )
-        self.assertEqual(
-            codecs.charmap_decode("\x00\x01\x02", "replace", u"ab"),
-            (u"ab\ufffd", 3)
-        )
-        self.assertEqual(
-            codecs.charmap_decode("\x00\x01\x02", "replace", u"ab\ufffe"),
-            (u"ab\ufffd", 3)
-        )
-        self.assertEqual(
-            codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab"),
-            (u"ab", 3)
-        )
-        self.assertEqual(
-            codecs.charmap_decode("\x00\x01\x02", "ignore", u"ab\ufffe"),
-            (u"ab", 3)
-        )
-        allbytes = "".join(chr(i) for i in xrange(256))
-        self.assertEqual(
-            codecs.charmap_decode(allbytes, "ignore", u""),
-            (u"", len(allbytes))
-        )
-class WithStmtTest(unittest.TestCase):
-    def test_encodedfile(self):
-        f = StringIO.StringIO("\xc3\xbc")
-        with codecs.EncodedFile(f, "latin-1", "utf-8") as ef:
-            self.assertEqual(ef.read(), "\xfc")
-    def test_streamreaderwriter(self):
-        f = StringIO.StringIO("\xc3\xbc")
-        info = codecs.lookup("utf-8")
-        with codecs.StreamReaderWriter(f, info.streamreader,
-                                       info.streamwriter, 'strict') as srw:
-            self.assertEqual(srw.read(), u"\xfc")
-class BomTest(unittest.TestCase):
-    def test_seek0(self):
-        data = u"1234567890"
-        tests = ("utf-16",
-                 "utf-16-le",
-                 "utf-16-be",
-                 "utf-32",
-                 "utf-32-le",
-                 "utf-32-be")
-        for encoding in tests:
-            # Check if the BOM is written only once
-            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
-                f.write(data)
-                f.write(data)
-                f.seek(0)
-                self.assertEqual(f.read(), data * 2)
-                f.seek(0)
-                self.assertEqual(f.read(), data * 2)
-            # Check that the BOM is written after a seek(0)
-            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
-                f.write(data[0])
-                self.assertNotEqual(f.tell(), 0)
-                f.seek(0)
-                f.write(data)
-                f.seek(0)
-                self.assertEqual(f.read(), data)
-            # (StreamWriter) Check that the BOM is written after a seek(0)
-            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
-                f.writer.write(data[0])
-                self.assertNotEqual(f.writer.tell(), 0)
-                f.writer.seek(0)
-                f.writer.write(data)
-                f.seek(0)
-                self.assertEqual(f.read(), data)
-            # Check that the BOM is not written after a seek() at a position
-            # different than the start
-            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
-                f.write(data)
-                f.seek(f.tell())
-                f.write(data)
-                f.seek(0)
-                self.assertEqual(f.read(), data * 2)
-            # (StreamWriter) Check that the BOM is not written after a seek()
-            # at a position different than the start
-            with codecs.open(test_support.TESTFN, 'w+', encoding=encoding) as f:
-                f.writer.write(data)
-                f.writer.seek(f.writer.tell())
-                f.writer.write(data)
-                f.seek(0)
-                self.assertEqual(f.read(), data * 2)
-def test_main():
-    test_support.run_unittest(
-        UTF32Test,
-        UTF32LETest,
-        UTF32BETest,
-        UTF16Test,
-        UTF16LETest,
-        UTF16BETest,
-        UTF8Test,
-        UTF8SigTest,
-        UTF7Test,
-        UTF16ExTest,
-        ReadBufferTest,
-        CharBufferTest,
-        EscapeDecodeTest,
-        RecodingTest,
-        PunycodeTest,
-        UnicodeInternalTest,
-        NameprepTest,
-        IDNACodecTest,
-        CodecsModuleTest,
-        StreamReaderTest,
-        EncodedFileTest,
-        Str2StrTest,
-        BasicUnicodeTest,
-        BasicStrTest,
-        CharmapTest,
-        WithStmtTest,
-        BomTest,
-    )
-if __name__ == "__main__":
-    test_main()
diff --git a/lib-python/modified-2.7/test/test_ssl.py b/lib-python/modified-2.7/test/test_ssl.py
--- a/lib-python/modified-2.7/test/test_ssl.py
+++ b/lib-python/modified-2.7/test/test_ssl.py
@@ -105,7 +105,6 @@
             print "didn't raise TypeError"
         ssl.RAND_add("this is a random string", 75.0)
-    @test_support.impl_detail("obscure test")
     def test_parse_cert(self):
         # note that this uses an 'unofficial' function in _ssl.c,
         # provided solely for this test, to exercise the certificate
@@ -840,6 +839,8 @@
                 c = socket.socket()
                 c.connect((HOST, port))
+                # XXX why is it necessary?
+                test_support.gc_collect()
                     ssl_sock = ssl.wrap_socket(c)
                 except IOError:
diff --git a/lib-python/2.7/uuid.py b/lib-python/modified-2.7/uuid.py
copy from lib-python/2.7/uuid.py
copy to lib-python/modified-2.7/uuid.py
--- a/lib-python/2.7/uuid.py
+++ b/lib-python/modified-2.7/uuid.py
@@ -406,8 +406,12 @@
         if hasattr(lib, 'uuid_generate_random'):
             _uuid_generate_random = lib.uuid_generate_random
+            _uuid_generate_random.argtypes = [ctypes.c_char * 16]
+            _uuid_generate_random.restype = None
         if hasattr(lib, 'uuid_generate_time'):
             _uuid_generate_time = lib.uuid_generate_time
+            _uuid_generate_time.argtypes = [ctypes.c_char * 16]
+            _uuid_generate_time.restype = None
     # The uuid_generate_* functions are broken on MacOS X 10.5, as noted
     # in issue #8621 the function generates the same sequence of values
@@ -436,6 +440,9 @@
         lib = None
     _UuidCreate = getattr(lib, 'UuidCreateSequential',
                           getattr(lib, 'UuidCreate', None))
+    if _UuidCreate is not None:
+        _UuidCreate.argtypes = [ctypes.c_char * 16]
+        _UuidCreate.restype = ctypes.c_int
diff --git a/lib_pypy/_codecs_cn.py b/lib_pypy/_codecs_cn.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/_codecs_cn.py
@@ -0,0 +1,7 @@
+# this getcodec() function supports any multibyte codec, although
+# for compatibility with CPython it should only be used for the
+# codecs from this module, i.e.:
+#     'gb2312', 'gbk', 'gb18030', 'hz'
+from _multibytecodec import __getcodec as getcodec
diff --git a/lib_pypy/_codecs_hk.py b/lib_pypy/_codecs_hk.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/_codecs_hk.py
@@ -0,0 +1,7 @@
+# this getcodec() function supports any multibyte codec, although
+# for compatibility with CPython it should only be used for the
+# codecs from this module, i.e.:
+#    'big5hkscs'
+from _multibytecodec import __getcodec as getcodec
diff --git a/lib_pypy/_codecs_iso2022.py b/lib_pypy/_codecs_iso2022.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/_codecs_iso2022.py
@@ -0,0 +1,8 @@
+# this getcodec() function supports any multibyte codec, although
+# for compatibility with CPython it should only be used for the
+# codecs from this module, i.e.:
+#    'iso2022_kr', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
+#    'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext'
+from _multibytecodec import __getcodec as getcodec
diff --git a/lib_pypy/_codecs_jp.py b/lib_pypy/_codecs_jp.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/_codecs_jp.py
@@ -0,0 +1,8 @@
+# this getcodec() function supports any multibyte codec, although
+# for compatibility with CPython it should only be used for the
+# codecs from this module, i.e.:
+#    'shift_jis', 'cp932', 'euc_jp', 'shift_jis_2004',
+#    'euc_jis_2004', 'euc_jisx0213', 'shift_jisx0213'
+from _multibytecodec import __getcodec as getcodec
diff --git a/lib_pypy/_codecs_kr.py b/lib_pypy/_codecs_kr.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/_codecs_kr.py
@@ -0,0 +1,7 @@
+# this getcodec() function supports any multibyte codec, although
+# for compatibility with CPython it should only be used for the
+# codecs from this module, i.e.:
+#    'euc_kr', 'cp949', 'johab'
+from _multibytecodec import __getcodec as getcodec
diff --git a/lib_pypy/_codecs_tw.py b/lib_pypy/_codecs_tw.py
new file mode 100644
--- /dev/null
+++ b/lib_pypy/_codecs_tw.py
@@ -0,0 +1,7 @@
+# this getcodec() function supports any multibyte codec, although
+# for compatibility with CPython it should only be used for the
+# codecs from this module, i.e.:
+#    'big5', 'cp950'
+from _multibytecodec import __getcodec as getcodec
diff --git a/lib_pypy/_sqlite3.py b/lib_pypy/_sqlite3.py
--- a/lib_pypy/_sqlite3.py
+++ b/lib_pypy/_sqlite3.py
@@ -180,9 +180,17 @@
 sqlite.sqlite3_libversion.argtypes = []
 sqlite.sqlite3_libversion.restype = c_char_p
 sqlite.sqlite3_open.argtypes = [c_char_p, c_void_p]
+sqlite.sqlite3_open.restype = c_int
 sqlite.sqlite3_prepare_v2.argtypes = [c_void_p, c_char_p, c_int, c_void_p, POINTER(c_char_p)]
+sqlite.sqlite3_prepare_v2.restype = c_int
 sqlite.sqlite3_column_decltype.argtypes = [c_void_p, c_int]
 sqlite.sqlite3_column_decltype.restype = c_char_p
+sqlite.sqlite3_step.argtypes = [c_void_p]
+sqlite.sqlite3_step.restype = c_int
+sqlite.sqlite3_reset.argtypes = [c_void_p]
+sqlite.sqlite3_reset.restype = c_int
+sqlite.sqlite3_column_count.argtypes = [c_void_p]
+sqlite.sqlite3_column_count.restype = c_int
 sqlite.sqlite3_result_blob.argtypes = [c_void_p, c_char_p, c_int, c_void_p]
 sqlite.sqlite3_result_int64.argtypes = [c_void_p, c_int64]
@@ -491,7 +499,7 @@
                 return callback(text1, text2)
             c_collation_callback = COLLATION(collation_callback)
-            self._collations[name] = collation_callback
+            self._collations[name] = c_collation_callback
         ret = sqlite.sqlite3_create_collation(self.db, name,
diff --git a/pypy/config/pypyoption.py b/pypy/config/pypyoption.py
--- a/pypy/config/pypyoption.py
+++ b/pypy/config/pypyoption.py
@@ -33,7 +33,7 @@
      "struct", "_hashlib", "_md5", "_sha", "_minimal_curses", "cStringIO",
      "thread", "itertools", "pyexpat", "_ssl", "cpyext", "array",
      "_bisect", "binascii", "_multiprocessing", '_warnings',
-     "_collections"]
+     "_collections", "_multibytecodec"]
 translation_modules = default_modules.copy()
diff --git a/pypy/config/support.py b/pypy/config/support.py
--- a/pypy/config/support.py
+++ b/pypy/config/support.py
@@ -2,13 +2,15 @@
 """ Some support code
-import re, sys, os
+import re, sys, os, subprocess
 def detect_number_of_processors(filename_or_file='/proc/cpuinfo'):
-    if sys.platform != 'linux2':
-        return 1    # implement me
     if os.environ.get('MAKEFLAGS'):
         return 1    # don't override MAKEFLAGS.  This will call 'make' without any '-j' option
+    if sys.platform == 'darwin':
+        return darwin_get_cpu_count()
+    elif sys.platform != 'linux2':
+        return 1    # implement me
         if isinstance(filename_or_file, str):
             f = open(filename_or_file, "r")
@@ -23,3 +25,12 @@
             return count
         return 1 # we really don't want to explode here, at worst we have 1
+def darwin_get_cpu_count(cmd = "/usr/sbin/sysctl hw.ncpu"):
+    try:
+        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True)
+        # 'hw.ncpu: 20'
+        count = proc.communicate()[0].rstrip()[8:]
+        return int(count)
+    except (OSError, ValueError):
+        return 1
diff --git a/pypy/config/test/test_support.py b/pypy/config/test/test_support.py
--- a/pypy/config/test/test_support.py
+++ b/pypy/config/test/test_support.py
@@ -1,6 +1,6 @@
 from cStringIO import StringIO
-from pypy.config.support import detect_number_of_processors
+from pypy.config import support
 import os, sys, py
 cpuinfo = """
@@ -39,15 +39,38 @@
         assert varname == 'MAKEFLAGS'
         return self._value
-def test_cpuinfo():
+def test_cpuinfo_linux():
     if sys.platform != 'linux2':
         py.test.skip("linux only")
     saved = os.environ
         os.environ = FakeEnviron(None)
-        assert detect_number_of_processors(StringIO(cpuinfo)) == 3
-        assert detect_number_of_processors('random crap that does not exist') == 1
+        assert support.detect_number_of_processors(StringIO(cpuinfo)) == 3
+        assert support.detect_number_of_processors('random crap that does not exist') == 1
         os.environ = FakeEnviron('-j2')
-        assert detect_number_of_processors(StringIO(cpuinfo)) == 1
+        assert support.detect_number_of_processors(StringIO(cpuinfo)) == 1
         os.environ = saved
+def test_cpuinfo_darwin():
+    if sys.platform != 'darwin':
+        py.test.skip('mac only')
+    saved_func = support.darwin_get_cpu_count
+    saved = os.environ
+    def count():
+        return 42
+    try:
+        support.darwin_get_cpu_count = count
+        os.environ = FakeEnviron(None)
+        assert support.detect_number_of_processors() == 42
+        os.environ = FakeEnviron('-j2')
+        assert support.detect_number_of_processors() == 1
+    finally:
+        os.environ = saved
+        support.darwin_get_cpu_count = saved_func
+def test_darwin_get_cpu_count():
+    if sys.platform != 'darwin':
+        py.test.skip('mac only')
+    assert support.darwin_get_cpu_count() > 0 # hopefully
+    assert support.darwin_get_cpu_count("false") == 1
diff --git a/pypy/config/translationoption.py b/pypy/config/translationoption.py
--- a/pypy/config/translationoption.py
+++ b/pypy/config/translationoption.py
@@ -164,9 +164,6 @@
     StrOption("linkerflags", "Specify flags for the linker (C backend only)",
-    BoolOption("force_make", "Force execution of makefile instead of"
-               " calling platform", cmdline="--force-make",
-               default=False, negation=False),
     IntOption("make_jobs", "Specify -j argument to make for compilation"
               " (C backend only)",
               cmdline="--make-jobs", default=detect_number_of_processors()),
diff --git a/pypy/doc/coding-guide.rst b/pypy/doc/coding-guide.rst
--- a/pypy/doc/coding-guide.rst
+++ b/pypy/doc/coding-guide.rst
@@ -560,12 +560,6 @@
 match an exception, as this will miss exceptions that are
 instances of subclasses.
-We are thinking about replacing ``OperationError`` with a
-family of common exception classes (e.g. ``AppKeyError``,
-``AppIndexError``...) so that we can more easily catch them.
-The generic ``AppError`` would stand for all other
-application-level classes.
 .. _`modules`:
diff --git a/pypy/doc/config/objspace.usemodules._multibytecodec.txt b/pypy/doc/config/objspace.usemodules._multibytecodec.txt
new file mode 100644
--- /dev/null
+++ b/pypy/doc/config/objspace.usemodules._multibytecodec.txt
@@ -0,0 +1,6 @@
+Use the '_multibytecodec' module.
+Used by the standard library to provide codecs for 'gb2312', 'gbk', 'gb18030',
+'hz', 'big5hkscs', 'iso2022_kr', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
+'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext', 'shift_jis', 'cp932',
+'euc_jp', 'shift_jis_2004', 'euc_jis_2004', 'euc_jisx0213', 'shift_jisx0213',
+'euc_kr', 'cp949', 'johab', 'big5', 'cp950'.
diff --git a/pypy/doc/config/translation.force_make.txt b/pypy/doc/config/translation.force_make.txt
deleted file mode 100644
--- a/pypy/doc/config/translation.force_make.txt
+++ /dev/null
@@ -1,1 +0,0 @@
-Force executing makefile instead of using platform.
diff --git a/pypy/doc/eventhistory.rst b/pypy/doc/eventhistory.rst
--- a/pypy/doc/eventhistory.rst
+++ b/pypy/doc/eventhistory.rst
@@ -267,7 +267,7 @@
 .. _`day 1`: http://codespeak.net/pipermail/pypy-dev/2005q2/002169.html
 .. _`day 2`: http://codespeak.net/pipermail/pypy-dev/2005q2/002171.html
 .. _`day 3`: http://codespeak.net/pipermail/pypy-dev/2005q2/002172.html
-.. _`pypy-dev`: http://codespeak.net/mailman/listinfo/pypy-dev
+.. _`pypy-dev`: http://python.org/mailman/listinfo/pypy-dev
 .. _EuroPython: http://europython.org 
 .. _`translation`: translation.html 
diff --git a/pypy/doc/extradoc.rst b/pypy/doc/extradoc.rst
--- a/pypy/doc/extradoc.rst
+++ b/pypy/doc/extradoc.rst
@@ -67,7 +67,7 @@
 .. _bibtex: https://bitbucket.org/pypy/extradoc/raw/tip/talk/bibtex.bib
 .. _`Allocation Removal by Partial Evaluation in a Tracing JIT`: http://codespeak.net/svn/pypy/extradoc/talk/pepm2011/bolz-allocation-removal.pdf
 .. _`Towards a Jitting VM for Prolog Execution`: http://www.stups.uni-duesseldorf.de/publications/bolz-prolog-jit.pdf
-.. _`High performance implementation of Python for CLI/.NET with JIT compiler generation for dynamic languages`: http://codespeak.net/svn/user/antocuni/phd/thesis/thesis.pdf
+.. _`High performance implementation of Python for CLI/.NET with JIT compiler generation for dynamic languages`: http://buildbot.pypy.org/misc/antocuni-thesis.pdf
 .. _`How to *not* write Virtual Machines for Dynamic Languages`: https://bitbucket.org/pypy/extradoc/raw/tip/talk/dyla2007/dyla.pdf
 .. _`Tracing the Meta-Level: PyPy's Tracing JIT Compiler`: https://bitbucket.org/pypy/extradoc/raw/tip/talk/icooolps2009/bolz-tracing-jit.pdf
 .. _`Faster than C#: Efficient Implementation of Dynamic Languages on .NET`: https://bitbucket.org/pypy/extradoc/raw/tip/talk/icooolps2009-dotnet/cli-jit.pdf
@@ -335,7 +335,7 @@
   Microsoft's Common Language Runtime (CLR) Intermediate Language (IL).
 * Tunes_ is not entirely unrelated.  The web site changed a lot, but a
-  snapshot of the `old Tunes Wiki`_ is available on codespeak; browsing
+  snapshot of the `old Tunes Wiki`_ is available; browsing
   through it is a lot of fun.
 .. _TraceMonkey: https://wiki.mozilla.org/JavaScript:TraceMonkey
@@ -355,4 +355,4 @@
 .. _`Dynamic Native Optimization of Native Interpreters`: http://people.csail.mit.edu/gregs/dynamorio.html
 .. _JikesRVM: http://jikesrvm.org/
 .. _Tunes: http://tunes.org
-.. _`old Tunes Wiki`: http://codespeak.net/cliki.tunes.org/
+.. _`old Tunes Wiki`: http://buildbot.pypy.org/misc/cliki.tunes.org/
diff --git a/pypy/doc/faq.rst b/pypy/doc/faq.rst
--- a/pypy/doc/faq.rst
+++ b/pypy/doc/faq.rst
@@ -162,7 +162,7 @@
 .. _`contact us`: index.html
-.. _`mailing list`: http://codespeak.net/mailman/listinfo/pypy-dev
+.. _`mailing list`: http://python.org/mailman/listinfo/pypy-dev
 OSError: ... cannot restore segment prot after reloc... Help?
diff --git a/pypy/doc/getting-started-dev.rst b/pypy/doc/getting-started-dev.rst
--- a/pypy/doc/getting-started-dev.rst
+++ b/pypy/doc/getting-started-dev.rst
@@ -369,7 +369,7 @@
 .. _`full Python interpreter`: getting-started-python.html
 .. _`the blog`: http://morepypy.blogspot.com
-.. _`pypy-dev mailing list`: http://codespeak.net/mailman/listinfo/pypy-dev
+.. _`pypy-dev mailing list`: http://python.org/mailman/listinfo/pypy-dev
 .. _`contact possibilities`: index.html
 .. _`py library`: http://pylib.org
diff --git a/pypy/doc/getting-started-python.rst b/pypy/doc/getting-started-python.rst
--- a/pypy/doc/getting-started-python.rst
+++ b/pypy/doc/getting-started-python.rst
@@ -217,23 +217,29 @@
 is "similar enough": some details of the system on which the translation
 occurred might be hard-coded in the executable.
-For installation purposes, note that the executable needs to be able to
-find its version of the Python standard library in the following three
-directories: ``lib-python/2.7``, ``lib-python/modified-2.7`` and
-``lib_pypy``.  They are located by "looking around" starting from the
-directory in which the executable resides.  The current logic is to try
-to find a ``PREFIX`` from which the directories
-``PREFIX/lib-python/2.7`` and ``PREFIX/lib-python/modified.2.7`` and
-``PREFIX/lib_pypy`` can all be found.  The prefixes that are tried are::
+PyPy dynamically finds the location of its libraries depending on the location
+of the executable.  The directory hierarchy of a typical PyPy installation
+looks like this::
-    .
-    ./lib/pypy1.5
-    ..
-    ../lib/pypy1.5
-    ../..
-    ../../lib/pypy-1.5
-    ../../..
-    etc.
+   ./bin/pypy
+   ./include/
+   ./lib_pypy/
+   ./lib-python/2.7
+   ./lib-python/modified-2.7
+   ./site-packages/
+The hierarchy shown above is relative to a PREFIX directory.  PREFIX is
+computed by starting from the directory where the executable resides, and
+"walking up" the filesystem until we find a directory containing ``lib_pypy``,
+``lib-python/2.7`` and ``lib-python/modified-2.7``.
+The archives (.tar.bz2 or .zip) containing PyPy releases already contain the
+correct hierarchy, so to run PyPy it's enough to unpack the archive, and run
+the ``bin/pypy`` executable.
+To install PyPy system wide on unix-like systems, it is recommended to put the
+whole hierarchy alone (e.g. in ``/opt/pypy1.5``) and put a symlink to the
+``pypy`` executable into ``/usr/bin`` or ``/usr/local/bin``.
 If the executable fails to find suitable libraries, it will report
 ``debug: WARNING: library path not found, using compiled-in sys.path``
diff --git a/pypy/doc/index-report.rst b/pypy/doc/index-report.rst
--- a/pypy/doc/index-report.rst
+++ b/pypy/doc/index-report.rst
@@ -99,7 +99,7 @@
 .. _`py-lib`: http://pylib.org/
 .. _`py.test`: http://pytest.org/
 .. _codespeak: http://codespeak.net/
-.. _`pypy-dev`: http://codespeak.net/mailman/listinfo/pypy-dev
+.. _`pypy-dev`: http://python.org/mailman/listinfo/pypy-dev
 Reports of 2006
diff --git a/pypy/doc/index.rst b/pypy/doc/index.rst
--- a/pypy/doc/index.rst
+++ b/pypy/doc/index.rst
@@ -52,8 +52,6 @@
 * `Mercurial commit mailing list`_: updates to code and
-* `Sprint mailing list`_: mailing list for organizing upcoming sprints. 
 * `Development bug/feature tracker`_: filing bugs and feature requests. 
 * **IRC channel #pypy on freenode**: Many of the core developers are hanging out 
@@ -76,9 +74,8 @@
 .. _`PyPy blog`: http://morepypy.blogspot.com/
 .. _`development bug/feature tracker`: https://codespeak.net/issue/pypy-dev/ 
 .. _here: http://tismerysoft.de/pypy/irc-logs/pypy
-.. _`sprint mailing list`: http://codespeak.net/mailman/listinfo/pypy-sprint 
-.. _`Mercurial commit mailing list`: http://codespeak.net/mailman/listinfo/pypy-svn
-.. _`development mailing list`: http://codespeak.net/mailman/listinfo/pypy-dev
+.. _`Mercurial commit mailing list`: http://python.org/mailman/listinfo/pypy-commit
+.. _`development mailing list`: http://python.org/mailman/listinfo/pypy-dev
 .. _`FAQ`: faq.html
 .. _`Getting Started`: getting-started.html
 .. _`Papers`: extradoc.html
diff --git a/pypy/doc/statistic/index.rst b/pypy/doc/statistic/index.rst
--- a/pypy/doc/statistic/index.rst
+++ b/pypy/doc/statistic/index.rst
@@ -63,5 +63,5 @@
 .. image:: webaccess.png
-.. _`pypy-dev`: http://codespeak.net/mailman/listinfo/pypy-svn
-.. _`pypy-svn`: http://codespeak.net/mailman/listinfo/pypy-dev
+.. _`pypy-dev`: http://python.org/mailman/listinfo/pypy-dev
+.. _`pypy-svn`: http://python.org/mailman/listinfo/pypy-commit
diff --git a/pypy/doc/translation.rst b/pypy/doc/translation.rst
--- a/pypy/doc/translation.rst
+++ b/pypy/doc/translation.rst
@@ -684,7 +684,7 @@
 .. _`Common Language Infrastructure`: http://www.ecma-international.org/publications/standards/Ecma-335.htm
 .. _`.NET`: http://www.microsoft.com/net/
 .. _Mono: http://www.mono-project.com/
-.. _`Master's thesis`: http://codespeak.net/~antocuni/Implementing%20Python%20in%20.NET.pdf
+.. _`Master's thesis`: http://buildbot.pypy.org/misc/Implementing%20Python%20in%20.NET.pdf
 .. _GenCLI: cli-backend.html
diff --git a/pypy/doc/video-index.rst b/pypy/doc/video-index.rst
--- a/pypy/doc/video-index.rst
+++ b/pypy/doc/video-index.rst
@@ -42,11 +42,11 @@
 Trailer: PyPy at the PyCon 2006
-130mb: http://wyvern.cs.uni-duesseldorf.de/torrent/pycon-trailer.avi.torrent
+130mb: http://buildbot.pypy.org/misc/torrent/pycon-trailer.avi.torrent
-71mb: http://wyvern.cs.uni-duesseldorf.de/torrent/pycon-trailer-medium.avi.torrent
+71mb: http://buildbot.pypy.org/misc/torrent/pycon-trailer-medium.avi.torrent
-50mb: http://wyvern.cs.uni-duesseldorf.de/torrent/pycon-trailer-320x240.avi.torrent
+50mb: http://buildbot.pypy.org/misc/torrent/pycon-trailer-320x240.avi.torrent
 .. image:: image/pycon-trailer.jpg
    :scale: 100
@@ -62,9 +62,9 @@
 Interview with Tim Peters
-440mb: http://wyvern.cs.uni-duesseldorf.de/torrent/interview-timpeters-v2.avi.torrent
+440mb: http://buildbot.pypy.org/misc/torrent/interview-timpeters-v2.avi.torrent
-138mb: http://wyvern.cs.uni-duesseldorf.de/torrent/interview-timpeters-320x240.avi.torrent
+138mb: http://buildbot.pypy.org/misc/torrent/interview-timpeters-320x240.avi.torrent
 .. image:: image/interview-timpeters.jpg
    :scale: 100
@@ -82,9 +82,9 @@
 Interview with Bob Ippolito
-155mb: http://wyvern.cs.uni-duesseldorf.de/torrent/interview-bobippolito-v2.avi.torrent
+155mb: http://buildbot.pypy.org/misc/torrent/interview-bobippolito-v2.avi.torrent
-50mb: http://wyvern.cs.uni-duesseldorf.de/torrent/interview-bobippolito-320x240.avi.torrent
+50mb: http://buildbot.pypy.org/misc/torrent/interview-bobippolito-320x240.avi.torrent
 .. image:: image/interview-bobippolito.jpg
    :scale: 100
@@ -102,9 +102,9 @@
 Introductory talk on PyPy
-430mb: http://wyvern.cs.uni-duesseldorf.de/torrent/introductory-talk-pycon-v1.avi.torrent
+430mb: http://buildbot.pypy.org/misc/torrent/introductory-talk-pycon-v1.avi.torrent
-166mb: http://wyvern.cs.uni-duesseldorf.de/torrent/introductory-talk-pycon-320x240.avi.torrent
+166mb: http://buildbot.pypy.org/misc/torrent/introductory-talk-pycon-320x240.avi.torrent
 .. image:: image/introductory-talk-pycon.jpg
    :scale: 100
@@ -125,9 +125,9 @@
 Talk on Agile Open Source Methods in the PyPy project
-395mb: http://wyvern.cs.uni-duesseldorf.de/torrent/agile-talk-v1.avi.torrent
+395mb: http://buildbot.pypy.org/misc/torrent/agile-talk-v1.avi.torrent
-153mb: http://wyvern.cs.uni-duesseldorf.de/torrent/agile-talk-320x240.avi.torrent
+153mb: http://buildbot.pypy.org/misc/torrent/agile-talk-320x240.avi.torrent
 .. image:: image/agile-talk.jpg
    :scale: 100
@@ -148,9 +148,9 @@
 PyPy Architecture session
-744mb: http://wyvern.cs.uni-duesseldorf.de/torrent/architecture-session-v1.avi.torrent
+744mb: http://buildbot.pypy.org/misc/torrent/architecture-session-v1.avi.torrent
-288mb: http://wyvern.cs.uni-duesseldorf.de/torrent/architecture-session-320x240.avi.torrent
+288mb: http://buildbot.pypy.org/misc/torrent/architecture-session-320x240.avi.torrent
 .. image:: image/architecture-session.jpg
    :scale: 100
@@ -171,9 +171,9 @@
 Sprint tutorial
-680mb: http://wyvern.cs.uni-duesseldorf.de/torrent/sprint-tutorial-v2.avi.torrent
+680mb: http://buildbot.pypy.org/misc/torrent/sprint-tutorial-v2.avi.torrent
-263mb: http://wyvern.cs.uni-duesseldorf.de/torrent/sprint-tutorial-320x240.avi.torrent
+263mb: http://buildbot.pypy.org/misc/torrent/sprint-tutorial-320x240.avi.torrent
 .. image:: image/sprint-tutorial.jpg
    :scale: 100
@@ -190,9 +190,9 @@
 Scripting .NET with IronPython by Jim Hugunin
-372mb: http://wyvern.cs.uni-duesseldorf.de/torrent/ironpython-talk-v2.avi.torrent
+372mb: http://buildbot.pypy.org/misc/torrent/ironpython-talk-v2.avi.torrent
-270mb: http://wyvern.cs.uni-duesseldorf.de/torrent/ironpython-talk-320x240.avi.torrent
+270mb: http://buildbot.pypy.org/misc/torrent/ironpython-talk-320x240.avi.torrent
 .. image:: image/ironpython.jpg
    :scale: 100
@@ -209,9 +209,9 @@
 Bram Cohen, founder and developer of BitTorrent
-509mb: http://wyvern.cs.uni-duesseldorf.de/torrent/bram-cohen-interview-v1.avi.torrent
+509mb: http://buildbot.pypy.org/misc/torrent/bram-cohen-interview-v1.avi.torrent
-370mb: http://wyvern.cs.uni-duesseldorf.de/torrent/bram-cohen-interview-320x240.avi.torrent
+370mb: http://buildbot.pypy.org/misc/torrent/bram-cohen-interview-320x240.avi.torrent
 .. image:: image/bram.jpg
    :scale: 100
@@ -226,9 +226,9 @@
 Keynote speech by Guido van Rossum on the new Python 2.5 features
-695mb: http://wyvern.cs.uni-duesseldorf.de/torrent/keynote-speech_guido-van-rossum_v1.avi.torrent
+695mb: http://buildbot.pypy.org/misc/torrent/keynote-speech_guido-van-rossum_v1.avi.torrent
-430mb: http://wyvern.cs.uni-duesseldorf.de/torrent/keynote-speech_guido-van-rossum_320x240.avi.torrent
+430mb: http://buildbot.pypy.org/misc/torrent/keynote-speech_guido-van-rossum_320x240.avi.torrent
 .. image:: image/guido.jpg
    :scale: 100
@@ -243,11 +243,11 @@
 Trailer: PyPy sprint at the University of Palma de Mallorca
-166mb: http://wyvern.cs.uni-duesseldorf.de/torrent/mallorca-trailer-v1.avi.torrent
+166mb: http://buildbot.pypy.org/misc/torrent/mallorca-trailer-v1.avi.torrent
-88mb: http://wyvern.cs.uni-duesseldorf.de/torrent/mallorca-trailer-medium.avi.torrent
+88mb: http://buildbot.pypy.org/misc/torrent/mallorca-trailer-medium.avi.torrent
-64mb: http://wyvern.cs.uni-duesseldorf.de/torrent/mallorca-trailer-320x240.avi.torrent
+64mb: http://buildbot.pypy.org/misc/torrent/mallorca-trailer-320x240.avi.torrent
 .. image:: image/mallorca-trailer.jpg
    :scale: 100
@@ -262,9 +262,9 @@
 Coding discussion of core developers Armin Rigo and Samuele Pedroni
-620mb: http://wyvern.cs.uni-duesseldorf.de/torrent/coding-discussion-v1.avi.torrent
+620mb: http://buildbot.pypy.org/misc/torrent/coding-discussion-v1.avi.torrent
-240mb: http://wyvern.cs.uni-duesseldorf.de/torrent/coding-discussion-320x240.avi.torrent
+240mb: http://buildbot.pypy.org/misc/torrent/coding-discussion-320x240.avi.torrent
 .. image:: image/coding-discussion.jpg
    :scale: 100
@@ -279,9 +279,9 @@
 PyPy technical talk at the University of Palma de Mallorca
-865mb: http://wyvern.cs.uni-duesseldorf.de/torrent/introductory-student-talk-v2.avi.torrent
+865mb: http://buildbot.pypy.org/misc/torrent/introductory-student-talk-v2.avi.torrent
-437mb: http://wyvern.cs.uni-duesseldorf.de/torrent/introductory-student-talk-320x240.avi.torrent
+437mb: http://buildbot.pypy.org/misc/torrent/introductory-student-talk-320x240.avi.torrent
 .. image:: image/introductory-student-talk.jpg
    :scale: 100
diff --git a/pypy/interpreter/argument.py b/pypy/interpreter/argument.py
--- a/pypy/interpreter/argument.py
+++ b/pypy/interpreter/argument.py
@@ -615,33 +615,42 @@
         self.num_kwds = nkwds
     def getmsg(self, fnname):
-        args = None
-        #args_w, kwds_w = args.unpack()
-        nargs = self.num_args + self.num_kwds
         n = self.expected_nargs
         if n == 0:
-            msg = "%s() takes no argument (%d given)" % (
+            msg = "%s() takes no arguments (%d given)" % (
-                nargs)
+                self.num_args + self.num_kwds)
             defcount = self.num_defaults
+            has_kwarg = self.has_kwarg
+            num_args = self.num_args
+            num_kwds = self.num_kwds
             if defcount == 0 and not self.has_vararg:
                 msg1 = "exactly"
+                if not has_kwarg:
+                    num_args += num_kwds
+                    num_kwds = 0
             elif not self.missing_args:
                 msg1 = "at most"
                 msg1 = "at least"
+                has_kwarg = False
                 n -= defcount
             if n == 1:
                 plural = ""
                 plural = "s"
-            msg = "%s() takes %s %d argument%s (%d given)" % (
+            if has_kwarg or num_kwds > 0:
+                msg2 = " non-keyword"
+            else:
+                msg2 = ""
+            msg = "%s() takes %s %d%s argument%s (%d given)" % (
+                msg2,
-                nargs)
+                num_args)
         return msg
 class ArgErrMultipleValues(ArgErr):
diff --git a/pypy/interpreter/baseobjspace.py b/pypy/interpreter/baseobjspace.py
--- a/pypy/interpreter/baseobjspace.py
+++ b/pypy/interpreter/baseobjspace.py
@@ -890,8 +890,7 @@
             w_res = self.call_args(w_func, args)
         except OperationError, e:
-            w_value = e.get_w_value(self)
-            ec.c_exception_trace(frame, w_value)
+            ec.c_exception_trace(frame, w_func)
         ec.c_return_trace(frame, w_func, args)
         return w_res
diff --git a/pypy/interpreter/executioncontext.py b/pypy/interpreter/executioncontext.py
--- a/pypy/interpreter/executioncontext.py
+++ b/pypy/interpreter/executioncontext.py
@@ -56,10 +56,10 @@
         frame.f_backref = self.topframeref
         self.topframeref = jit.virtual_ref(frame)
-    def leave(self, frame):
+    def leave(self, frame, w_exitvalue):
             if self.profilefunc:
-                self._trace(frame, 'leaveframe', self.space.w_None)
+                self._trace(frame, 'leaveframe', w_exitvalue)
             self.topframeref = frame.f_backref
diff --git a/pypy/interpreter/pyframe.py b/pypy/interpreter/pyframe.py
--- a/pypy/interpreter/pyframe.py
+++ b/pypy/interpreter/pyframe.py
@@ -138,6 +138,7 @@
                 not self.space.config.translating)
         executioncontext = self.space.getexecutioncontext()
+        w_exitvalue = self.space.w_None
@@ -166,7 +167,7 @@
             # allocating exception objects in some cases
             self.last_exception = None
-            executioncontext.leave(self)
+            executioncontext.leave(self, w_exitvalue)
         return w_exitvalue
     execute_frame.insert_stack_check_here = True
diff --git a/pypy/interpreter/test/test_argument.py b/pypy/interpreter/test/test_argument.py
--- a/pypy/interpreter/test/test_argument.py
+++ b/pypy/interpreter/test/test_argument.py
@@ -512,25 +512,34 @@
         # defaults_w, missing_args
         err = ArgErrCount(1, 0, 0, False, False, None, 0)
         s = err.getmsg('foo')
-        assert s == "foo() takes no argument (1 given)"
+        assert s == "foo() takes no arguments (1 given)"
         err = ArgErrCount(0, 0, 1, False, False, [], 1)
         s = err.getmsg('foo')
         assert s == "foo() takes exactly 1 argument (0 given)"
         err = ArgErrCount(3, 0, 2, False, False, [], 0)
         s = err.getmsg('foo')
         assert s == "foo() takes exactly 2 arguments (3 given)"
+        err = ArgErrCount(3, 0, 2, False, False, ['a'], 0)
+        s = err.getmsg('foo')
+        assert s == "foo() takes at most 2 arguments (3 given)"
         err = ArgErrCount(1, 0, 2, True, False, [], 1)
         s = err.getmsg('foo')
         assert s == "foo() takes at least 2 arguments (1 given)"
-        err = ArgErrCount(3, 0, 2, True, False, ['a'], 0)
-        s = err.getmsg('foo')
-        assert s == "foo() takes at most 2 arguments (3 given)"
         err = ArgErrCount(0, 1, 2, True, False, ['a'], 1)
         s = err.getmsg('foo')
-        assert s == "foo() takes at least 1 argument (1 given)"
+        assert s == "foo() takes at least 1 non-keyword argument (0 given)"
         err = ArgErrCount(2, 1, 1, False, True, [], 0)
         s = err.getmsg('foo')
-        assert s == "foo() takes exactly 1 argument (3 given)"
+        assert s == "foo() takes exactly 1 non-keyword argument (2 given)"
+        err = ArgErrCount(0, 1, 1, False, True, [], 1)
+        s = err.getmsg('foo')
+        assert s == "foo() takes exactly 1 non-keyword argument (0 given)"
+        err = ArgErrCount(0, 1, 1, True, True, [], 1)
+        s = err.getmsg('foo')
+        assert s == "foo() takes at least 1 non-keyword argument (0 given)"
+        err = ArgErrCount(2, 1, 1, False, True, ['a'], 0)
+        s = err.getmsg('foo')
+        assert s == "foo() takes at most 1 non-keyword argument (2 given)"
     def test_bad_type_for_star(self):
         space = self.space
@@ -565,15 +574,23 @@
 class AppTestArgument:
     def test_error_message(self):
         exc = raises(TypeError, (lambda a, b=2: 0), b=3)
-        assert exc.value.message == "<lambda>() takes at least 1 argument (1 given)"
+        assert exc.value.message == "<lambda>() takes at least 1 non-keyword argument (0 given)"
         exc = raises(TypeError, (lambda: 0), b=3)
-        assert exc.value.message == "<lambda>() takes no argument (1 given)"
+        assert exc.value.message == "<lambda>() takes no arguments (1 given)"
         exc = raises(TypeError, (lambda a, b: 0), 1, 2, 3, a=1)
         assert exc.value.message == "<lambda>() takes exactly 2 arguments (4 given)"
         exc = raises(TypeError, (lambda a, b=1: 0), 1, 2, 3, a=1)
-        assert exc.value.message == "<lambda>() takes at most 2 arguments (4 given)"
+        assert exc.value.message == "<lambda>() takes at most 2 non-keyword arguments (3 given)"
         exc = raises(TypeError, (lambda a, b=1, **kw: 0), 1, 2, 3)
-        assert exc.value.message == "<lambda>() takes at most 2 arguments (3 given)"
+        assert exc.value.message == "<lambda>() takes at most 2 non-keyword arguments (3 given)"
+        exc = raises(TypeError, (lambda a, b, c=3, **kw: 0), 1)
+        assert exc.value.message == "<lambda>() takes at least 2 arguments (1 given)"
+        exc = raises(TypeError, (lambda a, b, **kw: 0), 1)
+        assert exc.value.message == "<lambda>() takes exactly 2 non-keyword arguments (1 given)"
+        exc = raises(TypeError, (lambda a, b, c=3, **kw: 0), a=1)
+        assert exc.value.message == "<lambda>() takes at least 2 non-keyword arguments (0 given)"
+        exc = raises(TypeError, (lambda a, b, **kw: 0), a=1)
+        assert exc.value.message == "<lambda>() takes exactly 2 non-keyword arguments (0 given)"
 def make_arguments_for_translation(space, args_w, keywords_w={},
                                    w_stararg=None, w_starstararg=None):
diff --git a/pypy/interpreter/test/test_executioncontext.py b/pypy/interpreter/test/test_executioncontext.py
--- a/pypy/interpreter/test/test_executioncontext.py
+++ b/pypy/interpreter/test/test_executioncontext.py
@@ -324,3 +324,70 @@
         assert 'Called 1' in data
         assert 'Called 2' in data
+class AppTestProfile:
+    def test_return(self):
+        import sys
+        l = []
+        def profile(frame, event, arg):
+            l.append((event, arg))
+        def bar(x):
+            return 40 + x
+        sys.setprofile(profile)
+        bar(2)
+        sys.setprofile(None)
+        assert l == [('call', None),
+                     ('return', 42),
+                     ('c_call', sys.setprofile)], repr(l)
+    def test_c_return(self):
+        import sys
+        l = []
+        def profile(frame, event, arg):
+            l.append((event, arg))
+        sys.setprofile(profile)
+        max(2, 42)
+        sys.setprofile(None)
+        assert l == [('c_call', max),
+                     ('c_return', max),
+                     ('c_call', sys.setprofile)], repr(l)
+    def test_exception(self):
+        import sys
+        l = []
+        def profile(frame, event, arg):
+            l.append((event, arg))
+        def f():
+            raise ValueError("foo")
+        sys.setprofile(profile)
+        try:
+            f()
+        except ValueError:
+            pass
+        sys.setprofile(None)
+        assert l == [('call', None),
+                     ('return', None),
+                     ('c_call', sys.setprofile)], repr(l)
+    def test_c_exception(self):
+        import sys
+        l = []
+        def profile(frame, event, arg):
+            l.append((event, arg))
+        sys.setprofile(profile)
+        try:
+            divmod(5, 0)
+        except ZeroDivisionError:
+            pass
+        sys.setprofile(None)
+        assert l == [('c_call', divmod),
+                     ('c_exception', divmod),
+                     ('c_call', sys.setprofile)], repr(l)
diff --git a/pypy/interpreter/test/test_function.py b/pypy/interpreter/test/test_function.py
--- a/pypy/interpreter/test/test_function.py
+++ b/pypy/interpreter/test/test_function.py
@@ -98,6 +98,14 @@
             raises(TypeError, "dir.func_code = f.func_code")
             raises(TypeError, "list.append.im_func.func_code = f.func_code")
+    def test_set_module_to_name_eagerly(self):
+        skip("fails on PyPy but works on CPython.  Unsure we want to care")
+        exec '''if 1:
+            __name__ = "foo"
+            def f(): pass
+            __name__ = "bar"
+            assert f.__module__ == "foo"''' in {}
 class AppTestFunction:
     def test_simple_call(self):
diff --git a/pypy/jit/backend/arm/test/test_gc_integration.py b/pypy/jit/backend/arm/test/test_gc_integration.py
--- a/pypy/jit/backend/arm/test/test_gc_integration.py
+++ b/pypy/jit/backend/arm/test/test_gc_integration.py
@@ -69,6 +69,7 @@
         self.single_gcref_descr = GcPtrFieldDescr('', 0)
+    replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/llsupport/gc.py b/pypy/jit/backend/llsupport/gc.py
--- a/pypy/jit/backend/llsupport/gc.py
+++ b/pypy/jit/backend/llsupport/gc.py
@@ -35,7 +35,7 @@
     def do_write_barrier(self, gcref_struct, gcref_newptr):
     def rewrite_assembler(self, cpu, operations):
-        pass
+        return operations
     def can_inline_malloc(self, descr):
         return False
     def can_inline_malloc_varsize(self, descr, num_elem):
@@ -772,6 +772,31 @@
+    def replace_constptrs_with_getfield_raw(self, cpu, newops, op):
+        # xxx some performance issue here
+        newargs = [None] * op.numargs()
+        needs_copy = False
+        for i in range(op.numargs()):
+            v = op.getarg(i)
+            newargs[i] = v
+            if isinstance(v, ConstPtr) and bool(v.value):
+                addr = self.gcrefs.get_address_of_gcref(v.value)
+                # ^^^even for non-movable objects, to record their presence
+                if rgc.can_move(v.value):
+                    box = BoxPtr(v.value)
+                    addr = cpu.cast_adr_to_int(addr)
+                    newops.append(ResOperation(rop.GETFIELD_RAW,
+                                               [ConstInt(addr)], box,
+                                               self.single_gcref_descr))
+                    newargs[i] = box
+                    needs_copy = True
+        #
+        if needs_copy:
+            return op.copy_and_change(op.getopnum(), args=newargs)
+        else:
+            return op
     def rewrite_assembler(self, cpu, operations):
         # Perform two kinds of rewrites in parallel:
@@ -794,19 +819,7 @@
             if op.getopnum() == rop.DEBUG_MERGE_POINT:
             # ---------- replace ConstPtrs with GETFIELD_RAW ----------
-            # xxx some performance issue here
-            for i in range(op.numargs()):
-                v = op.getarg(i)
-                if isinstance(v, ConstPtr) and bool(v.value):
-                    addr = self.gcrefs.get_address_of_gcref(v.value)
-                    # ^^^even for non-movable objects, to record their presence
-                    if rgc.can_move(v.value):
-                        box = BoxPtr(v.value)
-                        addr = cpu.cast_adr_to_int(addr)
-                        newops.append(ResOperation(rop.GETFIELD_RAW,
-                                                   [ConstInt(addr)], box,
-                                                   self.single_gcref_descr))
-                        op.setarg(i, box)
+            op = self.replace_constptrs_with_getfield_raw(cpu, newops, op)
             if op.is_malloc():
                 last_malloc = op.result
             elif op.can_malloc():
@@ -835,8 +848,7 @@
                         op = op.copy_and_change(rop.SETARRAYITEM_RAW)
             # ----------
-        del operations[:]
-        operations.extend(newops)
+        return newops
     def _gen_write_barrier(self, newops, v_base, v_value):
         args = [v_base, v_value]
diff --git a/pypy/jit/backend/llsupport/test/test_gc.py b/pypy/jit/backend/llsupport/test/test_gc.py
--- a/pypy/jit/backend/llsupport/test/test_gc.py
+++ b/pypy/jit/backend/llsupport/test/test_gc.py
@@ -6,6 +6,7 @@
 from pypy.jit.backend.llsupport.gc import *
 from pypy.jit.backend.llsupport import symbolic
 from pypy.jit.metainterp.gc import get_description
+from pypy.jit.metainterp.resoperation import get_deep_immutable_oplist
 from pypy.jit.tool.oparser import parse
 from pypy.rpython.lltypesystem.rclass import OBJECT, OBJECT_VTABLE
 from pypy.jit.metainterp.test.test_optimizeopt import equaloplists
@@ -413,7 +414,7 @@
             ResOperation(rop.DEBUG_MERGE_POINT, ['dummy', 2], None),
         gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.rewrite_assembler(None, operations)
+        operations = gc_ll_descr.rewrite_assembler(None, operations)
         assert len(operations) == 0
     def test_rewrite_assembler_1(self):
@@ -437,7 +438,8 @@
         gc_ll_descr = self.gc_ll_descr
         gc_ll_descr.gcrefs = MyFakeGCRefList()
-        gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
+        operations = get_deep_immutable_oplist(operations)
+        operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
         assert len(operations) == 2
         assert operations[0].getopnum() == rop.GETFIELD_RAW
         assert operations[0].getarg(0) == ConstInt(43)
@@ -472,9 +474,10 @@
         gc_ll_descr = self.gc_ll_descr
         gc_ll_descr.gcrefs = MyFakeGCRefList()
         old_can_move = rgc.can_move
+        operations = get_deep_immutable_oplist(operations)
             rgc.can_move = lambda s: False
-            gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
+            operations = gc_ll_descr.rewrite_assembler(MyFakeCPU(), operations)
             rgc.can_move = old_can_move
         assert len(operations) == 1
@@ -496,7 +499,8 @@
         gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = get_deep_immutable_oplist(operations)
+        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
         assert len(operations) == 2
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -520,7 +524,8 @@
         gc_ll_descr = self.gc_ll_descr
-        gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        operations = get_deep_immutable_oplist(operations)
+        operations = gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
         assert len(operations) == 2
         assert operations[0].getopnum() == rop.COND_CALL_GC_WB
@@ -552,8 +557,9 @@
         setfield_gc(p0, p1, descr=xdescr)
         """, namespace=locals())
-        self.gc_ll_descr.rewrite_assembler(self.fake_cpu, ops.operations)
-        equaloplists(ops.operations, expected.operations)
+        operations = get_deep_immutable_oplist(ops.operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        equaloplists(operations, expected.operations)
     def test_rewrite_assembler_initialization_store_2(self):
         S = lltype.GcStruct('S', ('parent', OBJECT),
@@ -576,8 +582,9 @@
         setfield_raw(p0, p1, descr=xdescr)
         """, namespace=locals())
-        self.gc_ll_descr.rewrite_assembler(self.fake_cpu, ops.operations)
-        equaloplists(ops.operations, expected.operations)
+        operations = get_deep_immutable_oplist(ops.operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        equaloplists(operations, expected.operations)
     def test_rewrite_assembler_initialization_store_3(self):
         A = lltype.GcArray(lltype.Ptr(lltype.GcStruct('S')))
@@ -594,8 +601,9 @@
         setarrayitem_gc(p0, 0, p1, descr=arraydescr)
         """, namespace=locals())
-        self.gc_ll_descr.rewrite_assembler(self.fake_cpu, ops.operations)
-        equaloplists(ops.operations, expected.operations)
+        operations = get_deep_immutable_oplist(ops.operations)
+        operations = self.gc_ll_descr.rewrite_assembler(self.fake_cpu, operations)
+        equaloplists(operations, expected.operations)
 class TestFrameworkMiniMark(TestFramework):
     gc = 'minimark'
diff --git a/pypy/jit/backend/model.py b/pypy/jit/backend/model.py
--- a/pypy/jit/backend/model.py
+++ b/pypy/jit/backend/model.py
@@ -58,12 +58,19 @@
         """Called once by the front-end when the program stops."""
     def compile_loop(self, inputargs, operations, looptoken, log=True):
         """Assemble the given loop.
         Should create and attach a fresh CompiledLoopToken to
         looptoken.compiled_loop_token and stick extra attributes
         on it to point to the compiled loop in assembler.
+        Optionally, return an ``ops_offset`` dictionary, which maps each operation
+        to its offset in the compiled code.  The ``ops_offset`` dictionary is then
+        used by the operation logger to print the offsets in the log.  The
+        offset representing the end of the last operation is stored in
+        ``ops_offset[None]``: note that this might not coincide with the end of
+        the loop, because usually in the loop footer there is code which does
+        not belong to any particular operation.
         raise NotImplementedError
@@ -71,9 +78,16 @@
                        original_loop_token, log=True):
         """Assemble the bridge.
         The FailDescr is the descr of the original guard that failed.
+        Optionally, return an ``ops_offset`` dictionary.  See the docstring of
+        ``compile_loop`` for more information about it.
         raise NotImplementedError
+    def dump_loop_token(self, looptoken):
+        """Print a disassembled version of looptoken to stdout"""
+        raise NotImplementedError
     def execute_token(self, looptoken):
         """Execute the generated code referenced by the looptoken.
         Returns the descr of the last executed operation: either the one
diff --git a/pypy/jit/backend/test/runner_test.py b/pypy/jit/backend/test/runner_test.py
--- a/pypy/jit/backend/test/runner_test.py
+++ b/pypy/jit/backend/test/runner_test.py
@@ -189,6 +189,8 @@
         wr_i1 = weakref.ref(i1)
         wr_guard = weakref.ref(operations[2])
         self.cpu.compile_loop(inputargs, operations, looptoken)
+        if hasattr(looptoken, '_x86_ops_offset'):
+            del looptoken._x86_ops_offset # else it's kept alive
         del i0, i1, i2
         del inputargs
         del operations
diff --git a/pypy/jit/backend/x86/assembler.py b/pypy/jit/backend/x86/assembler.py
--- a/pypy/jit/backend/x86/assembler.py
+++ b/pypy/jit/backend/x86/assembler.py
@@ -334,7 +334,7 @@
             operations = self._inject_debugging_code(looptoken, operations)
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
-        arglocs = regalloc.prepare_loop(inputargs, operations, looptoken)
+        arglocs, operations = regalloc.prepare_loop(inputargs, operations, looptoken)
         looptoken._x86_arglocs = arglocs
         bootstrappos = self.mc.get_relative_pos()
@@ -361,6 +361,13 @@
                                 frame_depth + param_depth)
+        ops_offset = self.mc.ops_offset
+        if not we_are_translated():
+            # used only by looptoken.dump() -- useful in tests
+            looptoken._x86_rawstart = rawstart
+            looptoken._x86_fullsize = fullsize
+            looptoken._x86_ops_offset = ops_offset
         looptoken._x86_bootstrap_code = rawstart + bootstrappos
         looptoken._x86_loop_code = rawstart + self.looppos
         looptoken._x86_direct_bootstrap_code = rawstart + directbootstrappos
@@ -370,6 +377,7 @@
             name = "Loop # %s: %s" % (looptoken.number, funcname)
                                                        rawstart, fullsize)
+        return ops_offset
     def assemble_bridge(self, faildescr, inputargs, operations,
                         original_loop_token, log):
@@ -397,8 +405,8 @@
                     [loc.assembler() for loc in faildescr._x86_debug_faillocs])
         regalloc = RegAlloc(self, self.cpu.translate_support_code)
         fail_depths = faildescr._x86_current_depths
-        regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
-                                operations)
+        operations = regalloc.prepare_bridge(fail_depths, inputargs, arglocs,
+                                             operations)
         stackadjustpos = self._patchable_stackadjust()
         frame_depth, param_depth = self._assemble(regalloc, operations)
@@ -419,12 +427,14 @@
             faildescr._x86_bridge_param_depth = param_depth
         # patch the jump from original guard
         self.patch_jump_for_descr(faildescr, rawstart)
+        ops_offset = self.mc.ops_offset
         # oprofile support
         if self.cpu.profile_agent is not None:
             name = "Bridge # %s: %s" % (descr_number, funcname)
                                                        rawstart, fullsize)
+        return ops_offset
     def write_pending_failure_recoveries(self):
         # for each pending guard, generate the code of the recovery stub
diff --git a/pypy/jit/backend/x86/codebuf.py b/pypy/jit/backend/x86/codebuf.py
--- a/pypy/jit/backend/x86/codebuf.py
+++ b/pypy/jit/backend/x86/codebuf.py
@@ -1,5 +1,7 @@
 from pypy.rpython.lltypesystem import lltype, rffi
 from pypy.rlib.rarithmetic import intmask
+from pypy.rlib.debug import debug_start, debug_print, debug_stop
+from pypy.rlib.debug import have_debug_prints
 from pypy.jit.backend.llsupport.asmmemmgr import BlockBuilderMixin
 from pypy.jit.backend.x86.rx86 import X86_32_CodeBuilder, X86_64_CodeBuilder
 from pypy.jit.backend.x86.regloc import LocationCodeBuilder
@@ -25,10 +27,19 @@
         # at [p-4:p] encode an absolute address that will need to be
         # made relative.
         self.relocations = []
+        #
+        # ResOperation --> offset in the assembly.
+        # ops_offset[None] represents the beginning of the code after the last op
+        # (i.e., the tail of the loop)
+        self.ops_offset = {}
     def add_pending_relocation(self):
+    def mark_op(self, op):
+        pos = self.get_relative_pos()
+        self.ops_offset[op] = pos
     def copy_to_raw_memory(self, addr):
         for reloc in self.relocations:
diff --git a/pypy/jit/backend/x86/regalloc.py b/pypy/jit/backend/x86/regalloc.py
--- a/pypy/jit/backend/x86/regalloc.py
+++ b/pypy/jit/backend/x86/regalloc.py
@@ -161,7 +161,7 @@
         self.fm = X86FrameManager()
         self.param_depth = 0
         cpu = self.assembler.cpu
-        cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
+        operations = cpu.gc_ll_descr.rewrite_assembler(cpu, operations)
         # compute longevity of variables
         longevity = compute_vars_longevity(inputargs, operations)
         self.longevity = longevity
@@ -170,20 +170,22 @@
                                   assembler = self.assembler)
         self.xrm = xmm_reg_mgr_cls(longevity, frame_manager = self.fm,
                                    assembler = self.assembler)
+        return operations
     def prepare_loop(self, inputargs, operations, looptoken):
-        self._prepare(inputargs, operations)
+        operations = self._prepare(inputargs, operations)
         jump = operations[-1]
         loop_consts = compute_loop_consts(inputargs, jump, looptoken)
         self.loop_consts = loop_consts
-        return self._process_inputargs(inputargs)
+        return self._process_inputargs(inputargs), operations
     def prepare_bridge(self, prev_depths, inputargs, arglocs, operations):
-        self._prepare(inputargs, operations)
+        operations = self._prepare(inputargs, operations)
         self.loop_consts = {}
         self._update_bindings(arglocs, inputargs)
         self.fm.frame_depth = prev_depths[0]
         self.param_depth = prev_depths[1]
+        return operations
     def reserve_param(self, n):
         self.param_depth = max(self.param_depth, n)
@@ -402,6 +404,7 @@
         #self.operations = operations
         while i < len(operations):
             op = operations[i]
+            self.assembler.mc.mark_op(op)
             self.rm.position = i
             self.xrm.position = i
             if op.has_no_side_effect() and op.result not in self.longevity:
@@ -422,6 +425,7 @@
             i += 1
         assert not self.rm.reg_bindings
         assert not self.xrm.reg_bindings
+        self.assembler.mc.mark_op(None) # end of the loop
     def loc(self, v):
diff --git a/pypy/jit/backend/x86/runner.py b/pypy/jit/backend/x86/runner.py
--- a/pypy/jit/backend/x86/runner.py
+++ b/pypy/jit/backend/x86/runner.py
@@ -60,16 +60,33 @@
+    def dump_loop_token(self, looptoken):
+        """
+        NOT_RPYTHON
+        """
+        from pypy.jit.backend.x86.tool.viewcode import machine_code_dump
+        data = []
+        label_list = [(offset, name) for name, offset in
+                      looptoken._x86_ops_offset.iteritems()]
+        label_list.sort()
+        addr = looptoken._x86_rawstart
+        src = rffi.cast(rffi.CCHARP, addr)
+        for p in range(looptoken._x86_fullsize):
+            data.append(src[p])
+        data = ''.join(data)
+        lines = machine_code_dump(data, addr, self.backend_name, label_list)
+        print ''.join(lines)
     def compile_loop(self, inputargs, operations, looptoken, log=True):
-        self.assembler.assemble_loop(inputargs, operations, looptoken,
-                                     log=log)
+        return self.assembler.assemble_loop(inputargs, operations, looptoken,
+                                            log=log)
     def compile_bridge(self, faildescr, inputargs, operations,
                        original_loop_token, log=True):
         clt = original_loop_token.compiled_loop_token
-        self.assembler.assemble_bridge(faildescr, inputargs, operations,
-                                       original_loop_token, log=log)
+        return self.assembler.assemble_bridge(faildescr, inputargs, operations,
+                                              original_loop_token, log=log)
     def set_future_value_int(self, index, intvalue):
         self.assembler.fail_boxes_int.setitem(index, intvalue)
@@ -164,7 +181,9 @@
         # positions invalidated
         looptoken.compiled_loop_token.invalidate_positions = []
 class CPU386(AbstractX86CPU):
+    backend_name = 'x86'
     WORD = 4
     NUM_REGS = 8
     CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.esi, regloc.edi]
@@ -180,6 +199,7 @@
     supports_longlong = False
 class CPU_X86_64(AbstractX86CPU):
+    backend_name = 'x86_64'
     WORD = 8
     NUM_REGS = 16
     CALLEE_SAVE_REGISTERS = [regloc.ebx, regloc.r12, regloc.r13, regloc.r14, regloc.r15]
diff --git a/pypy/jit/backend/x86/test/test_gc_integration.py b/pypy/jit/backend/x86/test/test_gc_integration.py
--- a/pypy/jit/backend/x86/test/test_gc_integration.py
+++ b/pypy/jit/backend/x86/test/test_gc_integration.py
@@ -54,7 +54,8 @@
         self.gcrefs = GcRefList()
         self.single_gcref_descr = GcPtrFieldDescr('', 0)
+    replace_constptrs_with_getfield_raw = GcLLDescr_framework.replace_constptrs_with_getfield_raw.im_func
     rewrite_assembler = GcLLDescr_framework.rewrite_assembler.im_func
 class TestRegallocDirectGcIntegration(object):
diff --git a/pypy/jit/backend/x86/test/test_runner.py b/pypy/jit/backend/x86/test/test_runner.py
--- a/pypy/jit/backend/x86/test/test_runner.py
+++ b/pypy/jit/backend/x86/test/test_runner.py
@@ -390,6 +390,29 @@
         res = self.cpu.get_latest_value_int(0)
         assert res == 20
+    def test_ops_offset(self):
+        from pypy.rlib import debug
+        i0 = BoxInt()
+        i1 = BoxInt()
+        i2 = BoxInt()
+        looptoken = LoopToken()
+        operations = [
+            ResOperation(rop.INT_ADD, [i0, ConstInt(1)], i1),
+            ResOperation(rop.INT_LE, [i1, ConstInt(9)], i2),
+            ResOperation(rop.JUMP, [i1], None, descr=looptoken),
+            ]
+        inputargs = [i0]
+        debug._log = dlog = debug.DebugLog()
+        ops_offset = self.cpu.compile_loop(inputargs, operations, looptoken)
+        debug._log = None
+        #
+        assert ops_offset is looptoken._x86_ops_offset
+        # getfield_raw/int_add/setfield_raw + ops + None
+        assert len(ops_offset) == 3 + len(operations) + 1
+        assert (ops_offset[operations[0]] <=
+                ops_offset[operations[1]] <=
+                ops_offset[operations[2]] <=
+                ops_offset[None])
 class TestDebuggingAssembler(object):
     def setup_method(self, meth):
diff --git a/pypy/jit/backend/x86/tool/__init__.py b/pypy/jit/backend/x86/tool/__init__.py
new file mode 100644
diff --git a/pypy/jit/backend/x86/tool/test/test_viewcode.py b/pypy/jit/backend/x86/tool/test/test_viewcode.py
new file mode 100644
--- /dev/null
+++ b/pypy/jit/backend/x86/tool/test/test_viewcode.py
@@ -0,0 +1,55 @@
+from cStringIO import StringIO
+from pypy.jit.backend.x86.tool.viewcode import format_code_dump_with_labels
+def test_format_code_dump_with_labels():
+    lines = StringIO("""
+aa00 <.data>:
+aa00: one
+aa01: two
+aa03: three
+aa04: for
+aa05: five
+aa06: six
+aa0c: seven
+aa12: eight
+    #
+    label_list = [(0x00, 'AAA'), (0x03, 'BBB'), (0x0c, 'CCC')]
+    lines = format_code_dump_with_labels(0xAA00, lines, label_list)
+    out = ''.join(lines)
+    assert out == """
+aa00 <.data>:
+aa00: one
+aa01: two
+aa03: three
+aa04: for
+aa05: five
+aa06: six
+aa0c: seven
+aa12: eight
+def test_format_code_dump_with_labels_no_labels():
+    input = """
+aa00 <.data>:
+aa00: one
+aa01: two
+aa03: three
+aa04: for
+aa05: five
+aa06: six
+aa0c: seven
+aa12: eight
+    lines = StringIO(input).readlines()
+    #
+    lines = format_code_dump_with_labels(0xAA00, lines, label_list=None)
+    out = ''.join(lines)
+    assert out.strip() == input
diff --git a/pypy/jit/backend/x86/tool/viewcode.py b/pypy/jit/backend/x86/tool/viewcode.py
--- a/pypy/jit/backend/x86/tool/viewcode.py
+++ b/pypy/jit/backend/x86/tool/viewcode.py
@@ -31,13 +31,14 @@
 if sys.platform == "win32":
     XXX   # lots more in Psyco
-def machine_code_dump(data, originaddr, backend_name):
+def machine_code_dump(data, originaddr, backend_name, label_list=None):
     objdump_backend_option = {
         'x86': 'i386',
         'x86_64': 'x86-64',
         'i386': 'i386',
     objdump = ('objdump -M %(backend)s -b binary -m i386 '
+               '--disassembler-options=intel-mnemonics '
                '--adjust-vma=%(origin)d -D %(file)s')
     f = open(tmpfile, 'wb')
@@ -50,7 +51,32 @@
     }, 'r')
     result = g.readlines()
-    return result[6:]   # drop some objdump cruft
+    lines = result[6:]   # drop some objdump cruft
+    return format_code_dump_with_labels(originaddr, lines, label_list)
+def format_code_dump_with_labels(originaddr, lines, label_list):
+    from pypy.rlib.rarithmetic import r_uint
+    if not label_list:
+        label_list = []
+    originaddr = r_uint(originaddr)
+    itlines = iter(lines)
+    yield itlines.next() # don't process the first line
+    for lbl_start, lbl_name in label_list:
+        for line in itlines:
+            addr, _ = line.split(':', 1)
+            addr = int(addr, 16)
+            if addr >= originaddr+lbl_start:
+                yield '\n'
+                if lbl_name is None:
+                    yield '--end of the loop--\n'
+                else:
+                    yield str(lbl_name) + '\n'
+                yield line
+                break
+            yield line
+    # yield all the remaining lines
+    for line in itlines:
+        yield line
 def load_symbols(filename):
     # the program that lists symbols, and the output it gives
@@ -134,6 +160,7 @@
     def disassemble(self):
         if not hasattr(self, 'text'):
             lines = machine_code_dump(self.data, self.addr, self.world.backend_name)
+            lines = list(lines)
             # instead of adding symbol names in the dumps we could
             # also make the 0xNNNNNNNN addresses be red and show the
             # symbol name when the mouse is over them
diff --git a/pypy/jit/metainterp/compile.py b/pypy/jit/metainterp/compile.py
--- a/pypy/jit/metainterp/compile.py
+++ b/pypy/jit/metainterp/compile.py
@@ -7,7 +7,7 @@
 from pypy.conftest import option
 from pypy.tool.sourcetools import func_with_new_name
-from pypy.jit.metainterp.resoperation import ResOperation, rop
+from pypy.jit.metainterp.resoperation import ResOperation, rop, get_deep_immutable_oplist
 from pypy.jit.metainterp.history import TreeLoop, Box, History, LoopToken
 from pypy.jit.metainterp.history import AbstractFailDescr, BoxInt
 from pypy.jit.metainterp.history import BoxPtr, BoxObj, BoxFloat, Const
@@ -73,7 +73,7 @@
             # test_memgr.py)
             if descr is not looptoken:
-            op.setdescr(None)    # clear reference, mostly for tests
+            op._descr = None    # clear reference, mostly for tests
             if not we_are_translated():
                 op._jumptarget_number = descr.number
     # record this looptoken on the QuasiImmut used in the code
@@ -156,20 +156,16 @@
     loop_token.number = n = globaldata.loopnumbering
     globaldata.loopnumbering += 1
-    metainterp_sd.logger_ops.log_loop(loop.inputargs, loop.operations, n, type)
-    short = loop.token.short_preamble
-    if short:
-        metainterp_sd.logger_ops.log_short_preamble(short[-1].inputargs,
-                                                    short[-1].operations)
     if not we_are_translated():
         show_loop(metainterp_sd, loop)
+    operations = get_deep_immutable_oplist(loop.operations)
-        metainterp_sd.cpu.compile_loop(loop.inputargs, loop.operations,
-                                       loop.token)
+        ops_offset = metainterp_sd.cpu.compile_loop(loop.inputargs, operations,
+                                                    loop.token)
@@ -180,27 +176,37 @@
             loop._ignore_during_counting = True
     metainterp_sd.log("compiled new " + type)
+    #
+    metainterp_sd.logger_ops.log_loop(loop.inputargs, loop.operations, n, type, ops_offset)
+    short = loop.token.short_preamble
+    if short:
+        metainterp_sd.logger_ops.log_short_preamble(short[-1].inputargs,
+                                                    short[-1].operations)
+    #
     if metainterp_sd.warmrunnerdesc is not None:    # for tests
 def send_bridge_to_backend(metainterp_sd, faildescr, inputargs, operations,
-    n = metainterp_sd.cpu.get_fail_descr_number(faildescr)
-    metainterp_sd.logger_ops.log_bridge(inputargs, operations, n)
     if not we_are_translated():
         TreeLoop.check_consistency_of(inputargs, operations)
+    operations = get_deep_immutable_oplist(operations)
-        metainterp_sd.cpu.compile_bridge(faildescr, inputargs, operations,
-                                         original_loop_token)
+        ops_offset = metainterp_sd.cpu.compile_bridge(faildescr, inputargs, operations,
+                                                      original_loop_token)
     if not we_are_translated():
     metainterp_sd.log("compiled new bridge")
+    #
+    n = metainterp_sd.cpu.get_fail_descr_number(faildescr)
+    metainterp_sd.logger_ops.log_bridge(inputargs, operations, n, ops_offset)
+    #
     if metainterp_sd.warmrunnerdesc is not None:    # for tests
@@ -685,6 +691,7 @@
         ResOperation(rop.FINISH, finishargs, None, descr=jd.portal_finishtoken)
+    operations = get_deep_immutable_oplist(operations)
     cpu.compile_loop(inputargs, operations, loop_token, log=False)
     if memory_manager is not None:    # for tests
diff --git a/pypy/jit/metainterp/history.py b/pypy/jit/metainterp/history.py
--- a/pypy/jit/metainterp/history.py
+++ b/pypy/jit/metainterp/history.py
@@ -785,6 +785,8 @@
     def repr_of_descr(self):
         return '<Loop%d>' % self.number
+    def dump(self):
+        self.compiled_loop_token.cpu.dump_loop_token(self)
 class TreeLoop(object):
     inputargs = None
diff --git a/pypy/jit/metainterp/logger.py b/pypy/jit/metainterp/logger.py
--- a/pypy/jit/metainterp/logger.py
+++ b/pypy/jit/metainterp/logger.py
@@ -14,33 +14,33 @@
         self.ts = metainterp_sd.cpu.ts
         self.guard_number = guard_number
-    def log_loop(self, inputargs, operations, number=0, type=None):
+    def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
         if type is None:
-            self._log_operations(inputargs, operations)
+            self._log_operations(inputargs, operations, ops_offset)
             debug_print("# Loop", number, ":", type,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations)
+            self._log_operations(inputargs, operations, ops_offset)
-    def log_bridge(self, inputargs, operations, number=-1):
+    def log_bridge(self, inputargs, operations, number=-1, ops_offset=None):
         if number == -1:
-            self._log_operations(inputargs, operations)
+            self._log_operations(inputargs, operations, ops_offset)
             debug_print("# bridge out of Guard", number,
                         "with", len(operations), "ops")
-            self._log_operations(inputargs, operations)
+            self._log_operations(inputargs, operations, ops_offset)
     def log_short_preamble(self, inputargs, operations):
-        self._log_operations(inputargs, operations)
+        self._log_operations(inputargs, operations, ops_offset=None)
     def repr_of_descr(self, descr):
@@ -75,9 +75,11 @@
             return '?'
-    def _log_operations(self, inputargs, operations):
+    def _log_operations(self, inputargs, operations, ops_offset):
         if not have_debug_prints():
+        if ops_offset is None:
+            ops_offset = {}
         memo = {}
         if inputargs is not None:
             args = ", ".join([self.repr_of_arg(memo, arg) for arg in inputargs])
@@ -89,6 +91,11 @@
                 reclev = op.getarg(1).getint()
                 debug_print("debug_merge_point('%s', %s)" % (loc, reclev))
+            offset = ops_offset.get(op, -1)
+            if offset == -1:
+                s_offset = ""
+            else:
+                s_offset = "+%d: " % offset
             args = ", ".join([self.repr_of_arg(memo, op.getarg(i)) for i in range(op.numargs())])
             if op.result is not None:
                 res = self.repr_of_arg(memo, op.result) + " = "
@@ -108,8 +115,11 @@
                                               for arg in op.getfailargs()]) + ']'
                 fail_args = ''
-            debug_print(res + op.getopname() +
+            debug_print(s_offset + res + op.getopname() +
                         '(' + args + ')' + fail_args)
+        if ops_offset and None in ops_offset:
+            offset = ops_offset[None]
+            debug_print("+%d: --end of the loop--" % offset)
 def int_could_be_an_address(x):
diff --git a/pypy/jit/metainterp/optimizeopt/rewrite.py b/pypy/jit/metainterp/optimizeopt/rewrite.py
--- a/pypy/jit/metainterp/optimizeopt/rewrite.py
+++ b/pypy/jit/metainterp/optimizeopt/rewrite.py
@@ -15,7 +15,7 @@
     def reconstruct_for_next_iteration(self, optimizer, valuemap):
         return self
     def propagate_forward(self, op):
         args = self.optimizer.make_args_key(op)
         if self.find_rewritable_bool(op, args):
@@ -40,7 +40,7 @@
                     return False
         return self.is_emittable(op)
     def try_boolinvers(self, op, targs):
         oldop = self.optimizer.pure_operations.get(targs, None)
         if oldop is not None and oldop.getdescr() is op.getdescr():
@@ -69,7 +69,7 @@
             oldopnum = opboolreflex[op.getopnum()] # FIXME: add INT_ADD, INT_MUL
             targs = self.optimizer.make_args_key(ResOperation(oldopnum, [args[1], args[0]],
-                                                              None))            
+                                                              None))
             oldop = self.optimizer.pure_operations.get(targs, None)
             if oldop is not None and oldop.getdescr() is op.getdescr():
                 self.make_equal_to(op.result, self.getvalue(oldop.result))
@@ -80,7 +80,7 @@
             oldopnum = opboolinvers[opboolreflex[op.getopnum()]]
             targs = self.optimizer.make_args_key(ResOperation(oldopnum, [args[1], args[0]],
-                                                              None))            
+                                                              None))
             if self.try_boolinvers(op, targs):
                 return True
         except KeyError:
@@ -157,6 +157,15 @@
+    def optimize_UINT_FLOORDIV(self, op):
+        v1 = self.getvalue(op.getarg(0))
+        v2 = self.getvalue(op.getarg(1))
+        if v2.is_constant() and v2.box.getint() == 1:
+            self.make_equal_to(op.result, v1)
+        else:
+            self.emit_operation(op)
     def optimize_INT_LSHIFT(self, op):
         v1 = self.getvalue(op.getarg(0))
         v2 = self.getvalue(op.getarg(1))
@@ -322,7 +331,7 @@
         resvalue = self.getvalue(op.result)
         self.optimizer.loop_invariant_results[key] = resvalue
     def _optimize_nullness(self, op, box, expect_nonnull):
         value = self.getvalue(box)
         if value.is_nonnull():
@@ -381,7 +390,7 @@
 ##        if realclassbox is not None:
 ##            checkclassbox = self.optimizer.cpu.typedescr2classbox(op.descr)
 ##            result = self.optimizer.cpu.ts.subclassOf(self.optimizer.cpu,
-##                                                      realclassbox, 
+##                                                      realclassbox,
 ##                                                      checkclassbox)
 ##            self.make_constant_int(op.result, result)
 ##            return
diff --git a/pypy/jit/metainterp/resoperation.py b/pypy/jit/metainterp/resoperation.py
--- a/pypy/jit/metainterp/resoperation.py
+++ b/pypy/jit/metainterp/resoperation.py
@@ -626,3 +626,25 @@
     rop.PTR_EQ: rop.PTR_EQ,
     rop.PTR_NE: rop.PTR_NE,
+def get_deep_immutable_oplist(operations):
+    """
+    When not we_are_translated(), turns ``operations`` into a frozenlist and
+    monkey-patch its items to make sure they are not mutated.
+    When we_are_translated(), do nothing and just return the old list.
+    """
+    from pypy.tool.frozenlist import frozenlist
+    if we_are_translated():
+        return operations
+    #
+    def setarg(*args):
+        assert False, "operations cannot change at this point"
+    def setdescr(*args):
+        assert False, "operations cannot change at this point"
+    newops = frozenlist(operations)
+    for op in newops:
+        op.setarg = setarg
+        op.setdescr = setdescr
+    return newops
diff --git a/pypy/jit/metainterp/test/test_compile.py b/pypy/jit/metainterp/test/test_compile.py
--- a/pypy/jit/metainterp/test/test_compile.py
+++ b/pypy/jit/metainterp/test/test_compile.py
@@ -34,7 +34,7 @@
         self.seen.append((inputargs, operations, token))
 class FakeLogger(object):
-    def log_loop(self, inputargs, operations, number=0, type=None):
+    def log_loop(self, inputargs, operations, number=0, type=None, ops_offset=None):
 class FakeState(object):
diff --git a/pypy/jit/metainterp/test/test_logger.py b/pypy/jit/metainterp/test/test_logger.py
--- a/pypy/jit/metainterp/test/test_logger.py
+++ b/pypy/jit/metainterp/test/test_logger.py
@@ -31,10 +31,10 @@
     return log_stream.getvalue()
 class Logger(logger.Logger):
-    def log_loop(self, loop, namespace={}):
+    def log_loop(self, loop, namespace={}, ops_offset=None):
         self.namespace = namespace
         return capturing(logger.Logger.log_loop, self,
-                         loop.inputargs, loop.operations)
+                         loop.inputargs, loop.operations, ops_offset=ops_offset)
     def repr_of_descr(self, descr):
         for k, v in self.namespace.items():
@@ -178,3 +178,27 @@
         output = capturing(bare_logger.log_bridge, [], [], 3)
         assert output.splitlines()[0] == "# bridge out of Guard 3 with 0 ops"
+    def test_ops_offset(self):
+        inp = '''
+        [i0]
+        i1 = int_add(i0, 1)
+        i2 = int_mul(i1, 2)
+        jump(i2)
+        '''
+        loop = pure_parse(inp)
+        ops = loop.operations
+        ops_offset = {
+            ops[0]: 10,
+            ops[2]: 30,
+            None: 40
+            }
+        logger = Logger(self.make_metainterp_sd())
+        output = logger.log_loop(loop, ops_offset=ops_offset)
+        assert output.strip() == """
++10: i2 = int_add(i0, 1)
+i4 = int_mul(i2, 2)
++30: jump(i4)
++40: --end of the loop--
diff --git a/pypy/jit/metainterp/test/test_optimizeopt.py b/pypy/jit/metainterp/test/test_optimizeopt.py
--- a/pypy/jit/metainterp/test/test_optimizeopt.py
+++ b/pypy/jit/metainterp/test/test_optimizeopt.py
@@ -2843,6 +2843,18 @@
         self.optimize_loop(ops, expected)
+    def test_fold_partially_constant_uint_floordiv(self):
+        ops = """
+        [i0]
+        i1 = uint_floordiv(i0, 1)
+        jump(i1)
+        """
+        expected = """
+        [i0]
+        jump(i0)
+        """
+        self.optimize_loop(ops, expected)
     # ----------
 class TestLLtype(OptimizeOptTest, LLtypeMixin):
@@ -5746,7 +5758,7 @@
         expected = """
-        guard_not_invalidated() []        
+        guard_not_invalidated() []
diff --git a/pypy/jit/metainterp/test/test_resoperation.py b/pypy/jit/metainterp/test/test_resoperation.py
--- a/pypy/jit/metainterp/test/test_resoperation.py
+++ b/pypy/jit/metainterp/test/test_resoperation.py
@@ -68,3 +68,11 @@
     call = rop.ResOperation(rop.rop.CALL, ['a', 'b'], 'c', descr=mydescr)
     assert call.can_malloc()
     assert not rop.ResOperation(rop.rop.INT_ADD, ['a', 'b'], 'c').can_malloc()
+def test_get_deep_immutable_oplist():
+    ops = [rop.ResOperation(rop.rop.INT_ADD, ['a', 'b'], 'c')]
+    newops = rop.get_deep_immutable_oplist(ops)
+    py.test.raises(AttributeError, "newops.append('foobar')")
+    py.test.raises(TypeError, "newops[0] = 'foobar'")
+    py.test.raises(AssertionError, "newops[0].setarg(0, 'd')")
+    py.test.raises(AssertionError, "newops[0].setdescr('foobar')")
diff --git a/pypy/jit/metainterp/test/test_warmstate.py b/pypy/jit/metainterp/test/test_warmstate.py
--- a/pypy/jit/metainterp/test/test_warmstate.py
+++ b/pypy/jit/metainterp/test/test_warmstate.py
@@ -18,6 +18,7 @@
 def test_unwrap():
     S = lltype.GcStruct('S')
+    RS = lltype.Struct('S')
     p = lltype.malloc(S)
     po = lltype.cast_opaque_ptr(llmemory.GCREF, p)
     assert unwrap(lltype.Void, BoxInt(42)) is None
@@ -25,6 +26,7 @@
     assert unwrap(lltype.Char, BoxInt(42)) == chr(42)
     assert unwrap(lltype.Float, boxfloat(42.5)) == 42.5
     assert unwrap(lltype.Ptr(S), BoxPtr(po)) == p
+    assert unwrap(lltype.Ptr(RS), BoxInt(0)) == lltype.nullptr(RS)
 def test_wrap():
     def _is(box1, box2):
diff --git a/pypy/jit/metainterp/warmstate.py b/pypy/jit/metainterp/warmstate.py
--- a/pypy/jit/metainterp/warmstate.py
+++ b/pypy/jit/metainterp/warmstate.py
@@ -54,7 +54,10 @@
     if TYPE is lltype.Void:
         return None
     if isinstance(TYPE, lltype.Ptr):
-        return box.getref(TYPE)
+        if TYPE.TO._gckind == "gc":
+            return box.getref(TYPE)
+        else:
+            return llmemory.cast_adr_to_ptr(box.getaddr(), TYPE)
     if isinstance(TYPE, ootype.OOType):
         return box.getref(TYPE)
     if TYPE == lltype.Float:
@@ -578,7 +581,7 @@
             return entry_loop_token
         self.get_assembler_token = get_assembler_token
         get_location_ptr = self.jitdriver_sd._get_printable_location_ptr
         if get_location_ptr is None:
diff --git a/pypy/jit/tool/oparser.py b/pypy/jit/tool/oparser.py
--- a/pypy/jit/tool/oparser.py
+++ b/pypy/jit/tool/oparser.py
@@ -335,7 +335,7 @@
                 continue  # a comment or empty line
         base_indent, inpargs, newlines = self.parse_inpargs(newlines)
-        num, ops = self.parse_ops(base_indent, newlines, 0)
+        num, ops, last_offset = self.parse_ops(base_indent, newlines, 0)
         if num < len(newlines):
             raise ParseError("unexpected dedent at line: %s" % newlines[num])
         loop = ExtendedTreeLoop("loop")
@@ -343,11 +343,13 @@
         loop.token = self.looptoken
         loop.operations = ops
         loop.inputargs = inpargs
+        loop.last_offset = last_offset
         return loop
     def parse_ops(self, indent, lines, start):
         num = start
         ops = []
+        last_offset = None
         while num < len(lines):
             line = lines[num]
             if not line.startswith(" " * indent):
@@ -356,9 +358,25 @@
             elif line.startswith(" "*(indent + 1)):
                 raise ParseError("indentation not valid any more")
-                ops.append(self.parse_next_op(lines[num].strip()))
+                line = line.strip()
+                offset, line = self.parse_offset(line)
+                if line == '--end of the loop--':
+                    last_offset = offset
+                else:
+                    op = self.parse_next_op(line)
+                    if offset:
+                        op.offset = offset
+                    ops.append(op)
                 num += 1
-        return num, ops
+        return num, ops, last_offset
+    def parse_offset(self, line):
+        if line.startswith('+'):
+            # it begins with an offset, like: "+10: i1 = int_add(...)"
+            offset, _, line = line.partition(':')
+            offset = int(offset)
+            return offset, line.strip()
+        return None, line
     def parse_inpargs(self, lines):
         line = lines[0]
diff --git a/pypy/jit/tool/test/test_oparser.py b/pypy/jit/tool/test/test_oparser.py
--- a/pypy/jit/tool/test/test_oparser.py
+++ b/pypy/jit/tool/test/test_oparser.py
@@ -1,7 +1,7 @@
+import py
 from pypy.rpython.lltypesystem import lltype, llmemory
-from pypy.jit.tool.oparser import parse
+from pypy.jit.tool.oparser import parse, ParseError
 from pypy.jit.metainterp.resoperation import rop
 from pypy.jit.metainterp.history import AbstractDescr, BoxInt, LoopToken,\
@@ -203,3 +203,25 @@
     loop = parse(x, nonstrict=True)
     assert loop.inputargs == []
     assert loop.operations[0].getopname() == 'int_add'
+def test_offsets():
+    x = """
+    [i0, i1]
+    +10: i2 = int_add(i0, i1)
+    i3 = int_add(i2, 3)
+    """
+    #    +30: --end of the loop--
+    loop = parse(x)
+    assert loop.operations[0].offset == 10
+    assert not hasattr(loop.operations[1], 'offset')
+def test_last_offset():
+    x = """
+    [i0, i1]
+    +10: i2 = int_add(i0, i1)
+    i3 = int_add(i2, 3)
+    +30: --end of the loop--
+    """
+    loop = parse(x)
+    assert len(loop.operations) == 2
+    assert loop.last_offset == 30
diff --git a/pypy/module/_multibytecodec/__init__.py b/pypy/module/_multibytecodec/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/__init__.py
@@ -0,0 +1,21 @@
+from pypy.interpreter.mixedmodule import MixedModule 
+class Module(MixedModule):
+    interpleveldefs = {
+        # for compatibility this name is obscured, and should be called
+        # via the _codecs_*.py modules written in lib_pypy.
+        '__getcodec': 'interp_multibytecodec.getcodec',
+    }
+    appleveldefs = {
+        'MultibyteIncrementalEncoder':
+            'app_multibytecodec.MultibyteIncrementalEncoder',
+        'MultibyteIncrementalDecoder':
+            'app_multibytecodec.MultibyteIncrementalDecoder',
+        'MultibyteStreamReader':
+            'app_multibytecodec.MultibyteStreamReader',
+        'MultibyteStreamWriter':
+            'app_multibytecodec.MultibyteStreamWriter',
+    }
diff --git a/pypy/module/_multibytecodec/app_multibytecodec.py b/pypy/module/_multibytecodec/app_multibytecodec.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/app_multibytecodec.py
@@ -0,0 +1,34 @@
+# These classes are not supported so far.
+# My theory is that they are not widely used on CPython either, because
+# I found two bugs just by looking at their .c source: they always call
+# encreset() after a piece of data, even though I think it's wrong ---
+# it should be called only once at the end; and mbiencoder_reset() calls
+# decreset() instead of encreset().
+class MultibyteIncrementalEncoder(object):
+    def __init__(self, *args, **kwds):
+        raise LookupError(
+            "MultibyteIncrementalEncoder not implemented; "
+            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+class MultibyteIncrementalDecoder(object):
+    def __init__(self, *args, **kwds):
+        raise LookupError(
+            "MultibyteIncrementalDecoder not implemented; "
+            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+class MultibyteStreamReader(object):
+    def __init__(self, *args, **kwds):
+        raise LookupError(
+            "MultibyteStreamReader not implemented; "
+            "see pypy/module/_multibytecodec/app_multibytecodec.py")
+class MultibyteStreamWriter(object):
+    def __init__(self, *args, **kwds):
+        raise LookupError(
+            "MultibyteStreamWriter not implemented; "
+            "see pypy/module/_multibytecodec/app_multibytecodec.py")
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -0,0 +1,212 @@
+import py, sys
+from pypy.rpython.lltypesystem import lltype, rffi
+from pypy.translator.tool.cbuild import ExternalCompilationInfo
+from pypy.tool.autopath import pypydir
+class EncodeDecodeError(Exception):
+    def __init__(self, start, end, reason):
+        self.start = start
+        self.end = end
+        self.reason = reason
+    def __repr__(self):
+        return 'EncodeDecodeError(%r, %r, %r)' % (self.start, self.end,
+                                                  self.reason)
+srcdir = py.path.local(pypydir).join('translator', 'c')
+codecs = [
+    # _codecs_cn
+    'gb2312', 'gbk', 'gb18030', 'hz',
+    # _codecs_hk
+    'big5hkscs',
+    # _codecs_iso2022
+    'iso2022_kr', 'iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2',
+    'iso2022_jp_2004', 'iso2022_jp_3', 'iso2022_jp_ext',
+    # _codecs_jp
+    'shift_jis', 'cp932', 'euc_jp', 'shift_jis_2004',
+    'euc_jis_2004', 'euc_jisx0213', 'shift_jisx0213',
+    # _codecs_kr
+    'euc_kr', 'cp949', 'johab',
+    # _codecs_tw
+    'big5', 'cp950',
+eci = ExternalCompilationInfo(
+    separate_module_files = [
+        srcdir.join('src', 'cjkcodecs', '_codecs_cn.c'),
+        srcdir.join('src', 'cjkcodecs', '_codecs_hk.c'),
+        srcdir.join('src', 'cjkcodecs', '_codecs_iso2022.c'),
+        srcdir.join('src', 'cjkcodecs', '_codecs_jp.c'),
+        srcdir.join('src', 'cjkcodecs', '_codecs_kr.c'),
+        srcdir.join('src', 'cjkcodecs', '_codecs_tw.c'),
+        srcdir.join('src', 'cjkcodecs', 'multibytecodec.c'),
+    ],
+    includes = ['src/cjkcodecs/multibytecodec.h'],
+    include_dirs = [str(srcdir)],
+    export_symbols = [
+        "pypy_cjk_dec_init", "pypy_cjk_dec_free", "pypy_cjk_dec_chunk",
+        "pypy_cjk_dec_outbuf", "pypy_cjk_dec_outlen",
+        "pypy_cjk_dec_inbuf_remaining", "pypy_cjk_dec_inbuf_consumed",
+        "pypy_cjk_enc_init", "pypy_cjk_enc_free", "pypy_cjk_enc_chunk",
+        "pypy_cjk_enc_reset", "pypy_cjk_enc_outbuf", "pypy_cjk_enc_outlen",
+        "pypy_cjk_enc_inbuf_remaining", "pypy_cjk_enc_inbuf_consumed",
+    ] + ["pypy_cjkcodec_%s" % codec for codec in codecs],
+MBERR_TOOSMALL = -1  # insufficient output buffer space
+MBERR_TOOFEW   = -2  # incomplete input buffer
+MBERR_INTERNAL = -3  # internal runtime error
+MBERR_NOMEMORY = -4  # out of memory
+MULTIBYTECODEC_P = rffi.COpaquePtr('struct MultibyteCodec_s',
+                                   compilation_info=eci)
+def llexternal(*args, **kwds):
+    kwds.setdefault('compilation_info', eci)
+    kwds.setdefault('sandboxsafe', True)
+    kwds.setdefault('_nowrapper', True)
+    return rffi.llexternal(*args, **kwds)
+def getter_for(name):
+    return llexternal('pypy_cjkcodec_%s' % name, [], MULTIBYTECODEC_P)
+_codecs_getters = dict([(name, getter_for(name)) for name in codecs])
+assert len(_codecs_getters) == len(codecs)
+def getcodec(name):
+    getter = _codecs_getters[name]
+    return getter()
+# ____________________________________________________________
+# Decoding
+DECODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_dec_s', compilation_info=eci)
+pypy_cjk_dec_init = llexternal('pypy_cjk_dec_init',
+                               [MULTIBYTECODEC_P, rffi.CCHARP, rffi.SSIZE_T],
+                               DECODEBUF_P)
+pypy_cjk_dec_free = llexternal('pypy_cjk_dec_free', [DECODEBUF_P],
+                               lltype.Void)
+pypy_cjk_dec_chunk = llexternal('pypy_cjk_dec_chunk', [DECODEBUF_P],
+                                rffi.SSIZE_T)
+pypy_cjk_dec_outbuf = llexternal('pypy_cjk_dec_outbuf', [DECODEBUF_P],
+                                 rffi.CWCHARP)
+pypy_cjk_dec_outlen = llexternal('pypy_cjk_dec_outlen', [DECODEBUF_P],
+                                 rffi.SSIZE_T)
+pypy_cjk_dec_inbuf_remaining = llexternal('pypy_cjk_dec_inbuf_remaining',
+                                          [DECODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_dec_inbuf_consumed = llexternal('pypy_cjk_dec_inbuf_consumed',
+                                         [DECODEBUF_P], rffi.SSIZE_T)
+def decode(codec, stringdata):
+    inleft = len(stringdata)
+    inbuf = rffi.get_nonmovingbuffer(stringdata)
+    try:
+        decodebuf = pypy_cjk_dec_init(codec, inbuf, inleft)
+        if not decodebuf:
+            raise MemoryError
+        try:
+            r = pypy_cjk_dec_chunk(decodebuf)
+            if r != 0:
+                multibytecodec_decerror(decodebuf, r)
+                assert False
+            src = pypy_cjk_dec_outbuf(decodebuf)
+            length = pypy_cjk_dec_outlen(decodebuf)
+            return rffi.wcharpsize2unicode(src, length)
+        #
+        finally:
+            pypy_cjk_dec_free(decodebuf)
+    #
+    finally:
+        rffi.free_nonmovingbuffer(stringdata, inbuf)
+def multibytecodec_decerror(decodebuf, e):
+    if e > 0:
+        reason = "illegal multibyte sequence"
+        esize = e
+    elif e == MBERR_TOOFEW:
+        reason = "incomplete multibyte sequence"
+        esize = pypy_cjk_dec_inbuf_remaining(decodebuf)
+    elif e == MBERR_NOMEMORY:
+        raise MemoryError
+    else:
+        raise RuntimeError
+    #
+    # if errors == ERROR_REPLACE:...
+    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    start = pypy_cjk_dec_inbuf_consumed(decodebuf)
+    end = start + esize
+    if 1:  # errors == ERROR_STRICT:
+        raise EncodeDecodeError(start, end, reason)
+# ____________________________________________________________
+# Encoding
+ENCODEBUF_P = rffi.COpaquePtr('struct pypy_cjk_enc_s', compilation_info=eci)
+pypy_cjk_enc_init = llexternal('pypy_cjk_enc_init',
+                               [MULTIBYTECODEC_P, rffi.CWCHARP, rffi.SSIZE_T],
+                               ENCODEBUF_P)
+pypy_cjk_enc_free = llexternal('pypy_cjk_enc_free', [ENCODEBUF_P],
+                               lltype.Void)
+pypy_cjk_enc_chunk = llexternal('pypy_cjk_enc_chunk', [ENCODEBUF_P],
+                                rffi.SSIZE_T)
+pypy_cjk_enc_reset = llexternal('pypy_cjk_enc_reset', [ENCODEBUF_P],
+                                rffi.SSIZE_T)
+pypy_cjk_enc_outbuf = llexternal('pypy_cjk_enc_outbuf', [ENCODEBUF_P],
+                                 rffi.CCHARP)
+pypy_cjk_enc_outlen = llexternal('pypy_cjk_enc_outlen', [ENCODEBUF_P],
+                                 rffi.SSIZE_T)
+pypy_cjk_enc_inbuf_remaining = llexternal('pypy_cjk_enc_inbuf_remaining',
+                                          [ENCODEBUF_P], rffi.SSIZE_T)
+pypy_cjk_enc_inbuf_consumed = llexternal('pypy_cjk_enc_inbuf_consumed',
+                                         [ENCODEBUF_P], rffi.SSIZE_T)
+def encode(codec, unicodedata):
+    inleft = len(unicodedata)
+    inbuf = rffi.get_nonmoving_unicodebuffer(unicodedata)
+    try:
+        encodebuf = pypy_cjk_enc_init(codec, inbuf, inleft)
+        if not encodebuf:
+            raise MemoryError
+        try:
+            r = pypy_cjk_enc_chunk(encodebuf)
+            if r != 0:
+                multibytecodec_encerror(encodebuf, r)
+                assert False
+            r = pypy_cjk_enc_reset(encodebuf)
+            if r != 0:
+                multibytecodec_encerror(encodebuf, r)
+                assert False
+            src = pypy_cjk_enc_outbuf(encodebuf)
+            length = pypy_cjk_enc_outlen(encodebuf)
+            return rffi.charpsize2str(src, length)
+        #
+        finally:
+            pypy_cjk_enc_free(encodebuf)
+    #
+    finally:
+        rffi.free_nonmoving_unicodebuffer(unicodedata, inbuf)
+def multibytecodec_encerror(encodebuf, e):
+    if e > 0:
+        reason = "illegal multibyte sequence"
+        esize = e
+    elif e == MBERR_TOOFEW:
+        reason = "incomplete multibyte sequence"
+        esize = pypy_cjk_enc_inbuf_remaining(encodebuf)
+    elif e == MBERR_NOMEMORY:
+        raise MemoryError
+    else:
+        raise RuntimeError
+    #
+    # if errors == ERROR_REPLACE:...
+    # if errors == ERROR_IGNORE or errors == ERROR_REPLACE:...
+    start = pypy_cjk_enc_inbuf_consumed(encodebuf)
+    end = start + esize
+    if 1:  # errors == ERROR_STRICT:
+        raise EncodeDecodeError(start, end, reason)
diff --git a/pypy/module/_multibytecodec/interp_multibytecodec.py b/pypy/module/_multibytecodec/interp_multibytecodec.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/interp_multibytecodec.py
@@ -0,0 +1,79 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.gateway import ObjSpace, interp2app
+from pypy.interpreter.typedef import TypeDef
+from pypy.interpreter.error import OperationError
+from pypy.module._multibytecodec import c_codecs
+class MultibyteCodec(Wrappable):
+    def __init__(self, name, codec):
+        self.name = name
+        self.codec = codec
+    def decode(self, space, input, errors=None):
+        if errors is not None and errors != 'strict':
+            raise OperationError(space.w_NotImplementedError,    # XXX
+                                 space.wrap("errors='%s' in _multibytecodec"
+                                            % errors))
+        #
+        try:
+            output = c_codecs.decode(self.codec, input)
+        except c_codecs.EncodeDecodeError, e:
+            raise OperationError(
+                space.w_UnicodeDecodeError,
+                space.newtuple([
+                    space.wrap(self.name),
+                    space.wrap(input),
+                    space.wrap(e.start),
+                    space.wrap(e.end),
+                    space.wrap(e.reason)]))
+        except RuntimeError:
+            raise OperationError(space.w_RuntimeError,
+                                 space.wrap("internal codec error"))
+        return space.newtuple([space.wrap(output),
+                               space.wrap(len(input))])
+    decode.unwrap_spec = ['self', ObjSpace, str, 'str_or_None']
+    def encode(self, space, input, errors=None):
+        if errors is not None and errors != 'strict':
+            raise OperationError(space.w_NotImplementedError,    # XXX
+                                 space.wrap("errors='%s' in _multibytecodec"
+                                            % errors))
+        #
+        try:
+            output = c_codecs.encode(self.codec, input)
+        except c_codecs.EncodeDecodeError, e:
+            raise OperationError(
+                space.w_UnicodeEncodeError,
+                space.newtuple([
+                    space.wrap(self.name),
+                    space.wrap(input),
+                    space.wrap(e.start),
+                    space.wrap(e.end),
+                    space.wrap(e.reason)]))
+        except RuntimeError:
+            raise OperationError(space.w_RuntimeError,
+                                 space.wrap("internal codec error"))
+        return space.newtuple([space.wrap(output),
+                               space.wrap(len(input))])
+    encode.unwrap_spec = ['self', ObjSpace, unicode, 'str_or_None']
+MultibyteCodec.typedef = TypeDef(
+    'MultibyteCodec',
+    __module__ = '_multibytecodec',
+    decode = interp2app(MultibyteCodec.decode),
+    encode = interp2app(MultibyteCodec.encode),
+    )
+MultibyteCodec.typedef.acceptable_as_base_class = False
+def getcodec(space, name):
+    try:
+        codec = c_codecs.getcodec(name)
+    except KeyError:
+        raise OperationError(space.w_LookupError,
+                             space.wrap("no such codec is supported."))
+    return space.wrap(MultibyteCodec(name, codec))
+getcodec.unwrap_spec = [ObjSpace, str]
diff --git a/pypy/module/_multibytecodec/test/__init__.py b/pypy/module/_multibytecodec/test/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/__init__.py
@@ -0,0 +1,1 @@
diff --git a/pypy/module/_multibytecodec/test/test_app_codecs.py b/pypy/module/_multibytecodec/test/test_app_codecs.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_app_codecs.py
@@ -0,0 +1,56 @@
+from pypy.conftest import gettestobjspace
+class AppTestCodecs:
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_multibytecodec'])
+    def test_missing_codec(self):
+        import _codecs_cn
+        raises(LookupError, _codecs_cn.getcodec, "foobar")
+    def test_decode_hz(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("~{abc}")
+        assert r == (u'\u5f95\u6cef', 6)
+    def test_strict_error(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.decode("~{abc}", "strict")
+        assert r == (u'\u5f95\u6cef', 6)
+        assert type(r[0]) is unicode
+    def test_decode_hz_error(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        e = raises(UnicodeDecodeError, codec.decode, "~{}").value
+        assert e.args == ('hz', '~{}', 2, 3, 'incomplete multibyte sequence')
+        assert e.encoding == 'hz'
+        assert e.object == '~{}' and type(e.object) is str
+        assert e.start == 2
+        assert e.end == 3
+        assert e.reason == "incomplete multibyte sequence"
+        #
+        e = raises(UnicodeDecodeError, codec.decode, "~{xyz}").value
+        assert e.args == ('hz', '~{xyz}', 2, 4, 'illegal multibyte sequence')
+    def test_encode_hz(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        r = codec.encode(u'\u5f95\u6cef')
+        assert r == ('~{abc}~}', 2)
+        assert type(r[0]) is str
+    def test_encode_hz_error(self):
+        import _codecs_cn
+        codec = _codecs_cn.getcodec("hz")
+        u = u'abc\u1234def'
+        e = raises(UnicodeEncodeError, codec.encode, u).value
+        assert e.args == ('hz', u, 3, 4, 'illegal multibyte sequence')
+        assert e.encoding == 'hz'
+        assert e.object == u and type(e.object) is unicode
+        assert e.start == 3
+        assert e.end == 4
+        assert e.reason == 'illegal multibyte sequence'
diff --git a/pypy/module/_multibytecodec/test/test_c_codecs.py b/pypy/module/_multibytecodec/test/test_c_codecs.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_c_codecs.py
@@ -0,0 +1,57 @@
+import py
+from pypy.module._multibytecodec.c_codecs import getcodec, codecs
+from pypy.module._multibytecodec.c_codecs import decode, encode
+from pypy.module._multibytecodec.c_codecs import EncodeDecodeError
+def test_codecs_existence():
+    for name in codecs:
+        c = getcodec(name)
+        assert c
+    py.test.raises(KeyError, getcodec, "foobar")
+def test_decode_gbk():
+    c = getcodec("gbk")
+    u = decode(c, "\xA1\xAA")
+    assert u == unichr(0x2014)
+    u = decode(c, "foobar")
+    assert u == u"foobar"
+def test_decode_hz():
+    # stateful
+    c = getcodec("hz")
+    u = decode(c, "~{abc}")
+    assert u == u'\u5f95\u6cef'
+def test_decode_hz_error():
+    # error
+    c = getcodec("hz")
+    e = py.test.raises(EncodeDecodeError, decode, c, "~{}").value
+    assert e.start == 2
+    assert e.end == 3
+    assert e.reason == "incomplete multibyte sequence"
+    #
+    e = py.test.raises(EncodeDecodeError, decode, c, "~{xyz}").value
+    assert e.start == 2
+    assert e.end == 4
+    assert e.reason == "illegal multibyte sequence"
+def test_encode_hz():
+    c = getcodec("hz")
+    s = encode(c, u'foobar')
+    assert s == 'foobar' and type(s) is str
+    s = encode(c, u'\u5f95\u6cef')
+    assert s == '~{abc}~}'
+def test_encode_hz_error():
+    # error
+    c = getcodec("hz")
+    e = py.test.raises(EncodeDecodeError, encode, c, u'abc\u1234def').value
+    assert e.start == 3
+    assert e.end == 4
+    assert e.reason == "illegal multibyte sequence"
+def test_encode_jisx0208():
+    c = getcodec('iso2022_jp')
+    s = encode(c, u'\u83ca\u5730\u6642\u592b')
+    assert s == '\x1b$B5FCO;~IW\x1b(B' and type(s) is str
diff --git a/pypy/module/_multibytecodec/test/test_translation.py b/pypy/module/_multibytecodec/test/test_translation.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_multibytecodec/test/test_translation.py
@@ -0,0 +1,20 @@
+from pypy.module._multibytecodec import c_codecs
+from pypy.translator.c.test import test_standalone
+class TestTranslation(test_standalone.StandaloneTests):
+    def test_translation(self):
+        #
+        def entry_point(argv):
+            codecname, string = argv[1], argv[2]
+            c = c_codecs.getcodec(codecname)
+            u = c_codecs.decode(c, string)
+            r = c_codecs.encode(c, u)
+            print r
+            return 0
+        #
+        t, cbuilder = self.compile(entry_point)
+        cmd = 'hz "~{abc}"'
+        data = cbuilder.cmdexec(cmd)
+        assert data == '~{abc}~}\n'
diff --git a/pypy/module/_socket/test/test_sock_app.py b/pypy/module/_socket/test/test_sock_app.py
--- a/pypy/module/_socket/test/test_sock_app.py
+++ b/pypy/module/_socket/test/test_sock_app.py
@@ -372,11 +372,12 @@
     def test_socket_connect(self):
         import _socket, os
         s = _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM, 0)
-        # XXX temporarily we use codespeak to test, will have more robust tests in
-        # the absence of a network connection later when more parts of the socket
-        # API are implemented. currently skip the test if there is no connection.
+        # XXX temporarily we use python.org to test, will have more robust tests
+        # in the absence of a network connection later when more parts of the
+        # socket API are implemented.  Currently skip the test if there is no
+        # connection.
-            s.connect(("codespeak.net", 80))
+            s.connect(("www.python.org", 80))
         except _socket.gaierror, ex:
             skip("GAIError - probably no connection: %s" % str(ex.args))
         name = s.getpeername() # Will raise socket.error if not connected
@@ -506,11 +507,12 @@
         # Test that send/sendall/sendto accept a buffer or a unicode as arg
         import _socket, os
         s = _socket.socket(_socket.AF_INET, _socket.SOCK_STREAM, 0)
-        # XXX temporarily we use codespeak to test, will have more robust tests in
-        # the absence of a network connection later when more parts of the socket
-        # API are implemented. currently skip the test if there is no connection.
+        # XXX temporarily we use python.org to test, will have more robust tests
+        # in the absence of a network connection later when more parts of the
+        # socket API are implemented.  Currently skip the test if there is no
+        # connection.
-            s.connect(("codespeak.net", 80))
+            s.connect(("www.python.org", 80))
         except _socket.gaierror, ex:
             skip("GAIError - probably no connection: %s" % str(ex.args))
diff --git a/pypy/module/_ssl/__init__.py b/pypy/module/_ssl/__init__.py
--- a/pypy/module/_ssl/__init__.py
+++ b/pypy/module/_ssl/__init__.py
@@ -7,6 +7,7 @@
     interpleveldefs = {
         'sslwrap': 'interp_ssl.sslwrap',
         'SSLError': 'interp_ssl.get_error(space)',
+        '_test_decode_cert': 'interp_ssl._test_decode_cert',
     appleveldefs = {
@@ -30,3 +31,5 @@
     def startup(self, space):
         from pypy.rlib.ropenssl import init_ssl
+        from pypy.module._ssl.interp_ssl import setup_ssl_threads
+        setup_ssl_threads()
diff --git a/pypy/module/_ssl/interp_ssl.py b/pypy/module/_ssl/interp_ssl.py
--- a/pypy/module/_ssl/interp_ssl.py
+++ b/pypy/module/_ssl/interp_ssl.py
@@ -4,6 +4,7 @@
 from pypy.interpreter.typedef import TypeDef
 from pypy.interpreter.gateway import interp2app, unwrap_spec
+from pypy.rlib.rarithmetic import intmask
 from pypy.rlib import rpoll, rsocket
 from pypy.rlib.ropenssl import *
@@ -68,11 +69,8 @@
 def ssl_error(space, msg, errno=0):
     w_exception_class = get_error(space)
-    if errno:
-        w_exception = space.call_function(w_exception_class,
-                                          space.wrap(errno), space.wrap(msg))
-    else:
-        w_exception = space.call_function(w_exception_class, space.wrap(msg))
+    w_exception = space.call_function(w_exception_class,
+                                      space.wrap(errno), space.wrap(msg))
     return OperationError(w_exception_class, w_exception)
@@ -169,10 +167,10 @@
         num_bytes = 0
         while True:
             err = 0
             num_bytes = libssl_SSL_write(self.ssl, data, len(data))
             err = libssl_SSL_get_error(self.ssl, num_bytes)
             if err == SSL_ERROR_WANT_READ:
                 sockstate = check_socket_and_wait_for_timeout(self.space,
                     self.w_socket, False)
@@ -181,24 +179,34 @@
                     self.w_socket, True)
                 sockstate = SOCKET_OPERATION_OK
             if sockstate == SOCKET_HAS_TIMED_OUT:
                 raise ssl_error(self.space, "The write operation timed out")
             elif sockstate == SOCKET_HAS_BEEN_CLOSED:
                 raise ssl_error(self.space, "Underlying socket has been closed.")
             elif sockstate == SOCKET_IS_NONBLOCKING:
             if err == SSL_ERROR_WANT_READ or err == SSL_ERROR_WANT_WRITE:
         if num_bytes > 0:
             return self.space.wrap(num_bytes)
             raise _ssl_seterror(self.space, self, num_bytes)
+    def pending(self):
+        """pending() -> count
+        Returns the number of already decrypted bytes available for read,
+        pending on the connection."""
+        count = libssl_SSL_pending(self.ssl)
+        # a negative count is turned into an SSL error for the caller
+        if count < 0:
+            raise _ssl_seterror(self.space, self, count)
+        return self.space.wrap(count)
     def read(self, num_bytes=1024):
         """read([len]) -> string
@@ -369,18 +377,263 @@
         return self.w_socket
+    def cipher(self, space):
+        """cipher() -> (name, protocol, bits)
+        Returns a tuple describing the cipher currently in use: its name,
+        the protocol version string reported for it, and its number of
+        bits.  Name and protocol are None when OpenSSL returns no string.
+        Returns None if there is no SSL structure or no current cipher."""
+        if not self.ssl:
+            return space.w_None
+        current = libssl_SSL_get_current_cipher(self.ssl)
+        if not current:
+            return space.w_None
+        name = libssl_SSL_CIPHER_get_name(current)
+        if name:
+            w_name = space.wrap(rffi.charp2str(name))
+        else:
+            w_name = space.w_None
+        proto = libssl_SSL_CIPHER_get_version(current)
+        if proto:
+            # wrap the protocol string itself (not the cipher name again)
+            w_proto = space.wrap(rffi.charp2str(proto))
+        else:
+            w_proto = space.w_None
+        # the second argument (alg_bits out-parameter) is not needed
+        bits = libssl_SSL_CIPHER_get_bits(current,
+                                          lltype.nullptr(rffi.INTP.TO))
+        w_bits = space.newint(bits)
+        return space.newtuple([w_name, w_proto, w_bits])
+    @unwrap_spec(der=bool)
+    def peer_certificate(self, der=False):
+        """peer_certificate([der=False]) -> certificate
+        Returns the certificate for the peer.  If no certificate was provided,
+        returns None.  If a certificate was provided, but not validated, returns
+        an empty dictionary.  Otherwise returns a dict containing information
+        about the peer certificate.
+        If the optional argument is True, returns a DER-encoded copy of the
+        peer certificate, or None if no certificate was provided.  This will
+        return the certificate even if it wasn't validated."""
+        if not self.peer_cert:
+            return self.space.w_None
+        if der:
+            # return cert in DER-encoded format
+            with lltype.scoped_alloc(rffi.CCHARPP.TO, 1) as buf_ptr:
+                # a NULL output pointer asks i2d_X509 to allocate the buffer
+                buf_ptr[0] = lltype.nullptr(rffi.CCHARP.TO)
+                length = libssl_i2d_X509(self.peer_cert, buf_ptr)
+                if length < 0:
+                    raise _ssl_seterror(self.space, self, length)
+                try:
+                    # this is actually an immutable bytes sequence.
+                    # DER is binary and may contain NUL bytes, so copy
+                    # exactly `length` bytes instead of stopping at the
+                    # first NUL.
+                    return self.space.wrap(
+                        rffi.charpsize2str(buf_ptr[0], length))
+                finally:
+                    libssl_OPENSSL_free(buf_ptr[0])
+        else:
+            verification = libssl_SSL_CTX_get_verify_mode(
+                libssl_SSL_get_SSL_CTX(self.ssl))
+            # without SSL_VERIFY_PEER the certificate was not validated
+            if not verification & SSL_VERIFY_PEER:
+                return self.space.newdict()
+            else:
+                return _decode_certificate(self.space, self.peer_cert)
+def _decode_certificate(space, certificate, verbose=False):
+    """Build a wrapped dict describing `certificate`.
+    The dict always gets the keys "subject" and "notAfter".  With
+    verbose=True it also gets "issuer", "version", "serialNumber" and
+    "notBefore".  "subjectAltName" is added when _get_peer_alt_names()
+    finds any alternative names.
+    Raises the error built by _ssl_seterror() if reading a printed value
+    back from the memory BIO fails."""
+    w_retval = space.newdict()
+    w_peer = _create_tuple_for_X509_NAME(
+        space, libssl_X509_get_subject_name(certificate))
+    space.setitem(w_retval, space.wrap("subject"), w_peer)
+    if verbose:
+        w_issuer = _create_tuple_for_X509_NAME(
+            space, libssl_X509_get_issuer_name(certificate))
+        space.setitem(w_retval, space.wrap("issuer"), w_issuer)
+        space.setitem(w_retval, space.wrap("version"),
+                      space.wrap(libssl_X509_get_version(certificate)))
+    # a memory BIO is reused as scratch space: OpenSSL prints each value
+    # into it and the text is read back with BIO_gets
+    biobuf = libssl_BIO_new(libssl_BIO_s_mem())
+    try:
+        if verbose:
+            libssl_BIO_reset(biobuf)
+            serialNumber = libssl_X509_get_serialNumber(certificate)
+            libssl_i2a_ASN1_INTEGER(biobuf, serialNumber)
+            # should not exceed 20 octets, 160 bits, so buf is big enough
+            with lltype.scoped_alloc(rffi.CCHARP.TO, 100) as buf:
+                length = libssl_BIO_gets(biobuf, buf, 99)
+                if length < 0:
+                    raise _ssl_seterror(space, None, length)
+                w_serial = space.wrap(rffi.charpsize2str(buf, length))
+            space.setitem(w_retval, space.wrap("serialNumber"), w_serial)
+            libssl_BIO_reset(biobuf)
+            notBefore = libssl_X509_get_notBefore(certificate)
+            libssl_ASN1_TIME_print(biobuf, notBefore)
+            with lltype.scoped_alloc(rffi.CCHARP.TO, 100) as buf:
+                length = libssl_BIO_gets(biobuf, buf, 99)
+                if length < 0:
+                    raise _ssl_seterror(space, None, length)
+                w_date = space.wrap(rffi.charpsize2str(buf, length))
+            space.setitem(w_retval, space.wrap("notBefore"), w_date)
+        # "notAfter" is reported regardless of `verbose`
+        libssl_BIO_reset(biobuf)
+        notAfter = libssl_X509_get_notAfter(certificate)
+        libssl_ASN1_TIME_print(biobuf, notAfter)
+        with lltype.scoped_alloc(rffi.CCHARP.TO, 100) as buf:
+            length = libssl_BIO_gets(biobuf, buf, 99)
+            if length < 0:
+                raise _ssl_seterror(space, None, length)
+            w_date = space.wrap(rffi.charpsize2str(buf, length))
+        space.setitem(w_retval, space.wrap("notAfter"), w_date)
+    finally:
+        libssl_BIO_free(biobuf)
+    # Now look for subjectAltName
+    w_alt_names = _get_peer_alt_names(space, certificate)
+    if w_alt_names is not space.w_None:
+        space.setitem(w_retval, space.wrap("subjectAltName"), w_alt_names)
+    return w_retval
+def _create_tuple_for_X509_NAME(space, xname):
+    """Convert the X509_NAME `xname` into a wrapped tuple of RDN tuples.
+    Each RDN tuple holds the attribute tuples built by
+    _create_tuple_for_attribute(); consecutive entries whose `set` field
+    is equal end up in the same RDN tuple."""
+    entry_count = libssl_X509_NAME_entry_count(xname)
+    dn_w = []
+    rdn_w = []
+    # -1 means "no entry seen yet"
+    rdn_level = -1
+    for index in range(entry_count):
+        entry = libssl_X509_NAME_get_entry(xname, index)
+        # check to see if we've gotten to a new RDN
+        entry_level = intmask(entry[0].c_set)
+        if rdn_level >= 0:
+            if rdn_level != entry_level:
+                # yes, new RDN
+                # add old RDN to DN
+                dn_w.append(space.newtuple(list(rdn_w)))
+                rdn_w = []
+        rdn_level = entry_level
+        # Now add this attribute to the current RDN
+        name = libssl_X509_NAME_ENTRY_get_object(entry)
+        value = libssl_X509_NAME_ENTRY_get_data(entry)
+        attr = _create_tuple_for_attribute(space, name, value)
+        rdn_w.append(attr)
+    # Now, there is typically a dangling RDN
+    if rdn_w:
+        dn_w.append(space.newtuple(list(rdn_w)))
+    return space.newtuple(list(dn_w))
+def _get_peer_alt_names(space, certificate):
+    """Return a wrapped tuple with the subjectAltName entries of
+    `certificate`, or None if there is no certificate or no entry.
+    DirName entries are rendered as ("DirName", <tuple of RDN tuples>);
+    every other entry uses OpenSSL's printed form "type:value", split at
+    the first colon into a (type, value) pair."""
+    # this code follows the procedure outlined in
+    # OpenSSL's crypto/x509v3/v3_prn.c:X509v3_EXT_print()
+    # function to extract the STACK_OF(GENERAL_NAME),
+    # then iterates through the stack to add the
+    # names.
+    if not certificate:
+        return space.w_None
+    # get a memory buffer
+    biobuf = libssl_BIO_new(libssl_BIO_s_mem())
+    try:
+        alt_names_w = []
+        # X509_get_ext_by_NID() starts searching *after* the position it
+        # is given, so start at -1; starting at 0 would miss a
+        # subjectAltName stored as the very first extension.
+        i = -1
+        while True:
+            i = libssl_X509_get_ext_by_NID(
+                certificate, NID_subject_alt_name, i)
+            if i < 0:
+                break
+            # now decode the altName
+            ext = libssl_X509_get_ext(certificate, i)
+            method = libssl_X509V3_EXT_get(ext)
+            if not method:
+                raise ssl_error(space,
+                                "No method for internalizing subjectAltName!'")
+            with lltype.scoped_alloc(rffi.CCHARPP.TO, 1) as p_ptr:
+                p_ptr[0] = ext[0].c_value.c_data
+                length = intmask(ext[0].c_value.c_length)
+                null = lltype.nullptr(rffi.VOIDP.TO)
+                # use the ASN1 item template when the method has one,
+                # else fall back to the method's own d2i function
+                if method[0].c_it:
+                    names = rffi.cast(GENERAL_NAMES, libssl_ASN1_item_d2i(
+                            null, p_ptr, length,
+                            libssl_ASN1_ITEM_ptr(method[0].c_it)))
+                else:
+                    names = rffi.cast(GENERAL_NAMES, method[0].c_d2i(
+                            null, p_ptr, length))
+            for j in range(libssl_sk_GENERAL_NAME_num(names)):
+                # Get a rendering of each name in the set of names
+                name = libssl_sk_GENERAL_NAME_value(names, j)
+                if intmask(name[0].c_type) == GEN_DIRNAME:
+                    # we special-case DirName as a tuple of tuples of attributes
+                    dirname = libssl_pypy_GENERAL_NAME_dirn(name)
+                    w_t = space.newtuple([
+                            space.wrap("DirName"),
+                            _create_tuple_for_X509_NAME(space, dirname)
+                            ])
+                else:
+                    # for everything else, we use the OpenSSL print form
+                    libssl_BIO_reset(biobuf)
+                    libssl_GENERAL_NAME_print(biobuf, name)
+                    with lltype.scoped_alloc(rffi.CCHARP.TO, 2048) as buf:
+                        length = libssl_BIO_gets(biobuf, buf, 2047)
+                        if length < 0:
+                            raise _ssl_seterror(space, None, 0)
+                        v = rffi.charpsize2str(buf, length)
+                    v1, v2 = v.split(':', 1)
+                    w_t = space.newtuple([space.wrap(v1),
+                                          space.wrap(v2)])
+                alt_names_w.append(w_t)
+    finally:
+        libssl_BIO_free(biobuf)
+    if alt_names_w:
+        return space.newtuple(list(alt_names_w))
+    else:
+        return space.w_None
+def _create_tuple_for_attribute(space, name, value):
+    """Return the wrapped pair (name, value) for one X509 name attribute.
+    `name` is rendered as text with OBJ_obj2txt(); `value` is converted
+    to UTF-8 by OpenSSL and then decoded into a unicode object.
+    Raises the error built by _ssl_seterror() if either conversion
+    fails."""
+    with lltype.scoped_alloc(rffi.CCHARP.TO, X509_NAME_MAXLEN) as buf:
+        length = libssl_OBJ_obj2txt(buf, X509_NAME_MAXLEN, name, 0)
+        if length < 0:
+            raise _ssl_seterror(space, None, 0)
+        w_name = space.wrap(rffi.charpsize2str(buf, length))
+    with lltype.scoped_alloc(rffi.CCHARPP.TO, 1) as buf_ptr:
+        length = libssl_ASN1_STRING_to_UTF8(buf_ptr, value)
+        if length < 0:
+            raise _ssl_seterror(space, None, 0)
+        try:
+            w_value = space.wrap(rffi.charpsize2str(buf_ptr[0], length))
+            w_value = space.call_method(w_value, "decode", space.wrap("utf-8"))
+        finally:
+            # ASN1_STRING_to_UTF8() allocated this buffer; release it even
+            # if wrapping or decoding raised
+            libssl_OPENSSL_free(buf_ptr[0])
+    return space.newtuple([w_name, w_value])
 SSLObject.typedef = TypeDef("SSLObject",
     server = interp2app(SSLObject.server),
     issuer = interp2app(SSLObject.issuer),
     write = interp2app(SSLObject.write),
+    pending = interp2app(SSLObject.pending),
     read = interp2app(SSLObject.read),
-    do_handshake=interp2app(SSLObject.do_handshake),
-    shutdown=interp2app(SSLObject.shutdown),
+    do_handshake = interp2app(SSLObject.do_handshake),
+    shutdown = interp2app(SSLObject.shutdown),
+    cipher = interp2app(SSLObject.cipher),
+    peer_certificate = interp2app(SSLObject.peer_certificate),
-def new_sslobject(space, w_sock, side, w_key_file, w_cert_file):
+def new_sslobject(space, w_sock, side, w_key_file, w_cert_file,
+                  cert_mode, protocol, w_cacerts_file, w_ciphers):
     ss = SSLObject(space)
     sock_fd = space.int_w(space.call_method(w_sock, "fileno"))
@@ -397,18 +650,47 @@
         cert_file = None
         cert_file = space.str_w(w_cert_file)
+    if space.is_w(w_cacerts_file, space.w_None):
+        cacerts_file = None
+    else:
+        cacerts_file = space.str_w(w_cacerts_file)
+    if space.is_w(w_ciphers, space.w_None):
+        ciphers = None
+    else:
+        ciphers = space.str_w(w_ciphers)
     if side == PY_SSL_SERVER and (not key_file or not cert_file):
         raise ssl_error(space, "Both the key & certificate files "
                         "must be specified for server-side operation")
-    ss.ctx = libssl_SSL_CTX_new(libssl_SSLv23_method()) # set up context
+    # set up context
+    if protocol == PY_SSL_VERSION_TLS1:
+        method = libssl_TLSv1_method()
+    elif protocol == PY_SSL_VERSION_SSL3:
+        method = libssl_SSLv3_method()
+    elif protocol == PY_SSL_VERSION_SSL2:
+        method = libssl_SSLv2_method()
+    elif protocol == PY_SSL_VERSION_SSL23:
+        method = libssl_SSLv23_method()
+    else:
+        raise ssl_error(space, "Invalid SSL protocol variant specified")
+    ss.ctx = libssl_SSL_CTX_new(method)
     if not ss.ctx:
-        raise ssl_error(space, "Invalid SSL protocol variant specified")
+        raise ssl_error(space, "Could not create SSL context")
-    # XXX SSL_CTX_set_cipher_list?
+    if ciphers:
+        ret = libssl_SSL_CTX_set_cipher_list(ss.ctx, ciphers)
+        if ret == 0:
+            raise ssl_error(space, "No cipher can be selected.")
-    # XXX SSL_CTX_load_verify_locations?
+    if cert_mode != PY_SSL_CERT_NONE:
+        if not cacerts_file:
+            raise ssl_error(space,
+                            "No root certificates specified for "
+                            "verification of other-side certificates.")
+        ret = libssl_SSL_CTX_load_verify_locations(ss.ctx, cacerts_file, None)
+        if ret != 1:
+            raise _ssl_seterror(space, None, 0)
     if key_file:
         ret = libssl_SSL_CTX_use_PrivateKey_file(ss.ctx, key_file,
@@ -423,7 +705,12 @@
     # ssl compatibility
     libssl_SSL_CTX_set_options(ss.ctx, SSL_OP_ALL)
-    libssl_SSL_CTX_set_verify(ss.ctx, SSL_VERIFY_NONE, None) # set verify level
+    verification_mode = SSL_VERIFY_NONE
+    if cert_mode == PY_SSL_CERT_OPTIONAL:
+        verification_mode = SSL_VERIFY_PEER
+    elif cert_mode == PY_SSL_CERT_REQUIRED:
+        verification_mode = SSL_VERIFY_PEER | SSL_VERIFY_FAIL_IF_NO_PEER_CERT
+    libssl_SSL_CTX_set_verify(ss.ctx, verification_mode, None)
     ss.ssl = libssl_SSL_new(ss.ctx) # new ssl struct
     libssl_SSL_set_fd(ss.ssl, sock_fd) # set the socket for SSL
     libssl_SSL_set_mode(ss.ssl, SSL_MODE_AUTO_RETRY)
@@ -432,8 +719,8 @@
     # to non-blocking mode (blocking is the default)
     if has_timeout:
         # Set both the read and write BIO's to non-blocking mode
-        libssl_BIO_ctrl(libssl_SSL_get_rbio(ss.ssl), BIO_C_SET_NBIO, 1, None)
-        libssl_BIO_ctrl(libssl_SSL_get_wbio(ss.ssl), BIO_C_SET_NBIO, 1, None)
+        libssl_BIO_set_nbio(libssl_SSL_get_rbio(ss.ssl), 1)
+        libssl_BIO_set_nbio(libssl_SSL_get_wbio(ss.ssl), 1)
     if side == PY_SSL_CLIENT:
@@ -494,7 +781,10 @@
 def _ssl_seterror(space, ss, ret):
     assert ret <= 0
-    err = libssl_SSL_get_error(ss.ssl, ret)
+    if ss and ss.ssl:
+        err = libssl_SSL_get_error(ss.ssl, ret)
+    else:
+        err = SSL_ERROR_SSL
     errstr = ""
     errval = 0
@@ -546,10 +836,12 @@
 @unwrap_spec(side=int, cert_mode=int, protocol=int)
 def sslwrap(space, w_socket, side, w_key_file=None, w_cert_file=None,
             cert_mode=PY_SSL_CERT_NONE, protocol=PY_SSL_VERSION_SSL23,
-            w_cacerts_file=None, w_cipher=None):
+            w_cacerts_file=None, w_ciphers=None):
     """sslwrap(socket, side, [keyfile, certfile]) -> sslobject"""
     return space.wrap(new_sslobject(
-        space, w_socket, side, w_key_file, w_cert_file))
+        space, w_socket, side, w_key_file, w_cert_file,
+        cert_mode, protocol,
+        w_cacerts_file, w_ciphers))
 class Cache:
     def __init__(self, space):
@@ -559,3 +851,59 @@
 def get_error(space):
     return space.fromcache(Cache).w_error
+@unwrap_spec(filename=str, verbose=bool)
+def _test_decode_cert(space, filename, verbose=True):
+    """Read a PEM-encoded certificate from `filename` and return the dict
+    built by _decode_certificate() for it.
+    Raises an SSL error if the BIO cannot be allocated, the file cannot be
+    opened, or the PEM data cannot be decoded."""
+    cert = libssl_BIO_new(libssl_BIO_s_file())
+    if not cert:
+        raise ssl_error(space, "Can't malloc memory to read file")
+    try:
+        if libssl_BIO_read_filename(cert, filename) <= 0:
+            raise ssl_error(space, "Can't open file")
+        x = libssl_PEM_read_bio_X509_AUX(cert, None, None, None)
+        if not x:
+            raise ssl_error(space, "Error decoding PEM-encoded file")
+        try:
+            return _decode_certificate(space, x, verbose)
+        finally:
+            # the X509 structure is only needed while it is being decoded
+            libssl_X509_free(x)
+    finally:
+        libssl_BIO_free(cert)
+# this function is needed to perform locking on shared data
+# structures. (Note that OpenSSL uses a number of global data
+# structures that will be implicitly shared whenever multiple threads
+# use OpenSSL.) Multi-threaded applications will crash at random if
+# it is not set.
+# locking_function() must be able to handle up to CRYPTO_num_locks()
+# different mutex locks. It sets the n-th lock if mode & CRYPTO_LOCK, and
+# releases it otherwise.
+# filename and line are the file name and line number of the code setting
+# the lock. They can be useful for debugging.
+_ssl_locks = []
+def _ssl_thread_locking_function(mode, n, filename, line):
+    # Locking callback registered with OpenSSL by setup_ssl_threads().
+    # `filename` and `line` are accepted but not used here.
+    n = intmask(n)
+    # lock numbers outside the allocated range are silently ignored
+    if n < 0 or n >= len(_ssl_locks):
+        return
+    # CRYPTO_LOCK bit set: acquire lock number n; otherwise release it
+    if intmask(mode) & CRYPTO_LOCK:
+        _ssl_locks[n].acquire(True)
+    else:
+        _ssl_locks[n].release()
+def _ssl_thread_id_function():
+    # Thread-id callback registered with OpenSSL by setup_ssl_threads():
+    # returns ll_thread.get_ident() cast to rffi.INT.
+    from pypy.module.thread import ll_thread
+    return rffi.cast(rffi.INT, ll_thread.get_ident())
+def setup_ssl_threads():
+    # Allocate one lock per libssl_CRYPTO_num_locks() into _ssl_locks and
+    # register the locking and thread-id callbacks with OpenSSL.
+    # NOTE(review): every call appends a fresh set of locks; presumably
+    # this runs only once, from the module's startup() -- confirm.
+    from pypy.module.thread import ll_thread
+    for i in range(libssl_CRYPTO_num_locks()):
+        _ssl_locks.append(ll_thread.allocate_lock())
+    libssl_CRYPTO_set_locking_callback(_ssl_thread_locking_function)
+    libssl_CRYPTO_set_id_callback(_ssl_thread_id_function)
diff --git a/pypy/module/_ssl/test/test_ssl.py b/pypy/module/_ssl/test/test_ssl.py
--- a/pypy/module/_ssl/test/test_ssl.py
+++ b/pypy/module/_ssl/test/test_ssl.py
@@ -81,7 +81,7 @@
         ss = _ssl.sslwrap(s, 0)
         exc = raises(_ssl.SSLError, ss.write, "data")
-        assert exc.value.message == "Underlying socket has been closed."
+        assert exc.value.strerror == "Underlying socket has been closed."
 class AppTestConnectedSSL:
@@ -90,8 +90,8 @@
         cls.space = space
     def setup_method(self, method):
-        # https://codespeak.net/
-        ADDR = "codespeak.net", 443
+        # https://www.verisign.net/
+        ADDR = "www.verisign.net", 443
         self.w_s = self.space.appexec([self.space.wrap(ADDR)], """(ADDR):
             import socket
@@ -146,6 +146,7 @@
         data = ss.read(10)
         assert isinstance(data, str)
         assert len(data) == 10
+        assert ss.pending() > 50 # many more bytes to read
     def test_shutdown(self):
diff --git a/pypy/module/cpyext/__init__.py b/pypy/module/cpyext/__init__.py
--- a/pypy/module/cpyext/__init__.py
+++ b/pypy/module/cpyext/__init__.py
@@ -12,9 +12,21 @@
     appleveldefs = {
+    atexit_funcs = []
     def startup(self, space):
+    def register_atexit(self, function):
+        """Remember `function` so that shutdown() calls it.
+        Raises ValueError once 32 functions are already registered."""
+        if len(self.atexit_funcs) >= 32:
+            raise ValueError("cannot register more than 32 atexit functions")
+        self.atexit_funcs.append(function)
+    def shutdown(self, space):
+        """Call the functions registered with register_atexit(), most
+        recently registered first (the order documented for Py_AtExit)."""
+        funcs = self.atexit_funcs
+        # walk the list backwards: last registered, first called
+        index = len(funcs)
+        while index > 0:
+            index -= 1
+            funcs[index]()
 # import these modules to register api functions by side-effect
 import pypy.module.cpyext.thread
 import pypy.module.cpyext.pyobject
diff --git a/pypy/module/cpyext/import_.py b/pypy/module/cpyext/import_.py
--- a/pypy/module/cpyext/import_.py
+++ b/pypy/module/cpyext/import_.py
@@ -73,3 +73,10 @@
         w_mod = Module(space, space.wrap(modulename))
     return borrow_from(None, w_mod)
+@cpython_api([], PyObject)
+def PyImport_GetModuleDict(space):
+    """Return the dictionary used for the module administration (a.k.a.
+    sys.modules).  Note that this is a per-interpreter variable."""
+    w_modulesDict = space.sys.get('modules')
+    # handed out as a borrowed reference (no container object given)
+    return borrow_from(None, w_modulesDict)
diff --git a/pypy/module/cpyext/number.py b/pypy/module/cpyext/number.py
--- a/pypy/module/cpyext/number.py
+++ b/pypy/module/cpyext/number.py
@@ -40,8 +40,7 @@
 @cpython_api([PyObject], PyObject)
 def PyNumber_Int(space, w_obj):
     """Returns the o converted to an integer object on success, or NULL on failure.
-    If the argument is outside the integer range a long object will be returned
-    instead. This is the equivalent of the Python expression int(o)."""
+    This is the equivalent of the Python expression int(o)."""
     return space.int(w_obj)
 @cpython_api([PyObject], PyObject)
diff --git a/pypy/module/cpyext/pyfile.py b/pypy/module/cpyext/pyfile.py
--- a/pypy/module/cpyext/pyfile.py
+++ b/pypy/module/cpyext/pyfile.py
@@ -1,8 +1,7 @@
 from pypy.rpython.lltypesystem import rffi, lltype
 from pypy.module.cpyext.api import (
     cpython_api, CONST_STRING, FILEP, build_type_checkers)
-from pypy.module.cpyext.pyobject import (
-    PyObject)
+from pypy.module.cpyext.pyobject import PyObject, borrow_from
 from pypy.interpreter.error import OperationError
 from pypy.module._file.interp_file import W_File
@@ -66,3 +65,7 @@
     space.call_method(w_p, "write", w_s)
     return 0
+@cpython_api([PyObject], PyObject)
+def PyFile_Name(space, w_p):
+    """Return the name of the file specified by p as a string object."""
+    # the "name" attribute is returned as a reference borrowed from w_p
+    return borrow_from(w_p, space.getattr(w_p, space.wrap("name")))
\ No newline at end of file
diff --git a/pypy/module/cpyext/pythonrun.py b/pypy/module/cpyext/pythonrun.py
--- a/pypy/module/cpyext/pythonrun.py
+++ b/pypy/module/cpyext/pythonrun.py
@@ -14,3 +14,21 @@
     return space.fromcache(State).get_programname()
+@cpython_api([lltype.Ptr(lltype.FuncType([], lltype.Void))], rffi.INT_real, error=-1)
+def Py_AtExit(space, func_ptr):
+    """Register a cleanup function to be called by Py_Finalize().  The cleanup
+    function will be called with no arguments and should return no value.  At
+    most 32 cleanup functions can be registered.  When the registration is
+    successful, Py_AtExit() returns 0; on failure, it returns -1.  The cleanup
+    function registered last is called first. Each cleanup function will be
+    called at most once.  Since Python's internal finalization will have
+    completed before the cleanup function, no Python APIs should be called by
+    func."""
+    from pypy.module import cpyext
+    w_module = space.getbuiltinmodule('cpyext')
+    module = space.interp_w(cpyext.Module, w_module)
+    try:
+        module.register_atexit(func_ptr)
+    except ValueError:
+        # register_atexit() refuses more than 32 functions
+        return -1
+    return 0
diff --git a/pypy/module/cpyext/sequence.py b/pypy/module/cpyext/sequence.py
--- a/pypy/module/cpyext/sequence.py
+++ b/pypy/module/cpyext/sequence.py
@@ -36,7 +36,6 @@
 def PySequence_Length(space, w_obj):
     return space.len_w(w_obj)
 @cpython_api([PyObject, CONST_STRING], PyObject)
 def PySequence_Fast(space, w_obj, m):
     """Returns the sequence o as a tuple, unless it is already a tuple or list, in
@@ -96,10 +95,21 @@
     return 0
 @cpython_api([PyObject, Py_ssize_t], PyObject)
+def PySequence_ITEM(space, w_obj, i):
+    """Return the ith element of o or NULL on failure. Macro form of
+    PySequence_GetItem() but without checking that
+    PySequence_Check(o)() is true and without adjustment for negative
+    indices.
+    This function used an int type for i. This might require
+    changes in your code for properly supporting 64-bit systems."""
+    # NOTE(review): this goes through the generic space.getitem(), so
+    # negative indices are presumably still adjusted here, unlike what
+    # the docstring says about the C macro -- confirm.
+    return space.getitem(w_obj, space.wrap(i))
+@cpython_api([PyObject, Py_ssize_t], PyObject)
 def PySequence_GetItem(space, w_obj, i):
     """Return the ith element of o, or NULL on failure. This is the equivalent of
     the Python expression o[i]."""
-    return space.getitem(w_obj, space.wrap(i))
+    return PySequence_ITEM(space, w_obj, i)
 @cpython_api([PyObject], PyObject)
 def PySequence_List(space, w_obj):
@@ -154,3 +164,27 @@
     equivalent of the Python statement del o[i]."""
     space.delitem(w_o, space.wrap(i))
     return 0
+@cpython_api([PyObject, PyObject], Py_ssize_t, error=-1)
+def PySequence_Index(space, w_seq, w_obj):
+    """Return the first index i for which o[i] == value.  On error, return
+    -1.    This is equivalent to the Python expression o.index(value).
+    This function returned an int type. This might require changes
+    in your code for properly supporting 64-bit systems."""
+    w_iter = space.iter(w_seq)
+    idx = 0
+    while True:
+        try:
+            w_next = space.next(w_iter)
+        except OperationError, e:
+            # StopIteration just ends the scan; anything else propagates
+            if e.match(space, space.w_StopIteration):
+                break
+            raise
+        if space.is_true(space.eq(w_next, w_obj)):
+            return idx
+        idx += 1
+    # the iterator was exhausted without finding an equal element
+    raise OperationError(space.w_ValueError, space.wrap(
+        "sequence.index(x): x not in sequence"))
diff --git a/pypy/module/cpyext/stubs.py b/pypy/module/cpyext/stubs.py
--- a/pypy/module/cpyext/stubs.py
+++ b/pypy/module/cpyext/stubs.py
@@ -47,7 +47,7 @@
     allows for complicated memory sharing possibilities, but some caller may
     not be able to handle all the complexity but may want to see if the
     exporter will let them take a simpler view to its memory.
     Some exporters may not be able to share memory in every possible way and
     may need to raise errors to signal to some consumers that something is
     just not possible. These errors should be a BufferError unless
@@ -55,17 +55,17 @@
     exporter can use flags information to simplify how much of the
     Py_buffer structure is filled in with non-default values and/or
     raise an error if the object can't support a simpler view of its memory.
     0 is returned on success and -1 on error.
     The following table gives possible values to the flags arguments.
     This is the default flag state.  The returned
     buffer may or may not have writable memory.  The
     format of the data will be assumed to be unsigned
@@ -73,14 +73,14 @@
     never needs to be '|'d to the others. The exporter
     will raise an error if it cannot provide such a
     contiguous buffer of bytes.
     The returned buffer must be writable.  If it is
     not writable, then raise an error.
     This implies PyBUF_ND. The returned
     buffer must provide strides information (i.e. the
     strides cannot be NULL). This would be used when
@@ -89,20 +89,20 @@
     you can handle shape.  The exporter can raise an
     error if a strided representation of the data is
     not possible (i.e. without the suboffsets).
     The returned buffer must provide shape
     information. The memory will be assumed C-style
     contiguous (last dimension varies the
     fastest). The exporter may raise an error if it
     cannot provide this kind of contiguous buffer. If
     this is not given then shape will be NULL.
     These flags indicate that the contiguity returned
     buffer must be respectively, C-contiguous (last
     dimension varies the fastest), Fortran contiguous
@@ -111,18 +111,18 @@
     PyBUF_STRIDES and guarantee that the
     strides buffer info structure will be filled in
     This flag indicates the returned buffer must have
     suboffsets information (which can be NULL if no
     suboffsets are needed).  This can be used when
     the consumer can handle indirect array
     referencing implied by these suboffsets. This
     implies PyBUF_STRIDES.
     The returned buffer must have true format
     information if this flag is provided. This would
     be used when the consumer is going to be checking
@@ -132,43 +132,43 @@
     explicitly requested then the format must be
     returned as NULL (which means 'B', or
     unsigned bytes)
     This is equivalent to (PyBUF_STRIDES |
     This is equivalent to (PyBUF_STRIDES).
     This is equivalent to (PyBUF_STRIDES |
     This is equivalent to (PyBUF_STRIDES |
     This is equivalent to (PyBUF_INDIRECT |
     This is equivalent to (PyBUF_INDIRECT |
     This is equivalent to (PyBUF_ND |
     This is equivalent to (PyBUF_ND)."""
     raise NotImplementedError
@@ -251,7 +251,7 @@
 def PyByteArray_FromObject(space, o):
     """Return a new bytearray object from any object, o, that implements the
     buffer protocol.
     XXX expand about the buffer protocol, at least somewhere"""
     raise NotImplementedError
@@ -354,7 +354,7 @@
 @cpython_api([PyObject], rffi.INT_real, error=-1)
 def PyCodec_Register(space, search_function):
     """Register a new codec search function.
     As side effect, this tries to load the encodings package, if not yet
     done, to make sure that it is always first in the list of search functions."""
     raise NotImplementedError
@@ -362,7 +362,7 @@
 @cpython_api([PyObject, rffi.CCHARP, rffi.CCHARP], PyObject)
 def PyCodec_Encode(space, object, encoding, errors):
     """Generic codec based encoding API.
     object is passed through the encoder function found for the given
     encoding using the error handling method defined by errors.  errors may
     be NULL to use the default method defined for the codec.  Raises a
@@ -372,7 +372,7 @@
 @cpython_api([PyObject, rffi.CCHARP, rffi.CCHARP], PyObject)
 def PyCodec_Decode(space, object, encoding, errors):
     """Generic codec based decoding API.
     object is passed through the decoder function found for the given
     encoding using the error handling method defined by errors.  errors may
     be NULL to use the default method defined for the codec.  Raises a
@@ -405,7 +405,7 @@
     This callback function will be called by a codec when it encounters
     unencodable characters/undecodable bytes and name is specified as the error
     parameter in the call to the encode/decode function.
     The callback gets a single argument, an instance of
     UnicodeEncodeError, UnicodeDecodeError or
     UnicodeTranslateError that holds information about the problematic
@@ -415,7 +415,7 @@
     containing the replacement for the problematic sequence, and an integer
     giving the offset in the original string at which encoding/decoding should be
     Return 0 on success, -1 on error."""
     raise NotImplementedError
@@ -500,18 +500,18 @@
     the set of strings accepted by Python's float() constructor,
     except that s must not have leading or trailing whitespace.
     The conversion is independent of the current locale.
     If endptr is NULL, convert the whole string.  Raise
     ValueError and return -1.0 if the string is not a valid
     representation of a floating-point number.
     If endptr is not NULL, convert as much of the string as
     possible and set *endptr to point to the first unconverted
     character.  If no initial segment of the string is the valid
     representation of a floating-point number, set *endptr to point
     to the beginning of the string, raise ValueError, and return
     If s represents a value that is too large to store in a float
     (for example, "1e500" is such a string on many platforms) then
     if overflow_exception is NULL return Py_HUGE_VAL (with
@@ -519,7 +519,7 @@
     overflow_exception must point to a Python exception object;
     raise that exception and return -1.0.  In both cases, set
     *endptr to point to the first character after the converted value.
     If any other error occurs during the conversion (for example an
     out-of-memory error), set the appropriate Python exception and
     return -1.0.
@@ -531,12 +531,12 @@
     """Convert a string to a double. This function behaves like the Standard C
     function strtod() does in the C locale. It does this without changing the
     current locale, since that would not be thread-safe.
     PyOS_ascii_strtod() should typically be used for reading configuration
     files or other non-user input that should be locale independent.
     See the Unix man page strtod(2) for details.
     Use PyOS_string_to_double() instead."""
     raise NotImplementedError
@@ -546,10 +546,10 @@
     separator. format is a printf()-style format string specifying the
     number format. Allowed conversion characters are 'e', 'E', 'f',
     'F', 'g' and 'G'.
     The return value is a pointer to buffer with the converted string or NULL if
     the conversion failed.
     This function is removed in Python 2.7 and 3.1.  Use PyOS_double_to_string()
     raise NotImplementedError
@@ -558,29 +558,29 @@
 def PyOS_double_to_string(space, val, format_code, precision, flags, ptype):
     """Convert a double val to a string using supplied
     format_code, precision, and flags.
     format_code must be one of 'e', 'E', 'f', 'F',
     'g', 'G' or 'r'.  For 'r', the supplied precision
     must be 0 and is ignored.  The 'r' format code specifies the
     standard repr() format.
     flags can be zero or more of the values Py_DTSF_SIGN,
     Py_DTSF_ADD_DOT_0, or Py_DTSF_ALT, or-ed together:
     Py_DTSF_SIGN means to always precede the returned string with a sign
     character, even if val is non-negative.
     Py_DTSF_ADD_DOT_0 means to ensure that the returned string will not look
     like an integer.
     Py_DTSF_ALT means to apply "alternate" formatting rules.  See the
     documentation for the PyOS_snprintf() '#' specifier for
     If ptype is non-NULL, then the value it points to will be set to one of
     Py_DTST_FINITE, Py_DTST_INFINITE, or Py_DTST_NAN, signifying that
     val is a finite number, an infinite number, or not a number, respectively.
     The return value is a pointer to buffer with the converted string or
     NULL if the conversion failed. The caller is responsible for freeing the
     returned string by calling PyMem_Free().
@@ -590,9 +590,9 @@
 @cpython_api([rffi.CCHARP], rffi.DOUBLE, error=CANNOT_FAIL)
 def PyOS_ascii_atof(space, nptr):
     """Convert a string to a double in a locale-independent way.
     See the Unix man page atof(2) for details.
     Use PyOS_string_to_double() instead."""
     raise NotImplementedError
@@ -683,7 +683,7 @@
     override is true, else the first wins. Return 0 on success or -1
     if an exception was raised. Equivalent Python (except for the return
     def PyDict_MergeFromSeq2(a, seq2, override):
         for key, value in seq2:
             if override or key not in a:
@@ -708,7 +708,7 @@
 def PyErr_SetExcFromWindowsErr(space, type, ierr):
     """Similar to PyErr_SetFromWindowsErr(), with an additional parameter
     specifying the exception type to be raised. Availability: Windows.
     Return value: always NULL."""
     raise NotImplementedError
@@ -724,7 +724,7 @@
 def PyErr_SetExcFromWindowsErrWithFilename(space, type, ierr, filename):
     """Similar to PyErr_SetFromWindowsErrWithFilename(), with an additional
     parameter specifying the exception type to be raised. Availability: Windows.
     Return value: always NULL."""
     raise NotImplementedError
@@ -815,15 +815,15 @@
 @cpython_api([rffi.CCHARP], rffi.INT_real, error=1)
 def Py_EnterRecursiveCall(space, where):
     """Marks a point where a recursive C-level call is about to be performed.
     If USE_STACKCHECK is defined, this function checks if the OS
     stack overflowed using PyOS_CheckStack().  If this is the case, it
     sets a MemoryError and returns a nonzero value.
     The function then checks if the recursion limit is reached.  If this is the
     case, a RuntimeError is set and a nonzero value is returned.
     Otherwise, zero is returned.
     where should be a string such as " in instance check" to be
     concatenated to the RuntimeError message caused by the recursion depth
@@ -843,12 +843,12 @@
     Callers of this must call PyFile_DecUseCount() when they are
     finished with the FILE*.  Otherwise the file object will
     never be closed by Python.
     The GIL must be held while calling this function.
     The suggested use is to call this after PyFile_AsFile() and before
     you release the GIL:
     FILE *fp = PyFile_AsFile(p);
     /* ... */
@@ -865,18 +865,12 @@
     """Decrements the PyFileObject's internal unlocked_count member to
     indicate that the caller is done with its own use of the FILE*.
     This may only be called to undo a prior call to PyFile_IncUseCount().
     The GIL must be held while calling this function (see the example
     raise NotImplementedError
-@cpython_api([PyObject], PyObject)
-def PyFile_Name(space, p):
-    """Return the name of the file specified by p as a string object."""
-    borrow_from()
-    raise NotImplementedError
 @cpython_api([PyFileObject, rffi.CCHARP], rffi.INT_real, error=0)
 def PyFile_SetEncoding(space, p, enc):
     """Set the file's encoding for Unicode output to enc. Return 1 on success and 0
@@ -944,10 +938,10 @@
 def PyFloat_AsString(space, buf, v):
     """Convert the argument v to a string, using the same rules as
     str(). The length of buf should be at least 100.
     This function is unsafe to call because it writes to a buffer whose
     length it does not know.
     Use PyObject_Str() or PyOS_double_to_string() instead."""
     raise NotImplementedError
@@ -955,10 +949,10 @@
 def PyFloat_AsReprString(space, buf, v):
     """Same as PyFloat_AsString, except uses the same rules as
     repr().  The length of buf should be at least 100.
     This function is unsafe to call because it writes to a buffer whose
     length it does not know.
     Use PyObject_Repr() or PyOS_double_to_string() instead."""
     raise NotImplementedError
@@ -966,7 +960,7 @@
 def PyFunction_New(space, code, globals):
     """Return a new function object associated with the code object code. globals
     must be a dictionary with the global variables accessible to the function.
     The function's docstring, name and __module__ are retrieved from the code
     object, the argument defaults and closure are set to NULL."""
     raise NotImplementedError
@@ -1002,7 +996,7 @@
 def PyFunction_SetDefaults(space, op, defaults):
     """Set the argument default values for the function object op. defaults must be
     Py_None or a tuple.
     Raises SystemError and returns -1 on failure."""
     raise NotImplementedError
@@ -1017,7 +1011,7 @@
 def PyFunction_SetClosure(space, op, closure):
     """Set the closure associated with the function object op. closure must be
     Py_None or a tuple of cell objects.
     Raises SystemError and returns -1 on failure."""
     raise NotImplementedError
@@ -1025,7 +1019,7 @@
 def PyObject_GC_NewVar(space, type, size):
     """Analogous to PyObject_NewVar() but for container objects with the
     Py_TPFLAGS_HAVE_GC flag set.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -1034,7 +1028,7 @@
 def PyObject_GC_Resize(space, op, newsize):
     """Resize an object allocated by PyObject_NewVar().  Returns the
     resized object or NULL on failure.
     This function used an int type for newsize. This might
     require changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -1074,15 +1068,15 @@
     """Import a module.  This is best described by referring to the built-in
     Python function __import__(), as the standard __import__() function calls
     this function directly.
     The return value is a new reference to the imported module or top-level package,
     or NULL with an exception set on failure (before Python 2.4, the module may
     still be created in this case).  Like for __import__(), the return value
     when a submodule of a package was requested is normally the top-level package,
     unless a non-empty fromlist was given.
     Failing imports remove incomplete module objects.
     The function is an alias for PyImport_ImportModuleLevel() with
     -1 as level, meaning relative import."""
     raise NotImplementedError
@@ -1092,7 +1086,7 @@
     """Import a module.  This is best described by referring to the built-in Python
     function __import__(), as the standard __import__() function calls
     this function directly.
     The return value is a new reference to the imported module or top-level package,
     or NULL with an exception set on failure.  Like for __import__(),
     the return value when a submodule of a package was requested is normally the
@@ -1120,16 +1114,16 @@
     incompletely initialized modules in sys.modules is dangerous, as imports of
     such modules have no way to know that the module object is an unknown (and
     probably damaged with respect to the module author's intents) state.
     The module's __file__ attribute will be set to the code object's
     This function will reload the module if it was already imported.  See
     PyImport_ReloadModule() for the intended way to reload a module.
     If name points to a dotted name of the form package.module, any package
     structures not already created will still not be created.
     name is removed from sys.modules in error cases."""
     raise NotImplementedError
@@ -1250,7 +1244,7 @@
     allocated by the Python interpreter.  This is a no-op when called for a second
     time (without calling Py_Initialize() again first).  There is no return
     value; errors during finalization are ignored.
     This function is provided for a number of reasons.  An embedding application
     might want to restart Python without having to restart the application itself.
     An application that has loaded the Python interpreter from a dynamically
@@ -1258,7 +1252,7 @@
     before unloading the DLL. During a hunt for memory leaks in an application a
     developer might want to free all memory allocated by Python before exiting from
     the application.
     Bugs and caveats: The destruction of modules and objects in modules is done
     in random order; this may cause destructors (__del__() methods) to fail
     when they depend on other objects (even functions) or modules.  Dynamically
@@ -1308,13 +1302,13 @@
     variable in the top-level Makefile and the --exec-prefix
     argument to the configure script at build  time.  The value is
     available to Python code as sys.exec_prefix.  It is only useful on Unix.
     Background: The exec-prefix differs from the prefix when platform dependent
     files (such as executables and shared libraries) are installed in a different
     directory tree.  In a typical installation, platform dependent files may be
     installed in the /usr/local/plat subtree while platform independent may
     be installed in /usr/local.
     Generally speaking, a platform is a combination of hardware and software
     families, e.g.  Sparc machines running the Solaris 2.x operating system are
     considered the same platform, but Intel machines running Solaris 2.x are another
@@ -1325,7 +1319,7 @@
     meaningless, and set to the empty string. Note that compiled Python bytecode
     files are platform independent (but not independent from the Python version by
     which they were compiled!).
     System administrators will know how to configure the mount or
     automount programs to share /usr/local between platforms
     while having /usr/local/plat be a different filesystem for each
@@ -1351,7 +1345,7 @@
     storage; the caller should not modify its value.  The list sys.path is
     initialized with this value on interpreter startup; it can be (and usually
     is) modified later to change the search path for loading modules.
     XXX should give the exact rules"""
     raise NotImplementedError
@@ -1359,9 +1353,9 @@
 def Py_GetVersion(space):
     """Return the version of this Python interpreter.  This is a string that looks
     something like
     "1.5 (\#67, Dec 31 1997, 22:34:28) [GCC]"
     The first word (up to the first space character) is the current Python version;
     the first three characters are the major and minor version separated by a
     period.  The returned string points into static storage; the caller should not
@@ -1382,9 +1376,9 @@
 @cpython_api([], rffi.CCHARP)
 def Py_GetCopyright(space):
     """Return the official copyright string for the current Python version, for example
     'Copyright 1991-1995 Stichting Mathematisch Centrum, Amsterdam'
     The returned string points into static storage; the caller should not modify its
     value.  The value is available to Python code as sys.copyright."""
     raise NotImplementedError
@@ -1393,9 +1387,9 @@
 def Py_GetCompiler(space):
     """Return an indication of the compiler used to build the current Python version,
     in square brackets, for example:
     The returned string points into static storage; the caller should not modify its
     value.  The value is available to Python code as part of the variable
@@ -1405,9 +1399,9 @@
 def Py_GetBuildInfo(space):
     """Return information about the sequence number and build date and time  of the
     current Python interpreter instance, for example
     "\#67, Aug  1 1997, 22:34:28"
     The returned string points into static storage; the caller should not modify its
     value.  The value is available to Python code as part of the variable
@@ -1422,31 +1416,31 @@
     will be run, the first entry in argv can be an empty string.  If this
     function fails to initialize sys.argv, a fatal condition is signalled using
     If updatepath is zero, this is all the function does.  If updatepath
     is non-zero, the function also modifies sys.path according to the
     following algorithm:
     If the name of an existing script is passed in argv[0], the absolute
     path of the directory where the script is located is prepended to
     Otherwise (that is, if argc is 0 or argv[0] doesn't point
     to an existing file name), an empty string is prepended to
     sys.path, which is the same as prepending the current working
     directory (".").
     It is recommended that applications embedding the Python interpreter
     for purposes other than executing a single script pass 0 as updatepath,
     and update sys.path themselves if desired.
     See CVE-2008-5983.
     On versions before 2.6.6, you can achieve the same effect by manually
     popping the first sys.path element after having called
     PySys_SetArgv(), for example using:
     PyRun_SimpleString("import sys; sys.path.pop(0)\n");
     XXX impl. doesn't seem consistent in allowing 0/NULL for the params;
     check w/ Guido."""
     raise NotImplementedError
@@ -1461,7 +1455,7 @@
     """Set the default "home" directory, that is, the location of the standard
     Python libraries.  See PYTHONHOME for the meaning of the
     argument string.
     The argument should point to a zero-terminated character string in static
     storage whose contents will not change for the duration of the program's
     execution.  No code in the Python interpreter will change the contents of
@@ -1509,7 +1503,7 @@
     the dictionary.  It is okay to call this function when no current thread state
     is available. If this function returns NULL, no exception has been raised and
     the caller should assume no current thread state is available.
     Previously this could only be called when a current thread is active, and NULL
     meant that an exception was raised."""
@@ -1531,7 +1525,7 @@
 def PyEval_AcquireLock(space):
     """Acquire the global interpreter lock.  The lock must have been created earlier.
     If this thread already has the lock, a deadlock ensues.
     This function does not change the current thread state.  Please use
     PyEval_RestoreThread() or PyEval_AcquireThread()
@@ -1540,7 +1534,7 @@
 @cpython_api([], lltype.Void)
 def PyEval_ReleaseLock(space):
     """Release the global interpreter lock.  The lock must have been created earlier.
     This function does not change the current thread state.  Please use
     PyEval_SaveThread() or PyEval_ReleaseThread()
@@ -1556,7 +1550,7 @@
     separate.  The new environment has no sys.argv variable.  It has new standard
     I/O stream file objects sys.stdin, sys.stdout and sys.stderr (however these
     refer to the same underlying file descriptors).
     The return value points to the first thread state created in the new
     sub-interpreter.  This thread state is made in the current thread state.
     Note that no actual thread is created; see the discussion of thread states
@@ -1567,7 +1561,7 @@
     calling this function and is still held when it returns; however, unlike most
     other Python/C API functions, there needn't be a current thread state on
     Extension modules are shared between (sub-)interpreters as follows: the first
     time a particular extension is imported, it is initialized normally, and a
     (shallow) copy of its module's dictionary is squirreled away.  When the same
@@ -1601,11 +1595,11 @@
     asynchronous notification recursively, but it can still be interrupted to
     switch threads if the global interpreter lock is released, for example, if it
     calls back into Python code.
     This function returns 0 on success in which case the notification has been
     scheduled.  Otherwise, for example if the notification buffer is full, it
     returns -1 without setting any exception.
     This function can be called on any thread, be it a Python thread or some
     other system thread.  If it is a Python thread, it doesn't matter if it holds
     the global interpreter lock or not.
@@ -1633,62 +1627,62 @@
 def PyEval_GetCallStats(space, self):
     """Return a tuple of function call counts.  There are constants defined for the
     positions within the tuple:
     PCALL_FAST_FUNCTION means no argument tuple needs to be created.
     PCALL_FASTER_FUNCTION means that the fast-path frame setup code is used.
     If there is a method call where the call can be optimized by changing
     the argument tuple and calling the function directly, it gets recorded
     This function is only present if Python is compiled with CALL_PROFILE
     raise NotImplementedError
@@ -1747,7 +1741,7 @@
     and high.  Return NULL and set an exception if unsuccessful.  Analogous
     to list[low:high].  Negative indices, as when slicing from Python, are not
     This function used an int for low and high. This might
     require changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -1773,7 +1767,7 @@
     gives the number of characters, and base is the radix for the conversion.  The
     radix must be in the range [2, 36]; if it is out of range, ValueError
     will be raised.
     This function used an int for length. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -1803,21 +1797,21 @@
     """Marshal a long integer, value, to file.  This will only write
     the least-significant 32 bits of value; regardless of the size of the
     native long type.
     version indicates the file format."""
     raise NotImplementedError
 @cpython_api([PyObject, FILE, rffi.INT_real], lltype.Void)
 def PyMarshal_WriteObjectToFile(space, value, file, version):
     """Marshal a Python object, value, to file.
     version indicates the file format."""
     raise NotImplementedError
 @cpython_api([PyObject, rffi.INT_real], PyObject)
 def PyMarshal_WriteObjectToString(space, value, version):
     """Return a string object containing the marshalled representation of value.
     version indicates the file format."""
     raise NotImplementedError
@@ -1860,7 +1854,7 @@
     containing len bytes pointed to by string.  On error, sets the
     appropriate exception (EOFError or TypeError) and returns
     This function used an int type for len. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2012,7 +2006,7 @@
     """Return the result of repeating sequence object o count times, or NULL on
     failure.  The operation is done in-place when o supports it.  This is the
     equivalent of the Python expression o *= count.
     This function used an int type for count. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2022,16 +2016,7 @@
     """Return the number of occurrences of value in o, that is, return the number
     of keys for which o[key] == value.  On failure, return -1.  This is
     equivalent to the Python expression o.count(value).
-    This function returned an int type. This might require changes
-    in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-@cpython_api([PyObject, PyObject], Py_ssize_t, error=-1)
-def PySequence_Index(space, o, value):
-    """Return the first index i for which o[i] == value.  On error, return
-    -1.    This is equivalent to the Python expression o.index(value).
     This function returned an int type. This might require changes
     in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2040,24 +2025,13 @@
 def PySequence_Fast_ITEMS(space, o):
     """Return the underlying array of PyObject pointers.  Assumes that o was returned
     by PySequence_Fast() and o is not NULL.
     Note, if a list gets resized, the reallocation may relocate the items array.
     So, only use the underlying array pointer in contexts where the sequence
     cannot change.
     raise NotImplementedError
-@cpython_api([PyObject, Py_ssize_t], PyObject)
-def PySequence_ITEM(space, o, i):
-    """Return the ith element of o or NULL on failure. Macro form of
-    PySequence_GetItem() but without checking that
-    PySequence_Check(o)() is true and without adjustment for negative
-    indices.
-    This function used an int type for i. This might require
-    changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
 @cpython_api([PyObject], rffi.INT_real, error=CANNOT_FAIL)
 def PySet_Check(space, p):
     """Return true if p is a set object or an instance of a subtype.
@@ -2104,7 +2078,7 @@
     The iterable may be NULL to create a new empty frozenset.  Return the new
     set on success or NULL on failure.  Raise TypeError if iterable is
     not actually iterable.
     Now guaranteed to return a brand-new frozenset.  Formerly,
     frozensets of zero-length were a singleton.  This got in the way of
     building-up new frozensets with PySet_Add()."""
@@ -2115,7 +2089,7 @@
     """Return the length of a set or frozenset object. Equivalent to
     len(anyset).  Raises a PyExc_SystemError if anyset is not a set, frozenset,
     or an instance of a subtype.
     This function returned an int. This might require changes in
     your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2141,7 +2115,7 @@
     the key is unhashable. Raise a MemoryError if there is no room to grow.
     Raise a SystemError if set is not an instance of set or its
     Now works with instances of frozenset or its subtypes.
     Like PyTuple_SetItem() in that it can be used to fill-in the
     values of brand new frozensets before they are exposed to other code."""
@@ -2181,7 +2155,7 @@
     though there is a lot of talk about reference counts, think of this function as
     reference-count-neutral; you own the object after the call if and only if you
     owned it before the call.)
     This function is not available in 3.x and does not have a PyBytes alias."""
     raise NotImplementedError
@@ -2192,9 +2166,9 @@
     as the parameters of the same name in the unicode() built-in function.
     The codec to be used is looked up using the Python codec registry.  Return
     NULL if an exception was raised by the codec.
     This function is not available in 3.x and does not have a PyBytes alias.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2206,7 +2180,7 @@
     meaning as the parameters of the same name in the string encode() method.
     The codec to be used is looked up using the Python codec registry. Return NULL
     if an exception was raised by the codec.
     This function is not available in 3.x and does not have a PyBytes alias."""
     raise NotImplementedError
@@ -2217,9 +2191,9 @@
     have the same meaning as the parameters of the same name in the string
     encode() method. The codec to be used is looked up using the Python codec
     registry.  Return NULL if an exception was raised by the codec.
     This function is not available in 3.x and does not have a PyBytes alias.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2280,23 +2254,11 @@
     standard C library function exit(status)."""
     raise NotImplementedError
-@cpython_api([rffi.VOIDP], rffi.INT_real, error=-1)
-def Py_AtExit(space, func):
-    """Register a cleanup function to be called by Py_Finalize().  The cleanup
-    function will be called with no arguments and should return no value.  At
-    most 32 cleanup functions can be registered.  When the registration is
-    successful, Py_AtExit() returns 0; on failure, it returns -1.  The cleanup
-    function registered last is called first. Each cleanup function will be
-    called at most once.  Since Python's internal finalization will have
-    completed before the cleanup function, no Python APIs should be called by
-    func."""
-    raise NotImplementedError
 @cpython_api([PyObject, Py_ssize_t, Py_ssize_t], PyObject)
 def PyTuple_GetSlice(space, p, low, high):
     """Take a slice of the tuple pointed to by p from low to high and return it
     as a new tuple.
     This function used an int type for low and high. This might
     require changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2384,93 +2346,93 @@
     a string with the values formatted into it.  The variable arguments must be C
     types and must correspond exactly to the format characters in the format
     string.  The following format characters are allowed:
     Format Characters
     The literal % character.
     A single character,
     represented as a C int.
     Exactly equivalent to
     unsigned int
     Exactly equivalent to
     Exactly equivalent to
     unsigned long
     Exactly equivalent to
     Exactly equivalent to
     Exactly equivalent to
     Exactly equivalent to
     Exactly equivalent to
     A null-terminated C character
     The hex representation of a C
     pointer. Mostly equivalent to
     printf("%p") except that
@@ -2478,38 +2440,38 @@
     the literal 0x regardless
     of what the platform's
     printf yields.
     A unicode object.
     PyObject*, char *
     A unicode object (which may be
     NULL) and a null-terminated
     C character array as a second
     parameter (which will be used,
     if the first parameter is
     The result of calling
     The result of calling
     An unrecognized format character causes all the rest of the format string to be
     copied as-is to the result string, and any extra arguments discarded.
@@ -2529,7 +2491,7 @@
     of the same name in the Unicode encode() method.  The codec to be used is
     looked up using the Python codec registry.  Return NULL if an exception was
     raised by the codec.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2540,7 +2502,7 @@
     consumed is not NULL, trailing incomplete UTF-8 byte sequences will not be
     treated as an error. Those bytes will not be decoded and the number of bytes
     that have been decoded will be stored in consumed.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2549,7 +2511,7 @@
 def PyUnicode_EncodeUTF8(space, s, size, errors):
     """Encode the Py_UNICODE buffer of the given size using UTF-8 and return a
     Python string object.  Return NULL if an exception was raised by the codec.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2559,26 +2521,26 @@
     """Decode length bytes from a UTF-32 encoded buffer string and return the
     corresponding Unicode object.  errors (if non-NULL) defines the error
     handling. It defaults to "strict".
     If byteorder is non-NULL, the decoder starts decoding using the given byte
     *byteorder == -1: little endian
     *byteorder == 0:  native order
     *byteorder == 1:  big endian
     If *byteorder is zero, and the first four bytes of the input data are a
     byte order mark (BOM), the decoder switches to this byte order and the BOM is
     not copied into the resulting Unicode string.  If *byteorder is -1 or
     1, any byte order mark is copied to the output.
     After completion, *byteorder is set to the current byte order at the end
     of input data.
     In a narrow build codepoints outside the BMP will be decoded as surrogate pairs.
     If byteorder is NULL, the codec starts in native order mode.
     Return NULL if an exception was raised by the codec.
     raise NotImplementedError
@@ -2597,17 +2559,17 @@
 def PyUnicode_EncodeUTF32(space, s, size, errors, byteorder):
     """Return a Python bytes object holding the UTF-32 encoded value of the Unicode
     data in s.  Output is written according to the following byte order:
     byteorder == -1: little endian
     byteorder == 0:  native byte order (writes a BOM mark)
     byteorder == 1:  big endian
     If byteorder is 0, the output string will always start with the Unicode BOM
     mark (U+FEFF). In the other two modes, no BOM mark is prepended.
     If Py_UNICODE_WIDE is not defined, surrogate pairs will be output
     as a single codepoint.
     Return NULL if an exception was raised by the codec.
     raise NotImplementedError
@@ -2627,7 +2589,7 @@
     trailing incomplete UTF-16 byte sequences (such as an odd number of bytes or a
     split surrogate pair) as an error. Those bytes will not be decoded and the
     number of bytes that have been decoded will be stored in consumed.
     This function used an int type for size and an int *
     type for consumed. This might require changes in your code for
     properly supporting 64-bit systems."""
@@ -2637,20 +2599,20 @@
 def PyUnicode_EncodeUTF16(space, s, size, errors, byteorder):
     """Return a Python string object holding the UTF-16 encoded value of the Unicode
     data in s.  Output is written according to the following byte order:
     byteorder == -1: little endian
     byteorder == 0:  native byte order (writes a BOM mark)
     byteorder == 1:  big endian
     If byteorder is 0, the output string will always start with the Unicode BOM
     mark (U+FEFF). In the other two modes, no BOM mark is prepended.
     If Py_UNICODE_WIDE is defined, a single Py_UNICODE value may get
     represented as a surrogate pair. If it is not defined, each Py_UNICODE
     value is interpreted as a UCS-2 character.
     Return NULL if an exception was raised by the codec.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2681,7 +2643,7 @@
     """Encode the Py_UNICODE buffer of the given size using UTF-7 and
     return a Python bytes object.  Return NULL if an exception was raised by
     the codec.
     If base64SetO is nonzero, "Set O" (punctuation that has no otherwise
     special meaning) will be encoded in base-64.  If base64WhiteSpace is
     nonzero, whitespace will be encoded in base-64.  Both are set to zero for the
@@ -2692,7 +2654,7 @@
 def PyUnicode_DecodeUnicodeEscape(space, s, size, errors):
     """Create a Unicode object by decoding size bytes of the Unicode-Escape encoded
     string s.  Return NULL if an exception was raised by the codec.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2702,7 +2664,7 @@
     """Encode the Py_UNICODE buffer of the given size using Unicode-Escape and
     return a Python string object.  Return NULL if an exception was raised by the
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2711,7 +2673,7 @@
 def PyUnicode_DecodeRawUnicodeEscape(space, s, size, errors):
     """Create a Unicode object by decoding size bytes of the Raw-Unicode-Escape
     encoded string s.  Return NULL if an exception was raised by the codec.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2721,7 +2683,7 @@
     """Encode the Py_UNICODE buffer of the given size using Raw-Unicode-Escape
     and return a Python string object.  Return NULL if an exception was raised by
     the codec.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2737,7 +2699,7 @@
 def PyUnicode_DecodeLatin1(space, s, size, errors):
     """Create a Unicode object by decoding size bytes of the Latin-1 encoded string
     s.  Return NULL if an exception was raised by the codec.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2746,7 +2708,7 @@
 def PyUnicode_EncodeLatin1(space, s, size, errors):
     """Encode the Py_UNICODE buffer of the given size using Latin-1 and return
     a Python string object.  Return NULL if an exception was raised by the codec.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2766,9 +2728,9 @@
     dictionary mapping byte or a unicode string, which is treated as a lookup table.
     Byte values greater than the length of the string and U+FFFE "characters" are
     treated as "undefined mapping".
     Allowed unicode string as mapping argument.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2778,7 +2740,7 @@
     """Encode the Py_UNICODE buffer of the given size using the given
     mapping object and return a Python string object. Return NULL if an
     exception was raised by the codec.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2795,14 +2757,14 @@
     """Translate a Py_UNICODE buffer of the given length by applying a
     character mapping table to it and return the resulting Unicode object.  Return
     NULL when an exception was raised by the codec.
     The mapping table must map Unicode ordinal integers to Unicode ordinal
     integers or None (causing deletion of the character).
     Mapping tables need only provide the __getitem__() interface; dictionaries
     and sequences work well.  Unmapped character ordinals (ones which cause a
     LookupError) are left untouched and are copied as-is.
     This function used an int type for size. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2834,7 +2796,7 @@
     will be done at all whitespace substrings.  Otherwise, splits occur at the given
     separator.  At most maxsplit splits will be done.  If negative, no limit is
     set.  Separators are not included in the resulting list.
     This function used an int type for maxsplit. This might require
     changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2850,14 +2812,14 @@
 def PyUnicode_Translate(space, str, table, errors):
     """Translate a string by applying a character mapping table to it and return the
     resulting Unicode object.
     The mapping table must map Unicode ordinal integers to Unicode ordinal integers
     or None (causing deletion of the character).
     Mapping tables need only provide the __getitem__() interface; dictionaries
     and sequences work well.  Unmapped character ordinals (ones which cause a
     LookupError) are left untouched and are copied as-is.
     errors has the usual meaning for codecs. It may be NULL which indicates to
     use the default error handling."""
     raise NotImplementedError
@@ -2873,7 +2835,7 @@
     """Return 1 if substr matches str*[*start:end] at the given tail end
     (direction == -1 means to do a prefix match, direction == 1 a suffix match),
     0 otherwise. Return -1 if an error occurred.
     This function used an int type for start and end. This
     might require changes in your code for properly supporting 64-bit
@@ -2886,7 +2848,7 @@
     backward search).  The return value is the index of the first match; a value of
     -1 indicates that no match was found, and -2 indicates that an error
     occurred and an exception has been set.
     This function used an int type for start and end. This
     might require changes in your code for properly supporting 64-bit
@@ -2896,7 +2858,7 @@
 def PyUnicode_Count(space, str, substr, start, end):
     """Return the number of non-overlapping occurrences of substr in
     str[start:end].  Return -1 if an error occurred.
     This function returned an int type and used an int
     type for start and end. This might require changes in your code for
     properly supporting 64-bit systems."""
@@ -2907,7 +2869,7 @@
     """Replace at most maxcount occurrences of substr in str with replstr and
     return the resulting Unicode object. maxcount == -1 means replace all
     This function used an int type for maxcount. This might
     require changes in your code for properly supporting 64-bit systems."""
     raise NotImplementedError
@@ -2915,17 +2877,17 @@
 @cpython_api([PyObject, PyObject, rffi.INT_real], PyObject)
 def PyUnicode_RichCompare(space, left, right, op):
     """Rich compare two unicode strings and return one of the following:
     NULL in case an exception was raised
     Py_True or Py_False for successful comparisons
     Py_NotImplemented in case the type combination is unknown
     Note that Py_EQ and Py_NE comparisons can cause a
     UnicodeWarning in case the conversion of the arguments to Unicode fails
     with a UnicodeDecodeError.
     Possible values for op are Py_GT, Py_GE, Py_EQ,
     Py_NE, Py_LT, and Py_LE."""
     raise NotImplementedError
@@ -2940,7 +2902,7 @@
 def PyUnicode_Contains(space, container, element):
     """Check whether element is contained in container and return true or false
     element has to coerce to a one element Unicode string. -1 is returned if
     there was an error."""
     raise NotImplementedError
@@ -2955,7 +2917,7 @@
     value will be the integer passed to the sys.exit() function, 1 if the
     interpreter exits due to an exception, or 2 if the parameter list does not
     represent a valid Python command line.
     Note that if an otherwise unhandled SystemError is raised, this
     function will not return 1, but exit the process, as long as
     Py_InspectFlag is not set."""
@@ -2995,7 +2957,7 @@
     is created.  Returns 0 on success or -1 if an exception was raised.  If
     there was an error, there is no way to get the exception information. For the
     meaning of flags, see below.
     Note that if an otherwise unhandled SystemError is raised, this
     function will not return -1, but exit the process, as long as
     Py_InspectFlag is not set."""
@@ -3097,7 +3059,7 @@
     dictionaries globals and locals with the compiler flags specified by
     flags.  The parameter start specifies the start token that should be used to
     parse the source code.
     Returns the result of executing the code as a Python object, or NULL if an
     exception was raised."""
     raise NotImplementedError
diff --git a/pypy/module/cpyext/test/test_eval.py b/pypy/module/cpyext/test/test_eval.py
--- a/pypy/module/cpyext/test/test_eval.py
+++ b/pypy/module/cpyext/test/test_eval.py
@@ -166,6 +166,15 @@
         lltype.free(pi, flavor='raw')
+    def test_atexit(self, space, api):
+        lst = []
+        def func():
+            lst.append(42)
+        api.Py_AtExit(func)
+        cpyext = space.getbuiltinmodule('cpyext')
+        cpyext.shutdown(space) # simulate shutdown
+        assert lst == [42]
 class AppTestCall(AppTestCpythonExtensionBase):
     def test_CallFunction(self):
         module = self.import_extension('foo', [
diff --git a/pypy/module/cpyext/test/test_import.py b/pypy/module/cpyext/test/test_import.py
--- a/pypy/module/cpyext/test/test_import.py
+++ b/pypy/module/cpyext/test/test_import.py
@@ -18,6 +18,19 @@
         assert space.str_w(space.getattr(w_foobar,
                                          space.wrap('__name__'))) == 'foobar'
+    def test_getmoduledict(self, space, api):
+        testmod = "binascii"
+        w_pre_dict = api.PyImport_GetModuleDict()
+        assert not space.is_true(space.contains(w_pre_dict, space.wrap(testmod)))
+        with rffi.scoped_str2charp(testmod) as modname:
+            w_module = api.PyImport_ImportModule(modname)
+            print w_module
+            assert w_module
+        w_dict = api.PyImport_GetModuleDict()
+        assert space.is_true(space.contains(w_dict, space.wrap(testmod)))
     def test_reload(self, space, api):
         pdb = api.PyImport_Import(space.wrap("pdb"))
         space.delattr(pdb, space.wrap("set_trace"))
diff --git a/pypy/module/cpyext/test/test_number.py b/pypy/module/cpyext/test/test_number.py
--- a/pypy/module/cpyext/test/test_number.py
+++ b/pypy/module/cpyext/test/test_number.py
@@ -23,6 +23,8 @@
     def test_number_int(self, space, api):
         w_l = api.PyNumber_Int(space.wrap(123L))
         assert api.PyInt_CheckExact(w_l)
+        w_l = api.PyNumber_Int(space.wrap(2 << 65))
+        assert api.PyLong_CheckExact(w_l)
     def test_numbermethods(self, space, api):
         assert "ab" == space.unwrap(
diff --git a/pypy/module/cpyext/test/test_pyfile.py b/pypy/module/cpyext/test/test_pyfile.py
--- a/pypy/module/cpyext/test/test_pyfile.py
+++ b/pypy/module/cpyext/test/test_pyfile.py
@@ -52,6 +52,13 @@
         space.call_method(w_file, "close")
+    def test_file_name(self, space, api):
+        name = str(udir / "_test_file")
+        with rffi.scoped_str2charp(name) as filename:
+            with rffi.scoped_str2charp("wb") as mode:
+                w_file = api.PyFile_FromString(filename, mode)
+        assert space.str_w(api.PyFile_Name(w_file)) == name
     def test_file_fromfile(self, space, api):
diff --git a/pypy/module/cpyext/test/test_sequence.py b/pypy/module/cpyext/test/test_sequence.py
--- a/pypy/module/cpyext/test/test_sequence.py
+++ b/pypy/module/cpyext/test/test_sequence.py
@@ -105,3 +105,34 @@
         self.raises(space, api, IndexError, api.PySequence_DelItem,
                     w_l, 3)
+    def test_getitem(self, space, api):
+        thelist = [8, 7, 6, 5, 4, 3, 2, 1]
+        w_l = space.wrap(thelist)
+        result = api.PySequence_GetItem(w_l, 4)
+        assert space.is_true(space.eq(result, space.wrap(4)))
+        result = api.PySequence_ITEM(w_l, 4)
+        assert space.is_true(space.eq(result, space.wrap(4)))
+        self.raises(space, api, IndexError, api.PySequence_GetItem, w_l, 9000)
+    def test_index(self, space, api):
+        thelist = [9, 8, 7, 6, 5, 4, 3, 2, 1]
+        w_l = space.wrap(thelist)
+        w_tofind = space.wrap(5)
+        result = api.PySequence_Index(w_l, w_tofind)
+        assert result == thelist.index(5)
+        w_tofind = space.wrap(9001)
+        result = api.PySequence_Index(w_l, w_tofind)
+        assert result == -1
+        assert api.PyErr_Occurred() is space.w_ValueError
+        api.PyErr_Clear()
+        gen = (x ** 2 for x in range(40))
+        w_tofind = space.wrap(16)
+        result = api.PySequence_Index(space.wrap(gen), w_tofind)
+        assert result == 4
diff --git a/pypy/module/pypyjit/policy.py b/pypy/module/pypyjit/policy.py
--- a/pypy/module/pypyjit/policy.py
+++ b/pypy/module/pypyjit/policy.py
@@ -14,7 +14,7 @@
             modname, _ = modname.split('.', 1)
         if modname in ['pypyjit', 'signal', 'micronumpy', 'math', 'exceptions',
                        'imp', 'sys', 'array', '_ffi', 'itertools', 'operator',
-                       '_socket', '_sre', '_lsprof']:
+                       'posix', '_socket', '_sre', '_lsprof']:
             return True
         return False
diff --git a/pypy/module/pypyjit/test/test_policy.py b/pypy/module/pypyjit/test/test_policy.py
--- a/pypy/module/pypyjit/test/test_policy.py
+++ b/pypy/module/pypyjit/test/test_policy.py
@@ -39,7 +39,7 @@
 def test_pypy_module():
     from pypy.module._random.interp_random import W_Random
     assert not pypypolicy.look_inside_function(W_Random.random)
-    assert not pypypolicy.look_inside_pypy_module('posix.interp_expat')
+    assert not pypypolicy.look_inside_pypy_module('select.interp_epoll')
     assert pypypolicy.look_inside_pypy_module('__builtin__.operation')
     assert pypypolicy.look_inside_pypy_module('__builtin__.abstractinst')
     assert pypypolicy.look_inside_pypy_module('__builtin__.functional')
diff --git a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py b/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
--- a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
@@ -1031,7 +1031,6 @@
     def test_func_defaults(self):
-        py.test.skip("until we fix defaults")
         def main(n):
             i = 1
             while i < n:
@@ -1044,20 +1043,10 @@
         assert loop.match("""
             i10 = int_lt(i5, i6)
             guard_true(i10, descr=<Guard3>)
-            # This can be improved if the JIT realized the lookup of i5 produces
-            # a constant and thus can be removed entirely
             i120 = int_add(i5, 1)
-            i140 = int_lt(0, i120)
-            guard_true(i140, descr=<Guard4>)
-            i13 = uint_floordiv(i5, i7)
-            i15 = int_add(i13, 1)
-            i17 = int_lt(i15, 0)
-            guard_false(i17, descr=<Guard5>)
-            i20 = int_sub(i15, i5)
-            i21 = int_add_ovf(i5, i20)
-            guard_no_overflow(descr=<Guard6>)
+            guard_not_invalidated(descr=<Guard4>)
-            jump(p0, p1, p2, p3, p4, i21, i6, i7, p8, p9, descr=<Loop0>)
+            jump(..., descr=<Loop0>)
     def test_unpack_iterable_non_list_tuple(self):
@@ -1092,7 +1081,7 @@
             jump(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, i28, i25, i19, i13, p14, p15, descr=<Loop0>)
     def test_mutate_class(self):
         def fn(n):
             class A(object):
@@ -1497,7 +1486,7 @@
         def main():
-            while i < 300: 
+            while i < 300:
                 sa+=min(max(i, 3000), 4000)
             return sa
@@ -1534,7 +1523,7 @@
             p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
     def test_iter_max(self):
         def main():
             i = 2
@@ -1552,7 +1541,7 @@
         assert len(guards) < 20
         assert loop.match_by_id('max',"""
-            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)            
+            p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
diff --git a/pypy/module/thread/ll_thread.py b/pypy/module/thread/ll_thread.py
--- a/pypy/module/thread/ll_thread.py
+++ b/pypy/module/thread/ll_thread.py
@@ -114,6 +114,8 @@
     def __del__(self):
+        if free_ll_lock is None:  # happens when tests are shutting down
+            return
     def __enter__(self):
diff --git a/pypy/objspace/std/stringobject.py b/pypy/objspace/std/stringobject.py
--- a/pypy/objspace/std/stringobject.py
+++ b/pypy/objspace/std/stringobject.py
@@ -52,12 +52,16 @@
         c = v[0]
         return space.newbool(fun(c))
-        for idx in range(len(v)):
-            if not fun(v[idx]):
-                return space.w_False
-        return space.w_True
+        return _is_generic_loop(space, v, fun)
 _is_generic._annspecialcase_ = "specialize:arg(2)"
+def _is_generic_loop(space, v, fun):
+    for idx in range(len(v)):
+        if not fun(v[idx]):
+            return space.w_False
+    return space.w_True
+_is_generic_loop._annspecialcase_ = "specialize:arg(2)"
 def _upper(ch):
     if ch.islower():
         o = ord(ch) - 32
diff --git a/pypy/objspace/trace.py b/pypy/objspace/trace.py
--- a/pypy/objspace/trace.py
+++ b/pypy/objspace/trace.py
@@ -110,10 +110,10 @@
-    def leave(self, frame):
+    def leave(self, frame, w_exitvalue):
         """ called just after evaluating of a frame is suspended/finished. """
-        self.ec.leave(frame)
+        self.ec.leave(frame, w_exitvalue)
     def bytecode_trace(self, frame):
         """ called just before execution of a bytecode. """
diff --git a/pypy/rlib/_rsocket_rffi.py b/pypy/rlib/_rsocket_rffi.py
--- a/pypy/rlib/_rsocket_rffi.py
+++ b/pypy/rlib/_rsocket_rffi.py
@@ -90,35 +90,10 @@
     COND_HEADER = ''
 constants = {}
-sources = ["""
-    void pypy_macro_wrapper_FD_SET(int fd, fd_set *set)
-    {
-        FD_SET(fd, set);
-    }
-    void pypy_macro_wrapper_FD_ZERO(fd_set *set)
-    {
-        FD_ZERO(set);
-    }
-    void pypy_macro_wrapper_FD_CLR(int fd, fd_set *set)
-    {
-        FD_CLR(fd, set);
-    }
-    int pypy_macro_wrapper_FD_ISSET(int fd, fd_set *set)
-    {
-        return FD_ISSET(fd, set);
-    }
-    """]
 eci = ExternalCompilationInfo(
     post_include_bits = [HEADER, COND_HEADER],
     includes = includes,
     libraries = libraries,
-    separate_module_sources = sources,
-    export_symbols = ['pypy_macro_wrapper_FD_ZERO',
-                      'pypy_macro_wrapper_FD_SET',
-                      'pypy_macro_wrapper_FD_CLR',
-                      'pypy_macro_wrapper_FD_ISSET',
-                      ],
 class CConfig:
@@ -484,9 +459,9 @@
     return rffi.llexternal(name, args, result, compilation_info=eci,
-def external_c(name, args, result):
+def external_c(name, args, result, **kwargs):
     return rffi.llexternal(name, args, result, compilation_info=eci,
-                           calling_conv='c')
+                           calling_conv='c', **kwargs)
 if _POSIX:
     dup = external('dup', [socketfd_type], socketfd_type)
@@ -583,10 +558,10 @@
                    fd_set, lltype.Ptr(timeval)],
-FD_CLR = external_c('pypy_macro_wrapper_FD_CLR', [rffi.INT, fd_set], lltype.Void)
-FD_ISSET = external_c('pypy_macro_wrapper_FD_ISSET', [rffi.INT, fd_set], rffi.INT)
-FD_SET = external_c('pypy_macro_wrapper_FD_SET', [rffi.INT, fd_set], lltype.Void)
-FD_ZERO = external_c('pypy_macro_wrapper_FD_ZERO', [fd_set], lltype.Void)
+FD_CLR = external_c('FD_CLR', [rffi.INT, fd_set], lltype.Void, macro=True)
+FD_ISSET = external_c('FD_ISSET', [rffi.INT, fd_set], rffi.INT, macro=True)
+FD_SET = external_c('FD_SET', [rffi.INT, fd_set], lltype.Void, macro=True)
+FD_ZERO = external_c('FD_ZERO', [fd_set], lltype.Void, macro=True)
 if _POSIX:
     pollfdarray = rffi.CArray(pollfd)
diff --git a/pypy/rlib/ropenssl.py b/pypy/rlib/ropenssl.py
--- a/pypy/rlib/ropenssl.py
+++ b/pypy/rlib/ropenssl.py
@@ -15,19 +15,27 @@
         # wincrypt.h defines X509_NAME, include it here
         # so that openssl/ssl.h can repair this nonsense.
-        'wincrypt.h',
-        'openssl/ssl.h',
-        'openssl/err.h',
-        'openssl/evp.h']
+        'wincrypt.h']
     libraries = ['ssl', 'crypto']
-    includes = ['openssl/ssl.h', 'openssl/err.h',
-                'openssl/evp.h']
+    includes = []
+includes += [
+    'openssl/ssl.h', 
+    'openssl/err.h',
+    'openssl/rand.h',
+    'openssl/evp.h',
+    'openssl/x509v3.h']
 eci = ExternalCompilationInfo(
     libraries = libraries,
     includes = includes,
     export_symbols = [],
+    post_include_bits = [
+        # Unnamed structures are not supported by rffi_platform.
+        # So we replace an attribute access with a macro call.
+        '#define pypy_GENERAL_NAME_dirn(name) (name->d.dirn)',
+        ],
 eci = rffi_platform.configure_external_library(
@@ -43,6 +51,10 @@
     from pypy.rlib._rsocket_rffi import FD_SETSIZE as MAX_FD_SIZE
+ASN1_STRING = lltype.Ptr(lltype.ForwardReference())
+ASN1_ITEM = rffi.COpaquePtr('ASN1_ITEM')
+X509_NAME = rffi.COpaquePtr('X509_NAME')
 class CConfig:
     _compilation_info_ = eci
@@ -53,6 +65,8 @@
     SSL_FILETYPE_PEM = rffi_platform.ConstantInteger("SSL_FILETYPE_PEM")
     SSL_OP_ALL = rffi_platform.ConstantInteger("SSL_OP_ALL")
     SSL_VERIFY_NONE = rffi_platform.ConstantInteger("SSL_VERIFY_NONE")
+    SSL_VERIFY_PEER = rffi_platform.ConstantInteger("SSL_VERIFY_PEER")
     SSL_ERROR_WANT_READ = rffi_platform.ConstantInteger(
     SSL_ERROR_WANT_WRITE = rffi_platform.ConstantInteger(
@@ -67,21 +81,54 @@
     SSL_ERROR_SSL = rffi_platform.ConstantInteger("SSL_ERROR_SSL")
     SSL_RECEIVED_SHUTDOWN = rffi_platform.ConstantInteger(
-    SSL_CTRL_OPTIONS = rffi_platform.ConstantInteger("SSL_CTRL_OPTIONS")
-    SSL_CTRL_MODE = rffi_platform.ConstantInteger("SSL_CTRL_MODE")
-    BIO_C_SET_NBIO = rffi_platform.ConstantInteger("BIO_C_SET_NBIO")
     SSL_MODE_AUTO_RETRY = rffi_platform.ConstantInteger("SSL_MODE_AUTO_RETRY")
+    NID_subject_alt_name = rffi_platform.ConstantInteger("NID_subject_alt_name")
+    GEN_DIRNAME = rffi_platform.ConstantInteger("GEN_DIRNAME")
+    CRYPTO_LOCK = rffi_platform.ConstantInteger("CRYPTO_LOCK")
+    # Some structures, with only the fields used in the _ssl module
+    X509_name_entry_st = rffi_platform.Struct('struct X509_name_entry_st',
+                                              [('set', rffi.INT)])
+    asn1_string_st = rffi_platform.Struct('struct asn1_string_st',
+                                          [('length', rffi.INT),
+                                           ('data', rffi.CCHARP)])
+    X509_extension_st = rffi_platform.Struct(
+        'struct X509_extension_st',
+        [('value', ASN1_STRING)])
+    ASN1_ITEM_EXP = lltype.FuncType([], ASN1_ITEM)
+    X509V3_EXT_D2I = lltype.FuncType([rffi.VOIDP, rffi.CCHARPP, rffi.LONG], 
+                                     rffi.VOIDP)
+    v3_ext_method = rffi_platform.Struct(
+        'struct v3_ext_method',
+        [('it', lltype.Ptr(ASN1_ITEM_EXP)),
+         ('d2i', lltype.Ptr(X509V3_EXT_D2I))])
+    GENERAL_NAME_st = rffi_platform.Struct(
+        'struct GENERAL_NAME_st',
+        [('type', rffi.INT),
+         ]) 
 for k, v in rffi_platform.configure(CConfig).items():
     globals()[k] = v
 # opaque structures
 SSL_METHOD = rffi.COpaquePtr('SSL_METHOD')
 SSL_CTX = rffi.COpaquePtr('SSL_CTX')
+SSL_CIPHER = rffi.COpaquePtr('SSL_CIPHER')
 SSL = rffi.COpaquePtr('SSL')
 BIO = rffi.COpaquePtr('BIO')
 X509 = rffi.COpaquePtr('X509')
-X509_NAME = rffi.COpaquePtr('X509_NAME')
+X509_NAME_ENTRY = rffi.CArrayPtr(X509_name_entry_st)
+X509_EXTENSION = rffi.CArrayPtr(X509_extension_st)
+X509V3_EXT_METHOD = rffi.CArrayPtr(v3_ext_method)
+ASN1_OBJECT = rffi.COpaquePtr('ASN1_OBJECT')
+ASN1_TIME = rffi.COpaquePtr('ASN1_TIME')
+ASN1_INTEGER = rffi.COpaquePtr('ASN1_INTEGER')
@@ -97,18 +144,36 @@
 ssl_external('SSL_load_error_strings', [], lltype.Void)
 ssl_external('SSL_library_init', [], rffi.INT)
+ssl_external('CRYPTO_num_locks', [], rffi.INT)
+             [lltype.Ptr(lltype.FuncType(
+                [rffi.INT, rffi.INT, rffi.CCHARP, rffi.INT], lltype.Void))],
+             lltype.Void)
+             [lltype.Ptr(lltype.FuncType([], rffi.INT))],
+             lltype.Void)
     ssl_external('RAND_add', [rffi.CCHARP, rffi.INT, rffi.DOUBLE], lltype.Void)
     ssl_external('RAND_status', [], rffi.INT)
     ssl_external('RAND_egd', [rffi.CCHARP], rffi.INT)
 ssl_external('SSL_CTX_new', [SSL_METHOD], SSL_CTX)
+ssl_external('SSL_get_SSL_CTX', [SSL], SSL_CTX)
+ssl_external('TLSv1_method', [], SSL_METHOD)
+ssl_external('SSLv2_method', [], SSL_METHOD)
+ssl_external('SSLv3_method', [], SSL_METHOD)
 ssl_external('SSLv23_method', [], SSL_METHOD)
 ssl_external('SSL_CTX_use_PrivateKey_file', [SSL_CTX, rffi.CCHARP, rffi.INT], rffi.INT)
 ssl_external('SSL_CTX_use_certificate_chain_file', [SSL_CTX, rffi.CCHARP], rffi.INT)
+ssl_external('SSL_CTX_set_options', [SSL_CTX, rffi.INT], rffi.INT, macro=True)
 ssl_external('SSL_CTX_ctrl', [SSL_CTX, rffi.INT, rffi.INT, rffi.VOIDP], rffi.INT)
 ssl_external('SSL_CTX_set_verify', [SSL_CTX, rffi.INT, rffi.VOIDP], lltype.Void)
+ssl_external('SSL_CTX_get_verify_mode', [SSL_CTX], rffi.INT)
+ssl_external('SSL_CTX_set_cipher_list', [SSL_CTX, rffi.CCHARP], rffi.INT)
+ssl_external('SSL_CTX_load_verify_locations', [SSL_CTX, rffi.CCHARP, rffi.CCHARP], rffi.INT)
 ssl_external('SSL_new', [SSL_CTX], SSL)
 ssl_external('SSL_set_fd', [SSL, rffi.INT], rffi.INT)
+ssl_external('SSL_set_mode', [SSL, rffi.INT], rffi.INT, macro=True)
 ssl_external('SSL_ctrl', [SSL, rffi.INT, rffi.INT, rffi.VOIDP], rffi.INT)
 ssl_external('BIO_ctrl', [BIO, rffi.INT, rffi.INT, rffi.VOIDP], rffi.INT)
 ssl_external('SSL_get_rbio', [SSL], BIO)
@@ -122,20 +187,70 @@
 ssl_external('SSL_get_shutdown', [SSL], rffi.INT)
 ssl_external('SSL_set_read_ahead', [SSL, rffi.INT], lltype.Void)
-ssl_external('ERR_get_error', [], rffi.INT)
-ssl_external('ERR_error_string', [rffi.ULONG, rffi.CCHARP], rffi.CCHARP)
 ssl_external('SSL_get_peer_certificate', [SSL], X509)
 ssl_external('X509_get_subject_name', [X509], X509_NAME)
 ssl_external('X509_get_issuer_name', [X509], X509_NAME)
 ssl_external('X509_NAME_oneline', [X509_NAME, rffi.CCHARP, rffi.INT], rffi.CCHARP)
+ssl_external('X509_NAME_entry_count', [X509_NAME], rffi.INT)
+ssl_external('X509_NAME_get_entry', [X509_NAME, rffi.INT], X509_NAME_ENTRY)
+ssl_external('X509_NAME_ENTRY_get_object', [X509_NAME_ENTRY], ASN1_OBJECT)
+ssl_external('X509_NAME_ENTRY_get_data', [X509_NAME_ENTRY], ASN1_STRING)
+ssl_external('i2d_X509', [X509, rffi.CCHARPP], rffi.INT)
 ssl_external('X509_free', [X509], lltype.Void)
+ssl_external('X509_get_notBefore', [X509], ASN1_TIME, macro=True)
+ssl_external('X509_get_notAfter', [X509], ASN1_TIME, macro=True)
+ssl_external('X509_get_serialNumber', [X509], ASN1_INTEGER)
+ssl_external('X509_get_version', [X509], rffi.INT, macro=True)
+ssl_external('X509_get_ext_by_NID', [X509, rffi.INT, rffi.INT], rffi.INT)
+ssl_external('X509_get_ext', [X509, rffi.INT], X509_EXTENSION)
+ssl_external('X509V3_EXT_get', [X509_EXTENSION], X509V3_EXT_METHOD)
+             [rffi.CCHARP, rffi.INT, ASN1_OBJECT, rffi.INT], rffi.INT)
+ssl_external('ASN1_STRING_to_UTF8', [rffi.CCHARPP, ASN1_STRING], rffi.INT)
+ssl_external('ASN1_TIME_print', [BIO, ASN1_TIME], rffi.INT)
+ssl_external('i2a_ASN1_INTEGER', [BIO, ASN1_INTEGER], rffi.INT)
+             [rffi.VOIDP, rffi.CCHARPP, rffi.LONG, ASN1_ITEM], rffi.VOIDP)
+ssl_external('ASN1_ITEM_ptr', [rffi.VOIDP], ASN1_ITEM, macro=True)
+ssl_external('sk_GENERAL_NAME_num', [GENERAL_NAMES], rffi.INT,
+             macro=True)
+ssl_external('sk_GENERAL_NAME_value', [GENERAL_NAMES, rffi.INT], GENERAL_NAME,
+             macro=True)
+ssl_external('GENERAL_NAME_print', [BIO, GENERAL_NAME], rffi.INT)
+ssl_external('pypy_GENERAL_NAME_dirn', [GENERAL_NAME], X509_NAME,
+             macro=True)
+ssl_external('SSL_get_current_cipher', [SSL], SSL_CIPHER)
+ssl_external('SSL_CIPHER_get_name', [SSL_CIPHER], rffi.CCHARP)
+ssl_external('SSL_CIPHER_get_version', [SSL_CIPHER], rffi.CCHARP)
+ssl_external('SSL_CIPHER_get_bits', [SSL_CIPHER, rffi.INTP], rffi.INT)
+ssl_external('ERR_get_error', [], rffi.INT)
+ssl_external('ERR_error_string', [rffi.ULONG, rffi.CCHARP], rffi.CCHARP)
 ssl_external('SSL_free', [SSL], lltype.Void)
 ssl_external('SSL_CTX_free', [SSL_CTX], lltype.Void)
+ssl_external('CRYPTO_free', [rffi.VOIDP], lltype.Void)
+libssl_OPENSSL_free = libssl_CRYPTO_free
 ssl_external('SSL_write', [SSL, rffi.CCHARP, rffi.INT], rffi.INT)
 ssl_external('SSL_pending', [SSL], rffi.INT)
 ssl_external('SSL_read', [SSL, rffi.CCHARP, rffi.INT], rffi.INT)
-ssl_external('SSL_read', [SSL, rffi.CCHARP, rffi.INT], rffi.INT)
+BIO_METHOD = rffi.COpaquePtr('BIO_METHOD')
+ssl_external('BIO_s_mem', [], BIO_METHOD)
+ssl_external('BIO_s_file', [], BIO_METHOD)
+ssl_external('BIO_new', [BIO_METHOD], BIO)
+ssl_external('BIO_set_nbio', [BIO, rffi.INT], rffi.INT, macro=True)
+ssl_external('BIO_free', [BIO], rffi.INT)
+ssl_external('BIO_reset', [BIO], rffi.INT, macro=True)
+ssl_external('BIO_read_filename', [BIO, rffi.CCHARP], rffi.INT, macro=True)
+ssl_external('BIO_gets', [BIO, rffi.CCHARP, rffi.INT], rffi.INT)
+             [BIO, rffi.VOIDP, rffi.VOIDP, rffi.VOIDP], X509)
 EVP_MD_CTX = rffi.COpaquePtr('EVP_MD_CTX', compilation_info=eci)
 EVP_MD     = rffi.COpaquePtr('EVP_MD')
@@ -159,13 +274,6 @@
 EVP_MD_CTX_cleanup = external(
     'EVP_MD_CTX_cleanup', [EVP_MD_CTX], rffi.INT)
-def libssl_SSL_set_mode(ssl, op):
-    return libssl_SSL_ctrl(ssl, SSL_CTRL_MODE, op, None)
-def libssl_SSL_CTX_set_options(ctx, op):
-    return libssl_SSL_CTX_ctrl(ctx, SSL_CTRL_OPTIONS, op, None)
-def libssl_BIO_set_nbio(bio, nonblocking):
-    return libssl_BIO_ctrl(bio, BIO_C_SET_NBIO, nonblocking, None)
 def init_ssl():
diff --git a/pypy/rlib/test/test_rsocket.py b/pypy/rlib/test/test_rsocket.py
--- a/pypy/rlib/test/test_rsocket.py
+++ b/pypy/rlib/test/test_rsocket.py
@@ -297,24 +297,25 @@
     e = py.test.raises(GAIError, getaddrinfo, 'www.very-invalidaddress.com', None)
     assert isinstance(e.value.get_msg(), str)
-def test_getaddrinfo_codespeak():
-    lst = getaddrinfo('codespeak.net', None)
+def test_getaddrinfo_pydotorg():
+    lst = getaddrinfo('python.org', None)
     assert isinstance(lst, list)
     found = False
     for family, socktype, protocol, canonname, addr in lst:
-        if addr.get_host() == '':
+        if addr.get_host() == '':
             found = True
     assert found, lst
 def test_getaddrinfo_no_reverse_lookup():
     # It seems that getaddrinfo never runs a reverse lookup on Linux.
     # Python2.3 on Windows returns the hostname.
-    lst = getaddrinfo('', None, flags=AI_NUMERICHOST)
+    lst = getaddrinfo('', None, flags=AI_NUMERICHOST)
     assert isinstance(lst, list)
     found = False
+    print lst
     for family, socktype, protocol, canonname, addr in lst:
-        assert canonname != 'codespeak.net'
-        if addr.get_host() == '':
+        assert 'python.org' not in canonname
+        if addr.get_host() == '':
             found = True
     assert found, lst
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -616,7 +616,7 @@
             container = llobj._obj.container
             T = lltype.Ptr(lltype.typeOf(container))
             # otherwise it came from integer and we want a c_void_p with
-            # the same valu
+            # the same value
             if getattr(container, 'llopaque', None):
                 no = len(_opaque_objs)
@@ -774,7 +774,7 @@
             # CFunctionType.__nonzero__ is broken before Python 2.6
             return lltype.nullptr(T.TO)
         if isinstance(T.TO, lltype.Struct):
-            if ptrval & 1: # a tagged pointer
+            if T.TO._gckind == 'gc' and ptrval & 1: # a tagged pointer
                 gcref = _opaque_objs[ptrval // 2].hide()
                 return lltype.cast_opaque_ptr(T, gcref)
             REAL_TYPE = T.TO
@@ -973,13 +973,13 @@
     if funcname == 'mmap':
         funcname = 'mmap64'
     if hasattr(old_eci, '_with_ctypes'):
-        eci = old_eci._with_ctypes
-    else:
-        try:
-            eci = _eci_cache[old_eci]
-        except KeyError:
-            eci = old_eci.compile_shared_lib()
-            _eci_cache[old_eci] = eci
+        old_eci = old_eci._with_ctypes
+    try:
+        eci = _eci_cache[old_eci]
+    except KeyError:
+        eci = old_eci.compile_shared_lib()
+        _eci_cache[old_eci] = eci
     libraries = eci.testonly_libraries + eci.libraries + eci.frameworks
diff --git a/pypy/rpython/lltypesystem/rbuilder.py b/pypy/rpython/lltypesystem/rbuilder.py
--- a/pypy/rpython/lltypesystem/rbuilder.py
+++ b/pypy/rpython/lltypesystem/rbuilder.py
@@ -6,6 +6,7 @@
 from pypy.rpython.annlowlevel import llstr
 from pypy.rlib import rgc
 from pypy.rlib.rarithmetic import ovfcheck
+from pypy.rlib.objectmodel import enforceargs
 from pypy.rpython.lltypesystem.lltype import staticAdtMethod
 from pypy.tool.sourcetools import func_with_new_name
@@ -15,6 +16,7 @@
 GROW_FAST_UNTIL = 100*1024*1024      # 100 MB
 def new_grow_func(name, mallocfn, copycontentsfn):
+    @enforceargs(None, int)
     def stringbuilder_grow(ll_builder, needed):
         allocated = ll_builder.allocated
         #if allocated < GROW_FAST_UNTIL:
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -15,6 +15,7 @@
 from pypy.translator.tool.cbuild import ExternalCompilationInfo
 from pypy.rpython.annlowlevel import llhelper
 from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
 from pypy.rpython.lltypesystem import llmemory
 import os, sys
@@ -54,7 +55,8 @@
                sandboxsafe=False, threadsafe='auto',
                _nowrapper=False, calling_conv='c',
-               oo_primitive=None, pure_function=False):
+               oo_primitive=None, pure_function=False,
+               macro=None):
     """Build an external function that will invoke the C function 'name'
     with the given 'args' types and 'result' type.
@@ -78,7 +80,13 @@
         assert callable(_callable)
     ext_type = lltype.FuncType(args, result)
     if _callable is None:
-        _callable = ll2ctypes.LL2CtypesCallable(ext_type, calling_conv)
+        if macro is not None:
+            if macro is True:
+                macro = name
+            _callable = generate_macro_wrapper(
+                name, macro, ext_type, compilation_info)
+        else:
+            _callable = ll2ctypes.LL2CtypesCallable(ext_type, calling_conv)
     if pure_function:
         _callable._pure_function_ = True
     kwds = {}
@@ -314,6 +322,41 @@
                       compilation_info=eci, sandboxsafe=True, _nowrapper=True,
                       _callable=lambda: None)
+def generate_macro_wrapper(name, macro, functype, eci):
+    """Wraps a function-like macro inside a real function, and expose
+    it with llexternal.
+
+    'name' is used to build the C symbol of the wrapper, 'macro' is the
+    text emitted as the callee in the generated C source, 'functype'
+    supplies the ARGS and RESULT types, and 'eci' is the compilation info
+    that receives a '_with_ctypes' companion.  Returns a Python callable
+    that forwards its arguments to the llexternal'ed wrapper."""
+    # Generate the function call
+    from pypy.translator.c.database import LowLevelDatabase
+    from pypy.translator.c.support import cdecl
+    wrapper_name = 'pypy_macro_wrapper_%s' % (name,)
+    argnames = ['arg%d' % (i,) for i in range(len(functype.ARGS))]
+    db = LowLevelDatabase()
+    implementationtypename = db.gettype(functype, argnames=argnames)
+    # A Void result gets a plain call statement; any other result type
+    # gets a 'return' of the macro expansion.
+    if functype.RESULT is lltype.Void:
+        pattern = '%s { %s(%s); }'
+    else:
+        pattern = '%s { return %s(%s); }'
+    source = pattern % (
+        cdecl(implementationtypename, wrapper_name),
+        macro, ', '.join(argnames))
+    # Now stuff this source into a "companion" eci that will be used
+    # by ll2ctypes.  We replace eci._with_ctypes, so that only one
+    # shared library is actually compiled (when ll2ctypes calls the
+    # first function)
+    ctypes_eci = eci.merge(ExternalCompilationInfo(
+            separate_module_sources=[source],
+            export_symbols=[wrapper_name],
+            ))
+    # If an earlier macro already attached a companion, fold it in so the
+    # wrappers accumulate instead of replacing one another.
+    if hasattr(eci, '_with_ctypes'):
+        ctypes_eci = eci._with_ctypes.merge(ctypes_eci)
+    eci._with_ctypes = ctypes_eci
+    func = llexternal(wrapper_name, functype.ARGS, functype.RESULT,
+                      compilation_info=eci, _nowrapper=True)
+    # _nowrapper=True returns a pointer which is not hashable
+    return lambda *args: func(*args)
 # ____________________________________________________________
 # Few helpers for keeping callback arguments alive
 # this makes passing opaque objects possible (they don't even pass
@@ -496,7 +539,7 @@
             val = rffi_platform.sizeof(name, compilation_info)
             cache[name] = val
             return val
     hints['getsize'] = lazy_getsize
     return lltype.OpaqueType(name, hints)
@@ -594,24 +637,24 @@
 # conversions between str and char*
 # conversions between unicode and wchar_t*
 def make_string_mappings(strtype):
     if strtype is str:
         from pypy.rpython.lltypesystem.rstr import STR as STRTYPE
         from pypy.rpython.annlowlevel import llstr as llstrtype
         from pypy.rpython.annlowlevel import hlstr as hlstrtype
         TYPEP = CCHARP
         ll_char_type = lltype.Char
-        emptystr = ''
         lastchar = '\x00'
+        builder_class = StringBuilder
         from pypy.rpython.lltypesystem.rstr import UNICODE as STRTYPE
         from pypy.rpython.annlowlevel import llunicode as llstrtype
         from pypy.rpython.annlowlevel import hlunicode as hlstrtype
         TYPEP = CWCHARP
         ll_char_type = lltype.UniChar
-        emptystr = u''
         lastchar = u'\x00'
+        builder_class = UnicodeBuilder
     # str -> char*
     def str2charp(s):
         """ str -> char*
@@ -632,12 +675,12 @@
     # char* -> str
     # doesn't free char*
     def charp2str(cp):
-        l = []
+        b = builder_class()
         i = 0
         while cp[i] != lastchar:
-            l.append(cp[i])
+            b.append(cp[i])
             i += 1
-        return emptystr.join(l)
+        return b.build()
     # str -> char*
     def get_nonmovingbuffer(data):
@@ -735,17 +778,19 @@
     # char* -> str, with an upper bound on the length in case there is no \x00
     def charp2strn(cp, maxlen):
-        l = []
+        b = builder_class(maxlen)
         i = 0
         while i < maxlen and cp[i] != lastchar:
-            l.append(cp[i])
+            b.append(cp[i])
             i += 1
-        return emptystr.join(l)
+        return b.build()
     # char* and size -> str (which can contain null bytes)
     def charpsize2str(cp, size):
-        l = [cp[i] for i in range(size)]
-        return emptystr.join(l)
+        b = builder_class(size)
+        for i in xrange(size):
+            b.append(cp[i])
+        return b.build()
     charpsize2str._annenforceargs_ = [None, int]
     return (str2charp, free_charp, charp2str,
diff --git a/pypy/rpython/lltypesystem/test/test_ll2ctypes.py b/pypy/rpython/lltypesystem/test/test_ll2ctypes.py
--- a/pypy/rpython/lltypesystem/test/test_ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/test/test_ll2ctypes.py
@@ -787,6 +787,19 @@
         res = fn()
         assert res == 42
+    def test_llexternal_macro(self):
+        # Exercise llexternal(..., macro=...) against a function-like macro
+        # that only exists in the eci's post_include_bits.
+        eci = ExternalCompilationInfo(
+            post_include_bits = ["#define fn(x) (42 + x)"],
+        )
+        # macro=True: the macro has the same name as the external ('fn').
+        fn1 = rffi.llexternal('fn', [rffi.INT], rffi.INT, 
+                              compilation_info=eci, macro=True)
+        # macro='fn': same macro, exposed under another name and with
+        # double arguments.
+        fn2 = rffi.llexternal('fn2', [rffi.DOUBLE], rffi.DOUBLE, 
+                              compilation_info=eci, macro='fn')
+        res = fn1(10)
+        assert res == 52
+        res = fn2(10.5)
+        assert res == 52.5
     def test_prebuilt_constant(self):
         header = py.code.Source("""
         #ifndef _SOME_H
@@ -1318,7 +1331,6 @@
 class TestPlatform(object):
     def test_lib_on_libpaths(self):
         from pypy.translator.platform import platform
-        from pypy.translator.tool.cbuild import ExternalCompilationInfo
         tmpdir = udir.join('lib_on_libppaths')
@@ -1340,7 +1352,6 @@
             py.test.skip("Not supported")
         from pypy.translator.platform import platform
-        from pypy.translator.tool.cbuild import ExternalCompilationInfo
         tmpdir = udir.join('lib_on_libppaths_prefix')
diff --git a/pypy/rpython/memory/gctransform/framework.py b/pypy/rpython/memory/gctransform/framework.py
--- a/pypy/rpython/memory/gctransform/framework.py
+++ b/pypy/rpython/memory/gctransform/framework.py
@@ -714,8 +714,7 @@
                     malloc_ptr = self.malloc_varsize_clear_ptr
                 args = [self.c_const_gc, c_type_id, v_length, c_size,
                         c_varitemsize, c_ofstolength, c_can_collect]
-        keep_current_args = flags.get('keep_current_args', False)
-        livevars = self.push_roots(hop, keep_current_args=keep_current_args)
+        livevars = self.push_roots(hop)
         v_result = hop.genop("direct_call", [malloc_ptr] + args,
         self.pop_roots(hop, livevars)
diff --git a/pypy/rpython/memory/test/test_gctypelayout.py b/pypy/rpython/memory/test/test_gctypelayout.py
--- a/pypy/rpython/memory/test/test_gctypelayout.py
+++ b/pypy/rpython/memory/test/test_gctypelayout.py
@@ -4,6 +4,7 @@
 from pypy.rpython.memory.gctypelayout import gc_pointers_inside
 from pypy.rpython.lltypesystem import lltype, llmemory, rclass
 from pypy.rpython.test.test_llinterp import get_interpreter
+from pypy.rpython.rclass import IR_IMMUTABLE
 from pypy.objspace.flow.model import Constant
 class FakeGC:
@@ -101,7 +102,7 @@
     accessor = rclass.FieldListAccessor()
     S3 = lltype.GcStruct('S', ('x', PT), ('y', PT),
                          hints={'immutable_fields': accessor})
-    accessor.initialize(S3, {'x': ''})
+    accessor.initialize(S3, {'x': IR_IMMUTABLE})
     s1 = lltype.malloc(S1)
     adr = llmemory.cast_ptr_to_adr(s1)
diff --git a/pypy/rpython/test/test_rfloat.py b/pypy/rpython/test/test_rfloat.py
--- a/pypy/rpython/test/test_rfloat.py
+++ b/pypy/rpython/test/test_rfloat.py
@@ -177,7 +177,11 @@
             n1 = x * x
             n2 = y * y * y
             return rfloat.isnan(n1 / n2)
-        assert self.interpret(fn, [1e200, 1e200])   # nan
+        if self.__class__.__name__ != 'TestCliFloat':
+            # the next line currently fails on mono 2.6.7 (ubuntu 11.04), see:
+            # https://bugzilla.novell.com/show_bug.cgi?id=692493
+            assert self.interpret(fn, [1e200, 1e200])   # nan
+        #
         assert not self.interpret(fn, [1e200, 1.0])   # +inf
         assert not self.interpret(fn, [1e200, -1.0])  # -inf
         assert not self.interpret(fn, [42.5, 2.3])    # +finite
@@ -205,7 +209,11 @@
         assert self.interpret(fn, [42.5, -2.3])       # -finite
         assert not self.interpret(fn, [1e200, 1.0])   # +inf
         assert not self.interpret(fn, [1e200, -1.0])  # -inf
-        assert not self.interpret(fn, [1e200, 1e200]) # nan
+        #
+        if self.__class__.__name__ != 'TestCliFloat':
+            # the next line currently fails on mono 2.6.7 (ubuntu 11.04), see:
+            # https://bugzilla.novell.com/show_bug.cgi?id=692493
+            assert not self.interpret(fn, [1e200, 1e200]) # nan
 class TestLLtype(BaseTestRfloat, LLRtypeMixin):
diff --git a/pypy/tool/clean_old_branches.py b/pypy/tool/clean_old_branches.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/clean_old_branches.py
@@ -0,0 +1,72 @@
+For branches that have been closed but still have a dangling head
+in 'hg heads --topo --closed', force them to join with the branch
+called 'closed-branches'.  It reduces the number of heads.
+import os, sys
+# os.listdir() raises OSError when '.hg' does not exist, which would skip
+# the friendly message below; test for the directory first.  An existing
+# but empty '.hg' is still rejected, as before.
+if not os.path.isdir('.hg') or not os.listdir('.hg'):
+    print 'Must run this script from the top-level directory.'
+    sys.exit(1)
+def heads(args):
+    # Run 'hg heads --topo' with 'args' spliced into the command line and
+    # return its output as a list of '<branches> <short node>' strings.
+    g = os.popen(r"hg heads --topo %s --template '{branches} {node|short}\n'"
+                 % args, 'r')
+    result = g.read()
+    g.close()
+    result = result.splitlines(False)
+    # Drop lines starting with a space (empty {branches} field; presumably
+    # the default branch -- confirm) and heads already on 'closed-branches'.
+    result = [s for s in result
+                if not s.startswith(' ')
+                   and not s.startswith('closed-branches ')]
+    return result
+all_heads = heads("--closed")
+opened_heads = heads("")
+# Heads that only show up with --closed are the dangling closed heads.
+closed_heads = [s for s in all_heads if s not in opened_heads]
+if not closed_heads:
+    print >> sys.stderr, 'no dangling closed heads.'
+    sys.exit()
+# ____________________________________________________________
+for branch_head in closed_heads:
+    branch, head = branch_head.split()
+    print '\t', branch
+print 'The branches listed above will be merged to "closed-branches".'
+print 'You need to run this script in a clean working copy where you'
+# Double-quoted so the apostrophe is printed: 'don''t' is two adjacent
+# string literals and comes out as "dont".
+print "don't mind all files being removed."
+if raw_input('Continue? [y/n] ').upper() != 'Y':
+    sys.exit(1)
+# ____________________________________________________________
+def do(cmd):
+    # Echo 'cmd', run it through os.system(), and abort the whole script
+    # with exit status 1 as soon as a command reports a non-zero status.
+    print cmd
+    err = os.system(cmd)
+    if err != 0:
+        print '*** error %r' % (err,)
+        sys.exit(1)
+# For each dangling closed head: check out 'closed-branches', purge the
+# working copy, merge the head in, remove every top-level entry except
+# '.hg', and commit the merge.
+for branch_head in closed_heads:
+    branch, head = branch_head.split()
+    print
+    print '***** %s ***** %s *****' % (branch, head)
+    do("hg up --clean closed-branches")
+    do("hg --config extensions.purge= purge --all")
+    do("hg merge -y %s" % head)
+    for fn in os.listdir('.'):
+        if fn.lower() != '.hg':
+            # NOTE(review): fn is interpolated inside single quotes; a name
+            # containing "'" would break the shell command -- confirm this
+            # cannot happen in the repository.
+            do("rm -fr -- '%s'" % fn)
+            # '|| true' keeps do() from aborting when 'hg rm' fails.
+            do("hg rm --after -- '%s' || true" % fn)
+    do("hg ci -m'Merge closed head %s on branch %s'" % (head, branch))
+# After the loop: close the branch again and go back to 'default'.
+do("hg ci --close-branch -m're-close this branch'")
+do("hg up default")
diff --git a/pypy/tool/frozenlist.py b/pypy/tool/frozenlist.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/frozenlist.py
@@ -0,0 +1,19 @@
+from pypy.tool.sourcetools import func_with_new_name
+def forbid(*args):
+    # Accepts any positional arguments and always raises TypeError; used
+    # below as the body of every overridden frozenlist method.
+    raise TypeError, "cannot mutate a frozenlist"
+class frozenlist(list):
+    """A list subclass whose mutating operations raise TypeError.
+
+    Each name below is bound to a renamed copy of forbid(); everything
+    not listed here is inherited from list unchanged.
+    """
+    __setitem__  = func_with_new_name(forbid, '__setitem__')
+    __delitem__  = func_with_new_name(forbid, '__delitem__')
+    __setslice__ = func_with_new_name(forbid, '__setslice__')
+    __delslice__ = func_with_new_name(forbid, '__delslice__')
+    __iadd__     = func_with_new_name(forbid, '__iadd__')
+    __imul__     = func_with_new_name(forbid, '__imul__')
+    append       = func_with_new_name(forbid, 'append')
+    insert       = func_with_new_name(forbid, 'insert')
+    pop          = func_with_new_name(forbid, 'pop')
+    remove       = func_with_new_name(forbid, 'remove')
+    reverse      = func_with_new_name(forbid, 'reverse')
+    sort         = func_with_new_name(forbid, 'sort')
+    extend       = func_with_new_name(forbid, 'extend')
diff --git a/pypy/tool/runsubprocess.py b/pypy/tool/runsubprocess.py
--- a/pypy/tool/runsubprocess.py
+++ b/pypy/tool/runsubprocess.py
@@ -3,7 +3,7 @@
 if the current process already grew very large.
-import sys
+import sys, gc
 import os
 from subprocess import PIPE, Popen
@@ -21,6 +21,11 @@
             args = [str(executable)] + args
         shell = False
+    # Just before spawning the subprocess, do a gc.collect().  This
+    # should help if we are running on top of PyPy, if the subprocess
+    # is going to need a lot of RAM and we are using a lot too.
+    gc.collect()
+    #
     pipe = Popen(args, stdout=PIPE, stderr=PIPE, shell=shell, env=env, cwd=cwd)
     stdout, stderr = pipe.communicate()
     return pipe.returncode, stdout, stderr
diff --git a/pypy/tool/test/test_frozenlist.py b/pypy/tool/test/test_frozenlist.py
new file mode 100644
--- /dev/null
+++ b/pypy/tool/test/test_frozenlist.py
@@ -0,0 +1,21 @@
+import py
+from pypy.tool.frozenlist import frozenlist
+def test_frozenlist():
+    l = frozenlist([1, 2, 3])
+    # Read-only operations behave like a plain list.
+    assert l[0] == 1
+    assert l[:2] == [1, 2]
+    assert l.index(2) == 1
+    # Every mutating operation must raise TypeError.
+    py.test.raises(TypeError, "l[0] = 1")
+    py.test.raises(TypeError, "del l[0]")
+    py.test.raises(TypeError, "l[:] = []")
+    py.test.raises(TypeError, "del l[:]")
+    py.test.raises(TypeError, "l += []")
+    py.test.raises(TypeError, "l *= 2")
+    py.test.raises(TypeError, "l.append(1)")
+    py.test.raises(TypeError, "l.insert(0, 0)")
+    py.test.raises(TypeError, "l.pop()")
+    py.test.raises(TypeError, "l.remove(1)")
+    py.test.raises(TypeError, "l.reverse()")
+    py.test.raises(TypeError, "l.sort()")
+    py.test.raises(TypeError, "l.extend([])")
diff --git a/pypy/translator/backendopt/test/test_constfold.py b/pypy/translator/backendopt/test/test_constfold.py
--- a/pypy/translator/backendopt/test/test_constfold.py
+++ b/pypy/translator/backendopt/test/test_constfold.py
@@ -49,7 +49,7 @@
     accessor = rclass.FieldListAccessor()
     S2 = lltype.GcStruct('S2', ('x', lltype.Signed),
                          hints={'immutable_fields': accessor})
-    accessor.initialize(S2, {'x': ''})
+    accessor.initialize(S2, {'x': rclass.IR_IMMUTABLE})
diff --git a/pypy/translator/c/funcgen.py b/pypy/translator/c/funcgen.py
--- a/pypy/translator/c/funcgen.py
+++ b/pypy/translator/c/funcgen.py
@@ -843,6 +843,9 @@
         return '%s = %s; /* JIT_FORCE_VIRTUAL */' % (self.expr(op.result),
+    def OP_JIT_FORCE_QUASI_IMMUTABLE(self, op):
+        # Emits only a C comment containing the operation's string form.
+        return '/* JIT_FORCE_QUASI_IMMUTABLE %s */' % op
     def OP_GET_GROUP_MEMBER(self, op):
         typename = self.db.gettype(op.result.concretetype)
         return '%s = (%s)_OP_GET_GROUP_MEMBER(%s, %s);' % (
diff --git a/pypy/translator/c/genc.py b/pypy/translator/c/genc.py
--- a/pypy/translator/c/genc.py
+++ b/pypy/translator/c/genc.py
@@ -508,27 +508,15 @@
         shared = self.config.translation.shared
-        if (self.config.translation.gcrootfinder == "asmgcc" or
-            self.config.translation.force_make):
-            extra_opts = []
-            if self.config.translation.make_jobs != 1:
-                extra_opts += ['-j', str(self.config.translation.make_jobs)]
-            self.translator.platform.execute_makefile(self.targetdir,
-                                                      extra_opts)
-            if shared:
-                self.shared_library_name = self.executable_name.new(
-                    purebasename='lib' + self.executable_name.purebasename,
-                    ext=self.translator.platform.so_ext)
-        else:
-            compiler = CCompilerDriver(self.translator.platform,
-                                       [self.c_source_filename] + self.extrafiles,
-                                       self.eci, profbased=self.getprofbased(),
-                                       outputfilename=exe_name)
-            self.executable_name = compiler.build(shared=shared)
-            if shared:
-                self.executable_name = self.build_main_for_shared(
-                    self.executable_name, "pypy_main_startup", exe_name)
-            assert self.executable_name
+        extra_opts = []
+        if self.config.translation.make_jobs != 1:
+            extra_opts += ['-j', str(self.config.translation.make_jobs)]
+        self.translator.platform.execute_makefile(self.targetdir,
+                                                  extra_opts)
+        if shared:
+            self.shared_library_name = self.executable_name.new(
+                purebasename='lib' + self.executable_name.purebasename,
+                ext=self.translator.platform.so_ext)
         self._compiled = True
         return self.executable_name
diff --git a/pypy/translator/c/src/cjkcodecs/README b/pypy/translator/c/src/cjkcodecs/README
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/README
@@ -0,0 +1,86 @@
+The .c and .h files come directly from CPython, with the exception of
+cjkcodecs.h and multibytecodec.h, which have been ripped of their
+CPython dependencies.
+To generate or modify mapping headers
+Mapping headers are imported from CJKCodecs as pre-generated form.
+If you need to tweak or add something on it, please look at tools/
+subdirectory of CJKCodecs' distribution.
+Notes on implementation characteristics of each codec
+1) Big5 codec
+  The big5 codec maps the following characters as cp950 does rather
+  than conforming Unicode.org's that maps to 0xFFFD.
+    BIG5        Unicode     Description
+    0xA15A      0x2574      SPACING UNDERSCORE
+    0xA1C3      0xFFE3      SPACING HEAVY OVERSCORE
+    0xA1C5      0x02CD      SPACING HEAVY UNDERSCORE
+    0xA1FE      0xFF0F      LT DIAG UP RIGHT TO LOW LEFT
+    0xA240      0xFF3C      LT DIAG UP LEFT TO LOW RIGHT
+    0xA2CC      0x5341      HANGZHOU NUMERAL TEN
+    0xA2CE      0x5345      HANGZHOU NUMERAL THIRTY
+  Because Unicode 0x5341, 0x5345, 0xFF0F and 0xFF3C are already mapped
+  to other big5 codes, roundtrip compatibility is not guaranteed for
+  them.
+2) cp932 codec
+  To conform to Windows's real mapping, the cp932 codec maps the following
+  codepoints in addition to the official cp932 mapping.
+    CP932     Unicode     Description
+    0x80      0x80        UNDEFINED
+    0xA0      0xF8F0      UNDEFINED
+    0xFD      0xF8F1      UNDEFINED
+    0xFE      0xF8F2      UNDEFINED
+    0xFF      0xF8F3      UNDEFINED
+3) euc-jisx0213 codec
+  The euc-jisx0213 codec maps JIS X 0213 Plane 1 code 0x2140 into
+  unicode U+FF3C instead of U+005C as on unicode.org's mapping.
+  Because euc-jisx0213 has REVERSE SOLIDUS on 0x5c already and A140
+  is shown as a full width character, mapping to U+FF3C can make
+  more sense.
+  The euc-jisx0213 codec can also decode JIS X 0212 codes on
+  codeset 2. Because JIS X 0212 and JIS X 0213 Plane 2 do not
+  overlap each other, this does not break standard conformance
+  (and JIS X 0213 Plane 2 is intended to be used this way.) When
+  encoding, the codec will try to encode kanji characters in this
+  order:
+    JIS X 0213 Plane 1 -> JIS X 0213 Plane 2 -> JIS X 0212
+4) euc-jp codec
+  The euc-jp codec is a compatibility instance on these points:
+   - U+FF3C FULLWIDTH REVERSE SOLIDUS is mapped to EUC-JP A1C0 (vice versa)
+   - U+00A5 YEN SIGN is mapped to EUC-JP 0x5c. (one way)
+   - U+203E OVERLINE is mapped to EUC-JP 0x7e. (one way)
+5) shift-jis codec
+  The shift-jis codec is mapping 0x20-0x7e area to U+20-U+7E directly
+  instead of using JIS X 0201 for compatibility. The differences are:
+   - U+005C REVERSE SOLIDUS is mapped to SHIFT-JIS 0x5c.
+   - U+007E TILDE is mapped to SHIFT-JIS 0x7e.
diff --git a/pypy/translator/c/src/cjkcodecs/_codecs_cn.c b/pypy/translator/c/src/cjkcodecs/_codecs_cn.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/_codecs_cn.c
@@ -0,0 +1,444 @@
+ * _codecs_cn.c: Codecs collection for Mainland Chinese encodings
+ *
+ * Written by Hye-Shik Chang <perky at FreeBSD.org>
+ */
+#include "src/cjkcodecs/cjkcodecs.h"
+#include "src/cjkcodecs/mappings_cn.h"
+ * hz is predefined as 100 on AIX. So we undefine it to avoid
+ * conflict against hz codec's.
+ */
+#ifdef _AIX
+#undef hz
+/* GBK and GB2312 map differently in few codepoints that are listed below:
+ *
+ *              gb2312                          gbk
+ * A1A4         U+30FB KATAKANA MIDDLE DOT      U+00B7 MIDDLE DOT
+ * A1AA         U+2015 HORIZONTAL BAR           U+2014 EM DASH
+ * A844         undefined                       U+2015 HORIZONTAL BAR
+ */
+#define GBK_DECODE(dc1, dc2, assi) \
+    if ((dc1) == 0xa1 && (dc2) == 0xaa) (assi) = 0x2014; \
+    else if ((dc1) == 0xa8 && (dc2) == 0x44) (assi) = 0x2015; \
+    else if ((dc1) == 0xa1 && (dc2) == 0xa4) (assi) = 0x00b7; \
+    else TRYMAP_DEC(gb2312, assi, dc1 ^ 0x80, dc2 ^ 0x80); \
+    else TRYMAP_DEC(gbkext, assi, dc1, dc2);
+#define GBK_ENCODE(code, assi) \
+    if ((code) == 0x2014) (assi) = 0xa1aa; \
+    else if ((code) == 0x2015) (assi) = 0xa844; \
+    else if ((code) == 0x00b7) (assi) = 0xa1a4; \
+    else if ((code) != 0x30fb && TRYMAP_ENC_COND(gbcommon, assi, code));
+ * GB2312 codec
+ */
+    while (inleft > 0) {
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        if (c < 0x80) {
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            continue;
+        }
+        UCS4INVALID(c)
+        REQUIRE_OUTBUF(2)
+        TRYMAP_ENC(gbcommon, code, c);
+        else return 1;
+        if (code & 0x8000) /* MSB set: GBK */
+            return 1;
+        OUT1((code >> 8) | 0x80)
+        OUT2((code & 0xFF) | 0x80)
+        NEXT(1, 2)
+    }
+    return 0;
+    while (inleft > 0) {
+        unsigned char c = **inbuf;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        TRYMAP_DEC(gb2312, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
+            NEXT(2, 1)
+        }
+        else return 2;
+    }
+    return 0;
+ * GBK codec
+ */
+    while (inleft > 0) {
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        if (c < 0x80) {
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            continue;
+        }
+        UCS4INVALID(c)
+        REQUIRE_OUTBUF(2)
+        GBK_ENCODE(c, code)
+        else return 1;
+        OUT1((code >> 8) | 0x80)
+        if (code & 0x8000)
+            OUT2((code & 0xFF)) /* MSB set: GBK */
+        else
+            OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
+        NEXT(1, 2)
+    }
+    return 0;
+    while (inleft > 0) {
+        unsigned char c = IN1;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        GBK_DECODE(c, IN2, **outbuf)
+        else return 2;
+        NEXT(2, 1)
+    }
+    return 0;
+ * GB18030 codec
+ */
+    while (inleft > 0) {
+        ucs4_t c = IN1;
+        DBCHAR code;
+        if (c < 0x80) {
+            WRITE1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        if (c > 0x10FFFF)
+#if Py_UNICODE_SIZE == 2
+            return 2; /* surrogates pair */
+            return 1;
+        else if (c >= 0x10000) {
+            ucs4_t tc = c - 0x10000;
+            REQUIRE_OUTBUF(4)
+            OUT4((unsigned char)(tc % 10) + 0x30)
+            tc /= 10;
+            OUT3((unsigned char)(tc % 126) + 0x81)
+            tc /= 126;
+            OUT2((unsigned char)(tc % 10) + 0x30)
+            tc /= 10;
+            OUT1((unsigned char)(tc + 0x90))
+#if Py_UNICODE_SIZE == 2
+            NEXT(2, 4) /* surrogates pair */
+            NEXT(1, 4)
+            continue;
+        }
+        REQUIRE_OUTBUF(2)
+        GBK_ENCODE(c, code)
+        else TRYMAP_ENC(gb18030ext, code, c);
+        else {
+            const struct _gb18030_to_unibmp_ranges *utrrange;
+            REQUIRE_OUTBUF(4)
+            for (utrrange = gb18030_to_unibmp_ranges;
+                 utrrange->first != 0;
+                 utrrange++)
+                if (utrrange->first <= c &&
+                    c <= utrrange->last) {
+                    Py_UNICODE tc;
+                    tc = c - utrrange->first +
+                         utrrange->base;
+                    OUT4((unsigned char)(tc % 10) + 0x30)
+                    tc /= 10;
+                    OUT3((unsigned char)(tc % 126) + 0x81)
+                    tc /= 126;
+                    OUT2((unsigned char)(tc % 10) + 0x30)
+                    tc /= 10;
+                    OUT1((unsigned char)tc + 0x81)
+                    NEXT(1, 4)
+                    break;
+                }
+            if (utrrange->first == 0)
+                return 1;
+            continue;
+        }
+        OUT1((code >> 8) | 0x80)
+        if (code & 0x8000)
+            OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
+        else
+            OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
+        NEXT(1, 2)
+    }
+    return 0;
+    while (inleft > 0) {
+        unsigned char c = IN1, c2;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        c2 = IN2;
+        if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
+            const struct _gb18030_to_unibmp_ranges *utr;
+            unsigned char c3, c4;
+            ucs4_t lseq;
+            REQUIRE_INBUF(4)
+            c3 = IN3;
+            c4 = IN4;
+            if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
+                return 4;
+            c -= 0x81;  c2 -= 0x30;
+            c3 -= 0x81; c4 -= 0x30;
+            if (c < 4) { /* U+0080 - U+FFFF */
+                lseq = ((ucs4_t)c * 10 + c2) * 1260 +
+                    (ucs4_t)c3 * 10 + c4;
+                if (lseq < 39420) {
+                    for (utr = gb18030_to_unibmp_ranges;
+                         lseq >= (utr + 1)->base;
+                         utr++) ;
+                    OUT1(utr->first - utr->base + lseq)
+                    NEXT(4, 1)
+                    continue;
+                }
+            }
+            else if (c >= 15) { /* U+10000 - U+10FFFF */
+                lseq = 0x10000 + (((ucs4_t)c-15) * 10 + c2)
+                    * 1260 + (ucs4_t)c3 * 10 + c4;
+                if (lseq <= 0x10FFFF) {
+                    WRITEUCS4(lseq);
+                    NEXT_IN(4)
+                    continue;
+                }
+            }
+            return 4;
+        }
+        GBK_DECODE(c, c2, **outbuf)
+        else TRYMAP_DEC(gb18030ext, **outbuf, c, c2);
+        else return 2;
+        NEXT(2, 1)
+    }
+    return 0;
+ * HZ codec
+ */
+    state->i = 0;
+    return 0;
+    if (state->i != 0) {
+        WRITE2('~', '}')
+        state->i = 0;
+        NEXT_OUT(2)
+    }
+    return 0;
+    while (inleft > 0) {
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        if (c < 0x80) {
+            if (state->i == 0) {
+                WRITE1((unsigned char)c)
+                NEXT(1, 1)
+            }
+            else {
+                WRITE3('~', '}', (unsigned char)c)
+                NEXT(1, 3)
+                state->i = 0;
+            }
+            continue;
+        }
+        UCS4INVALID(c)
+        TRYMAP_ENC(gbcommon, code, c);
+        else return 1;
+        if (code & 0x8000) /* MSB set: GBK */
+            return 1;
+        if (state->i == 0) {
+            WRITE4('~', '{', code >> 8, code & 0xff)
+            NEXT(1, 4)
+            state->i = 1;
+        }
+        else {
+            WRITE2(code >> 8, code & 0xff)
+            NEXT(1, 2)
+        }
+    }
+    return 0;
+    state->i = 0;
+    return 0;
+    state->i = 0;
+    return 0;
+    while (inleft > 0) {
+        unsigned char c = IN1;
+        if (c == '~') {
+            unsigned char c2 = IN2;
+            REQUIRE_INBUF(2)
+            if (c2 == '~') {
+                WRITE1('~')
+                NEXT(2, 1)
+                continue;
+            }
+            else if (c2 == '{' && state->i == 0)
+                state->i = 1; /* set GB */
+            else if (c2 == '}' && state->i == 1)
+                state->i = 0; /* set ASCII */
+            else if (c2 == '\n')
+                ; /* line-continuation */
+            else
+                return 2;
+            NEXT(2, 0);
+            continue;
+        }
+        if (c & 0x80)
+            return 1;
+        if (state->i == 0) { /* ASCII mode */
+            WRITE1(c)
+            NEXT(1, 1)
+        }
+        else { /* GB mode */
+            REQUIRE_INBUF(2)
+            REQUIRE_OUTBUF(1)
+            TRYMAP_DEC(gb2312, **outbuf, c, IN2) {
+                NEXT(2, 1)
+            }
+            else
+                return 2;
+        }
+    }
+    return 0;
+  MAPPING_ENCONLY(gbcommon)
+  MAPPING_ENCDEC(gb18030ext)
diff --git a/pypy/translator/c/src/cjkcodecs/_codecs_hk.c b/pypy/translator/c/src/cjkcodecs/_codecs_hk.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/_codecs_hk.c
@@ -0,0 +1,180 @@
+ * _codecs_hk.c: Codecs collection for encodings from Hong Kong
+ *
+ * Written by Hye-Shik Chang <perky at FreeBSD.org>
+ */
+#include "src/cjkcodecs/cjkcodecs.h"
+#include "src/cjkcodecs/mappings_hk.h"
+ * BIG5HKSCS codec
+ */
+static const encode_map *big5_encmap = NULL;
+static const decode_map *big5_decmap = NULL;
+  IMPORT_MAP(tw, big5, &big5_encmap, &big5_decmap);
+  return 0;
+ * There are four possible Unicode pair -> big5hkscs mappings in HKSCS 2004:
+ *  U+00CA U+0304 -> 8862  (U+00CA alone is mapped to 8866)
+ *  U+00CA U+030C -> 8864
+ *  U+00EA U+0304 -> 88a3  (U+00EA alone is mapped to 88a7)
+ *  U+00EA U+030C -> 88a5
+ * These are handled not by mapping tables but by hand-written code.
+ */
+static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5};
+    while (inleft > 0) {
+        ucs4_t c = **inbuf;
+        DBCHAR code;
+        Py_ssize_t insize;
+        if (c < 0x80) {
+            REQUIRE_OUTBUF(1)
+            **outbuf = (unsigned char)c;
+            NEXT(1, 1)
+            continue;
+        }
+        insize = GET_INSIZE(c);
+        REQUIRE_OUTBUF(2)
+        if (c < 0x10000) {
+            TRYMAP_ENC(big5hkscs_bmp, code, c) {
+                if (code == MULTIC) {
+                    if (inleft >= 2 &&
+                        ((c & 0xffdf) == 0x00ca) &&
+                        (((*inbuf)[1] & 0xfff7) == 0x0304)) {
+                        code = big5hkscs_pairenc_table[
+                            ((c >> 4) |
+                             ((*inbuf)[1] >> 3)) & 3];
+                        insize = 2;
+                    }
+                    else if (inleft < 2 &&
+                             !(flags & MBENC_FLUSH))
+                        return MBERR_TOOFEW;
+                    else {
+                        if (c == 0xca)
+                            code = 0x8866;
+                        else /* c == 0xea */
+                            code = 0x88a7;
+                    }
+                }
+            }
+            else TRYMAP_ENC(big5, code, c);
+            else return 1;
+        }
+        else if (c < 0x20000)
+            return insize;
+        else if (c < 0x30000) {
+            TRYMAP_ENC(big5hkscs_nonbmp, code, c & 0xffff);
+            else return insize;
+        }
+        else
+            return insize;
+        OUT1(code >> 8)
+        OUT2(code & 0xFF)
+        NEXT(insize, 2)
+    }
+    return 0;
+#define BH2S(c1, c2) (((c1) - 0x87) * (0xfe - 0x40 + 1) + ((c2) - 0x40))
+    while (inleft > 0) {
+        unsigned char c = IN1;
+        ucs4_t decoded;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        if (0xc6 <= c && c <= 0xc8 && (c >= 0xc7 || IN2 >= 0xa1))
+            goto hkscsdec;
+        TRYMAP_DEC(big5, **outbuf, c, IN2) {
+            NEXT(2, 1)
+        }
+        else
+hkscsdec:       TRYMAP_DEC(big5hkscs, decoded, c, IN2) {
+                        int s = BH2S(c, IN2);
+                        const unsigned char *hintbase;
+                        assert(0x87 <= c && c <= 0xfe);
+                        assert(0x40 <= IN2 && IN2 <= 0xfe);
+                        if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
+                                hintbase = big5hkscs_phint_0;
+                                s -= BH2S(0x87, 0x40);
+                        }
+                        else if (BH2S(0xc6,0xa1) <= s && s <= BH2S(0xc8,0xfe)){
+                                hintbase = big5hkscs_phint_12130;
+                                s -= BH2S(0xc6, 0xa1);
+                        }
+                        else if (BH2S(0xf9,0xd6) <= s && s <= BH2S(0xfe,0xfe)){
+                                hintbase = big5hkscs_phint_21924;
+                                s -= BH2S(0xf9, 0xd6);
+                        }
+                        else
+                                return MBERR_INTERNAL;
+                        if (hintbase[s >> 3] & (1 << (s & 7))) {
+                                WRITEUCS4(decoded | 0x20000)
+                                NEXT_IN(2)
+                        }
+                        else {
+                                OUT1(decoded)
+                                NEXT(2, 1)
+                        }
+                }
+                else {
+                        switch ((c << 8) | IN2) {
+                        case 0x8862: WRITE2(0x00ca, 0x0304); break;
+                        case 0x8864: WRITE2(0x00ca, 0x030c); break;
+                        case 0x88a3: WRITE2(0x00ea, 0x0304); break;
+                        case 0x88a5: WRITE2(0x00ea, 0x030c); break;
+                        default: return 2;
+                        }
+                        NEXT(2, 2) /* all decoded codepoints are pairs, above. */
+        }
+    }
+    return 0;
+  MAPPING_DECONLY(big5hkscs)
+  MAPPING_ENCONLY(big5hkscs_bmp)
+  MAPPING_ENCONLY(big5hkscs_nonbmp)
diff --git a/pypy/translator/c/src/cjkcodecs/_codecs_iso2022.c b/pypy/translator/c/src/cjkcodecs/_codecs_iso2022.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/_codecs_iso2022.c
@@ -0,0 +1,1112 @@
+ * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.
+ *
+ * Written by Hye-Shik Chang <perky at FreeBSD.org>
+ */
+#define EXTERN_JISX0213_PAIR
+#include "src/cjkcodecs/cjkcodecs.h"
+#include "src/cjkcodecs/alg_jisx0201.h"
+#include "src/cjkcodecs/emu_jisx0213_2000.h"
+#include "src/cjkcodecs/mappings_jisx0213_pair.h"
+   state->c[0-3]
+    00000000
+    ||^^^^^|
+    |+-----+----  G0-3 Character Set
+    +-----------  Is G0-3 double byte?
+   state->c[4]
+    00000000
+          ||
+          |+----  Locked-Shift?
+          +-----  ESC Throughout
+#define ESC                     0x1B
+#define SO                      0x0E
+#define SI                      0x0F
+#define LF                      0x0A
+#define MAX_ESCSEQLEN           16
+#define CHARSET_ISO8859_1       'A'
+#define CHARSET_ASCII           'B'
+#define CHARSET_ISO8859_7       'F'
+#define CHARSET_JISX0201_K      'I'
+#define CHARSET_JISX0201_R      'J'
+#define CHARSET_GB2312          ('A'|CHARSET_DBCS)
+#define CHARSET_JISX0208        ('B'|CHARSET_DBCS)
+#define CHARSET_KSX1001         ('C'|CHARSET_DBCS)
+#define CHARSET_JISX0212        ('D'|CHARSET_DBCS)
+#define CHARSET_GB2312_8565     ('E'|CHARSET_DBCS)
+#define CHARSET_CNS11643_1      ('G'|CHARSET_DBCS)
+#define CHARSET_CNS11643_2      ('H'|CHARSET_DBCS)
+#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
+#define CHARSET_JISX0213_2      ('P'|CHARSET_DBCS)
+#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
+#define CHARSET_JISX0208_O      ('@'|CHARSET_DBCS)
+#define CHARSET_DBCS            0x80
+#define ESCMARK(mark)           ((mark) & 0x7f)
+#define IS_ESCEND(c)    (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
+#define IS_ISO2022ESC(c2) \
+        ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
+         (c2) == '.' || (c2) == '&')
+    /* This is not a complete list of ISO-2022 escape sequence headers,
+     * but it is enough to implement the CJK instances of ISO-2022. */
+#define MAP_UNMAPPABLE          0xFFFF
+#define MAP_MULTIPLE_AVAIL      0xFFFE /* for JIS X 0213 */
+#define F_SHIFTED               0x01
+#define F_ESCTHROUGHOUT         0x02
+#define STATE_SETG(dn, v)       ((state)->c[dn]) = (v);
+#define STATE_GETG(dn)          ((state)->c[dn])
+#define STATE_G0                STATE_GETG(0)
+#define STATE_G1                STATE_GETG(1)
+#define STATE_G2                STATE_GETG(2)
+#define STATE_G3                STATE_GETG(3)
+#define STATE_SETG0(v)          STATE_SETG(0, v)
+#define STATE_SETG1(v)          STATE_SETG(1, v)
+#define STATE_SETG2(v)          STATE_SETG(2, v)
+#define STATE_SETG3(v)          STATE_SETG(3, v)
+#define STATE_SETFLAG(f)        ((state)->c[4]) |= (f);
+#define STATE_GETFLAG(f)        ((state)->c[4] & (f))
+#define STATE_CLEARFLAG(f)      ((state)->c[4]) &= ~(f);
+#define STATE_CLEARFLAGS()      ((state)->c[4]) = 0;
+#define ISO2022_CONFIG          ((const struct iso2022_config *)config)
+#define CONFIG_ISSET(flag)      (ISO2022_CONFIG->flags & (flag))
+#define CONFIG_DESIGNATIONS     (ISO2022_CONFIG->designations)
+/* iso2022_config.flags */
+#define NO_SHIFT                0x01
+#define USE_G2                  0x02
+#define USE_JISX0208_EXT        0x04
+/*-*- internal data structures -*-*/
+typedef int (*iso2022_init_func)(void);
+typedef ucs4_t (*iso2022_decode_func)(const unsigned char *data);
+typedef DBCHAR (*iso2022_encode_func)(const ucs4_t *data, Py_ssize_t *length);
+struct iso2022_designation {
+    unsigned char mark;
+    unsigned char plane;
+    unsigned char width;
+    iso2022_init_func initializer;
+    iso2022_decode_func decoder;
+    iso2022_encode_func encoder;
+struct iso2022_config {
+    int flags;
+    const struct iso2022_designation *designations; /* non-ascii desigs */
+/*-*- iso-2022 codec implementation -*-*/
+    const struct iso2022_designation *desig = CONFIG_DESIGNATIONS;
+    for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)
+        if (desig->initializer != NULL && desig->initializer() != 0)
+            return -1;
+    return 0;
+    return 0;
+        WRITE1(SI)
+        NEXT_OUT(1)
+    }
+    if (STATE_G0 != CHARSET_ASCII) {
+        WRITE3(ESC, '(', 'B')
+        NEXT_OUT(3)
+    }
+    return 0;
+    while (inleft > 0) {
+        const struct iso2022_designation *dsg;
+        DBCHAR encoded;
+        ucs4_t c = **inbuf;
+        Py_ssize_t insize;
+        if (c < 0x80) {
+            if (STATE_G0 != CHARSET_ASCII) {
+                WRITE3(ESC, '(', 'B')
+                STATE_SETG0(CHARSET_ASCII)
+                NEXT_OUT(3)
+            }
+            if (STATE_GETFLAG(F_SHIFTED)) {
+                WRITE1(SI)
+                NEXT_OUT(1)
+            }
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            continue;
+        }
+        insize = GET_INSIZE(c);
+        encoded = MAP_UNMAPPABLE;
+        for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
+            Py_ssize_t length = 1;
+            encoded = dsg->encoder(&c, &length);
+            if (encoded == MAP_MULTIPLE_AVAIL) {
+                /* This implementation won't work for a pair
+                 * of non-BMP characters. */
+                if (inleft < 2) {
+                    if (!(flags & MBENC_FLUSH))
+                        return MBERR_TOOFEW;
+                    length = -1;
+                }
+                else
+                    length = 2;
+#if Py_UNICODE_SIZE == 2
+                if (length == 2) {
+                    ucs4_t u4in[2];
+                    u4in[0] = (ucs4_t)IN1;
+                    u4in[1] = (ucs4_t)IN2;
+                    encoded = dsg->encoder(u4in, &length);
+                } else
+                    encoded = dsg->encoder(&c, &length);
+                encoded = dsg->encoder(&c, &length);
+                if (encoded != MAP_UNMAPPABLE) {
+                    insize = length;
+                    break;
+                }
+            }
+            else if (encoded != MAP_UNMAPPABLE)
+                break;
+        }
+        if (!dsg->mark)
+            return 1;
+        assert(dsg->width == 1 || dsg->width == 2);
+        switch (dsg->plane) {
+        case 0: /* G0 */
+            if (STATE_GETFLAG(F_SHIFTED)) {
+                WRITE1(SI)
+                NEXT_OUT(1)
+            }
+            if (STATE_G0 != dsg->mark) {
+                if (dsg->width == 1) {
+                    WRITE3(ESC, '(', ESCMARK(dsg->mark))
+                    STATE_SETG0(dsg->mark)
+                    NEXT_OUT(3)
+                }
+                else if (dsg->mark == CHARSET_JISX0208) {
+                    WRITE3(ESC, '$', ESCMARK(dsg->mark))
+                    STATE_SETG0(dsg->mark)
+                    NEXT_OUT(3)
+                }
+                else {
+                    WRITE4(ESC, '$', '(',
+                        ESCMARK(dsg->mark))
+                    STATE_SETG0(dsg->mark)
+                    NEXT_OUT(4)
+                }
+            }
+            break;
+        case 1: /* G1 */
+            if (STATE_G1 != dsg->mark) {
+                if (dsg->width == 1) {
+                    WRITE3(ESC, ')', ESCMARK(dsg->mark))
+                    STATE_SETG1(dsg->mark)
+                    NEXT_OUT(3)
+                }
+                else {
+                    WRITE4(ESC, '$', ')',
+                        ESCMARK(dsg->mark))
+                    STATE_SETG1(dsg->mark)
+                    NEXT_OUT(4)
+                }
+            }
+            if (!STATE_GETFLAG(F_SHIFTED)) {
+                WRITE1(SO)
+                STATE_SETFLAG(F_SHIFTED)
+                NEXT_OUT(1)
+            }
+            break;
+        default: /* G2 and G3 are not supported: no encodings in
+                  * CJKCodecs use them yet */
+            return MBERR_INTERNAL;
+        }
+        if (dsg->width == 1) {
+            WRITE1((unsigned char)encoded)
+            NEXT_OUT(1)
+        }
+        else {
+            WRITE2(encoded >> 8, encoded & 0xff)
+            NEXT_OUT(2)
+        }
+        NEXT_IN(insize)
+    }
+    return 0;
+    return 0;
+    return 0;
+static Py_ssize_t
+iso2022processesc(const void *config, MultibyteCodec_State *state,
+                  const unsigned char **inbuf, Py_ssize_t *inleft)
+    unsigned char charset, designation;
+    Py_ssize_t i, esclen;
+    for (i = 1;i < MAX_ESCSEQLEN;i++) {
+        if (i >= *inleft)
+            return MBERR_TOOFEW;
+        if (IS_ESCEND((*inbuf)[i])) {
+            esclen = i + 1;
+            break;
+        }
+        else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
+                 (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@')
+            i += 2;
+    }
+    if (i >= MAX_ESCSEQLEN)
+        return 1; /* unterminated escape sequence */
+    switch (esclen) {
+    case 3:
+        if (IN2 == '$') {
+            charset = IN3 | CHARSET_DBCS;
+            designation = 0;
+        }
+        else {
+            charset = IN3;
+            if (IN2 == '(') designation = 0;
+            else if (IN2 == ')') designation = 1;
+            else if (CONFIG_ISSET(USE_G2) && IN2 == '.')
+                designation = 2;
+            else return 3;
+        }
+        break;
+    case 4:
+        if (IN2 != '$')
+            return 4;
+        charset = IN4 | CHARSET_DBCS;
+        if (IN3 == '(') designation = 0;
+        else if (IN3 == ')') designation = 1;
+        else return 4;
+        break;
+    case 6: /* designation with prefix */
+        if (CONFIG_ISSET(USE_JISX0208_EXT) &&
+            (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&
+            (*inbuf)[5] == 'B') {
+            charset = 'B' | CHARSET_DBCS;
+            designation = 0;
+        }
+        else
+            return 6;
+        break;
+    default:
+        return esclen;
+    }
+    /* raise error when the charset is not designated for this encoding */
+    if (charset != CHARSET_ASCII) {
+        const struct iso2022_designation *dsg;
+        for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++)
+            if (dsg->mark == charset)
+                break;
+        if (!dsg->mark)
+            return esclen;
+    }
+    STATE_SETG(designation, charset)
+    *inleft -= esclen;
+    (*inbuf) += esclen;
+    return 0;
+#define ISO8859_7_DECODE(c, assi)                                       \
+    if ((c) < 0xa0) (assi) = (c);                                       \
+    else if ((c) < 0xc0 && (0x288f3bc9L & (1L << ((c)-0xa0))))          \
+        (assi) = (c);                                                   \
+    else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||              \
+             (0xbffffd77L & (1L << ((c)-0xb4)))))                       \
+        (assi) = 0x02d0 + (c);                                          \
+    else if ((c) == 0xa1) (assi) = 0x2018;                              \
+    else if ((c) == 0xa2) (assi) = 0x2019;                              \
+    else if ((c) == 0xaf) (assi) = 0x2015;
+static Py_ssize_t
+iso2022processg2(const void *config, MultibyteCodec_State *state,
+                 const unsigned char **inbuf, Py_ssize_t *inleft,
+                 Py_UNICODE **outbuf, Py_ssize_t *outleft)
+    /* Not written in terms of the encoder/decoder functions because only
+     * a few encodings in CJKCodecs use G2 designations. */
+    if (STATE_G2 == CHARSET_ISO8859_1) {
+        if (IN3 < 0x80)
+            OUT1(IN3 + 0x80)
+        else
+            return 3;
+    }
+    else if (STATE_G2 == CHARSET_ISO8859_7) {
+        ISO8859_7_DECODE(IN3 ^ 0x80, **outbuf)
+        else return 3;
+    }
+    else if (STATE_G2 == CHARSET_ASCII) {
+        if (IN3 & 0x80) return 3;
+        else **outbuf = IN3;
+    }
+    else
+        return MBERR_INTERNAL;
+    (*inbuf) += 3;
+    *inleft -= 3;
+    (*outbuf) += 1;
+    *outleft -= 1;
+    return 0;
+    const struct iso2022_designation *dsgcache = NULL;
+    while (inleft > 0) {
+        unsigned char c = IN1;
+        Py_ssize_t err;
+            /* ESC throughout mode:
+             * for non-iso2022 escape sequences */
+            WRITE1(c) /* assume as ISO-8859-1 */
+            NEXT(1, 1)
+            if (IS_ESCEND(c)) {
+            }
+            continue;
+        }
+        switch (c) {
+        case ESC:
+            REQUIRE_INBUF(2)
+            if (IS_ISO2022ESC(IN2)) {
+                err = iso2022processesc(config, state,
+                                        inbuf, &inleft);
+                if (err != 0)
+                    return err;
+            }
+            else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */
+                REQUIRE_INBUF(3)
+                err = iso2022processg2(config, state,
+                    inbuf, &inleft, outbuf, &outleft);
+                if (err != 0)
+                    return err;
+            }
+            else {
+                WRITE1(ESC)
+                NEXT(1, 1)
+            }
+            break;
+        case SI:
+            if (CONFIG_ISSET(NO_SHIFT))
+                goto bypass;
+            NEXT_IN(1)
+            break;
+        case SO:
+            if (CONFIG_ISSET(NO_SHIFT))
+                goto bypass;
+            NEXT_IN(1)
+            break;
+        case LF:
+            WRITE1(LF)
+            NEXT(1, 1)
+            break;
+        default:
+            if (c < 0x20) /* C0 */
+                goto bypass;
+            else if (c >= 0x80)
+                return 1;
+            else {
+                const struct iso2022_designation *dsg;
+                unsigned char charset;
+                ucs4_t decoded;
+                if (STATE_GETFLAG(F_SHIFTED))
+                    charset = STATE_G1;
+                else
+                    charset = STATE_G0;
+                if (charset == CHARSET_ASCII) {
+bypass:                                 WRITE1(c)
+                                        NEXT(1, 1)
+                                        break;
+                                }
+                                if (dsgcache != NULL &&
+                                    dsgcache->mark == charset)
+                                        dsg = dsgcache;
+                                else {
+                                        for (dsg = CONFIG_DESIGNATIONS;
+                                             dsg->mark != charset
+#ifdef Py_DEBUG
+                                                && dsg->mark != '\0'
+                                             ;dsg++)
+                                                /* noop */;
+                                        assert(dsg->mark != '\0');
+                                        dsgcache = dsg;
+                                }
+                                REQUIRE_INBUF(dsg->width)
+                                decoded = dsg->decoder(*inbuf);
+                                if (decoded == MAP_UNMAPPABLE)
+                                        return dsg->width;
+                                if (decoded < 0x10000) {
+                                        WRITE1(decoded)
+                                        NEXT_OUT(1)
+                                }
+                                else if (decoded < 0x30000) {
+                                        WRITEUCS4(decoded)
+                                }
+                                else { /* JIS X 0213 pairs */
+                    WRITE2(decoded >> 16, decoded & 0xffff)
+                    NEXT_OUT(2)
+                }
+                NEXT_IN(dsg->width)
+            }
+            break;
+        }
+    }
+    return 0;
+/*-*- mapping table holders -*-*/
+#define ENCMAP(enc) static const encode_map *enc##_encmap = NULL;
+#define DECMAP(enc) static const decode_map *enc##_decmap = NULL;
+/* kr */
+/* jp */
+/* cn */
+/* tw */
+/*-*- mapping access functions -*-*/
+static int
+  IMPORT_MAP(kr, cp949, &cp949_encmap, NULL);
+  IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap);
+  return 0;
+static ucs4_t
+ksx1001_decoder(const unsigned char *data)
+    ucs4_t u;
+    TRYMAP_DEC(ksx1001, u, data[0], data[1])
+        return u;
+    else
+        return MAP_UNMAPPABLE;
+static DBCHAR
+ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded;
+    assert(*length == 1);
+    if (*data < 0x10000) {
+        TRYMAP_ENC(cp949, coded, *data)
+            if (!(coded & 0x8000))
+                return coded;
+    }
+    return MAP_UNMAPPABLE;
+static int
+  IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL);
+  IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap);
+  return 0;
+static ucs4_t
+jisx0208_decoder(const unsigned char *data)
+    ucs4_t u;
+    if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
+        return 0xff3c;
+    else TRYMAP_DEC(jisx0208, u, data[0], data[1])
+        return u;
+    else
+        return MAP_UNMAPPABLE;
+static DBCHAR
+jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded;
+    assert(*length == 1);
+    if (*data < 0x10000) {
+        if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
+            return 0x2140;
+        else TRYMAP_ENC(jisxcommon, coded, *data) {
+            if (!(coded & 0x8000))
+                return coded;
+        }
+    }
+    return MAP_UNMAPPABLE;
+static int
+  IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL);
+  IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap);
+  return 0;
+static ucs4_t
+jisx0212_decoder(const unsigned char *data)
+    ucs4_t u;
+    TRYMAP_DEC(jisx0212, u, data[0], data[1])
+        return u;
+    else
+        return MAP_UNMAPPABLE;
+static DBCHAR
+jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded;
+    assert(*length == 1);
+    if (*data < 0x10000) {
+        TRYMAP_ENC(jisxcommon, coded, *data) {
+            if (coded & 0x8000)
+                return coded & 0x7fff;
+        }
+    }
+    return MAP_UNMAPPABLE;
+static int
+  jisx0208_init();
+  IMPORT_MAP(jp, jisx0213_bmp, &jisx0213_bmp_encmap, NULL);
+  IMPORT_MAP(jp, jisx0213_1_bmp, NULL, &jisx0213_1_bmp_decmap);
+  IMPORT_MAP(jp, jisx0213_2_bmp, NULL, &jisx0213_2_bmp_decmap);
+  IMPORT_MAP(jp, jisx0213_emp, &jisx0213_emp_encmap, NULL);
+  IMPORT_MAP(jp, jisx0213_1_emp, NULL, &jisx0213_1_emp_decmap);
+  IMPORT_MAP(jp, jisx0213_2_emp, NULL, &jisx0213_2_emp_decmap);
+  IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap, &jisx0213_pair_decmap);
+  return 0;
+#define config ((void *)2000)
+static ucs4_t
+jisx0213_2000_1_decoder(const unsigned char *data)
+    ucs4_t u;
+    EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
+    else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
+        return 0xff3c;
+    else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
+    else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
+    else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+        u |= 0x20000;
+    else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+    else
+        return MAP_UNMAPPABLE;
+    return u;
+static ucs4_t
+jisx0213_2000_2_decoder(const unsigned char *data)
+    ucs4_t u;
+    EMULATE_JISX0213_2000_DECODE_PLANE2(u, data[0], data[1])
+    TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
+    else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+        u |= 0x20000;
+    else
+        return MAP_UNMAPPABLE;
+    return u;
+#undef config
+static ucs4_t
+jisx0213_2004_1_decoder(const unsigned char *data)
+    ucs4_t u;
+    if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
+        return 0xff3c;
+    else TRYMAP_DEC(jisx0208, u, data[0], data[1]);
+    else TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]);
+    else TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])
+        u |= 0x20000;
+    else TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]);
+    else
+        return MAP_UNMAPPABLE;
+    return u;
+static ucs4_t
+jisx0213_2004_2_decoder(const unsigned char *data)
+    ucs4_t u;
+    TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]);
+    else TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])
+        u |= 0x20000;
+    else
+        return MAP_UNMAPPABLE;
+    return u;
+static DBCHAR
+jisx0213_encoder(const ucs4_t *data, Py_ssize_t *length, void *config)
+    DBCHAR coded;
+    switch (*length) {
+    case 1: /* first character */
+        if (*data >= 0x10000) {
+            if ((*data) >> 16 == 0x20000 >> 16) {
+                EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)
+                else TRYMAP_ENC(jisx0213_emp, coded,
+                                (*data) & 0xffff)
+                    return coded;
+            }
+            return MAP_UNMAPPABLE;
+        }
+        EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)
+        else TRYMAP_ENC(jisx0213_bmp, coded, *data) {
+            if (coded == MULTIC)
+                return MAP_MULTIPLE_AVAIL;
+        }
+        else TRYMAP_ENC(jisxcommon, coded, *data) {
+            if (coded & 0x8000)
+                return MAP_UNMAPPABLE;
+        }
+        else
+            return MAP_UNMAPPABLE;
+        return coded;
+    case 2: /* second character of unicode pair */
+        coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
+                        jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+        if (coded == DBCINV) {
+            *length = 1;
+            coded = find_pairencmap((ucs2_t)data[0], 0,
+                      jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+            if (coded == DBCINV)
+                return MAP_UNMAPPABLE;
+        }
+        else
+            return coded;
+    case -1: /* flush unterminated */
+        *length = 1;
+        coded = find_pairencmap((ucs2_t)data[0], 0,
+                        jisx0213_pair_encmap, JISX0213_ENCPAIRS);
+        if (coded == DBCINV)
+            return MAP_UNMAPPABLE;
+        else
+            return coded;
+    default:
+        return MAP_UNMAPPABLE;
+    }
+static DBCHAR
+jisx0213_2000_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
+    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+        return coded;
+    else if (coded & 0x8000)
+        return MAP_UNMAPPABLE;
+    else
+        return coded;
+static DBCHAR
+jisx0213_2000_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded;
+    Py_ssize_t ilength = *length;
+    coded = jisx0213_encoder(data, length, (void *)2000);
+    switch (ilength) {
+    case 1:
+        if (coded == MAP_MULTIPLE_AVAIL)
+            return MAP_MULTIPLE_AVAIL;
+        else
+            return MAP_UNMAPPABLE;
+    case 2:
+        if (*length != 2)
+            return MAP_UNMAPPABLE;
+        else
+            return coded;
+    default:
+        return MAP_UNMAPPABLE;
+    }
+static DBCHAR
+jisx0213_2000_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
+    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+        return coded;
+    else if (coded & 0x8000)
+        return coded & 0x7fff;
+    else
+        return MAP_UNMAPPABLE;
+static DBCHAR
+jisx0213_2004_1_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded = jisx0213_encoder(data, length, NULL);
+    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+        return coded;
+    else if (coded & 0x8000)
+        return MAP_UNMAPPABLE;
+    else
+        return coded;
+static DBCHAR
+jisx0213_2004_1_encoder_paironly(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded;
+    Py_ssize_t ilength = *length;
+    coded = jisx0213_encoder(data, length, NULL);
+    switch (ilength) {
+    case 1:
+        if (coded == MAP_MULTIPLE_AVAIL)
+            return MAP_MULTIPLE_AVAIL;
+        else
+            return MAP_UNMAPPABLE;
+    case 2:
+        if (*length != 2)
+            return MAP_UNMAPPABLE;
+        else
+            return coded;
+    default:
+        return MAP_UNMAPPABLE;
+    }
+static DBCHAR
+jisx0213_2004_2_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded = jisx0213_encoder(data, length, NULL);
+    if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
+        return coded;
+    else if (coded & 0x8000)
+        return coded & 0x7fff;
+    else
+        return MAP_UNMAPPABLE;
+static ucs4_t
+jisx0201_r_decoder(const unsigned char *data)
+    ucs4_t u;
+    JISX0201_R_DECODE(*data, u)
+    else return MAP_UNMAPPABLE;
+    return u;
+static DBCHAR
+jisx0201_r_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded;
+    JISX0201_R_ENCODE(*data, coded)
+    else return MAP_UNMAPPABLE;
+    return coded;
+static ucs4_t
+jisx0201_k_decoder(const unsigned char *data)
+    ucs4_t u;
+    JISX0201_K_DECODE(*data ^ 0x80, u)
+    else return MAP_UNMAPPABLE;
+    return u;
+static DBCHAR
+jisx0201_k_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded;
+    JISX0201_K_ENCODE(*data, coded)
+    else return MAP_UNMAPPABLE;
+    return coded - 0x80;
+static int
+  IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL);
+  IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap);
+  return 0;
+static ucs4_t
+gb2312_decoder(const unsigned char *data)
+    ucs4_t u;
+    TRYMAP_DEC(gb2312, u, data[0], data[1])
+        return u;
+    else
+        return MAP_UNMAPPABLE;
+static DBCHAR
+gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
+    DBCHAR coded;
+    assert(*length == 1);
+    if (*data < 0x10000) {
+        TRYMAP_ENC(gbcommon, coded, *data) {
+            if (!(coded & 0x8000))
+                return coded;
+        }
+    }
+    return MAP_UNMAPPABLE;
+static ucs4_t
+dummy_decoder(const unsigned char *data)
+    return MAP_UNMAPPABLE;
+static DBCHAR
+dummy_encoder(const ucs4_t *data, Py_ssize_t *length)
+    return MAP_UNMAPPABLE;
+/*-*- registry tables -*-*/    /* Each REGISTRY_* macro expands to a six-field brace
+ * initializer.  Judging from the values below the fields look like: charset id,
+ * a small integer (0, 1 or 2), a byte width (1 for the JIS X 0201 and ISO 8859
+ * entries, 2 for all others), an init function (or NULL), a decoder and an
+ * encoder.  NOTE(review): confirm the field meanings against the declaration of
+ * struct iso2022_designation, which is not visible in this chunk.
+ * Points worth knowing when reading the list:
+ *  - REGISTRY_JISX0208 and REGISTRY_JISX0208_O differ only in the charset id;
+ *  - the *_PAIRONLY entries differ from their base entry only in the encoder;
+ *  - both JISX0213 plane-2 entries use CHARSET_JISX0213_2;
+ *  - the ISO8859 entries use the dummy decoder/encoder;
+ *  - REGISTRY_SENTINEL is an entry whose first field is 0. */
+#define REGISTRY_KSX1001_G0     { CHARSET_KSX1001, 0, 2,                \
+                  ksx1001_init,                                         \
+                  ksx1001_decoder, ksx1001_encoder }
+#define REGISTRY_KSX1001_G1     { CHARSET_KSX1001, 1, 2,                \
+                  ksx1001_init,                                         \
+                  ksx1001_decoder, ksx1001_encoder }
+#define REGISTRY_JISX0201_R     { CHARSET_JISX0201_R, 0, 1,             \
+                  NULL,                                                 \
+                  jisx0201_r_decoder, jisx0201_r_encoder }
+#define REGISTRY_JISX0201_K     { CHARSET_JISX0201_K, 0, 1,             \
+                  NULL,                                                 \
+                  jisx0201_k_decoder, jisx0201_k_encoder }
+#define REGISTRY_JISX0208       { CHARSET_JISX0208, 0, 2,               \
+                  jisx0208_init,                                        \
+                  jisx0208_decoder, jisx0208_encoder }
+#define REGISTRY_JISX0208_O     { CHARSET_JISX0208_O, 0, 2,             \
+                  jisx0208_init,                                        \
+                  jisx0208_decoder, jisx0208_encoder }
+#define REGISTRY_JISX0212       { CHARSET_JISX0212, 0, 2,               \
+                  jisx0212_init,                                        \
+                  jisx0212_decoder, jisx0212_encoder }
+#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2,       \
+                  jisx0213_init,                                        \
+                  jisx0213_2000_1_decoder,                              \
+                  jisx0213_2000_1_encoder }
+#define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \
+                  jisx0213_init,                                        \
+                  jisx0213_2000_1_decoder,                              \
+                  jisx0213_2000_1_encoder_paironly }
+#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2,            \
+                  jisx0213_init,                                        \
+                  jisx0213_2000_2_decoder,                              \
+                  jisx0213_2000_2_encoder }
+#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2,       \
+                  jisx0213_init,                                        \
+                  jisx0213_2004_1_decoder,                              \
+                  jisx0213_2004_1_encoder }
+#define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \
+                  jisx0213_init,                                        \
+                  jisx0213_2004_1_decoder,                              \
+                  jisx0213_2004_1_encoder_paironly }
+#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2,            \
+                  jisx0213_init,                                        \
+                  jisx0213_2004_2_decoder,                              \
+                  jisx0213_2004_2_encoder }
+#define REGISTRY_GB2312         { CHARSET_GB2312, 0, 2,                 \
+                  gb2312_init,                                          \
+                  gb2312_decoder, gb2312_encoder }
+#define REGISTRY_CNS11643_1     { CHARSET_CNS11643_1, 1, 2,             \
+                  cns11643_init,                                        \
+                  cns11643_1_decoder, cns11643_1_encoder }
+#define REGISTRY_CNS11643_2     { CHARSET_CNS11643_2, 2, 2,             \
+                  cns11643_init,                                        \
+                  cns11643_2_decoder, cns11643_2_encoder }
+#define REGISTRY_ISO8859_1      { CHARSET_ISO8859_1, 2, 1,              \
+                  NULL, dummy_decoder, dummy_encoder }
+#define REGISTRY_ISO8859_7      { CHARSET_ISO8859_7, 2, 1,              \
+                  NULL, dummy_decoder, dummy_encoder }
+#define REGISTRY_SENTINEL       { 0, }
+#define CONFIGDEF(var, attrs)                                           \
+    static const struct iso2022_config iso2022_##var##_config = {       \
+        attrs, iso2022_##var##_designations                             \
+    };    /* CONFIGDEF(var, attrs) defines the constant iso2022_<var>_config,
+           * initialized with attrs and the iso2022_<var>_designations table. */
+static const struct iso2022_designation iso2022_kr_designations[] = {
+static const struct iso2022_designation iso2022_jp_designations[] = {
+static const struct iso2022_designation iso2022_jp_1_designations[] = {
+static const struct iso2022_designation iso2022_jp_2_designations[] = {
+static const struct iso2022_designation iso2022_jp_2004_designations[] = {
+static const struct iso2022_designation iso2022_jp_3_designations[] = {
+static const struct iso2022_designation iso2022_jp_ext_designations[] = {
+  /* no mapping table here */
+#define ISO2022_CODEC(variation)                        \
+  CODEC_STATEFUL_CONFIG(iso2022,                        \
+                        variation,                      \
+                        &iso2022_##variation##_config)    /* ISO2022_CODEC(x) forwards to
+                         * CODEC_STATEFUL_CONFIG with the iso2022 codec, the variation
+                         * name x and a pointer to iso2022_<x>_config. */
+  ISO2022_CODEC(kr)    /* one entry per ISO-2022 variation */
+  ISO2022_CODEC(jp)
+  ISO2022_CODEC(jp_1)
+  ISO2022_CODEC(jp_2)
+  ISO2022_CODEC(jp_2004)
+  ISO2022_CODEC(jp_3)
+  ISO2022_CODEC(jp_ext)
diff --git a/pypy/translator/c/src/cjkcodecs/_codecs_jp.c b/pypy/translator/c/src/cjkcodecs/_codecs_jp.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/_codecs_jp.c
@@ -0,0 +1,731 @@
+/*
+ * _codecs_jp.c: Codecs collection for Japanese encodings
+ *
+ * Written by Hye-Shik Chang <perky at FreeBSD.org>
+ */
+#define EMPBASE 0x20000
+#include "src/cjkcodecs/cjkcodecs.h"
+#include "src/cjkcodecs/mappings_jp.h"
+#include "src/cjkcodecs/mappings_jisx0213_pair.h"
+#include "src/cjkcodecs/alg_jisx0201.h"
+#include "src/cjkcodecs/emu_jisx0213_2000.h"
+/*
+ * CP932 codec
+ */
+    while (inleft > 0) {    /* CP932 encoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk.  Each pass reads one
+                             * Py_UNICODE and emits one or two bytes (NEXT(i, o) presumably
+                             * advances the input by i and the output by o -- confirm).
+                             * Returns 1 when no mapping applies, 0 when input is used up. */
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        unsigned char c1, c2;
+        if (c <= 0x80) {    /* note: 0x80 itself is also passed through as one byte */
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            continue;
+        }
+        else if (c >= 0xff61 && c <= 0xff9f) {    /* maps to single bytes 0xa1..0xdf */
+            WRITE1(c - 0xfec0)
+            NEXT(1, 1)
+            continue;
+        }
+        else if (c >= 0xf8f0 && c <= 0xf8f3) {
+            /* Windows compatibility */
+            REQUIRE_OUTBUF(1)
+            if (c == 0xf8f0)
+                OUT1(0xa0)
+            else
+                OUT1(c - 0xfef1 + 0xfd)    /* NOTE(review): the decoder computes
+                                            * 0xf8f1 - 0xfd + c, so 0xf8f1 was probably
+                                            * meant here.  0xfef1 differs by 0x600, so the
+                                            * low eight bits are the same if OUT1 stores
+                                            * into a byte -- confirm. */
+            NEXT(1, 1)
+            continue;
+        }
+        UCS4INVALID(c)
+        REQUIRE_OUTBUF(2)
+        TRYMAP_ENC(cp932ext, code, c) {    /* the extension map takes precedence */
+            OUT1(code >> 8)
+            OUT2(code & 0xff)
+        }
+        else TRYMAP_ENC(jisxcommon, code, c) {
+            if (code & 0x8000) /* MSB set: JIS X 0212 */
+                return 1;
+            /* JIS X 0208 */
+            c1 = code >> 8;
+            c2 = code & 0xff;
+            /* two rows share one lead byte: odd rows shift the cell index by 0x5e */
+            c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
+            c1 = (c1 - 0x21) >> 1;
+            OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
+            OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)    /* trail byte skips 0x7f */
+        }
+        else if (c >= 0xe000 && c < 0xe758) {
+            /* User-defined area */
+            c1 = (Py_UNICODE)(c - 0xe000) / 188;    /* 188 code points per lead byte */
+            c2 = (Py_UNICODE)(c - 0xe000) % 188;
+            OUT1(c1 + 0xf0)
+            OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+        }
+        else
+            return 1;
+        NEXT(1, 2)
+    }
+    return 0;
+    while (inleft > 0) {    /* CP932 decoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk.  Each pass reads one or
+                             * two bytes and stores one value through *outbuf.  Returns 2
+                             * for an invalid two-byte sequence, 0 when input is used up. */
+        unsigned char c = IN1, c2;
+        REQUIRE_OUTBUF(1)
+        if (c <= 0x80) {    /* bytes up to and including 0x80 decode to themselves */
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        else if (c >= 0xa0 && c <= 0xdf) {
+            if (c == 0xa0)
+                OUT1(0xf8f0) /* half-width katakana */
+            else
+                OUT1(0xfec0 + c)    /* 0xa1..0xdf -> U+FF61..U+FF9F */
+            NEXT(1, 1)
+            continue;
+        }
+        else if (c >= 0xfd/* && c <= 0xff*/) {
+            /* Windows compatibility */
+            OUT1(0xf8f1 - 0xfd + c)    /* 0xfd..0xff -> U+F8F1..U+F8F3 */
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        c2 = IN2;
+        TRYMAP_DEC(cp932ext, **outbuf, c, c2);    /* the extension map is tried first */
+        else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
+            if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
+                return 2;
+            /* undo the encoder's folding: rebuild the row (c) and cell (c2)
+             * that are then looked up in the jisx0208 map */
+            c = (c < 0xe0 ? c - 0x81 : c - 0xc1);
+            c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
+            c = (2 * c + (c2 < 0x5e ? 0 : 1) + 0x21);
+            c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
+            TRYMAP_DEC(jisx0208, **outbuf, c, c2);
+            else return 2;
+        }
+        else if (c >= 0xf0 && c <= 0xf9) {    /* lead bytes mapped to U+E000 and up */
+            if ((c2 >= 0x40 && c2 <= 0x7e) ||
+                (c2 >= 0x80 && c2 <= 0xfc))
+                OUT1(0xe000 + 188 * (c - 0xf0) +
+                     (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41))
+            else
+                return 2;
+        }
+        else
+            return 2;
+        NEXT(2, 1)
+    }
+    return 0;
+/*
+ * EUC-JIS-2004 codec
+ */
+    while (inleft > 0) {    /* EUC-JIS-2004 encoder loop.  NOTE(review): the enclosing
+                             * function header is missing from this chunk.  Each pass
+                             * encodes one code point (or a two-character pair) into one,
+                             * two or three bytes. */
+        ucs4_t c = IN1;
+        DBCHAR code;
+        Py_ssize_t insize;
+        if (c < 0x80) {
+            WRITE1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        insize = GET_INSIZE(c);    /* number of input units consumed for c */
+        if (c <= 0xFFFF) {
+            EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
+            else TRYMAP_ENC(jisx0213_bmp, code, c) {
+                if (code == MULTIC) {    /* c may combine with the next input character */
+                    if (inleft < 2) {
+                        if (flags & MBENC_FLUSH) {
+                            /* no more input will come: encode c alone (partner 0) */
+                            code = find_pairencmap(
+                                (ucs2_t)c, 0,
+                              jisx0213_pair_encmap,
+                                JISX0213_ENCPAIRS);
+                            if (code == DBCINV)
+                                return 1;
+                        }
+                        else
+                            return MBERR_TOOFEW;    /* wait for the next character */
+                    }
+                    else {
+                        /* try the pair (c, next char) first, then c on its own */
+                        code = find_pairencmap(
+                            (ucs2_t)c, (*inbuf)[1],
+                            jisx0213_pair_encmap,
+                            JISX0213_ENCPAIRS);
+                        if (code == DBCINV) {
+                            code = find_pairencmap(
+                                (ucs2_t)c, 0,
+                              jisx0213_pair_encmap,
+                                JISX0213_ENCPAIRS);
+                            if (code == DBCINV)
+                                return 1;
+                        } else
+                            insize = 2;    /* pair matched: consume both characters */
+                    }
+                }
+            }
+            else TRYMAP_ENC(jisxcommon, code, c);
+            else if (c >= 0xff61 && c <= 0xff9f) {
+                /* JIS X 0201 half-width katakana */
+                WRITE2(0x8e, c - 0xfec0)
+                NEXT(1, 2)
+                continue;
+            }
+            else if (c == 0xff3c)
+                /* F/W REVERSE SOLIDUS (see NOTES) */
+                code = 0x2140;
+            else if (c == 0xff5e)
+                /* F/W TILDE (see NOTES) */
+                code = 0x2232;
+            else
+                return 1;
+        }
+        else if (c >> 16 == EMPBASE >> 16) {    /* c lies in 0x20000..0x2ffff */
+            EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
+            else TRYMAP_ENC(jisx0213_emp, code, c & 0xffff);
+            else return insize;
+        }
+        else
+            return insize;
+        if (code & 0x8000) {
+            /* Codeset 2 */
+            WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
+            NEXT(insize, 3)
+        } else {
+            /* Codeset 1 */
+            WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
+            NEXT(insize, 2)
+        }
+    }
+    return 0;
+    while (inleft > 0) {    /* EUC-JIS-2004 decoder loop.  NOTE(review): the enclosing
+                             * function header is missing from this chunk.  The lead byte
+                             * selects the form: < 0x80 single byte, 0x8e two bytes,
+                             * 0x8f three bytes, anything else two bytes. */
+        unsigned char c = IN1;
+        ucs4_t code;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        if (c == 0x8e) {
+            /* JIS X 0201 half-width katakana */
+            unsigned char c2;
+            REQUIRE_INBUF(2)
+            c2 = IN2;
+            if (c2 >= 0xa1 && c2 <= 0xdf) {
+                OUT1(0xfec0 + c2)
+                NEXT(2, 1)
+            }
+            else
+                return 2;
+        }
+        else if (c == 0x8f) {
+            unsigned char c2, c3;
+            REQUIRE_INBUF(3)
+            c2 = IN2 ^ 0x80;    /* flip bit 7 of both trailing bytes before the lookups */
+            c3 = IN3 ^ 0x80;
+            /* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
+            EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf, c2, c3)
+            else TRYMAP_DEC(jisx0213_2_bmp, **outbuf, c2, c3) ;
+            else TRYMAP_DEC(jisx0213_2_emp, code, c2, c3) {
+                WRITEUCS4(EMPBASE | code)    /* EMPBASE is 0x20000 in this file */
+                NEXT_IN(3)
+                continue;
+            }
+            else TRYMAP_DEC(jisx0212, **outbuf, c2, c3) ;
+            else return 3;
+            NEXT(3, 1)
+        }
+        else {
+            unsigned char c2;
+            REQUIRE_INBUF(2)
+            c ^= 0x80;
+            c2 = IN2 ^ 0x80;
+            /* JIS X 0213 Plane 1 */
+            EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf, c, c2)
+            else if (c == 0x21 && c2 == 0x40) **outbuf = 0xff3c;
+            else if (c == 0x22 && c2 == 0x32) **outbuf = 0xff5e;
+            else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
+            else TRYMAP_DEC(jisx0213_1_bmp, **outbuf, c, c2);
+            else TRYMAP_DEC(jisx0213_1_emp, code, c, c2) {
+                WRITEUCS4(EMPBASE | code)
+                NEXT_IN(2)
+                continue;
+            }
+            else TRYMAP_DEC(jisx0213_pair, code, c, c2) {
+                /* one two-byte code decodes to two output units */
+                WRITE2(code >> 16, code & 0xffff)
+                NEXT(2, 2)
+                continue;
+            }
+            else return 2;
+            NEXT(2, 1)
+        }
+    }
+    return 0;
+/*
+ * EUC-JP codec
+ */
+    while (inleft > 0) {    /* EUC-JP encoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk.  Each pass encodes one
+                             * Py_UNICODE into one, two or three bytes; returns 1 when no
+                             * mapping applies. */
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        if (c < 0x80) {
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            continue;
+        }
+        UCS4INVALID(c)
+        TRYMAP_ENC(jisxcommon, code, c);
+        else if (c >= 0xff61 && c <= 0xff9f) {
+            /* JIS X 0201 half-width katakana */
+            WRITE2(0x8e, c - 0xfec0)
+            NEXT(1, 2)
+            continue;
+        }
+        else if (c == 0xff3c) /* FULL-WIDTH REVERSE SOLIDUS */
+            code = 0x2140;
+        else if (c == 0xa5) { /* YEN SIGN */
+            WRITE1(0x5c);
+            NEXT(1, 1)
+            continue;
+        } else if (c == 0x203e) { /* OVERLINE */
+            WRITE1(0x7e);
+            NEXT(1, 1)
+            continue;
+        }
+        else
+            return 1;
+        if (code & 0x8000) {    /* the high bit of the map result selects the 3-byte form */
+            /* JIS X 0212 */
+            WRITE3(0x8f, code >> 8, (code & 0xFF) | 0x80)
+            NEXT(1, 3)
+        } else {
+            /* JIS X 0208 */
+            WRITE2((code >> 8) | 0x80, (code & 0xFF) | 0x80)
+            NEXT(1, 2)
+        }
+    }
+    return 0;
+    while (inleft > 0) {    /* EUC-JP decoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk.  The lead byte selects
+                             * the form: < 0x80 single byte, 0x8e two bytes, 0x8f three
+                             * bytes, anything else a two-byte jisx0208 lookup. */
+        unsigned char c = IN1;
+        REQUIRE_OUTBUF(1)
+            if (c < 0x80) {
+                OUT1(c)
+                NEXT(1, 1)
+                continue;
+            }
+        if (c == 0x8e) {
+            /* JIS X 0201 half-width katakana */
+            unsigned char c2;
+            REQUIRE_INBUF(2)
+            c2 = IN2;
+            if (c2 >= 0xa1 && c2 <= 0xdf) {
+                OUT1(0xfec0 + c2)
+                NEXT(2, 1)
+            }
+            else
+                return 2;
+        }
+        else if (c == 0x8f) {
+            unsigned char c2, c3;
+            REQUIRE_INBUF(3)
+            c2 = IN2;
+            c3 = IN3;
+            /* JIS X 0212 */
+            TRYMAP_DEC(jisx0212, **outbuf, c2 ^ 0x80, c3 ^ 0x80) {
+                NEXT(3, 1)
+            }
+            else
+                return 3;
+        }
+        else {
+            unsigned char c2;
+            REQUIRE_INBUF(2)
+            c2 = IN2;
+            /* JIS X 0208 */
+            if (c == 0xa1 && c2 == 0xc0)
+                /* FULL-WIDTH REVERSE SOLIDUS */
+                **outbuf = 0xff3c;
+            else
+                TRYMAP_DEC(jisx0208, **outbuf,
+                           c ^ 0x80, c2 ^ 0x80) ;
+            else return 2;    /* NOTE(review): as written this else has no matching if
+                               * unless TRYMAP_DEC expands to one; the special case
+                               * above may belong to a conditional-compilation block
+                               * whose directives are missing here -- confirm. */
+            NEXT(2, 1)
+        }
+    }
+    return 0;
+/*
+ * SHIFT_JIS codec
+ */
+    while (inleft > 0) {    /* SHIFT_JIS encoder loop.  NOTE(review): the enclosing
+                             * function header is missing from this chunk.  Each pass
+                             * encodes one Py_UNICODE into one or two bytes; returns 1
+                             * when no mapping applies. */
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        unsigned char c1, c2;
+        JISX0201_R_ENCODE(c, code)    /* NOTE(review): this macro and the open-coded
+                                       * if/else chain on the next three lines look like
+                                       * the two alternatives of a conditional-compilation
+                                       * block whose directives are missing from this
+                                       * chunk -- confirm against the upstream file. */
+        if (c < 0x80) code = c;
+        else if (c == 0x00a5) code = 0x5c; /* YEN SIGN */
+        else if (c == 0x203e) code = 0x7e; /* OVERLINE */
+        else JISX0201_K_ENCODE(c, code)
+        else UCS4INVALID(c)
+        else code = NOCHAR;    /* nothing single-byte matched: use the table below */
+        if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {    /* single-byte result */
+            REQUIRE_OUTBUF(1)
+            OUT1((unsigned char)code)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_OUTBUF(2)
+        if (code == NOCHAR) {
+            TRYMAP_ENC(jisxcommon, code, c);
+            else if (c == 0xff3c)
+                code = 0x2140; /* FULL-WIDTH REVERSE SOLIDUS */
+            else
+                return 1;
+            if (code & 0x8000) /* MSB set: JIS X 0212 */
+                return 1;
+        }
+        c1 = code >> 8;
+        c2 = code & 0xff;
+        /* two rows share one lead byte: odd rows shift the cell index by 0x5e */
+        c2 = (((c1 - 0x21) & 1) ? 0x5e : 0) + (c2 - 0x21);
+        c1 = (c1 - 0x21) >> 1;
+        OUT1(c1 < 0x1f ? c1 + 0x81 : c1 + 0xc1)
+        OUT2(c2 < 0x3f ? c2 + 0x40 : c2 + 0x41)
+        NEXT(1, 2)
+    }
+    return 0;
+    while (inleft > 0) {    /* SHIFT_JIS decoder loop.  NOTE(review): the enclosing
+                             * function header is missing from this chunk.  Single-byte
+                             * results fall through to the NEXT(1, 1) at the bottom;
+                             * two-byte results advance and continue inside their branch. */
+        unsigned char c = IN1;
+        REQUIRE_OUTBUF(1)
+        JISX0201_R_DECODE(c, **outbuf)    /* NOTE(review): this macro and the plain
+                                           * "c < 0x80" test on the next line look like
+                                           * two alternatives of a conditional-compilation
+                                           * block whose directives are missing from this
+                                           * chunk -- confirm against the upstream file. */
+        if (c < 0x80) **outbuf = c;
+        else JISX0201_K_DECODE(c, **outbuf)
+        else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xea)){
+            unsigned char c1, c2;
+            REQUIRE_INBUF(2)
+            c2 = IN2;
+            if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
+                return 2;
+            /* rebuild the row (c1) and cell (c2) used for the jisx0208 lookup */
+            c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
+            c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
+            c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1) + 0x21);
+            c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
+            if (c1 == 0x21 && c2 == 0x40) {
+                /* FULL-WIDTH REVERSE SOLIDUS */
+                OUT1(0xff3c)
+                NEXT(2, 1)
+                continue;
+            }
+            TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
+                NEXT(2, 1)
+                continue;
+            }
+            else
+                return 2;
+        }
+        else
+            return 2;
+        NEXT(1, 1) /* JIS X 0201 */
+    }
+    return 0;
+/*
+ * SHIFT_JIS-2004 codec
+ */
+    while (inleft > 0) {    /* SHIFT_JIS-2004 encoder loop.  NOTE(review): the enclosing
+                             * function header is missing from this chunk.  Each pass
+                             * encodes one code point (or a two-character pair) into one
+                             * or two bytes. */
+        ucs4_t c = IN1;
+        DBCHAR code = NOCHAR;
+        int c1, c2;
+        Py_ssize_t insize;
+        JISX0201_ENCODE(c, code)
+        else DECODE_SURROGATE(c)
+        if (code < 0x80 || (code >= 0xa1 && code <= 0xdf)) {    /* single-byte result */
+            WRITE1((unsigned char)code)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_OUTBUF(2)
+        insize = GET_INSIZE(c);    /* number of input units consumed for c */
+        if (code == NOCHAR) {    /* nothing single-byte matched: consult the tables */
+            if (c <= 0xffff) {
+                EMULATE_JISX0213_2000_ENCODE_BMP(code, c)
+                else TRYMAP_ENC(jisx0213_bmp, code, c) {
+                    if (code == MULTIC) {    /* c may combine with the next character */
+                        if (inleft < 2) {
+                            if (flags & MBENC_FLUSH) {
+                            /* no more input will come: encode c alone (partner 0) */
+                            code = find_pairencmap
+                                ((ucs2_t)c, 0,
+                              jisx0213_pair_encmap,
+                                JISX0213_ENCPAIRS);
+                            if (code == DBCINV)
+                                return 1;
+                            }
+                            else
+                                return MBERR_TOOFEW;    /* wait for the next character */
+                        }
+                        else {
+                            /* try the pair (c, next char) first, then c on its own */
+                            code = find_pairencmap(
+                                (ucs2_t)c, IN2,
+                              jisx0213_pair_encmap,
+                                JISX0213_ENCPAIRS);
+                            if (code == DBCINV) {
+                            code = find_pairencmap(
+                                (ucs2_t)c, 0,
+                              jisx0213_pair_encmap,
+                                JISX0213_ENCPAIRS);
+                            if (code == DBCINV)
+                                return 1;
+                            }
+                            else
+                                insize = 2;    /* pair matched: consume both characters */
+                        }
+                    }
+                }
+                else TRYMAP_ENC(jisxcommon, code, c) {
+                    /* abandon JIS X 0212 codes */
+                    if (code & 0x8000)
+                        return 1;
+                }
+                else return 1;
+            }
+            else if (c >> 16 == EMPBASE >> 16) {    /* c lies in 0x20000..0x2ffff */
+                EMULATE_JISX0213_2000_ENCODE_EMP(code, c)
+                else TRYMAP_ENC(jisx0213_emp, code, c&0xffff);
+                else return insize;
+            }
+            else
+                return insize;
+        }
+        c1 = code >> 8;
+        c2 = (code & 0xff) - 0x21;
+        if (c1 & 0x80) { /* Plane 2 */
+            /* plane-2 rows are renumbered in three ranges before the folding below */
+            if (c1 >= 0xee) c1 -= 0x87;
+            else if (c1 >= 0xac || c1 == 0xa8) c1 -= 0x49;
+            else c1 -= 0x43;
+        }
+        else /* Plane 1 */
+            c1 -= 0x21;
+        if (c1 & 1) c2 += 0x5e;    /* odd rows use the upper half of the trail range */
+        c1 >>= 1;
+        OUT1(c1 + (c1 < 0x1f ? 0x81 : 0xc1))
+        OUT2(c2 + (c2 < 0x3f ? 0x40 : 0x41))
+        NEXT(insize, 2)
+    }
+    return 0;
+    while (inleft > 0) {    /* SHIFT_JIS-2004 decoder loop.  NOTE(review): the enclosing
+                             * function header is missing from this chunk.  Single-byte
+                             * results fall through to the NEXT(1, 1) at the bottom;
+                             * two-byte input is handled, and the loop continued, inside
+                             * the lead-byte branch. */
+        unsigned char c = IN1;
+        REQUIRE_OUTBUF(1)
+        JISX0201_DECODE(c, **outbuf)
+        else if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)){
+            unsigned char c1, c2;
+            ucs4_t code;
+            REQUIRE_INBUF(2)
+            c2 = IN2;
+            if (c2 < 0x40 || (c2 > 0x7e && c2 < 0x80) || c2 > 0xfc)
+                return 2;
+            /* unfold lead/trail bytes into a row index (c1) and a cell (c2) */
+            c1 = (c < 0xe0 ? c - 0x81 : c - 0xc1);
+            c2 = (c2 < 0x80 ? c2 - 0x40 : c2 - 0x41);
+            c1 = (2 * c1 + (c2 < 0x5e ? 0 : 1));
+            c2 = (c2 < 0x5e ? c2 : c2 - 0x5e) + 0x21;
+            if (c1 < 0x5e) { /* Plane 1 */
+                c1 += 0x21;
+                EMULATE_JISX0213_2000_DECODE_PLANE1(**outbuf,
+                                c1, c2)
+                else TRYMAP_DEC(jisx0208, **outbuf, c1, c2) {
+                    NEXT_OUT(1)
+                }
+                else TRYMAP_DEC(jisx0213_1_bmp, **outbuf,
+                                c1, c2) {
+                    NEXT_OUT(1)
+                }
+                else TRYMAP_DEC(jisx0213_1_emp, code, c1, c2) {
+                    WRITEUCS4(EMPBASE | code)    /* EMPBASE is 0x20000 in this file */
+                }
+                else TRYMAP_DEC(jisx0213_pair, code, c1, c2) {
+                    WRITE2(code >> 16, code & 0xffff)
+                    NEXT_OUT(2)
+                }
+                else
+                    return 2;
+                NEXT_IN(2)    /* output was already advanced in the branch taken */
+            }
+            else { /* Plane 2 */
+                /* inverse of the encoder's three-range row renumbering */
+                if (c1 >= 0x67) c1 += 0x07;
+                else if (c1 >= 0x63 || c1 == 0x5f) c1 -= 0x37;
+                else c1 -= 0x3d;
+                EMULATE_JISX0213_2000_DECODE_PLANE2(**outbuf,
+                                c1, c2)
+                else TRYMAP_DEC(jisx0213_2_bmp, **outbuf,
+                                c1, c2) ;
+                else TRYMAP_DEC(jisx0213_2_emp, code, c1, c2) {
+                    WRITEUCS4(EMPBASE | code)
+                    NEXT_IN(2)
+                    continue;
+                }
+                else
+                    return 2;
+                NEXT(2, 1)
+            }
+            continue;
+        }
+        else
+            return 2;
+        NEXT(1, 1) /* JIS X 0201 */
+    }
+    return 0;
+  MAPPING_DECONLY(jisx0208)    /* Map and codec registrations for the Japanese codecs.
+                                * NOTE(review): the lines that open and close these lists
+                                * are not visible in this chunk.  DECONLY / ENCONLY / ENCDEC
+                                * presumably name which directions a map supports. */
+  MAPPING_DECONLY(jisx0212)
+  MAPPING_ENCONLY(jisxcommon)
+  MAPPING_DECONLY(jisx0213_1_bmp)
+  MAPPING_DECONLY(jisx0213_2_bmp)
+  MAPPING_ENCONLY(jisx0213_bmp)
+  MAPPING_DECONLY(jisx0213_1_emp)
+  MAPPING_DECONLY(jisx0213_2_emp)
+  MAPPING_ENCONLY(jisx0213_emp)
+  MAPPING_ENCDEC(jisx0213_pair)
+  MAPPING_ENCDEC(cp932ext)
+  CODEC_STATELESS(shift_jis)
+  CODEC_STATELESS(shift_jis_2004)
+  CODEC_STATELESS(euc_jis_2004)
+  /* the two *x0213 names reuse the *_2004 codecs with config (void *)2000 */
+  CODEC_STATELESS_CONFIG(euc_jisx0213,   (void *)2000, euc_jis_2004)
+  CODEC_STATELESS_CONFIG(shift_jisx0213, (void *)2000, shift_jis_2004)
diff --git a/pypy/translator/c/src/cjkcodecs/_codecs_kr.c b/pypy/translator/c/src/cjkcodecs/_codecs_kr.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/_codecs_kr.c
@@ -0,0 +1,452 @@
+/*
+ * _codecs_kr.c: Codecs collection for Korean encodings
+ *
+ * Written by Hye-Shik Chang <perky at FreeBSD.org>
+ */
+#include "src/cjkcodecs/cjkcodecs.h"
+#include "src/cjkcodecs/mappings_kr.h"
+/*
+ * EUC-KR codec
+ */
+#define EUCKR_JAMO_FILLER       0xD4    /* filler byte of the make-up sequence; the decoder
+                                         * treats it as "no final consonant" (jong = 0) */
+static const unsigned char u2cgk_choseong[19] = {    /* indexed by c / 588 in the EUC-KR
+                                                      * encoder, c being the syllable
+                                                      * offset from U+AC00 */
+    0xa1, 0xa2, 0xa4, 0xa7, 0xa8, 0xa9, 0xb1, 0xb2,
+    0xb3, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb,
+    0xbc, 0xbd, 0xbe
+static const unsigned char u2cgk_jungseong[21] = {    /* indexed by (c / 28) % 21 */
+    0xbf, 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6,
+    0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce,
+    0xcf, 0xd0, 0xd1, 0xd2, 0xd3
+static const unsigned char u2cgk_jongseong[28] = {    /* indexed by c % 28; entry 0 is
+                                                       * the filler value 0xd4 */
+    0xd4, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, 0xb0,
+    0xb1, 0xb2, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xba,
+    0xbb, 0xbc, 0xbd, 0xbe
+    while (inleft > 0) {    /* EUC-KR encoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk.  Each pass encodes one
+                             * Py_UNICODE into one byte, two bytes, or an eight-byte
+                             * make-up sequence; returns 1 when the cp949 map has no entry. */
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        if (c < 0x80) {
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            continue;
+        }
+        UCS4INVALID(c)
+        REQUIRE_OUTBUF(2)
+        TRYMAP_ENC(cp949, code, c);
+        else return 1;
+        if ((code & 0x8000) == 0) {
+            /* KS X 1001 coded character */
+            OUT1((code >> 8) | 0x80)
+            OUT2((code & 0xFF) | 0x80)
+            NEXT(1, 2)
+        }
+        else {          /* Mapping is found in CP949 extension,
+                 * but we encode it in KS X 1001:1998 Annex 3,
+                 * make-up sequence for EUC-KR. */
+            REQUIRE_OUTBUF(8)
+            /* syllable composition precedence */
+            /* NOTE(review): only the even slots (OUT2/OUT4) are written in this
+             * branch, yet the EUC-KR decoder requires EUCKR_JAMO_FIRSTBYTE at
+             * byte offsets 0, 2, 4 and 6 of the sequence.  The OUT1/OUT3 writes
+             * appear to be missing from this chunk -- confirm against upstream. */
+            OUT2(EUCKR_JAMO_FILLER)
+            /* All codepoints in CP949 extension are in unicode
+             * Hangul Syllable area. */
+            assert(0xac00 <= c && c <= 0xd7a3);
+            c -= 0xac00;    /* c is now the syllable offset used to index the tables */
+            OUT4(u2cgk_choseong[c / 588])
+            NEXT_OUT(4)    /* first half written; OUT2/OUT4 below address the second half */
+            OUT2(u2cgk_jungseong[(c / 28) % 21])
+            OUT4(u2cgk_jongseong[c % 28])
+            NEXT(1, 4)
+        }
+    }
+    return 0;
+#define NONE    127    /* marks a byte without an index; undefined again after the decoder */
+static const unsigned char cgk2u_choseong[] = { /* [A1, BE] */    /* indexed by byte - 0xa1 */
+       0,    1, NONE,    2, NONE, NONE,    3,    4,
+       6,    7,    8, NONE,    9,   10,   11,   12,
+      13,   14,   15,   16,   17,   18
+static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */    /* indexed by byte - 0xa1 */
+       1,    2,    3,    4,    5,    6,    7, NONE,
+       8,    9,   10,   11,   12,   13,   14,   15,
+      16,   17, NONE,   18,   19,   20,   21,   22,
+    NONE,   23,   24,   25,   26,   27
+    while (inleft > 0) {    /* EUC-KR decoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk, as is the definition of
+                             * EUCKR_JAMO_FIRSTBYTE.  Handles single bytes, eight-byte
+                             * make-up sequences and two-byte ksx1001 lookups. */
+        unsigned char c = IN1;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        if (c == EUCKR_JAMO_FIRSTBYTE &&
+            IN2 == EUCKR_JAMO_FILLER) {
+            /* KS X 1001:1998 Annex 3 make-up sequence */
+            DBCHAR cho, jung, jong;
+            REQUIRE_INBUF(8)
+            /* every jamo of the sequence must start with the same first byte */
+            if ((*inbuf)[2] != EUCKR_JAMO_FIRSTBYTE ||
+                (*inbuf)[4] != EUCKR_JAMO_FIRSTBYTE ||
+                (*inbuf)[6] != EUCKR_JAMO_FIRSTBYTE)
+                return 8;
+            c = (*inbuf)[3];    /* initial consonant byte */
+            if (0xa1 <= c && c <= 0xbe)
+                cho = cgk2u_choseong[c - 0xa1];
+            else
+                cho = NONE;
+            c = (*inbuf)[5];    /* vowel byte: 0xbf..0xd3 map linearly to 0..20 */
+            jung = (0xbf <= c && c <= 0xd3) ? c - 0xbf : NONE;
+            c = (*inbuf)[7];    /* final consonant byte, or the filler for none */
+            if (c == EUCKR_JAMO_FILLER)
+                jong = 0;
+            else if (0xa1 <= c && c <= 0xbe)
+                jong = cgk2u_jongseong[c - 0xa1];
+            else
+                jong = NONE;
+            if (cho == NONE || jung == NONE || jong == NONE)
+                return 8;
+            OUT1(0xac00 + cho*588 + jung*28 + jong);
+            NEXT(8, 1)
+        }
+        else TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80) {
+            NEXT(2, 1)
+        }
+        else
+            return 2;
+    }
+    return 0;
+#undef NONE
+/*
+ * CP949 codec
+ */
+    while (inleft > 0) {    /* CP949 encoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk.  Each pass encodes one
+                             * Py_UNICODE into one or two bytes; returns 1 when the cp949
+                             * map has no entry. */
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        if (c < 0x80) {
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            continue;
+        }
+        UCS4INVALID(c)
+        REQUIRE_OUTBUF(2)
+        TRYMAP_ENC(cp949, code, c);
+        else return 1;
+        OUT1((code >> 8) | 0x80)    /* the first byte always gets its high bit set */
+        if (code & 0x8000)
+            OUT2(code & 0xFF) /* MSB set: CP949 */
+        else
+            OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
+        NEXT(1, 2)
+    }
+    return 0;
+    while (inleft > 0) {    /* CP949 decoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk.  Bytes below 0x80 decode
+                             * to themselves; otherwise a byte pair is looked up, and 2 is
+                             * returned when neither map has it. */
+        unsigned char c = IN1;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        TRYMAP_DEC(ksx1001, **outbuf, c ^ 0x80, IN2 ^ 0x80);    /* bit 7 flipped on both bytes */
+        else TRYMAP_DEC(cp949ext, **outbuf, c, IN2);            /* raw bytes for the extension */
+        else return 2;
+        NEXT(2, 1)
+    }
+    return 0;
+/*
+ * JOHAB codec
+ */
+static const unsigned char u2johabidx_choseong[32] = {    /* indexed by c / 588 in the
+                                                           * JOHAB encoder; the value is
+                                                           * shifted left by 10 there */
+                0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11, 0x12, 0x13, 0x14,
+static const unsigned char u2johabidx_jungseong[32] = {    /* indexed by (c / 28) % 21;
+                                                            * the value is shifted left by 5 */
+                      0x03, 0x04, 0x05, 0x06, 0x07,
+                0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+                0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+                0x1a, 0x1b, 0x1c, 0x1d,
+static const unsigned char u2johabidx_jongseong[32] = {    /* indexed by c % 28; the value
+                                                            * fills the low bits */
+          0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x10, 0x11,       0x13, 0x14, 0x15, 0x16, 0x17,
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d,
+static const DBCHAR u2johabjamo[] = {    /* indexed by c - 0x3131 for c in U+3131..U+3163 */
+            0x8841, 0x8c41, 0x8444, 0x9041, 0x8446, 0x8447, 0x9441,
+    0x9841, 0x9c41, 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f,
+    0x8450, 0xa041, 0xa441, 0xa841, 0x8454, 0xac41, 0xb041, 0xb441,
+    0xb841, 0xbc41, 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041, 0x8461,
+    0x8481, 0x84a1, 0x84c1, 0x84e1, 0x8541, 0x8561, 0x8581, 0x85a1,
+    0x85c1, 0x85e1, 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
+    0x8741, 0x8761, 0x8781, 0x87a1,
+    while (inleft > 0) {    /* JOHAB encoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk.  Each pass encodes one
+                             * Py_UNICODE into one or two bytes; returns 1 when no
+                             * mapping applies. */
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        if (c < 0x80) {
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            continue;
+        }
+        UCS4INVALID(c)
+        REQUIRE_OUTBUF(2)
+        if (c >= 0xac00 && c <= 0xd7a3) {
+            /* Hangul syllable: build the code from three table values plus 0x8000 */
+            c -= 0xac00;
+            code = 0x8000 |
+                (u2johabidx_choseong[c / 588] << 10) |
+                (u2johabidx_jungseong[(c / 28) % 21] << 5) |
+                u2johabidx_jongseong[c % 28];
+        }
+        else if (c >= 0x3131 && c <= 0x3163)
+            code = u2johabjamo[c - 0x3131];
+        else TRYMAP_ENC(cp949, code, c) {
+            unsigned char c1, c2, t2;
+            unsigned short t1;
+            assert((code & 0x8000) == 0);
+            c1 = code >> 8;
+            c2 = code & 0xff;
+            if (((c1 >= 0x21 && c1 <= 0x2c) ||
+                (c1 >= 0x4a && c1 <= 0x7d)) &&
+                (c2 >= 0x21 && c2 <= 0x7e)) {
+                /* t1 >> 1 becomes the lead byte; its low bit picks the trail half */
+                t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
+                          (c1 - 0x21 + 0x197));
+                t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
+                OUT1(t1 >> 1)
+                OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
+                NEXT(1, 2)
+                continue;
+            }
+            else
+                return 1;
+        }
+        else
+            return 1;
+        OUT1(code >> 8)    /* reached only by the syllable and jamo branches above */
+        OUT2(code & 0xff)
+        NEXT(1, 2)
+    }
+    return 0;
+#define FILL 0xfd    /* table value the JOHAB decoder tests for to detect an empty slot */
+#define NONE 0xff    /* table value the JOHAB decoder rejects as invalid */
+static const unsigned char johabidx_choseong[32] = {    /* indexed by the 5-bit c_cho field */
+    NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
+    0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
+    0x0e, 0x0f, 0x10, 0x11, 0x12, NONE, NONE, NONE,
+static const unsigned char johabidx_jungseong[32] = {    /* indexed by the 5-bit c_jung field */
+    NONE, NONE, FILL, 0x00, 0x01, 0x02, 0x03, 0x04,
+    NONE, NONE, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
+    NONE, NONE, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10,
+    NONE, NONE, 0x11, 0x12, 0x13, 0x14, NONE, NONE,
+static const unsigned char johabidx_jongseong[32] = {    /* indexed by the 5-bit c_jong field */
+    NONE, FILL, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e,
+    0x0f, 0x10, NONE, 0x11, 0x12, 0x13, 0x14, 0x15,
+    0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, NONE, NONE,
+static const unsigned char johabjamo_choseong[32] = {    /* OR-ed with 0x3100 by the decoder */
+    NONE, FILL, 0x31, 0x32, 0x34, 0x37, 0x38, 0x39,
+    0x41, 0x42, 0x43, 0x45, 0x46, 0x47, 0x48, 0x49,
+    0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE, NONE,
+static const unsigned char johabjamo_jungseong[32] = {    /* OR-ed with 0x3100 by the decoder */
+    NONE, NONE, FILL, 0x4f, 0x50, 0x51, 0x52, 0x53,
+    NONE, NONE, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+    NONE, NONE, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+    NONE, NONE, 0x60, 0x61, 0x62, 0x63, NONE, NONE,
+static const unsigned char johabjamo_jongseong[32] = {    /* OR-ed with 0x3100 by the decoder */
+    NONE, FILL, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
+    0x37, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+    0x40, 0x41, NONE, 0x42, 0x44, 0x45, 0x46, 0x47,
+    0x48, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, NONE, NONE,
+    while (inleft > 0) {    /* JOHAB decoder loop.  NOTE(review): the enclosing function
+                             * header is missing from this chunk.  Lead bytes below 0xd8
+                             * are split into three 5-bit fields; other lead bytes are
+                             * converted to a ksx1001 row/cell lookup.  Returns 2 for an
+                             * invalid byte pair. */
+        unsigned char    c = IN1, c2;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        c2 = IN2;
+        if (c < 0xd8) {
+            /* johab hangul */
+            unsigned char c_cho, c_jung, c_jong;
+            unsigned char i_cho, i_jung, i_jong;
+            /* the 16-bit code is laid out as 1 + 5 + 5 + 5 bits */
+            c_cho = (c >> 2) & 0x1f;
+            c_jung = ((c << 3) | c2 >> 5) & 0x1f;
+            c_jong = c2 & 0x1f;
+            i_cho = johabidx_choseong[c_cho];
+            i_jung = johabidx_jungseong[c_jung];
+            i_jong = johabidx_jongseong[c_jong];
+            if (i_cho == NONE || i_jung == NONE || i_jong == NONE)
+                return 2;
+            /* we don't use U+1100 hangul jamo yet. */
+            /* FILL marks an empty slot: all three empty gives U+3000, exactly one
+             * filled gives a 0x31xx jamo, a filled initial and vowel (with or
+             * without a final) gives a syllable, other mixes are rejected */
+            if (i_cho == FILL) {
+                if (i_jung == FILL) {
+                    if (i_jong == FILL)
+                        OUT1(0x3000)
+                    else
+                        OUT1(0x3100 |
+                          johabjamo_jongseong[c_jong])
+                }
+                else {
+                    if (i_jong == FILL)
+                        OUT1(0x3100 |
+                          johabjamo_jungseong[c_jung])
+                    else
+                        return 2;
+                }
+            } else {
+                if (i_jung == FILL) {
+                    if (i_jong == FILL)
+                        OUT1(0x3100 |
+                          johabjamo_choseong[c_cho])
+                    else
+                        return 2;
+                }
+                else
+                    OUT1(0xac00 +
+                         i_cho * 588 +
+                         i_jung * 28 +
+                         (i_jong == FILL ? 0 : i_jong))
+            }
+            NEXT(2, 1)
+        } else {
+            /* KS X 1001 except hangul jamos and syllables */
+            if (c == 0xdf || c > 0xf9 ||
+                c2 < 0x31 || (c2 >= 0x80 && c2 < 0x91) ||
+                (c2 & 0x7f) == 0x7f ||
+                (c == 0xda && (c2 >= 0xa1 && c2 <= 0xd3)))
+                return 2;
+            else {
+                unsigned char t1, t2;
+                /* convert the byte pair into the row (t1) and cell (t2) looked up below */
+                t1 = (c < 0xe0 ? 2 * (c - 0xd9) :
+                         2 * c - 0x197);
+                t2 = (c2 < 0x91 ? c2 - 0x31 : c2 - 0x43);
+                t1 = t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
+                t2 = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
+                TRYMAP_DEC(ksx1001, **outbuf, t1, t2);
+                else return 2;
+                NEXT(2, 1)
+            }
+        }
+    }
+    return 0;
+#undef NONE
+#undef FILL
+  MAPPING_DECONLY(cp949ext)
diff --git a/pypy/translator/c/src/cjkcodecs/_codecs_tw.c b/pypy/translator/c/src/cjkcodecs/_codecs_tw.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/_codecs_tw.c
@@ -0,0 +1,132 @@
+ * _codecs_tw.c: Codecs collection for Taiwan's encodings
+ *
+ * Written by Hye-Shik Chang <perky at FreeBSD.org>
+ */
+#include "src/cjkcodecs/cjkcodecs.h"
+#include "src/cjkcodecs/mappings_tw.h"
+ * BIG5 codec
+ */
+    while (inleft > 0) {
+        Py_UNICODE c = **inbuf;
+        DBCHAR code;
+        if (c < 0x80) {
+            REQUIRE_OUTBUF(1)
+            **outbuf = (unsigned char)c;
+            NEXT(1, 1)
+            continue;
+        }
+        UCS4INVALID(c)
+        REQUIRE_OUTBUF(2)
+        TRYMAP_ENC(big5, code, c);
+        else return 1;
+        OUT1(code >> 8)
+        OUT2(code & 0xFF)
+        NEXT(1, 2)
+    }
+    return 0;
+    while (inleft > 0) {
+        unsigned char c = IN1;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        TRYMAP_DEC(big5, **outbuf, c, IN2) {
+            NEXT(2, 1)
+        }
+        else return 2;
+    }
+    return 0;
+ * CP950 codec
+ */
+    while (inleft > 0) {
+        Py_UNICODE c = IN1;
+        DBCHAR code;
+        if (c < 0x80) {
+            WRITE1((unsigned char)c)
+            NEXT(1, 1)
+            continue;
+        }
+        UCS4INVALID(c)
+        REQUIRE_OUTBUF(2)
+        TRYMAP_ENC(cp950ext, code, c);
+        else TRYMAP_ENC(big5, code, c);
+        else return 1;
+        OUT1(code >> 8)
+        OUT2(code & 0xFF)
+        NEXT(1, 2)
+    }
+    return 0;
+    while (inleft > 0) {
+        unsigned char c = IN1;
+        REQUIRE_OUTBUF(1)
+        if (c < 0x80) {
+            OUT1(c)
+            NEXT(1, 1)
+            continue;
+        }
+        REQUIRE_INBUF(2)
+        TRYMAP_DEC(cp950ext, **outbuf, c, IN2);
+        else TRYMAP_DEC(big5, **outbuf, c, IN2);
+        else return 2;
+        NEXT(2, 1)
+    }
+    return 0;
+  MAPPING_ENCDEC(cp950ext)
diff --git a/pypy/translator/c/src/cjkcodecs/alg_jisx0201.h b/pypy/translator/c/src/cjkcodecs/alg_jisx0201.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/alg_jisx0201.h
@@ -0,0 +1,24 @@
+#define JISX0201_R_ENCODE(c, assi)                      \
+    if ((c) < 0x80 && (c) != 0x5c && (c) != 0x7e)       \
+        (assi) = (c);                                   \
+    else if ((c) == 0x00a5) (assi) = 0x5c;              \
+    else if ((c) == 0x203e) (assi) = 0x7e;
+#define JISX0201_K_ENCODE(c, assi)                      \
+    if ((c) >= 0xff61 && (c) <= 0xff9f)                 \
+        (assi) = (c) - 0xfec0;
+#define JISX0201_ENCODE(c, assi)                        \
+    JISX0201_R_ENCODE(c, assi)                          \
+    else JISX0201_K_ENCODE(c, assi)
+#define JISX0201_R_DECODE(c, assi)                      \
+    if ((c) < 0x5c) (assi) = (c);                       \
+    else if ((c) == 0x5c) (assi) = 0x00a5;              \
+    else if ((c) < 0x7e) (assi) = (c);                  \
+    else if ((c) == 0x7e) (assi) = 0x203e;              \
+    else if ((c) == 0x7f) (assi) = 0x7f;
+#define JISX0201_K_DECODE(c, assi)                      \
+    if ((c) >= 0xa1 && (c) <= 0xdf)                     \
+    (assi) = 0xfec0 + (c);
+#define JISX0201_DECODE(c, assi)                        \
+    JISX0201_R_DECODE(c, assi)                          \
+    else JISX0201_K_DECODE(c, assi)
diff --git a/pypy/translator/c/src/cjkcodecs/cjkcodecs.h b/pypy/translator/c/src/cjkcodecs/cjkcodecs.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/cjkcodecs.h
@@ -0,0 +1,309 @@
+ * cjkcodecs.h is inspired by the file of the same name from CPython,
+ * but was heavily modified to suit PyPy.
+ *
+ * Original author: Hye-Shik Chang <perky at FreeBSD.org>
+ * Modified by: Armin Rigo <arigo at tunes.org>
+ */
+#ifndef _CJKCODECS_H_
+#define _CJKCODECS_H_
+#include "src/cjkcodecs/multibytecodec.h"
+/* a unicode "undefined" codepoint */
+#define UNIINV  0xFFFE
+/* internal-use DBCS codepoints which aren't used by any charsets */
+#define NOCHAR  0xFFFF
+#define MULTIC  0xFFFE
+#define DBCINV  0xFFFD
+/* shorter macros to save source size of mapping tables */
+#define U UNIINV
+#define N NOCHAR
+#define M MULTIC
+#define D DBCINV
+struct dbcs_index {
+    const ucs2_t *map;
+    unsigned char bottom, top;
+typedef struct dbcs_index decode_map;
+struct widedbcs_index {
+    const ucs4_t *map;
+    unsigned char bottom, top;
+typedef struct widedbcs_index widedecode_map;
+struct unim_index {
+    const DBCHAR *map;
+    unsigned char bottom, top;
+typedef struct unim_index encode_map;
+struct unim_index_bytebased {
+    const unsigned char *map;
+    unsigned char bottom, top;
+struct dbcs_map {
+    const char *charset;
+    const struct unim_index *encmap;
+    const struct dbcs_index *decmap;
+struct pair_encodemap {
+    ucs4_t uniseq;
+    DBCHAR code;
+#define CODEC_INIT(encoding)                                            \
+    static int encoding##_codec_init(const void *config)
+#define ENCODER_INIT(encoding)                                          \
+    static int encoding##_encode_init(                                  \
+        MultibyteCodec_State *state, const void *config)
+#define ENCODER(encoding)                                               \
+    static Py_ssize_t encoding##_encode(                                \
+        MultibyteCodec_State *state, const void *config,                \
+        const Py_UNICODE **inbuf, Py_ssize_t inleft,                    \
+        unsigned char **outbuf, Py_ssize_t outleft, int flags)
+#define ENCODER_RESET(encoding)                                         \
+    static Py_ssize_t encoding##_encode_reset(                          \
+        MultibyteCodec_State *state, const void *config,                \
+        unsigned char **outbuf, Py_ssize_t outleft)
+#define DECODER_INIT(encoding)                                          \
+    static int encoding##_decode_init(                                  \
+        MultibyteCodec_State *state, const void *config)
+#define DECODER(encoding)                                               \
+    static Py_ssize_t encoding##_decode(                                \
+        MultibyteCodec_State *state, const void *config,                \
+        const unsigned char **inbuf, Py_ssize_t inleft,                 \
+        Py_UNICODE **outbuf, Py_ssize_t outleft)
+#define DECODER_RESET(encoding)                                         \
+    static Py_ssize_t encoding##_decode_reset(                          \
+        MultibyteCodec_State *state, const void *config)
+#if Py_UNICODE_SIZE == 4
+#define UCS4INVALID(code)       \
+    if ((code) > 0xFFFF)        \
+    return 1;
+#define UCS4INVALID(code)       \
+    if (0) ;
+#define NEXT_IN(i)                              \
+    (*inbuf) += (i);                            \
+    (inleft) -= (i);
+#define NEXT_OUT(o)                             \
+    (*outbuf) += (o);                           \
+    (outleft) -= (o);
+#define NEXT(i, o)                              \
+    NEXT_IN(i) NEXT_OUT(o)
+#define REQUIRE_INBUF(n)                        \
+    if (inleft < (n))                           \
+        return MBERR_TOOFEW;
+#define REQUIRE_OUTBUF(n)                       \
+    if (outleft < (n))                          \
+        return MBERR_TOOSMALL;
+#define IN1 ((*inbuf)[0])
+#define IN2 ((*inbuf)[1])
+#define IN3 ((*inbuf)[2])
+#define IN4 ((*inbuf)[3])
+#define OUT1(c) ((*outbuf)[0]) = (c);
+#define OUT2(c) ((*outbuf)[1]) = (c);
+#define OUT3(c) ((*outbuf)[2]) = (c);
+#define OUT4(c) ((*outbuf)[3]) = (c);
+#define WRITE1(c1)              \
+    REQUIRE_OUTBUF(1)           \
+    (*outbuf)[0] = (c1);
+#define WRITE2(c1, c2)          \
+    REQUIRE_OUTBUF(2)           \
+    (*outbuf)[0] = (c1);        \
+    (*outbuf)[1] = (c2);
+#define WRITE3(c1, c2, c3)      \
+    REQUIRE_OUTBUF(3)           \
+    (*outbuf)[0] = (c1);        \
+    (*outbuf)[1] = (c2);        \
+    (*outbuf)[2] = (c3);
+#define WRITE4(c1, c2, c3, c4)  \
+    REQUIRE_OUTBUF(4)           \
+    (*outbuf)[0] = (c1);        \
+    (*outbuf)[1] = (c2);        \
+    (*outbuf)[2] = (c3);        \
+    (*outbuf)[3] = (c4);
+#if Py_UNICODE_SIZE == 2
+# define WRITEUCS4(c)                                           \
+    REQUIRE_OUTBUF(2)                                           \
+    (*outbuf)[0] = 0xd800 + (((c) - 0x10000) >> 10);            \
+    (*outbuf)[1] = 0xdc00 + (((c) - 0x10000) & 0x3ff);          \
+    NEXT_OUT(2)
+# define WRITEUCS4(c)                                           \
+    REQUIRE_OUTBUF(1)                                           \
+    **outbuf = (Py_UNICODE)(c);                                 \
+    NEXT_OUT(1)
+#define _TRYMAP_ENC(m, assi, val)                               \
+    ((m)->map != NULL && (val) >= (m)->bottom &&                \
+        (val)<= (m)->top && ((assi) = (m)->map[(val) -          \
+        (m)->bottom]) != NOCHAR)
+#define TRYMAP_ENC_COND(charset, assi, uni)                     \
+    _TRYMAP_ENC(&charset##_encmap[(uni) >> 8], assi, (uni) & 0xff)
+#define TRYMAP_ENC(charset, assi, uni)                          \
+    if TRYMAP_ENC_COND(charset, assi, uni)
+#define _TRYMAP_DEC(m, assi, val)                               \
+    ((m)->map != NULL && (val) >= (m)->bottom &&                \
+        (val)<= (m)->top && ((assi) = (m)->map[(val) -          \
+        (m)->bottom]) != UNIINV)
+#define TRYMAP_DEC(charset, assi, c1, c2)                       \
+    if _TRYMAP_DEC(&charset##_decmap[c1], assi, c2)
+#define _TRYMAP_ENC_MPLANE(m, assplane, asshi, asslo, val)      \
+    ((m)->map != NULL && (val) >= (m)->bottom &&                \
+        (val)<= (m)->top &&                                     \
+        ((assplane) = (m)->map[((val) - (m)->bottom)*3]) != 0 && \
+        (((asshi) = (m)->map[((val) - (m)->bottom)*3 + 1]), 1) && \
+        (((asslo) = (m)->map[((val) - (m)->bottom)*3 + 2]), 1))
+#define TRYMAP_ENC_MPLANE(charset, assplane, asshi, asslo, uni) \
+    if _TRYMAP_ENC_MPLANE(&charset##_encmap[(uni) >> 8], \
+                       assplane, asshi, asslo, (uni) & 0xff)
+#define TRYMAP_DEC_MPLANE(charset, assi, plane, c1, c2)         \
+    if _TRYMAP_DEC(&charset##_decmap[plane][c1], assi, c2)
+#if Py_UNICODE_SIZE == 2
+#define DECODE_SURROGATE(c)                                     \
+    if (c >> 10 == 0xd800 >> 10) { /* high surrogate */         \
+        REQUIRE_INBUF(2)                                        \
+        if (IN2 >> 10 == 0xdc00 >> 10) { /* low surrogate */ \
+            c = 0x10000 + ((ucs4_t)(c - 0xd800) << 10) + \
+            ((ucs4_t)(IN2) - 0xdc00);                           \
+        }                                                       \
+    }
+#define GET_INSIZE(c)   ((c) > 0xffff ? 2 : 1)
+#define DECODE_SURROGATE(c) {;}
+#define GET_INSIZE(c)   1
+#define BEGIN_MAPPINGS_LIST /* empty */
+#define MAPPING_ENCONLY(enc)                                            \
+  const struct dbcs_map pypy_cjkmap_##enc = {#enc, (void*)enc##_encmap, NULL};
+#define MAPPING_DECONLY(enc)                                            \
+  const struct dbcs_map pypy_cjkmap_##enc = {#enc, NULL, (void*)enc##_decmap};
+#define MAPPING_ENCDEC(enc)                                             \
+  const struct dbcs_map pypy_cjkmap_##enc = {#enc, (void*)enc##_encmap, \
+                                             (void*)enc##_decmap};
+#define END_MAPPINGS_LIST /* empty */
+#define BEGIN_CODECS_LIST /* empty */
+#define _CODEC(name)                                                    \
+  static const MultibyteCodec _pypy_cjkcodec_##name;                    \
+  const MultibyteCodec *pypy_cjkcodec_##name(void) {                    \
+    if (_pypy_cjkcodec_##name.codecinit != NULL) {                      \
+      int r = _pypy_cjkcodec_##name.codecinit(_pypy_cjkcodec_##name.config); \
+      assert(r == 0);                                                   \
+    }                                                                   \
+    return &_pypy_cjkcodec_##name;                                      \
+  }                                                                     \
+  static const MultibyteCodec _pypy_cjkcodec_##name
+#define _STATEFUL_METHODS(enc)          \
+    enc##_encode,                       \
+    enc##_encode_init,                  \
+    enc##_encode_reset,                 \
+    enc##_decode,                       \
+    enc##_decode_init,                  \
+    enc##_decode_reset,
+#define _STATELESS_METHODS(enc)         \
+    enc##_encode, NULL, NULL,           \
+    enc##_decode, NULL, NULL,
+#define CODEC_STATEFUL(enc) _CODEC(enc) = {     \
+    #enc, NULL, NULL,                           \
+    _STATEFUL_METHODS(enc)                      \
+  };
+#define CODEC_STATELESS(enc) _CODEC(enc) = {    \
+    #enc, NULL, NULL,                           \
+    _STATELESS_METHODS(enc)                     \
+  };
+#define CODEC_STATELESS_WINIT(enc) _CODEC(enc) = {      \
+    #enc, NULL,                                         \
+    enc##_codec_init,                                   \
+    _STATELESS_METHODS(enc)                             \
+  };
+#define CODEC_STATELESS_CONFIG(enc, config, baseenc) _CODEC(enc) = {    \
+    #enc, config, NULL,                                                 \
+    _STATELESS_METHODS(baseenc)                                         \
+  };
+#define CODEC_STATEFUL_CONFIG(enc, variation, config)   \
+  _CODEC(enc##_##variation) = {                         \
+    #enc "_" #variation,                                \
+    config,                                             \
+    enc##_codec_init,                                   \
+    _STATEFUL_METHODS(enc)                              \
+  };
+#define END_CODECS_LIST /* empty */
+/* Binary-search `haystack` (`haystacksize` entries, expected to be sorted by
+ * ascending `uniseq`) for the key made of `body` in the high 16 bits and
+ * `modifier` in the low 16 bits.  Returns the matching entry's `code`, or
+ * DBCINV when no entry matches. */
+static DBCHAR
+find_pairencmap(ucs2_t body, ucs2_t modifier,
+                const struct pair_encodemap *haystack, int haystacksize)
+    int pos, min, max;
+    /* Widen before shifting: a 16-bit `body` is otherwise promoted to a
+     * signed int, and shifting a value >= 0x8000 left by 16 overflows it. */
+    ucs4_t value = ((ucs4_t)body << 16) | modifier;
+    min = 0;
+    max = haystacksize;
+    for (pos = haystacksize >> 1; min != max; pos = (min + max) >> 1)
+        if (value < haystack[pos].uniseq) {
+            if (max == pos) break;
+            else max = pos;
+        }
+        else if (value > haystack[pos].uniseq) {
+            if (min == pos) break;
+            else min = pos;
+        }
+        else
+            break;
+        /* Not part of the loop despite the indentation: final check of the
+         * slot the search stopped on. */
+        if (value == haystack[pos].uniseq)
+            return haystack[pos].code;
+        else
+            return DBCINV;
+#define USING_IMPORTED_MAP(charset) \
+  extern const struct dbcs_map pypy_cjkmap_##charset;
+#define IMPORT_MAP(locale, charset, encmap, decmap)                     \
+  importmap(&pypy_cjkmap_##charset, encmap, decmap)
+static void importmap(const struct dbcs_map *src, void *encmp,
+                      void *decmp)
+  if (encmp) *(const encode_map **)encmp = src->encmap;
+  if (decmp) *(const decode_map **)decmp = src->decmap;
+#define I_AM_A_MODULE_FOR(loc) /* empty */
diff --git a/pypy/translator/c/src/cjkcodecs/emu_jisx0213_2000.h b/pypy/translator/c/src/cjkcodecs/emu_jisx0213_2000.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/emu_jisx0213_2000.h
@@ -0,0 +1,43 @@
+/* These routines may be quite inefficient, but they are used only to emulate
+ * old standards. */
+#define EMULATE_JISX0213_2000_ENCODE_BMP(assi, c)                       \
+    if (config == (void *)2000 && (                                     \
+                    (c) == 0x9B1C || (c) == 0x4FF1 ||                   \
+                    (c) == 0x525D || (c) == 0x541E ||                   \
+                    (c) == 0x5653 || (c) == 0x59F8 ||                   \
+                    (c) == 0x5C5B || (c) == 0x5E77 ||                   \
+                    (c) == 0x7626 || (c) == 0x7E6B))                    \
+        return EMULATE_JISX0213_2000_ENCODE_INVALID;                    \
+    else if (config == (void *)2000 && (c) == 0x9B1D)                   \
+        (assi) = 0x8000 | 0x7d3b;                                       \
+#define EMULATE_JISX0213_2000_ENCODE_EMP(assi, c)                       \
+    if (config == (void *)2000 && (c) == 0x20B9F)                       \
+        return EMULATE_JISX0213_2000_ENCODE_INVALID;
+#define EMULATE_JISX0213_2000_DECODE_PLANE1(assi, c1, c2)               \
+    if (config == (void *)2000 &&                                       \
+                    (((c1) == 0x2E && (c2) == 0x21) ||                  \
+                     ((c1) == 0x2F && (c2) == 0x7E) ||                  \
+                     ((c1) == 0x4F && (c2) == 0x54) ||                  \
+                     ((c1) == 0x4F && (c2) == 0x7E) ||                  \
+                     ((c1) == 0x74 && (c2) == 0x27) ||                  \
+                     ((c1) == 0x7E && (c2) == 0x7A) ||                  \
+                     ((c1) == 0x7E && (c2) == 0x7B) ||                  \
+                     ((c1) == 0x7E && (c2) == 0x7C) ||                  \
+                     ((c1) == 0x7E && (c2) == 0x7D) ||                  \
+                     ((c1) == 0x7E && (c2) == 0x7E)))                   \
+        return EMULATE_JISX0213_2000_DECODE_INVALID;
+#define EMULATE_JISX0213_2000_DECODE_PLANE2(assi, c1, c2)               \
+    if (config == (void *)2000 && (c1) == 0x7D && (c2) == 0x3B)         \
+        (assi) = 0x9B1D;
diff --git a/pypy/translator/c/src/cjkcodecs/mappings_cn.h b/pypy/translator/c/src/cjkcodecs/mappings_cn.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/mappings_cn.h
@@ -0,0 +1,4103 @@
+static const ucs2_t __gb2312_decmap[7482] = {
+static const struct dbcs_index gb2312_decmap[256] = {
+static const ucs2_t __gbkext_decmap[14531] = {
+static const struct dbcs_index gbkext_decmap[256] = {
+static const DBCHAR __gbcommon_encmap[23231] = {
+static const struct unim_index gbcommon_encmap[256] = {
+static const ucs2_t __gb18030ext_decmap[2729] = {
+static const struct dbcs_index gb18030ext_decmap[256] = {
+static const DBCHAR __gb18030ext_encmap[3227] = {
+static const struct unim_index gb18030ext_encmap[256] = {
+static const struct _gb18030_to_unibmp_ranges {
+    Py_UNICODE   first, last;
+    DBCHAR       base;
+} gb18030_to_unibmp_ranges[] = {
diff --git a/pypy/translator/c/src/cjkcodecs/mappings_hk.h b/pypy/translator/c/src/cjkcodecs/mappings_hk.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/mappings_hk.h
@@ -0,0 +1,2378 @@
+static const ucs2_t __big5hkscs_decmap[6219] = {
+static const struct dbcs_index big5hkscs_decmap[256] = {
+static const unsigned char big5hkscs_phint_0[] = {
+static const unsigned char big5hkscs_phint_12130[] = {
+static const unsigned char big5hkscs_phint_21924[] = {
+static const DBCHAR __big5hkscs_bmp_encmap[26401] = {
+static const struct unim_index big5hkscs_bmp_encmap[256] = {
+static const DBCHAR __big5hkscs_nonbmp_encmap[29306] = {
+static const struct unim_index big5hkscs_nonbmp_encmap[256] = {
diff --git a/pypy/translator/c/src/cjkcodecs/mappings_jisx0213_pair.h b/pypy/translator/c/src/cjkcodecs/mappings_jisx0213_pair.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/mappings_jisx0213_pair.h
@@ -0,0 +1,59 @@
+#define JISX0213_ENCPAIRS 46
+#ifdef EXTERN_JISX0213_PAIR
+static const struct widedbcs_index *jisx0213_pair_decmap;
+static const struct pair_encodemap *jisx0213_pair_encmap;
+static const ucs4_t __jisx0213_pair_decmap[49] = {
+static const struct widedbcs_index jisx0213_pair_decmap[256] = {
+static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {
diff --git a/pypy/translator/c/src/cjkcodecs/mappings_jp.h b/pypy/translator/c/src/cjkcodecs/mappings_jp.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/mappings_jp.h
@@ -0,0 +1,4765 @@
+static const ucs2_t __jisx0208_decmap[6956] = {
+static const struct dbcs_index jisx0208_decmap[256] = {
+static const ucs2_t __jisx0212_decmap[6179] = {
+static const struct dbcs_index jisx0212_decmap[256] = {
+static const DBCHAR __jisxcommon_encmap[22016] = {
+static const struct unim_index jisxcommon_encmap[256] = {
+static const ucs2_t __cp932ext_decmap[969] = {
+static const struct dbcs_index cp932ext_decmap[256] = {
+static const DBCHAR __cp932ext_encmap[9686] = {
+static const struct unim_index cp932ext_encmap[256] = {
+static const ucs2_t __jisx0213_1_bmp_decmap[2197] = {
+static const struct dbcs_index jisx0213_1_bmp_decmap[256] = {
+static const ucs2_t __jisx0213_2_bmp_decmap[2425] = {
+static const struct dbcs_index jisx0213_2_bmp_decmap[256] = {
+static const DBCHAR __jisx0213_bmp_encmap[27287] = {
+static const struct unim_index jisx0213_bmp_encmap[256] = {
+static const ucs2_t __jisx0213_1_emp_decmap[340] = {
+static const struct dbcs_index jisx0213_1_emp_decmap[256] = {
+static const ucs2_t __jisx0213_2_emp_decmap[2053] = {
+static const struct dbcs_index jisx0213_2_emp_decmap[256] = {
+static const DBCHAR __jisx0213_emp_encmap[8787] = {
+static const struct unim_index jisx0213_emp_encmap[256] = {
diff --git a/pypy/translator/c/src/cjkcodecs/mappings_kr.h b/pypy/translator/c/src/cjkcodecs/mappings_kr.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/mappings_kr.h
@@ -0,0 +1,3251 @@
+static const ucs2_t __ksx1001_decmap[8264] = {
+static const struct dbcs_index ksx1001_decmap[256] = {
+static const ucs2_t __cp949ext_decmap[9650] = {
+static const struct dbcs_index cp949ext_decmap[256] = {
+static const DBCHAR __cp949_encmap[33133] = {
+static const struct unim_index cp949_encmap[256] = {
diff --git a/pypy/translator/c/src/cjkcodecs/mappings_tw.h b/pypy/translator/c/src/cjkcodecs/mappings_tw.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/mappings_tw.h
@@ -0,0 +1,2633 @@
+static const ucs2_t __big5_decmap[16702] = {
+static const struct dbcs_index big5_decmap[256] = {
+static const DBCHAR __big5_encmap[21764] = {
+static const struct unim_index big5_encmap[256] = {
+static const ucs2_t __cp950ext_decmap[224] = {
+static const struct dbcs_index cp950ext_decmap[256] = {
+static const DBCHAR __cp950ext_encmap[581] = {
+static const struct unim_index cp950ext_encmap[256] = {
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.c b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.c
@@ -0,0 +1,211 @@
+#include <stdlib.h>
+#include "src/cjkcodecs/multibytecodec.h"
+struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
+                                         char *inbuf, Py_ssize_t inlen)  /* allocate a decoder over inbuf[0..inlen); returns NULL on failure */
+  struct pypy_cjk_dec_s *d = malloc(sizeof(struct pypy_cjk_dec_s));
+  if (!d)
+    return NULL;
+  if (codec->decinit != NULL && codec->decinit(&d->state, codec->config) != 0)  /* decinit is optional; a non-zero result aborts */
+    goto errorexit;
+  d->codec = codec;
+  d->inbuf_start = inbuf;  /* the input is referenced here, not copied */
+  d->inbuf = inbuf;
+  d->inbuf_end = inbuf + inlen;
+  d->outbuf_start = (inlen <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE)) ?  /* guards the size multiplication below against overflow */
+                     malloc(inlen * sizeof(Py_UNICODE)) :  /* initial capacity: one Py_UNICODE per input byte */
+                     NULL);
+  if (!d->outbuf_start)
+    goto errorexit;
+  d->outbuf = d->outbuf_start;
+  d->outbuf_end = d->outbuf_start + inlen;
+  return d;
+ errorexit:
+  free(d);  /* only d itself is allocated on every path that reaches here */
+  return NULL;
+void pypy_cjk_dec_free(struct pypy_cjk_dec_s *d)
+  free(d->outbuf_start);
+  free(d);
+static int expand_decodebuffer(struct pypy_cjk_dec_s *d, Py_ssize_t esize)  /* enlarge d's output buffer; returns 0, or -1 if it cannot grow */
+  Py_ssize_t orgpos, orgsize;
+  Py_UNICODE *newbuf;
+  orgpos = d->outbuf - d->outbuf_start;  /* current write offset, restored after realloc */
+  orgsize = d->outbuf_end - d->outbuf_start;  /* current capacity, in Py_UNICODE units */
+  esize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);  /* grow by at least half the capacity; |1 keeps that step non-zero */
+  newbuf = (esize <= (PY_SSIZE_T_MAX / sizeof(Py_UNICODE) - orgsize) ?  /* refuse sizes whose byte count would exceed PY_SSIZE_T_MAX */
+            realloc(d->outbuf_start, (orgsize + esize) * sizeof(Py_UNICODE)) :
+            NULL);
+  if (!newbuf)
+    return -1;  /* d is left unchanged and still owns the old buffer */
+  d->outbuf_start = newbuf;
+  d->outbuf = newbuf + orgpos;
+  d->outbuf_end = newbuf + orgsize + esize;
+  return 0;
+Py_ssize_t pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *d)  /* run the codec's decode step, enlarging the output buffer as needed */
+  while (1)
+    {
+      Py_ssize_t r;
+      Py_ssize_t inleft = (Py_ssize_t)(d->inbuf_end - d->inbuf);  /* input bytes not yet consumed */
+      Py_ssize_t outleft = (Py_ssize_t)(d->outbuf_end - d->outbuf);  /* free output slots */
+      if (inleft == 0)
+        return 0;  /* all input consumed */
+      r = d->codec->decode(&d->state, d->codec->config,
+                           &d->inbuf, inleft, &d->outbuf, outleft);
+      if (r != MBERR_TOOSMALL)
+        return r;  /* any other result is passed through to the caller */
+      /* output buffer too small; grow it and continue. */
+      if (expand_decodebuffer(d, -1) == -1)  /* -1: no minimum requested, use the default growth step */
+        return MBERR_NOMEMORY;
+    }
+Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *d)
+  return d->outbuf_start;
+Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *d)
+  return d->outbuf - d->outbuf_start;
+Py_ssize_t pypy_cjk_dec_inbuf_remaining(struct pypy_cjk_dec_s *d)
+  return d->inbuf_end - d->inbuf;
+Py_ssize_t pypy_cjk_dec_inbuf_consumed(struct pypy_cjk_dec_s* d)
+  return d->inbuf - d->inbuf_start;
+struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
+                                         Py_UNICODE *inbuf, Py_ssize_t inlen)  /* allocate an encoder over inbuf[0..inlen); returns NULL on failure */
+  Py_ssize_t outlen;
+  struct pypy_cjk_enc_s *d = malloc(sizeof(struct pypy_cjk_enc_s));
+  if (!d)
+    return NULL;
+  if (codec->encinit != NULL && codec->encinit(&d->state, codec->config) != 0)  /* encinit is optional; a non-zero result aborts */
+    goto errorexit;
+  d->codec = codec;
+  d->inbuf_start = inbuf;  /* the input is referenced here, not copied */
+  d->inbuf = inbuf;
+  d->inbuf_end = inbuf + inlen;
+  if (inlen > (PY_SSIZE_T_MAX - 16) / 2)  /* keeps the outlen computation below from overflowing */
+    goto errorexit;
+  outlen = inlen * 2 + 16;  /* initial capacity: two bytes per input character plus 16 spare */
+  d->outbuf_start = malloc(outlen);
+  if (!d->outbuf_start)
+    goto errorexit;
+  d->outbuf = d->outbuf_start;
+  d->outbuf_end = d->outbuf_start + outlen;
+  return d;
+ errorexit:
+  free(d);  /* only d itself is allocated on every path that reaches here */
+  return NULL;
+void pypy_cjk_enc_free(struct pypy_cjk_enc_s *d)
+  free(d->outbuf_start);
+  free(d);
+static int expand_encodebuffer(struct pypy_cjk_enc_s *d, Py_ssize_t esize)  /* enlarge d's output buffer; returns 0, or -1 if it cannot grow */
+  Py_ssize_t orgpos, orgsize;
+  unsigned char *newbuf;
+  orgpos = d->outbuf - d->outbuf_start;  /* current write offset, restored after realloc */
+  orgsize = d->outbuf_end - d->outbuf_start;  /* current capacity, in bytes */
+  esize = (esize < (orgsize >> 1) ? (orgsize >> 1) | 1 : esize);  /* grow by at least half the capacity; |1 keeps that step non-zero */
+  newbuf = (esize <= PY_SSIZE_T_MAX - orgsize ?  /* refuse a total size above PY_SSIZE_T_MAX */
+            realloc(d->outbuf_start, orgsize + esize) :
+            NULL);
+  if (!newbuf)
+    return -1;  /* d is left unchanged and still owns the old buffer */
+  d->outbuf_start = newbuf;
+  d->outbuf = newbuf + orgpos;
+  d->outbuf_end = newbuf + orgsize + esize;
+  return 0;
+#define MBENC_RESET     (MBENC_MAX<<1)  /* next flag bit above MBENC_MAX; parenthesized so it expands safely inside larger expressions */
+Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *d)  /* run the codec's encode step, enlarging the output buffer as needed */
+  int flags = MBENC_FLUSH | MBENC_RESET;   /* XXX always, for now */
+  while (1)
+    {
+      Py_ssize_t r;
+      Py_ssize_t inleft = (Py_ssize_t)(d->inbuf_end - d->inbuf);  /* input characters not yet consumed */
+      Py_ssize_t outleft = (Py_ssize_t)(d->outbuf_end - d->outbuf);  /* free output bytes */
+      if (inleft == 0)
+        return 0;  /* all input consumed */
+      r = d->codec->encode(&d->state, d->codec->config,
+                           &d->inbuf, inleft, &d->outbuf, outleft, flags);
+      if (r != MBERR_TOOSMALL)
+        return r;  /* any other result is passed through to the caller */
+      /* output buffer too small; grow it and continue. */
+      if (expand_encodebuffer(d, -1) == -1)  /* -1: no minimum requested, use the default growth step */
+        return MBERR_NOMEMORY;
+    }
+Py_ssize_t pypy_cjk_enc_reset(struct pypy_cjk_enc_s *d)
+  if (d->codec->encreset == NULL)
+    return 0;
+  while (1)
+    {
+      Py_ssize_t r;
+      Py_ssize_t outleft = (Py_ssize_t)(d->outbuf_end - d->outbuf);
+      r = d->codec->encreset(&d->state, d->codec->config, &d->outbuf, outleft);
+      if (r != MBERR_TOOSMALL)
+        return r;
+      /* output buffer too small; grow it and continue. */
+      if (expand_encodebuffer(d, -1) == -1)
+        return MBERR_NOMEMORY;
+    }
+char *pypy_cjk_enc_outbuf(struct pypy_cjk_enc_s *d)
+  return d->outbuf_start;
+Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *d)
+  return d->outbuf - d->outbuf_start;
+Py_ssize_t pypy_cjk_enc_inbuf_remaining(struct pypy_cjk_enc_s *d)
+  return d->inbuf_end - d->inbuf;
+Py_ssize_t pypy_cjk_enc_inbuf_consumed(struct pypy_cjk_enc_s* d)
+  return d->inbuf - d->inbuf_start;
diff --git a/pypy/translator/c/src/cjkcodecs/multibytecodec.h b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
new file mode 100644
--- /dev/null
+++ b/pypy/translator/c/src/cjkcodecs/multibytecodec.h
@@ -0,0 +1,165 @@
+#include <stddef.h>
+#include <assert.h>
+#ifdef _WIN64
+typedef __int64 ssize_t;  /* 64-bit signed size type for _WIN64 builds */
+#elif defined(_WIN32)
+typedef int ssize_t;
+#include <unistd.h>
+#ifndef Py_UNICODE_SIZE
+#ifdef _WIN32
+#define Py_UNICODE_SIZE 2
+#define Py_UNICODE_SIZE 4
+typedef wchar_t Py_UNICODE;
+typedef ssize_t Py_ssize_t;
+#define PY_SSIZE_T_MAX   ((Py_ssize_t)(((size_t) -1) >> 1))
+#ifdef _WIN32
+typedef unsigned int ucs4_t;
+typedef unsigned short ucs2_t, DBCHAR;
+#include <stdint.h>
+typedef uint32_t ucs4_t;
+typedef uint16_t ucs2_t, DBCHAR;
+typedef union {
+    void *p;
+    int i;
+    unsigned char c[8];
+    ucs2_t u2[4];
+    ucs4_t u4[2];
+} MultibyteCodec_State;
+typedef int (*mbcodec_init)(const void *config);
+typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
+                        const void *config,
+                        const Py_UNICODE **inbuf, Py_ssize_t inleft,
+                        unsigned char **outbuf, Py_ssize_t outleft,
+                        int flags);
+typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
+                                 const void *config);
+typedef Py_ssize_t (*mbencodereset_func)(MultibyteCodec_State *state,
+                        const void *config,
+                        unsigned char **outbuf, Py_ssize_t outleft);
+typedef Py_ssize_t (*mbdecode_func)(MultibyteCodec_State *state,
+                        const void *config,
+                        const unsigned char **inbuf, Py_ssize_t inleft,
+                        Py_UNICODE **outbuf, Py_ssize_t outleft);
+typedef int (*mbdecodeinit_func)(MultibyteCodec_State *state,
+                                 const void *config);
+typedef Py_ssize_t (*mbdecodereset_func)(MultibyteCodec_State *state,
+                                         const void *config);
+typedef struct MultibyteCodec_s {
+    const char *encoding;
+    const void *config;
+    mbcodec_init codecinit;
+    mbencode_func encode;
+    mbencodeinit_func encinit;
+    mbencodereset_func encreset;
+    mbdecode_func decode;
+    mbdecodeinit_func decinit;
+    mbdecodereset_func decreset;
+} MultibyteCodec;
+/* positive return values report illegal sequences; the negative codes below are errors */
+#define MBERR_TOOSMALL          (-1) /* insufficient output buffer space */
+#define MBERR_TOOFEW            (-2) /* incomplete input buffer */
+#define MBERR_INTERNAL          (-3) /* internal runtime error */
+#define MBERR_NOMEMORY          (-4) /* out of memory */
+#define MBENC_FLUSH             0x0001 /* encode all characters encodable */
+#define MBENC_MAX               MBENC_FLUSH
+struct pypy_cjk_dec_s {
+  const MultibyteCodec *codec;
+  MultibyteCodec_State state;
+  const unsigned char *inbuf_start, *inbuf, *inbuf_end;
+  Py_UNICODE *outbuf_start, *outbuf, *outbuf_end;
+struct pypy_cjk_dec_s *pypy_cjk_dec_init(const MultibyteCodec *codec,
+                                         char *inbuf, Py_ssize_t inlen);
+void pypy_cjk_dec_free(struct pypy_cjk_dec_s *);
+Py_ssize_t pypy_cjk_dec_chunk(struct pypy_cjk_dec_s *);
+Py_UNICODE *pypy_cjk_dec_outbuf(struct pypy_cjk_dec_s *);
+Py_ssize_t pypy_cjk_dec_outlen(struct pypy_cjk_dec_s *);
+Py_ssize_t pypy_cjk_dec_inbuf_remaining(struct pypy_cjk_dec_s *d);
+Py_ssize_t pypy_cjk_dec_inbuf_consumed(struct pypy_cjk_dec_s* d);
+struct pypy_cjk_enc_s {
+  const MultibyteCodec *codec;
+  MultibyteCodec_State state;
+  const Py_UNICODE *inbuf_start, *inbuf, *inbuf_end;
+  unsigned char *outbuf_start, *outbuf, *outbuf_end;
+struct pypy_cjk_enc_s *pypy_cjk_enc_init(const MultibyteCodec *codec,
+                                         Py_UNICODE *inbuf, Py_ssize_t inlen);
+void pypy_cjk_enc_free(struct pypy_cjk_enc_s *);
+Py_ssize_t pypy_cjk_enc_chunk(struct pypy_cjk_enc_s *);
+Py_ssize_t pypy_cjk_enc_reset(struct pypy_cjk_enc_s *);
+char *pypy_cjk_enc_outbuf(struct pypy_cjk_enc_s *);
+Py_ssize_t pypy_cjk_enc_outlen(struct pypy_cjk_enc_s *);
+Py_ssize_t pypy_cjk_enc_inbuf_remaining(struct pypy_cjk_enc_s *d);
+Py_ssize_t pypy_cjk_enc_inbuf_consumed(struct pypy_cjk_enc_s* d);
+/* list of codecs defined in the .c files */
+#define DEFINE_CODEC(name)                              \
+    const MultibyteCodec *pypy_cjkcodec_##name(void);
+// _codecs_cn
diff --git a/pypy/translator/goal/app_main.py b/pypy/translator/goal/app_main.py
--- a/pypy/translator/goal/app_main.py
+++ b/pypy/translator/goal/app_main.py
@@ -204,9 +204,11 @@
         dirname = resolvedirof(search)
         if dirname == search:
             # not found!  let's hope that the compiled-in path is ok
-            print >> sys.stderr, ('debug: WARNING: library path not found, '
-                                  'using compiled-in sys.path '
-                                  'and sys.prefix will be unset')
+            print >> sys.stderr, """\
+debug: WARNING: Library path not found, using compiled-in sys.path.
+debug: WARNING: 'sys.prefix' will not be set.
+debug: WARNING: Make sure the pypy binary is kept inside its tree of files.
+debug: WARNING: It is ok to create a symlink to it from somewhere else."""
             newpath = sys.path[:]
         newpath = sys.pypy_initial_path(dirname)

More information about the pypy-commit mailing list