[Python-checkins] cpython (merge default -> default): merge heads

giampaolo.rodola python-checkins at python.org
Wed May 16 16:03:13 CEST 2012


http://hg.python.org/cpython/rev/828be43434e8
changeset:   76998:828be43434e8
parent:      76997:39d24533c6b7
parent:      76996:8c8709b98762
user:        Giampaolo Rodola' <g.rodola at gmail.com>
date:        Wed May 16 16:03:07 2012 +0200
summary:
  merge heads

files:
  .hgignore                       |    9 +-
  Doc/library/email.generator.rst |   17 +-
  Doc/library/http.client.rst     |   15 +
  Lib/http/client.py              |    8 +
  Lib/http/server.py              |   10 +-
  Lib/test/test_bisect.py         |   47 ++-
  Lib/test/test_pkgutil.py        |   51 +++-
  Lib/tkinter/__init__.py         |   28 +-
  Misc/ACKS                       |    2 +
  Misc/NEWS                       |   18 +
  Modules/_bisectmodule.c         |    4 +-
  Modules/_csv.c                  |  105 ++++--
  Objects/exceptions.c            |    3 +-
  Objects/rangeobject.c           |    2 +-
  Objects/stringlib/codecs.h      |  149 +++++++++-
  Objects/unicodeobject.c         |  297 +++++--------------
  Python/freeze_importlib.py      |    2 +
  Python/importlib.h              |  Bin 
  18 files changed, 507 insertions(+), 260 deletions(-)


diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -32,7 +32,6 @@
 Modules/config.c
 Modules/ld_so_aix$
 Parser/pgen$
-PCbuild/amd64/
 ^core
 ^python-gdb.py
 ^python.exe-gdb.py
@@ -56,6 +55,12 @@
 PC/pythonnt_rc*.h
 PC/*.obj
 PC/*.exe
+PC/*/*.user
+PC/*/*.ncb
+PC/*/*.suo
+PC/*/Win32-temp-*
+PC/*/x64-temp-*
+PC/*/amd64
 PCbuild/*.exe
 PCbuild/*.dll
 PCbuild/*.pdb
@@ -69,6 +74,8 @@
 PCbuild/*.*sdf
 PCbuild/Win32-temp-*
 PCbuild/x64-temp-*
+PCbuild/amd64
+BuildLog.htm
 __pycache__
 Modules/_testembed
 .coverage
diff --git a/Doc/library/email.generator.rst b/Doc/library/email.generator.rst
--- a/Doc/library/email.generator.rst
+++ b/Doc/library/email.generator.rst
@@ -17,10 +17,10 @@
 standards-compliant way, should handle MIME and non-MIME email messages just
 fine, and is designed so that the transformation from flat text, to a message
 structure via the :class:`~email.parser.Parser` class, and back to flat text,
-is idempotent (the input is identical to the output).  On the other hand, using
-the Generator on a :class:`~email.message.Message` constructed by program may
-result in changes to the :class:`~email.message.Message` object as defaults are
-filled in.
+is idempotent (the input is identical to the output) [#]_.  On the other hand,
+using the Generator on a :class:`~email.message.Message` constructed by program
+may result in changes to the :class:`~email.message.Message` object as defaults
+are filled in.
 
 :class:`bytes` output can be generated using the :class:`BytesGenerator` class.
 If the message object structure contains non-ASCII bytes, this generator's
@@ -223,3 +223,12 @@
    The default value for *fmt* is ``None``, meaning ::
 
       [Non-text (%(type)s) part of message omitted, filename %(filename)s]
+
+
+.. rubric:: Footnotes
+
+.. [#] This statement assumes that you use the appropriate setting for the
+       ``unixfrom`` argument, and that you set maxheaderlen=0 (which will
+       preserve whatever the input line lengths were).  It is also not strictly
+       true, since in many cases runs of whitespace in headers are collapsed
+       into single blanks.  The latter is a bug that will eventually be fixed.
diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -339,6 +339,15 @@
 | :const:`UPGRADE_REQUIRED`                | ``426`` | HTTP Upgrade to TLS,                                                  |
 |                                          |         | :rfc:`2817`, Section 6                                                |
 +------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`PRECONDITION_REQUIRED`           | ``428`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 3                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`TOO_MANY_REQUESTS`               | ``429`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 4                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`REQUEST_HEADER_FIELDS_TOO_LARGE` | ``431`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 5                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
 | :const:`INTERNAL_SERVER_ERROR`           | ``500`` | HTTP/1.1, `RFC 2616, Section                                          |
 |                                          |         | 10.5.1                                                                |
 |                                          |         | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.1>`_  |
@@ -369,6 +378,12 @@
 | :const:`NOT_EXTENDED`                    | ``510`` | An HTTP Extension Framework,                                          |
 |                                          |         | :rfc:`2774`, Section 7                                                |
 +------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`NETWORK_AUTHENTICATION_REQUIRED` | ``511`` | Additional HTTP Status Codes,                                         |
+|                                          |         | :rfc:`6585`, Section 6                                                |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+
+  .. versionchanged:: 3.3
+     Added codes ``428``, ``429``, ``431`` and ``511`` from :rfc:`6585`.
 
 
 .. data:: responses
diff --git a/Lib/http/client.py b/Lib/http/client.py
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -141,6 +141,9 @@
 LOCKED = 423
 FAILED_DEPENDENCY = 424
 UPGRADE_REQUIRED = 426
+PRECONDITION_REQUIRED = 428
+TOO_MANY_REQUESTS = 429
+REQUEST_HEADER_FIELDS_TOO_LARGE = 431
 
 # server error
 INTERNAL_SERVER_ERROR = 500
@@ -151,6 +154,7 @@
 HTTP_VERSION_NOT_SUPPORTED = 505
 INSUFFICIENT_STORAGE = 507
 NOT_EXTENDED = 510
+NETWORK_AUTHENTICATION_REQUIRED = 511
 
 # Mapping status codes to official W3C names
 responses = {
@@ -192,6 +196,9 @@
     415: 'Unsupported Media Type',
     416: 'Requested Range Not Satisfiable',
     417: 'Expectation Failed',
+    428: 'Precondition Required',
+    429: 'Too Many Requests',
+    431: 'Request Header Fields Too Large',
 
     500: 'Internal Server Error',
     501: 'Not Implemented',
@@ -199,6 +206,7 @@
     503: 'Service Unavailable',
     504: 'Gateway Timeout',
     505: 'HTTP Version Not Supported',
+    511: 'Network Authentication Required',
 }
 
 # maximal amount of data to read at one time in _safe_read
diff --git a/Lib/http/server.py b/Lib/http/server.py
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -573,7 +573,7 @@
 
     # Table mapping response codes to messages; entries have the
     # form {code: (shortmessage, longmessage)}.
-    # See RFC 2616.
+    # See RFC 2616 and 6585.
     responses = {
         100: ('Continue', 'Request received, please continue'),
         101: ('Switching Protocols',
@@ -628,6 +628,12 @@
               'Cannot satisfy request range.'),
         417: ('Expectation Failed',
               'Expect condition could not be satisfied.'),
+        428: ('Precondition Required',
+              'The origin server requires the request to be conditional.'),
+        429: ('Too Many Requests', 'The user has sent too many requests '
+              'in a given amount of time ("rate limiting").'),
+        431: ('Request Header Fields Too Large', 'The server is unwilling to '
+              'process the request because its header fields are too large.'),
 
         500: ('Internal Server Error', 'Server got itself in trouble'),
         501: ('Not Implemented',
@@ -638,6 +644,8 @@
         504: ('Gateway Timeout',
               'The gateway server did not receive a timely response'),
         505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
+        511: ('Network Authentication Required',
+              'The client needs to authenticate to gain network access.'),
         }
 
 
diff --git a/Lib/test/test_bisect.py b/Lib/test/test_bisect.py
--- a/Lib/test/test_bisect.py
+++ b/Lib/test/test_bisect.py
@@ -23,6 +23,28 @@
 import bisect as c_bisect
 
 
+class Range(object):
+    """A trivial range()-like object without any integer width limitations."""
+    def __init__(self, start, stop):
+        self.start = start
+        self.stop = stop
+        self.last_insert = None
+
+    def __len__(self):
+        return self.stop - self.start
+
+    def __getitem__(self, idx):
+        n = self.stop - self.start
+        if idx < 0:
+            idx += n
+        if idx >= n:
+            raise IndexError(idx)
+        return self.start + idx
+
+    def insert(self, idx, item):
+        self.last_insert = idx, item
+
+
 class TestBisect(unittest.TestCase):
     module = None
 
@@ -125,9 +147,28 @@
     def test_large_range(self):
         # Issue 13496
         mod = self.module
-        data = range(sys.maxsize-1)
-        self.assertEqual(mod.bisect_left(data, sys.maxsize-3), sys.maxsize-3)
-        self.assertEqual(mod.bisect_right(data, sys.maxsize-3), sys.maxsize-2)
+        n = sys.maxsize
+        data = range(n-1)
+        self.assertEqual(mod.bisect_left(data, n-3), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3), n-2)
+        self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
+
+    def test_large_pyrange(self):
+        # Same as above, but without C-imposed limits on range() parameters
+        mod = self.module
+        n = sys.maxsize
+        data = Range(0, n-1)
+        self.assertEqual(mod.bisect_left(data, n-3), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3), n-2)
+        self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
+        self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
+        x = n - 100
+        mod.insort_left(data, x, x - 50, x + 50)
+        self.assertEqual(data.last_insert, (x, x))
+        x = n - 200
+        mod.insort_right(data, x, x - 50, x + 50)
+        self.assertEqual(data.last_insert, (x + 1, x))
 
     def test_random(self, n=25):
         from random import randrange
diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@@ -137,8 +137,57 @@
         self.assertEqual(foo.loads, 1)
         del sys.modules['foo']
 
+
+class ExtendPathTests(unittest.TestCase):
+    def create_init(self, pkgname):
+        dirname = tempfile.mkdtemp()
+        self.addCleanup(shutil.rmtree, dirname)
+        sys.path.insert(0, dirname)
+
+        pkgdir = os.path.join(dirname, pkgname)
+        os.mkdir(pkgdir)
+        with open(os.path.join(pkgdir, '__init__.py'), 'w') as fl:
+            fl.write('from pkgutil import extend_path\n__path__ = extend_path(__path__, __name__)\n')
+
+        return dirname
+
+    def create_submodule(self, dirname, pkgname, submodule_name, value):
+        module_name = os.path.join(dirname, pkgname, submodule_name + '.py')
+        with open(module_name, 'w') as fl:
+            print('value={}'.format(value), file=fl)
+
+    def setUp(self):
+        # Create 2 directories on sys.path
+        self.pkgname = 'foo'
+        self.dirname_0 = self.create_init(self.pkgname)
+        self.dirname_1 = self.create_init(self.pkgname)
+
+    def tearDown(self):
+        del sys.path[0]
+        del sys.path[0]
+        del sys.modules['foo']
+        del sys.modules['foo.bar']
+        del sys.modules['foo.baz']
+
+    def test_simple(self):
+        self.create_submodule(self.dirname_0, self.pkgname, 'bar', 0)
+        self.create_submodule(self.dirname_1, self.pkgname, 'baz', 1)
+        import foo.bar
+        import foo.baz
+        # Ensure we read the expected values
+        self.assertEqual(foo.bar.value, 0)
+        self.assertEqual(foo.baz.value, 1)
+
+        # Ensure the path is set up correctly
+        self.assertEqual(sorted(foo.__path__),
+                         sorted([os.path.join(self.dirname_0, self.pkgname),
+                                 os.path.join(self.dirname_1, self.pkgname)]))
+
+    # XXX: test .pkg files
+
+
 def test_main():
-    run_unittest(PkgutilTests, PkgutilPEP302Tests)
+    run_unittest(PkgutilTests, PkgutilPEP302Tests, ExtendPathTests)
     # this is necessary if test is run repeated (like when finding leaks)
     import zipimport
     zipimport._zip_directory_cache.clear()
diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py
--- a/Lib/tkinter/__init__.py
+++ b/Lib/tkinter/__init__.py
@@ -540,12 +540,19 @@
 
         The type keyword specifies the form in which the data is
         to be returned and should be an atom name such as STRING
-        or FILE_NAME.  Type defaults to STRING.
+        or FILE_NAME.  Type defaults to STRING, except on X11, where the default
+        is to try UTF8_STRING and fall back to STRING.
 
         This command is equivalent to:
 
         selection_get(CLIPBOARD)
         """
+        if 'type' not in kw and self._windowingsystem == 'x11':
+            try:
+                kw['type'] = 'UTF8_STRING'
+                return self.tk.call(('clipboard', 'get') + self._options(kw))
+            except TclError:
+                del kw['type']
         return self.tk.call(('clipboard', 'get') + self._options(kw))
 
     def clipboard_clear(self, **kw):
@@ -627,8 +634,16 @@
         A keyword parameter selection specifies the name of
         the selection and defaults to PRIMARY.  A keyword
         parameter displayof specifies a widget on the display
-        to use."""
+        to use. A keyword parameter type specifies the form of data to be
+        fetched, defaulting to STRING except on X11, where UTF8_STRING is tried
+        before STRING."""
         if 'displayof' not in kw: kw['displayof'] = self._w
+        if 'type' not in kw and self._windowingsystem == 'x11':
+            try:
+                kw['type'] = 'UTF8_STRING'
+                return self.tk.call(('selection', 'get') + self._options(kw))
+            except TclError:
+                del kw['type']
         return self.tk.call(('selection', 'get') + self._options(kw))
     def selection_handle(self, command, **kw):
         """Specify a function COMMAND to call if the X
@@ -1043,6 +1058,15 @@
         if displayof is None:
             return ('-displayof', self._w)
         return ()
+    @property
+    def _windowingsystem(self):
+        """Internal function."""
+        try:
+            return self._root()._windowingsystem_cached
+        except AttributeError:
+            ws = self._root()._windowingsystem_cached = \
+                        self.tk.call('tk', 'windowingsystem')
+            return ws
     def _options(self, cnf, kw = None):
         """Internal function."""
         if kw:
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -919,6 +919,7 @@
 Michael Schneider
 Peter Schneider-Kamp
 Arvin Schnell
+Robin Schreiber
 Chad J. Schroeder
 Sam Schulenburg
 Stefan Schwarzer
@@ -1129,6 +1130,7 @@
 Hirokazu Yamamoto
 Ka-Ping Yee
 Jason Yeo
+EungJun Yi
 Bob Yodlowski
 Danny Yoo
 George Yoshida
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
 Core and Builtins
 -----------------
 
+- Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
+  Patch by Serhiy Storchaka.
+
 - asdl_seq and asdl_int_seq are now Py_ssize_t sized.
 
 - Issue #14133 (PEP 415): Implement suppression of __context__ display with an
@@ -31,6 +34,21 @@
 Library
 -------
 
+- Issue #14829: Fix bisect and range() indexing with large indices
+  (>= 2 ** 32) under 64-bit Windows.
+
+- Issue #14732: The _csv module now uses PEP 3121 module initialization.
+  Patch by Robin Schreiber.
+
+- Issue #14809: Add HTTP status codes introduced by RFC 6585 to http.server
+  and http.client. Patch by EungJun Yi.
+
+- Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when
+  accessing the Tk clipboard.  Modify clipboad_get() to first request type
+  UTF8_STRING when no specific type is requested in an X11 windowing
+  environment, falling back to the current default type STRING if that fails.
+  Original patch by Thomas Kluyver.
+
 - Issue #14773: Fix os.fwalk() failing on dangling symlinks.
 
 - Issue #12541: Be lenient with quotes around Realm field of HTTP Basic
diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c
--- a/Modules/_bisectmodule.c
+++ b/Modules/_bisectmodule.c
@@ -3,6 +3,7 @@
 Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru).
 */
 
+#define PY_SSIZE_T_CLEAN
 #include "Python.h"
 
 static Py_ssize_t
@@ -195,8 +196,7 @@
             return NULL;
     } else {
         _Py_IDENTIFIER(insert);
-
-        result = _PyObject_CallMethodId(list, &PyId_insert, "iO", index, item);
+        result = _PyObject_CallMethodId(list, &PyId_insert, "nO", index, item);
         if (result == NULL)
             return NULL;
         Py_DECREF(result);
diff --git a/Modules/_csv.c b/Modules/_csv.c
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -16,9 +16,39 @@
 #define IS_BASESTRING(o) \
     PyUnicode_Check(o)
 
-static PyObject *error_obj;     /* CSV exception */
-static PyObject *dialects;      /* Dialect registry */
-static long field_limit = 128 * 1024;   /* max parsed field size */
+typedef struct {
+    PyObject *error_obj;   /* CSV exception */
+    PyObject *dialects;   /* Dialect registry */
+    long field_limit;   /* max parsed field size */
+} _csvstate;
+
+#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
+
+static int
+_csv_clear(PyObject *m)
+{
+    Py_CLEAR(_csvstate(m)->error_obj);
+    Py_CLEAR(_csvstate(m)->dialects);
+    return 0;
+}
+
+static int
+_csv_traverse(PyObject *m, visitproc visit, void *arg)
+{
+    Py_VISIT(_csvstate(m)->error_obj);
+    Py_VISIT(_csvstate(m)->dialects);
+    return 0;
+}
+
+static void
+_csv_free(void *m)
+{
+   _csv_clear((PyObject *)m);
+}
+
+static struct PyModuleDef _csvmodule;
+
+#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
 
 typedef enum {
     START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
@@ -103,10 +133,10 @@
 {
     PyObject *dialect_obj;
 
-    dialect_obj = PyDict_GetItem(dialects, name_obj);
+    dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
     if (dialect_obj == NULL) {
         if (!PyErr_Occurred())
-            PyErr_Format(error_obj, "unknown dialect");
+            PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
     }
     else
         Py_INCREF(dialect_obj);
@@ -544,9 +574,9 @@
 static int
 parse_add_char(ReaderObj *self, Py_UCS4 c)
 {
-    if (self->field_len >= field_limit) {
-        PyErr_Format(error_obj, "field larger than field limit (%ld)",
-                     field_limit);
+    if (self->field_len >= _csvstate_global->field_limit) {
+        PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
+                     _csvstate_global->field_limit);
         return -1;
     }
     if (self->field_len == self->field_size && !parse_grow_buff(self))
@@ -703,7 +733,7 @@
         }
         else {
             /* illegal */
-            PyErr_Format(error_obj, "'%c' expected after '%c'",
+            PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
                             dialect->delimiter,
                             dialect->quotechar);
             return -1;
@@ -716,7 +746,7 @@
         else if (c == '\0')
             self->state = START_RECORD;
         else {
-            PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
+            PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
             return -1;
         }
         break;
@@ -755,12 +785,12 @@
         if (lineobj == NULL) {
             /* End of input OR exception */
             if (!PyErr_Occurred() && self->field_len != 0)
-                PyErr_Format(error_obj,
+                PyErr_Format(_csvstate_global->error_obj,
                              "newline inside string");
             return NULL;
         }
         if (!PyUnicode_Check(lineobj)) {
-            PyErr_Format(error_obj,
+            PyErr_Format(_csvstate_global->error_obj,
                          "iterator should return strings, "
                          "not %.200s "
                          "(did you open the file in text mode?)",
@@ -778,7 +808,7 @@
             c = PyUnicode_READ(kind, data, pos);
             if (c == '\0') {
                 Py_DECREF(lineobj);
-                PyErr_Format(error_obj,
+                PyErr_Format(_csvstate_global->error_obj,
                              "line contains NULL byte");
                 goto err;
             }
@@ -994,7 +1024,7 @@
             }
             if (want_escape) {
                 if (!dialect->escapechar) {
-                    PyErr_Format(error_obj,
+                    PyErr_Format(_csvstate_global->error_obj,
                                  "need to escape, but no escapechar set");
                     return -1;
                 }
@@ -1010,7 +1040,7 @@
      */
     if (i == 0 && quote_empty) {
         if (dialect->quoting == QUOTE_NONE) {
-            PyErr_Format(error_obj,
+            PyErr_Format(_csvstate_global->error_obj,
                 "single empty field record must be quoted");
             return -1;
         }
@@ -1127,7 +1157,7 @@
     PyObject *line, *result;
 
     if (!PySequence_Check(seq))
-        return PyErr_Format(error_obj, "sequence expected");
+        return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
 
     len = PySequence_Length(seq);
     if (len < 0)
@@ -1353,7 +1383,7 @@
 static PyObject *
 csv_list_dialects(PyObject *module, PyObject *args)
 {
-    return PyDict_Keys(dialects);
+    return PyDict_Keys(_csvstate_global->dialects);
 }
 
 static PyObject *
@@ -1372,7 +1402,7 @@
     dialect = _call_dialect(dialect_obj, kwargs);
     if (dialect == NULL)
         return NULL;
-    if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
+    if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
         Py_DECREF(dialect);
         return NULL;
     }
@@ -1384,8 +1414,8 @@
 static PyObject *
 csv_unregister_dialect(PyObject *module, PyObject *name_obj)
 {
-    if (PyDict_DelItem(dialects, name_obj) < 0)
-        return PyErr_Format(error_obj, "unknown dialect");
+    if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
+        return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
     Py_INCREF(Py_None);
     return Py_None;
 }
@@ -1400,7 +1430,7 @@
 csv_field_size_limit(PyObject *module, PyObject *args)
 {
     PyObject *new_limit = NULL;
-    long old_limit = field_limit;
+    long old_limit = _csvstate_global->field_limit;
 
     if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
         return NULL;
@@ -1410,9 +1440,9 @@
                          "limit must be an integer");
             return NULL;
         }
-        field_limit = PyLong_AsLong(new_limit);
-        if (field_limit == -1 && PyErr_Occurred()) {
-            field_limit = old_limit;
+        _csvstate_global->field_limit = PyLong_AsLong(new_limit);
+        if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
+            _csvstate_global->field_limit = old_limit;
             return NULL;
         }
     }
@@ -1551,17 +1581,16 @@
     { NULL, NULL }
 };
 
-
 static struct PyModuleDef _csvmodule = {
     PyModuleDef_HEAD_INIT,
     "_csv",
     csv_module_doc,
-    -1,
+    sizeof(_csvstate),
     csv_methods,
     NULL,
-    NULL,
-    NULL,
-    NULL
+    _csv_traverse,
+    _csv_clear,
+    _csv_free
 };
 
 PyMODINIT_FUNC
@@ -1589,11 +1618,16 @@
                                    MODULE_VERSION) == -1)
         return NULL;
 
+    /* Set the field limit */
+    _csvstate(module)->field_limit = 128 * 1024;
+    /* Do I still need to add this var to the Module Dict? */
+
     /* Add _dialects dictionary */
-    dialects = PyDict_New();
-    if (dialects == NULL)
+    _csvstate(module)->dialects = PyDict_New();
+    if (_csvstate(module)->dialects == NULL)
         return NULL;
-    if (PyModule_AddObject(module, "_dialects", dialects))
+    Py_INCREF(_csvstate(module)->dialects);
+    if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
         return NULL;
 
     /* Add quote styles into dictionary */
@@ -1609,9 +1643,10 @@
         return NULL;
 
     /* Add the CSV exception object to the module. */
-    error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
-    if (error_obj == NULL)
+    _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
+    if (_csvstate(module)->error_obj == NULL)
         return NULL;
-    PyModule_AddObject(module, "Error", error_obj);
+    Py_INCREF(_csvstate(module)->error_obj);
+    PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
     return module;
 }
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -349,7 +349,8 @@
 
 static struct PyMemberDef BaseException_members[] = {
     {"__suppress_context__", T_BOOL,
-     offsetof(PyBaseExceptionObject, suppress_context)}
+     offsetof(PyBaseExceptionObject, suppress_context)},
+    {NULL}
 };
 
 
diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c
--- a/Objects/rangeobject.c
+++ b/Objects/rangeobject.c
@@ -308,7 +308,7 @@
 static PyObject *
 range_item(rangeobject *r, Py_ssize_t i)
 {
-    PyObject *res, *arg = PyLong_FromLong(i);
+    PyObject *res, *arg = PyLong_FromSsize_t(i);
     if (!arg) {
         return NULL;
     }
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -215,7 +215,6 @@
     goto Return;
 }
 
-#undef LONG_PTR_MASK
 #undef ASCII_CHAR_MASK
 
 
@@ -415,4 +414,152 @@
 #undef MAX_SHORT_UNICHARS
 }
 
+/* The pattern for constructing UCS2-repeated masks. */
+#if SIZEOF_LONG == 8
+# define UCS2_REPEAT_MASK 0x0001000100010001ul
+#elif SIZEOF_LONG == 4
+# define UCS2_REPEAT_MASK 0x00010001ul
+#else
+# error C 'long' size should be either 4 or 8!
+#endif
+
+/* The mask for fast checking. */
+#if STRINGLIB_SIZEOF_CHAR == 1
+/* The mask for fast checking of whether a C 'long' contains a
+   non-ASCII or non-Latin1 UTF16-encoded characters. */
+# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
+#else
+/* The mask for fast checking of whether a C 'long' may contain
+   UTF16-encoded surrogate characters. This is an efficient heuristic,
+   assuming that non-surrogate characters with a code point >= 0x8000 are
+   rare in most input.
+*/
+# define FAST_CHAR_MASK         (UCS2_REPEAT_MASK * 0x8000u)
+#endif
+/* The mask for fast byte-swapping. */
+#define STRIPPED_MASK           (UCS2_REPEAT_MASK * 0x00FFu)
+/* Swap bytes. */
+#define SWAB(value)             ((((value) >> 8) & STRIPPED_MASK) | \
+                                 (((value) & STRIPPED_MASK) << 8))
+
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
+                        STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
+                        int native_ordering)
+{
+    Py_UCS4 ch;
+    const unsigned char *aligned_end =
+            (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
+    const unsigned char *q = *inptr;
+    STRINGLIB_CHAR *p = dest + *outpos;
+    /* Offsets from q for retrieving byte pairs in the right order. */
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+    int ihi = !!native_ordering, ilo = !native_ordering;
+#else
+    int ihi = !native_ordering, ilo = !!native_ordering;
+#endif
+    --e;
+
+    while (q < e) {
+        Py_UCS4 ch2;
+        /* First check for possible aligned read of a C 'long'. Unaligned
+           reads are more expensive, better to defer to another iteration. */
+        if (!((size_t) q & LONG_PTR_MASK)) {
+            /* Fast path for runs of in-range non-surrogate chars. */
+            register const unsigned char *_q = q;
+            while (_q < aligned_end) {
+                unsigned long block = * (unsigned long *) _q;
+                if (native_ordering) {
+                    /* Can use buffer directly */
+                    if (block & FAST_CHAR_MASK)
+                        break;
+                }
+                else {
+                    /* Need to byte-swap */
+                    if (block & SWAB(FAST_CHAR_MASK))
+                        break;
+#if STRINGLIB_SIZEOF_CHAR == 1
+                    block >>= 8;
+#else
+                    block = SWAB(block);
+#endif
+                }
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+# if SIZEOF_LONG == 4
+                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+                p[1] = (STRINGLIB_CHAR)(block >> 16);
+# elif SIZEOF_LONG == 8
+                p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+                p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+                p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+                p[3] = (STRINGLIB_CHAR)(block >> 48);
+# endif
+#else
+# if SIZEOF_LONG == 4
+                p[0] = (STRINGLIB_CHAR)(block >> 16);
+                p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# elif SIZEOF_LONG == 8
+                p[0] = (STRINGLIB_CHAR)(block >> 48);
+                p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+                p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+                p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# endif
+#endif
+                _q += SIZEOF_LONG;
+                p += SIZEOF_LONG / 2;
+            }
+            q = _q;
+            if (q >= e)
+                break;
+        }
+
+        ch = (q[ihi] << 8) | q[ilo];
+        q += 2;
+        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+#if STRINGLIB_SIZEOF_CHAR < 2
+            if (ch > STRINGLIB_MAX_CHAR)
+                /* Out-of-range */
+                goto Return;
+#endif
+            *p++ = (STRINGLIB_CHAR)ch;
+            continue;
+        }
+
+        /* UTF-16 code pair: */
+        if (q >= e)
+            goto UnexpectedEnd;
+        if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
+            goto IllegalEncoding;
+        ch2 = (q[ihi] << 8) | q[ilo];
+        q += 2;
+        if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
+            goto IllegalSurrogate;
+        ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
+#if STRINGLIB_SIZEOF_CHAR < 4
+        /* Out-of-range */
+        goto Return;
+#else
+        *p++ = (STRINGLIB_CHAR)ch;
+#endif
+    }
+    ch = 0;
+Return:
+    *inptr = q;
+    *outpos = p - dest;
+    return ch;
+UnexpectedEnd:
+    ch = 1;
+    goto Return;
+IllegalEncoding:
+    ch = 2;
+    goto Return;
+IllegalSurrogate:
+    ch = 3;
+    goto Return;
+}
+#undef UCS2_REPEAT_MASK
+#undef FAST_CHAR_MASK
+#undef STRIPPED_MASK
+#undef SWAB
+#undef LONG_PTR_MASK
 #endif /* STRINGLIB_IS_UNICODE */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5195,25 +5195,6 @@
     return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
 }
 
-/* Two masks for fast checking of whether a C 'long' may contain
-   UTF16-encoded surrogate characters. This is an efficient heuristic,
-   assuming that non-surrogate characters with a code point >= 0x8000 are
-   rare in most input.
-   FAST_CHAR_MASK is used when the input is in native byte ordering,
-   SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering.
-*/
-#if (SIZEOF_LONG == 8)
-# define FAST_CHAR_MASK         0x8000800080008000L
-# define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L
-# define STRIPPED_MASK          0x00FF00FF00FF00FFL
-#elif (SIZEOF_LONG == 4)
-# define FAST_CHAR_MASK         0x80008000L
-# define SWAPPED_FAST_CHAR_MASK 0x00800080L
-# define STRIPPED_MASK          0x00FF00FFL
-#else
-# error C 'long' size should be either 4 or 8!
-#endif
-
 PyObject *
 PyUnicode_DecodeUTF16Stateful(const char *s,
                               Py_ssize_t size,
@@ -5226,30 +5207,15 @@
     Py_ssize_t endinpos;
     Py_ssize_t outpos;
     PyObject *unicode;
-    const unsigned char *q, *e, *aligned_end;
+    const unsigned char *q, *e;
     int bo = 0;       /* assume native ordering by default */
-    int native_ordering = 0;
+    int native_ordering;
     const char *errmsg = "";
-    /* Offsets from q for retrieving byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    int ihi = 1, ilo = 0;
-#else
-    int ihi = 0, ilo = 1;
-#endif
     PyObject *errorHandler = NULL;
     PyObject *exc = NULL;
 
-    /* Note: size will always be longer than the resulting Unicode
-       character count */
-    unicode = PyUnicode_New(size, 127);
-    if (!unicode)
-        return NULL;
-    if (size == 0)
-        return unicode;
-    outpos = 0;
-
     q = (unsigned char *)s;
-    e = q + size - 1;
+    e = q + size;
 
     if (byteorder)
         bo = *byteorder;
@@ -5258,155 +5224,98 @@
        byte order setting accordingly. In native mode, the leading BOM
        mark is skipped, in all other modes, it is copied to the output
        stream as-is (giving a ZWNBSP character). */
-    if (bo == 0) {
-        if (size >= 2) {
-            const Py_UCS4 bom = (q[ihi] << 8) | q[ilo];
+    if (bo == 0 && size >= 2) {
+        const Py_UCS4 bom = (q[1] << 8) | q[0];
+        if (bom == 0xFEFF) {
+            q += 2;
+            bo = -1;
+        }
+        else if (bom == 0xFFFE) {
+            q += 2;
+            bo = 1;
+        }
+        if (byteorder)
+            *byteorder = bo;
+    }
+
+    if (q == e) {
+        if (consumed)
+            *consumed = size;
+        Py_INCREF(unicode_empty);
+        return unicode_empty;
+    }
+
 #ifdef BYTEORDER_IS_LITTLE_ENDIAN
-            if (bom == 0xFEFF) {
-                q += 2;
-                bo = -1;
-            }
-            else if (bom == 0xFFFE) {
-                q += 2;
-                bo = 1;
-            }
+    native_ordering = bo <= 0;
 #else
-            if (bom == 0xFEFF) {
-                q += 2;
-                bo = 1;
-            }
-            else if (bom == 0xFFFE) {
-                q += 2;
-                bo = -1;
-            }
-#endif
-        }
-    }
-
-    if (bo == -1) {
-        /* force LE */
-        ihi = 1;
-        ilo = 0;
-    }
-    else if (bo == 1) {
-        /* force BE */
-        ihi = 0;
-        ilo = 1;
-    }
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-    native_ordering = ilo < ihi;
-#else
-    native_ordering = ilo > ihi;
-#endif
-
-    aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
-    while (q < e) {
-        Py_UCS4 ch;
-        /* First check for possible aligned read of a C 'long'. Unaligned
-           reads are more expensive, better to defer to another iteration. */
-        if (!((size_t) q & LONG_PTR_MASK)) {
-            /* Fast path for runs of non-surrogate chars. */
-            register const unsigned char *_q = q;
+    native_ordering = bo >= 0;
+#endif
+
+    /* Note: size will always be longer than the resulting Unicode
+       character count */
+    unicode = PyUnicode_New((e - q + 1) / 2, 127);
+    if (!unicode)
+        return NULL;
+
+    outpos = 0;
+    while (1) {
+        Py_UCS4 ch = 0;
+        if (e - q >= 2) {
             int kind = PyUnicode_KIND(unicode);
-            void *data = PyUnicode_DATA(unicode);
-            while (_q < aligned_end) {
-                unsigned long block = * (unsigned long *) _q;
-                Py_UCS4 maxch;
-                if (native_ordering) {
-                    /* Can use buffer directly */
-                    if (block & FAST_CHAR_MASK)
-                        break;
-                }
-                else {
-                    /* Need to byte-swap */
-                    if (block & SWAPPED_FAST_CHAR_MASK)
-                        break;
-                    block = ((block >> 8) & STRIPPED_MASK) |
-                            ((block & STRIPPED_MASK) << 8);
-                }
-                maxch = (Py_UCS2)(block & 0xFFFF);
-#if SIZEOF_LONG == 8
-                ch = (Py_UCS2)((block >> 16) & 0xFFFF);
-                maxch = MAX_MAXCHAR(maxch, ch);
-                ch = (Py_UCS2)((block >> 32) & 0xFFFF);
-                maxch = MAX_MAXCHAR(maxch, ch);
-                ch = (Py_UCS2)(block >> 48);
-                maxch = MAX_MAXCHAR(maxch, ch);
-#else
-                ch = (Py_UCS2)(block >> 16);
-                maxch = MAX_MAXCHAR(maxch, ch);
-#endif
-                if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) {
-                    if (unicode_widen(&unicode, outpos, maxch) < 0)
-                        goto onError;
-                    kind = PyUnicode_KIND(unicode);
-                    data = PyUnicode_DATA(unicode);
-                }
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
-#if SIZEOF_LONG == 8
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
-#else
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
-#endif
-#else
-#if SIZEOF_LONG == 8
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
-#else
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
-#endif
-                PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
-#endif
-                _q += SIZEOF_LONG;
-            }
-            q = _q;
-            if (q >= e)
-                break;
-        }
-        ch = (q[ihi] << 8) | q[ilo];
-
-        q += 2;
-
-        if (!Py_UNICODE_IS_SURROGATE(ch)) {
+            if (kind == PyUnicode_1BYTE_KIND) {
+                if (PyUnicode_IS_ASCII(unicode))
+                    ch = asciilib_utf16_decode(&q, e,
+                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            native_ordering);
+                else
+                    ch = ucs1lib_utf16_decode(&q, e,
+                            PyUnicode_1BYTE_DATA(unicode), &outpos,
+                            native_ordering);
+            } else if (kind == PyUnicode_2BYTE_KIND) {
+                ch = ucs2lib_utf16_decode(&q, e,
+                        PyUnicode_2BYTE_DATA(unicode), &outpos,
+                        native_ordering);
+            } else {
+                assert(kind == PyUnicode_4BYTE_KIND);
+                ch = ucs4lib_utf16_decode(&q, e,
+                        PyUnicode_4BYTE_DATA(unicode), &outpos,
+                        native_ordering);
+            }
+        }
+
+        switch (ch)
+        {
+        case 0:
+            /* remaining byte at the end? (size should be even) */
+            if (q == e || consumed)
+                goto End;
+            errmsg = "truncated data";
+            startinpos = ((const char *)q) - starts;
+            endinpos = ((const char *)e) - starts;
+            break;
+            /* The remaining input chars are ignored if the callback
+               chooses to skip the input */
+        case 1:
+            errmsg = "unexpected end of data";
+            startinpos = ((const char *)q) - 2 - starts;
+            endinpos = ((const char *)e) - starts;
+            break;
+        case 2:
+            errmsg = "illegal encoding";
+            startinpos = ((const char *)q) - 2 - starts;
+            endinpos = startinpos + 2;
+            break;
+        case 3:
+            errmsg = "illegal UTF-16 surrogate";
+            startinpos = ((const char *)q) - 4 - starts;
+            endinpos = startinpos + 2;
+            break;
+        default:
             if (unicode_putchar(&unicode, &outpos, ch) < 0)
                 goto onError;
             continue;
         }
 
-        /* UTF-16 code pair: */
-        if (q > e) {
-            errmsg = "unexpected end of data";
-            startinpos = (((const char *)q) - 2) - starts;
-            endinpos = ((const char *)e) + 1 - starts;
-            goto utf16Error;
-        }
-        if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) {
-            Py_UCS4 ch2 = (q[ihi] << 8) | q[ilo];
-            q += 2;
-            if (Py_UNICODE_IS_LOW_SURROGATE(ch2)) {
-                if (unicode_putchar(&unicode, &outpos,
-                                    Py_UNICODE_JOIN_SURROGATES(ch, ch2)) < 0)
-                    goto onError;
-                continue;
-            }
-            else {
-                errmsg = "illegal UTF-16 surrogate";
-                startinpos = (((const char *)q)-4)-starts;
-                endinpos = startinpos+2;
-                goto utf16Error;
-            }
-
-        }
-        errmsg = "illegal encoding";
-        startinpos = (((const char *)q)-2)-starts;
-        endinpos = startinpos+2;
-        /* Fall through to report the error */
-
-      utf16Error:
         if (unicode_decode_call_errorhandler(
                 errors,
                 &errorHandler,
@@ -5421,33 +5330,8 @@
                 &outpos))
             goto onError;
     }
-    /* remaining byte at the end? (size should be even) */
-    if (e == q) {
-        if (!consumed) {
-            errmsg = "truncated data";
-            startinpos = ((const char *)q) - starts;
-            endinpos = ((const char *)e) + 1 - starts;
-            if (unicode_decode_call_errorhandler(
-                    errors,
-                    &errorHandler,
-                    "utf16", errmsg,
-                    &starts,
-                    (const char **)&e,
-                    &startinpos,
-                    &endinpos,
-                    &exc,
-                    (const char **)&q,
-                    &unicode,
-                    &outpos))
-                goto onError;
-            /* The remaining input chars are ignored if the callback
-               chooses to skip the input */
-        }
-    }
-
-    if (byteorder)
-        *byteorder = bo;
-
+
+End:
     if (consumed)
         *consumed = (const char *)q-starts;
 
@@ -5466,9 +5350,6 @@
     return NULL;
 }
 
-#undef FAST_CHAR_MASK
-#undef SWAPPED_FAST_CHAR_MASK
-
 PyObject *
 _PyUnicode_EncodeUTF16(PyObject *str,
                        const char *errors,
diff --git a/Python/freeze_importlib.py b/Python/freeze_importlib.py
--- a/Python/freeze_importlib.py
+++ b/Python/freeze_importlib.py
@@ -25,6 +25,8 @@
     with open(output_path, 'w', encoding='utf-8') as output_file:
         output_file.write('\n'.join(lines))
         output_file.write('/* Mercurial binary marker: \x00 */')
+        # Avoid a compiler warning for lack of EOL
+        output_file.write('\n')
 
 
 if __name__ == '__main__':
diff --git a/Python/importlib.h b/Python/importlib.h
index 0beeb595dbc38d821fb4f6de0981347f3983420a..cf5619a6c4b0587815b87145eae5bf212ec7e5f1
GIT binary patch
[stripped]

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list