[Python-checkins] cpython (merge default -> default): merge heads
giampaolo.rodola
python-checkins at python.org
Wed May 16 16:03:13 CEST 2012
http://hg.python.org/cpython/rev/828be43434e8
changeset: 76998:828be43434e8
parent: 76997:39d24533c6b7
parent: 76996:8c8709b98762
user: Giampaolo Rodola' <g.rodola at gmail.com>
date: Wed May 16 16:03:07 2012 +0200
summary:
merge heads
files:
.hgignore | 9 +-
Doc/library/email.generator.rst | 17 +-
Doc/library/http.client.rst | 15 +
Lib/http/client.py | 8 +
Lib/http/server.py | 10 +-
Lib/test/test_bisect.py | 47 ++-
Lib/test/test_pkgutil.py | 51 +++-
Lib/tkinter/__init__.py | 28 +-
Misc/ACKS | 2 +
Misc/NEWS | 18 +
Modules/_bisectmodule.c | 4 +-
Modules/_csv.c | 105 ++++--
Objects/exceptions.c | 3 +-
Objects/rangeobject.c | 2 +-
Objects/stringlib/codecs.h | 149 +++++++++-
Objects/unicodeobject.c | 297 +++++--------------
Python/freeze_importlib.py | 2 +
Python/importlib.h | Bin
18 files changed, 507 insertions(+), 260 deletions(-)
diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -32,7 +32,6 @@
Modules/config.c
Modules/ld_so_aix$
Parser/pgen$
-PCbuild/amd64/
^core
^python-gdb.py
^python.exe-gdb.py
@@ -56,6 +55,12 @@
PC/pythonnt_rc*.h
PC/*.obj
PC/*.exe
+PC/*/*.user
+PC/*/*.ncb
+PC/*/*.suo
+PC/*/Win32-temp-*
+PC/*/x64-temp-*
+PC/*/amd64
PCbuild/*.exe
PCbuild/*.dll
PCbuild/*.pdb
@@ -69,6 +74,8 @@
PCbuild/*.*sdf
PCbuild/Win32-temp-*
PCbuild/x64-temp-*
+PCbuild/amd64
+BuildLog.htm
__pycache__
Modules/_testembed
.coverage
diff --git a/Doc/library/email.generator.rst b/Doc/library/email.generator.rst
--- a/Doc/library/email.generator.rst
+++ b/Doc/library/email.generator.rst
@@ -17,10 +17,10 @@
standards-compliant way, should handle MIME and non-MIME email messages just
fine, and is designed so that the transformation from flat text, to a message
structure via the :class:`~email.parser.Parser` class, and back to flat text,
-is idempotent (the input is identical to the output). On the other hand, using
-the Generator on a :class:`~email.message.Message` constructed by program may
-result in changes to the :class:`~email.message.Message` object as defaults are
-filled in.
+is idempotent (the input is identical to the output) [#]_. On the other hand,
+using the Generator on a :class:`~email.message.Message` constructed by program
+may result in changes to the :class:`~email.message.Message` object as defaults
+are filled in.
:class:`bytes` output can be generated using the :class:`BytesGenerator` class.
If the message object structure contains non-ASCII bytes, this generator's
@@ -223,3 +223,12 @@
The default value for *fmt* is ``None``, meaning ::
[Non-text (%(type)s) part of message omitted, filename %(filename)s]
+
+
+.. rubric:: Footnotes
+
+.. [#] This statement assumes that you use the appropriate setting for the
+ ``unixfrom`` argument, and that you set maxheaderlen=0 (which will
+ preserve whatever the input line lengths were). It is also not strictly
+ true, since in many cases runs of whitespace in headers are collapsed
+ into single blanks. The latter is a bug that will eventually be fixed.
diff --git a/Doc/library/http.client.rst b/Doc/library/http.client.rst
--- a/Doc/library/http.client.rst
+++ b/Doc/library/http.client.rst
@@ -339,6 +339,15 @@
| :const:`UPGRADE_REQUIRED` | ``426`` | HTTP Upgrade to TLS, |
| | | :rfc:`2817`, Section 6 |
+------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`PRECONDITION_REQUIRED` | ``428`` | Additional HTTP Status Codes, |
+| | | :rfc:`6585`, Section 3 |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`TOO_MANY_REQUESTS` | ``429`` | Additional HTTP Status Codes, |
+| | | :rfc:`6585`, Section 4 |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`REQUEST_HEADER_FIELDS_TOO_LARGE` | ``431`` | Additional HTTP Status Codes, |
+| | | :rfc:`6585`, Section 5 |
++------------------------------------------+---------+-----------------------------------------------------------------------+
| :const:`INTERNAL_SERVER_ERROR` | ``500`` | HTTP/1.1, `RFC 2616, Section |
| | | 10.5.1 |
| | | <http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html#sec10.5.1>`_ |
@@ -369,6 +378,12 @@
| :const:`NOT_EXTENDED` | ``510`` | An HTTP Extension Framework, |
| | | :rfc:`2774`, Section 7 |
+------------------------------------------+---------+-----------------------------------------------------------------------+
+| :const:`NETWORK_AUTHENTICATION_REQUIRED` | ``511`` | Additional HTTP Status Codes, |
+| | | :rfc:`6585`, Section 6 |
++------------------------------------------+---------+-----------------------------------------------------------------------+
+
+ .. versionchanged:: 3.3
+ Added codes ``428``, ``429``, ``431`` and ``511`` from :rfc:`6585`.
.. data:: responses
diff --git a/Lib/http/client.py b/Lib/http/client.py
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -141,6 +141,9 @@
LOCKED = 423
FAILED_DEPENDENCY = 424
UPGRADE_REQUIRED = 426
+PRECONDITION_REQUIRED = 428
+TOO_MANY_REQUESTS = 429
+REQUEST_HEADER_FIELDS_TOO_LARGE = 431
# server error
INTERNAL_SERVER_ERROR = 500
@@ -151,6 +154,7 @@
HTTP_VERSION_NOT_SUPPORTED = 505
INSUFFICIENT_STORAGE = 507
NOT_EXTENDED = 510
+NETWORK_AUTHENTICATION_REQUIRED = 511
# Mapping status codes to official W3C names
responses = {
@@ -192,6 +196,9 @@
415: 'Unsupported Media Type',
416: 'Requested Range Not Satisfiable',
417: 'Expectation Failed',
+ 428: 'Precondition Required',
+ 429: 'Too Many Requests',
+ 431: 'Request Header Fields Too Large',
500: 'Internal Server Error',
501: 'Not Implemented',
@@ -199,6 +206,7 @@
503: 'Service Unavailable',
504: 'Gateway Timeout',
505: 'HTTP Version Not Supported',
+ 511: 'Network Authentication Required',
}
# maximal amount of data to read at one time in _safe_read
diff --git a/Lib/http/server.py b/Lib/http/server.py
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -573,7 +573,7 @@
# Table mapping response codes to messages; entries have the
# form {code: (shortmessage, longmessage)}.
- # See RFC 2616.
+ # See RFC 2616 and 6585.
responses = {
100: ('Continue', 'Request received, please continue'),
101: ('Switching Protocols',
@@ -628,6 +628,12 @@
'Cannot satisfy request range.'),
417: ('Expectation Failed',
'Expect condition could not be satisfied.'),
+ 428: ('Precondition Required',
+ 'The origin server requires the request to be conditional.'),
+ 429: ('Too Many Requests', 'The user has sent too many requests '
+ 'in a given amount of time ("rate limiting").'),
+ 431: ('Request Header Fields Too Large', 'The server is unwilling to '
+ 'process the request because its header fields are too large.'),
500: ('Internal Server Error', 'Server got itself in trouble'),
501: ('Not Implemented',
@@ -638,6 +644,8 @@
504: ('Gateway Timeout',
'The gateway server did not receive a timely response'),
505: ('HTTP Version Not Supported', 'Cannot fulfill request.'),
+ 511: ('Network Authentication Required',
+ 'The client needs to authenticate to gain network access.'),
}
diff --git a/Lib/test/test_bisect.py b/Lib/test/test_bisect.py
--- a/Lib/test/test_bisect.py
+++ b/Lib/test/test_bisect.py
@@ -23,6 +23,28 @@
import bisect as c_bisect
+class Range(object):
+ """A trivial range()-like object without any integer width limitations."""
+ def __init__(self, start, stop):
+ self.start = start
+ self.stop = stop
+ self.last_insert = None
+
+ def __len__(self):
+ return self.stop - self.start
+
+ def __getitem__(self, idx):
+ n = self.stop - self.start
+ if idx < 0:
+ idx += n
+ if idx >= n:
+ raise IndexError(idx)
+ return self.start + idx
+
+ def insert(self, idx, item):
+ self.last_insert = idx, item
+
+
class TestBisect(unittest.TestCase):
module = None
@@ -125,9 +147,28 @@
def test_large_range(self):
# Issue 13496
mod = self.module
- data = range(sys.maxsize-1)
- self.assertEqual(mod.bisect_left(data, sys.maxsize-3), sys.maxsize-3)
- self.assertEqual(mod.bisect_right(data, sys.maxsize-3), sys.maxsize-2)
+ n = sys.maxsize
+ data = range(n-1)
+ self.assertEqual(mod.bisect_left(data, n-3), n-3)
+ self.assertEqual(mod.bisect_right(data, n-3), n-2)
+ self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
+ self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
+
+ def test_large_pyrange(self):
+ # Same as above, but without C-imposed limits on range() parameters
+ mod = self.module
+ n = sys.maxsize
+ data = Range(0, n-1)
+ self.assertEqual(mod.bisect_left(data, n-3), n-3)
+ self.assertEqual(mod.bisect_right(data, n-3), n-2)
+ self.assertEqual(mod.bisect_left(data, n-3, n-10, n), n-3)
+ self.assertEqual(mod.bisect_right(data, n-3, n-10, n), n-2)
+ x = n - 100
+ mod.insort_left(data, x, x - 50, x + 50)
+ self.assertEqual(data.last_insert, (x, x))
+ x = n - 200
+ mod.insort_right(data, x, x - 50, x + 50)
+ self.assertEqual(data.last_insert, (x + 1, x))
def test_random(self, n=25):
from random import randrange
diff --git a/Lib/test/test_pkgutil.py b/Lib/test/test_pkgutil.py
--- a/Lib/test/test_pkgutil.py
+++ b/Lib/test/test_pkgutil.py
@@ -137,8 +137,57 @@
self.assertEqual(foo.loads, 1)
del sys.modules['foo']
+
+class ExtendPathTests(unittest.TestCase):
+ def create_init(self, pkgname):
+ dirname = tempfile.mkdtemp()
+ self.addCleanup(shutil.rmtree, dirname)
+ sys.path.insert(0, dirname)
+
+ pkgdir = os.path.join(dirname, pkgname)
+ os.mkdir(pkgdir)
+ with open(os.path.join(pkgdir, '__init__.py'), 'w') as fl:
+ fl.write('from pkgutil import extend_path\n__path__ = extend_path(__path__, __name__)\n')
+
+ return dirname
+
+ def create_submodule(self, dirname, pkgname, submodule_name, value):
+ module_name = os.path.join(dirname, pkgname, submodule_name + '.py')
+ with open(module_name, 'w') as fl:
+ print('value={}'.format(value), file=fl)
+
+ def setUp(self):
+ # Create 2 directories on sys.path
+ self.pkgname = 'foo'
+ self.dirname_0 = self.create_init(self.pkgname)
+ self.dirname_1 = self.create_init(self.pkgname)
+
+ def tearDown(self):
+ del sys.path[0]
+ del sys.path[0]
+ del sys.modules['foo']
+ del sys.modules['foo.bar']
+ del sys.modules['foo.baz']
+
+ def test_simple(self):
+ self.create_submodule(self.dirname_0, self.pkgname, 'bar', 0)
+ self.create_submodule(self.dirname_1, self.pkgname, 'baz', 1)
+ import foo.bar
+ import foo.baz
+ # Ensure we read the expected values
+ self.assertEqual(foo.bar.value, 0)
+ self.assertEqual(foo.baz.value, 1)
+
+ # Ensure the path is set up correctly
+ self.assertEqual(sorted(foo.__path__),
+ sorted([os.path.join(self.dirname_0, self.pkgname),
+ os.path.join(self.dirname_1, self.pkgname)]))
+
+ # XXX: test .pkg files
+
+
def test_main():
- run_unittest(PkgutilTests, PkgutilPEP302Tests)
+ run_unittest(PkgutilTests, PkgutilPEP302Tests, ExtendPathTests)
# this is necessary if test is run repeated (like when finding leaks)
import zipimport
zipimport._zip_directory_cache.clear()
diff --git a/Lib/tkinter/__init__.py b/Lib/tkinter/__init__.py
--- a/Lib/tkinter/__init__.py
+++ b/Lib/tkinter/__init__.py
@@ -540,12 +540,19 @@
The type keyword specifies the form in which the data is
to be returned and should be an atom name such as STRING
- or FILE_NAME. Type defaults to STRING.
+ or FILE_NAME. Type defaults to STRING, except on X11, where the default
+ is to try UTF8_STRING and fall back to STRING.
This command is equivalent to:
selection_get(CLIPBOARD)
"""
+ if 'type' not in kw and self._windowingsystem == 'x11':
+ try:
+ kw['type'] = 'UTF8_STRING'
+ return self.tk.call(('clipboard', 'get') + self._options(kw))
+ except TclError:
+ del kw['type']
return self.tk.call(('clipboard', 'get') + self._options(kw))
def clipboard_clear(self, **kw):
@@ -627,8 +634,16 @@
A keyword parameter selection specifies the name of
the selection and defaults to PRIMARY. A keyword
parameter displayof specifies a widget on the display
- to use."""
+ to use. A keyword parameter type specifies the form of data to be
+ fetched, defaulting to STRING except on X11, where UTF8_STRING is tried
+ before STRING."""
if 'displayof' not in kw: kw['displayof'] = self._w
+ if 'type' not in kw and self._windowingsystem == 'x11':
+ try:
+ kw['type'] = 'UTF8_STRING'
+ return self.tk.call(('selection', 'get') + self._options(kw))
+ except TclError:
+ del kw['type']
return self.tk.call(('selection', 'get') + self._options(kw))
def selection_handle(self, command, **kw):
"""Specify a function COMMAND to call if the X
@@ -1043,6 +1058,15 @@
if displayof is None:
return ('-displayof', self._w)
return ()
+ @property
+ def _windowingsystem(self):
+ """Internal function."""
+ try:
+ return self._root()._windowingsystem_cached
+ except AttributeError:
+ ws = self._root()._windowingsystem_cached = \
+ self.tk.call('tk', 'windowingsystem')
+ return ws
def _options(self, cnf, kw = None):
"""Internal function."""
if kw:
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -919,6 +919,7 @@
Michael Schneider
Peter Schneider-Kamp
Arvin Schnell
+Robin Schreiber
Chad J. Schroeder
Sam Schulenburg
Stefan Schwarzer
@@ -1129,6 +1130,7 @@
Hirokazu Yamamoto
Ka-Ping Yee
Jason Yeo
+EungJun Yi
Bob Yodlowski
Danny Yoo
George Yoshida
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,9 @@
Core and Builtins
-----------------
+- Issue #14624: UTF-16 decoding is now 3x to 4x faster on various inputs.
+ Patch by Serhiy Storchaka.
+
- asdl_seq and asdl_int_seq are now Py_ssize_t sized.
- Issue #14133 (PEP 415): Implement suppression of __context__ display with an
@@ -31,6 +34,21 @@
Library
-------
+- Issue #14829: Fix bisect and range() indexing with large indices
+ (>= 2 ** 32) under 64-bit Windows.
+
+- Issue #14732: The _csv module now uses PEP 3121 module initialization.
+ Patch by Robin Schreiber.
+
+- Issue #14809: Add HTTP status codes introduced by RFC 6585 to http.server
+ and http.client. Patch by EungJun Yi.
+
+- Issue #14777: tkinter may return undecoded UTF-8 bytes as a string when
+ accessing the Tk clipboard. Modify clipboard_get() to first request type
+ UTF8_STRING when no specific type is requested in an X11 windowing
+ environment, falling back to the current default type STRING if that fails.
+ Original patch by Thomas Kluyver.
+
- Issue #14773: Fix os.fwalk() failing on dangling symlinks.
- Issue #12541: Be lenient with quotes around Realm field of HTTP Basic
diff --git a/Modules/_bisectmodule.c b/Modules/_bisectmodule.c
--- a/Modules/_bisectmodule.c
+++ b/Modules/_bisectmodule.c
@@ -3,6 +3,7 @@
Converted to C by Dmitry Vasiliev (dima at hlabs.spb.ru).
*/
+#define PY_SSIZE_T_CLEAN
#include "Python.h"
static Py_ssize_t
@@ -195,8 +196,7 @@
return NULL;
} else {
_Py_IDENTIFIER(insert);
-
- result = _PyObject_CallMethodId(list, &PyId_insert, "iO", index, item);
+ result = _PyObject_CallMethodId(list, &PyId_insert, "nO", index, item);
if (result == NULL)
return NULL;
Py_DECREF(result);
diff --git a/Modules/_csv.c b/Modules/_csv.c
--- a/Modules/_csv.c
+++ b/Modules/_csv.c
@@ -16,9 +16,39 @@
#define IS_BASESTRING(o) \
PyUnicode_Check(o)
-static PyObject *error_obj; /* CSV exception */
-static PyObject *dialects; /* Dialect registry */
-static long field_limit = 128 * 1024; /* max parsed field size */
+typedef struct {
+ PyObject *error_obj; /* CSV exception */
+ PyObject *dialects; /* Dialect registry */
+ long field_limit; /* max parsed field size */
+} _csvstate;
+
+#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
+
+static int
+_csv_clear(PyObject *m)
+{
+ Py_CLEAR(_csvstate(m)->error_obj);
+ Py_CLEAR(_csvstate(m)->dialects);
+ return 0;
+}
+
+static int
+_csv_traverse(PyObject *m, visitproc visit, void *arg)
+{
+ Py_VISIT(_csvstate(m)->error_obj);
+ Py_VISIT(_csvstate(m)->dialects);
+ return 0;
+}
+
+static void
+_csv_free(void *m)
+{
+ _csv_clear((PyObject *)m);
+}
+
+static struct PyModuleDef _csvmodule;
+
+#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
typedef enum {
START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
@@ -103,10 +133,10 @@
{
PyObject *dialect_obj;
- dialect_obj = PyDict_GetItem(dialects, name_obj);
+ dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
if (dialect_obj == NULL) {
if (!PyErr_Occurred())
- PyErr_Format(error_obj, "unknown dialect");
+ PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
}
else
Py_INCREF(dialect_obj);
@@ -544,9 +574,9 @@
static int
parse_add_char(ReaderObj *self, Py_UCS4 c)
{
- if (self->field_len >= field_limit) {
- PyErr_Format(error_obj, "field larger than field limit (%ld)",
- field_limit);
+ if (self->field_len >= _csvstate_global->field_limit) {
+ PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
+ _csvstate_global->field_limit);
return -1;
}
if (self->field_len == self->field_size && !parse_grow_buff(self))
@@ -703,7 +733,7 @@
}
else {
/* illegal */
- PyErr_Format(error_obj, "'%c' expected after '%c'",
+ PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
dialect->delimiter,
dialect->quotechar);
return -1;
@@ -716,7 +746,7 @@
else if (c == '\0')
self->state = START_RECORD;
else {
- PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
+ PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
return -1;
}
break;
@@ -755,12 +785,12 @@
if (lineobj == NULL) {
/* End of input OR exception */
if (!PyErr_Occurred() && self->field_len != 0)
- PyErr_Format(error_obj,
+ PyErr_Format(_csvstate_global->error_obj,
"newline inside string");
return NULL;
}
if (!PyUnicode_Check(lineobj)) {
- PyErr_Format(error_obj,
+ PyErr_Format(_csvstate_global->error_obj,
"iterator should return strings, "
"not %.200s "
"(did you open the file in text mode?)",
@@ -778,7 +808,7 @@
c = PyUnicode_READ(kind, data, pos);
if (c == '\0') {
Py_DECREF(lineobj);
- PyErr_Format(error_obj,
+ PyErr_Format(_csvstate_global->error_obj,
"line contains NULL byte");
goto err;
}
@@ -994,7 +1024,7 @@
}
if (want_escape) {
if (!dialect->escapechar) {
- PyErr_Format(error_obj,
+ PyErr_Format(_csvstate_global->error_obj,
"need to escape, but no escapechar set");
return -1;
}
@@ -1010,7 +1040,7 @@
*/
if (i == 0 && quote_empty) {
if (dialect->quoting == QUOTE_NONE) {
- PyErr_Format(error_obj,
+ PyErr_Format(_csvstate_global->error_obj,
"single empty field record must be quoted");
return -1;
}
@@ -1127,7 +1157,7 @@
PyObject *line, *result;
if (!PySequence_Check(seq))
- return PyErr_Format(error_obj, "sequence expected");
+ return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
len = PySequence_Length(seq);
if (len < 0)
@@ -1353,7 +1383,7 @@
static PyObject *
csv_list_dialects(PyObject *module, PyObject *args)
{
- return PyDict_Keys(dialects);
+ return PyDict_Keys(_csvstate_global->dialects);
}
static PyObject *
@@ -1372,7 +1402,7 @@
dialect = _call_dialect(dialect_obj, kwargs);
if (dialect == NULL)
return NULL;
- if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
+ if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Py_DECREF(dialect);
return NULL;
}
@@ -1384,8 +1414,8 @@
static PyObject *
csv_unregister_dialect(PyObject *module, PyObject *name_obj)
{
- if (PyDict_DelItem(dialects, name_obj) < 0)
- return PyErr_Format(error_obj, "unknown dialect");
+ if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
+ return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Py_INCREF(Py_None);
return Py_None;
}
@@ -1400,7 +1430,7 @@
csv_field_size_limit(PyObject *module, PyObject *args)
{
PyObject *new_limit = NULL;
- long old_limit = field_limit;
+ long old_limit = _csvstate_global->field_limit;
if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
return NULL;
@@ -1410,9 +1440,9 @@
"limit must be an integer");
return NULL;
}
- field_limit = PyLong_AsLong(new_limit);
- if (field_limit == -1 && PyErr_Occurred()) {
- field_limit = old_limit;
+ _csvstate_global->field_limit = PyLong_AsLong(new_limit);
+ if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
+ _csvstate_global->field_limit = old_limit;
return NULL;
}
}
@@ -1551,17 +1581,16 @@
{ NULL, NULL }
};
-
static struct PyModuleDef _csvmodule = {
PyModuleDef_HEAD_INIT,
"_csv",
csv_module_doc,
- -1,
+ sizeof(_csvstate),
csv_methods,
NULL,
- NULL,
- NULL,
- NULL
+ _csv_traverse,
+ _csv_clear,
+ _csv_free
};
PyMODINIT_FUNC
@@ -1589,11 +1618,16 @@
MODULE_VERSION) == -1)
return NULL;
+ /* Set the field limit */
+ _csvstate(module)->field_limit = 128 * 1024;
+ /* Do I still need to add this var to the Module Dict? */
+
/* Add _dialects dictionary */
- dialects = PyDict_New();
- if (dialects == NULL)
+ _csvstate(module)->dialects = PyDict_New();
+ if (_csvstate(module)->dialects == NULL)
return NULL;
- if (PyModule_AddObject(module, "_dialects", dialects))
+ Py_INCREF(_csvstate(module)->dialects);
+ if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
return NULL;
/* Add quote styles into dictionary */
@@ -1609,9 +1643,10 @@
return NULL;
/* Add the CSV exception object to the module. */
- error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
- if (error_obj == NULL)
+ _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
+ if (_csvstate(module)->error_obj == NULL)
return NULL;
- PyModule_AddObject(module, "Error", error_obj);
+ Py_INCREF(_csvstate(module)->error_obj);
+ PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
return module;
}
diff --git a/Objects/exceptions.c b/Objects/exceptions.c
--- a/Objects/exceptions.c
+++ b/Objects/exceptions.c
@@ -349,7 +349,8 @@
static struct PyMemberDef BaseException_members[] = {
{"__suppress_context__", T_BOOL,
- offsetof(PyBaseExceptionObject, suppress_context)}
+ offsetof(PyBaseExceptionObject, suppress_context)},
+ {NULL}
};
diff --git a/Objects/rangeobject.c b/Objects/rangeobject.c
--- a/Objects/rangeobject.c
+++ b/Objects/rangeobject.c
@@ -308,7 +308,7 @@
static PyObject *
range_item(rangeobject *r, Py_ssize_t i)
{
- PyObject *res, *arg = PyLong_FromLong(i);
+ PyObject *res, *arg = PyLong_FromSsize_t(i);
if (!arg) {
return NULL;
}
diff --git a/Objects/stringlib/codecs.h b/Objects/stringlib/codecs.h
--- a/Objects/stringlib/codecs.h
+++ b/Objects/stringlib/codecs.h
@@ -215,7 +215,6 @@
goto Return;
}
-#undef LONG_PTR_MASK
#undef ASCII_CHAR_MASK
@@ -415,4 +414,152 @@
#undef MAX_SHORT_UNICHARS
}
+/* The pattern for constructing UCS2-repeated masks. */
+#if SIZEOF_LONG == 8
+# define UCS2_REPEAT_MASK 0x0001000100010001ul
+#elif SIZEOF_LONG == 4
+# define UCS2_REPEAT_MASK 0x00010001ul
+#else
+# error C 'long' size should be either 4 or 8!
+#endif
+
+/* The mask for fast checking. */
+#if STRINGLIB_SIZEOF_CHAR == 1
+/* The mask for fast checking of whether a C 'long' contains any
+ non-ASCII or non-Latin1 UTF16-encoded characters. */
+# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * (0xFFFFu & ~STRINGLIB_MAX_CHAR))
+#else
+/* The mask for fast checking of whether a C 'long' may contain
+ UTF16-encoded surrogate characters. This is an efficient heuristic,
+ assuming that non-surrogate characters with a code point >= 0x8000 are
+ rare in most input.
+*/
+# define FAST_CHAR_MASK (UCS2_REPEAT_MASK * 0x8000u)
+#endif
+/* The mask for fast byte-swapping. */
+#define STRIPPED_MASK (UCS2_REPEAT_MASK * 0x00FFu)
+/* Swap bytes. */
+#define SWAB(value) ((((value) >> 8) & STRIPPED_MASK) | \
+ (((value) & STRIPPED_MASK) << 8))
+
+Py_LOCAL_INLINE(Py_UCS4)
+STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
+ STRINGLIB_CHAR *dest, Py_ssize_t *outpos,
+ int native_ordering)
+{
+ Py_UCS4 ch;
+ const unsigned char *aligned_end =
+ (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
+ const unsigned char *q = *inptr;
+ STRINGLIB_CHAR *p = dest + *outpos;
+ /* Offsets from q for retrieving byte pairs in the right order. */
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+ int ihi = !!native_ordering, ilo = !native_ordering;
+#else
+ int ihi = !native_ordering, ilo = !!native_ordering;
+#endif
+ --e;
+
+ while (q < e) {
+ Py_UCS4 ch2;
+ /* First check for possible aligned read of a C 'long'. Unaligned
+ reads are more expensive, better to defer to another iteration. */
+ if (!((size_t) q & LONG_PTR_MASK)) {
+ /* Fast path for runs of in-range non-surrogate chars. */
+ register const unsigned char *_q = q;
+ while (_q < aligned_end) {
+ unsigned long block = * (unsigned long *) _q;
+ if (native_ordering) {
+ /* Can use buffer directly */
+ if (block & FAST_CHAR_MASK)
+ break;
+ }
+ else {
+ /* Need to byte-swap */
+ if (block & SWAB(FAST_CHAR_MASK))
+ break;
+#if STRINGLIB_SIZEOF_CHAR == 1
+ block >>= 8;
+#else
+ block = SWAB(block);
+#endif
+ }
+#ifdef BYTEORDER_IS_LITTLE_ENDIAN
+# if SIZEOF_LONG == 4
+ p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+ p[1] = (STRINGLIB_CHAR)(block >> 16);
+# elif SIZEOF_LONG == 8
+ p[0] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+ p[1] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+ p[2] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+ p[3] = (STRINGLIB_CHAR)(block >> 48);
+# endif
+#else
+# if SIZEOF_LONG == 4
+ p[0] = (STRINGLIB_CHAR)(block >> 16);
+ p[1] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# elif SIZEOF_LONG == 8
+ p[0] = (STRINGLIB_CHAR)(block >> 48);
+ p[1] = (STRINGLIB_CHAR)((block >> 32) & 0xFFFFu);
+ p[2] = (STRINGLIB_CHAR)((block >> 16) & 0xFFFFu);
+ p[3] = (STRINGLIB_CHAR)(block & 0xFFFFu);
+# endif
+#endif
+ _q += SIZEOF_LONG;
+ p += SIZEOF_LONG / 2;
+ }
+ q = _q;
+ if (q >= e)
+ break;
+ }
+
+ ch = (q[ihi] << 8) | q[ilo];
+ q += 2;
+ if (!Py_UNICODE_IS_SURROGATE(ch)) {
+#if STRINGLIB_SIZEOF_CHAR < 2
+ if (ch > STRINGLIB_MAX_CHAR)
+ /* Out-of-range */
+ goto Return;
+#endif
+ *p++ = (STRINGLIB_CHAR)ch;
+ continue;
+ }
+
+ /* UTF-16 code pair: */
+ if (q >= e)
+ goto UnexpectedEnd;
+ if (!Py_UNICODE_IS_HIGH_SURROGATE(ch))
+ goto IllegalEncoding;
+ ch2 = (q[ihi] << 8) | q[ilo];
+ q += 2;
+ if (!Py_UNICODE_IS_LOW_SURROGATE(ch2))
+ goto IllegalSurrogate;
+ ch = Py_UNICODE_JOIN_SURROGATES(ch, ch2);
+#if STRINGLIB_SIZEOF_CHAR < 4
+ /* Out-of-range */
+ goto Return;
+#else
+ *p++ = (STRINGLIB_CHAR)ch;
+#endif
+ }
+ ch = 0;
+Return:
+ *inptr = q;
+ *outpos = p - dest;
+ return ch;
+UnexpectedEnd:
+ ch = 1;
+ goto Return;
+IllegalEncoding:
+ ch = 2;
+ goto Return;
+IllegalSurrogate:
+ ch = 3;
+ goto Return;
+}
+#undef UCS2_REPEAT_MASK
+#undef FAST_CHAR_MASK
+#undef STRIPPED_MASK
+#undef SWAB
+#undef LONG_PTR_MASK
#endif /* STRINGLIB_IS_UNICODE */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5195,25 +5195,6 @@
return PyUnicode_DecodeUTF16Stateful(s, size, errors, byteorder, NULL);
}
-/* Two masks for fast checking of whether a C 'long' may contain
- UTF16-encoded surrogate characters. This is an efficient heuristic,
- assuming that non-surrogate characters with a code point >= 0x8000 are
- rare in most input.
- FAST_CHAR_MASK is used when the input is in native byte ordering,
- SWAPPED_FAST_CHAR_MASK when the input is in byteswapped ordering.
-*/
-#if (SIZEOF_LONG == 8)
-# define FAST_CHAR_MASK 0x8000800080008000L
-# define SWAPPED_FAST_CHAR_MASK 0x0080008000800080L
-# define STRIPPED_MASK 0x00FF00FF00FF00FFL
-#elif (SIZEOF_LONG == 4)
-# define FAST_CHAR_MASK 0x80008000L
-# define SWAPPED_FAST_CHAR_MASK 0x00800080L
-# define STRIPPED_MASK 0x00FF00FFL
-#else
-# error C 'long' size should be either 4 or 8!
-#endif
-
PyObject *
PyUnicode_DecodeUTF16Stateful(const char *s,
Py_ssize_t size,
@@ -5226,30 +5207,15 @@
Py_ssize_t endinpos;
Py_ssize_t outpos;
PyObject *unicode;
- const unsigned char *q, *e, *aligned_end;
+ const unsigned char *q, *e;
int bo = 0; /* assume native ordering by default */
- int native_ordering = 0;
+ int native_ordering;
const char *errmsg = "";
- /* Offsets from q for retrieving byte pairs in the right order. */
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
- int ihi = 1, ilo = 0;
-#else
- int ihi = 0, ilo = 1;
-#endif
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
- /* Note: size will always be longer than the resulting Unicode
- character count */
- unicode = PyUnicode_New(size, 127);
- if (!unicode)
- return NULL;
- if (size == 0)
- return unicode;
- outpos = 0;
-
q = (unsigned char *)s;
- e = q + size - 1;
+ e = q + size;
if (byteorder)
bo = *byteorder;
@@ -5258,155 +5224,98 @@
byte order setting accordingly. In native mode, the leading BOM
mark is skipped, in all other modes, it is copied to the output
stream as-is (giving a ZWNBSP character). */
- if (bo == 0) {
- if (size >= 2) {
- const Py_UCS4 bom = (q[ihi] << 8) | q[ilo];
+ if (bo == 0 && size >= 2) {
+ const Py_UCS4 bom = (q[1] << 8) | q[0];
+ if (bom == 0xFEFF) {
+ q += 2;
+ bo = -1;
+ }
+ else if (bom == 0xFFFE) {
+ q += 2;
+ bo = 1;
+ }
+ if (byteorder)
+ *byteorder = bo;
+ }
+
+ if (q == e) {
+ if (consumed)
+ *consumed = size;
+ Py_INCREF(unicode_empty);
+ return unicode_empty;
+ }
+
#ifdef BYTEORDER_IS_LITTLE_ENDIAN
- if (bom == 0xFEFF) {
- q += 2;
- bo = -1;
- }
- else if (bom == 0xFFFE) {
- q += 2;
- bo = 1;
- }
+ native_ordering = bo <= 0;
#else
- if (bom == 0xFEFF) {
- q += 2;
- bo = 1;
- }
- else if (bom == 0xFFFE) {
- q += 2;
- bo = -1;
- }
-#endif
- }
- }
-
- if (bo == -1) {
- /* force LE */
- ihi = 1;
- ilo = 0;
- }
- else if (bo == 1) {
- /* force BE */
- ihi = 0;
- ilo = 1;
- }
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
- native_ordering = ilo < ihi;
-#else
- native_ordering = ilo > ihi;
-#endif
-
- aligned_end = (const unsigned char *) ((size_t) e & ~LONG_PTR_MASK);
- while (q < e) {
- Py_UCS4 ch;
- /* First check for possible aligned read of a C 'long'. Unaligned
- reads are more expensive, better to defer to another iteration. */
- if (!((size_t) q & LONG_PTR_MASK)) {
- /* Fast path for runs of non-surrogate chars. */
- register const unsigned char *_q = q;
+ native_ordering = bo >= 0;
+#endif
+
+ /* Note: size will always be longer than the resulting Unicode
+ character count */
+ unicode = PyUnicode_New((e - q + 1) / 2, 127);
+ if (!unicode)
+ return NULL;
+
+ outpos = 0;
+ while (1) {
+ Py_UCS4 ch = 0;
+ if (e - q >= 2) {
int kind = PyUnicode_KIND(unicode);
- void *data = PyUnicode_DATA(unicode);
- while (_q < aligned_end) {
- unsigned long block = * (unsigned long *) _q;
- Py_UCS4 maxch;
- if (native_ordering) {
- /* Can use buffer directly */
- if (block & FAST_CHAR_MASK)
- break;
- }
- else {
- /* Need to byte-swap */
- if (block & SWAPPED_FAST_CHAR_MASK)
- break;
- block = ((block >> 8) & STRIPPED_MASK) |
- ((block & STRIPPED_MASK) << 8);
- }
- maxch = (Py_UCS2)(block & 0xFFFF);
-#if SIZEOF_LONG == 8
- ch = (Py_UCS2)((block >> 16) & 0xFFFF);
- maxch = MAX_MAXCHAR(maxch, ch);
- ch = (Py_UCS2)((block >> 32) & 0xFFFF);
- maxch = MAX_MAXCHAR(maxch, ch);
- ch = (Py_UCS2)(block >> 48);
- maxch = MAX_MAXCHAR(maxch, ch);
-#else
- ch = (Py_UCS2)(block >> 16);
- maxch = MAX_MAXCHAR(maxch, ch);
-#endif
- if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) {
- if (unicode_widen(&unicode, outpos, maxch) < 0)
- goto onError;
- kind = PyUnicode_KIND(unicode);
- data = PyUnicode_DATA(unicode);
- }
-#ifdef BYTEORDER_IS_LITTLE_ENDIAN
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
-#if SIZEOF_LONG == 8
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
-#else
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
-#endif
-#else
-#if SIZEOF_LONG == 8
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 48)));
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 32) & 0xFFFF));
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)((block >> 16) & 0xFFFF));
-#else
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block >> 16));
-#endif
- PyUnicode_WRITE(kind, data, outpos++, (Py_UCS2)(block & 0xFFFF));
-#endif
- _q += SIZEOF_LONG;
- }
- q = _q;
- if (q >= e)
- break;
- }
- ch = (q[ihi] << 8) | q[ilo];
-
- q += 2;
-
- if (!Py_UNICODE_IS_SURROGATE(ch)) {
+ if (kind == PyUnicode_1BYTE_KIND) {
+ if (PyUnicode_IS_ASCII(unicode))
+ ch = asciilib_utf16_decode(&q, e,
+ PyUnicode_1BYTE_DATA(unicode), &outpos,
+ native_ordering);
+ else
+ ch = ucs1lib_utf16_decode(&q, e,
+ PyUnicode_1BYTE_DATA(unicode), &outpos,
+ native_ordering);
+ } else if (kind == PyUnicode_2BYTE_KIND) {
+ ch = ucs2lib_utf16_decode(&q, e,
+ PyUnicode_2BYTE_DATA(unicode), &outpos,
+ native_ordering);
+ } else {
+ assert(kind == PyUnicode_4BYTE_KIND);
+ ch = ucs4lib_utf16_decode(&q, e,
+ PyUnicode_4BYTE_DATA(unicode), &outpos,
+ native_ordering);
+ }
+ }
+
+ switch (ch)
+ {
+ case 0:
+ /* remaining byte at the end? (size should be even) */
+ if (q == e || consumed)
+ goto End;
+ errmsg = "truncated data";
+ startinpos = ((const char *)q) - starts;
+ endinpos = ((const char *)e) - starts;
+ break;
+ /* The remaining input chars are ignored if the callback
+ chooses to skip the input */
+ case 1:
+ errmsg = "unexpected end of data";
+ startinpos = ((const char *)q) - 2 - starts;
+ endinpos = ((const char *)e) - starts;
+ break;
+ case 2:
+ errmsg = "illegal encoding";
+ startinpos = ((const char *)q) - 2 - starts;
+ endinpos = startinpos + 2;
+ break;
+ case 3:
+ errmsg = "illegal UTF-16 surrogate";
+ startinpos = ((const char *)q) - 4 - starts;
+ endinpos = startinpos + 2;
+ break;
+ default:
if (unicode_putchar(&unicode, &outpos, ch) < 0)
goto onError;
continue;
}
- /* UTF-16 code pair: */
- if (q > e) {
- errmsg = "unexpected end of data";
- startinpos = (((const char *)q) - 2) - starts;
- endinpos = ((const char *)e) + 1 - starts;
- goto utf16Error;
- }
- if (Py_UNICODE_IS_HIGH_SURROGATE(ch)) {
- Py_UCS4 ch2 = (q[ihi] << 8) | q[ilo];
- q += 2;
- if (Py_UNICODE_IS_LOW_SURROGATE(ch2)) {
- if (unicode_putchar(&unicode, &outpos,
- Py_UNICODE_JOIN_SURROGATES(ch, ch2)) < 0)
- goto onError;
- continue;
- }
- else {
- errmsg = "illegal UTF-16 surrogate";
- startinpos = (((const char *)q)-4)-starts;
- endinpos = startinpos+2;
- goto utf16Error;
- }
-
- }
- errmsg = "illegal encoding";
- startinpos = (((const char *)q)-2)-starts;
- endinpos = startinpos+2;
- /* Fall through to report the error */
-
- utf16Error:
if (unicode_decode_call_errorhandler(
errors,
&errorHandler,
@@ -5421,33 +5330,8 @@
&outpos))
goto onError;
}
- /* remaining byte at the end? (size should be even) */
- if (e == q) {
- if (!consumed) {
- errmsg = "truncated data";
- startinpos = ((const char *)q) - starts;
- endinpos = ((const char *)e) + 1 - starts;
- if (unicode_decode_call_errorhandler(
- errors,
- &errorHandler,
- "utf16", errmsg,
- &starts,
- (const char **)&e,
- &startinpos,
- &endinpos,
- &exc,
- (const char **)&q,
- &unicode,
- &outpos))
- goto onError;
- /* The remaining input chars are ignored if the callback
- chooses to skip the input */
- }
- }
-
- if (byteorder)
- *byteorder = bo;
-
+
+End:
if (consumed)
*consumed = (const char *)q-starts;
@@ -5466,9 +5350,6 @@
return NULL;
}
-#undef FAST_CHAR_MASK
-#undef SWAPPED_FAST_CHAR_MASK
-
PyObject *
_PyUnicode_EncodeUTF16(PyObject *str,
const char *errors,
diff --git a/Python/freeze_importlib.py b/Python/freeze_importlib.py
--- a/Python/freeze_importlib.py
+++ b/Python/freeze_importlib.py
@@ -25,6 +25,8 @@
with open(output_path, 'w', encoding='utf-8') as output_file:
output_file.write('\n'.join(lines))
output_file.write('/* Mercurial binary marker: \x00 */')
+ # Avoid a compiler warning for lack of EOL
+ output_file.write('\n')
if __name__ == '__main__':
diff --git a/Python/importlib.h b/Python/importlib.h
index 0beeb595dbc38d821fb4f6de0981347f3983420a..cf5619a6c4b0587815b87145eae5bf212ec7e5f1
GIT binary patch
[stripped]
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list