Mailman 3 November 2018 - Python-checkins

bpo-33723: Fix test_time.test_thread_time() (GH-10724)
by Miss Islington (bot) 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/5350dd1b50e60882a2da6d53ed27e02d2b… commit: 5350dd1b50e60882a2da6d53ed27e02d2b698f2e branch: 3.7 author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com> committer: GitHub <noreply(a)github.com> date: 2018-11-26T12:36:54-08:00 summary: bpo-33723: Fix test_time.test_thread_time() (GH-10724) Tolerate up to 30 ms, instead of 15 min, in other threads. (cherry picked from commit 65c216e74f7957006ef7653b7e2afe83007c45ce) Co-authored-by: Victor Stinner <vstinner(a)redhat.com> files: M Lib/test/test_time.py diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 62abd891aafa..b9c678640808 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -497,7 +497,8 @@ def test_process_time(self): self.assertLess(stop - start, 0.020) # bpo-33723: A busy loop of 100 ms should increase process_time() - # by at least 15 ms + # by at least 15 ms. Tolerate 15 ms because of the bad resolution of + # the clock on Windows (around 15.6 ms). min_time = 0.015 busy_time = 0.100 @@ -535,8 +536,11 @@ def test_thread_time(self): self.assertLess(stop - start, 0.020) # bpo-33723: A busy loop of 100 ms should increase thread_time() - # by at least 15 ms + # by at least 15 ms, but less than 30 ms in other threads. + # Tolerate 15 and 30 ms because of the bad resolution + # of the clock on Windows (around 15.6 ms). min_time = 0.015 + max_time = 0.030 busy_time = 0.100 # thread_time() should include CPU time spent in current thread... @@ -551,7 +555,7 @@ def test_thread_time(self): t.start() t.join() stop = time.thread_time() - self.assertLess(stop - start, min_time) + self.assertLess(stop - start, max_time) info = time.get_clock_info('thread_time') self.assertTrue(info.monotonic)

1 0

bpo-33723: Fix test_time.test_thread_time() (GH-10724)
by Victor Stinner 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/65c216e74f7957006ef7653b7e2afe8300… commit: 65c216e74f7957006ef7653b7e2afe83007c45ce branch: master author: Victor Stinner <vstinner(a)redhat.com> committer: GitHub <noreply(a)github.com> date: 2018-11-26T21:19:29+01:00 summary: bpo-33723: Fix test_time.test_thread_time() (GH-10724) Tolerate up to 30 ms, instead of 15 min, in other threads. files: M Lib/test/test_time.py diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py index 16b48a92f327..381cc823014e 100644 --- a/Lib/test/test_time.py +++ b/Lib/test/test_time.py @@ -496,7 +496,8 @@ def test_process_time(self): self.assertLess(stop - start, 0.020) # bpo-33723: A busy loop of 100 ms should increase process_time() - # by at least 15 ms + # by at least 15 ms. Tolerate 15 ms because of the bad resolution of + # the clock on Windows (around 15.6 ms). min_time = 0.015 busy_time = 0.100 @@ -534,8 +535,11 @@ def test_thread_time(self): self.assertLess(stop - start, 0.020) # bpo-33723: A busy loop of 100 ms should increase thread_time() - # by at least 15 ms + # by at least 15 ms, but less than 30 ms in other threads. + # Tolerate 15 and 30 ms because of the bad resolution + # of the clock on Windows (around 15.6 ms). min_time = 0.015 + max_time = 0.030 busy_time = 0.100 # thread_time() should include CPU time spent in current thread... @@ -550,7 +554,7 @@ def test_thread_time(self): t.start() t.join() stop = time.thread_time() - self.assertLess(stop - start, min_time) + self.assertLess(stop - start, max_time) info = time.get_clock_info('thread_time') self.assertTrue(info.monotonic)

1 0

Adds IPv6 support when invoking http.server directly. (GH-10595)
by Lisa Roach 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/433433fa6d55091600ce88dd19206b3902… commit: 433433fa6d55091600ce88dd19206b3902e0a87d branch: master author: Lisa Roach <lisaroach14(a)gmail.com> committer: GitHub <noreply(a)github.com> date: 2018-11-26T10:43:38-08:00 summary: Adds IPv6 support when invoking http.server directly. (GH-10595) files: A Misc/NEWS.d/next/Library/2018-11-18-18-44-40.bpo-24209.p3YWOf.rst M Doc/library/http.server.rst M Lib/http/server.py M Lib/test/test_httpservers.py diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst index 29f6e7da3bde..a367e373dc3c 100644 --- a/Doc/library/http.server.rst +++ b/Doc/library/http.server.rst @@ -410,14 +410,18 @@ the previous example, this serves files relative to the current directory:: python -m http.server 8000 By default, server binds itself to all interfaces. The option ``-b/--bind`` -specifies a specific address to which it should bind. For example, the -following command causes the server to bind to localhost only:: +specifies a specific address to which it should bind. Both IPv4 and IPv6 +addresses are supported. For example, the following command causes the server +to bind to localhost only:: python -m http.server 8000 --bind 127.0.0.1 .. versionadded:: 3.4 ``--bind`` argument was introduced. +.. versionadded:: 3.8 + ``--bind`` argument enhanced to support IPv6 + By default, server uses the current directory. The option ``-d/--directory`` specifies a directory to which it should serve the files. For example, the following command uses a specific directory:: diff --git a/Lib/http/server.py b/Lib/http/server.py index ca2dd507392c..22d865f2fdfa 100644 --- a/Lib/http/server.py +++ b/Lib/http/server.py @@ -1226,6 +1226,9 @@ def test(HandlerClass=BaseHTTPRequestHandler, """ server_address = (bind, port) + if ':' in bind: + ServerClass.address_family = socket.AF_INET6 + HandlerClass.protocol_version = protocol with ServerClass(server_address, HandlerClass) as httpd: sa = httpd.socket.getsockname() diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py index cc829a522b88..3d8e0af8b45c 100644 --- a/Lib/test/test_httpservers.py +++ b/Lib/test/test_httpservers.py @@ -9,6 +9,7 @@ from http import server, HTTPStatus import os +import socket import sys import re import base64 @@ -1116,6 +1117,24 @@ def test_all(self): self.assertCountEqual(server.__all__, expected) +class ScriptTestCase(unittest.TestCase): + @mock.patch('builtins.print') + def test_server_test_ipv6(self, _): + mock_server = mock.MagicMock() + server.test(ServerClass=mock_server, bind="::") + self.assertEqual(mock_server.address_family, socket.AF_INET6) + + mock_server.reset_mock() + server.test(ServerClass=mock_server, + bind="2001:0db8:85a3:0000:0000:8a2e:0370:7334") + self.assertEqual(mock_server.address_family, socket.AF_INET6) + + mock_server.reset_mock() + server.test(ServerClass=mock_server, + bind="::1") + self.assertEqual(mock_server.address_family, socket.AF_INET6) + + def test_main(verbose=None): cwd = os.getcwd() try: @@ -1127,6 +1146,7 @@ def test_main(verbose=None): CGIHTTPServerTestCase, SimpleHTTPRequestHandlerTestCase, MiscTestCase, + ScriptTestCase ) finally: os.chdir(cwd) diff --git a/Misc/NEWS.d/next/Library/2018-11-18-18-44-40.bpo-24209.p3YWOf.rst b/Misc/NEWS.d/next/Library/2018-11-18-18-44-40.bpo-24209.p3YWOf.rst new file mode 100644 index 000000000000..88749e920c2c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2018-11-18-18-44-40.bpo-24209.p3YWOf.rst @@ -0,0 +1 @@ +Adds IPv6 support when invoking http.server directly.

1 0

[3.6] bpo-35255: Doc: Delete now useless Windows FAQ section (GH-10557) (GH-10725)
by Miss Islington (bot) 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/7f93f93fa68f54c1bc128d58fa17536245… commit: 7f93f93fa68f54c1bc128d58fa17536245396de3 branch: 3.6 author: Julien Palard <julien(a)palard.fr> committer: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com> date: 2018-11-26T09:16:13-08:00 summary: [3.6] bpo-35255: Doc: Delete now useless Windows FAQ section (GH-10557) (GH-10725) (cherry picked from commit 5719f275b7153a00a800f5481271a6fc26659c65) Co-authored-by: Mathieu Dupuy <deronnax(a)users.noreply.github.com> https://bugs.python.org/issue35255 files: M Doc/faq/windows.rst diff --git a/Doc/faq/windows.rst b/Doc/faq/windows.rst index bcfd7f4f1fde..772b8c2012c3 100644 --- a/Doc/faq/windows.rst +++ b/Doc/faq/windows.rst @@ -301,14 +301,3 @@ In 2.7 and 3.2, :func:`os.kill` is implemented similar to the above function, with the additional feature of being able to send :kbd:`Ctrl+C` and :kbd:`Ctrl+Break` to console subprocesses which are designed to handle those signals. See :func:`os.kill` for further details. - -How do I extract the downloaded documentation on Windows? ---------------------------------------------------------- - -Sometimes, when you download the documentation package to a Windows machine -using a web browser, the file extension of the saved file ends up being .EXE. -This is a mistake; the extension should be .TGZ. - -Simply rename the downloaded file to have the .TGZ extension, and WinZip will be -able to handle it. (If your copy of WinZip doesn't, get a newer one from -https://www.winzip.com.)

1 0

[3.7] bpo-35255: Doc: Delete now useless Windows FAQ section (GH-10557) (GH-10722)
by Miss Islington (bot) 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/6f8cab0db044132c6192d6ac1536d28cf7… commit: 6f8cab0db044132c6192d6ac1536d28cf72c9d27 branch: 3.7 author: Julien Palard <julien(a)palard.fr> committer: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com> date: 2018-11-26T09:05:48-08:00 summary: [3.7] bpo-35255: Doc: Delete now useless Windows FAQ section (GH-10557) (GH-10722) (cherry picked from commit 5719f275b7153a00a800f5481271a6fc26659c65) Co-authored-by: Mathieu Dupuy <deronnax(a)users.noreply.github.com> https://bugs.python.org/issue35255 files: M Doc/faq/windows.rst diff --git a/Doc/faq/windows.rst b/Doc/faq/windows.rst index d063e7c550c0..9292a2407a75 100644 --- a/Doc/faq/windows.rst +++ b/Doc/faq/windows.rst @@ -301,14 +301,3 @@ In 2.7 and 3.2, :func:`os.kill` is implemented similar to the above function, with the additional feature of being able to send :kbd:`Ctrl+C` and :kbd:`Ctrl+Break` to console subprocesses which are designed to handle those signals. See :func:`os.kill` for further details. - -How do I extract the downloaded documentation on Windows? ---------------------------------------------------------- - -Sometimes, when you download the documentation package to a Windows machine -using a web browser, the file extension of the saved file ends up being .EXE. -This is a mistake; the extension should be .TGZ. - -Simply rename the downloaded file to have the .TGZ extension, and WinZip will be -able to handle it. (If your copy of WinZip doesn't, get a newer one from -https://www.winzip.com.)

1 0

bpo-35134: Create Include/cpython/unicodeobject.h (GH-10680)
by Victor Stinner 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/75e4699b31d1d88abad097ad13466c5c07… commit: 75e4699b31d1d88abad097ad13466c5c07711324 branch: master author: Victor Stinner <vstinner(a)redhat.com> committer: GitHub <noreply(a)github.com> date: 2018-11-26T17:29:38+01:00 summary: bpo-35134: Create Include/cpython/unicodeobject.h (GH-10680) Move unicodeobject.h code surrounded by "#ifndef Py_LIMITED_API" to a new Include/cpython/unicodeobject.h header file. files: A Include/cpython/unicodeobject.h M Include/unicodeobject.h diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h new file mode 100644 index 000000000000..c76349022485 --- /dev/null +++ b/Include/cpython/unicodeobject.h @@ -0,0 +1,1245 @@ +#ifndef Py_CPYTHON_UNICODEOBJECT_H +# error "this header file must not be included directly" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/* Py_UNICODE was the native Unicode storage format (code unit) used by + Python and represents a single Unicode element in the Unicode type. + With PEP 393, Py_UNICODE is deprecated and replaced with a + typedef to wchar_t. */ +#define PY_UNICODE_TYPE wchar_t +typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */; + +/* --- Internal Unicode Operations ---------------------------------------- */ + +/* Since splitting on whitespace is an important use case, and + whitespace in most situations is solely ASCII whitespace, we + optimize for the common case by using a quick look-up table + _Py_ascii_whitespace (see below) with an inlined check. + + */ +#define Py_UNICODE_ISSPACE(ch) \ + ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) + +#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) +#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) +#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) +#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) + +#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) +#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) +#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) + +#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) +#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) +#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) +#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) + +#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) +#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) +#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) + +#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) + +#define Py_UNICODE_ISALNUM(ch) \ + (Py_UNICODE_ISALPHA(ch) || \ + Py_UNICODE_ISDECIMAL(ch) || \ + Py_UNICODE_ISDIGIT(ch) || \ + Py_UNICODE_ISNUMERIC(ch)) + +#define Py_UNICODE_COPY(target, source, length) \ + memcpy((target), (source), (length)*sizeof(Py_UNICODE)) + +#define Py_UNICODE_FILL(target, value, length) \ + do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ + for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ + } while (0) + +/* macros to work with surrogates */ +#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF) +#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF) +#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF) +/* Join two surrogate characters and return a single Py_UCS4 value. */ +#define Py_UNICODE_JOIN_SURROGATES(high, low) \ + (((((Py_UCS4)(high) & 0x03FF) << 10) | \ + ((Py_UCS4)(low) & 0x03FF)) + 0x10000) +/* high surrogate = top 10 bits added to D800 */ +#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10)) +/* low surrogate = bottom 10 bits added to DC00 */ +#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) + +/* Check if substring matches at given offset. The offset must be + valid, and the substring must not be empty. */ + +#define Py_UNICODE_MATCH(string, offset, substring) \ + ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ + ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ + !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) + +/* --- Unicode Type ------------------------------------------------------- */ + +/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject + structure. state.ascii and state.compact are set, and the data + immediately follow the structure. utf8_length and wstr_length can be found + in the length field; the utf8 pointer is equal to the data pointer. */ +typedef struct { + /* There are 4 forms of Unicode strings: + + - compact ascii: + + * structure = PyASCIIObject + * test: PyUnicode_IS_COMPACT_ASCII(op) + * kind = PyUnicode_1BYTE_KIND + * compact = 1 + * ascii = 1 + * ready = 1 + * (length is the length of the utf8 and wstr strings) + * (data starts just after the structure) + * (since ASCII is decoded from UTF-8, the utf8 string are the data) + + - compact: + + * structure = PyCompactUnicodeObject + * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op) + * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or + PyUnicode_4BYTE_KIND + * compact = 1 + * ready = 1 + * ascii = 0 + * utf8 is not shared with data + * utf8_length = 0 if utf8 is NULL + * wstr is shared with data and wstr_length=length + if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 + or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4 + * wstr_length = 0 if wstr is NULL + * (data starts just after the structure) + + - legacy string, not ready: + + * structure = PyUnicodeObject + * test: kind == PyUnicode_WCHAR_KIND + * length = 0 (use wstr_length) + * hash = -1 + * kind = PyUnicode_WCHAR_KIND + * compact = 0 + * ascii = 0 + * ready = 0 + * interned = SSTATE_NOT_INTERNED + * wstr is not NULL + * data.any is NULL + * utf8 is NULL + * utf8_length = 0 + + - legacy string, ready: + + * structure = PyUnicodeObject structure + * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND + * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or + PyUnicode_4BYTE_KIND + * compact = 0 + * ready = 1 + * data.any is not NULL + * utf8 is shared and utf8_length = length with data.any if ascii = 1 + * utf8_length = 0 if utf8 is NULL + * wstr is shared with data.any and wstr_length = length + if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 + or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4 + * wstr_length = 0 if wstr is NULL + + Compact strings use only one memory block (structure + characters), + whereas legacy strings use one block for the structure and one block + for characters. + + Legacy strings are created by PyUnicode_FromUnicode() and + PyUnicode_FromStringAndSize(NULL, size) functions. They become ready + when PyUnicode_READY() is called. + + See also _PyUnicode_CheckConsistency(). + */ + PyObject_HEAD + Py_ssize_t length; /* Number of code points in the string */ + Py_hash_t hash; /* Hash value; -1 if not set */ + struct { + /* + SSTATE_NOT_INTERNED (0) + SSTATE_INTERNED_MORTAL (1) + SSTATE_INTERNED_IMMORTAL (2) + + If interned != SSTATE_NOT_INTERNED, the two references from the + dictionary to this object are *not* counted in ob_refcnt. + */ + unsigned int interned:2; + /* Character size: + + - PyUnicode_WCHAR_KIND (0): + + * character type = wchar_t (16 or 32 bits, depending on the + platform) + + - PyUnicode_1BYTE_KIND (1): + + * character type = Py_UCS1 (8 bits, unsigned) + * all characters are in the range U+0000-U+00FF (latin1) + * if ascii is set, all characters are in the range U+0000-U+007F + (ASCII), otherwise at least one character is in the range + U+0080-U+00FF + + - PyUnicode_2BYTE_KIND (2): + + * character type = Py_UCS2 (16 bits, unsigned) + * all characters are in the range U+0000-U+FFFF (BMP) + * at least one character is in the range U+0100-U+FFFF + + - PyUnicode_4BYTE_KIND (4): + + * character type = Py_UCS4 (32 bits, unsigned) + * all characters are in the range U+0000-U+10FFFF + * at least one character is in the range U+10000-U+10FFFF + */ + unsigned int kind:3; + /* Compact is with respect to the allocation scheme. Compact unicode + objects only require one memory block while non-compact objects use + one block for the PyUnicodeObject struct and another for its data + buffer. */ + unsigned int compact:1; + /* The string only contains characters in the range U+0000-U+007F (ASCII) + and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is + set, use the PyASCIIObject structure. */ + unsigned int ascii:1; + /* The ready flag indicates whether the object layout is initialized + completely. This means that this is either a compact object, or + the data pointer is filled out. The bit is redundant, and helps + to minimize the test in PyUnicode_IS_READY(). */ + unsigned int ready:1; + /* Padding to ensure that PyUnicode_DATA() is always aligned to + 4 bytes (see issue #19537 on m68k). */ + unsigned int :24; + } state; + wchar_t *wstr; /* wchar_t representation (null-terminated) */ +} PyASCIIObject; + +/* Non-ASCII strings allocated through PyUnicode_New use the + PyCompactUnicodeObject structure. state.compact is set, and the data + immediately follow the structure. */ +typedef struct { + PyASCIIObject _base; + Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the + * terminating \0. */ + char *utf8; /* UTF-8 representation (null-terminated) */ + Py_ssize_t wstr_length; /* Number of code points in wstr, possible + * surrogates count as two code points. */ +} PyCompactUnicodeObject; + +/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the + PyUnicodeObject structure. The actual string data is initially in the wstr + block, and copied into the data block using _PyUnicode_Ready. */ +typedef struct { + PyCompactUnicodeObject _base; + union { + void *any; + Py_UCS1 *latin1; + Py_UCS2 *ucs2; + Py_UCS4 *ucs4; + } data; /* Canonical, smallest-form Unicode buffer */ +} PyUnicodeObject; + +/* Fast access macros */ +#define PyUnicode_WSTR_LENGTH(op) \ + (PyUnicode_IS_COMPACT_ASCII(op) ? \ + ((PyASCIIObject*)op)->length : \ + ((PyCompactUnicodeObject*)op)->wstr_length) + +/* Returns the deprecated Py_UNICODE representation's size in code units + (this includes surrogate pairs as 2 units). + If the Py_UNICODE representation is not available, it will be computed + on request. Use PyUnicode_GET_LENGTH() for the length in code points. */ + +#define PyUnicode_GET_SIZE(op) \ + (assert(PyUnicode_Check(op)), \ + (((PyASCIIObject *)(op))->wstr) ? \ + PyUnicode_WSTR_LENGTH(op) : \ + ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\ + assert(((PyASCIIObject *)(op))->wstr), \ + PyUnicode_WSTR_LENGTH(op))) + /* Py_DEPRECATED(3.3) */ + +#define PyUnicode_GET_DATA_SIZE(op) \ + (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE) + /* Py_DEPRECATED(3.3) */ + +/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE + representation on demand. Using this macro is very inefficient now, + try to port your code to use the new PyUnicode_*BYTE_DATA() macros or + use PyUnicode_WRITE() and PyUnicode_READ(). */ + +#define PyUnicode_AS_UNICODE(op) \ + (assert(PyUnicode_Check(op)), \ + (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \ + PyUnicode_AsUnicode(_PyObject_CAST(op))) + /* Py_DEPRECATED(3.3) */ + +#define PyUnicode_AS_DATA(op) \ + ((const char *)(PyUnicode_AS_UNICODE(op))) + /* Py_DEPRECATED(3.3) */ + + +/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ + +/* Values for PyASCIIObject.state: */ + +/* Interning state. */ +#define SSTATE_NOT_INTERNED 0 +#define SSTATE_INTERNED_MORTAL 1 +#define SSTATE_INTERNED_IMMORTAL 2 + +/* Return true if the string contains only ASCII characters, or 0 if not. The + string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be + ready. */ +#define PyUnicode_IS_ASCII(op) \ + (assert(PyUnicode_Check(op)), \ + assert(PyUnicode_IS_READY(op)), \ + ((PyASCIIObject*)op)->state.ascii) + +/* Return true if the string is compact or 0 if not. + No type checks or Ready calls are performed. */ +#define PyUnicode_IS_COMPACT(op) \ + (((PyASCIIObject*)(op))->state.compact) + +/* Return true if the string is a compact ASCII string (use PyASCIIObject + structure), or 0 if not. No type checks or Ready calls are performed. */ +#define PyUnicode_IS_COMPACT_ASCII(op) \ + (((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op)) + +enum PyUnicode_Kind { +/* String contains only wstr byte characters. This is only possible + when the string was created with a legacy API and _PyUnicode_Ready() + has not been called yet. */ + PyUnicode_WCHAR_KIND = 0, +/* Return values of the PyUnicode_KIND() macro: */ + PyUnicode_1BYTE_KIND = 1, + PyUnicode_2BYTE_KIND = 2, + PyUnicode_4BYTE_KIND = 4 +}; + +/* Return pointers to the canonical representation cast to unsigned char, + Py_UCS2, or Py_UCS4 for direct character access. + No checks are performed, use PyUnicode_KIND() before to ensure + these will work correctly. */ + +#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op)) +#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op)) +#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op)) + +/* Return one of the PyUnicode_*_KIND values defined above. */ +#define PyUnicode_KIND(op) \ + (assert(PyUnicode_Check(op)), \ + assert(PyUnicode_IS_READY(op)), \ + ((PyASCIIObject *)(op))->state.kind) + +/* Return a void pointer to the raw unicode buffer. */ +#define _PyUnicode_COMPACT_DATA(op) \ + (PyUnicode_IS_ASCII(op) ? \ + ((void*)((PyASCIIObject*)(op) + 1)) : \ + ((void*)((PyCompactUnicodeObject*)(op) + 1))) + +#define _PyUnicode_NONCOMPACT_DATA(op) \ + (assert(((PyUnicodeObject*)(op))->data.any), \ + ((((PyUnicodeObject *)(op))->data.any))) + +#define PyUnicode_DATA(op) \ + (assert(PyUnicode_Check(op)), \ + PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \ + _PyUnicode_NONCOMPACT_DATA(op)) + +/* In the access macros below, "kind" may be evaluated more than once. + All other macro parameters are evaluated exactly once, so it is safe + to put side effects into them (such as increasing the index). */ + +/* Write into the canonical representation, this macro does not do any sanity + checks and is intended for usage in loops. The caller should cache the + kind and data pointers obtained from other macro calls. + index is the index in the string (starts at 0) and value is the new + code point value which should be written to that location. */ +#define PyUnicode_WRITE(kind, data, index, value) \ + do { \ + switch ((kind)) { \ + case PyUnicode_1BYTE_KIND: { \ + ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \ + break; \ + } \ + case PyUnicode_2BYTE_KIND: { \ + ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \ + break; \ + } \ + default: { \ + assert((kind) == PyUnicode_4BYTE_KIND); \ + ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \ + } \ + } \ + } while (0) + +/* Read a code point from the string's canonical representation. No checks + or ready calls are performed. */ +#define PyUnicode_READ(kind, data, index) \ + ((Py_UCS4) \ + ((kind) == PyUnicode_1BYTE_KIND ? \ + ((const Py_UCS1 *)(data))[(index)] : \ + ((kind) == PyUnicode_2BYTE_KIND ? \ + ((const Py_UCS2 *)(data))[(index)] : \ + ((const Py_UCS4 *)(data))[(index)] \ + ) \ + )) + +/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it + calls PyUnicode_KIND() and might call it twice. For single reads, use + PyUnicode_READ_CHAR, for multiple consecutive reads callers should + cache kind and use PyUnicode_READ instead. */ +#define PyUnicode_READ_CHAR(unicode, index) \ + (assert(PyUnicode_Check(unicode)), \ + assert(PyUnicode_IS_READY(unicode)), \ + (Py_UCS4) \ + (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \ + ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \ + (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \ + ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \ + ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \ + ) \ + )) + +/* Returns the length of the unicode string. The caller has to make sure that + the string has it's canonical representation set before calling + this macro. Call PyUnicode_(FAST_)Ready to ensure that. */ +#define PyUnicode_GET_LENGTH(op) \ + (assert(PyUnicode_Check(op)), \ + assert(PyUnicode_IS_READY(op)), \ + ((PyASCIIObject *)(op))->length) + + +/* Fast check to determine whether an object is ready. Equivalent to + PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */ + +#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready) + +/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best + case. If the canonical representation is not yet set, it will still call + _PyUnicode_Ready(). + Returns 0 on success and -1 on errors. */ +#define PyUnicode_READY(op) \ + (assert(PyUnicode_Check(op)), \ + (PyUnicode_IS_READY(op) ? \ + 0 : _PyUnicode_Ready(_PyObject_CAST(op)))) + +/* Return a maximum character value which is suitable for creating another + string based on op. This is always an approximation but more efficient + than iterating over the string. */ +#define PyUnicode_MAX_CHAR_VALUE(op) \ + (assert(PyUnicode_IS_READY(op)), \ + (PyUnicode_IS_ASCII(op) ? \ + (0x7f) : \ + (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \ + (0xffU) : \ + (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \ + (0xffffU) : \ + (0x10ffffU))))) + +/* === Public API ========================================================= */ + +/* --- Plain Py_UNICODE --------------------------------------------------- */ + +/* With PEP 393, this is the recommended way to allocate a new unicode object. + This function will allocate the object and its buffer in a single memory + block. Objects created using this function are not resizable. */ +PyAPI_FUNC(PyObject*) PyUnicode_New( + Py_ssize_t size, /* Number of code points in the new string */ + Py_UCS4 maxchar /* maximum code point value in the string */ + ); + +/* Initializes the canonical string representation from the deprecated + wstr/Py_UNICODE representation. This function is used to convert Unicode + objects which were created using the old API to the new flexible format + introduced with PEP 393. + + Don't call this function directly, use the public PyUnicode_READY() macro + instead. */ +PyAPI_FUNC(int) _PyUnicode_Ready( + PyObject *unicode /* Unicode object */ + ); + +/* Get a copy of a Unicode string. */ +PyAPI_FUNC(PyObject*) _PyUnicode_Copy( + PyObject *unicode + ); + +/* Copy character from one unicode object into another, this function performs + character conversion when necessary and falls back to memcpy() if possible. + + Fail if to is too small (smaller than *how_many* or smaller than + len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > + kind(to), or if *to* has more than 1 reference. + + Return the number of written character, or return -1 and raise an exception + on error. + + Pseudo-code: + + how_many = min(how_many, len(from) - from_start) + to[to_start:to_start+how_many] = from[from_start:from_start+how_many] + return how_many + + Note: The function doesn't write a terminating null character. + */ +PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( + PyObject *to, + Py_ssize_t to_start, + PyObject *from, + Py_ssize_t from_start, + Py_ssize_t how_many + ); + +/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so + may crash if parameters are invalid (e.g. if the output string + is too short). */ +PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( + PyObject *to, + Py_ssize_t to_start, + PyObject *from, + Py_ssize_t from_start, + Py_ssize_t how_many + ); + +/* Fill a string with a character: write fill_char into + unicode[start:start+length]. + + Fail if fill_char is bigger than the string maximum character, or if the + string has more than 1 reference. + + Return the number of written character, or return -1 and raise an exception + on error. */ +PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t length, + Py_UCS4 fill_char + ); + +/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash + if parameters are invalid (e.g. if length is longer than the string). */ +PyAPI_FUNC(void) _PyUnicode_FastFill( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t length, + Py_UCS4 fill_char + ); + +/* Create a Unicode Object from the Py_UNICODE buffer u of the given + size. + + u may be NULL which causes the contents to be undefined. It is the + user's responsibility to fill in the needed data afterwards. Note + that modifying the Unicode object contents after construction is + only allowed if u was set to NULL. + + The buffer is copied into the new object. */ +PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( + const Py_UNICODE *u, /* Unicode buffer */ + Py_ssize_t size /* size of buffer */ + ) /* Py_DEPRECATED(3.3) */; + +/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. + Scan the string to find the maximum character. */ +PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( + int kind, + const void *buffer, + Py_ssize_t size); + +/* Create a new string from a buffer of ASCII characters. + WARNING: Don't check if the string contains any non-ASCII character. */ +PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( + const char *buffer, + Py_ssize_t size); + +/* Compute the maximum character of the substring unicode[start:end]. + Return 127 for an empty string. */ +PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t end); + +/* Return a read-only pointer to the Unicode object's internal + Py_UNICODE buffer. + If the wchar_t/Py_UNICODE representation is not yet available, this + function will calculate it. */ +PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( + PyObject *unicode /* Unicode object */ + ) /* Py_DEPRECATED(3.3) */; + +/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string + contains null characters. */ +PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( + PyObject *unicode /* Unicode object */ + ); + +/* Return a read-only pointer to the Unicode object's internal + Py_UNICODE buffer and save the length at size. + If the wchar_t/Py_UNICODE representation is not yet available, this + function will calculate it. */ + +PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( + PyObject *unicode, /* Unicode object */ + Py_ssize_t *size /* location where to save the length */ + ) /* Py_DEPRECATED(3.3) */; + +/* Get the maximum ordinal for a Unicode character. */ +PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void) Py_DEPRECATED(3.3); + + +/* --- _PyUnicodeWriter API ----------------------------------------------- */ + +typedef struct { + PyObject *buffer; + void *data; + enum PyUnicode_Kind kind; + Py_UCS4 maxchar; + Py_ssize_t size; + Py_ssize_t pos; + + /* minimum number of allocated characters (default: 0) */ + Py_ssize_t min_length; + + /* minimum character (default: 127, ASCII) */ + Py_UCS4 min_char; + + /* If non-zero, overallocate the buffer (default: 0). */ + unsigned char overallocate; + + /* If readonly is 1, buffer is a shared string (cannot be modified) + and size is set to 0. */ + unsigned char readonly; +} _PyUnicodeWriter ; + +/* Initialize a Unicode writer. + * + * By default, the minimum buffer size is 0 character and overallocation is + * disabled. Set min_length, min_char and overallocate attributes to control + * the allocation of the buffer. */ +PyAPI_FUNC(void) +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer); + +/* Prepare the buffer to write 'length' characters + with the specified maximum character. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ + (((MAXCHAR) <= (WRITER)->maxchar \ + && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ + ? 0 \ + : (((LENGTH) == 0) \ + ? 0 \ + : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) + +/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro + instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +/* Prepare the buffer to have at least the kind KIND. + For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will + support characters in range U+000-U+FFFF. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ + (assert((KIND) != PyUnicode_WCHAR_KIND), \ + (KIND) <= (WRITER)->kind \ + ? 0 \ + : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) + +/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() + macro instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, + enum PyUnicode_Kind kind); + +/* Append a Unicode character. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, + Py_UCS4 ch + ); + +/* Append a Unicode string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, + PyObject *str /* Unicode string */ + ); + +/* Append a substring of a Unicode string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, + PyObject *str, /* Unicode string */ + Py_ssize_t start, + Py_ssize_t end + ); + +/* Append an ASCII-encoded byte string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, + const char *str, /* ASCII-encoded byte string */ + Py_ssize_t len /* number of bytes, or -1 if unknown */ + ); + +/* Append a latin1-encoded byte string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, + const char *str, /* latin1-encoded byte string */ + Py_ssize_t len /* length in bytes */ + ); + +/* Get the value of the writer as a Unicode string. Clear the + buffer of the writer. Raise an exception and return NULL + on error. */ +PyAPI_FUNC(PyObject *) +_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); + +/* Deallocate memory of a writer (clear its internal buffer). */ +PyAPI_FUNC(void) +_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); + + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); + +PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void); + +/* --- wchar_t support for platforms which support it --------------------- */ + +#ifdef HAVE_WCHAR_H +PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind); +#endif + +/* --- Manage the default encoding ---------------------------------------- */ + +/* Returns a pointer to the default encoding (UTF-8) of the + Unicode object unicode and the size of the encoded representation + in bytes stored in *size. + + In case of an error, no *size is set. + + This function caches the UTF-8 encoded string in the unicodeobject + and subsequent calls will return the same string. The memory is released + when the unicodeobject is deallocated. + + _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to + support the previous internal function with the same behaviour. + + *** This API is for interpreter INTERNAL USE ONLY and will likely + *** be removed or changed in the future. + + *** If you need to access the Unicode object as UTF-8 bytes string, + *** please use PyUnicode_AsUTF8String() instead. +*/ + +PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize( + PyObject *unicode, + Py_ssize_t *size); + +#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize + +/* Returns a pointer to the default encoding (UTF-8) of the + Unicode object unicode. + + Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation + in the unicodeobject. + + _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to + support the previous internal function with the same behaviour. + + Use of this API is DEPRECATED since no size information can be + extracted from the returned data. + + *** This API is for interpreter INTERNAL USE ONLY and will likely + *** be removed or changed for Python 3.1. + + *** If you need to access the Unicode object as UTF-8 bytes string, + *** please use PyUnicode_AsUTF8String() instead. + +*/ + +PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); + +#define _PyUnicode_AsString PyUnicode_AsUTF8 + +/* --- Generic Codecs ----------------------------------------------------- */ + +/* Encodes a Py_UNICODE buffer of the given size and returns a + Python string object. */ +PyAPI_FUNC(PyObject*) PyUnicode_Encode( + const Py_UNICODE *s, /* Unicode char buffer */ + Py_ssize_t size, /* number of Py_UNICODE chars to encode */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ) Py_DEPRECATED(3.3); + +/* --- UTF-7 Codecs ------------------------------------------------------- */ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + int base64SetO, /* Encode RFC2152 Set O characters in base64 */ + int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ + const char *errors /* error handling */ + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7( + PyObject *unicode, /* Unicode object */ + int base64SetO, /* Encode RFC2152 Set O characters in base64 */ + int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ + const char *errors /* error handling */ + ); + +/* --- UTF-8 Codecs ------------------------------------------------------- */ + +PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String( + PyObject *unicode, + const char *errors); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + const char *errors /* error handling */ + ) Py_DEPRECATED(3.3); + +/* --- UTF-32 Codecs ------------------------------------------------------ */ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + const char *errors, /* error handling */ + int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32( + PyObject *object, /* Unicode object */ + const char *errors, /* error handling */ + int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ + ); + +/* --- UTF-16 Codecs ------------------------------------------------------ */ + +/* Returns a Python string object holding the UTF-16 encoded value of + the Unicode data. + + If byteorder is not 0, output is written according to the following + byte order: + + byteorder == -1: little endian + byteorder == 0: native byte order (writes a BOM mark) + byteorder == 1: big endian + + If byteorder is 0, the output string will always start with the + Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is + prepended. + + Note that Py_UNICODE data is being interpreted as UTF-16 reduced to + UCS-2. This trick makes it possible to add full UTF-16 capabilities + at a later point without compromising the APIs. + +*/ +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + const char *errors, /* error handling */ + int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16( + PyObject* unicode, /* Unicode object */ + const char *errors, /* error handling */ + int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ + ); + +/* --- Unicode-Escape Codecs ---------------------------------------------- */ + +/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape + chars. */ +PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + const char **first_invalid_escape /* on return, points to first + invalid escaped char in + string. */ +); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length /* Number of Py_UNICODE chars to encode */ + ) Py_DEPRECATED(3.3); + +/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length /* Number of Py_UNICODE chars to encode */ + ) Py_DEPRECATED(3.3); + +/* --- Unicode Internal Codec --------------------------------------------- */ + +/* Only for internal use in _codecsmodule.c */ +PyObject *_PyUnicode_DecodeUnicodeInternal( + const char *string, + Py_ssize_t length, + const char *errors + ); + +/* --- Latin-1 Codecs ----------------------------------------------------- */ + +PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String( + PyObject* unicode, + const char* errors); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + const char *errors /* error handling */ + ) Py_DEPRECATED(3.3); + +/* --- ASCII Codecs ------------------------------------------------------- */ + +PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString( + PyObject* unicode, + const char* errors); + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + const char *errors /* error handling */ + ) Py_DEPRECATED(3.3); + +/* --- Character Map Codecs ----------------------------------------------- */ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + PyObject *mapping, /* encoding mapping */ + const char *errors /* error handling */ + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap( + PyObject *unicode, /* Unicode object */ + PyObject *mapping, /* encoding mapping */ + const char *errors /* error handling */ + ); + +/* Translate a Py_UNICODE buffer of the given length by applying a + character mapping table to it and return the resulting Unicode + object. + + The mapping table must map Unicode ordinal integers to Unicode strings, + Unicode ordinal integers or None (causing deletion of the character). + + Mapping tables may be dictionaries or sequences. Unmapped character + ordinals (ones which cause a LookupError) are left untouched and + are copied as-is. + +*/ +PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + PyObject *table, /* Translate table */ + const char *errors /* error handling */ + ) Py_DEPRECATED(3.3); + +/* --- MBCS codecs for Windows -------------------------------------------- */ + +#ifdef MS_WINDOWS +PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( + const Py_UNICODE *data, /* Unicode char buffer */ + Py_ssize_t length, /* number of Py_UNICODE chars to encode */ + const char *errors /* error handling */ + ) Py_DEPRECATED(3.3); +#endif + +/* --- Decimal Encoder ---------------------------------------------------- */ + +/* Takes a Unicode string holding a decimal value and writes it into + an output buffer using standard ASCII digit codes. + + The output buffer has to provide at least length+1 bytes of storage + area. The output string is 0-terminated. + + The encoder converts whitespace to ' ', decimal characters to their + corresponding ASCII digit and all other Latin-1 characters except + \0 as-is. Characters outside this range (Unicode ordinals 1-256) + are treated as errors. This includes embedded NULL bytes. + + Error handling is defined by the errors argument: + + NULL or "strict": raise a ValueError + "ignore": ignore the wrong characters (these are not copied to the + output buffer) + "replace": replaces illegal characters with '?' + + Returns 0 on success, -1 on failure. + +*/ + +PyAPI_FUNC(int) PyUnicode_EncodeDecimal( + Py_UNICODE *s, /* Unicode buffer */ + Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ + char *output, /* Output buffer; must have size >= length */ + const char *errors /* error handling */ + ) /* Py_DEPRECATED(3.3) */; + +/* Transforms code points that have decimal digit property to the + corresponding ASCII digit code points. + + Returns a new Unicode string on success, NULL on failure. +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII( + Py_UNICODE *s, /* Unicode buffer */ + Py_ssize_t length /* Number of Py_UNICODE chars to transform */ + ) /* Py_DEPRECATED(3.3) */; + +/* Coverts a Unicode object holding a decimal value to an ASCII string + for using in int, float and complex parsers. + Transforms code points that have decimal digit property to the + corresponding ASCII digit code points. Transforms spaces to ASCII. + Transforms code points starting from the first non-ASCII code point that + is neither a decimal digit nor a space to the end into '?'. */ + +PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( + PyObject *unicode /* Unicode object */ + ); + +/* --- Methods & Slots ---------------------------------------------------- */ + +PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray( + PyObject *separator, + PyObject *const *items, + Py_ssize_t seqlen + ); + +/* Test whether a unicode is equal to ASCII identifier. Return 1 if true, + 0 otherwise. The right argument must be ASCII identifier. + Any error occurs inside will be cleared before return. */ +PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId( + PyObject *left, /* Left string */ + _Py_Identifier *right /* Right identifier */ + ); + +/* Test whether a unicode is equal to ASCII string. Return 1 if true, + 0 otherwise. The right argument must be ASCII-encoded string. + Any error occurs inside will be cleared before return. */ +PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString( + PyObject *left, + const char *right /* ASCII-encoded string */ + ); + +/* Externally visible for str.strip(unicode) */ +PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( + PyObject *self, + int striptype, + PyObject *sepobj + ); + +/* Using explicit passed-in values, insert the thousands grouping + into the string pointed to by buffer. For the argument descriptions, + see Objects/stringlib/localeutil.h */ +PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( + _PyUnicodeWriter *writer, + Py_ssize_t n_buffer, + PyObject *digits, + Py_ssize_t d_pos, + Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + PyObject *thousands_sep, + Py_UCS4 *maxchar); + +/* === Characters Type APIs =============================================== */ + +/* Helper array used by Py_UNICODE_ISSPACE(). */ + +PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; + +/* These should not be used directly. Use the Py_UNICODE_IS* and + Py_UNICODE_TO* macros instead. + + These APIs are implemented in Objects/unicodectype.c. + +*/ + +PyAPI_FUNC(int) _PyUnicode_IsLowercase( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsUppercase( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsTitlecase( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsXidStart( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsXidContinue( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsWhitespace( + const Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsLinebreak( + const Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( + Py_UCS4 ch /* Unicode character */ + ) /* Py_DEPRECATED(3.3) */; + +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( + Py_UCS4 ch /* Unicode character */ + ) /* Py_DEPRECATED(3.3) */; + +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( + Py_UCS4 ch /* Unicode character */ + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(int) _PyUnicode_ToLowerFull( + Py_UCS4 ch, /* Unicode character */ + Py_UCS4 *res + ); + +PyAPI_FUNC(int) _PyUnicode_ToTitleFull( + Py_UCS4 ch, /* Unicode character */ + Py_UCS4 *res + ); + +PyAPI_FUNC(int) _PyUnicode_ToUpperFull( + Py_UCS4 ch, /* Unicode character */ + Py_UCS4 *res + ); + +PyAPI_FUNC(int) _PyUnicode_ToFoldedFull( + Py_UCS4 ch, /* Unicode character */ + Py_UCS4 *res + ); + +PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsCased( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_ToDigit( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(double) _PyUnicode_ToNumeric( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsDigit( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsNumeric( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsPrintable( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsAlpha( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(size_t) Py_UNICODE_strlen( + const Py_UNICODE *u + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy( + Py_UNICODE *s1, + const Py_UNICODE *s2) Py_DEPRECATED(3.3); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat( + Py_UNICODE *s1, const Py_UNICODE *s2) Py_DEPRECATED(3.3); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy( + Py_UNICODE *s1, + const Py_UNICODE *s2, + size_t n) Py_DEPRECATED(3.3); + +PyAPI_FUNC(int) Py_UNICODE_strcmp( + const Py_UNICODE *s1, + const Py_UNICODE *s2 + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(int) Py_UNICODE_strncmp( + const Py_UNICODE *s1, + const Py_UNICODE *s2, + size_t n + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr( + const Py_UNICODE *s, + Py_UNICODE c + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr( + const Py_UNICODE *s, + Py_UNICODE c + ) Py_DEPRECATED(3.3); + +PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); + +/* Create a copy of a unicode string ending with a nul character. Return NULL + and raise a MemoryError exception on memory allocation failure, otherwise + return a new allocated buffer (use PyMem_Free() to free the buffer). */ + +PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy( + PyObject *unicode + ) Py_DEPRECATED(3.3); + +/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/ +PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); +/* Clear all static strings. */ +PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void); + +/* Fast equality check when the inputs are known to be exact unicode types + and where the hash values are equal (i.e. a very probable match) */ +PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); + +#ifdef __cplusplus +} +#endif diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index e7192852f045..503aeb5d19a6 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -83,16 +83,6 @@ Copyright (c) Corporation for National Research Initiatives. /* #define HAVE_WCHAR_H */ /* #define HAVE_USABLE_WCHAR_T */ -/* Py_UNICODE was the native Unicode storage format (code unit) used by - Python and represents a single Unicode element in the Unicode type. - With PEP 393, Py_UNICODE is deprecated and replaced with a - typedef to wchar_t. */ - -#ifndef Py_LIMITED_API -#define PY_UNICODE_TYPE wchar_t -typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */; -#endif - /* If the compiler provides a wchar_t type we try to support it through the interface functions PyUnicode_FromWideChar(), PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */ @@ -113,248 +103,10 @@ typedef uint32_t Py_UCS4; typedef uint16_t Py_UCS2; typedef uint8_t Py_UCS1; -/* --- Internal Unicode Operations ---------------------------------------- */ - -/* Since splitting on whitespace is an important use case, and - whitespace in most situations is solely ASCII whitespace, we - optimize for the common case by using a quick look-up table - _Py_ascii_whitespace (see below) with an inlined check. - - */ -#ifndef Py_LIMITED_API -#define Py_UNICODE_ISSPACE(ch) \ - ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch)) - -#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) -#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) -#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) -#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) - -#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) -#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) -#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) - -#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) -#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) -#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) -#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) - -#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) -#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) -#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) - -#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) - -#define Py_UNICODE_ISALNUM(ch) \ - (Py_UNICODE_ISALPHA(ch) || \ - Py_UNICODE_ISDECIMAL(ch) || \ - Py_UNICODE_ISDIGIT(ch) || \ - Py_UNICODE_ISNUMERIC(ch)) - -#define Py_UNICODE_COPY(target, source, length) \ - memcpy((target), (source), (length)*sizeof(Py_UNICODE)) - -#define Py_UNICODE_FILL(target, value, length) \ - do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\ - for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\ - } while (0) - -/* macros to work with surrogates */ -#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF) -#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF) -#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF) -/* Join two surrogate characters and return a single Py_UCS4 value. */ -#define Py_UNICODE_JOIN_SURROGATES(high, low) \ - (((((Py_UCS4)(high) & 0x03FF) << 10) | \ - ((Py_UCS4)(low) & 0x03FF)) + 0x10000) -/* high surrogate = top 10 bits added to D800 */ -#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10)) -/* low surrogate = bottom 10 bits added to DC00 */ -#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF)) - -/* Check if substring matches at given offset. The offset must be - valid, and the substring must not be empty. */ - -#define Py_UNICODE_MATCH(string, offset, substring) \ - ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \ - ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \ - !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE))) - -#endif /* Py_LIMITED_API */ - #ifdef __cplusplus extern "C" { #endif -/* --- Unicode Type ------------------------------------------------------- */ - -#ifndef Py_LIMITED_API - -/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject - structure. state.ascii and state.compact are set, and the data - immediately follow the structure. utf8_length and wstr_length can be found - in the length field; the utf8 pointer is equal to the data pointer. */ -typedef struct { - /* There are 4 forms of Unicode strings: - - - compact ascii: - - * structure = PyASCIIObject - * test: PyUnicode_IS_COMPACT_ASCII(op) - * kind = PyUnicode_1BYTE_KIND - * compact = 1 - * ascii = 1 - * ready = 1 - * (length is the length of the utf8 and wstr strings) - * (data starts just after the structure) - * (since ASCII is decoded from UTF-8, the utf8 string are the data) - - - compact: - - * structure = PyCompactUnicodeObject - * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op) - * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or - PyUnicode_4BYTE_KIND - * compact = 1 - * ready = 1 - * ascii = 0 - * utf8 is not shared with data - * utf8_length = 0 if utf8 is NULL - * wstr is shared with data and wstr_length=length - if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 - or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4 - * wstr_length = 0 if wstr is NULL - * (data starts just after the structure) - - - legacy string, not ready: - - * structure = PyUnicodeObject - * test: kind == PyUnicode_WCHAR_KIND - * length = 0 (use wstr_length) - * hash = -1 - * kind = PyUnicode_WCHAR_KIND - * compact = 0 - * ascii = 0 - * ready = 0 - * interned = SSTATE_NOT_INTERNED - * wstr is not NULL - * data.any is NULL - * utf8 is NULL - * utf8_length = 0 - - - legacy string, ready: - - * structure = PyUnicodeObject structure - * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND - * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or - PyUnicode_4BYTE_KIND - * compact = 0 - * ready = 1 - * data.any is not NULL - * utf8 is shared and utf8_length = length with data.any if ascii = 1 - * utf8_length = 0 if utf8 is NULL - * wstr is shared with data.any and wstr_length = length - if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2 - or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4 - * wstr_length = 0 if wstr is NULL - - Compact strings use only one memory block (structure + characters), - whereas legacy strings use one block for the structure and one block - for characters. - - Legacy strings are created by PyUnicode_FromUnicode() and - PyUnicode_FromStringAndSize(NULL, size) functions. They become ready - when PyUnicode_READY() is called. - - See also _PyUnicode_CheckConsistency(). - */ - PyObject_HEAD - Py_ssize_t length; /* Number of code points in the string */ - Py_hash_t hash; /* Hash value; -1 if not set */ - struct { - /* - SSTATE_NOT_INTERNED (0) - SSTATE_INTERNED_MORTAL (1) - SSTATE_INTERNED_IMMORTAL (2) - - If interned != SSTATE_NOT_INTERNED, the two references from the - dictionary to this object are *not* counted in ob_refcnt. - */ - unsigned int interned:2; - /* Character size: - - - PyUnicode_WCHAR_KIND (0): - - * character type = wchar_t (16 or 32 bits, depending on the - platform) - - - PyUnicode_1BYTE_KIND (1): - - * character type = Py_UCS1 (8 bits, unsigned) - * all characters are in the range U+0000-U+00FF (latin1) - * if ascii is set, all characters are in the range U+0000-U+007F - (ASCII), otherwise at least one character is in the range - U+0080-U+00FF - - - PyUnicode_2BYTE_KIND (2): - - * character type = Py_UCS2 (16 bits, unsigned) - * all characters are in the range U+0000-U+FFFF (BMP) - * at least one character is in the range U+0100-U+FFFF - - - PyUnicode_4BYTE_KIND (4): - - * character type = Py_UCS4 (32 bits, unsigned) - * all characters are in the range U+0000-U+10FFFF - * at least one character is in the range U+10000-U+10FFFF - */ - unsigned int kind:3; - /* Compact is with respect to the allocation scheme. Compact unicode - objects only require one memory block while non-compact objects use - one block for the PyUnicodeObject struct and another for its data - buffer. */ - unsigned int compact:1; - /* The string only contains characters in the range U+0000-U+007F (ASCII) - and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is - set, use the PyASCIIObject structure. */ - unsigned int ascii:1; - /* The ready flag indicates whether the object layout is initialized - completely. This means that this is either a compact object, or - the data pointer is filled out. The bit is redundant, and helps - to minimize the test in PyUnicode_IS_READY(). */ - unsigned int ready:1; - /* Padding to ensure that PyUnicode_DATA() is always aligned to - 4 bytes (see issue #19537 on m68k). */ - unsigned int :24; - } state; - wchar_t *wstr; /* wchar_t representation (null-terminated) */ -} PyASCIIObject; - -/* Non-ASCII strings allocated through PyUnicode_New use the - PyCompactUnicodeObject structure. state.compact is set, and the data - immediately follow the structure. */ -typedef struct { - PyASCIIObject _base; - Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the - * terminating \0. */ - char *utf8; /* UTF-8 representation (null-terminated) */ - Py_ssize_t wstr_length; /* Number of code points in wstr, possible - * surrogates count as two code points. */ -} PyCompactUnicodeObject; - -/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the - PyUnicodeObject structure. The actual string data is initially in the wstr - block, and copied into the data block using _PyUnicode_Ready. */ -typedef struct { - PyCompactUnicodeObject _base; - union { - void *any; - Py_UCS1 *latin1; - Py_UCS2 *ucs2; - Py_UCS4 *ucs4; - } data; /* Canonical, smallest-form Unicode buffer */ -} PyUnicodeObject; -#endif PyAPI_DATA(PyTypeObject) PyUnicode_Type; PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; @@ -363,209 +115,6 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type) -/* Fast access macros */ -#ifndef Py_LIMITED_API - -#define PyUnicode_WSTR_LENGTH(op) \ - (PyUnicode_IS_COMPACT_ASCII(op) ? \ - ((PyASCIIObject*)op)->length : \ - ((PyCompactUnicodeObject*)op)->wstr_length) - -/* Returns the deprecated Py_UNICODE representation's size in code units - (this includes surrogate pairs as 2 units). - If the Py_UNICODE representation is not available, it will be computed - on request. Use PyUnicode_GET_LENGTH() for the length in code points. */ - -#define PyUnicode_GET_SIZE(op) \ - (assert(PyUnicode_Check(op)), \ - (((PyASCIIObject *)(op))->wstr) ? \ - PyUnicode_WSTR_LENGTH(op) : \ - ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\ - assert(((PyASCIIObject *)(op))->wstr), \ - PyUnicode_WSTR_LENGTH(op))) - /* Py_DEPRECATED(3.3) */ - -#define PyUnicode_GET_DATA_SIZE(op) \ - (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE) - /* Py_DEPRECATED(3.3) */ - -/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE - representation on demand. Using this macro is very inefficient now, - try to port your code to use the new PyUnicode_*BYTE_DATA() macros or - use PyUnicode_WRITE() and PyUnicode_READ(). */ - -#define PyUnicode_AS_UNICODE(op) \ - (assert(PyUnicode_Check(op)), \ - (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \ - PyUnicode_AsUnicode(_PyObject_CAST(op))) - /* Py_DEPRECATED(3.3) */ - -#define PyUnicode_AS_DATA(op) \ - ((const char *)(PyUnicode_AS_UNICODE(op))) - /* Py_DEPRECATED(3.3) */ - - -/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ - -/* Values for PyASCIIObject.state: */ - -/* Interning state. */ -#define SSTATE_NOT_INTERNED 0 -#define SSTATE_INTERNED_MORTAL 1 -#define SSTATE_INTERNED_IMMORTAL 2 - -/* Return true if the string contains only ASCII characters, or 0 if not. The - string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be - ready. */ -#define PyUnicode_IS_ASCII(op) \ - (assert(PyUnicode_Check(op)), \ - assert(PyUnicode_IS_READY(op)), \ - ((PyASCIIObject*)op)->state.ascii) - -/* Return true if the string is compact or 0 if not. - No type checks or Ready calls are performed. */ -#define PyUnicode_IS_COMPACT(op) \ - (((PyASCIIObject*)(op))->state.compact) - -/* Return true if the string is a compact ASCII string (use PyASCIIObject - structure), or 0 if not. No type checks or Ready calls are performed. */ -#define PyUnicode_IS_COMPACT_ASCII(op) \ - (((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op)) - -enum PyUnicode_Kind { -/* String contains only wstr byte characters. This is only possible - when the string was created with a legacy API and _PyUnicode_Ready() - has not been called yet. */ - PyUnicode_WCHAR_KIND = 0, -/* Return values of the PyUnicode_KIND() macro: */ - PyUnicode_1BYTE_KIND = 1, - PyUnicode_2BYTE_KIND = 2, - PyUnicode_4BYTE_KIND = 4 -}; - -/* Return pointers to the canonical representation cast to unsigned char, - Py_UCS2, or Py_UCS4 for direct character access. - No checks are performed, use PyUnicode_KIND() before to ensure - these will work correctly. */ - -#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op)) -#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op)) -#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op)) - -/* Return one of the PyUnicode_*_KIND values defined above. */ -#define PyUnicode_KIND(op) \ - (assert(PyUnicode_Check(op)), \ - assert(PyUnicode_IS_READY(op)), \ - ((PyASCIIObject *)(op))->state.kind) - -/* Return a void pointer to the raw unicode buffer. */ -#define _PyUnicode_COMPACT_DATA(op) \ - (PyUnicode_IS_ASCII(op) ? \ - ((void*)((PyASCIIObject*)(op) + 1)) : \ - ((void*)((PyCompactUnicodeObject*)(op) + 1))) - -#define _PyUnicode_NONCOMPACT_DATA(op) \ - (assert(((PyUnicodeObject*)(op))->data.any), \ - ((((PyUnicodeObject *)(op))->data.any))) - -#define PyUnicode_DATA(op) \ - (assert(PyUnicode_Check(op)), \ - PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \ - _PyUnicode_NONCOMPACT_DATA(op)) - -/* In the access macros below, "kind" may be evaluated more than once. - All other macro parameters are evaluated exactly once, so it is safe - to put side effects into them (such as increasing the index). */ - -/* Write into the canonical representation, this macro does not do any sanity - checks and is intended for usage in loops. The caller should cache the - kind and data pointers obtained from other macro calls. - index is the index in the string (starts at 0) and value is the new - code point value which should be written to that location. */ -#define PyUnicode_WRITE(kind, data, index, value) \ - do { \ - switch ((kind)) { \ - case PyUnicode_1BYTE_KIND: { \ - ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \ - break; \ - } \ - case PyUnicode_2BYTE_KIND: { \ - ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \ - break; \ - } \ - default: { \ - assert((kind) == PyUnicode_4BYTE_KIND); \ - ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \ - } \ - } \ - } while (0) - -/* Read a code point from the string's canonical representation. No checks - or ready calls are performed. */ -#define PyUnicode_READ(kind, data, index) \ - ((Py_UCS4) \ - ((kind) == PyUnicode_1BYTE_KIND ? \ - ((const Py_UCS1 *)(data))[(index)] : \ - ((kind) == PyUnicode_2BYTE_KIND ? \ - ((const Py_UCS2 *)(data))[(index)] : \ - ((const Py_UCS4 *)(data))[(index)] \ - ) \ - )) - -/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it - calls PyUnicode_KIND() and might call it twice. For single reads, use - PyUnicode_READ_CHAR, for multiple consecutive reads callers should - cache kind and use PyUnicode_READ instead. */ -#define PyUnicode_READ_CHAR(unicode, index) \ - (assert(PyUnicode_Check(unicode)), \ - assert(PyUnicode_IS_READY(unicode)), \ - (Py_UCS4) \ - (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \ - ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \ - (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \ - ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \ - ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \ - ) \ - )) - -/* Returns the length of the unicode string. The caller has to make sure that - the string has it's canonical representation set before calling - this macro. Call PyUnicode_(FAST_)Ready to ensure that. */ -#define PyUnicode_GET_LENGTH(op) \ - (assert(PyUnicode_Check(op)), \ - assert(PyUnicode_IS_READY(op)), \ - ((PyASCIIObject *)(op))->length) - - -/* Fast check to determine whether an object is ready. Equivalent to - PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */ - -#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready) - -/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best - case. If the canonical representation is not yet set, it will still call - _PyUnicode_Ready(). - Returns 0 on success and -1 on errors. */ -#define PyUnicode_READY(op) \ - (assert(PyUnicode_Check(op)), \ - (PyUnicode_IS_READY(op) ? \ - 0 : _PyUnicode_Ready(_PyObject_CAST(op)))) - -/* Return a maximum character value which is suitable for creating another - string based on op. This is always an approximation but more efficient - than iterating over the string. */ -#define PyUnicode_MAX_CHAR_VALUE(op) \ - (assert(PyUnicode_IS_READY(op)), \ - (PyUnicode_IS_ASCII(op) ? \ - (0x7f) : \ - (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \ - (0xffU) : \ - (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \ - (0xffffU) : \ - (0x10ffffU))))) - -#endif - /* --- Constants ---------------------------------------------------------- */ /* This Unicode character will be used as replacement character during @@ -577,120 +126,6 @@ enum PyUnicode_Kind { /* === Public API ========================================================= */ -/* --- Plain Py_UNICODE --------------------------------------------------- */ - -/* With PEP 393, this is the recommended way to allocate a new unicode object. - This function will allocate the object and its buffer in a single memory - block. Objects created using this function are not resizable. */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_New( - Py_ssize_t size, /* Number of code points in the new string */ - Py_UCS4 maxchar /* maximum code point value in the string */ - ); -#endif - -/* Initializes the canonical string representation from the deprecated - wstr/Py_UNICODE representation. This function is used to convert Unicode - objects which were created using the old API to the new flexible format - introduced with PEP 393. - - Don't call this function directly, use the public PyUnicode_READY() macro - instead. */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(int) _PyUnicode_Ready( - PyObject *unicode /* Unicode object */ - ); -#endif - -/* Get a copy of a Unicode string. */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) _PyUnicode_Copy( - PyObject *unicode - ); -#endif - -/* Copy character from one unicode object into another, this function performs - character conversion when necessary and falls back to memcpy() if possible. - - Fail if to is too small (smaller than *how_many* or smaller than - len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > - kind(to), or if *to* has more than 1 reference. - - Return the number of written character, or return -1 and raise an exception - on error. - - Pseudo-code: - - how_many = min(how_many, len(from) - from_start) - to[to_start:to_start+how_many] = from[from_start:from_start+how_many] - return how_many - - Note: The function doesn't write a terminating null character. - */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( - PyObject *to, - Py_ssize_t to_start, - PyObject *from, - Py_ssize_t from_start, - Py_ssize_t how_many - ); - -/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so - may crash if parameters are invalid (e.g. if the output string - is too short). */ -PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters( - PyObject *to, - Py_ssize_t to_start, - PyObject *from, - Py_ssize_t from_start, - Py_ssize_t how_many - ); -#endif - -#ifndef Py_LIMITED_API -/* Fill a string with a character: write fill_char into - unicode[start:start+length]. - - Fail if fill_char is bigger than the string maximum character, or if the - string has more than 1 reference. - - Return the number of written character, or return -1 and raise an exception - on error. */ -PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( - PyObject *unicode, - Py_ssize_t start, - Py_ssize_t length, - Py_UCS4 fill_char - ); - -/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash - if parameters are invalid (e.g. if length is longer than the string). */ -PyAPI_FUNC(void) _PyUnicode_FastFill( - PyObject *unicode, - Py_ssize_t start, - Py_ssize_t length, - Py_UCS4 fill_char - ); -#endif - -/* Create a Unicode Object from the Py_UNICODE buffer u of the given - size. - - u may be NULL which causes the contents to be undefined. It is the - user's responsibility to fill in the needed data afterwards. Note - that modifying the Unicode object contents after construction is - only allowed if u was set to NULL. - - The buffer is copied into the new object. */ - -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode( - const Py_UNICODE *u, /* Unicode buffer */ - Py_ssize_t size /* size of buffer */ - ) /* Py_DEPRECATED(3.3) */; -#endif - /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( const char *u, /* UTF-8 encoded string */ @@ -703,21 +138,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString( const char *u /* UTF-8 encoded string */ ); -#ifndef Py_LIMITED_API -/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. - Scan the string to find the maximum character. */ -PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( - int kind, - const void *buffer, - Py_ssize_t size); - -/* Create a new string from a buffer of ASCII characters. - WARNING: Don't check if the string contains any non-ASCII character. */ -PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII( - const char *buffer, - Py_ssize_t size); -#endif - #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 PyAPI_FUNC(PyObject*) PyUnicode_Substring( PyObject *str, @@ -725,15 +145,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_Substring( Py_ssize_t end); #endif -#ifndef Py_LIMITED_API -/* Compute the maximum character of the substring unicode[start:end]. - Return 127 for an empty string. */ -PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar ( - PyObject *unicode, - Py_ssize_t start, - Py_ssize_t end); -#endif - #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 /* Copy the string into a UCS4 buffer including the null character if copy_null is set. Return NULL and raise an exception on error. Raise a SystemError if @@ -752,33 +163,6 @@ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); #endif -#ifndef Py_LIMITED_API -/* Return a read-only pointer to the Unicode object's internal - Py_UNICODE buffer. - If the wchar_t/Py_UNICODE representation is not yet available, this - function will calculate it. */ - -PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode( - PyObject *unicode /* Unicode object */ - ) /* Py_DEPRECATED(3.3) */; - -/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string - contains null characters. */ -PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode( - PyObject *unicode /* Unicode object */ - ); - -/* Return a read-only pointer to the Unicode object's internal - Py_UNICODE buffer and save the length at size. - If the wchar_t/Py_UNICODE representation is not yet available, this - function will calculate it. */ - -PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize( - PyObject *unicode, /* Unicode object */ - Py_ssize_t *size /* location where to save the length */ - ) /* Py_DEPRECATED(3.3) */; -#endif - #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 /* Get the length of the Unicode object. */ @@ -814,11 +198,6 @@ PyAPI_FUNC(int) PyUnicode_WriteChar( ); #endif -#ifndef Py_LIMITED_API -/* Get the maximum ordinal for a Unicode character. */ -PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void) Py_DEPRECATED(3.3); -#endif - /* Resize a Unicode object. The length is the number of characters, except if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length is the number of Py_UNICODE characters. @@ -881,141 +260,11 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( ... ); -#ifndef Py_LIMITED_API -typedef struct { - PyObject *buffer; - void *data; - enum PyUnicode_Kind kind; - Py_UCS4 maxchar; - Py_ssize_t size; - Py_ssize_t pos; - - /* minimum number of allocated characters (default: 0) */ - Py_ssize_t min_length; - - /* minimum character (default: 127, ASCII) */ - Py_UCS4 min_char; - - /* If non-zero, overallocate the buffer (default: 0). */ - unsigned char overallocate; - - /* If readonly is 1, buffer is a shared string (cannot be modified) - and size is set to 0. */ - unsigned char readonly; -} _PyUnicodeWriter ; - -/* Initialize a Unicode writer. - * - * By default, the minimum buffer size is 0 character and overallocation is - * disabled. Set min_length, min_char and overallocate attributes to control - * the allocation of the buffer. */ -PyAPI_FUNC(void) -_PyUnicodeWriter_Init(_PyUnicodeWriter *writer); - -/* Prepare the buffer to write 'length' characters - with the specified maximum character. - - Return 0 on success, raise an exception and return -1 on error. */ -#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ - (((MAXCHAR) <= (WRITER)->maxchar \ - && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ - ? 0 \ - : (((LENGTH) == 0) \ - ? 0 \ - : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) - -/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro - instead. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, - Py_ssize_t length, Py_UCS4 maxchar); - -/* Prepare the buffer to have at least the kind KIND. - For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will - support characters in range U+000-U+FFFF. - - Return 0 on success, raise an exception and return -1 on error. */ -#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ - (assert((KIND) != PyUnicode_WCHAR_KIND), \ - (KIND) <= (WRITER)->kind \ - ? 0 \ - : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) - -/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() - macro instead. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, - enum PyUnicode_Kind kind); - -/* Append a Unicode character. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, - Py_UCS4 ch - ); - -/* Append a Unicode string. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, - PyObject *str /* Unicode string */ - ); - -/* Append a substring of a Unicode string. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, - PyObject *str, /* Unicode string */ - Py_ssize_t start, - Py_ssize_t end - ); - -/* Append an ASCII-encoded byte string. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, - const char *str, /* ASCII-encoded byte string */ - Py_ssize_t len /* number of bytes, or -1 if unknown */ - ); - -/* Append a latin1-encoded byte string. - Return 0 on success, raise an exception and return -1 on error. */ -PyAPI_FUNC(int) -_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, - const char *str, /* latin1-encoded byte string */ - Py_ssize_t len /* length in bytes */ - ); - -/* Get the value of the writer as a Unicode string. Clear the - buffer of the writer. Raise an exception and return NULL - on error. */ -PyAPI_FUNC(PyObject *) -_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); - -/* Deallocate memory of a writer (clear its internal buffer). */ -PyAPI_FUNC(void) -_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); -#endif - -#ifndef Py_LIMITED_API -/* Format the object based on the format_spec, as defined in PEP 3101 - (Advanced String Formatting). */ -PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter( - _PyUnicodeWriter *writer, - PyObject *obj, - PyObject *format_spec, - Py_ssize_t start, - Py_ssize_t end); -#endif - PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **); PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( const char *u /* UTF-8 encoded string */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void); -#endif /* Use only if you know it's a string */ #define PyUnicode_CHECK_INTERNED(op) \ @@ -1066,10 +315,6 @@ PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( Py_ssize_t *size /* number of characters of the result */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind); -#endif - #endif /* --- Unicode ordinals --------------------------------------------------- */ @@ -1114,60 +359,7 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void); /* --- Manage the default encoding ---------------------------------------- */ -/* Returns a pointer to the default encoding (UTF-8) of the - Unicode object unicode and the size of the encoded representation - in bytes stored in *size. - - In case of an error, no *size is set. - - This function caches the UTF-8 encoded string in the unicodeobject - and subsequent calls will return the same string. The memory is released - when the unicodeobject is deallocated. - - _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to - support the previous internal function with the same behaviour. - - *** This API is for interpreter INTERNAL USE ONLY and will likely - *** be removed or changed in the future. - - *** If you need to access the Unicode object as UTF-8 bytes string, - *** please use PyUnicode_AsUTF8String() instead. -*/ - -#ifndef Py_LIMITED_API -PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize( - PyObject *unicode, - Py_ssize_t *size); -#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize -#endif - -/* Returns a pointer to the default encoding (UTF-8) of the - Unicode object unicode. - - Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation - in the unicodeobject. - - _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to - support the previous internal function with the same behaviour. - - Use of this API is DEPRECATED since no size information can be - extracted from the returned data. - - *** This API is for interpreter INTERNAL USE ONLY and will likely - *** be removed or changed for Python 3.1. - - *** If you need to access the Unicode object as UTF-8 bytes string, - *** please use PyUnicode_AsUTF8String() instead. - -*/ - -#ifndef Py_LIMITED_API -PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); -#define _PyUnicode_AsString PyUnicode_AsUTF8 -#endif - /* Returns "utf-8". */ - PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); /* --- Generic Codecs ----------------------------------------------------- */ @@ -1208,18 +400,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( const char *errors /* error handling */ ) Py_DEPRECATED(3.6); -/* Encodes a Py_UNICODE buffer of the given size and returns a - Python string object. */ - -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_Encode( - const Py_UNICODE *s, /* Unicode char buffer */ - Py_ssize_t size, /* number of Py_UNICODE chars to encode */ - const char *encoding, /* encoding */ - const char *errors /* error handling */ - ) Py_DEPRECATED(3.3); -#endif - /* Encodes a Unicode object and returns the result as Python object. @@ -1277,22 +457,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( Py_ssize_t *consumed /* bytes consumed */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* number of Py_UNICODE chars to encode */ - int base64SetO, /* Encode RFC2152 Set O characters in base64 */ - int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ - const char *errors /* error handling */ - ) Py_DEPRECATED(3.3); -PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7( - PyObject *unicode, /* Unicode object */ - int base64SetO, /* Encode RFC2152 Set O characters in base64 */ - int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */ - const char *errors /* error handling */ - ); -#endif - /* --- UTF-8 Codecs ------------------------------------------------------- */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( @@ -1312,18 +476,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( PyObject *unicode /* Unicode object */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String( - PyObject *unicode, - const char *errors); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* number of Py_UNICODE chars to encode */ - const char *errors /* error handling */ - ) Py_DEPRECATED(3.3); -#endif - /* --- UTF-32 Codecs ------------------------------------------------------ */ /* Decodes length bytes from a UTF-32 encoded buffer string and returns @@ -1391,20 +543,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* number of Py_UNICODE chars to encode */ - const char *errors, /* error handling */ - int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ - ) Py_DEPRECATED(3.3); -PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32( - PyObject *object, /* Unicode object */ - const char *errors, /* error handling */ - int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ - ); -#endif - /* --- UTF-16 Codecs ------------------------------------------------------ */ /* Decodes length bytes from a UTF-16 encoded buffer string and returns @@ -1456,40 +594,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( PyObject *unicode /* Unicode object */ ); -/* Returns a Python string object holding the UTF-16 encoded value of - the Unicode data. - - If byteorder is not 0, output is written according to the following - byte order: - - byteorder == -1: little endian - byteorder == 0: native byte order (writes a BOM mark) - byteorder == 1: big endian - - If byteorder is 0, the output string will always start with the - Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is - prepended. - - Note that Py_UNICODE data is being interpreted as UTF-16 reduced to - UCS-2. This trick makes it possible to add full UTF-16 capabilities - at a later point without compromising the APIs. - -*/ - -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* number of Py_UNICODE chars to encode */ - const char *errors, /* error handling */ - int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ - ) Py_DEPRECATED(3.3); -PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16( - PyObject* unicode, /* Unicode object */ - const char *errors, /* error handling */ - int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */ - ); -#endif - /* --- Unicode-Escape Codecs ---------------------------------------------- */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( @@ -1498,30 +602,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( const char *errors /* error handling */ ); -#ifndef Py_LIMITED_API -/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape - chars. */ -PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape( - const char *string, /* Unicode-Escape encoded string */ - Py_ssize_t length, /* size of string */ - const char *errors, /* error handling */ - const char **first_invalid_escape /* on return, points to first - invalid escaped char in - string. */ -); -#endif - PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( PyObject *unicode /* Unicode object */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length /* Number of Py_UNICODE chars to encode */ - ) Py_DEPRECATED(3.3); -#endif - /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( @@ -1534,30 +618,9 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( PyObject *unicode /* Unicode object */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length /* Number of Py_UNICODE chars to encode */ - ) Py_DEPRECATED(3.3); -#endif - -/* --- Unicode Internal Codec --------------------------------------------- - - Only for internal use in _codecsmodule.c */ - -#ifndef Py_LIMITED_API -PyObject *_PyUnicode_DecodeUnicodeInternal( - const char *string, - Py_ssize_t length, - const char *errors - ); -#endif - /* --- Latin-1 Codecs ----------------------------------------------------- - Note: Latin-1 corresponds to the first 256 Unicode ordinals. - -*/ + Note: Latin-1 corresponds to the first 256 Unicode ordinals. */ PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( const char *string, /* Latin-1 encoded string */ @@ -1569,18 +632,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( PyObject *unicode /* Unicode object */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String( - PyObject* unicode, - const char* errors); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - const char *errors /* error handling */ - ) Py_DEPRECATED(3.3); -#endif - /* --- ASCII Codecs ------------------------------------------------------- Only 7-bit ASCII data is excepted. All other codes generate errors. @@ -1597,18 +648,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( PyObject *unicode /* Unicode object */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString( - PyObject* unicode, - const char* errors); - -PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - const char *errors /* error handling */ - ) Py_DEPRECATED(3.3); -#endif - /* --- Character Map Codecs ----------------------------------------------- This codec uses mappings to encode and decode characters. @@ -1638,46 +677,9 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( PyObject *mapping /* encoding mapping */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - PyObject *mapping, /* encoding mapping */ - const char *errors /* error handling */ - ) Py_DEPRECATED(3.3); -PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap( - PyObject *unicode, /* Unicode object */ - PyObject *mapping, /* encoding mapping */ - const char *errors /* error handling */ - ); -#endif - -/* Translate a Py_UNICODE buffer of the given length by applying a - character mapping table to it and return the resulting Unicode - object. - - The mapping table must map Unicode ordinal integers to Unicode strings, - Unicode ordinal integers or None (causing deletion of the character). - - Mapping tables may be dictionaries or sequences. Unmapped character - ordinals (ones which cause a LookupError) are left untouched and - are copied as-is. - -*/ - -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - PyObject *table, /* Translate table */ - const char *errors /* error handling */ - ) Py_DEPRECATED(3.3); -#endif - -#ifdef MS_WINDOWS - /* --- MBCS codecs for Windows -------------------------------------------- */ +#ifdef MS_WINDOWS PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( const char *string, /* MBCS encoded string */ Py_ssize_t length, /* size of string */ @@ -1705,14 +707,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( PyObject *unicode /* Unicode object */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS( - const Py_UNICODE *data, /* Unicode char buffer */ - Py_ssize_t length, /* number of Py_UNICODE chars to encode */ - const char *errors /* error handling */ - ) Py_DEPRECATED(3.3); -#endif - #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( int code_page, /* code page number */ @@ -1723,61 +717,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( #endif /* MS_WINDOWS */ -#ifndef Py_LIMITED_API -/* --- Decimal Encoder ---------------------------------------------------- */ - -/* Takes a Unicode string holding a decimal value and writes it into - an output buffer using standard ASCII digit codes. - - The output buffer has to provide at least length+1 bytes of storage - area. The output string is 0-terminated. - - The encoder converts whitespace to ' ', decimal characters to their - corresponding ASCII digit and all other Latin-1 characters except - \0 as-is. Characters outside this range (Unicode ordinals 1-256) - are treated as errors. This includes embedded NULL bytes. - - Error handling is defined by the errors argument: - - NULL or "strict": raise a ValueError - "ignore": ignore the wrong characters (these are not copied to the - output buffer) - "replace": replaces illegal characters with '?' - - Returns 0 on success, -1 on failure. - -*/ - -PyAPI_FUNC(int) PyUnicode_EncodeDecimal( - Py_UNICODE *s, /* Unicode buffer */ - Py_ssize_t length, /* Number of Py_UNICODE chars to encode */ - char *output, /* Output buffer; must have size >= length */ - const char *errors /* error handling */ - ) /* Py_DEPRECATED(3.3) */; - -/* Transforms code points that have decimal digit property to the - corresponding ASCII digit code points. - - Returns a new Unicode string on success, NULL on failure. -*/ - -PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII( - Py_UNICODE *s, /* Unicode buffer */ - Py_ssize_t length /* Number of Py_UNICODE chars to transform */ - ) /* Py_DEPRECATED(3.3) */; - -/* Coverts a Unicode object holding a decimal value to an ASCII string - for using in int, float and complex parsers. - Transforms code points that have decimal digit property to the - corresponding ASCII digit code points. Transforms spaces to ASCII. - Transforms code points starting from the first non-ASCII code point that - is neither a decimal digit nor a space to the end into '?'. */ - -PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII( - PyObject *unicode /* Unicode object */ - ); -#endif - /* --- Locale encoding --------------------------------------------------- */ #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 @@ -1976,14 +915,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_Join( PyObject *seq /* Sequence object */ ); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray( - PyObject *separator, - PyObject *const *items, - Py_ssize_t seqlen - ); -#endif /* Py_LIMITED_API */ - /* Return 1 if substr matches str[start:end] at the given tail end, 0 otherwise. */ @@ -2047,17 +978,6 @@ PyAPI_FUNC(int) PyUnicode_Compare( PyObject *right /* Right string */ ); -#ifndef Py_LIMITED_API -/* Test whether a unicode is equal to ASCII identifier. Return 1 if true, - 0 otherwise. The right argument must be ASCII identifier. - Any error occurs inside will be cleared before return. */ - -PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId( - PyObject *left, /* Left string */ - _Py_Identifier *right /* Right identifier */ - ); -#endif - /* Compare a Unicode object with C string and return -1, 0, 1 for less than, equal, and greater than, respectively. It is best to pass only ASCII-encoded strings, but the function interprets the input string as @@ -2069,17 +989,6 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( const char *right /* ASCII-encoded string */ ); -#ifndef Py_LIMITED_API -/* Test whether a unicode is equal to ASCII string. Return 1 if true, - 0 otherwise. The right argument must be ASCII-encoded string. - Any error occurs inside will be cleared before return. */ - -PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString( - PyObject *left, - const char *right /* ASCII-encoded string */ - ); -#endif - /* Rich compare two strings and return one of the following: - NULL in case an exception was raised @@ -2121,192 +1030,8 @@ PyAPI_FUNC(int) PyUnicode_Contains( PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); -#ifndef Py_LIMITED_API -/* Externally visible for str.strip(unicode) */ -PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( - PyObject *self, - int striptype, - PyObject *sepobj - ); -#endif - -/* Using explicit passed-in values, insert the thousands grouping - into the string pointed to by buffer. For the argument descriptions, - see Objects/stringlib/localeutil.h */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( - _PyUnicodeWriter *writer, - Py_ssize_t n_buffer, - PyObject *digits, - Py_ssize_t d_pos, - Py_ssize_t n_digits, - Py_ssize_t min_width, - const char *grouping, - PyObject *thousands_sep, - Py_UCS4 *maxchar); -#endif /* === Characters Type APIs =============================================== */ -/* Helper array used by Py_UNICODE_ISSPACE(). */ - -#ifndef Py_LIMITED_API -PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; - -/* These should not be used directly. Use the Py_UNICODE_IS* and - Py_UNICODE_TO* macros instead. - - These APIs are implemented in Objects/unicodectype.c. - -*/ - -PyAPI_FUNC(int) _PyUnicode_IsLowercase( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsUppercase( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsTitlecase( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsXidStart( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsXidContinue( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsWhitespace( - const Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsLinebreak( - const Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( - Py_UCS4 ch /* Unicode character */ - ) /* Py_DEPRECATED(3.3) */; - -PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( - Py_UCS4 ch /* Unicode character */ - ) /* Py_DEPRECATED(3.3) */; - -PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( - Py_UCS4 ch /* Unicode character */ - ) Py_DEPRECATED(3.3); - -PyAPI_FUNC(int) _PyUnicode_ToLowerFull( - Py_UCS4 ch, /* Unicode character */ - Py_UCS4 *res - ); - -PyAPI_FUNC(int) _PyUnicode_ToTitleFull( - Py_UCS4 ch, /* Unicode character */ - Py_UCS4 *res - ); - -PyAPI_FUNC(int) _PyUnicode_ToUpperFull( - Py_UCS4 ch, /* Unicode character */ - Py_UCS4 *res - ); - -PyAPI_FUNC(int) _PyUnicode_ToFoldedFull( - Py_UCS4 ch, /* Unicode character */ - Py_UCS4 *res - ); - -PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsCased( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_ToDigit( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(double) _PyUnicode_ToNumeric( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsDigit( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsNumeric( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsPrintable( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(int) _PyUnicode_IsAlpha( - Py_UCS4 ch /* Unicode character */ - ); - -PyAPI_FUNC(size_t) Py_UNICODE_strlen( - const Py_UNICODE *u - ) Py_DEPRECATED(3.3); - -PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy( - Py_UNICODE *s1, - const Py_UNICODE *s2) Py_DEPRECATED(3.3); - -PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat( - Py_UNICODE *s1, const Py_UNICODE *s2) Py_DEPRECATED(3.3); - -PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy( - Py_UNICODE *s1, - const Py_UNICODE *s2, - size_t n) Py_DEPRECATED(3.3); - -PyAPI_FUNC(int) Py_UNICODE_strcmp( - const Py_UNICODE *s1, - const Py_UNICODE *s2 - ) Py_DEPRECATED(3.3); - -PyAPI_FUNC(int) Py_UNICODE_strncmp( - const Py_UNICODE *s1, - const Py_UNICODE *s2, - size_t n - ) Py_DEPRECATED(3.3); - -PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr( - const Py_UNICODE *s, - Py_UNICODE c - ) Py_DEPRECATED(3.3); - -PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr( - const Py_UNICODE *s, - Py_UNICODE c - ) Py_DEPRECATED(3.3); - -PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); - -/* Create a copy of a unicode string ending with a nul character. Return NULL - and raise a MemoryError exception on memory allocation failure, otherwise - return a new allocated buffer (use PyMem_Free() to free the buffer). */ - -PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy( - PyObject *unicode - ) Py_DEPRECATED(3.3); -#endif /* Py_LIMITED_API */ - #if defined(Py_DEBUG) && !defined(Py_LIMITED_API) PyAPI_FUNC(int) _PyUnicode_CheckConsistency( PyObject *op, @@ -2318,15 +1043,10 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency( #endif #ifndef Py_LIMITED_API -/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/ -PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); -/* Clear all static strings. */ -PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void); - -/* Fast equality check when the inputs are known to be exact unicode types - and where the hash values are equal (i.e. a very probable match) */ -PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); -#endif /* !Py_LIMITED_API */ +# define Py_CPYTHON_UNICODEOBJECT_H +# include "cpython/unicodeobject.h" +# undef Py_CPYTHON_UNICODEOBJECT_H +#endif #ifdef __cplusplus }

1 0

Doc: Delete now useless Windows FAQ section (GH-10557)
by Julien Palard 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/5719f275b7153a00a800f5481271a6fc26… commit: 5719f275b7153a00a800f5481271a6fc26659c65 branch: master author: Mathieu Dupuy <deronnax(a)users.noreply.github.com> committer: Julien Palard <julien(a)palard.fr> date: 2018-11-26T17:13:41+01:00 summary: Doc: Delete now useless Windows FAQ section (GH-10557) files: M Doc/faq/windows.rst diff --git a/Doc/faq/windows.rst b/Doc/faq/windows.rst index 74aa52ab1a49..75ebe603cb5b 100644 --- a/Doc/faq/windows.rst +++ b/Doc/faq/windows.rst @@ -281,14 +281,3 @@ Use the msvcrt module. This is a standard Windows-specific extension module. It defines a function ``kbhit()`` which checks whether a keyboard hit is present, and ``getch()`` which gets one character without echoing it. - -How do I extract the downloaded documentation on Windows? ---------------------------------------------------------- - -Sometimes, when you download the documentation package to a Windows machine -using a web browser, the file extension of the saved file ends up being .EXE. -This is a mistake; the extension should be .TGZ. - -Simply rename the downloaded file to have the .TGZ extension, and WinZip will be -able to handle it. (If your copy of WinZip doesn't, get a newer one from -https://www.winzip.com.)

1 0

bpo-35134: Create Include/cpython/object.h (GH-10679)
by Victor Stinner 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/6eb996685e25c09499858bee4be258776e… commit: 6eb996685e25c09499858bee4be258776e603c6f branch: master author: Victor Stinner <vstinner(a)redhat.com> committer: GitHub <noreply(a)github.com> date: 2018-11-26T17:09:16+01:00 summary: bpo-35134: Create Include/cpython/object.h (GH-10679) * Move object.h code surrounded by "#ifndef Py_LIMITED_API" to a new Include/cpython/object.h header file. * "typedef struct _typeobject PyTypeObject;" is now always defined in object.h, but if Py_LIMITED_API is not defined, Include/cpython/object.h also defines the structure. * Complete the printfunc comment to mention Py_LIMITED_API define. files: A Include/cpython/object.h M Include/object.h diff --git a/Include/cpython/object.h b/Include/cpython/object.h new file mode 100644 index 000000000000..77184c95151a --- /dev/null +++ b/Include/cpython/object.h @@ -0,0 +1,454 @@ +#ifndef Py_CPYTHON_OBJECT_H +# error "this header file must not be included directly" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/********************* String Literals ****************************************/ +/* This structure helps managing static strings. The basic usage goes like this: + Instead of doing + + r = PyObject_CallMethod(o, "foo", "args", ...); + + do + + _Py_IDENTIFIER(foo); + ... + r = _PyObject_CallMethodId(o, &PyId_foo, "args", ...); + + PyId_foo is a static variable, either on block level or file level. On first + usage, the string "foo" is interned, and the structures are linked. On interpreter + shutdown, all strings are released (through _PyUnicode_ClearStaticStrings). + + Alternatively, _Py_static_string allows choosing the variable name. + _PyUnicode_FromId returns a borrowed reference to the interned string. + _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*. +*/ +typedef struct _Py_Identifier { + struct _Py_Identifier *next; + const char* string; + PyObject *object; +} _Py_Identifier; + +#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL } +#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) +#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) + +/* buffer interface */ +typedef struct bufferinfo { + void *buf; + PyObject *obj; /* owned reference */ + Py_ssize_t len; + Py_ssize_t itemsize; /* This is Py_ssize_t so it can be + pointed to by strides in simple case.*/ + int readonly; + int ndim; + char *format; + Py_ssize_t *shape; + Py_ssize_t *strides; + Py_ssize_t *suboffsets; + void *internal; +} Py_buffer; + +typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); +typedef void (*releasebufferproc)(PyObject *, Py_buffer *); + +/* Maximum number of dimensions */ +#define PyBUF_MAX_NDIM 64 + +/* Flags for getting buffers */ +#define PyBUF_SIMPLE 0 +#define PyBUF_WRITABLE 0x0001 +/* we used to include an E, backwards compatible alias */ +#define PyBUF_WRITEABLE PyBUF_WRITABLE +#define PyBUF_FORMAT 0x0004 +#define PyBUF_ND 0x0008 +#define PyBUF_STRIDES (0x0010 | PyBUF_ND) +#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) +#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) +#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) +#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) + +#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE) +#define PyBUF_CONTIG_RO (PyBUF_ND) + +#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE) +#define PyBUF_STRIDED_RO (PyBUF_STRIDES) + +#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT) +#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT) + +#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT) +#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT) + + +#define PyBUF_READ 0x100 +#define PyBUF_WRITE 0x200 +/* End buffer interface */ + + +typedef struct { + /* Number implementations must check *both* + arguments for proper type and implement the necessary conversions + in the slot functions themselves. */ + + binaryfunc nb_add; + binaryfunc nb_subtract; + binaryfunc nb_multiply; + binaryfunc nb_remainder; + binaryfunc nb_divmod; + ternaryfunc nb_power; + unaryfunc nb_negative; + unaryfunc nb_positive; + unaryfunc nb_absolute; + inquiry nb_bool; + unaryfunc nb_invert; + binaryfunc nb_lshift; + binaryfunc nb_rshift; + binaryfunc nb_and; + binaryfunc nb_xor; + binaryfunc nb_or; + unaryfunc nb_int; + void *nb_reserved; /* the slot formerly known as nb_long */ + unaryfunc nb_float; + + binaryfunc nb_inplace_add; + binaryfunc nb_inplace_subtract; + binaryfunc nb_inplace_multiply; + binaryfunc nb_inplace_remainder; + ternaryfunc nb_inplace_power; + binaryfunc nb_inplace_lshift; + binaryfunc nb_inplace_rshift; + binaryfunc nb_inplace_and; + binaryfunc nb_inplace_xor; + binaryfunc nb_inplace_or; + + binaryfunc nb_floor_divide; + binaryfunc nb_true_divide; + binaryfunc nb_inplace_floor_divide; + binaryfunc nb_inplace_true_divide; + + unaryfunc nb_index; + + binaryfunc nb_matrix_multiply; + binaryfunc nb_inplace_matrix_multiply; +} PyNumberMethods; + +typedef struct { + lenfunc sq_length; + binaryfunc sq_concat; + ssizeargfunc sq_repeat; + ssizeargfunc sq_item; + void *was_sq_slice; + ssizeobjargproc sq_ass_item; + void *was_sq_ass_slice; + objobjproc sq_contains; + + binaryfunc sq_inplace_concat; + ssizeargfunc sq_inplace_repeat; +} PySequenceMethods; + +typedef struct { + lenfunc mp_length; + binaryfunc mp_subscript; + objobjargproc mp_ass_subscript; +} PyMappingMethods; + +typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; +} PyAsyncMethods; + +typedef struct { + getbufferproc bf_getbuffer; + releasebufferproc bf_releasebuffer; +} PyBufferProcs; + +/* We can't provide a full compile-time check that limited-API + users won't implement tp_print. However, not defining printfunc + and making tp_print of a different function pointer type + if Py_LIMITED_API is set should at least cause a warning + in most cases. */ +typedef int (*printfunc)(PyObject *, FILE *, int); + +typedef struct _typeobject { + PyObject_VAR_HEAD + const char *tp_name; /* For printing, in format "<module>.<name>" */ + Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */ + + /* Methods to implement standard operations */ + + destructor tp_dealloc; + printfunc tp_print; + getattrfunc tp_getattr; + setattrfunc tp_setattr; + PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2) + or tp_reserved (Python 3) */ + reprfunc tp_repr; + + /* Method suites for standard classes */ + + PyNumberMethods *tp_as_number; + PySequenceMethods *tp_as_sequence; + PyMappingMethods *tp_as_mapping; + + /* More standard operations (here for binary compatibility) */ + + hashfunc tp_hash; + ternaryfunc tp_call; + reprfunc tp_str; + getattrofunc tp_getattro; + setattrofunc tp_setattro; + + /* Functions to access object as input/output buffer */ + PyBufferProcs *tp_as_buffer; + + /* Flags to define presence of optional/expanded features */ + unsigned long tp_flags; + + const char *tp_doc; /* Documentation string */ + + /* Assigned meaning in release 2.0 */ + /* call function for all accessible objects */ + traverseproc tp_traverse; + + /* delete references to contained objects */ + inquiry tp_clear; + + /* Assigned meaning in release 2.1 */ + /* rich comparisons */ + richcmpfunc tp_richcompare; + + /* weak reference enabler */ + Py_ssize_t tp_weaklistoffset; + + /* Iterators */ + getiterfunc tp_iter; + iternextfunc tp_iternext; + + /* Attribute descriptor and subclassing stuff */ + struct PyMethodDef *tp_methods; + struct PyMemberDef *tp_members; + struct PyGetSetDef *tp_getset; + struct _typeobject *tp_base; + PyObject *tp_dict; + descrgetfunc tp_descr_get; + descrsetfunc tp_descr_set; + Py_ssize_t tp_dictoffset; + initproc tp_init; + allocfunc tp_alloc; + newfunc tp_new; + freefunc tp_free; /* Low-level free-memory routine */ + inquiry tp_is_gc; /* For PyObject_IS_GC */ + PyObject *tp_bases; + PyObject *tp_mro; /* method resolution order */ + PyObject *tp_cache; + PyObject *tp_subclasses; + PyObject *tp_weaklist; + destructor tp_del; + + /* Type attribute cache version tag. Added in version 2.6 */ + unsigned int tp_version_tag; + + destructor tp_finalize; + +#ifdef COUNT_ALLOCS + /* these must be last and never explicitly initialized */ + Py_ssize_t tp_allocs; + Py_ssize_t tp_frees; + Py_ssize_t tp_maxalloc; + struct _typeobject *tp_prev; + struct _typeobject *tp_next; +#endif +} PyTypeObject; + +/* The *real* layout of a type object when allocated on the heap */ +typedef struct _heaptypeobject { + /* Note: there's a dependency on the order of these members + in slotptr() in typeobject.c . */ + PyTypeObject ht_type; + PyAsyncMethods as_async; + PyNumberMethods as_number; + PyMappingMethods as_mapping; + PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, + so that the mapping wins when both + the mapping and the sequence define + a given operator (e.g. __getitem__). + see add_operators() in typeobject.c . */ + PyBufferProcs as_buffer; + PyObject *ht_name, *ht_slots, *ht_qualname; + struct _dictkeysobject *ht_cached_keys; + /* here are optional user slots, followed by the members. */ +} PyHeapTypeObject; + +/* access macro to the members which are floating "behind" the object */ +#define PyHeapType_GET_MEMBERS(etype) \ + ((PyMemberDef *)(((char *)etype) + Py_TYPE(etype)->tp_basicsize)) + +PyAPI_FUNC(const char *) _PyType_Name(PyTypeObject *); +PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyType_LookupId(PyTypeObject *, _Py_Identifier *); +PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, _Py_Identifier *); +PyAPI_FUNC(PyTypeObject *) _PyType_CalculateMetaclass(PyTypeObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyType_GetDocFromInternalDoc(const char *, const char *); +PyAPI_FUNC(PyObject *) _PyType_GetTextSignatureFromInternalDoc(const char *, const char *); + +struct _Py_Identifier; +PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); +PyAPI_FUNC(void) _Py_BreakPoint(void); +PyAPI_FUNC(void) _PyObject_Dump(PyObject *); +PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *); + +PyAPI_FUNC(int) _PyObject_IsAbstract(PyObject *); +PyAPI_FUNC(PyObject *) _PyObject_GetAttrId(PyObject *, struct _Py_Identifier *); +PyAPI_FUNC(int) _PyObject_SetAttrId(PyObject *, struct _Py_Identifier *, PyObject *); +PyAPI_FUNC(int) _PyObject_HasAttrId(PyObject *, struct _Py_Identifier *); +/* Replacements of PyObject_GetAttr() and _PyObject_GetAttrId() which + don't raise AttributeError. + + Return 1 and set *result != NULL if an attribute is found. + Return 0 and set *result == NULL if an attribute is not found; + an AttributeError is silenced. + Return -1 and set *result == NULL if an error other than AttributeError + is raised. +*/ +PyAPI_FUNC(int) _PyObject_LookupAttr(PyObject *, PyObject *, PyObject **); +PyAPI_FUNC(int) _PyObject_LookupAttrId(PyObject *, struct _Py_Identifier *, PyObject **); +PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *); +PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *); +PyAPI_FUNC(void) PyObject_CallFinalizer(PyObject *); +PyAPI_FUNC(int) PyObject_CallFinalizerFromDealloc(PyObject *); + +/* Same as PyObject_Generic{Get,Set}Attr, but passing the attributes + dict as the last parameter. */ +PyAPI_FUNC(PyObject *) +_PyObject_GenericGetAttrWithDict(PyObject *, PyObject *, PyObject *, int); +PyAPI_FUNC(int) +_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *, + PyObject *, PyObject *); + +/* Helper to look up a builtin object */ +PyAPI_FUNC(PyObject *) _PyObject_GetBuiltin(const char *name); + +#define PyType_HasFeature(t,f) (((t)->tp_flags & (f)) != 0) + +static inline void _Py_Dealloc_inline(PyObject *op) +{ + destructor dealloc = Py_TYPE(op)->tp_dealloc; +#ifdef Py_TRACE_REFS + _Py_ForgetReference(op); +#else + _Py_INC_TPFREES(op); +#endif + (*dealloc)(op); +} +#define _Py_Dealloc(op) _Py_Dealloc_inline(op) + + +/* Safely decref `op` and set `op` to `op2`. + * + * As in case of Py_CLEAR "the obvious" code can be deadly: + * + * Py_DECREF(op); + * op = op2; + * + * The safe way is: + * + * Py_SETREF(op, op2); + * + * That arranges to set `op` to `op2` _before_ decref'ing, so that any code + * triggered as a side-effect of `op` getting torn down no longer believes + * `op` points to a valid object. + * + * Py_XSETREF is a variant of Py_SETREF that uses Py_XDECREF instead of + * Py_DECREF. + */ + +#define Py_SETREF(op, op2) \ + do { \ + PyObject *_py_tmp = _PyObject_CAST(op); \ + (op) = (op2); \ + Py_DECREF(_py_tmp); \ + } while (0) + +#define Py_XSETREF(op, op2) \ + do { \ + PyObject *_py_tmp = _PyObject_CAST(op); \ + (op) = (op2); \ + Py_XDECREF(_py_tmp); \ + } while (0) + + +PyAPI_DATA(PyTypeObject) _PyNone_Type; +PyAPI_DATA(PyTypeObject) _PyNotImplemented_Type; + +/* Maps Py_LT to Py_GT, ..., Py_GE to Py_LE. + * Defined in object.c. + */ +PyAPI_DATA(int) _Py_SwappedOp[]; + +/* This is the old private API, invoked by the macros before 3.2.4. + Kept for binary compatibility of extensions using the stable ABI. */ +PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*); +PyAPI_FUNC(void) _PyTrash_destroy_chain(void); + +PyAPI_FUNC(void) +_PyDebugAllocatorStats(FILE *out, const char *block_name, int num_blocks, + size_t sizeof_block); +PyAPI_FUNC(void) +_PyObject_DebugTypeStats(FILE *out); + +/* Define a pair of assertion macros: + _PyObject_ASSERT_FROM(), _PyObject_ASSERT_WITH_MSG() and _PyObject_ASSERT(). + + These work like the regular C assert(), in that they will abort the + process with a message on stderr if the given condition fails to hold, + but compile away to nothing if NDEBUG is defined. + + However, before aborting, Python will also try to call _PyObject_Dump() on + the given object. This may be of use when investigating bugs in which a + particular object is corrupt (e.g. buggy a tp_visit method in an extension + module breaking the garbage collector), to help locate the broken objects. + + The WITH_MSG variant allows you to supply an additional message that Python + will attempt to print to stderr, after the object dump. */ +#ifdef NDEBUG + /* No debugging: compile away the assertions: */ +# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \ + ((void)0) +#else + /* With debugging: generate checks: */ +# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \ + ((expr) \ + ? (void)(0) \ + : _PyObject_AssertFailed((obj), Py_STRINGIFY(expr), \ + (msg), (filename), (lineno), (func))) +#endif + +#define _PyObject_ASSERT_WITH_MSG(obj, expr, msg) \ + _PyObject_ASSERT_FROM(obj, expr, msg, __FILE__, __LINE__, __func__) +#define _PyObject_ASSERT(obj, expr) \ + _PyObject_ASSERT_WITH_MSG(obj, expr, NULL) + +#define _PyObject_ASSERT_FAILED_MSG(obj, msg) \ + _PyObject_AssertFailed((obj), NULL, (msg), __FILE__, __LINE__, __func__) + +/* Declare and define _PyObject_AssertFailed() even when NDEBUG is defined, + to avoid causing compiler/linker errors when building extensions without + NDEBUG against a Python built with NDEBUG defined. + + msg, expr and function can be NULL. */ +PyAPI_FUNC(void) _PyObject_AssertFailed( + PyObject *obj, + const char *expr, + const char *msg, + const char *file, + int line, + const char *function); + +#ifdef __cplusplus +} +#endif diff --git a/Include/object.h b/Include/object.h index cdcdca85c633..21c29e7bdb00 100644 --- a/Include/object.h +++ b/Include/object.h @@ -127,39 +127,6 @@ typedef struct { #define Py_TYPE(ob) (_PyObject_CAST(ob)->ob_type) #define Py_SIZE(ob) (_PyVarObject_CAST(ob)->ob_size) -#ifndef Py_LIMITED_API -/********************* String Literals ****************************************/ -/* This structure helps managing static strings. The basic usage goes like this: - Instead of doing - - r = PyObject_CallMethod(o, "foo", "args", ...); - - do - - _Py_IDENTIFIER(foo); - ... - r = _PyObject_CallMethodId(o, &PyId_foo, "args", ...); - - PyId_foo is a static variable, either on block level or file level. On first - usage, the string "foo" is interned, and the structures are linked. On interpreter - shutdown, all strings are released (through _PyUnicode_ClearStaticStrings). - - Alternatively, _Py_static_string allows choosing the variable name. - _PyUnicode_FromId returns a borrowed reference to the interned string. - _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*. -*/ -typedef struct _Py_Identifier { - struct _Py_Identifier *next; - const char* string; - PyObject *object; -} _Py_Identifier; - -#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL } -#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) -#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) - -#endif /* !Py_LIMITED_API */ - /* Type objects contain a string containing the type name (to help somewhat in debugging), the allocation parameters (see PyObject_New() and @@ -186,154 +153,13 @@ typedef int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *); typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); typedef int(*objobjargproc)(PyObject *, PyObject *, PyObject *); -#ifndef Py_LIMITED_API -/* buffer interface */ -typedef struct bufferinfo { - void *buf; - PyObject *obj; /* owned reference */ - Py_ssize_t len; - Py_ssize_t itemsize; /* This is Py_ssize_t so it can be - pointed to by strides in simple case.*/ - int readonly; - int ndim; - char *format; - Py_ssize_t *shape; - Py_ssize_t *strides; - Py_ssize_t *suboffsets; - void *internal; -} Py_buffer; - -typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); -typedef void (*releasebufferproc)(PyObject *, Py_buffer *); - -/* Maximum number of dimensions */ -#define PyBUF_MAX_NDIM 64 - -/* Flags for getting buffers */ -#define PyBUF_SIMPLE 0 -#define PyBUF_WRITABLE 0x0001 -/* we used to include an E, backwards compatible alias */ -#define PyBUF_WRITEABLE PyBUF_WRITABLE -#define PyBUF_FORMAT 0x0004 -#define PyBUF_ND 0x0008 -#define PyBUF_STRIDES (0x0010 | PyBUF_ND) -#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) -#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) -#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) -#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) - -#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE) -#define PyBUF_CONTIG_RO (PyBUF_ND) - -#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE) -#define PyBUF_STRIDED_RO (PyBUF_STRIDES) - -#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT) -#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT) - -#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT) -#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT) - - -#define PyBUF_READ 0x100 -#define PyBUF_WRITE 0x200 - -/* End buffer interface */ -#endif /* Py_LIMITED_API */ - typedef int (*objobjproc)(PyObject *, PyObject *); typedef int (*visitproc)(PyObject *, void *); typedef int (*traverseproc)(PyObject *, visitproc, void *); -#ifndef Py_LIMITED_API -typedef struct { - /* Number implementations must check *both* - arguments for proper type and implement the necessary conversions - in the slot functions themselves. */ - - binaryfunc nb_add; - binaryfunc nb_subtract; - binaryfunc nb_multiply; - binaryfunc nb_remainder; - binaryfunc nb_divmod; - ternaryfunc nb_power; - unaryfunc nb_negative; - unaryfunc nb_positive; - unaryfunc nb_absolute; - inquiry nb_bool; - unaryfunc nb_invert; - binaryfunc nb_lshift; - binaryfunc nb_rshift; - binaryfunc nb_and; - binaryfunc nb_xor; - binaryfunc nb_or; - unaryfunc nb_int; - void *nb_reserved; /* the slot formerly known as nb_long */ - unaryfunc nb_float; - - binaryfunc nb_inplace_add; - binaryfunc nb_inplace_subtract; - binaryfunc nb_inplace_multiply; - binaryfunc nb_inplace_remainder; - ternaryfunc nb_inplace_power; - binaryfunc nb_inplace_lshift; - binaryfunc nb_inplace_rshift; - binaryfunc nb_inplace_and; - binaryfunc nb_inplace_xor; - binaryfunc nb_inplace_or; - - binaryfunc nb_floor_divide; - binaryfunc nb_true_divide; - binaryfunc nb_inplace_floor_divide; - binaryfunc nb_inplace_true_divide; - - unaryfunc nb_index; - - binaryfunc nb_matrix_multiply; - binaryfunc nb_inplace_matrix_multiply; -} PyNumberMethods; - -typedef struct { - lenfunc sq_length; - binaryfunc sq_concat; - ssizeargfunc sq_repeat; - ssizeargfunc sq_item; - void *was_sq_slice; - ssizeobjargproc sq_ass_item; - void *was_sq_ass_slice; - objobjproc sq_contains; - - binaryfunc sq_inplace_concat; - ssizeargfunc sq_inplace_repeat; -} PySequenceMethods; - -typedef struct { - lenfunc mp_length; - binaryfunc mp_subscript; - objobjargproc mp_ass_subscript; -} PyMappingMethods; - -typedef struct { - unaryfunc am_await; - unaryfunc am_aiter; - unaryfunc am_anext; -} PyAsyncMethods; - -typedef struct { - getbufferproc bf_getbuffer; - releasebufferproc bf_releasebuffer; -} PyBufferProcs; -#endif /* Py_LIMITED_API */ typedef void (*freefunc)(void *); typedef void (*destructor)(PyObject *); -#ifndef Py_LIMITED_API -/* We can't provide a full compile-time check that limited-API - users won't implement tp_print. However, not defining printfunc - and making tp_print of a different function pointer type - should at least cause a warning in most cases. */ -typedef int (*printfunc)(PyObject *, FILE *, int); -#endif typedef PyObject *(*getattrfunc)(PyObject *, char *); typedef PyObject *(*getattrofunc)(PyObject *, PyObject *); typedef int (*setattrfunc)(PyObject *, char *, PyObject *); @@ -349,100 +175,8 @@ typedef int (*initproc)(PyObject *, PyObject *, PyObject *); typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *); typedef PyObject *(*allocfunc)(struct _typeobject *, Py_ssize_t); -#ifdef Py_LIMITED_API -typedef struct _typeobject PyTypeObject; /* opaque */ -#else -typedef struct _typeobject { - PyObject_VAR_HEAD - const char *tp_name; /* For printing, in format "<module>.<name>" */ - Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */ - - /* Methods to implement standard operations */ - - destructor tp_dealloc; - printfunc tp_print; - getattrfunc tp_getattr; - setattrfunc tp_setattr; - PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2) - or tp_reserved (Python 3) */ - reprfunc tp_repr; - - /* Method suites for standard classes */ - - PyNumberMethods *tp_as_number; - PySequenceMethods *tp_as_sequence; - PyMappingMethods *tp_as_mapping; - - /* More standard operations (here for binary compatibility) */ - - hashfunc tp_hash; - ternaryfunc tp_call; - reprfunc tp_str; - getattrofunc tp_getattro; - setattrofunc tp_setattro; - - /* Functions to access object as input/output buffer */ - PyBufferProcs *tp_as_buffer; - - /* Flags to define presence of optional/expanded features */ - unsigned long tp_flags; - - const char *tp_doc; /* Documentation string */ - - /* Assigned meaning in release 2.0 */ - /* call function for all accessible objects */ - traverseproc tp_traverse; - - /* delete references to contained objects */ - inquiry tp_clear; - - /* Assigned meaning in release 2.1 */ - /* rich comparisons */ - richcmpfunc tp_richcompare; - - /* weak reference enabler */ - Py_ssize_t tp_weaklistoffset; - - /* Iterators */ - getiterfunc tp_iter; - iternextfunc tp_iternext; - - /* Attribute descriptor and subclassing stuff */ - struct PyMethodDef *tp_methods; - struct PyMemberDef *tp_members; - struct PyGetSetDef *tp_getset; - struct _typeobject *tp_base; - PyObject *tp_dict; - descrgetfunc tp_descr_get; - descrsetfunc tp_descr_set; - Py_ssize_t tp_dictoffset; - initproc tp_init; - allocfunc tp_alloc; - newfunc tp_new; - freefunc tp_free; /* Low-level free-memory routine */ - inquiry tp_is_gc; /* For PyObject_IS_GC */ - PyObject *tp_bases; - PyObject *tp_mro; /* method resolution order */ - PyObject *tp_cache; - PyObject *tp_subclasses; - PyObject *tp_weaklist; - destructor tp_del; - - /* Type attribute cache version tag. Added in version 2.6 */ - unsigned int tp_version_tag; - - destructor tp_finalize; - -#ifdef COUNT_ALLOCS - /* these must be last and never explicitly initialized */ - Py_ssize_t tp_allocs; - Py_ssize_t tp_frees; - Py_ssize_t tp_maxalloc; - struct _typeobject *tp_prev; - struct _typeobject *tp_next; -#endif -} PyTypeObject; -#endif +/* In Py_LIMITED_API, PyTypeObject is an opaque structure. */ +typedef struct _typeobject PyTypeObject; typedef struct{ int slot; /* slot id, see below */ @@ -465,31 +199,6 @@ PyAPI_FUNC(PyObject*) PyType_FromSpecWithBases(PyType_Spec*, PyObject*); PyAPI_FUNC(void*) PyType_GetSlot(PyTypeObject*, int); #endif -#ifndef Py_LIMITED_API -/* The *real* layout of a type object when allocated on the heap */ -typedef struct _heaptypeobject { - /* Note: there's a dependency on the order of these members - in slotptr() in typeobject.c . */ - PyTypeObject ht_type; - PyAsyncMethods as_async; - PyNumberMethods as_number; - PyMappingMethods as_mapping; - PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, - so that the mapping wins when both - the mapping and the sequence define - a given operator (e.g. __getitem__). - see add_operators() in typeobject.c . */ - PyBufferProcs as_buffer; - PyObject *ht_name, *ht_slots, *ht_qualname; - struct _dictkeysobject *ht_cached_keys; - /* here are optional user slots, followed by the members. */ -} PyHeapTypeObject; - -/* access macro to the members which are floating "behind" the object */ -#define PyHeapType_GET_MEMBERS(etype) \ - ((PyMemberDef *)(((char *)etype) + Py_TYPE(etype)->tp_basicsize)) -#endif - /* Generic type check */ PyAPI_FUNC(int) PyType_IsSubtype(PyTypeObject *, PyTypeObject *); #define PyObject_TypeCheck(ob, tp) \ @@ -509,29 +218,10 @@ PyAPI_FUNC(int) PyType_Ready(PyTypeObject *); PyAPI_FUNC(PyObject *) PyType_GenericAlloc(PyTypeObject *, Py_ssize_t); PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *, PyObject *, PyObject *); -#ifndef Py_LIMITED_API -PyAPI_FUNC(const char *) _PyType_Name(PyTypeObject *); -PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *); -PyAPI_FUNC(PyObject *) _PyType_LookupId(PyTypeObject *, _Py_Identifier *); -PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, _Py_Identifier *); -PyAPI_FUNC(PyTypeObject *) _PyType_CalculateMetaclass(PyTypeObject *, PyObject *); -#endif PyAPI_FUNC(unsigned int) PyType_ClearCache(void); PyAPI_FUNC(void) PyType_Modified(PyTypeObject *); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyType_GetDocFromInternalDoc(const char *, const char *); -PyAPI_FUNC(PyObject *) _PyType_GetTextSignatureFromInternalDoc(const char *, const char *); -#endif - /* Generic operations on objects */ -#ifndef Py_LIMITED_API -struct _Py_Identifier; -PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); -PyAPI_FUNC(void) _Py_BreakPoint(void); -PyAPI_FUNC(void) _PyObject_Dump(PyObject *); -PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *); -#endif PyAPI_FUNC(PyObject *) PyObject_Repr(PyObject *); PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *); PyAPI_FUNC(PyObject *) PyObject_ASCII(PyObject *); @@ -544,28 +234,7 @@ PyAPI_FUNC(int) PyObject_HasAttrString(PyObject *, const char *); PyAPI_FUNC(PyObject *) PyObject_GetAttr(PyObject *, PyObject *); PyAPI_FUNC(int) PyObject_SetAttr(PyObject *, PyObject *, PyObject *); PyAPI_FUNC(int) PyObject_HasAttr(PyObject *, PyObject *); -#ifndef Py_LIMITED_API -PyAPI_FUNC(int) _PyObject_IsAbstract(PyObject *); -PyAPI_FUNC(PyObject *) _PyObject_GetAttrId(PyObject *, struct _Py_Identifier *); -PyAPI_FUNC(int) _PyObject_SetAttrId(PyObject *, struct _Py_Identifier *, PyObject *); -PyAPI_FUNC(int) _PyObject_HasAttrId(PyObject *, struct _Py_Identifier *); -/* Replacements of PyObject_GetAttr() and _PyObject_GetAttrId() which - don't raise AttributeError. - - Return 1 and set *result != NULL if an attribute is found. - Return 0 and set *result == NULL if an attribute is not found; - an AttributeError is silenced. - Return -1 and set *result == NULL if an error other than AttributeError - is raised. -*/ -PyAPI_FUNC(int) _PyObject_LookupAttr(PyObject *, PyObject *, PyObject **); -PyAPI_FUNC(int) _PyObject_LookupAttrId(PyObject *, struct _Py_Identifier *, PyObject **); -PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *); -#endif PyAPI_FUNC(PyObject *) PyObject_SelfIter(PyObject *); -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *); -#endif PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *); PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *, PyObject *, PyObject *); @@ -577,28 +246,7 @@ PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *); PyAPI_FUNC(int) PyObject_IsTrue(PyObject *); PyAPI_FUNC(int) PyObject_Not(PyObject *); PyAPI_FUNC(int) PyCallable_Check(PyObject *); - PyAPI_FUNC(void) PyObject_ClearWeakRefs(PyObject *); -#ifndef Py_LIMITED_API -PyAPI_FUNC(void) PyObject_CallFinalizer(PyObject *); -PyAPI_FUNC(int) PyObject_CallFinalizerFromDealloc(PyObject *); -#endif - -#ifndef Py_LIMITED_API -/* Same as PyObject_Generic{Get,Set}Attr, but passing the attributes - dict as the last parameter. */ -PyAPI_FUNC(PyObject *) -_PyObject_GenericGetAttrWithDict(PyObject *, PyObject *, PyObject *, int); -PyAPI_FUNC(int) -_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *, - PyObject *, PyObject *); -#endif /* !Py_LIMITED_API */ - -/* Helper to look up a builtin object */ -#ifndef Py_LIMITED_API -PyAPI_FUNC(PyObject *) -_PyObject_GetBuiltin(const char *name); -#endif /* PyObject_Dir(obj) acts like Python builtins.dir(obj), returning a list of strings. PyObject_Dir(NULL) is like builtins.dir(), @@ -689,9 +337,7 @@ given type object has a specified feature. #define Py_TPFLAGS_HAVE_FINALIZE (1UL << 0) #ifdef Py_LIMITED_API -#define PyType_HasFeature(t,f) ((PyType_GetFlags(t) & (f)) != 0) -#else -#define PyType_HasFeature(t,f) (((t)->tp_flags & (f)) != 0) +# define PyType_HasFeature(t,f) ((PyType_GetFlags(t) & (f)) != 0) #endif #define PyType_FastSubclass(t,f) PyType_HasFeature(t,f) @@ -775,7 +421,6 @@ PyAPI_FUNC(void) _Py_ForgetReference(PyObject *); PyAPI_FUNC(void) _Py_PrintReferences(FILE *); PyAPI_FUNC(void) _Py_PrintReferenceAddresses(FILE *); PyAPI_FUNC(void) _Py_AddToAllObjects(PyObject *, int force); - #else /* Without Py_TRACE_REFS, there's little enough to do that we expand code inline. */ @@ -798,22 +443,6 @@ static inline void _Py_ForgetReference(PyObject *op) PyAPI_FUNC(void) _Py_Dealloc(PyObject *); -#ifndef Py_LIMITED_API -static inline void _Py_Dealloc_inline(PyObject *op) -{ - destructor dealloc = Py_TYPE(op)->tp_dealloc; -#ifdef Py_TRACE_REFS - _Py_ForgetReference(op); -#else - _Py_INC_TPFREES(op); -#endif - (*dealloc)(op); -} - -# define _Py_Dealloc(op) _Py_Dealloc_inline(op) -#endif /* !defined(Py_LIMITED_API) */ - - static inline void _Py_INCREF(PyObject *op) { _Py_INC_REFTOTAL; @@ -903,42 +532,6 @@ static inline void _Py_XDECREF(PyObject *op) #define Py_XDECREF(op) _Py_XDECREF(_PyObject_CAST(op)) -#ifndef Py_LIMITED_API -/* Safely decref `op` and set `op` to `op2`. - * - * As in case of Py_CLEAR "the obvious" code can be deadly: - * - * Py_DECREF(op); - * op = op2; - * - * The safe way is: - * - * Py_SETREF(op, op2); - * - * That arranges to set `op` to `op2` _before_ decref'ing, so that any code - * triggered as a side-effect of `op` getting torn down no longer believes - * `op` points to a valid object. - * - * Py_XSETREF is a variant of Py_SETREF that uses Py_XDECREF instead of - * Py_DECREF. - */ - -#define Py_SETREF(op, op2) \ - do { \ - PyObject *_py_tmp = _PyObject_CAST(op); \ - (op) = (op2); \ - Py_DECREF(_py_tmp); \ - } while (0) - -#define Py_XSETREF(op, op2) \ - do { \ - PyObject *_py_tmp = _PyObject_CAST(op); \ - (op) = (op2); \ - Py_XDECREF(_py_tmp); \ - } while (0) - -#endif /* ifndef Py_LIMITED_API */ - /* These are provided as conveniences to Python runtime embedders, so that they can have object code that is not dependent on Python compilation flags. @@ -946,11 +539,6 @@ they can have object code that is not dependent on Python compilation flags. PyAPI_FUNC(void) Py_IncRef(PyObject *); PyAPI_FUNC(void) Py_DecRef(PyObject *); -#ifndef Py_LIMITED_API -PyAPI_DATA(PyTypeObject) _PyNone_Type; -PyAPI_DATA(PyTypeObject) _PyNotImplemented_Type; -#endif /* !Py_LIMITED_API */ - /* _Py_NoneStruct is an object of undefined type which can be used in contexts where NULL (nil) is not suitable (since NULL often means 'error'). @@ -1001,13 +589,6 @@ PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */ } \ } while (0) -#ifndef Py_LIMITED_API -/* Maps Py_LT to Py_GT, ..., Py_GE to Py_LE. - * Defined in object.c. - */ -PyAPI_DATA(int) _Py_SwappedOp[]; -#endif /* !Py_LIMITED_API */ - /* More conventions @@ -1103,13 +684,6 @@ chain of N deallocations is broken into (N-1)/(PyTrash_UNWIND_LEVEL-1) pieces, with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL. */ -#ifndef Py_LIMITED_API -/* This is the old private API, invoked by the macros before 3.2.4. - Kept for binary compatibility of extensions using the stable ABI. */ -PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*); -PyAPI_FUNC(void) _PyTrash_destroy_chain(void); -#endif /* !Py_LIMITED_API */ - /* The new thread-safe private API, invoked by the macros below. */ PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyObject*); PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(void); @@ -1131,66 +705,13 @@ PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(void); _PyTrash_thread_deposit_object(_PyObject_CAST(op)); \ } while (0); -#ifndef Py_LIMITED_API -PyAPI_FUNC(void) -_PyDebugAllocatorStats(FILE *out, const char *block_name, int num_blocks, - size_t sizeof_block); -PyAPI_FUNC(void) -_PyObject_DebugTypeStats(FILE *out); -#endif /* ifndef Py_LIMITED_API */ - #ifndef Py_LIMITED_API -/* Define a pair of assertion macros: - _PyObject_ASSERT_FROM(), _PyObject_ASSERT_WITH_MSG() and _PyObject_ASSERT(). - - These work like the regular C assert(), in that they will abort the - process with a message on stderr if the given condition fails to hold, - but compile away to nothing if NDEBUG is defined. - - However, before aborting, Python will also try to call _PyObject_Dump() on - the given object. This may be of use when investigating bugs in which a - particular object is corrupt (e.g. buggy a tp_visit method in an extension - module breaking the garbage collector), to help locate the broken objects. - - The WITH_MSG variant allows you to supply an additional message that Python - will attempt to print to stderr, after the object dump. */ -#ifdef NDEBUG - /* No debugging: compile away the assertions: */ -# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \ - ((void)0) -#else - /* With debugging: generate checks: */ -# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \ - ((expr) \ - ? (void)(0) \ - : _PyObject_AssertFailed((obj), Py_STRINGIFY(expr), \ - (msg), (filename), (lineno), (func))) +# define Py_CPYTHON_OBJECT_H +# include "cpython/object.h" +# undef Py_CPYTHON_OBJECT_H #endif -#define _PyObject_ASSERT_WITH_MSG(obj, expr, msg) \ - _PyObject_ASSERT_FROM(obj, expr, msg, __FILE__, __LINE__, __func__) -#define _PyObject_ASSERT(obj, expr) \ - _PyObject_ASSERT_WITH_MSG(obj, expr, NULL) - -#define _PyObject_ASSERT_FAILED_MSG(obj, msg) \ - _PyObject_AssertFailed((obj), NULL, (msg), __FILE__, __LINE__, __func__) - -/* Declare and define _PyObject_AssertFailed() even when NDEBUG is defined, - to avoid causing compiler/linker errors when building extensions without - NDEBUG against a Python built with NDEBUG defined. - - msg, expr and function can be NULL. */ -PyAPI_FUNC(void) _PyObject_AssertFailed( - PyObject *obj, - const char *expr, - const char *msg, - const char *file, - int line, - const char *function); -#endif /* ifndef Py_LIMITED_API */ - - #ifdef __cplusplus } #endif

1 0

bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623) (GH-10718) (GH-10720)
by Victor Stinner 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/fc4a44b0c3a69390eca4680d89c2ae5fe9… commit: fc4a44b0c3a69390eca4680d89c2ae5fe967f882 branch: 3.6 author: Victor Stinner <vstinner(a)redhat.com> committer: GitHub <noreply(a)github.com> date: 2018-11-26T17:03:31+01:00 summary: bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623) (GH-10718) (GH-10720) Fix str.format(), float.__format__() and complex.__format__() methods for non-ASCII decimal point when using the "n" formatter. Rewrite _PyUnicode_InsertThousandsGrouping(): it now requires a _PyUnicodeWriter object for the buffer and a Python str object for digits. (cherry picked from commit 59423e3ddd736387cef8f7632c71954c1859bed0) (cherry picked from commit 6f5fa1b4be735159e964906ab608dc467476e47c) files: A Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst M Include/unicodeobject.h M Objects/stringlib/localeutil.h M Objects/unicodeobject.c M Python/formatter_unicode.c diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index f49887387008..c81a38181e49 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -2143,10 +2143,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip( see Objects/stringlib/localeutil.h */ #ifndef Py_LIMITED_API PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping( - PyObject *unicode, - Py_ssize_t index, + _PyUnicodeWriter *writer, Py_ssize_t n_buffer, - void *digits, + PyObject *digits, + Py_ssize_t d_pos, Py_ssize_t n_digits, Py_ssize_t min_width, const char *grouping, diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst new file mode 100644 index 000000000000..9bfbe1644e16 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst @@ -0,0 +1,3 @@ +For :meth:`str.format`, :meth:`float.__format__` and +:meth:`complex.__format__` methods for non-ASCII decimal point when using +the "n" formatter. diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h index df501ed05c7e..31fed349caa5 100644 --- a/Objects/stringlib/localeutil.h +++ b/Objects/stringlib/localeutil.h @@ -1,28 +1,24 @@ -/* stringlib: locale related helpers implementation */ - -#include <locale.h> - -#if !STRINGLIB_IS_UNICODE -# error "localeutil.h is specific to Unicode" -#endif +/* _PyUnicode_InsertThousandsGrouping() helper functions */ typedef struct { const char *grouping; char previous; Py_ssize_t i; /* Where we're currently pointing in grouping. */ -} STRINGLIB(GroupGenerator); +} GroupGenerator; + static void -STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping) +GroupGenerator_init(GroupGenerator *self, const char *grouping) { self->grouping = grouping; self->i = 0; self->previous = 0; } + /* Returns the next grouping, or 0 to signify end. */ static Py_ssize_t -STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self) +GroupGenerator_next(GroupGenerator *self) { /* Note that we don't really do much error checking here. If a grouping string contains just CHAR_MAX, for example, then just @@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self) } } + /* Fill in some digits, leading zeros, and thousands separator. All are optional, depending on when we're called. */ static void -STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end, - Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep, - Py_ssize_t thousands_sep_len) +InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos, + PyObject *digits, Py_ssize_t *digits_pos, + Py_ssize_t n_chars, Py_ssize_t n_zeros, + PyObject *thousands_sep, Py_ssize_t thousands_sep_len, + Py_UCS4 *maxchar) { - Py_ssize_t i; + if (!writer) { + /* if maxchar > 127, maxchar is already set */ + if (*maxchar == 127 && thousands_sep) { + Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep); + *maxchar = Py_MAX(*maxchar, maxchar2); + } + return; + } if (thousands_sep) { - *buffer_end -= thousands_sep_len; + *buffer_pos -= thousands_sep_len; /* Copy the thousands_sep chars into the buffer. */ - memcpy(*buffer_end, thousands_sep, - thousands_sep_len * STRINGLIB_SIZEOF_CHAR); - } - - *buffer_end -= n_chars; - *digits_end -= n_chars; - memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR)); - - *buffer_end -= n_zeros; - for (i = 0; i < n_zeros; i++) - (*buffer_end)[i] = '0'; -} - -/** - * InsertThousandsGrouping: - * @buffer: A pointer to the start of a string. - * @n_buffer: Number of characters in @buffer. - * @digits: A pointer to the digits we're reading from. If count - * is non-NULL, this is unused. - * @n_digits: The number of digits in the string, in which we want - * to put the grouping chars. - * @min_width: The minimum width of the digits in the output string. - * Output will be zero-padded on the left to fill. - * @grouping: see definition in localeconv(). - * @thousands_sep: see definition in localeconv(). - * - * There are 2 modes: counting and filling. If @buffer is NULL, - * we are in counting mode, else filling mode. - * If counting, the required buffer size is returned. - * If filling, we know the buffer will be large enough, so we don't - * need to pass in the buffer size. - * Inserts thousand grouping characters (as defined by grouping and - * thousands_sep) into the string between buffer and buffer+n_digits. - * - * Return value: 0 on error, else 1. Note that no error can occur if - * count is non-NULL. - * - * This name won't be used, the includer of this file should define - * it to be the actual function name, based on unicode or string. - * - * As closely as possible, this code mimics the logic in decimal.py's - _insert_thousands_sep(). - **/ -static Py_ssize_t -STRINGLIB(InsertThousandsGrouping)( - STRINGLIB_CHAR *buffer, - Py_ssize_t n_buffer, - STRINGLIB_CHAR *digits, - Py_ssize_t n_digits, - Py_ssize_t min_width, - const char *grouping, - STRINGLIB_CHAR *thousands_sep, - Py_ssize_t thousands_sep_len) -{ - Py_ssize_t count = 0; - Py_ssize_t n_zeros; - int loop_broken = 0; - int use_separator = 0; /* First time through, don't append the - separator. They only go between - groups. */ - STRINGLIB_CHAR *buffer_end = NULL; - STRINGLIB_CHAR *digits_end = NULL; - Py_ssize_t l; - Py_ssize_t n_chars; - Py_ssize_t remaining = n_digits; /* Number of chars remaining to - be looked at */ - /* A generator that returns all of the grouping widths, until it - returns 0. */ - STRINGLIB(GroupGenerator) groupgen; - STRINGLIB(GroupGenerator_init)(&groupgen, grouping); - - if (buffer) { - buffer_end = buffer + n_buffer; - digits_end = digits + n_digits; - } - - while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) { - l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1)); - n_zeros = Py_MAX(0, l - remaining); - n_chars = Py_MAX(0, Py_MIN(remaining, l)); - - /* Use n_zero zero's and n_chars chars */ - - /* Count only, don't do anything. */ - count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; - - if (buffer) { - /* Copy into the output buffer. */ - STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros, - use_separator ? thousands_sep : NULL, thousands_sep_len); - } - - /* Use a separator next time. */ - use_separator = 1; - - remaining -= n_chars; - min_width -= l; - - if (remaining <= 0 && min_width <= 0) { - loop_broken = 1; - break; - } - min_width -= thousands_sep_len; + _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, + thousands_sep, 0, + thousands_sep_len); } - if (!loop_broken) { - /* We left the loop without using a break statement. */ - l = Py_MAX(Py_MAX(remaining, min_width), 1); - n_zeros = Py_MAX(0, l - remaining); - n_chars = Py_MAX(0, Py_MIN(remaining, l)); - - /* Use n_zero zero's and n_chars chars */ - count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; - if (buffer) { - /* Copy into the output buffer. */ - STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros, - use_separator ? thousands_sep : NULL, thousands_sep_len); - } + *buffer_pos -= n_chars; + *digits_pos -= n_chars; + _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos, + digits, *digits_pos, + n_chars); + + if (n_zeros) { + *buffer_pos -= n_zeros; + enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer); + void *data = PyUnicode_DATA(writer->buffer); + FILL(kind, data, '0', *buffer_pos, n_zeros); } - return count; } - diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 1f342bd199c7..6bfcddaa64e4 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -203,6 +203,31 @@ static PyObject *unicode_empty = NULL; return unicode_empty; \ } while (0) +#define FILL(kind, data, value, start, length) \ + do { \ + Py_ssize_t i_ = 0; \ + assert(kind != PyUnicode_WCHAR_KIND); \ + switch ((kind)) { \ + case PyUnicode_1BYTE_KIND: { \ + unsigned char * to_ = (unsigned char *)((data)) + (start); \ + memset(to_, (unsigned char)value, (length)); \ + break; \ + } \ + case PyUnicode_2BYTE_KIND: { \ + Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \ + for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ + break; \ + } \ + case PyUnicode_4BYTE_KIND: { \ + Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \ + for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ + break; \ + } \ + default: assert(0); \ + } \ + } while (0) + + /* Forward declaration */ static inline int _PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch); @@ -779,7 +804,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/count.h" #include "stringlib/find.h" #include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" #include "stringlib/undef.h" #include "stringlib/ucs1lib.h" @@ -790,7 +814,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/find.h" #include "stringlib/replace.h" #include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" #include "stringlib/undef.h" #include "stringlib/ucs2lib.h" @@ -801,7 +824,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/find.h" #include "stringlib/replace.h" #include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" #include "stringlib/undef.h" #include "stringlib/ucs4lib.h" @@ -812,7 +834,6 @@ ensure_unicode(PyObject *obj) #include "stringlib/find.h" #include "stringlib/replace.h" #include "stringlib/find_max_char.h" -#include "stringlib/localeutil.h" #include "stringlib/undef.h" #include "stringlib/unicodedefs.h" @@ -9407,87 +9428,149 @@ any_find_slice(PyObject* s1, PyObject* s2, return result; } +/* _PyUnicode_InsertThousandsGrouping() helper functions */ +#include "stringlib/localeutil.h" + +/** + * InsertThousandsGrouping: + * @writer: Unicode writer. + * @n_buffer: Number of characters in @buffer. + * @digits: Digits we're reading from. If count is non-NULL, this is unused. + * @d_pos: Start of digits string. + * @n_digits: The number of digits in the string, in which we want + * to put the grouping chars. + * @min_width: The minimum width of the digits in the output string. + * Output will be zero-padded on the left to fill. + * @grouping: see definition in localeconv(). + * @thousands_sep: see definition in localeconv(). + * + * There are 2 modes: counting and filling. If @writer is NULL, + * we are in counting mode, else filling mode. + * If counting, the required buffer size is returned. + * If filling, we know the buffer will be large enough, so we don't + * need to pass in the buffer size. + * Inserts thousand grouping characters (as defined by grouping and + * thousands_sep) into @writer. + * + * Return value: -1 on error, number of characters otherwise. + **/ Py_ssize_t _PyUnicode_InsertThousandsGrouping( - PyObject *unicode, Py_ssize_t index, + _PyUnicodeWriter *writer, Py_ssize_t n_buffer, - void *digits, Py_ssize_t n_digits, + PyObject *digits, + Py_ssize_t d_pos, + Py_ssize_t n_digits, Py_ssize_t min_width, - const char *grouping, PyObject *thousands_sep, + const char *grouping, + PyObject *thousands_sep, Py_UCS4 *maxchar) { - unsigned int kind, thousands_sep_kind; - char *data, *thousands_sep_data; - Py_ssize_t thousands_sep_len; - Py_ssize_t len; - - if (unicode != NULL) { - kind = PyUnicode_KIND(unicode); - data = (char *) PyUnicode_DATA(unicode) + index * kind; + if (writer) { + assert(digits != NULL); + assert(maxchar == NULL); } else { - kind = PyUnicode_1BYTE_KIND; - data = NULL; - } - thousands_sep_kind = PyUnicode_KIND(thousands_sep); - thousands_sep_data = PyUnicode_DATA(thousands_sep); - thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep); - if (unicode != NULL && thousands_sep_kind != kind) { - if (thousands_sep_kind < kind) { - thousands_sep_data = _PyUnicode_AsKind(thousands_sep, kind); - if (!thousands_sep_data) - return -1; - } - else { - data = _PyUnicode_AsKind(unicode, thousands_sep_kind); - if (!data) - return -1; - } + assert(digits == NULL); + assert(maxchar != NULL); } + assert(0 <= d_pos); + assert(0 <= n_digits); + assert(0 <= min_width); + assert(grouping != NULL); - switch (kind) { - case PyUnicode_1BYTE_KIND: - if (unicode != NULL && PyUnicode_IS_ASCII(unicode)) - len = asciilib_InsertThousandsGrouping( - (Py_UCS1 *) data, n_buffer, (Py_UCS1 *) digits, n_digits, - min_width, grouping, - (Py_UCS1 *) thousands_sep_data, thousands_sep_len); - else - len = ucs1lib_InsertThousandsGrouping( - (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits, - min_width, grouping, - (Py_UCS1 *) thousands_sep_data, thousands_sep_len); - break; - case PyUnicode_2BYTE_KIND: - len = ucs2lib_InsertThousandsGrouping( - (Py_UCS2 *) data, n_buffer, (Py_UCS2 *) digits, n_digits, - min_width, grouping, - (Py_UCS2 *) thousands_sep_data, thousands_sep_len); - break; - case PyUnicode_4BYTE_KIND: - len = ucs4lib_InsertThousandsGrouping( - (Py_UCS4 *) data, n_buffer, (Py_UCS4 *) digits, n_digits, - min_width, grouping, - (Py_UCS4 *) thousands_sep_data, thousands_sep_len); - break; - default: - assert(0); + if (digits != NULL) { + if (PyUnicode_READY(digits) == -1) { + return -1; + } + } + if (PyUnicode_READY(thousands_sep) == -1) { return -1; } - if (unicode != NULL && thousands_sep_kind != kind) { - if (thousands_sep_kind < kind) - PyMem_Free(thousands_sep_data); - else - PyMem_Free(data); + + Py_ssize_t count = 0; + Py_ssize_t n_zeros; + int loop_broken = 0; + int use_separator = 0; /* First time through, don't append the + separator. They only go between + groups. */ + Py_ssize_t buffer_pos; + Py_ssize_t digits_pos; + Py_ssize_t len; + Py_ssize_t n_chars; + Py_ssize_t remaining = n_digits; /* Number of chars remaining to + be looked at */ + /* A generator that returns all of the grouping widths, until it + returns 0. */ + GroupGenerator groupgen; + GroupGenerator_init(&groupgen, grouping); + const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep); + + /* if digits are not grouped, thousands separator + should be an empty string */ + assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0)); + + digits_pos = d_pos + n_digits; + if (writer) { + buffer_pos = writer->pos + n_buffer; + assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer)); + assert(digits_pos <= PyUnicode_GET_LENGTH(digits)); } - if (unicode == NULL) { + else { + buffer_pos = n_buffer; + } + + if (!writer) { *maxchar = 127; - if (len != n_digits) { - *maxchar = Py_MAX(*maxchar, - PyUnicode_MAX_CHAR_VALUE(thousands_sep)); + } + + while ((len = GroupGenerator_next(&groupgen)) > 0) { + len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1)); + n_zeros = Py_MAX(0, len - remaining); + n_chars = Py_MAX(0, Py_MIN(remaining, len)); + + /* Use n_zero zero's and n_chars chars */ + + /* Count only, don't do anything. */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + + /* Copy into the writer. */ + InsertThousandsGrouping_fill(writer, &buffer_pos, + digits, &digits_pos, + n_chars, n_zeros, + use_separator ? thousands_sep : NULL, + thousands_sep_len, maxchar); + + /* Use a separator next time. */ + use_separator = 1; + + remaining -= n_chars; + min_width -= len; + + if (remaining <= 0 && min_width <= 0) { + loop_broken = 1; + break; } + min_width -= thousands_sep_len; + } + if (!loop_broken) { + /* We left the loop without using a break statement. */ + + len = Py_MAX(Py_MAX(remaining, min_width), 1); + n_zeros = Py_MAX(0, len - remaining); + n_chars = Py_MAX(0, Py_MIN(remaining, len)); + + /* Use n_zero zero's and n_chars chars */ + count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars; + + /* Copy into the writer. */ + InsertThousandsGrouping_fill(writer, &buffer_pos, + digits, &digits_pos, + n_chars, n_zeros, + use_separator ? thousands_sep : NULL, + thousands_sep_len, maxchar); } - return len; + return count; } @@ -10174,30 +10257,6 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject **items, Py_ssize_t seqlen) return NULL; } -#define FILL(kind, data, value, start, length) \ - do { \ - Py_ssize_t i_ = 0; \ - assert(kind != PyUnicode_WCHAR_KIND); \ - switch ((kind)) { \ - case PyUnicode_1BYTE_KIND: { \ - unsigned char * to_ = (unsigned char *)((data)) + (start); \ - memset(to_, (unsigned char)value, (length)); \ - break; \ - } \ - case PyUnicode_2BYTE_KIND: { \ - Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \ - for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ - break; \ - } \ - case PyUnicode_4BYTE_KIND: { \ - Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \ - for (; i_ < (length); ++i_, ++to_) *to_ = (value); \ - break; \ - } \ - default: assert(0); \ - } \ - } while (0) - void _PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length, Py_UCS4 fill_char) diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c index c9a2c99dd257..4a38f7a30509 100644 --- a/Python/formatter_unicode.c +++ b/Python/formatter_unicode.c @@ -462,7 +462,8 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end, /* not all fields of format are used. for example, precision is unused. should this take discrete params in order to be more clear about what it does? or is passing a single format parameter easier - and more efficient enough to justify a little obfuscation? */ + and more efficient enough to justify a little obfuscation? + Return -1 on error. */ static Py_ssize_t calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start, @@ -541,9 +542,12 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix, Py_UCS4 grouping_maxchar; spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping( NULL, 0, - 0, NULL, - spec->n_digits, spec->n_min_width, + NULL, 0, spec->n_digits, + spec->n_min_width, locale->grouping, locale->thousands_sep, &grouping_maxchar); + if (spec->n_grouped_digits == -1) { + return -1; + } *maxchar = Py_MAX(*maxchar, grouping_maxchar); } @@ -637,26 +641,14 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec, /* Only for type 'c' special case, it has no digits. */ if (spec->n_digits != 0) { /* Fill the digits with InsertThousandsGrouping. */ - char *pdigits; - if (PyUnicode_READY(digits)) - return -1; - pdigits = PyUnicode_DATA(digits); - if (PyUnicode_KIND(digits) < kind) { - pdigits = _PyUnicode_AsKind(digits, kind); - if (pdigits == NULL) - return -1; - } r = _PyUnicode_InsertThousandsGrouping( - writer->buffer, writer->pos, - spec->n_grouped_digits, - pdigits + kind * d_pos, - spec->n_digits, spec->n_min_width, + writer, spec->n_grouped_digits, + digits, d_pos, spec->n_digits, + spec->n_min_width, locale->grouping, locale->thousands_sep, NULL); if (r == -1) return -1; assert(r == spec->n_grouped_digits); - if (PyUnicode_KIND(digits) < kind) - PyMem_Free(pdigits); d_pos += spec->n_digits; } if (toupper) { @@ -996,6 +988,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format, n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars, inumeric_chars + n_digits, n_remainder, 0, &locale, format, &maxchar); + if (n_total == -1) { + goto done; + } /* Allocate the memory. */ if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) @@ -1141,6 +1136,9 @@ format_float_internal(PyObject *value, n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index, index + n_digits, n_remainder, has_decimal, &locale, format, &maxchar); + if (n_total == -1) { + goto done; + } /* Allocate the memory. */ if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1) @@ -1324,6 +1322,9 @@ format_complex_internal(PyObject *value, i_re, i_re + n_re_digits, n_re_remainder, re_has_decimal, &locale, &tmp_format, &maxchar); + if (n_re_total == -1) { + goto done; + } /* Same formatting, but always include a sign, unless the real part is * going to be omitted, in which case we use whatever sign convention was @@ -1334,6 +1335,9 @@ format_complex_internal(PyObject *value, i_im, i_im + n_im_digits, n_im_remainder, im_has_decimal, &locale, &tmp_format, &maxchar); + if (n_im_total == -1) { + goto done; + } if (skip_re) n_re_total = 0;

1 0

pythoninfo: log more environment variable (GH-10719)
by Victor Stinner 26 Nov '18

26 Nov '18

https://github.com/python/cpython/commit/282c03d45d2d766c55904a4eb766923a2c… commit: 282c03d45d2d766c55904a4eb766923a2c459124 branch: master author: Victor Stinner <vstinner(a)redhat.com> committer: GitHub <noreply(a)github.com> date: 2018-11-26T17:03:16+01:00 summary: pythoninfo: log more environment variable (GH-10719) Log TZ to debug a timezone issue... and a few more :-) files: M Lib/test/pythoninfo.py diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py index 9257fdf332a6..30e6f21c2b48 100644 --- a/Lib/test/pythoninfo.py +++ b/Lib/test/pythoninfo.py @@ -199,32 +199,73 @@ def format_groups(groups): call_func(info_add, 'os.cpu_count', os, 'cpu_count') call_func(info_add, 'os.loadavg', os, 'getloadavg') - # Get environment variables: filter to list - # to not leak sensitive information - ENV_VARS = ( + # Environment variables used by the stdlib and tests. Don't log the full + # environment: filter to list to not leak sensitive information. + # + # HTTP_PROXY is not logged because it can contain a password. + ENV_VARS = frozenset(( + "APPDATA", + "AR", + "ARCHFLAGS", + "ARFLAGS", + "AUDIODEV", "CC", + "CFLAGS", + "COLUMNS", + "COMPUTERNAME", "COMSPEC", + "CPP", + "CPPFLAGS", "DISPLAY", + "DISTUTILS_DEBUG", "DISTUTILS_USE_SDK", "DYLD_LIBRARY_PATH", + "ENSUREPIP_OPTIONS", + "HISTORY_FILE", "HOME", "HOMEDRIVE", "HOMEPATH", + "IDLESTARTUP", "LANG", + "LDFLAGS", + "LDSHARED", "LD_LIBRARY_PATH", + "LINES", "MACOSX_DEPLOYMENT_TARGET", + "MAILCAPS", "MAKEFLAGS", + "MIXERDEV", "MSSDK", "PATH", + "PATHEXT", + "PIP_CONFIG_FILE", + "PLAT", + "POSIXLY_CORRECT", + "PY_SAX_PARSER", + "ProgramFiles", + "ProgramFiles(x86)", + "RUNNING_ON_VALGRIND", "SDK_TOOLS_BIN", + "SERVER_SOFTWARE", "SHELL", + "SOURCE_DATE_EPOCH", + "SYSTEMROOT", "TEMP", "TERM", + "TILE_LIBRARY", + "TIX_LIBRARY", "TMP", "TMPDIR", + "TZ", "USERPROFILE", + "VIRTUAL_ENV", "WAYLAND_DISPLAY", - ) + "WINDIR", + "_PYTHON_HOST_PLATFORM", + "_PYTHON_PROJECT_BASE", + "_PYTHON_SYSCONFIGDATA_NAME", + "__PYVENV_LAUNCHER__", + )) for name, value in os.environ.items(): uname = name.upper() if (uname in ENV_VARS

1 0