Python-checkins
Threads by month
- ----- 2024 -----
- July
- June
- May
- April
- March
- February
- January
- ----- 2023 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2022 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2021 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2020 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2019 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2018 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2017 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2016 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2015 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2014 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2013 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2012 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2011 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2010 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2009 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2008 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2007 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2006 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2005 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2004 -----
- December
- November
- October
- September
- August
- July
- June
- May
- April
- March
- February
- January
- ----- 2003 -----
- December
- November
- October
- September
- August
November 2018
- 2 participants
- 507 discussions
![](https://secure.gravatar.com/avatar/cc7737cd64a84f1b5c61a160798e97ee.jpg?s=120&d=mm&r=g)
26 Nov '18
https://github.com/python/cpython/commit/5350dd1b50e60882a2da6d53ed27e02d2b…
commit: 5350dd1b50e60882a2da6d53ed27e02d2b698f2e
branch: 3.7
author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
committer: GitHub <noreply(a)github.com>
date: 2018-11-26T12:36:54-08:00
summary:
bpo-33723: Fix test_time.test_thread_time() (GH-10724)
Tolerate up to 30 ms, instead of 15 min, in other threads.
(cherry picked from commit 65c216e74f7957006ef7653b7e2afe83007c45ce)
Co-authored-by: Victor Stinner <vstinner(a)redhat.com>
files:
M Lib/test/test_time.py
diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py
index 62abd891aafa..b9c678640808 100644
--- a/Lib/test/test_time.py
+++ b/Lib/test/test_time.py
@@ -497,7 +497,8 @@ def test_process_time(self):
self.assertLess(stop - start, 0.020)
# bpo-33723: A busy loop of 100 ms should increase process_time()
- # by at least 15 ms
+ # by at least 15 ms. Tolerate 15 ms because of the bad resolution of
+ # the clock on Windows (around 15.6 ms).
min_time = 0.015
busy_time = 0.100
@@ -535,8 +536,11 @@ def test_thread_time(self):
self.assertLess(stop - start, 0.020)
# bpo-33723: A busy loop of 100 ms should increase thread_time()
- # by at least 15 ms
+ # by at least 15 ms, but less than 30 ms in other threads.
+ # Tolerate 15 and 30 ms because of the bad resolution
+ # of the clock on Windows (around 15.6 ms).
min_time = 0.015
+ max_time = 0.030
busy_time = 0.100
# thread_time() should include CPU time spent in current thread...
@@ -551,7 +555,7 @@ def test_thread_time(self):
t.start()
t.join()
stop = time.thread_time()
- self.assertLess(stop - start, min_time)
+ self.assertLess(stop - start, max_time)
info = time.get_clock_info('thread_time')
self.assertTrue(info.monotonic)
1
0
https://github.com/python/cpython/commit/65c216e74f7957006ef7653b7e2afe8300…
commit: 65c216e74f7957006ef7653b7e2afe83007c45ce
branch: master
author: Victor Stinner <vstinner(a)redhat.com>
committer: GitHub <noreply(a)github.com>
date: 2018-11-26T21:19:29+01:00
summary:
bpo-33723: Fix test_time.test_thread_time() (GH-10724)
Tolerate up to 30 ms, instead of 15 min, in other threads.
files:
M Lib/test/test_time.py
diff --git a/Lib/test/test_time.py b/Lib/test/test_time.py
index 16b48a92f327..381cc823014e 100644
--- a/Lib/test/test_time.py
+++ b/Lib/test/test_time.py
@@ -496,7 +496,8 @@ def test_process_time(self):
self.assertLess(stop - start, 0.020)
# bpo-33723: A busy loop of 100 ms should increase process_time()
- # by at least 15 ms
+ # by at least 15 ms. Tolerate 15 ms because of the bad resolution of
+ # the clock on Windows (around 15.6 ms).
min_time = 0.015
busy_time = 0.100
@@ -534,8 +535,11 @@ def test_thread_time(self):
self.assertLess(stop - start, 0.020)
# bpo-33723: A busy loop of 100 ms should increase thread_time()
- # by at least 15 ms
+ # by at least 15 ms, but less than 30 ms in other threads.
+ # Tolerate 15 and 30 ms because of the bad resolution
+ # of the clock on Windows (around 15.6 ms).
min_time = 0.015
+ max_time = 0.030
busy_time = 0.100
# thread_time() should include CPU time spent in current thread...
@@ -550,7 +554,7 @@ def test_thread_time(self):
t.start()
t.join()
stop = time.thread_time()
- self.assertLess(stop - start, min_time)
+ self.assertLess(stop - start, max_time)
info = time.get_clock_info('thread_time')
self.assertTrue(info.monotonic)
1
0
![](https://secure.gravatar.com/avatar/cc7737cd64a84f1b5c61a160798e97ee.jpg?s=120&d=mm&r=g)
26 Nov '18
https://github.com/python/cpython/commit/433433fa6d55091600ce88dd19206b3902…
commit: 433433fa6d55091600ce88dd19206b3902e0a87d
branch: master
author: Lisa Roach <lisaroach14(a)gmail.com>
committer: GitHub <noreply(a)github.com>
date: 2018-11-26T10:43:38-08:00
summary:
Adds IPv6 support when invoking http.server directly. (GH-10595)
files:
A Misc/NEWS.d/next/Library/2018-11-18-18-44-40.bpo-24209.p3YWOf.rst
M Doc/library/http.server.rst
M Lib/http/server.py
M Lib/test/test_httpservers.py
diff --git a/Doc/library/http.server.rst b/Doc/library/http.server.rst
index 29f6e7da3bde..a367e373dc3c 100644
--- a/Doc/library/http.server.rst
+++ b/Doc/library/http.server.rst
@@ -410,14 +410,18 @@ the previous example, this serves files relative to the current directory::
python -m http.server 8000
By default, server binds itself to all interfaces. The option ``-b/--bind``
-specifies a specific address to which it should bind. For example, the
-following command causes the server to bind to localhost only::
+specifies a specific address to which it should bind. Both IPv4 and IPv6
+addresses are supported. For example, the following command causes the server
+to bind to localhost only::
python -m http.server 8000 --bind 127.0.0.1
.. versionadded:: 3.4
``--bind`` argument was introduced.
+.. versionadded:: 3.8
+ ``--bind`` argument enhanced to support IPv6
+
By default, server uses the current directory. The option ``-d/--directory``
specifies a directory to which it should serve the files. For example,
the following command uses a specific directory::
diff --git a/Lib/http/server.py b/Lib/http/server.py
index ca2dd507392c..22d865f2fdfa 100644
--- a/Lib/http/server.py
+++ b/Lib/http/server.py
@@ -1226,6 +1226,9 @@ def test(HandlerClass=BaseHTTPRequestHandler,
"""
server_address = (bind, port)
+ if ':' in bind:
+ ServerClass.address_family = socket.AF_INET6
+
HandlerClass.protocol_version = protocol
with ServerClass(server_address, HandlerClass) as httpd:
sa = httpd.socket.getsockname()
diff --git a/Lib/test/test_httpservers.py b/Lib/test/test_httpservers.py
index cc829a522b88..3d8e0af8b45c 100644
--- a/Lib/test/test_httpservers.py
+++ b/Lib/test/test_httpservers.py
@@ -9,6 +9,7 @@
from http import server, HTTPStatus
import os
+import socket
import sys
import re
import base64
@@ -1116,6 +1117,24 @@ def test_all(self):
self.assertCountEqual(server.__all__, expected)
+class ScriptTestCase(unittest.TestCase):
+ @mock.patch('builtins.print')
+ def test_server_test_ipv6(self, _):
+ mock_server = mock.MagicMock()
+ server.test(ServerClass=mock_server, bind="::")
+ self.assertEqual(mock_server.address_family, socket.AF_INET6)
+
+ mock_server.reset_mock()
+ server.test(ServerClass=mock_server,
+ bind="2001:0db8:85a3:0000:0000:8a2e:0370:7334")
+ self.assertEqual(mock_server.address_family, socket.AF_INET6)
+
+ mock_server.reset_mock()
+ server.test(ServerClass=mock_server,
+ bind="::1")
+ self.assertEqual(mock_server.address_family, socket.AF_INET6)
+
+
def test_main(verbose=None):
cwd = os.getcwd()
try:
@@ -1127,6 +1146,7 @@ def test_main(verbose=None):
CGIHTTPServerTestCase,
SimpleHTTPRequestHandlerTestCase,
MiscTestCase,
+ ScriptTestCase
)
finally:
os.chdir(cwd)
diff --git a/Misc/NEWS.d/next/Library/2018-11-18-18-44-40.bpo-24209.p3YWOf.rst b/Misc/NEWS.d/next/Library/2018-11-18-18-44-40.bpo-24209.p3YWOf.rst
new file mode 100644
index 000000000000..88749e920c2c
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-11-18-18-44-40.bpo-24209.p3YWOf.rst
@@ -0,0 +1 @@
+Adds IPv6 support when invoking http.server directly.
1
0
![](https://secure.gravatar.com/avatar/cc7737cd64a84f1b5c61a160798e97ee.jpg?s=120&d=mm&r=g)
[3.6] bpo-35255: Doc: Delete now useless Windows FAQ section (GH-10557) (GH-10725)
by Miss Islington (bot) 26 Nov '18
by Miss Islington (bot) 26 Nov '18
26 Nov '18
https://github.com/python/cpython/commit/7f93f93fa68f54c1bc128d58fa17536245…
commit: 7f93f93fa68f54c1bc128d58fa17536245396de3
branch: 3.6
author: Julien Palard <julien(a)palard.fr>
committer: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
date: 2018-11-26T09:16:13-08:00
summary:
[3.6] bpo-35255: Doc: Delete now useless Windows FAQ section (GH-10557) (GH-10725)
(cherry picked from commit 5719f275b7153a00a800f5481271a6fc26659c65)
Co-authored-by: Mathieu Dupuy <deronnax(a)users.noreply.github.com>
https://bugs.python.org/issue35255
files:
M Doc/faq/windows.rst
diff --git a/Doc/faq/windows.rst b/Doc/faq/windows.rst
index bcfd7f4f1fde..772b8c2012c3 100644
--- a/Doc/faq/windows.rst
+++ b/Doc/faq/windows.rst
@@ -301,14 +301,3 @@ In 2.7 and 3.2, :func:`os.kill` is implemented similar to the above function,
with the additional feature of being able to send :kbd:`Ctrl+C` and :kbd:`Ctrl+Break`
to console subprocesses which are designed to handle those signals. See
:func:`os.kill` for further details.
-
-How do I extract the downloaded documentation on Windows?
----------------------------------------------------------
-
-Sometimes, when you download the documentation package to a Windows machine
-using a web browser, the file extension of the saved file ends up being .EXE.
-This is a mistake; the extension should be .TGZ.
-
-Simply rename the downloaded file to have the .TGZ extension, and WinZip will be
-able to handle it. (If your copy of WinZip doesn't, get a newer one from
-https://www.winzip.com.)
1
0
![](https://secure.gravatar.com/avatar/cc7737cd64a84f1b5c61a160798e97ee.jpg?s=120&d=mm&r=g)
[3.7] bpo-35255: Doc: Delete now useless Windows FAQ section (GH-10557) (GH-10722)
by Miss Islington (bot) 26 Nov '18
by Miss Islington (bot) 26 Nov '18
26 Nov '18
https://github.com/python/cpython/commit/6f8cab0db044132c6192d6ac1536d28cf7…
commit: 6f8cab0db044132c6192d6ac1536d28cf72c9d27
branch: 3.7
author: Julien Palard <julien(a)palard.fr>
committer: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com>
date: 2018-11-26T09:05:48-08:00
summary:
[3.7] bpo-35255: Doc: Delete now useless Windows FAQ section (GH-10557) (GH-10722)
(cherry picked from commit 5719f275b7153a00a800f5481271a6fc26659c65)
Co-authored-by: Mathieu Dupuy <deronnax(a)users.noreply.github.com>
https://bugs.python.org/issue35255
files:
M Doc/faq/windows.rst
diff --git a/Doc/faq/windows.rst b/Doc/faq/windows.rst
index d063e7c550c0..9292a2407a75 100644
--- a/Doc/faq/windows.rst
+++ b/Doc/faq/windows.rst
@@ -301,14 +301,3 @@ In 2.7 and 3.2, :func:`os.kill` is implemented similar to the above function,
with the additional feature of being able to send :kbd:`Ctrl+C` and :kbd:`Ctrl+Break`
to console subprocesses which are designed to handle those signals. See
:func:`os.kill` for further details.
-
-How do I extract the downloaded documentation on Windows?
----------------------------------------------------------
-
-Sometimes, when you download the documentation package to a Windows machine
-using a web browser, the file extension of the saved file ends up being .EXE.
-This is a mistake; the extension should be .TGZ.
-
-Simply rename the downloaded file to have the .TGZ extension, and WinZip will be
-able to handle it. (If your copy of WinZip doesn't, get a newer one from
-https://www.winzip.com.)
1
0
![](https://secure.gravatar.com/avatar/cc7737cd64a84f1b5c61a160798e97ee.jpg?s=120&d=mm&r=g)
26 Nov '18
https://github.com/python/cpython/commit/75e4699b31d1d88abad097ad13466c5c07…
commit: 75e4699b31d1d88abad097ad13466c5c07711324
branch: master
author: Victor Stinner <vstinner(a)redhat.com>
committer: GitHub <noreply(a)github.com>
date: 2018-11-26T17:29:38+01:00
summary:
bpo-35134: Create Include/cpython/unicodeobject.h (GH-10680)
Move unicodeobject.h code surrounded by "#ifndef Py_LIMITED_API"
to a new Include/cpython/unicodeobject.h header file.
files:
A Include/cpython/unicodeobject.h
M Include/unicodeobject.h
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
new file mode 100644
index 000000000000..c76349022485
--- /dev/null
+++ b/Include/cpython/unicodeobject.h
@@ -0,0 +1,1245 @@
+#ifndef Py_CPYTHON_UNICODEOBJECT_H
+# error "this header file must not be included directly"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Py_UNICODE was the native Unicode storage format (code unit) used by
+ Python and represents a single Unicode element in the Unicode type.
+ With PEP 393, Py_UNICODE is deprecated and replaced with a
+ typedef to wchar_t. */
+#define PY_UNICODE_TYPE wchar_t
+typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */;
+
+/* --- Internal Unicode Operations ---------------------------------------- */
+
+/* Since splitting on whitespace is an important use case, and
+ whitespace in most situations is solely ASCII whitespace, we
+ optimize for the common case by using a quick look-up table
+ _Py_ascii_whitespace (see below) with an inlined check.
+
+ */
+#define Py_UNICODE_ISSPACE(ch) \
+ ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
+
+#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
+#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
+#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
+#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
+
+#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
+#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
+#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
+
+#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
+#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
+#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
+#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
+
+#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
+#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
+#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
+
+#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
+
+#define Py_UNICODE_ISALNUM(ch) \
+ (Py_UNICODE_ISALPHA(ch) || \
+ Py_UNICODE_ISDECIMAL(ch) || \
+ Py_UNICODE_ISDIGIT(ch) || \
+ Py_UNICODE_ISNUMERIC(ch))
+
+#define Py_UNICODE_COPY(target, source, length) \
+ memcpy((target), (source), (length)*sizeof(Py_UNICODE))
+
+#define Py_UNICODE_FILL(target, value, length) \
+ do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
+ for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
+ } while (0)
+
+/* macros to work with surrogates */
+#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
+#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
+#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
+/* Join two surrogate characters and return a single Py_UCS4 value. */
+#define Py_UNICODE_JOIN_SURROGATES(high, low) \
+ (((((Py_UCS4)(high) & 0x03FF) << 10) | \
+ ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
+/* high surrogate = top 10 bits added to D800 */
+#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
+/* low surrogate = bottom 10 bits added to DC00 */
+#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
+
+/* Check if substring matches at given offset. The offset must be
+ valid, and the substring must not be empty. */
+
+#define Py_UNICODE_MATCH(string, offset, substring) \
+ ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
+ ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
+ !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
+
+/* --- Unicode Type ------------------------------------------------------- */
+
+/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
+ structure. state.ascii and state.compact are set, and the data
+ immediately follow the structure. utf8_length and wstr_length can be found
+ in the length field; the utf8 pointer is equal to the data pointer. */
+typedef struct {
+ /* There are 4 forms of Unicode strings:
+
+ - compact ascii:
+
+ * structure = PyASCIIObject
+ * test: PyUnicode_IS_COMPACT_ASCII(op)
+ * kind = PyUnicode_1BYTE_KIND
+ * compact = 1
+ * ascii = 1
+ * ready = 1
+ * (length is the length of the utf8 and wstr strings)
+ * (data starts just after the structure)
+ * (since ASCII is decoded from UTF-8, the utf8 string are the data)
+
+ - compact:
+
+ * structure = PyCompactUnicodeObject
+ * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
+ * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
+ PyUnicode_4BYTE_KIND
+ * compact = 1
+ * ready = 1
+ * ascii = 0
+ * utf8 is not shared with data
+ * utf8_length = 0 if utf8 is NULL
+ * wstr is shared with data and wstr_length=length
+ if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+ or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
+ * wstr_length = 0 if wstr is NULL
+ * (data starts just after the structure)
+
+ - legacy string, not ready:
+
+ * structure = PyUnicodeObject
+ * test: kind == PyUnicode_WCHAR_KIND
+ * length = 0 (use wstr_length)
+ * hash = -1
+ * kind = PyUnicode_WCHAR_KIND
+ * compact = 0
+ * ascii = 0
+ * ready = 0
+ * interned = SSTATE_NOT_INTERNED
+ * wstr is not NULL
+ * data.any is NULL
+ * utf8 is NULL
+ * utf8_length = 0
+
+ - legacy string, ready:
+
+ * structure = PyUnicodeObject structure
+ * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
+ * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
+ PyUnicode_4BYTE_KIND
+ * compact = 0
+ * ready = 1
+ * data.any is not NULL
+ * utf8 is shared and utf8_length = length with data.any if ascii = 1
+ * utf8_length = 0 if utf8 is NULL
+ * wstr is shared with data.any and wstr_length = length
+ if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
+ or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
+ * wstr_length = 0 if wstr is NULL
+
+ Compact strings use only one memory block (structure + characters),
+ whereas legacy strings use one block for the structure and one block
+ for characters.
+
+ Legacy strings are created by PyUnicode_FromUnicode() and
+ PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
+ when PyUnicode_READY() is called.
+
+ See also _PyUnicode_CheckConsistency().
+ */
+ PyObject_HEAD
+ Py_ssize_t length; /* Number of code points in the string */
+ Py_hash_t hash; /* Hash value; -1 if not set */
+ struct {
+ /*
+ SSTATE_NOT_INTERNED (0)
+ SSTATE_INTERNED_MORTAL (1)
+ SSTATE_INTERNED_IMMORTAL (2)
+
+ If interned != SSTATE_NOT_INTERNED, the two references from the
+ dictionary to this object are *not* counted in ob_refcnt.
+ */
+ unsigned int interned:2;
+ /* Character size:
+
+ - PyUnicode_WCHAR_KIND (0):
+
+ * character type = wchar_t (16 or 32 bits, depending on the
+ platform)
+
+ - PyUnicode_1BYTE_KIND (1):
+
+ * character type = Py_UCS1 (8 bits, unsigned)
+ * all characters are in the range U+0000-U+00FF (latin1)
+ * if ascii is set, all characters are in the range U+0000-U+007F
+ (ASCII), otherwise at least one character is in the range
+ U+0080-U+00FF
+
+ - PyUnicode_2BYTE_KIND (2):
+
+ * character type = Py_UCS2 (16 bits, unsigned)
+ * all characters are in the range U+0000-U+FFFF (BMP)
+ * at least one character is in the range U+0100-U+FFFF
+
+ - PyUnicode_4BYTE_KIND (4):
+
+ * character type = Py_UCS4 (32 bits, unsigned)
+ * all characters are in the range U+0000-U+10FFFF
+ * at least one character is in the range U+10000-U+10FFFF
+ */
+ unsigned int kind:3;
+ /* Compact is with respect to the allocation scheme. Compact unicode
+ objects only require one memory block while non-compact objects use
+ one block for the PyUnicodeObject struct and another for its data
+ buffer. */
+ unsigned int compact:1;
+ /* The string only contains characters in the range U+0000-U+007F (ASCII)
+ and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
+ set, use the PyASCIIObject structure. */
+ unsigned int ascii:1;
+ /* The ready flag indicates whether the object layout is initialized
+ completely. This means that this is either a compact object, or
+ the data pointer is filled out. The bit is redundant, and helps
+ to minimize the test in PyUnicode_IS_READY(). */
+ unsigned int ready:1;
+ /* Padding to ensure that PyUnicode_DATA() is always aligned to
+ 4 bytes (see issue #19537 on m68k). */
+ unsigned int :24;
+ } state;
+ wchar_t *wstr; /* wchar_t representation (null-terminated) */
+} PyASCIIObject;
+
+/* Non-ASCII strings allocated through PyUnicode_New use the
+ PyCompactUnicodeObject structure. state.compact is set, and the data
+ immediately follow the structure. */
+typedef struct {
+ PyASCIIObject _base;
+ Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
+ * terminating \0. */
+ char *utf8; /* UTF-8 representation (null-terminated) */
+ Py_ssize_t wstr_length; /* Number of code points in wstr, possible
+ * surrogates count as two code points. */
+} PyCompactUnicodeObject;
+
+/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
+ PyUnicodeObject structure. The actual string data is initially in the wstr
+ block, and copied into the data block using _PyUnicode_Ready. */
+typedef struct {
+ PyCompactUnicodeObject _base;
+ union {
+ void *any;
+ Py_UCS1 *latin1;
+ Py_UCS2 *ucs2;
+ Py_UCS4 *ucs4;
+ } data; /* Canonical, smallest-form Unicode buffer */
+} PyUnicodeObject;
+
+/* Fast access macros */
+#define PyUnicode_WSTR_LENGTH(op) \
+ (PyUnicode_IS_COMPACT_ASCII(op) ? \
+ ((PyASCIIObject*)op)->length : \
+ ((PyCompactUnicodeObject*)op)->wstr_length)
+
+/* Returns the deprecated Py_UNICODE representation's size in code units
+ (this includes surrogate pairs as 2 units).
+ If the Py_UNICODE representation is not available, it will be computed
+ on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
+
+#define PyUnicode_GET_SIZE(op) \
+ (assert(PyUnicode_Check(op)), \
+ (((PyASCIIObject *)(op))->wstr) ? \
+ PyUnicode_WSTR_LENGTH(op) : \
+ ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
+ assert(((PyASCIIObject *)(op))->wstr), \
+ PyUnicode_WSTR_LENGTH(op)))
+ /* Py_DEPRECATED(3.3) */
+
+#define PyUnicode_GET_DATA_SIZE(op) \
+ (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
+ /* Py_DEPRECATED(3.3) */
+
+/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
+ representation on demand. Using this macro is very inefficient now,
+ try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
+ use PyUnicode_WRITE() and PyUnicode_READ(). */
+
+#define PyUnicode_AS_UNICODE(op) \
+ (assert(PyUnicode_Check(op)), \
+ (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
+ PyUnicode_AsUnicode(_PyObject_CAST(op)))
+ /* Py_DEPRECATED(3.3) */
+
+#define PyUnicode_AS_DATA(op) \
+ ((const char *)(PyUnicode_AS_UNICODE(op)))
+ /* Py_DEPRECATED(3.3) */
+
+
+/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
+
+/* Values for PyASCIIObject.state: */
+
+/* Interning state. */
+#define SSTATE_NOT_INTERNED 0
+#define SSTATE_INTERNED_MORTAL 1
+#define SSTATE_INTERNED_IMMORTAL 2
+
+/* Return true if the string contains only ASCII characters, or 0 if not. The
+ string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
+ ready. */
+#define PyUnicode_IS_ASCII(op) \
+ (assert(PyUnicode_Check(op)), \
+ assert(PyUnicode_IS_READY(op)), \
+ ((PyASCIIObject*)op)->state.ascii)
+
+/* Return true if the string is compact or 0 if not.
+ No type checks or Ready calls are performed. */
+#define PyUnicode_IS_COMPACT(op) \
+ (((PyASCIIObject*)(op))->state.compact)
+
+/* Return true if the string is a compact ASCII string (use PyASCIIObject
+ structure), or 0 if not. No type checks or Ready calls are performed. */
+#define PyUnicode_IS_COMPACT_ASCII(op) \
+ (((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op))
+
+enum PyUnicode_Kind {
+/* String contains only wstr byte characters. This is only possible
+ when the string was created with a legacy API and _PyUnicode_Ready()
+ has not been called yet. */
+ PyUnicode_WCHAR_KIND = 0,
+/* Return values of the PyUnicode_KIND() macro: */
+ PyUnicode_1BYTE_KIND = 1,
+ PyUnicode_2BYTE_KIND = 2,
+ PyUnicode_4BYTE_KIND = 4
+};
+
+/* Return pointers to the canonical representation cast to unsigned char,
+ Py_UCS2, or Py_UCS4 for direct character access.
+ No checks are performed, use PyUnicode_KIND() before to ensure
+ these will work correctly. */
+
+#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
+#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
+#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
+
+/* Return one of the PyUnicode_*_KIND values defined above. */
+#define PyUnicode_KIND(op) \
+ (assert(PyUnicode_Check(op)), \
+ assert(PyUnicode_IS_READY(op)), \
+ ((PyASCIIObject *)(op))->state.kind)
+
+/* Return a void pointer to the raw unicode buffer. */
+#define _PyUnicode_COMPACT_DATA(op) \
+ (PyUnicode_IS_ASCII(op) ? \
+ ((void*)((PyASCIIObject*)(op) + 1)) : \
+ ((void*)((PyCompactUnicodeObject*)(op) + 1)))
+
+#define _PyUnicode_NONCOMPACT_DATA(op) \
+ (assert(((PyUnicodeObject*)(op))->data.any), \
+ ((((PyUnicodeObject *)(op))->data.any)))
+
+#define PyUnicode_DATA(op) \
+ (assert(PyUnicode_Check(op)), \
+ PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
+ _PyUnicode_NONCOMPACT_DATA(op))
+
+/* In the access macros below, "kind" may be evaluated more than once.
+ All other macro parameters are evaluated exactly once, so it is safe
+ to put side effects into them (such as increasing the index). */
+
+/* Write into the canonical representation, this macro does not do any sanity
+ checks and is intended for usage in loops. The caller should cache the
+ kind and data pointers obtained from other macro calls.
+ index is the index in the string (starts at 0) and value is the new
+ code point value which should be written to that location. */
+#define PyUnicode_WRITE(kind, data, index, value) \
+ do { \
+ switch ((kind)) { \
+ case PyUnicode_1BYTE_KIND: { \
+ ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
+ break; \
+ } \
+ case PyUnicode_2BYTE_KIND: { \
+ ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
+ break; \
+ } \
+ default: { \
+ assert((kind) == PyUnicode_4BYTE_KIND); \
+ ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
+ } \
+ } \
+ } while (0)
+
+/* Read a code point from the string's canonical representation. No checks
+ or ready calls are performed. */
+#define PyUnicode_READ(kind, data, index) \
+ ((Py_UCS4) \
+ ((kind) == PyUnicode_1BYTE_KIND ? \
+ ((const Py_UCS1 *)(data))[(index)] : \
+ ((kind) == PyUnicode_2BYTE_KIND ? \
+ ((const Py_UCS2 *)(data))[(index)] : \
+ ((const Py_UCS4 *)(data))[(index)] \
+ ) \
+ ))
+
+/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
+ calls PyUnicode_KIND() and might call it twice. For single reads, use
+ PyUnicode_READ_CHAR, for multiple consecutive reads callers should
+ cache kind and use PyUnicode_READ instead. */
+#define PyUnicode_READ_CHAR(unicode, index) \
+ (assert(PyUnicode_Check(unicode)), \
+ assert(PyUnicode_IS_READY(unicode)), \
+ (Py_UCS4) \
+ (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
+ ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
+ (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
+ ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
+ ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
+ ) \
+ ))
+
+/* Returns the length of the unicode string. The caller has to make sure that
+ the string has it's canonical representation set before calling
+ this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
+#define PyUnicode_GET_LENGTH(op) \
+ (assert(PyUnicode_Check(op)), \
+ assert(PyUnicode_IS_READY(op)), \
+ ((PyASCIIObject *)(op))->length)
+
+
+/* Fast check to determine whether an object is ready. Equivalent to
+ PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */
+
+#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
+
+/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
+ case. If the canonical representation is not yet set, it will still call
+ _PyUnicode_Ready().
+ Returns 0 on success and -1 on errors. */
+#define PyUnicode_READY(op) \
+ (assert(PyUnicode_Check(op)), \
+ (PyUnicode_IS_READY(op) ? \
+ 0 : _PyUnicode_Ready(_PyObject_CAST(op))))
+
+/* Return a maximum character value which is suitable for creating another
+ string based on op. This is always an approximation but more efficient
+ than iterating over the string. */
+#define PyUnicode_MAX_CHAR_VALUE(op) \
+ (assert(PyUnicode_IS_READY(op)), \
+ (PyUnicode_IS_ASCII(op) ? \
+ (0x7f) : \
+ (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
+ (0xffU) : \
+ (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
+ (0xffffU) : \
+ (0x10ffffU)))))
+
+/* === Public API ========================================================= */
+
+/* --- Plain Py_UNICODE --------------------------------------------------- */
+
+/* With PEP 393, this is the recommended way to allocate a new unicode object.
+ This function will allocate the object and its buffer in a single memory
+ block. Objects created using this function are not resizable. */
+PyAPI_FUNC(PyObject*) PyUnicode_New(
+ Py_ssize_t size, /* Number of code points in the new string */
+ Py_UCS4 maxchar /* maximum code point value in the string */
+ );
+
+/* Initializes the canonical string representation from the deprecated
+ wstr/Py_UNICODE representation. This function is used to convert Unicode
+ objects which were created using the old API to the new flexible format
+ introduced with PEP 393.
+
+ Don't call this function directly, use the public PyUnicode_READY() macro
+ instead. */
+PyAPI_FUNC(int) _PyUnicode_Ready(
+ PyObject *unicode /* Unicode object */
+ );
+
+/* Get a copy of a Unicode string. */
+PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
+ PyObject *unicode
+ );
+
+/* Copy character from one unicode object into another, this function performs
+ character conversion when necessary and falls back to memcpy() if possible.
+
+ Fail if to is too small (smaller than *how_many* or smaller than
+ len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
+ kind(to), or if *to* has more than 1 reference.
+
+ Return the number of written character, or return -1 and raise an exception
+ on error.
+
+ Pseudo-code:
+
+ how_many = min(how_many, len(from) - from_start)
+ to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
+ return how_many
+
+ Note: The function doesn't write a terminating null character.
+ */
+PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
+ PyObject *to,
+ Py_ssize_t to_start,
+ PyObject *from,
+ Py_ssize_t from_start,
+ Py_ssize_t how_many
+ );
+
+/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
+ may crash if parameters are invalid (e.g. if the output string
+ is too short). */
+PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
+ PyObject *to,
+ Py_ssize_t to_start,
+ PyObject *from,
+ Py_ssize_t from_start,
+ Py_ssize_t how_many
+ );
+
+/* Fill a string with a character: write fill_char into
+ unicode[start:start+length].
+
+ Fail if fill_char is bigger than the string maximum character, or if the
+ string has more than 1 reference.
+
+ Return the number of written character, or return -1 and raise an exception
+ on error. */
+PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
+ PyObject *unicode,
+ Py_ssize_t start,
+ Py_ssize_t length,
+ Py_UCS4 fill_char
+ );
+
+/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
+ if parameters are invalid (e.g. if length is longer than the string). */
+PyAPI_FUNC(void) _PyUnicode_FastFill(
+ PyObject *unicode,
+ Py_ssize_t start,
+ Py_ssize_t length,
+ Py_UCS4 fill_char
+ );
+
+/* Create a Unicode Object from the Py_UNICODE buffer u of the given
+ size.
+
+ u may be NULL which causes the contents to be undefined. It is the
+ user's responsibility to fill in the needed data afterwards. Note
+ that modifying the Unicode object contents after construction is
+ only allowed if u was set to NULL.
+
+ The buffer is copied into the new object. */
+PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
+ const Py_UNICODE *u, /* Unicode buffer */
+ Py_ssize_t size /* size of buffer */
+ ) /* Py_DEPRECATED(3.3) */;
+
+/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
+ Scan the string to find the maximum character. */
+PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
+ int kind,
+ const void *buffer,
+ Py_ssize_t size);
+
+/* Create a new string from a buffer of ASCII characters.
+ WARNING: Don't check if the string contains any non-ASCII character. */
+PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
+ const char *buffer,
+ Py_ssize_t size);
+
+/* Compute the maximum character of the substring unicode[start:end].
+ Return 127 for an empty string. */
+PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
+ PyObject *unicode,
+ Py_ssize_t start,
+ Py_ssize_t end);
+
+/* Return a read-only pointer to the Unicode object's internal
+ Py_UNICODE buffer.
+ If the wchar_t/Py_UNICODE representation is not yet available, this
+ function will calculate it. */
+PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
+ PyObject *unicode /* Unicode object */
+ ) /* Py_DEPRECATED(3.3) */;
+
+/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
+ contains null characters. */
+PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
+ PyObject *unicode /* Unicode object */
+ );
+
+/* Return a read-only pointer to the Unicode object's internal
+ Py_UNICODE buffer and save the length at size.
+ If the wchar_t/Py_UNICODE representation is not yet available, this
+ function will calculate it. */
+
+PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
+ PyObject *unicode, /* Unicode object */
+ Py_ssize_t *size /* location where to save the length */
+ ) /* Py_DEPRECATED(3.3) */;
+
+/* Get the maximum ordinal for a Unicode character. */
+PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void) Py_DEPRECATED(3.3);
+
+
+/* --- _PyUnicodeWriter API ----------------------------------------------- */
+
+typedef struct {
+ PyObject *buffer;
+ void *data;
+ enum PyUnicode_Kind kind;
+ Py_UCS4 maxchar;
+ Py_ssize_t size;
+ Py_ssize_t pos;
+
+ /* minimum number of allocated characters (default: 0) */
+ Py_ssize_t min_length;
+
+ /* minimum character (default: 127, ASCII) */
+ Py_UCS4 min_char;
+
+ /* If non-zero, overallocate the buffer (default: 0). */
+ unsigned char overallocate;
+
+ /* If readonly is 1, buffer is a shared string (cannot be modified)
+ and size is set to 0. */
+ unsigned char readonly;
+} _PyUnicodeWriter ;
+
+/* Initialize a Unicode writer.
+ *
+ * By default, the minimum buffer size is 0 character and overallocation is
+ * disabled. Set min_length, min_char and overallocate attributes to control
+ * the allocation of the buffer. */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
+
+/* Prepare the buffer to write 'length' characters
+ with the specified maximum character.
+
+ Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
+ (((MAXCHAR) <= (WRITER)->maxchar \
+ && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
+ ? 0 \
+ : (((LENGTH) == 0) \
+ ? 0 \
+ : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
+ instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
+ Py_ssize_t length, Py_UCS4 maxchar);
+
+/* Prepare the buffer to have at least the kind KIND.
+ For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
+ support characters in range U+000-U+FFFF.
+
+ Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
+ (assert((KIND) != PyUnicode_WCHAR_KIND), \
+ (KIND) <= (WRITER)->kind \
+ ? 0 \
+ : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
+ macro instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+ enum PyUnicode_Kind kind);
+
+/* Append a Unicode character.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
+ Py_UCS4 ch
+ );
+
+/* Append a Unicode string.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
+ PyObject *str /* Unicode string */
+ );
+
+/* Append a substring of a Unicode string.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
+ PyObject *str, /* Unicode string */
+ Py_ssize_t start,
+ Py_ssize_t end
+ );
+
+/* Append an ASCII-encoded byte string.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+ const char *str, /* ASCII-encoded byte string */
+ Py_ssize_t len /* number of bytes, or -1 if unknown */
+ );
+
+/* Append a latin1-encoded byte string.
+ Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
+ const char *str, /* latin1-encoded byte string */
+ Py_ssize_t len /* length in bytes */
+ );
+
+/* Get the value of the writer as a Unicode string. Clear the
+ buffer of the writer. Raise an exception and return NULL
+ on error. */
+PyAPI_FUNC(PyObject *)
+_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
+
+/* Deallocate memory of a writer (clear its internal buffer). */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
+
+
+/* Format the object based on the format_spec, as defined in PEP 3101
+ (Advanced String Formatting). */
+PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
+ _PyUnicodeWriter *writer,
+ PyObject *obj,
+ PyObject *format_spec,
+ Py_ssize_t start,
+ Py_ssize_t end);
+
+PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
+
+/* --- wchar_t support for platforms which support it --------------------- */
+
+#ifdef HAVE_WCHAR_H
+PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
+#endif
+
+/* --- Manage the default encoding ---------------------------------------- */
+
+/* Returns a pointer to the default encoding (UTF-8) of the
+ Unicode object unicode and the size of the encoded representation
+ in bytes stored in *size.
+
+ In case of an error, no *size is set.
+
+ This function caches the UTF-8 encoded string in the unicodeobject
+ and subsequent calls will return the same string. The memory is released
+ when the unicodeobject is deallocated.
+
+ _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
+ support the previous internal function with the same behaviour.
+
+ *** This API is for interpreter INTERNAL USE ONLY and will likely
+ *** be removed or changed in the future.
+
+ *** If you need to access the Unicode object as UTF-8 bytes string,
+ *** please use PyUnicode_AsUTF8String() instead.
+*/
+
+PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
+ PyObject *unicode,
+ Py_ssize_t *size);
+
+#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
+
+/* Returns a pointer to the default encoding (UTF-8) of the
+ Unicode object unicode.
+
+ Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
+ in the unicodeobject.
+
+ _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
+ support the previous internal function with the same behaviour.
+
+ Use of this API is DEPRECATED since no size information can be
+ extracted from the returned data.
+
+ *** This API is for interpreter INTERNAL USE ONLY and will likely
+ *** be removed or changed for Python 3.1.
+
+ *** If you need to access the Unicode object as UTF-8 bytes string,
+ *** please use PyUnicode_AsUTF8String() instead.
+
+*/
+
+PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
+
+#define _PyUnicode_AsString PyUnicode_AsUTF8
+
+/* --- Generic Codecs ----------------------------------------------------- */
+
+/* Encodes a Py_UNICODE buffer of the given size and returns a
+ Python string object. */
+PyAPI_FUNC(PyObject*) PyUnicode_Encode(
+ const Py_UNICODE *s, /* Unicode char buffer */
+ Py_ssize_t size, /* number of Py_UNICODE chars to encode */
+ const char *encoding, /* encoding */
+ const char *errors /* error handling */
+ ) Py_DEPRECATED(3.3);
+
+/* --- UTF-7 Codecs ------------------------------------------------------- */
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
+ int base64SetO, /* Encode RFC2152 Set O characters in base64 */
+ int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
+ const char *errors /* error handling */
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
+ PyObject *unicode, /* Unicode object */
+ int base64SetO, /* Encode RFC2152 Set O characters in base64 */
+ int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
+ const char *errors /* error handling */
+ );
+
+/* --- UTF-8 Codecs ------------------------------------------------------- */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
+ PyObject *unicode,
+ const char *errors);
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
+ const char *errors /* error handling */
+ ) Py_DEPRECATED(3.3);
+
+/* --- UTF-32 Codecs ------------------------------------------------------ */
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
+ const char *errors, /* error handling */
+ int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
+ PyObject *object, /* Unicode object */
+ const char *errors, /* error handling */
+ int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+ );
+
+/* --- UTF-16 Codecs ------------------------------------------------------ */
+
+/* Returns a Python string object holding the UTF-16 encoded value of
+ the Unicode data.
+
+ If byteorder is not 0, output is written according to the following
+ byte order:
+
+ byteorder == -1: little endian
+ byteorder == 0: native byte order (writes a BOM mark)
+ byteorder == 1: big endian
+
+ If byteorder is 0, the output string will always start with the
+ Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
+ prepended.
+
+ Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
+ UCS-2. This trick makes it possible to add full UTF-16 capabilities
+ at a later point without compromising the APIs.
+
+*/
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
+ const char *errors, /* error handling */
+ int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
+ PyObject* unicode, /* Unicode object */
+ const char *errors, /* error handling */
+ int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+ );
+
+/* --- Unicode-Escape Codecs ---------------------------------------------- */
+
+/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+ chars. */
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
+ const char *string, /* Unicode-Escape encoded string */
+ Py_ssize_t length, /* size of string */
+ const char *errors, /* error handling */
+ const char **first_invalid_escape /* on return, points to first
+ invalid escaped char in
+ string. */
+);
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length /* Number of Py_UNICODE chars to encode */
+ ) Py_DEPRECATED(3.3);
+
+/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length /* Number of Py_UNICODE chars to encode */
+ ) Py_DEPRECATED(3.3);
+
+/* --- Unicode Internal Codec --------------------------------------------- */
+
+/* Only for internal use in _codecsmodule.c */
+PyObject *_PyUnicode_DecodeUnicodeInternal(
+ const char *string,
+ Py_ssize_t length,
+ const char *errors
+ );
+
+/* --- Latin-1 Codecs ----------------------------------------------------- */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
+ PyObject* unicode,
+ const char* errors);
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
+ const char *errors /* error handling */
+ ) Py_DEPRECATED(3.3);
+
+/* --- ASCII Codecs ------------------------------------------------------- */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
+ PyObject* unicode,
+ const char* errors);
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
+ const char *errors /* error handling */
+ ) Py_DEPRECATED(3.3);
+
+/* --- Character Map Codecs ----------------------------------------------- */
+
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
+ PyObject *mapping, /* encoding mapping */
+ const char *errors /* error handling */
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
+ PyObject *unicode, /* Unicode object */
+ PyObject *mapping, /* encoding mapping */
+ const char *errors /* error handling */
+ );
+
+/* Translate a Py_UNICODE buffer of the given length by applying a
+ character mapping table to it and return the resulting Unicode
+ object.
+
+ The mapping table must map Unicode ordinal integers to Unicode strings,
+ Unicode ordinal integers or None (causing deletion of the character).
+
+ Mapping tables may be dictionaries or sequences. Unmapped character
+ ordinals (ones which cause a LookupError) are left untouched and
+ are copied as-is.
+
+*/
+PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
+ PyObject *table, /* Translate table */
+ const char *errors /* error handling */
+ ) Py_DEPRECATED(3.3);
+
+/* --- MBCS codecs for Windows -------------------------------------------- */
+
+#ifdef MS_WINDOWS
+PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
+ const Py_UNICODE *data, /* Unicode char buffer */
+ Py_ssize_t length, /* number of Py_UNICODE chars to encode */
+ const char *errors /* error handling */
+ ) Py_DEPRECATED(3.3);
+#endif
+
+/* --- Decimal Encoder ---------------------------------------------------- */
+
+/* Takes a Unicode string holding a decimal value and writes it into
+ an output buffer using standard ASCII digit codes.
+
+ The output buffer has to provide at least length+1 bytes of storage
+ area. The output string is 0-terminated.
+
+ The encoder converts whitespace to ' ', decimal characters to their
+ corresponding ASCII digit and all other Latin-1 characters except
+ \0 as-is. Characters outside this range (Unicode ordinals 1-256)
+ are treated as errors. This includes embedded NULL bytes.
+
+ Error handling is defined by the errors argument:
+
+ NULL or "strict": raise a ValueError
+ "ignore": ignore the wrong characters (these are not copied to the
+ output buffer)
+ "replace": replaces illegal characters with '?'
+
+ Returns 0 on success, -1 on failure.
+
+*/
+
+PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
+ Py_UNICODE *s, /* Unicode buffer */
+ Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
+ char *output, /* Output buffer; must have size >= length */
+ const char *errors /* error handling */
+ ) /* Py_DEPRECATED(3.3) */;
+
+/* Transforms code points that have decimal digit property to the
+ corresponding ASCII digit code points.
+
+ Returns a new Unicode string on success, NULL on failure.
+*/
+
+PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
+ Py_UNICODE *s, /* Unicode buffer */
+ Py_ssize_t length /* Number of Py_UNICODE chars to transform */
+ ) /* Py_DEPRECATED(3.3) */;
+
+/* Coverts a Unicode object holding a decimal value to an ASCII string
+ for using in int, float and complex parsers.
+ Transforms code points that have decimal digit property to the
+ corresponding ASCII digit code points. Transforms spaces to ASCII.
+ Transforms code points starting from the first non-ASCII code point that
+ is neither a decimal digit nor a space to the end into '?'. */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
+ PyObject *unicode /* Unicode object */
+ );
+
+/* --- Methods & Slots ---------------------------------------------------- */
+
+PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
+ PyObject *separator,
+ PyObject *const *items,
+ Py_ssize_t seqlen
+ );
+
+/* Test whether a unicode is equal to ASCII identifier. Return 1 if true,
+ 0 otherwise. The right argument must be ASCII identifier.
+ Any error occurs inside will be cleared before return. */
+PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
+ PyObject *left, /* Left string */
+ _Py_Identifier *right /* Right identifier */
+ );
+
+/* Test whether a unicode is equal to ASCII string. Return 1 if true,
+ 0 otherwise. The right argument must be ASCII-encoded string.
+ Any error occurs inside will be cleared before return. */
+PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
+ PyObject *left,
+ const char *right /* ASCII-encoded string */
+ );
+
+/* Externally visible for str.strip(unicode) */
+PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
+ PyObject *self,
+ int striptype,
+ PyObject *sepobj
+ );
+
+/* Using explicit passed-in values, insert the thousands grouping
+ into the string pointed to by buffer. For the argument descriptions,
+ see Objects/stringlib/localeutil.h */
+PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
+ _PyUnicodeWriter *writer,
+ Py_ssize_t n_buffer,
+ PyObject *digits,
+ Py_ssize_t d_pos,
+ Py_ssize_t n_digits,
+ Py_ssize_t min_width,
+ const char *grouping,
+ PyObject *thousands_sep,
+ Py_UCS4 *maxchar);
+
+/* === Characters Type APIs =============================================== */
+
+/* Helper array used by Py_UNICODE_ISSPACE(). */
+
+PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
+
+/* These should not be used directly. Use the Py_UNICODE_IS* and
+ Py_UNICODE_TO* macros instead.
+
+ These APIs are implemented in Objects/unicodectype.c.
+
+*/
+
+PyAPI_FUNC(int) _PyUnicode_IsLowercase(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsUppercase(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsXidStart(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
+ const Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
+ const Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
+ Py_UCS4 ch /* Unicode character */
+ ) /* Py_DEPRECATED(3.3) */;
+
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
+ Py_UCS4 ch /* Unicode character */
+ ) /* Py_DEPRECATED(3.3) */;
+
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
+ Py_UCS4 ch /* Unicode character */
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
+ Py_UCS4 ch, /* Unicode character */
+ Py_UCS4 *res
+ );
+
+PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
+ Py_UCS4 ch, /* Unicode character */
+ Py_UCS4 *res
+ );
+
+PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
+ Py_UCS4 ch, /* Unicode character */
+ Py_UCS4 *res
+ );
+
+PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
+ Py_UCS4 ch, /* Unicode character */
+ Py_UCS4 *res
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsCased(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_ToDigit(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(double) _PyUnicode_ToNumeric(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsDigit(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsNumeric(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsPrintable(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(int) _PyUnicode_IsAlpha(
+ Py_UCS4 ch /* Unicode character */
+ );
+
+PyAPI_FUNC(size_t) Py_UNICODE_strlen(
+ const Py_UNICODE *u
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
+ Py_UNICODE *s1,
+ const Py_UNICODE *s2) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
+ Py_UNICODE *s1, const Py_UNICODE *s2) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
+ Py_UNICODE *s1,
+ const Py_UNICODE *s2,
+ size_t n) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(int) Py_UNICODE_strcmp(
+ const Py_UNICODE *s1,
+ const Py_UNICODE *s2
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(int) Py_UNICODE_strncmp(
+ const Py_UNICODE *s1,
+ const Py_UNICODE *s2,
+ size_t n
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
+ const Py_UNICODE *s,
+ Py_UNICODE c
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
+ const Py_UNICODE *s,
+ Py_UNICODE c
+ ) Py_DEPRECATED(3.3);
+
+PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
+
+/* Create a copy of a unicode string ending with a nul character. Return NULL
+ and raise a MemoryError exception on memory allocation failure, otherwise
+ return a new allocated buffer (use PyMem_Free() to free the buffer). */
+
+PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
+ PyObject *unicode
+ ) Py_DEPRECATED(3.3);
+
+/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
+PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
+/* Clear all static strings. */
+PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
+
+/* Fast equality check when the inputs are known to be exact unicode types
+ and where the hash values are equal (i.e. a very probable match) */
+PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index e7192852f045..503aeb5d19a6 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -83,16 +83,6 @@ Copyright (c) Corporation for National Research Initiatives.
/* #define HAVE_WCHAR_H */
/* #define HAVE_USABLE_WCHAR_T */
-/* Py_UNICODE was the native Unicode storage format (code unit) used by
- Python and represents a single Unicode element in the Unicode type.
- With PEP 393, Py_UNICODE is deprecated and replaced with a
- typedef to wchar_t. */
-
-#ifndef Py_LIMITED_API
-#define PY_UNICODE_TYPE wchar_t
-typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */;
-#endif
-
/* If the compiler provides a wchar_t type we try to support it
through the interface functions PyUnicode_FromWideChar(),
PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
@@ -113,248 +103,10 @@ typedef uint32_t Py_UCS4;
typedef uint16_t Py_UCS2;
typedef uint8_t Py_UCS1;
-/* --- Internal Unicode Operations ---------------------------------------- */
-
-/* Since splitting on whitespace is an important use case, and
- whitespace in most situations is solely ASCII whitespace, we
- optimize for the common case by using a quick look-up table
- _Py_ascii_whitespace (see below) with an inlined check.
-
- */
-#ifndef Py_LIMITED_API
-#define Py_UNICODE_ISSPACE(ch) \
- ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
-
-#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
-#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
-#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
-#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
-
-#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
-#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
-#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
-
-#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
-#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
-#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
-#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
-
-#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
-#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
-#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
-
-#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
-
-#define Py_UNICODE_ISALNUM(ch) \
- (Py_UNICODE_ISALPHA(ch) || \
- Py_UNICODE_ISDECIMAL(ch) || \
- Py_UNICODE_ISDIGIT(ch) || \
- Py_UNICODE_ISNUMERIC(ch))
-
-#define Py_UNICODE_COPY(target, source, length) \
- memcpy((target), (source), (length)*sizeof(Py_UNICODE))
-
-#define Py_UNICODE_FILL(target, value, length) \
- do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
- for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
- } while (0)
-
-/* macros to work with surrogates */
-#define Py_UNICODE_IS_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDFFF)
-#define Py_UNICODE_IS_HIGH_SURROGATE(ch) (0xD800 <= (ch) && (ch) <= 0xDBFF)
-#define Py_UNICODE_IS_LOW_SURROGATE(ch) (0xDC00 <= (ch) && (ch) <= 0xDFFF)
-/* Join two surrogate characters and return a single Py_UCS4 value. */
-#define Py_UNICODE_JOIN_SURROGATES(high, low) \
- (((((Py_UCS4)(high) & 0x03FF) << 10) | \
- ((Py_UCS4)(low) & 0x03FF)) + 0x10000)
-/* high surrogate = top 10 bits added to D800 */
-#define Py_UNICODE_HIGH_SURROGATE(ch) (0xD800 - (0x10000 >> 10) + ((ch) >> 10))
-/* low surrogate = bottom 10 bits added to DC00 */
-#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))
-
-/* Check if substring matches at given offset. The offset must be
- valid, and the substring must not be empty. */
-
-#define Py_UNICODE_MATCH(string, offset, substring) \
- ((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
- ((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
- !memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
-
-#endif /* Py_LIMITED_API */
-
#ifdef __cplusplus
extern "C" {
#endif
-/* --- Unicode Type ------------------------------------------------------- */
-
-#ifndef Py_LIMITED_API
-
-/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
- structure. state.ascii and state.compact are set, and the data
- immediately follow the structure. utf8_length and wstr_length can be found
- in the length field; the utf8 pointer is equal to the data pointer. */
-typedef struct {
- /* There are 4 forms of Unicode strings:
-
- - compact ascii:
-
- * structure = PyASCIIObject
- * test: PyUnicode_IS_COMPACT_ASCII(op)
- * kind = PyUnicode_1BYTE_KIND
- * compact = 1
- * ascii = 1
- * ready = 1
- * (length is the length of the utf8 and wstr strings)
- * (data starts just after the structure)
- * (since ASCII is decoded from UTF-8, the utf8 string are the data)
-
- - compact:
-
- * structure = PyCompactUnicodeObject
- * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
- * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
- PyUnicode_4BYTE_KIND
- * compact = 1
- * ready = 1
- * ascii = 0
- * utf8 is not shared with data
- * utf8_length = 0 if utf8 is NULL
- * wstr is shared with data and wstr_length=length
- if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
- or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
- * wstr_length = 0 if wstr is NULL
- * (data starts just after the structure)
-
- - legacy string, not ready:
-
- * structure = PyUnicodeObject
- * test: kind == PyUnicode_WCHAR_KIND
- * length = 0 (use wstr_length)
- * hash = -1
- * kind = PyUnicode_WCHAR_KIND
- * compact = 0
- * ascii = 0
- * ready = 0
- * interned = SSTATE_NOT_INTERNED
- * wstr is not NULL
- * data.any is NULL
- * utf8 is NULL
- * utf8_length = 0
-
- - legacy string, ready:
-
- * structure = PyUnicodeObject structure
- * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
- * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
- PyUnicode_4BYTE_KIND
- * compact = 0
- * ready = 1
- * data.any is not NULL
- * utf8 is shared and utf8_length = length with data.any if ascii = 1
- * utf8_length = 0 if utf8 is NULL
- * wstr is shared with data.any and wstr_length = length
- if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
- or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
- * wstr_length = 0 if wstr is NULL
-
- Compact strings use only one memory block (structure + characters),
- whereas legacy strings use one block for the structure and one block
- for characters.
-
- Legacy strings are created by PyUnicode_FromUnicode() and
- PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
- when PyUnicode_READY() is called.
-
- See also _PyUnicode_CheckConsistency().
- */
- PyObject_HEAD
- Py_ssize_t length; /* Number of code points in the string */
- Py_hash_t hash; /* Hash value; -1 if not set */
- struct {
- /*
- SSTATE_NOT_INTERNED (0)
- SSTATE_INTERNED_MORTAL (1)
- SSTATE_INTERNED_IMMORTAL (2)
-
- If interned != SSTATE_NOT_INTERNED, the two references from the
- dictionary to this object are *not* counted in ob_refcnt.
- */
- unsigned int interned:2;
- /* Character size:
-
- - PyUnicode_WCHAR_KIND (0):
-
- * character type = wchar_t (16 or 32 bits, depending on the
- platform)
-
- - PyUnicode_1BYTE_KIND (1):
-
- * character type = Py_UCS1 (8 bits, unsigned)
- * all characters are in the range U+0000-U+00FF (latin1)
- * if ascii is set, all characters are in the range U+0000-U+007F
- (ASCII), otherwise at least one character is in the range
- U+0080-U+00FF
-
- - PyUnicode_2BYTE_KIND (2):
-
- * character type = Py_UCS2 (16 bits, unsigned)
- * all characters are in the range U+0000-U+FFFF (BMP)
- * at least one character is in the range U+0100-U+FFFF
-
- - PyUnicode_4BYTE_KIND (4):
-
- * character type = Py_UCS4 (32 bits, unsigned)
- * all characters are in the range U+0000-U+10FFFF
- * at least one character is in the range U+10000-U+10FFFF
- */
- unsigned int kind:3;
- /* Compact is with respect to the allocation scheme. Compact unicode
- objects only require one memory block while non-compact objects use
- one block for the PyUnicodeObject struct and another for its data
- buffer. */
- unsigned int compact:1;
- /* The string only contains characters in the range U+0000-U+007F (ASCII)
- and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
- set, use the PyASCIIObject structure. */
- unsigned int ascii:1;
- /* The ready flag indicates whether the object layout is initialized
- completely. This means that this is either a compact object, or
- the data pointer is filled out. The bit is redundant, and helps
- to minimize the test in PyUnicode_IS_READY(). */
- unsigned int ready:1;
- /* Padding to ensure that PyUnicode_DATA() is always aligned to
- 4 bytes (see issue #19537 on m68k). */
- unsigned int :24;
- } state;
- wchar_t *wstr; /* wchar_t representation (null-terminated) */
-} PyASCIIObject;
-
-/* Non-ASCII strings allocated through PyUnicode_New use the
- PyCompactUnicodeObject structure. state.compact is set, and the data
- immediately follow the structure. */
-typedef struct {
- PyASCIIObject _base;
- Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
- * terminating \0. */
- char *utf8; /* UTF-8 representation (null-terminated) */
- Py_ssize_t wstr_length; /* Number of code points in wstr, possible
- * surrogates count as two code points. */
-} PyCompactUnicodeObject;
-
-/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
- PyUnicodeObject structure. The actual string data is initially in the wstr
- block, and copied into the data block using _PyUnicode_Ready. */
-typedef struct {
- PyCompactUnicodeObject _base;
- union {
- void *any;
- Py_UCS1 *latin1;
- Py_UCS2 *ucs2;
- Py_UCS4 *ucs4;
- } data; /* Canonical, smallest-form Unicode buffer */
-} PyUnicodeObject;
-#endif
PyAPI_DATA(PyTypeObject) PyUnicode_Type;
PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
@@ -363,209 +115,6 @@ PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
#define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
-/* Fast access macros */
-#ifndef Py_LIMITED_API
-
-#define PyUnicode_WSTR_LENGTH(op) \
- (PyUnicode_IS_COMPACT_ASCII(op) ? \
- ((PyASCIIObject*)op)->length : \
- ((PyCompactUnicodeObject*)op)->wstr_length)
-
-/* Returns the deprecated Py_UNICODE representation's size in code units
- (this includes surrogate pairs as 2 units).
- If the Py_UNICODE representation is not available, it will be computed
- on request. Use PyUnicode_GET_LENGTH() for the length in code points. */
-
-#define PyUnicode_GET_SIZE(op) \
- (assert(PyUnicode_Check(op)), \
- (((PyASCIIObject *)(op))->wstr) ? \
- PyUnicode_WSTR_LENGTH(op) : \
- ((void)PyUnicode_AsUnicode(_PyObject_CAST(op)),\
- assert(((PyASCIIObject *)(op))->wstr), \
- PyUnicode_WSTR_LENGTH(op)))
- /* Py_DEPRECATED(3.3) */
-
-#define PyUnicode_GET_DATA_SIZE(op) \
- (PyUnicode_GET_SIZE(op) * Py_UNICODE_SIZE)
- /* Py_DEPRECATED(3.3) */
-
-/* Alias for PyUnicode_AsUnicode(). This will create a wchar_t/Py_UNICODE
- representation on demand. Using this macro is very inefficient now,
- try to port your code to use the new PyUnicode_*BYTE_DATA() macros or
- use PyUnicode_WRITE() and PyUnicode_READ(). */
-
-#define PyUnicode_AS_UNICODE(op) \
- (assert(PyUnicode_Check(op)), \
- (((PyASCIIObject *)(op))->wstr) ? (((PyASCIIObject *)(op))->wstr) : \
- PyUnicode_AsUnicode(_PyObject_CAST(op)))
- /* Py_DEPRECATED(3.3) */
-
-#define PyUnicode_AS_DATA(op) \
- ((const char *)(PyUnicode_AS_UNICODE(op)))
- /* Py_DEPRECATED(3.3) */
-
-
-/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
-
-/* Values for PyASCIIObject.state: */
-
-/* Interning state. */
-#define SSTATE_NOT_INTERNED 0
-#define SSTATE_INTERNED_MORTAL 1
-#define SSTATE_INTERNED_IMMORTAL 2
-
-/* Return true if the string contains only ASCII characters, or 0 if not. The
- string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be
- ready. */
-#define PyUnicode_IS_ASCII(op) \
- (assert(PyUnicode_Check(op)), \
- assert(PyUnicode_IS_READY(op)), \
- ((PyASCIIObject*)op)->state.ascii)
-
-/* Return true if the string is compact or 0 if not.
- No type checks or Ready calls are performed. */
-#define PyUnicode_IS_COMPACT(op) \
- (((PyASCIIObject*)(op))->state.compact)
-
-/* Return true if the string is a compact ASCII string (use PyASCIIObject
- structure), or 0 if not. No type checks or Ready calls are performed. */
-#define PyUnicode_IS_COMPACT_ASCII(op) \
- (((PyASCIIObject*)op)->state.ascii && PyUnicode_IS_COMPACT(op))
-
-enum PyUnicode_Kind {
-/* String contains only wstr byte characters. This is only possible
- when the string was created with a legacy API and _PyUnicode_Ready()
- has not been called yet. */
- PyUnicode_WCHAR_KIND = 0,
-/* Return values of the PyUnicode_KIND() macro: */
- PyUnicode_1BYTE_KIND = 1,
- PyUnicode_2BYTE_KIND = 2,
- PyUnicode_4BYTE_KIND = 4
-};
-
-/* Return pointers to the canonical representation cast to unsigned char,
- Py_UCS2, or Py_UCS4 for direct character access.
- No checks are performed, use PyUnicode_KIND() before to ensure
- these will work correctly. */
-
-#define PyUnicode_1BYTE_DATA(op) ((Py_UCS1*)PyUnicode_DATA(op))
-#define PyUnicode_2BYTE_DATA(op) ((Py_UCS2*)PyUnicode_DATA(op))
-#define PyUnicode_4BYTE_DATA(op) ((Py_UCS4*)PyUnicode_DATA(op))
-
-/* Return one of the PyUnicode_*_KIND values defined above. */
-#define PyUnicode_KIND(op) \
- (assert(PyUnicode_Check(op)), \
- assert(PyUnicode_IS_READY(op)), \
- ((PyASCIIObject *)(op))->state.kind)
-
-/* Return a void pointer to the raw unicode buffer. */
-#define _PyUnicode_COMPACT_DATA(op) \
- (PyUnicode_IS_ASCII(op) ? \
- ((void*)((PyASCIIObject*)(op) + 1)) : \
- ((void*)((PyCompactUnicodeObject*)(op) + 1)))
-
-#define _PyUnicode_NONCOMPACT_DATA(op) \
- (assert(((PyUnicodeObject*)(op))->data.any), \
- ((((PyUnicodeObject *)(op))->data.any)))
-
-#define PyUnicode_DATA(op) \
- (assert(PyUnicode_Check(op)), \
- PyUnicode_IS_COMPACT(op) ? _PyUnicode_COMPACT_DATA(op) : \
- _PyUnicode_NONCOMPACT_DATA(op))
-
-/* In the access macros below, "kind" may be evaluated more than once.
- All other macro parameters are evaluated exactly once, so it is safe
- to put side effects into them (such as increasing the index). */
-
-/* Write into the canonical representation, this macro does not do any sanity
- checks and is intended for usage in loops. The caller should cache the
- kind and data pointers obtained from other macro calls.
- index is the index in the string (starts at 0) and value is the new
- code point value which should be written to that location. */
-#define PyUnicode_WRITE(kind, data, index, value) \
- do { \
- switch ((kind)) { \
- case PyUnicode_1BYTE_KIND: { \
- ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value); \
- break; \
- } \
- case PyUnicode_2BYTE_KIND: { \
- ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value); \
- break; \
- } \
- default: { \
- assert((kind) == PyUnicode_4BYTE_KIND); \
- ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value); \
- } \
- } \
- } while (0)
-
-/* Read a code point from the string's canonical representation. No checks
- or ready calls are performed. */
-#define PyUnicode_READ(kind, data, index) \
- ((Py_UCS4) \
- ((kind) == PyUnicode_1BYTE_KIND ? \
- ((const Py_UCS1 *)(data))[(index)] : \
- ((kind) == PyUnicode_2BYTE_KIND ? \
- ((const Py_UCS2 *)(data))[(index)] : \
- ((const Py_UCS4 *)(data))[(index)] \
- ) \
- ))
-
-/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it
- calls PyUnicode_KIND() and might call it twice. For single reads, use
- PyUnicode_READ_CHAR, for multiple consecutive reads callers should
- cache kind and use PyUnicode_READ instead. */
-#define PyUnicode_READ_CHAR(unicode, index) \
- (assert(PyUnicode_Check(unicode)), \
- assert(PyUnicode_IS_READY(unicode)), \
- (Py_UCS4) \
- (PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND ? \
- ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)] : \
- (PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND ? \
- ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)] : \
- ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)] \
- ) \
- ))
-
-/* Returns the length of the unicode string. The caller has to make sure that
- the string has it's canonical representation set before calling
- this macro. Call PyUnicode_(FAST_)Ready to ensure that. */
-#define PyUnicode_GET_LENGTH(op) \
- (assert(PyUnicode_Check(op)), \
- assert(PyUnicode_IS_READY(op)), \
- ((PyASCIIObject *)(op))->length)
-
-
-/* Fast check to determine whether an object is ready. Equivalent to
- PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any) */
-
-#define PyUnicode_IS_READY(op) (((PyASCIIObject*)op)->state.ready)
-
-/* PyUnicode_READY() does less work than _PyUnicode_Ready() in the best
- case. If the canonical representation is not yet set, it will still call
- _PyUnicode_Ready().
- Returns 0 on success and -1 on errors. */
-#define PyUnicode_READY(op) \
- (assert(PyUnicode_Check(op)), \
- (PyUnicode_IS_READY(op) ? \
- 0 : _PyUnicode_Ready(_PyObject_CAST(op))))
-
-/* Return a maximum character value which is suitable for creating another
- string based on op. This is always an approximation but more efficient
- than iterating over the string. */
-#define PyUnicode_MAX_CHAR_VALUE(op) \
- (assert(PyUnicode_IS_READY(op)), \
- (PyUnicode_IS_ASCII(op) ? \
- (0x7f) : \
- (PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND ? \
- (0xffU) : \
- (PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND ? \
- (0xffffU) : \
- (0x10ffffU)))))
-
-#endif
-
/* --- Constants ---------------------------------------------------------- */
/* This Unicode character will be used as replacement character during
@@ -577,120 +126,6 @@ enum PyUnicode_Kind {
/* === Public API ========================================================= */
-/* --- Plain Py_UNICODE --------------------------------------------------- */
-
-/* With PEP 393, this is the recommended way to allocate a new unicode object.
- This function will allocate the object and its buffer in a single memory
- block. Objects created using this function are not resizable. */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_New(
- Py_ssize_t size, /* Number of code points in the new string */
- Py_UCS4 maxchar /* maximum code point value in the string */
- );
-#endif
-
-/* Initializes the canonical string representation from the deprecated
- wstr/Py_UNICODE representation. This function is used to convert Unicode
- objects which were created using the old API to the new flexible format
- introduced with PEP 393.
-
- Don't call this function directly, use the public PyUnicode_READY() macro
- instead. */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(int) _PyUnicode_Ready(
- PyObject *unicode /* Unicode object */
- );
-#endif
-
-/* Get a copy of a Unicode string. */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
- PyObject *unicode
- );
-#endif
-
-/* Copy character from one unicode object into another, this function performs
- character conversion when necessary and falls back to memcpy() if possible.
-
- Fail if to is too small (smaller than *how_many* or smaller than
- len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
- kind(to), or if *to* has more than 1 reference.
-
- Return the number of written character, or return -1 and raise an exception
- on error.
-
- Pseudo-code:
-
- how_many = min(how_many, len(from) - from_start)
- to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
- return how_many
-
- Note: The function doesn't write a terminating null character.
- */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
- PyObject *to,
- Py_ssize_t to_start,
- PyObject *from,
- Py_ssize_t from_start,
- Py_ssize_t how_many
- );
-
-/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
- may crash if parameters are invalid (e.g. if the output string
- is too short). */
-PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
- PyObject *to,
- Py_ssize_t to_start,
- PyObject *from,
- Py_ssize_t from_start,
- Py_ssize_t how_many
- );
-#endif
-
-#ifndef Py_LIMITED_API
-/* Fill a string with a character: write fill_char into
- unicode[start:start+length].
-
- Fail if fill_char is bigger than the string maximum character, or if the
- string has more than 1 reference.
-
- Return the number of written character, or return -1 and raise an exception
- on error. */
-PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
- PyObject *unicode,
- Py_ssize_t start,
- Py_ssize_t length,
- Py_UCS4 fill_char
- );
-
-/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
- if parameters are invalid (e.g. if length is longer than the string). */
-PyAPI_FUNC(void) _PyUnicode_FastFill(
- PyObject *unicode,
- Py_ssize_t start,
- Py_ssize_t length,
- Py_UCS4 fill_char
- );
-#endif
-
-/* Create a Unicode Object from the Py_UNICODE buffer u of the given
- size.
-
- u may be NULL which causes the contents to be undefined. It is the
- user's responsibility to fill in the needed data afterwards. Note
- that modifying the Unicode object contents after construction is
- only allowed if u was set to NULL.
-
- The buffer is copied into the new object. */
-
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
- const Py_UNICODE *u, /* Unicode buffer */
- Py_ssize_t size /* size of buffer */
- ) /* Py_DEPRECATED(3.3) */;
-#endif
-
/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
const char *u, /* UTF-8 encoded string */
@@ -703,21 +138,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromString(
const char *u /* UTF-8 encoded string */
);
-#ifndef Py_LIMITED_API
-/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
- Scan the string to find the maximum character. */
-PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
- int kind,
- const void *buffer,
- Py_ssize_t size);
-
-/* Create a new string from a buffer of ASCII characters.
- WARNING: Don't check if the string contains any non-ASCII character. */
-PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
- const char *buffer,
- Py_ssize_t size);
-#endif
-
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
PyAPI_FUNC(PyObject*) PyUnicode_Substring(
PyObject *str,
@@ -725,15 +145,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_Substring(
Py_ssize_t end);
#endif
-#ifndef Py_LIMITED_API
-/* Compute the maximum character of the substring unicode[start:end].
- Return 127 for an empty string. */
-PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
- PyObject *unicode,
- Py_ssize_t start,
- Py_ssize_t end);
-#endif
-
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
/* Copy the string into a UCS4 buffer including the null character if copy_null
is set. Return NULL and raise an exception on error. Raise a SystemError if
@@ -752,33 +163,6 @@ PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
#endif
-#ifndef Py_LIMITED_API
-/* Return a read-only pointer to the Unicode object's internal
- Py_UNICODE buffer.
- If the wchar_t/Py_UNICODE representation is not yet available, this
- function will calculate it. */
-
-PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
- PyObject *unicode /* Unicode object */
- ) /* Py_DEPRECATED(3.3) */;
-
-/* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
- contains null characters. */
-PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
- PyObject *unicode /* Unicode object */
- );
-
-/* Return a read-only pointer to the Unicode object's internal
- Py_UNICODE buffer and save the length at size.
- If the wchar_t/Py_UNICODE representation is not yet available, this
- function will calculate it. */
-
-PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
- PyObject *unicode, /* Unicode object */
- Py_ssize_t *size /* location where to save the length */
- ) /* Py_DEPRECATED(3.3) */;
-#endif
-
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
/* Get the length of the Unicode object. */
@@ -814,11 +198,6 @@ PyAPI_FUNC(int) PyUnicode_WriteChar(
);
#endif
-#ifndef Py_LIMITED_API
-/* Get the maximum ordinal for a Unicode character. */
-PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void) Py_DEPRECATED(3.3);
-#endif
-
/* Resize a Unicode object. The length is the number of characters, except
if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
is the number of Py_UNICODE characters.
@@ -881,141 +260,11 @@ PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
...
);
-#ifndef Py_LIMITED_API
-typedef struct {
- PyObject *buffer;
- void *data;
- enum PyUnicode_Kind kind;
- Py_UCS4 maxchar;
- Py_ssize_t size;
- Py_ssize_t pos;
-
- /* minimum number of allocated characters (default: 0) */
- Py_ssize_t min_length;
-
- /* minimum character (default: 127, ASCII) */
- Py_UCS4 min_char;
-
- /* If non-zero, overallocate the buffer (default: 0). */
- unsigned char overallocate;
-
- /* If readonly is 1, buffer is a shared string (cannot be modified)
- and size is set to 0. */
- unsigned char readonly;
-} _PyUnicodeWriter ;
-
-/* Initialize a Unicode writer.
- *
- * By default, the minimum buffer size is 0 character and overallocation is
- * disabled. Set min_length, min_char and overallocate attributes to control
- * the allocation of the buffer. */
-PyAPI_FUNC(void)
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
-
-/* Prepare the buffer to write 'length' characters
- with the specified maximum character.
-
- Return 0 on success, raise an exception and return -1 on error. */
-#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \
- (((MAXCHAR) <= (WRITER)->maxchar \
- && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \
- ? 0 \
- : (((LENGTH) == 0) \
- ? 0 \
- : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
-
-/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
- instead. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
- Py_ssize_t length, Py_UCS4 maxchar);
-
-/* Prepare the buffer to have at least the kind KIND.
- For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
- support characters in range U+000-U+FFFF.
-
- Return 0 on success, raise an exception and return -1 on error. */
-#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \
- (assert((KIND) != PyUnicode_WCHAR_KIND), \
- (KIND) <= (WRITER)->kind \
- ? 0 \
- : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
-
-/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
- macro instead. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
- enum PyUnicode_Kind kind);
-
-/* Append a Unicode character.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
- Py_UCS4 ch
- );
-
-/* Append a Unicode string.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
- PyObject *str /* Unicode string */
- );
-
-/* Append a substring of a Unicode string.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
- PyObject *str, /* Unicode string */
- Py_ssize_t start,
- Py_ssize_t end
- );
-
-/* Append an ASCII-encoded byte string.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
- const char *str, /* ASCII-encoded byte string */
- Py_ssize_t len /* number of bytes, or -1 if unknown */
- );
-
-/* Append a latin1-encoded byte string.
- Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
- const char *str, /* latin1-encoded byte string */
- Py_ssize_t len /* length in bytes */
- );
-
-/* Get the value of the writer as a Unicode string. Clear the
- buffer of the writer. Raise an exception and return NULL
- on error. */
-PyAPI_FUNC(PyObject *)
-_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
-
-/* Deallocate memory of a writer (clear its internal buffer). */
-PyAPI_FUNC(void)
-_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
-#endif
-
-#ifndef Py_LIMITED_API
-/* Format the object based on the format_spec, as defined in PEP 3101
- (Advanced String Formatting). */
-PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
- _PyUnicodeWriter *writer,
- PyObject *obj,
- PyObject *format_spec,
- Py_ssize_t start,
- Py_ssize_t end);
-#endif
-
PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
const char *u /* UTF-8 encoded string */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
-#endif
/* Use only if you know it's a string */
#define PyUnicode_CHECK_INTERNED(op) \
@@ -1066,10 +315,6 @@ PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
Py_ssize_t *size /* number of characters of the result */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);
-#endif
-
#endif
/* --- Unicode ordinals --------------------------------------------------- */
@@ -1114,60 +359,7 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
/* --- Manage the default encoding ---------------------------------------- */
-/* Returns a pointer to the default encoding (UTF-8) of the
- Unicode object unicode and the size of the encoded representation
- in bytes stored in *size.
-
- In case of an error, no *size is set.
-
- This function caches the UTF-8 encoded string in the unicodeobject
- and subsequent calls will return the same string. The memory is released
- when the unicodeobject is deallocated.
-
- _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
- support the previous internal function with the same behaviour.
-
- *** This API is for interpreter INTERNAL USE ONLY and will likely
- *** be removed or changed in the future.
-
- *** If you need to access the Unicode object as UTF-8 bytes string,
- *** please use PyUnicode_AsUTF8String() instead.
-*/
-
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
- PyObject *unicode,
- Py_ssize_t *size);
-#define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
-#endif
-
-/* Returns a pointer to the default encoding (UTF-8) of the
- Unicode object unicode.
-
- Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
- in the unicodeobject.
-
- _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
- support the previous internal function with the same behaviour.
-
- Use of this API is DEPRECATED since no size information can be
- extracted from the returned data.
-
- *** This API is for interpreter INTERNAL USE ONLY and will likely
- *** be removed or changed for Python 3.1.
-
- *** If you need to access the Unicode object as UTF-8 bytes string,
- *** please use PyUnicode_AsUTF8String() instead.
-
-*/
-
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
-#define _PyUnicode_AsString PyUnicode_AsUTF8
-#endif
-
/* Returns "utf-8". */
-
PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
/* --- Generic Codecs ----------------------------------------------------- */
@@ -1208,18 +400,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
const char *errors /* error handling */
) Py_DEPRECATED(3.6);
-/* Encodes a Py_UNICODE buffer of the given size and returns a
- Python string object. */
-
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_Encode(
- const Py_UNICODE *s, /* Unicode char buffer */
- Py_ssize_t size, /* number of Py_UNICODE chars to encode */
- const char *encoding, /* encoding */
- const char *errors /* error handling */
- ) Py_DEPRECATED(3.3);
-#endif
-
/* Encodes a Unicode object and returns the result as Python
object.
@@ -1277,22 +457,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
Py_ssize_t *consumed /* bytes consumed */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* number of Py_UNICODE chars to encode */
- int base64SetO, /* Encode RFC2152 Set O characters in base64 */
- int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
- const char *errors /* error handling */
- ) Py_DEPRECATED(3.3);
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
- PyObject *unicode, /* Unicode object */
- int base64SetO, /* Encode RFC2152 Set O characters in base64 */
- int base64WhiteSpace, /* Encode whitespace (sp, ht, nl, cr) in base64 */
- const char *errors /* error handling */
- );
-#endif
-
/* --- UTF-8 Codecs ------------------------------------------------------- */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
@@ -1312,18 +476,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
PyObject *unicode /* Unicode object */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
- PyObject *unicode,
- const char *errors);
-
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* number of Py_UNICODE chars to encode */
- const char *errors /* error handling */
- ) Py_DEPRECATED(3.3);
-#endif
-
/* --- UTF-32 Codecs ------------------------------------------------------ */
/* Decodes length bytes from a UTF-32 encoded buffer string and returns
@@ -1391,20 +543,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
*/
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* number of Py_UNICODE chars to encode */
- const char *errors, /* error handling */
- int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
- ) Py_DEPRECATED(3.3);
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
- PyObject *object, /* Unicode object */
- const char *errors, /* error handling */
- int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
- );
-#endif
-
/* --- UTF-16 Codecs ------------------------------------------------------ */
/* Decodes length bytes from a UTF-16 encoded buffer string and returns
@@ -1456,40 +594,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
PyObject *unicode /* Unicode object */
);
-/* Returns a Python string object holding the UTF-16 encoded value of
- the Unicode data.
-
- If byteorder is not 0, output is written according to the following
- byte order:
-
- byteorder == -1: little endian
- byteorder == 0: native byte order (writes a BOM mark)
- byteorder == 1: big endian
-
- If byteorder is 0, the output string will always start with the
- Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
- prepended.
-
- Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
- UCS-2. This trick makes it possible to add full UTF-16 capabilities
- at a later point without compromising the APIs.
-
-*/
-
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* number of Py_UNICODE chars to encode */
- const char *errors, /* error handling */
- int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
- ) Py_DEPRECATED(3.3);
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
- PyObject* unicode, /* Unicode object */
- const char *errors, /* error handling */
- int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
- );
-#endif
-
/* --- Unicode-Escape Codecs ---------------------------------------------- */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
@@ -1498,30 +602,10 @@ PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
const char *errors /* error handling */
);
-#ifndef Py_LIMITED_API
-/* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
- chars. */
-PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
- const char *string, /* Unicode-Escape encoded string */
- Py_ssize_t length, /* size of string */
- const char *errors, /* error handling */
- const char **first_invalid_escape /* on return, points to first
- invalid escaped char in
- string. */
-);
-#endif
-
PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
PyObject *unicode /* Unicode object */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length /* Number of Py_UNICODE chars to encode */
- ) Py_DEPRECATED(3.3);
-#endif
-
/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
@@ -1534,30 +618,9 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
PyObject *unicode /* Unicode object */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length /* Number of Py_UNICODE chars to encode */
- ) Py_DEPRECATED(3.3);
-#endif
-
-/* --- Unicode Internal Codec ---------------------------------------------
-
- Only for internal use in _codecsmodule.c */
-
-#ifndef Py_LIMITED_API
-PyObject *_PyUnicode_DecodeUnicodeInternal(
- const char *string,
- Py_ssize_t length,
- const char *errors
- );
-#endif
-
/* --- Latin-1 Codecs -----------------------------------------------------
- Note: Latin-1 corresponds to the first 256 Unicode ordinals.
-
-*/
+ Note: Latin-1 corresponds to the first 256 Unicode ordinals. */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
const char *string, /* Latin-1 encoded string */
@@ -1569,18 +632,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
PyObject *unicode /* Unicode object */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
- PyObject* unicode,
- const char* errors);
-
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
- const char *errors /* error handling */
- ) Py_DEPRECATED(3.3);
-#endif
-
/* --- ASCII Codecs -------------------------------------------------------
Only 7-bit ASCII data is excepted. All other codes generate errors.
@@ -1597,18 +648,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
PyObject *unicode /* Unicode object */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
- PyObject* unicode,
- const char* errors);
-
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
- const char *errors /* error handling */
- ) Py_DEPRECATED(3.3);
-#endif
-
/* --- Character Map Codecs -----------------------------------------------
This codec uses mappings to encode and decode characters.
@@ -1638,46 +677,9 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
PyObject *mapping /* encoding mapping */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
- PyObject *mapping, /* encoding mapping */
- const char *errors /* error handling */
- ) Py_DEPRECATED(3.3);
-PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
- PyObject *unicode, /* Unicode object */
- PyObject *mapping, /* encoding mapping */
- const char *errors /* error handling */
- );
-#endif
-
-/* Translate a Py_UNICODE buffer of the given length by applying a
- character mapping table to it and return the resulting Unicode
- object.
-
- The mapping table must map Unicode ordinal integers to Unicode strings,
- Unicode ordinal integers or None (causing deletion of the character).
-
- Mapping tables may be dictionaries or sequences. Unmapped character
- ordinals (ones which cause a LookupError) are left untouched and
- are copied as-is.
-
-*/
-
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
- PyObject *table, /* Translate table */
- const char *errors /* error handling */
- ) Py_DEPRECATED(3.3);
-#endif
-
-#ifdef MS_WINDOWS
-
/* --- MBCS codecs for Windows -------------------------------------------- */
+#ifdef MS_WINDOWS
PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
const char *string, /* MBCS encoded string */
Py_ssize_t length, /* size of string */
@@ -1705,14 +707,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
PyObject *unicode /* Unicode object */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
- const Py_UNICODE *data, /* Unicode char buffer */
- Py_ssize_t length, /* number of Py_UNICODE chars to encode */
- const char *errors /* error handling */
- ) Py_DEPRECATED(3.3);
-#endif
-
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
int code_page, /* code page number */
@@ -1723,61 +717,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
#endif /* MS_WINDOWS */
-#ifndef Py_LIMITED_API
-/* --- Decimal Encoder ---------------------------------------------------- */
-
-/* Takes a Unicode string holding a decimal value and writes it into
- an output buffer using standard ASCII digit codes.
-
- The output buffer has to provide at least length+1 bytes of storage
- area. The output string is 0-terminated.
-
- The encoder converts whitespace to ' ', decimal characters to their
- corresponding ASCII digit and all other Latin-1 characters except
- \0 as-is. Characters outside this range (Unicode ordinals 1-256)
- are treated as errors. This includes embedded NULL bytes.
-
- Error handling is defined by the errors argument:
-
- NULL or "strict": raise a ValueError
- "ignore": ignore the wrong characters (these are not copied to the
- output buffer)
- "replace": replaces illegal characters with '?'
-
- Returns 0 on success, -1 on failure.
-
-*/
-
-PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
- Py_UNICODE *s, /* Unicode buffer */
- Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
- char *output, /* Output buffer; must have size >= length */
- const char *errors /* error handling */
- ) /* Py_DEPRECATED(3.3) */;
-
-/* Transforms code points that have decimal digit property to the
- corresponding ASCII digit code points.
-
- Returns a new Unicode string on success, NULL on failure.
-*/
-
-PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
- Py_UNICODE *s, /* Unicode buffer */
- Py_ssize_t length /* Number of Py_UNICODE chars to transform */
- ) /* Py_DEPRECATED(3.3) */;
-
-/* Coverts a Unicode object holding a decimal value to an ASCII string
- for using in int, float and complex parsers.
- Transforms code points that have decimal digit property to the
- corresponding ASCII digit code points. Transforms spaces to ASCII.
- Transforms code points starting from the first non-ASCII code point that
- is neither a decimal digit nor a space to the end into '?'. */
-
-PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
- PyObject *unicode /* Unicode object */
- );
-#endif
-
/* --- Locale encoding --------------------------------------------------- */
#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
@@ -1976,14 +915,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_Join(
PyObject *seq /* Sequence object */
);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
- PyObject *separator,
- PyObject *const *items,
- Py_ssize_t seqlen
- );
-#endif /* Py_LIMITED_API */
-
/* Return 1 if substr matches str[start:end] at the given tail end, 0
otherwise. */
@@ -2047,17 +978,6 @@ PyAPI_FUNC(int) PyUnicode_Compare(
PyObject *right /* Right string */
);
-#ifndef Py_LIMITED_API
-/* Test whether a unicode is equal to ASCII identifier. Return 1 if true,
- 0 otherwise. The right argument must be ASCII identifier.
- Any error occurs inside will be cleared before return. */
-
-PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
- PyObject *left, /* Left string */
- _Py_Identifier *right /* Right identifier */
- );
-#endif
-
/* Compare a Unicode object with C string and return -1, 0, 1 for less than,
equal, and greater than, respectively. It is best to pass only
ASCII-encoded strings, but the function interprets the input string as
@@ -2069,17 +989,6 @@ PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
const char *right /* ASCII-encoded string */
);
-#ifndef Py_LIMITED_API
-/* Test whether a unicode is equal to ASCII string. Return 1 if true,
- 0 otherwise. The right argument must be ASCII-encoded string.
- Any error occurs inside will be cleared before return. */
-
-PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
- PyObject *left,
- const char *right /* ASCII-encoded string */
- );
-#endif
-
/* Rich compare two strings and return one of the following:
- NULL in case an exception was raised
@@ -2121,192 +1030,8 @@ PyAPI_FUNC(int) PyUnicode_Contains(
PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
-#ifndef Py_LIMITED_API
-/* Externally visible for str.strip(unicode) */
-PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
- PyObject *self,
- int striptype,
- PyObject *sepobj
- );
-#endif
-
-/* Using explicit passed-in values, insert the thousands grouping
- into the string pointed to by buffer. For the argument descriptions,
- see Objects/stringlib/localeutil.h */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
- _PyUnicodeWriter *writer,
- Py_ssize_t n_buffer,
- PyObject *digits,
- Py_ssize_t d_pos,
- Py_ssize_t n_digits,
- Py_ssize_t min_width,
- const char *grouping,
- PyObject *thousands_sep,
- Py_UCS4 *maxchar);
-#endif
/* === Characters Type APIs =============================================== */
-/* Helper array used by Py_UNICODE_ISSPACE(). */
-
-#ifndef Py_LIMITED_API
-PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
-
-/* These should not be used directly. Use the Py_UNICODE_IS* and
- Py_UNICODE_TO* macros instead.
-
- These APIs are implemented in Objects/unicodectype.c.
-
-*/
-
-PyAPI_FUNC(int) _PyUnicode_IsLowercase(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsUppercase(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsXidStart(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
- const Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
- const Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
- Py_UCS4 ch /* Unicode character */
- ) /* Py_DEPRECATED(3.3) */;
-
-PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
- Py_UCS4 ch /* Unicode character */
- ) /* Py_DEPRECATED(3.3) */;
-
-PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
- Py_UCS4 ch /* Unicode character */
- ) Py_DEPRECATED(3.3);
-
-PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
- Py_UCS4 ch, /* Unicode character */
- Py_UCS4 *res
- );
-
-PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
- Py_UCS4 ch, /* Unicode character */
- Py_UCS4 *res
- );
-
-PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
- Py_UCS4 ch, /* Unicode character */
- Py_UCS4 *res
- );
-
-PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
- Py_UCS4 ch, /* Unicode character */
- Py_UCS4 *res
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsCased(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_ToDigit(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(double) _PyUnicode_ToNumeric(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsDigit(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsNumeric(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsPrintable(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(int) _PyUnicode_IsAlpha(
- Py_UCS4 ch /* Unicode character */
- );
-
-PyAPI_FUNC(size_t) Py_UNICODE_strlen(
- const Py_UNICODE *u
- ) Py_DEPRECATED(3.3);
-
-PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
- Py_UNICODE *s1,
- const Py_UNICODE *s2) Py_DEPRECATED(3.3);
-
-PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
- Py_UNICODE *s1, const Py_UNICODE *s2) Py_DEPRECATED(3.3);
-
-PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
- Py_UNICODE *s1,
- const Py_UNICODE *s2,
- size_t n) Py_DEPRECATED(3.3);
-
-PyAPI_FUNC(int) Py_UNICODE_strcmp(
- const Py_UNICODE *s1,
- const Py_UNICODE *s2
- ) Py_DEPRECATED(3.3);
-
-PyAPI_FUNC(int) Py_UNICODE_strncmp(
- const Py_UNICODE *s1,
- const Py_UNICODE *s2,
- size_t n
- ) Py_DEPRECATED(3.3);
-
-PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
- const Py_UNICODE *s,
- Py_UNICODE c
- ) Py_DEPRECATED(3.3);
-
-PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
- const Py_UNICODE *s,
- Py_UNICODE c
- ) Py_DEPRECATED(3.3);
-
-PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);
-
-/* Create a copy of a unicode string ending with a nul character. Return NULL
- and raise a MemoryError exception on memory allocation failure, otherwise
- return a new allocated buffer (use PyMem_Free() to free the buffer). */
-
-PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
- PyObject *unicode
- ) Py_DEPRECATED(3.3);
-#endif /* Py_LIMITED_API */
-
#if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
PyObject *op,
@@ -2318,15 +1043,10 @@ PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
#endif
#ifndef Py_LIMITED_API
-/* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/
-PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);
-/* Clear all static strings. */
-PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
-
-/* Fast equality check when the inputs are known to be exact unicode types
- and where the hash values are equal (i.e. a very probable match) */
-PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);
-#endif /* !Py_LIMITED_API */
+# define Py_CPYTHON_UNICODEOBJECT_H
+# include "cpython/unicodeobject.h"
+# undef Py_CPYTHON_UNICODEOBJECT_H
+#endif
#ifdef __cplusplus
}
1
0
https://github.com/python/cpython/commit/5719f275b7153a00a800f5481271a6fc26…
commit: 5719f275b7153a00a800f5481271a6fc26659c65
branch: master
author: Mathieu Dupuy <deronnax(a)users.noreply.github.com>
committer: Julien Palard <julien(a)palard.fr>
date: 2018-11-26T17:13:41+01:00
summary:
Doc: Delete now useless Windows FAQ section (GH-10557)
files:
M Doc/faq/windows.rst
diff --git a/Doc/faq/windows.rst b/Doc/faq/windows.rst
index 74aa52ab1a49..75ebe603cb5b 100644
--- a/Doc/faq/windows.rst
+++ b/Doc/faq/windows.rst
@@ -281,14 +281,3 @@ Use the msvcrt module. This is a standard Windows-specific extension module.
It defines a function ``kbhit()`` which checks whether a keyboard hit is
present, and ``getch()`` which gets one character without echoing it.
-
-How do I extract the downloaded documentation on Windows?
----------------------------------------------------------
-
-Sometimes, when you download the documentation package to a Windows machine
-using a web browser, the file extension of the saved file ends up being .EXE.
-This is a mistake; the extension should be .TGZ.
-
-Simply rename the downloaded file to have the .TGZ extension, and WinZip will be
-able to handle it. (If your copy of WinZip doesn't, get a newer one from
-https://www.winzip.com.)
1
0
https://github.com/python/cpython/commit/6eb996685e25c09499858bee4be258776e…
commit: 6eb996685e25c09499858bee4be258776e603c6f
branch: master
author: Victor Stinner <vstinner(a)redhat.com>
committer: GitHub <noreply(a)github.com>
date: 2018-11-26T17:09:16+01:00
summary:
bpo-35134: Create Include/cpython/object.h (GH-10679)
* Move object.h code surrounded by "#ifndef Py_LIMITED_API"
to a new Include/cpython/object.h header file.
* "typedef struct _typeobject PyTypeObject;" is now always defined
in object.h, but if Py_LIMITED_API is not defined,
Include/cpython/object.h also defines the structure.
* Complete the printfunc comment to mention Py_LIMITED_API define.
files:
A Include/cpython/object.h
M Include/object.h
diff --git a/Include/cpython/object.h b/Include/cpython/object.h
new file mode 100644
index 000000000000..77184c95151a
--- /dev/null
+++ b/Include/cpython/object.h
@@ -0,0 +1,454 @@
+#ifndef Py_CPYTHON_OBJECT_H
+# error "this header file must not be included directly"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/********************* String Literals ****************************************/
+/* This structure helps managing static strings. The basic usage goes like this:
+ Instead of doing
+
+ r = PyObject_CallMethod(o, "foo", "args", ...);
+
+ do
+
+ _Py_IDENTIFIER(foo);
+ ...
+ r = _PyObject_CallMethodId(o, &PyId_foo, "args", ...);
+
+ PyId_foo is a static variable, either on block level or file level. On first
+ usage, the string "foo" is interned, and the structures are linked. On interpreter
+ shutdown, all strings are released (through _PyUnicode_ClearStaticStrings).
+
+ Alternatively, _Py_static_string allows choosing the variable name.
+ _PyUnicode_FromId returns a borrowed reference to the interned string.
+ _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
+*/
+typedef struct _Py_Identifier {
+ struct _Py_Identifier *next;
+ const char* string;
+ PyObject *object;
+} _Py_Identifier;
+
+#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL }
+#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value)
+#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)
+
+/* buffer interface */
+typedef struct bufferinfo {
+ void *buf;
+ PyObject *obj; /* owned reference */
+ Py_ssize_t len;
+ Py_ssize_t itemsize; /* This is Py_ssize_t so it can be
+ pointed to by strides in simple case.*/
+ int readonly;
+ int ndim;
+ char *format;
+ Py_ssize_t *shape;
+ Py_ssize_t *strides;
+ Py_ssize_t *suboffsets;
+ void *internal;
+} Py_buffer;
+
+typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
+typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
+
+/* Maximum number of dimensions */
+#define PyBUF_MAX_NDIM 64
+
+/* Flags for getting buffers */
+#define PyBUF_SIMPLE 0
+#define PyBUF_WRITABLE 0x0001
+/* we used to include an E, backwards compatible alias */
+#define PyBUF_WRITEABLE PyBUF_WRITABLE
+#define PyBUF_FORMAT 0x0004
+#define PyBUF_ND 0x0008
+#define PyBUF_STRIDES (0x0010 | PyBUF_ND)
+#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
+#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
+#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
+#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
+
+#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE)
+#define PyBUF_CONTIG_RO (PyBUF_ND)
+
+#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE)
+#define PyBUF_STRIDED_RO (PyBUF_STRIDES)
+
+#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT)
+#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT)
+
+#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT)
+#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT)
+
+
+#define PyBUF_READ 0x100
+#define PyBUF_WRITE 0x200
+/* End buffer interface */
+
+
+typedef struct {
+ /* Number implementations must check *both*
+ arguments for proper type and implement the necessary conversions
+ in the slot functions themselves. */
+
+ binaryfunc nb_add;
+ binaryfunc nb_subtract;
+ binaryfunc nb_multiply;
+ binaryfunc nb_remainder;
+ binaryfunc nb_divmod;
+ ternaryfunc nb_power;
+ unaryfunc nb_negative;
+ unaryfunc nb_positive;
+ unaryfunc nb_absolute;
+ inquiry nb_bool;
+ unaryfunc nb_invert;
+ binaryfunc nb_lshift;
+ binaryfunc nb_rshift;
+ binaryfunc nb_and;
+ binaryfunc nb_xor;
+ binaryfunc nb_or;
+ unaryfunc nb_int;
+ void *nb_reserved; /* the slot formerly known as nb_long */
+ unaryfunc nb_float;
+
+ binaryfunc nb_inplace_add;
+ binaryfunc nb_inplace_subtract;
+ binaryfunc nb_inplace_multiply;
+ binaryfunc nb_inplace_remainder;
+ ternaryfunc nb_inplace_power;
+ binaryfunc nb_inplace_lshift;
+ binaryfunc nb_inplace_rshift;
+ binaryfunc nb_inplace_and;
+ binaryfunc nb_inplace_xor;
+ binaryfunc nb_inplace_or;
+
+ binaryfunc nb_floor_divide;
+ binaryfunc nb_true_divide;
+ binaryfunc nb_inplace_floor_divide;
+ binaryfunc nb_inplace_true_divide;
+
+ unaryfunc nb_index;
+
+ binaryfunc nb_matrix_multiply;
+ binaryfunc nb_inplace_matrix_multiply;
+} PyNumberMethods;
+
+typedef struct {
+ lenfunc sq_length;
+ binaryfunc sq_concat;
+ ssizeargfunc sq_repeat;
+ ssizeargfunc sq_item;
+ void *was_sq_slice;
+ ssizeobjargproc sq_ass_item;
+ void *was_sq_ass_slice;
+ objobjproc sq_contains;
+
+ binaryfunc sq_inplace_concat;
+ ssizeargfunc sq_inplace_repeat;
+} PySequenceMethods;
+
+typedef struct {
+ lenfunc mp_length;
+ binaryfunc mp_subscript;
+ objobjargproc mp_ass_subscript;
+} PyMappingMethods;
+
+typedef struct {
+ unaryfunc am_await;
+ unaryfunc am_aiter;
+ unaryfunc am_anext;
+} PyAsyncMethods;
+
+typedef struct {
+ getbufferproc bf_getbuffer;
+ releasebufferproc bf_releasebuffer;
+} PyBufferProcs;
+
+/* We can't provide a full compile-time check that limited-API
+ users won't implement tp_print. However, not defining printfunc
+ and making tp_print of a different function pointer type
+ if Py_LIMITED_API is set should at least cause a warning
+ in most cases. */
+typedef int (*printfunc)(PyObject *, FILE *, int);
+
+typedef struct _typeobject {
+ PyObject_VAR_HEAD
+ const char *tp_name; /* For printing, in format "<module>.<name>" */
+ Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */
+
+ /* Methods to implement standard operations */
+
+ destructor tp_dealloc;
+ printfunc tp_print;
+ getattrfunc tp_getattr;
+ setattrfunc tp_setattr;
+ PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2)
+ or tp_reserved (Python 3) */
+ reprfunc tp_repr;
+
+ /* Method suites for standard classes */
+
+ PyNumberMethods *tp_as_number;
+ PySequenceMethods *tp_as_sequence;
+ PyMappingMethods *tp_as_mapping;
+
+ /* More standard operations (here for binary compatibility) */
+
+ hashfunc tp_hash;
+ ternaryfunc tp_call;
+ reprfunc tp_str;
+ getattrofunc tp_getattro;
+ setattrofunc tp_setattro;
+
+ /* Functions to access object as input/output buffer */
+ PyBufferProcs *tp_as_buffer;
+
+ /* Flags to define presence of optional/expanded features */
+ unsigned long tp_flags;
+
+ const char *tp_doc; /* Documentation string */
+
+ /* Assigned meaning in release 2.0 */
+ /* call function for all accessible objects */
+ traverseproc tp_traverse;
+
+ /* delete references to contained objects */
+ inquiry tp_clear;
+
+ /* Assigned meaning in release 2.1 */
+ /* rich comparisons */
+ richcmpfunc tp_richcompare;
+
+ /* weak reference enabler */
+ Py_ssize_t tp_weaklistoffset;
+
+ /* Iterators */
+ getiterfunc tp_iter;
+ iternextfunc tp_iternext;
+
+ /* Attribute descriptor and subclassing stuff */
+ struct PyMethodDef *tp_methods;
+ struct PyMemberDef *tp_members;
+ struct PyGetSetDef *tp_getset;
+ struct _typeobject *tp_base;
+ PyObject *tp_dict;
+ descrgetfunc tp_descr_get;
+ descrsetfunc tp_descr_set;
+ Py_ssize_t tp_dictoffset;
+ initproc tp_init;
+ allocfunc tp_alloc;
+ newfunc tp_new;
+ freefunc tp_free; /* Low-level free-memory routine */
+ inquiry tp_is_gc; /* For PyObject_IS_GC */
+ PyObject *tp_bases;
+ PyObject *tp_mro; /* method resolution order */
+ PyObject *tp_cache;
+ PyObject *tp_subclasses;
+ PyObject *tp_weaklist;
+ destructor tp_del;
+
+ /* Type attribute cache version tag. Added in version 2.6 */
+ unsigned int tp_version_tag;
+
+ destructor tp_finalize;
+
+#ifdef COUNT_ALLOCS
+ /* these must be last and never explicitly initialized */
+ Py_ssize_t tp_allocs;
+ Py_ssize_t tp_frees;
+ Py_ssize_t tp_maxalloc;
+ struct _typeobject *tp_prev;
+ struct _typeobject *tp_next;
+#endif
+} PyTypeObject;
+
+/* The *real* layout of a type object when allocated on the heap */
+typedef struct _heaptypeobject {
+ /* Note: there's a dependency on the order of these members
+ in slotptr() in typeobject.c . */
+ PyTypeObject ht_type;
+ PyAsyncMethods as_async;
+ PyNumberMethods as_number;
+ PyMappingMethods as_mapping;
+ PySequenceMethods as_sequence; /* as_sequence comes after as_mapping,
+ so that the mapping wins when both
+ the mapping and the sequence define
+ a given operator (e.g. __getitem__).
+ see add_operators() in typeobject.c . */
+ PyBufferProcs as_buffer;
+ PyObject *ht_name, *ht_slots, *ht_qualname;
+ struct _dictkeysobject *ht_cached_keys;
+ /* here are optional user slots, followed by the members. */
+} PyHeapTypeObject;
+
+/* access macro to the members which are floating "behind" the object */
+#define PyHeapType_GET_MEMBERS(etype) \
+ ((PyMemberDef *)(((char *)etype) + Py_TYPE(etype)->tp_basicsize))
+
+PyAPI_FUNC(const char *) _PyType_Name(PyTypeObject *);
+PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *);
+PyAPI_FUNC(PyObject *) _PyType_LookupId(PyTypeObject *, _Py_Identifier *);
+PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, _Py_Identifier *);
+PyAPI_FUNC(PyTypeObject *) _PyType_CalculateMetaclass(PyTypeObject *, PyObject *);
+PyAPI_FUNC(PyObject *) _PyType_GetDocFromInternalDoc(const char *, const char *);
+PyAPI_FUNC(PyObject *) _PyType_GetTextSignatureFromInternalDoc(const char *, const char *);
+
+struct _Py_Identifier;
+PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int);
+PyAPI_FUNC(void) _Py_BreakPoint(void);
+PyAPI_FUNC(void) _PyObject_Dump(PyObject *);
+PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *);
+
+PyAPI_FUNC(int) _PyObject_IsAbstract(PyObject *);
+PyAPI_FUNC(PyObject *) _PyObject_GetAttrId(PyObject *, struct _Py_Identifier *);
+PyAPI_FUNC(int) _PyObject_SetAttrId(PyObject *, struct _Py_Identifier *, PyObject *);
+PyAPI_FUNC(int) _PyObject_HasAttrId(PyObject *, struct _Py_Identifier *);
+/* Replacements of PyObject_GetAttr() and _PyObject_GetAttrId() which
+ don't raise AttributeError.
+
+ Return 1 and set *result != NULL if an attribute is found.
+ Return 0 and set *result == NULL if an attribute is not found;
+ an AttributeError is silenced.
+ Return -1 and set *result == NULL if an error other than AttributeError
+ is raised.
+*/
+PyAPI_FUNC(int) _PyObject_LookupAttr(PyObject *, PyObject *, PyObject **);
+PyAPI_FUNC(int) _PyObject_LookupAttrId(PyObject *, struct _Py_Identifier *, PyObject **);
+PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *);
+PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *);
+PyAPI_FUNC(void) PyObject_CallFinalizer(PyObject *);
+PyAPI_FUNC(int) PyObject_CallFinalizerFromDealloc(PyObject *);
+
+/* Same as PyObject_Generic{Get,Set}Attr, but passing the attributes
+ dict as the last parameter. */
+PyAPI_FUNC(PyObject *)
+_PyObject_GenericGetAttrWithDict(PyObject *, PyObject *, PyObject *, int);
+PyAPI_FUNC(int)
+_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *,
+ PyObject *, PyObject *);
+
+/* Helper to look up a builtin object */
+PyAPI_FUNC(PyObject *) _PyObject_GetBuiltin(const char *name);
+
+#define PyType_HasFeature(t,f) (((t)->tp_flags & (f)) != 0)
+
+static inline void _Py_Dealloc_inline(PyObject *op)
+{
+ destructor dealloc = Py_TYPE(op)->tp_dealloc;
+#ifdef Py_TRACE_REFS
+ _Py_ForgetReference(op);
+#else
+ _Py_INC_TPFREES(op);
+#endif
+ (*dealloc)(op);
+}
+#define _Py_Dealloc(op) _Py_Dealloc_inline(op)
+
+
+/* Safely decref `op` and set `op` to `op2`.
+ *
+ * As in case of Py_CLEAR "the obvious" code can be deadly:
+ *
+ * Py_DECREF(op);
+ * op = op2;
+ *
+ * The safe way is:
+ *
+ * Py_SETREF(op, op2);
+ *
+ * That arranges to set `op` to `op2` _before_ decref'ing, so that any code
+ * triggered as a side-effect of `op` getting torn down no longer believes
+ * `op` points to a valid object.
+ *
+ * Py_XSETREF is a variant of Py_SETREF that uses Py_XDECREF instead of
+ * Py_DECREF.
+ */
+
+#define Py_SETREF(op, op2) \
+ do { \
+ PyObject *_py_tmp = _PyObject_CAST(op); \
+ (op) = (op2); \
+ Py_DECREF(_py_tmp); \
+ } while (0)
+
+#define Py_XSETREF(op, op2) \
+ do { \
+ PyObject *_py_tmp = _PyObject_CAST(op); \
+ (op) = (op2); \
+ Py_XDECREF(_py_tmp); \
+ } while (0)
+
+
+PyAPI_DATA(PyTypeObject) _PyNone_Type;
+PyAPI_DATA(PyTypeObject) _PyNotImplemented_Type;
+
+/* Maps Py_LT to Py_GT, ..., Py_GE to Py_LE.
+ * Defined in object.c.
+ */
+PyAPI_DATA(int) _Py_SwappedOp[];
+
+/* This is the old private API, invoked by the macros before 3.2.4.
+ Kept for binary compatibility of extensions using the stable ABI. */
+PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*);
+PyAPI_FUNC(void) _PyTrash_destroy_chain(void);
+
+PyAPI_FUNC(void)
+_PyDebugAllocatorStats(FILE *out, const char *block_name, int num_blocks,
+ size_t sizeof_block);
+PyAPI_FUNC(void)
+_PyObject_DebugTypeStats(FILE *out);
+
+/* Define a pair of assertion macros:
+ _PyObject_ASSERT_FROM(), _PyObject_ASSERT_WITH_MSG() and _PyObject_ASSERT().
+
+ These work like the regular C assert(), in that they will abort the
+ process with a message on stderr if the given condition fails to hold,
+ but compile away to nothing if NDEBUG is defined.
+
+ However, before aborting, Python will also try to call _PyObject_Dump() on
+ the given object. This may be of use when investigating bugs in which a
+ particular object is corrupt (e.g. buggy a tp_visit method in an extension
+ module breaking the garbage collector), to help locate the broken objects.
+
+ The WITH_MSG variant allows you to supply an additional message that Python
+ will attempt to print to stderr, after the object dump. */
+#ifdef NDEBUG
+ /* No debugging: compile away the assertions: */
+# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \
+ ((void)0)
+#else
+ /* With debugging: generate checks: */
+# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \
+ ((expr) \
+ ? (void)(0) \
+ : _PyObject_AssertFailed((obj), Py_STRINGIFY(expr), \
+ (msg), (filename), (lineno), (func)))
+#endif
+
+#define _PyObject_ASSERT_WITH_MSG(obj, expr, msg) \
+ _PyObject_ASSERT_FROM(obj, expr, msg, __FILE__, __LINE__, __func__)
+#define _PyObject_ASSERT(obj, expr) \
+ _PyObject_ASSERT_WITH_MSG(obj, expr, NULL)
+
+#define _PyObject_ASSERT_FAILED_MSG(obj, msg) \
+ _PyObject_AssertFailed((obj), NULL, (msg), __FILE__, __LINE__, __func__)
+
+/* Declare and define _PyObject_AssertFailed() even when NDEBUG is defined,
+ to avoid causing compiler/linker errors when building extensions without
+ NDEBUG against a Python built with NDEBUG defined.
+
+ msg, expr and function can be NULL. */
+PyAPI_FUNC(void) _PyObject_AssertFailed(
+ PyObject *obj,
+ const char *expr,
+ const char *msg,
+ const char *file,
+ int line,
+ const char *function);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Include/object.h b/Include/object.h
index cdcdca85c633..21c29e7bdb00 100644
--- a/Include/object.h
+++ b/Include/object.h
@@ -127,39 +127,6 @@ typedef struct {
#define Py_TYPE(ob) (_PyObject_CAST(ob)->ob_type)
#define Py_SIZE(ob) (_PyVarObject_CAST(ob)->ob_size)
-#ifndef Py_LIMITED_API
-/********************* String Literals ****************************************/
-/* This structure helps managing static strings. The basic usage goes like this:
- Instead of doing
-
- r = PyObject_CallMethod(o, "foo", "args", ...);
-
- do
-
- _Py_IDENTIFIER(foo);
- ...
- r = _PyObject_CallMethodId(o, &PyId_foo, "args", ...);
-
- PyId_foo is a static variable, either on block level or file level. On first
- usage, the string "foo" is interned, and the structures are linked. On interpreter
- shutdown, all strings are released (through _PyUnicode_ClearStaticStrings).
-
- Alternatively, _Py_static_string allows choosing the variable name.
- _PyUnicode_FromId returns a borrowed reference to the interned string.
- _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*.
-*/
-typedef struct _Py_Identifier {
- struct _Py_Identifier *next;
- const char* string;
- PyObject *object;
-} _Py_Identifier;
-
-#define _Py_static_string_init(value) { .next = NULL, .string = value, .object = NULL }
-#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value)
-#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname)
-
-#endif /* !Py_LIMITED_API */
-
/*
Type objects contain a string containing the type name (to help somewhat
in debugging), the allocation parameters (see PyObject_New() and
@@ -186,154 +153,13 @@ typedef int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *);
typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *);
typedef int(*objobjargproc)(PyObject *, PyObject *, PyObject *);
-#ifndef Py_LIMITED_API
-/* buffer interface */
-typedef struct bufferinfo {
- void *buf;
- PyObject *obj; /* owned reference */
- Py_ssize_t len;
- Py_ssize_t itemsize; /* This is Py_ssize_t so it can be
- pointed to by strides in simple case.*/
- int readonly;
- int ndim;
- char *format;
- Py_ssize_t *shape;
- Py_ssize_t *strides;
- Py_ssize_t *suboffsets;
- void *internal;
-} Py_buffer;
-
-typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
-typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
-
-/* Maximum number of dimensions */
-#define PyBUF_MAX_NDIM 64
-
-/* Flags for getting buffers */
-#define PyBUF_SIMPLE 0
-#define PyBUF_WRITABLE 0x0001
-/* we used to include an E, backwards compatible alias */
-#define PyBUF_WRITEABLE PyBUF_WRITABLE
-#define PyBUF_FORMAT 0x0004
-#define PyBUF_ND 0x0008
-#define PyBUF_STRIDES (0x0010 | PyBUF_ND)
-#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
-#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
-#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
-#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
-
-#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE)
-#define PyBUF_CONTIG_RO (PyBUF_ND)
-
-#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE)
-#define PyBUF_STRIDED_RO (PyBUF_STRIDES)
-
-#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT)
-#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT)
-
-#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT)
-#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT)
-
-
-#define PyBUF_READ 0x100
-#define PyBUF_WRITE 0x200
-
-/* End buffer interface */
-#endif /* Py_LIMITED_API */
-
typedef int (*objobjproc)(PyObject *, PyObject *);
typedef int (*visitproc)(PyObject *, void *);
typedef int (*traverseproc)(PyObject *, visitproc, void *);
-#ifndef Py_LIMITED_API
-typedef struct {
- /* Number implementations must check *both*
- arguments for proper type and implement the necessary conversions
- in the slot functions themselves. */
-
- binaryfunc nb_add;
- binaryfunc nb_subtract;
- binaryfunc nb_multiply;
- binaryfunc nb_remainder;
- binaryfunc nb_divmod;
- ternaryfunc nb_power;
- unaryfunc nb_negative;
- unaryfunc nb_positive;
- unaryfunc nb_absolute;
- inquiry nb_bool;
- unaryfunc nb_invert;
- binaryfunc nb_lshift;
- binaryfunc nb_rshift;
- binaryfunc nb_and;
- binaryfunc nb_xor;
- binaryfunc nb_or;
- unaryfunc nb_int;
- void *nb_reserved; /* the slot formerly known as nb_long */
- unaryfunc nb_float;
-
- binaryfunc nb_inplace_add;
- binaryfunc nb_inplace_subtract;
- binaryfunc nb_inplace_multiply;
- binaryfunc nb_inplace_remainder;
- ternaryfunc nb_inplace_power;
- binaryfunc nb_inplace_lshift;
- binaryfunc nb_inplace_rshift;
- binaryfunc nb_inplace_and;
- binaryfunc nb_inplace_xor;
- binaryfunc nb_inplace_or;
-
- binaryfunc nb_floor_divide;
- binaryfunc nb_true_divide;
- binaryfunc nb_inplace_floor_divide;
- binaryfunc nb_inplace_true_divide;
-
- unaryfunc nb_index;
-
- binaryfunc nb_matrix_multiply;
- binaryfunc nb_inplace_matrix_multiply;
-} PyNumberMethods;
-
-typedef struct {
- lenfunc sq_length;
- binaryfunc sq_concat;
- ssizeargfunc sq_repeat;
- ssizeargfunc sq_item;
- void *was_sq_slice;
- ssizeobjargproc sq_ass_item;
- void *was_sq_ass_slice;
- objobjproc sq_contains;
-
- binaryfunc sq_inplace_concat;
- ssizeargfunc sq_inplace_repeat;
-} PySequenceMethods;
-
-typedef struct {
- lenfunc mp_length;
- binaryfunc mp_subscript;
- objobjargproc mp_ass_subscript;
-} PyMappingMethods;
-
-typedef struct {
- unaryfunc am_await;
- unaryfunc am_aiter;
- unaryfunc am_anext;
-} PyAsyncMethods;
-
-typedef struct {
- getbufferproc bf_getbuffer;
- releasebufferproc bf_releasebuffer;
-} PyBufferProcs;
-#endif /* Py_LIMITED_API */
typedef void (*freefunc)(void *);
typedef void (*destructor)(PyObject *);
-#ifndef Py_LIMITED_API
-/* We can't provide a full compile-time check that limited-API
- users won't implement tp_print. However, not defining printfunc
- and making tp_print of a different function pointer type
- should at least cause a warning in most cases. */
-typedef int (*printfunc)(PyObject *, FILE *, int);
-#endif
typedef PyObject *(*getattrfunc)(PyObject *, char *);
typedef PyObject *(*getattrofunc)(PyObject *, PyObject *);
typedef int (*setattrfunc)(PyObject *, char *, PyObject *);
@@ -349,100 +175,8 @@ typedef int (*initproc)(PyObject *, PyObject *, PyObject *);
typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *);
typedef PyObject *(*allocfunc)(struct _typeobject *, Py_ssize_t);
-#ifdef Py_LIMITED_API
-typedef struct _typeobject PyTypeObject; /* opaque */
-#else
-typedef struct _typeobject {
- PyObject_VAR_HEAD
- const char *tp_name; /* For printing, in format "<module>.<name>" */
- Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */
-
- /* Methods to implement standard operations */
-
- destructor tp_dealloc;
- printfunc tp_print;
- getattrfunc tp_getattr;
- setattrfunc tp_setattr;
- PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2)
- or tp_reserved (Python 3) */
- reprfunc tp_repr;
-
- /* Method suites for standard classes */
-
- PyNumberMethods *tp_as_number;
- PySequenceMethods *tp_as_sequence;
- PyMappingMethods *tp_as_mapping;
-
- /* More standard operations (here for binary compatibility) */
-
- hashfunc tp_hash;
- ternaryfunc tp_call;
- reprfunc tp_str;
- getattrofunc tp_getattro;
- setattrofunc tp_setattro;
-
- /* Functions to access object as input/output buffer */
- PyBufferProcs *tp_as_buffer;
-
- /* Flags to define presence of optional/expanded features */
- unsigned long tp_flags;
-
- const char *tp_doc; /* Documentation string */
-
- /* Assigned meaning in release 2.0 */
- /* call function for all accessible objects */
- traverseproc tp_traverse;
-
- /* delete references to contained objects */
- inquiry tp_clear;
-
- /* Assigned meaning in release 2.1 */
- /* rich comparisons */
- richcmpfunc tp_richcompare;
-
- /* weak reference enabler */
- Py_ssize_t tp_weaklistoffset;
-
- /* Iterators */
- getiterfunc tp_iter;
- iternextfunc tp_iternext;
-
- /* Attribute descriptor and subclassing stuff */
- struct PyMethodDef *tp_methods;
- struct PyMemberDef *tp_members;
- struct PyGetSetDef *tp_getset;
- struct _typeobject *tp_base;
- PyObject *tp_dict;
- descrgetfunc tp_descr_get;
- descrsetfunc tp_descr_set;
- Py_ssize_t tp_dictoffset;
- initproc tp_init;
- allocfunc tp_alloc;
- newfunc tp_new;
- freefunc tp_free; /* Low-level free-memory routine */
- inquiry tp_is_gc; /* For PyObject_IS_GC */
- PyObject *tp_bases;
- PyObject *tp_mro; /* method resolution order */
- PyObject *tp_cache;
- PyObject *tp_subclasses;
- PyObject *tp_weaklist;
- destructor tp_del;
-
- /* Type attribute cache version tag. Added in version 2.6 */
- unsigned int tp_version_tag;
-
- destructor tp_finalize;
-
-#ifdef COUNT_ALLOCS
- /* these must be last and never explicitly initialized */
- Py_ssize_t tp_allocs;
- Py_ssize_t tp_frees;
- Py_ssize_t tp_maxalloc;
- struct _typeobject *tp_prev;
- struct _typeobject *tp_next;
-#endif
-} PyTypeObject;
-#endif
+/* In Py_LIMITED_API, PyTypeObject is an opaque structure. */
+typedef struct _typeobject PyTypeObject;
typedef struct{
int slot; /* slot id, see below */
@@ -465,31 +199,6 @@ PyAPI_FUNC(PyObject*) PyType_FromSpecWithBases(PyType_Spec*, PyObject*);
PyAPI_FUNC(void*) PyType_GetSlot(PyTypeObject*, int);
#endif
-#ifndef Py_LIMITED_API
-/* The *real* layout of a type object when allocated on the heap */
-typedef struct _heaptypeobject {
- /* Note: there's a dependency on the order of these members
- in slotptr() in typeobject.c . */
- PyTypeObject ht_type;
- PyAsyncMethods as_async;
- PyNumberMethods as_number;
- PyMappingMethods as_mapping;
- PySequenceMethods as_sequence; /* as_sequence comes after as_mapping,
- so that the mapping wins when both
- the mapping and the sequence define
- a given operator (e.g. __getitem__).
- see add_operators() in typeobject.c . */
- PyBufferProcs as_buffer;
- PyObject *ht_name, *ht_slots, *ht_qualname;
- struct _dictkeysobject *ht_cached_keys;
- /* here are optional user slots, followed by the members. */
-} PyHeapTypeObject;
-
-/* access macro to the members which are floating "behind" the object */
-#define PyHeapType_GET_MEMBERS(etype) \
- ((PyMemberDef *)(((char *)etype) + Py_TYPE(etype)->tp_basicsize))
-#endif
-
/* Generic type check */
PyAPI_FUNC(int) PyType_IsSubtype(PyTypeObject *, PyTypeObject *);
#define PyObject_TypeCheck(ob, tp) \
@@ -509,29 +218,10 @@ PyAPI_FUNC(int) PyType_Ready(PyTypeObject *);
PyAPI_FUNC(PyObject *) PyType_GenericAlloc(PyTypeObject *, Py_ssize_t);
PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *,
PyObject *, PyObject *);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(const char *) _PyType_Name(PyTypeObject *);
-PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *);
-PyAPI_FUNC(PyObject *) _PyType_LookupId(PyTypeObject *, _Py_Identifier *);
-PyAPI_FUNC(PyObject *) _PyObject_LookupSpecial(PyObject *, _Py_Identifier *);
-PyAPI_FUNC(PyTypeObject *) _PyType_CalculateMetaclass(PyTypeObject *, PyObject *);
-#endif
PyAPI_FUNC(unsigned int) PyType_ClearCache(void);
PyAPI_FUNC(void) PyType_Modified(PyTypeObject *);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *) _PyType_GetDocFromInternalDoc(const char *, const char *);
-PyAPI_FUNC(PyObject *) _PyType_GetTextSignatureFromInternalDoc(const char *, const char *);
-#endif
-
/* Generic operations on objects */
-#ifndef Py_LIMITED_API
-struct _Py_Identifier;
-PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int);
-PyAPI_FUNC(void) _Py_BreakPoint(void);
-PyAPI_FUNC(void) _PyObject_Dump(PyObject *);
-PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *);
-#endif
PyAPI_FUNC(PyObject *) PyObject_Repr(PyObject *);
PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *);
PyAPI_FUNC(PyObject *) PyObject_ASCII(PyObject *);
@@ -544,28 +234,7 @@ PyAPI_FUNC(int) PyObject_HasAttrString(PyObject *, const char *);
PyAPI_FUNC(PyObject *) PyObject_GetAttr(PyObject *, PyObject *);
PyAPI_FUNC(int) PyObject_SetAttr(PyObject *, PyObject *, PyObject *);
PyAPI_FUNC(int) PyObject_HasAttr(PyObject *, PyObject *);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(int) _PyObject_IsAbstract(PyObject *);
-PyAPI_FUNC(PyObject *) _PyObject_GetAttrId(PyObject *, struct _Py_Identifier *);
-PyAPI_FUNC(int) _PyObject_SetAttrId(PyObject *, struct _Py_Identifier *, PyObject *);
-PyAPI_FUNC(int) _PyObject_HasAttrId(PyObject *, struct _Py_Identifier *);
-/* Replacements of PyObject_GetAttr() and _PyObject_GetAttrId() which
- don't raise AttributeError.
-
- Return 1 and set *result != NULL if an attribute is found.
- Return 0 and set *result == NULL if an attribute is not found;
- an AttributeError is silenced.
- Return -1 and set *result == NULL if an error other than AttributeError
- is raised.
-*/
-PyAPI_FUNC(int) _PyObject_LookupAttr(PyObject *, PyObject *, PyObject **);
-PyAPI_FUNC(int) _PyObject_LookupAttrId(PyObject *, struct _Py_Identifier *, PyObject **);
-PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *);
-#endif
PyAPI_FUNC(PyObject *) PyObject_SelfIter(PyObject *);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *) _PyObject_NextNotImplemented(PyObject *);
-#endif
PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *);
PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *,
PyObject *, PyObject *);
@@ -577,28 +246,7 @@ PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *);
PyAPI_FUNC(int) PyObject_IsTrue(PyObject *);
PyAPI_FUNC(int) PyObject_Not(PyObject *);
PyAPI_FUNC(int) PyCallable_Check(PyObject *);
-
PyAPI_FUNC(void) PyObject_ClearWeakRefs(PyObject *);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(void) PyObject_CallFinalizer(PyObject *);
-PyAPI_FUNC(int) PyObject_CallFinalizerFromDealloc(PyObject *);
-#endif
-
-#ifndef Py_LIMITED_API
-/* Same as PyObject_Generic{Get,Set}Attr, but passing the attributes
- dict as the last parameter. */
-PyAPI_FUNC(PyObject *)
-_PyObject_GenericGetAttrWithDict(PyObject *, PyObject *, PyObject *, int);
-PyAPI_FUNC(int)
-_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *,
- PyObject *, PyObject *);
-#endif /* !Py_LIMITED_API */
-
-/* Helper to look up a builtin object */
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(PyObject *)
-_PyObject_GetBuiltin(const char *name);
-#endif
/* PyObject_Dir(obj) acts like Python builtins.dir(obj), returning a
list of strings. PyObject_Dir(NULL) is like builtins.dir(),
@@ -689,9 +337,7 @@ given type object has a specified feature.
#define Py_TPFLAGS_HAVE_FINALIZE (1UL << 0)
#ifdef Py_LIMITED_API
-#define PyType_HasFeature(t,f) ((PyType_GetFlags(t) & (f)) != 0)
-#else
-#define PyType_HasFeature(t,f) (((t)->tp_flags & (f)) != 0)
+# define PyType_HasFeature(t,f) ((PyType_GetFlags(t) & (f)) != 0)
#endif
#define PyType_FastSubclass(t,f) PyType_HasFeature(t,f)
@@ -775,7 +421,6 @@ PyAPI_FUNC(void) _Py_ForgetReference(PyObject *);
PyAPI_FUNC(void) _Py_PrintReferences(FILE *);
PyAPI_FUNC(void) _Py_PrintReferenceAddresses(FILE *);
PyAPI_FUNC(void) _Py_AddToAllObjects(PyObject *, int force);
-
#else
/* Without Py_TRACE_REFS, there's little enough to do that we expand code
inline. */
@@ -798,22 +443,6 @@ static inline void _Py_ForgetReference(PyObject *op)
PyAPI_FUNC(void) _Py_Dealloc(PyObject *);
-#ifndef Py_LIMITED_API
-static inline void _Py_Dealloc_inline(PyObject *op)
-{
- destructor dealloc = Py_TYPE(op)->tp_dealloc;
-#ifdef Py_TRACE_REFS
- _Py_ForgetReference(op);
-#else
- _Py_INC_TPFREES(op);
-#endif
- (*dealloc)(op);
-}
-
-# define _Py_Dealloc(op) _Py_Dealloc_inline(op)
-#endif /* !defined(Py_LIMITED_API) */
-
-
static inline void _Py_INCREF(PyObject *op)
{
_Py_INC_REFTOTAL;
@@ -903,42 +532,6 @@ static inline void _Py_XDECREF(PyObject *op)
#define Py_XDECREF(op) _Py_XDECREF(_PyObject_CAST(op))
-#ifndef Py_LIMITED_API
-/* Safely decref `op` and set `op` to `op2`.
- *
- * As in case of Py_CLEAR "the obvious" code can be deadly:
- *
- * Py_DECREF(op);
- * op = op2;
- *
- * The safe way is:
- *
- * Py_SETREF(op, op2);
- *
- * That arranges to set `op` to `op2` _before_ decref'ing, so that any code
- * triggered as a side-effect of `op` getting torn down no longer believes
- * `op` points to a valid object.
- *
- * Py_XSETREF is a variant of Py_SETREF that uses Py_XDECREF instead of
- * Py_DECREF.
- */
-
-#define Py_SETREF(op, op2) \
- do { \
- PyObject *_py_tmp = _PyObject_CAST(op); \
- (op) = (op2); \
- Py_DECREF(_py_tmp); \
- } while (0)
-
-#define Py_XSETREF(op, op2) \
- do { \
- PyObject *_py_tmp = _PyObject_CAST(op); \
- (op) = (op2); \
- Py_XDECREF(_py_tmp); \
- } while (0)
-
-#endif /* ifndef Py_LIMITED_API */
-
/*
These are provided as conveniences to Python runtime embedders, so that
they can have object code that is not dependent on Python compilation flags.
@@ -946,11 +539,6 @@ they can have object code that is not dependent on Python compilation flags.
PyAPI_FUNC(void) Py_IncRef(PyObject *);
PyAPI_FUNC(void) Py_DecRef(PyObject *);
-#ifndef Py_LIMITED_API
-PyAPI_DATA(PyTypeObject) _PyNone_Type;
-PyAPI_DATA(PyTypeObject) _PyNotImplemented_Type;
-#endif /* !Py_LIMITED_API */
-
/*
_Py_NoneStruct is an object of undefined type which can be used in contexts
where NULL (nil) is not suitable (since NULL often means 'error').
@@ -1001,13 +589,6 @@ PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */
} \
} while (0)
-#ifndef Py_LIMITED_API
-/* Maps Py_LT to Py_GT, ..., Py_GE to Py_LE.
- * Defined in object.c.
- */
-PyAPI_DATA(int) _Py_SwappedOp[];
-#endif /* !Py_LIMITED_API */
-
/*
More conventions
@@ -1103,13 +684,6 @@ chain of N deallocations is broken into (N-1)/(PyTrash_UNWIND_LEVEL-1) pieces,
with the call stack never exceeding a depth of PyTrash_UNWIND_LEVEL.
*/
-#ifndef Py_LIMITED_API
-/* This is the old private API, invoked by the macros before 3.2.4.
- Kept for binary compatibility of extensions using the stable ABI. */
-PyAPI_FUNC(void) _PyTrash_deposit_object(PyObject*);
-PyAPI_FUNC(void) _PyTrash_destroy_chain(void);
-#endif /* !Py_LIMITED_API */
-
/* The new thread-safe private API, invoked by the macros below. */
PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyObject*);
PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(void);
@@ -1131,66 +705,13 @@ PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(void);
_PyTrash_thread_deposit_object(_PyObject_CAST(op)); \
} while (0);
-#ifndef Py_LIMITED_API
-PyAPI_FUNC(void)
-_PyDebugAllocatorStats(FILE *out, const char *block_name, int num_blocks,
- size_t sizeof_block);
-PyAPI_FUNC(void)
-_PyObject_DebugTypeStats(FILE *out);
-#endif /* ifndef Py_LIMITED_API */
-
#ifndef Py_LIMITED_API
-/* Define a pair of assertion macros:
- _PyObject_ASSERT_FROM(), _PyObject_ASSERT_WITH_MSG() and _PyObject_ASSERT().
-
- These work like the regular C assert(), in that they will abort the
- process with a message on stderr if the given condition fails to hold,
- but compile away to nothing if NDEBUG is defined.
-
- However, before aborting, Python will also try to call _PyObject_Dump() on
- the given object. This may be of use when investigating bugs in which a
- particular object is corrupt (e.g. buggy a tp_visit method in an extension
- module breaking the garbage collector), to help locate the broken objects.
-
- The WITH_MSG variant allows you to supply an additional message that Python
- will attempt to print to stderr, after the object dump. */
-#ifdef NDEBUG
- /* No debugging: compile away the assertions: */
-# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \
- ((void)0)
-#else
- /* With debugging: generate checks: */
-# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \
- ((expr) \
- ? (void)(0) \
- : _PyObject_AssertFailed((obj), Py_STRINGIFY(expr), \
- (msg), (filename), (lineno), (func)))
+# define Py_CPYTHON_OBJECT_H
+# include "cpython/object.h"
+# undef Py_CPYTHON_OBJECT_H
#endif
-#define _PyObject_ASSERT_WITH_MSG(obj, expr, msg) \
- _PyObject_ASSERT_FROM(obj, expr, msg, __FILE__, __LINE__, __func__)
-#define _PyObject_ASSERT(obj, expr) \
- _PyObject_ASSERT_WITH_MSG(obj, expr, NULL)
-
-#define _PyObject_ASSERT_FAILED_MSG(obj, msg) \
- _PyObject_AssertFailed((obj), NULL, (msg), __FILE__, __LINE__, __func__)
-
-/* Declare and define _PyObject_AssertFailed() even when NDEBUG is defined,
- to avoid causing compiler/linker errors when building extensions without
- NDEBUG against a Python built with NDEBUG defined.
-
- msg, expr and function can be NULL. */
-PyAPI_FUNC(void) _PyObject_AssertFailed(
- PyObject *obj,
- const char *expr,
- const char *msg,
- const char *file,
- int line,
- const char *function);
-#endif /* ifndef Py_LIMITED_API */
-
-
#ifdef __cplusplus
}
#endif
1
0
![](https://secure.gravatar.com/avatar/cc7737cd64a84f1b5c61a160798e97ee.jpg?s=120&d=mm&r=g)
bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623) (GH-10718) (GH-10720)
by Victor Stinner 26 Nov '18
by Victor Stinner 26 Nov '18
26 Nov '18
https://github.com/python/cpython/commit/fc4a44b0c3a69390eca4680d89c2ae5fe9…
commit: fc4a44b0c3a69390eca4680d89c2ae5fe967f882
branch: 3.6
author: Victor Stinner <vstinner(a)redhat.com>
committer: GitHub <noreply(a)github.com>
date: 2018-11-26T17:03:31+01:00
summary:
bpo-33954: Fix _PyUnicode_InsertThousandsGrouping() (GH-10623) (GH-10718) (GH-10720)
Fix str.format(), float.__format__() and complex.__format__() methods
for non-ASCII decimal point when using the "n" formatter.
Rewrite _PyUnicode_InsertThousandsGrouping(): it now requires
a _PyUnicodeWriter object for the buffer and a Python str object
for digits.
(cherry picked from commit 59423e3ddd736387cef8f7632c71954c1859bed0)
(cherry picked from commit 6f5fa1b4be735159e964906ab608dc467476e47c)
files:
A Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst
M Include/unicodeobject.h
M Objects/stringlib/localeutil.h
M Objects/unicodeobject.c
M Python/formatter_unicode.c
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
index f49887387008..c81a38181e49 100644
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -2143,10 +2143,10 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
see Objects/stringlib/localeutil.h */
#ifndef Py_LIMITED_API
PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
- PyObject *unicode,
- Py_ssize_t index,
+ _PyUnicodeWriter *writer,
Py_ssize_t n_buffer,
- void *digits,
+ PyObject *digits,
+ Py_ssize_t d_pos,
Py_ssize_t n_digits,
Py_ssize_t min_width,
const char *grouping,
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst
new file mode 100644
index 000000000000..9bfbe1644e16
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-20-22-33-38.bpo-33954.RzSngM.rst
@@ -0,0 +1,3 @@
+For :meth:`str.format`, :meth:`float.__format__` and
+:meth:`complex.__format__` methods for non-ASCII decimal point when using
+the "n" formatter.
diff --git a/Objects/stringlib/localeutil.h b/Objects/stringlib/localeutil.h
index df501ed05c7e..31fed349caa5 100644
--- a/Objects/stringlib/localeutil.h
+++ b/Objects/stringlib/localeutil.h
@@ -1,28 +1,24 @@
-/* stringlib: locale related helpers implementation */
-
-#include <locale.h>
-
-#if !STRINGLIB_IS_UNICODE
-# error "localeutil.h is specific to Unicode"
-#endif
+/* _PyUnicode_InsertThousandsGrouping() helper functions */
typedef struct {
const char *grouping;
char previous;
Py_ssize_t i; /* Where we're currently pointing in grouping. */
-} STRINGLIB(GroupGenerator);
+} GroupGenerator;
+
static void
-STRINGLIB(GroupGenerator_init)(STRINGLIB(GroupGenerator) *self, const char *grouping)
+GroupGenerator_init(GroupGenerator *self, const char *grouping)
{
self->grouping = grouping;
self->i = 0;
self->previous = 0;
}
+
/* Returns the next grouping, or 0 to signify end. */
static Py_ssize_t
-STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
+GroupGenerator_next(GroupGenerator *self)
{
/* Note that we don't really do much error checking here. If a
grouping string contains just CHAR_MAX, for example, then just
@@ -43,138 +39,44 @@ STRINGLIB(GroupGenerator_next)(STRINGLIB(GroupGenerator) *self)
}
}
+
/* Fill in some digits, leading zeros, and thousands separator. All
are optional, depending on when we're called. */
static void
-STRINGLIB(fill)(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
- Py_ssize_t n_chars, Py_ssize_t n_zeros, STRINGLIB_CHAR* thousands_sep,
- Py_ssize_t thousands_sep_len)
+InsertThousandsGrouping_fill(_PyUnicodeWriter *writer, Py_ssize_t *buffer_pos,
+ PyObject *digits, Py_ssize_t *digits_pos,
+ Py_ssize_t n_chars, Py_ssize_t n_zeros,
+ PyObject *thousands_sep, Py_ssize_t thousands_sep_len,
+ Py_UCS4 *maxchar)
{
- Py_ssize_t i;
+ if (!writer) {
+ /* if maxchar > 127, maxchar is already set */
+ if (*maxchar == 127 && thousands_sep) {
+ Py_UCS4 maxchar2 = PyUnicode_MAX_CHAR_VALUE(thousands_sep);
+ *maxchar = Py_MAX(*maxchar, maxchar2);
+ }
+ return;
+ }
if (thousands_sep) {
- *buffer_end -= thousands_sep_len;
+ *buffer_pos -= thousands_sep_len;
/* Copy the thousands_sep chars into the buffer. */
- memcpy(*buffer_end, thousands_sep,
- thousands_sep_len * STRINGLIB_SIZEOF_CHAR);
- }
-
- *buffer_end -= n_chars;
- *digits_end -= n_chars;
- memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
-
- *buffer_end -= n_zeros;
- for (i = 0; i < n_zeros; i++)
- (*buffer_end)[i] = '0';
-}
-
-/**
- * InsertThousandsGrouping:
- * @buffer: A pointer to the start of a string.
- * @n_buffer: Number of characters in @buffer.
- * @digits: A pointer to the digits we're reading from. If count
- * is non-NULL, this is unused.
- * @n_digits: The number of digits in the string, in which we want
- * to put the grouping chars.
- * @min_width: The minimum width of the digits in the output string.
- * Output will be zero-padded on the left to fill.
- * @grouping: see definition in localeconv().
- * @thousands_sep: see definition in localeconv().
- *
- * There are 2 modes: counting and filling. If @buffer is NULL,
- * we are in counting mode, else filling mode.
- * If counting, the required buffer size is returned.
- * If filling, we know the buffer will be large enough, so we don't
- * need to pass in the buffer size.
- * Inserts thousand grouping characters (as defined by grouping and
- * thousands_sep) into the string between buffer and buffer+n_digits.
- *
- * Return value: 0 on error, else 1. Note that no error can occur if
- * count is non-NULL.
- *
- * This name won't be used, the includer of this file should define
- * it to be the actual function name, based on unicode or string.
- *
- * As closely as possible, this code mimics the logic in decimal.py's
- _insert_thousands_sep().
- **/
-static Py_ssize_t
-STRINGLIB(InsertThousandsGrouping)(
- STRINGLIB_CHAR *buffer,
- Py_ssize_t n_buffer,
- STRINGLIB_CHAR *digits,
- Py_ssize_t n_digits,
- Py_ssize_t min_width,
- const char *grouping,
- STRINGLIB_CHAR *thousands_sep,
- Py_ssize_t thousands_sep_len)
-{
- Py_ssize_t count = 0;
- Py_ssize_t n_zeros;
- int loop_broken = 0;
- int use_separator = 0; /* First time through, don't append the
- separator. They only go between
- groups. */
- STRINGLIB_CHAR *buffer_end = NULL;
- STRINGLIB_CHAR *digits_end = NULL;
- Py_ssize_t l;
- Py_ssize_t n_chars;
- Py_ssize_t remaining = n_digits; /* Number of chars remaining to
- be looked at */
- /* A generator that returns all of the grouping widths, until it
- returns 0. */
- STRINGLIB(GroupGenerator) groupgen;
- STRINGLIB(GroupGenerator_init)(&groupgen, grouping);
-
- if (buffer) {
- buffer_end = buffer + n_buffer;
- digits_end = digits + n_digits;
- }
-
- while ((l = STRINGLIB(GroupGenerator_next)(&groupgen)) > 0) {
- l = Py_MIN(l, Py_MAX(Py_MAX(remaining, min_width), 1));
- n_zeros = Py_MAX(0, l - remaining);
- n_chars = Py_MAX(0, Py_MIN(remaining, l));
-
- /* Use n_zero zero's and n_chars chars */
-
- /* Count only, don't do anything. */
- count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
-
- if (buffer) {
- /* Copy into the output buffer. */
- STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
- use_separator ? thousands_sep : NULL, thousands_sep_len);
- }
-
- /* Use a separator next time. */
- use_separator = 1;
-
- remaining -= n_chars;
- min_width -= l;
-
- if (remaining <= 0 && min_width <= 0) {
- loop_broken = 1;
- break;
- }
- min_width -= thousands_sep_len;
+ _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
+ thousands_sep, 0,
+ thousands_sep_len);
}
- if (!loop_broken) {
- /* We left the loop without using a break statement. */
- l = Py_MAX(Py_MAX(remaining, min_width), 1);
- n_zeros = Py_MAX(0, l - remaining);
- n_chars = Py_MAX(0, Py_MIN(remaining, l));
-
- /* Use n_zero zero's and n_chars chars */
- count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
- if (buffer) {
- /* Copy into the output buffer. */
- STRINGLIB(fill)(&digits_end, &buffer_end, n_chars, n_zeros,
- use_separator ? thousands_sep : NULL, thousands_sep_len);
- }
+ *buffer_pos -= n_chars;
+ *digits_pos -= n_chars;
+ _PyUnicode_FastCopyCharacters(writer->buffer, *buffer_pos,
+ digits, *digits_pos,
+ n_chars);
+
+ if (n_zeros) {
+ *buffer_pos -= n_zeros;
+ enum PyUnicode_Kind kind = PyUnicode_KIND(writer->buffer);
+ void *data = PyUnicode_DATA(writer->buffer);
+ FILL(kind, data, '0', *buffer_pos, n_zeros);
}
- return count;
}
-
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 1f342bd199c7..6bfcddaa64e4 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -203,6 +203,31 @@ static PyObject *unicode_empty = NULL;
return unicode_empty; \
} while (0)
+#define FILL(kind, data, value, start, length) \
+ do { \
+ Py_ssize_t i_ = 0; \
+ assert(kind != PyUnicode_WCHAR_KIND); \
+ switch ((kind)) { \
+ case PyUnicode_1BYTE_KIND: { \
+ unsigned char * to_ = (unsigned char *)((data)) + (start); \
+ memset(to_, (unsigned char)value, (length)); \
+ break; \
+ } \
+ case PyUnicode_2BYTE_KIND: { \
+ Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \
+ for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
+ break; \
+ } \
+ case PyUnicode_4BYTE_KIND: { \
+ Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \
+ for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
+ break; \
+ } \
+ default: assert(0); \
+ } \
+ } while (0)
+
+
/* Forward declaration */
static inline int
_PyUnicodeWriter_WriteCharInline(_PyUnicodeWriter *writer, Py_UCS4 ch);
@@ -779,7 +804,6 @@ ensure_unicode(PyObject *obj)
#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/find_max_char.h"
-#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
#include "stringlib/ucs1lib.h"
@@ -790,7 +814,6 @@ ensure_unicode(PyObject *obj)
#include "stringlib/find.h"
#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
-#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
#include "stringlib/ucs2lib.h"
@@ -801,7 +824,6 @@ ensure_unicode(PyObject *obj)
#include "stringlib/find.h"
#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
-#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
#include "stringlib/ucs4lib.h"
@@ -812,7 +834,6 @@ ensure_unicode(PyObject *obj)
#include "stringlib/find.h"
#include "stringlib/replace.h"
#include "stringlib/find_max_char.h"
-#include "stringlib/localeutil.h"
#include "stringlib/undef.h"
#include "stringlib/unicodedefs.h"
@@ -9407,87 +9428,149 @@ any_find_slice(PyObject* s1, PyObject* s2,
return result;
}
+/* _PyUnicode_InsertThousandsGrouping() helper functions */
+#include "stringlib/localeutil.h"
+
+/**
+ * InsertThousandsGrouping:
+ * @writer: Unicode writer.
+ * @n_buffer: Number of characters in @buffer.
+ * @digits: Digits we're reading from. If count is non-NULL, this is unused.
+ * @d_pos: Start of digits string.
+ * @n_digits: The number of digits in the string, in which we want
+ * to put the grouping chars.
+ * @min_width: The minimum width of the digits in the output string.
+ * Output will be zero-padded on the left to fill.
+ * @grouping: see definition in localeconv().
+ * @thousands_sep: see definition in localeconv().
+ *
+ * There are 2 modes: counting and filling. If @writer is NULL,
+ * we are in counting mode, else filling mode.
+ * If counting, the required buffer size is returned.
+ * If filling, we know the buffer will be large enough, so we don't
+ * need to pass in the buffer size.
+ * Inserts thousand grouping characters (as defined by grouping and
+ * thousands_sep) into @writer.
+ *
+ * Return value: -1 on error, number of characters otherwise.
+ **/
Py_ssize_t
_PyUnicode_InsertThousandsGrouping(
- PyObject *unicode, Py_ssize_t index,
+ _PyUnicodeWriter *writer,
Py_ssize_t n_buffer,
- void *digits, Py_ssize_t n_digits,
+ PyObject *digits,
+ Py_ssize_t d_pos,
+ Py_ssize_t n_digits,
Py_ssize_t min_width,
- const char *grouping, PyObject *thousands_sep,
+ const char *grouping,
+ PyObject *thousands_sep,
Py_UCS4 *maxchar)
{
- unsigned int kind, thousands_sep_kind;
- char *data, *thousands_sep_data;
- Py_ssize_t thousands_sep_len;
- Py_ssize_t len;
-
- if (unicode != NULL) {
- kind = PyUnicode_KIND(unicode);
- data = (char *) PyUnicode_DATA(unicode) + index * kind;
+ if (writer) {
+ assert(digits != NULL);
+ assert(maxchar == NULL);
}
else {
- kind = PyUnicode_1BYTE_KIND;
- data = NULL;
- }
- thousands_sep_kind = PyUnicode_KIND(thousands_sep);
- thousands_sep_data = PyUnicode_DATA(thousands_sep);
- thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
- if (unicode != NULL && thousands_sep_kind != kind) {
- if (thousands_sep_kind < kind) {
- thousands_sep_data = _PyUnicode_AsKind(thousands_sep, kind);
- if (!thousands_sep_data)
- return -1;
- }
- else {
- data = _PyUnicode_AsKind(unicode, thousands_sep_kind);
- if (!data)
- return -1;
- }
+ assert(digits == NULL);
+ assert(maxchar != NULL);
}
+ assert(0 <= d_pos);
+ assert(0 <= n_digits);
+ assert(0 <= min_width);
+ assert(grouping != NULL);
- switch (kind) {
- case PyUnicode_1BYTE_KIND:
- if (unicode != NULL && PyUnicode_IS_ASCII(unicode))
- len = asciilib_InsertThousandsGrouping(
- (Py_UCS1 *) data, n_buffer, (Py_UCS1 *) digits, n_digits,
- min_width, grouping,
- (Py_UCS1 *) thousands_sep_data, thousands_sep_len);
- else
- len = ucs1lib_InsertThousandsGrouping(
- (Py_UCS1*)data, n_buffer, (Py_UCS1*)digits, n_digits,
- min_width, grouping,
- (Py_UCS1 *) thousands_sep_data, thousands_sep_len);
- break;
- case PyUnicode_2BYTE_KIND:
- len = ucs2lib_InsertThousandsGrouping(
- (Py_UCS2 *) data, n_buffer, (Py_UCS2 *) digits, n_digits,
- min_width, grouping,
- (Py_UCS2 *) thousands_sep_data, thousands_sep_len);
- break;
- case PyUnicode_4BYTE_KIND:
- len = ucs4lib_InsertThousandsGrouping(
- (Py_UCS4 *) data, n_buffer, (Py_UCS4 *) digits, n_digits,
- min_width, grouping,
- (Py_UCS4 *) thousands_sep_data, thousands_sep_len);
- break;
- default:
- assert(0);
+ if (digits != NULL) {
+ if (PyUnicode_READY(digits) == -1) {
+ return -1;
+ }
+ }
+ if (PyUnicode_READY(thousands_sep) == -1) {
return -1;
}
- if (unicode != NULL && thousands_sep_kind != kind) {
- if (thousands_sep_kind < kind)
- PyMem_Free(thousands_sep_data);
- else
- PyMem_Free(data);
+
+ Py_ssize_t count = 0;
+ Py_ssize_t n_zeros;
+ int loop_broken = 0;
+ int use_separator = 0; /* First time through, don't append the
+ separator. They only go between
+ groups. */
+ Py_ssize_t buffer_pos;
+ Py_ssize_t digits_pos;
+ Py_ssize_t len;
+ Py_ssize_t n_chars;
+ Py_ssize_t remaining = n_digits; /* Number of chars remaining to
+ be looked at */
+ /* A generator that returns all of the grouping widths, until it
+ returns 0. */
+ GroupGenerator groupgen;
+ GroupGenerator_init(&groupgen, grouping);
+ const Py_ssize_t thousands_sep_len = PyUnicode_GET_LENGTH(thousands_sep);
+
+ /* if digits are not grouped, thousands separator
+ should be an empty string */
+ assert(!(grouping[0] == CHAR_MAX && thousands_sep_len != 0));
+
+ digits_pos = d_pos + n_digits;
+ if (writer) {
+ buffer_pos = writer->pos + n_buffer;
+ assert(buffer_pos <= PyUnicode_GET_LENGTH(writer->buffer));
+ assert(digits_pos <= PyUnicode_GET_LENGTH(digits));
}
- if (unicode == NULL) {
+ else {
+ buffer_pos = n_buffer;
+ }
+
+ if (!writer) {
*maxchar = 127;
- if (len != n_digits) {
- *maxchar = Py_MAX(*maxchar,
- PyUnicode_MAX_CHAR_VALUE(thousands_sep));
+ }
+
+ while ((len = GroupGenerator_next(&groupgen)) > 0) {
+ len = Py_MIN(len, Py_MAX(Py_MAX(remaining, min_width), 1));
+ n_zeros = Py_MAX(0, len - remaining);
+ n_chars = Py_MAX(0, Py_MIN(remaining, len));
+
+ /* Use n_zero zero's and n_chars chars */
+
+ /* Count only, don't do anything. */
+ count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+
+ /* Copy into the writer. */
+ InsertThousandsGrouping_fill(writer, &buffer_pos,
+ digits, &digits_pos,
+ n_chars, n_zeros,
+ use_separator ? thousands_sep : NULL,
+ thousands_sep_len, maxchar);
+
+ /* Use a separator next time. */
+ use_separator = 1;
+
+ remaining -= n_chars;
+ min_width -= len;
+
+ if (remaining <= 0 && min_width <= 0) {
+ loop_broken = 1;
+ break;
}
+ min_width -= thousands_sep_len;
+ }
+ if (!loop_broken) {
+ /* We left the loop without using a break statement. */
+
+ len = Py_MAX(Py_MAX(remaining, min_width), 1);
+ n_zeros = Py_MAX(0, len - remaining);
+ n_chars = Py_MAX(0, Py_MIN(remaining, len));
+
+ /* Use n_zero zero's and n_chars chars */
+ count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+
+ /* Copy into the writer. */
+ InsertThousandsGrouping_fill(writer, &buffer_pos,
+ digits, &digits_pos,
+ n_chars, n_zeros,
+ use_separator ? thousands_sep : NULL,
+ thousands_sep_len, maxchar);
}
- return len;
+ return count;
}
@@ -10174,30 +10257,6 @@ _PyUnicode_JoinArray(PyObject *separator, PyObject **items, Py_ssize_t seqlen)
return NULL;
}
-#define FILL(kind, data, value, start, length) \
- do { \
- Py_ssize_t i_ = 0; \
- assert(kind != PyUnicode_WCHAR_KIND); \
- switch ((kind)) { \
- case PyUnicode_1BYTE_KIND: { \
- unsigned char * to_ = (unsigned char *)((data)) + (start); \
- memset(to_, (unsigned char)value, (length)); \
- break; \
- } \
- case PyUnicode_2BYTE_KIND: { \
- Py_UCS2 * to_ = (Py_UCS2 *)((data)) + (start); \
- for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
- break; \
- } \
- case PyUnicode_4BYTE_KIND: { \
- Py_UCS4 * to_ = (Py_UCS4 *)((data)) + (start); \
- for (; i_ < (length); ++i_, ++to_) *to_ = (value); \
- break; \
- } \
- default: assert(0); \
- } \
- } while (0)
-
void
_PyUnicode_FastFill(PyObject *unicode, Py_ssize_t start, Py_ssize_t length,
Py_UCS4 fill_char)
diff --git a/Python/formatter_unicode.c b/Python/formatter_unicode.c
index c9a2c99dd257..4a38f7a30509 100644
--- a/Python/formatter_unicode.c
+++ b/Python/formatter_unicode.c
@@ -462,7 +462,8 @@ parse_number(PyObject *s, Py_ssize_t pos, Py_ssize_t end,
/* not all fields of format are used. for example, precision is
unused. should this take discrete params in order to be more clear
about what it does? or is passing a single format parameter easier
- and more efficient enough to justify a little obfuscation? */
+ and more efficient enough to justify a little obfuscation?
+ Return -1 on error. */
static Py_ssize_t
calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Py_UCS4 sign_char, PyObject *number, Py_ssize_t n_start,
@@ -541,9 +542,12 @@ calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
Py_UCS4 grouping_maxchar;
spec->n_grouped_digits = _PyUnicode_InsertThousandsGrouping(
NULL, 0,
- 0, NULL,
- spec->n_digits, spec->n_min_width,
+ NULL, 0, spec->n_digits,
+ spec->n_min_width,
locale->grouping, locale->thousands_sep, &grouping_maxchar);
+ if (spec->n_grouped_digits == -1) {
+ return -1;
+ }
*maxchar = Py_MAX(*maxchar, grouping_maxchar);
}
@@ -637,26 +641,14 @@ fill_number(_PyUnicodeWriter *writer, const NumberFieldWidths *spec,
/* Only for type 'c' special case, it has no digits. */
if (spec->n_digits != 0) {
/* Fill the digits with InsertThousandsGrouping. */
- char *pdigits;
- if (PyUnicode_READY(digits))
- return -1;
- pdigits = PyUnicode_DATA(digits);
- if (PyUnicode_KIND(digits) < kind) {
- pdigits = _PyUnicode_AsKind(digits, kind);
- if (pdigits == NULL)
- return -1;
- }
r = _PyUnicode_InsertThousandsGrouping(
- writer->buffer, writer->pos,
- spec->n_grouped_digits,
- pdigits + kind * d_pos,
- spec->n_digits, spec->n_min_width,
+ writer, spec->n_grouped_digits,
+ digits, d_pos, spec->n_digits,
+ spec->n_min_width,
locale->grouping, locale->thousands_sep, NULL);
if (r == -1)
return -1;
assert(r == spec->n_grouped_digits);
- if (PyUnicode_KIND(digits) < kind)
- PyMem_Free(pdigits);
d_pos += spec->n_digits;
}
if (toupper) {
@@ -996,6 +988,9 @@ format_long_internal(PyObject *value, const InternalFormatSpec *format,
n_total = calc_number_widths(&spec, n_prefix, sign_char, tmp, inumeric_chars,
inumeric_chars + n_digits, n_remainder, 0,
&locale, format, &maxchar);
+ if (n_total == -1) {
+ goto done;
+ }
/* Allocate the memory. */
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
@@ -1141,6 +1136,9 @@ format_float_internal(PyObject *value,
n_total = calc_number_widths(&spec, 0, sign_char, unicode_tmp, index,
index + n_digits, n_remainder, has_decimal,
&locale, format, &maxchar);
+ if (n_total == -1) {
+ goto done;
+ }
/* Allocate the memory. */
if (_PyUnicodeWriter_Prepare(writer, n_total, maxchar) == -1)
@@ -1324,6 +1322,9 @@ format_complex_internal(PyObject *value,
i_re, i_re + n_re_digits, n_re_remainder,
re_has_decimal, &locale, &tmp_format,
&maxchar);
+ if (n_re_total == -1) {
+ goto done;
+ }
/* Same formatting, but always include a sign, unless the real part is
* going to be omitted, in which case we use whatever sign convention was
@@ -1334,6 +1335,9 @@ format_complex_internal(PyObject *value,
i_im, i_im + n_im_digits, n_im_remainder,
im_has_decimal, &locale, &tmp_format,
&maxchar);
+ if (n_im_total == -1) {
+ goto done;
+ }
if (skip_re)
n_re_total = 0;
1
0
https://github.com/python/cpython/commit/282c03d45d2d766c55904a4eb766923a2c…
commit: 282c03d45d2d766c55904a4eb766923a2c459124
branch: master
author: Victor Stinner <vstinner(a)redhat.com>
committer: GitHub <noreply(a)github.com>
date: 2018-11-26T17:03:16+01:00
summary:
pythoninfo: log more environment variable (GH-10719)
Log TZ to debug a timezone issue... and a few more :-)
files:
M Lib/test/pythoninfo.py
diff --git a/Lib/test/pythoninfo.py b/Lib/test/pythoninfo.py
index 9257fdf332a6..30e6f21c2b48 100644
--- a/Lib/test/pythoninfo.py
+++ b/Lib/test/pythoninfo.py
@@ -199,32 +199,73 @@ def format_groups(groups):
call_func(info_add, 'os.cpu_count', os, 'cpu_count')
call_func(info_add, 'os.loadavg', os, 'getloadavg')
- # Get environment variables: filter to list
- # to not leak sensitive information
- ENV_VARS = (
+ # Environment variables used by the stdlib and tests. Don't log the full
+ # environment: filter to list to not leak sensitive information.
+ #
+ # HTTP_PROXY is not logged because it can contain a password.
+ ENV_VARS = frozenset((
+ "APPDATA",
+ "AR",
+ "ARCHFLAGS",
+ "ARFLAGS",
+ "AUDIODEV",
"CC",
+ "CFLAGS",
+ "COLUMNS",
+ "COMPUTERNAME",
"COMSPEC",
+ "CPP",
+ "CPPFLAGS",
"DISPLAY",
+ "DISTUTILS_DEBUG",
"DISTUTILS_USE_SDK",
"DYLD_LIBRARY_PATH",
+ "ENSUREPIP_OPTIONS",
+ "HISTORY_FILE",
"HOME",
"HOMEDRIVE",
"HOMEPATH",
+ "IDLESTARTUP",
"LANG",
+ "LDFLAGS",
+ "LDSHARED",
"LD_LIBRARY_PATH",
+ "LINES",
"MACOSX_DEPLOYMENT_TARGET",
+ "MAILCAPS",
"MAKEFLAGS",
+ "MIXERDEV",
"MSSDK",
"PATH",
+ "PATHEXT",
+ "PIP_CONFIG_FILE",
+ "PLAT",
+ "POSIXLY_CORRECT",
+ "PY_SAX_PARSER",
+ "ProgramFiles",
+ "ProgramFiles(x86)",
+ "RUNNING_ON_VALGRIND",
"SDK_TOOLS_BIN",
+ "SERVER_SOFTWARE",
"SHELL",
+ "SOURCE_DATE_EPOCH",
+ "SYSTEMROOT",
"TEMP",
"TERM",
+ "TILE_LIBRARY",
+ "TIX_LIBRARY",
"TMP",
"TMPDIR",
+ "TZ",
"USERPROFILE",
+ "VIRTUAL_ENV",
"WAYLAND_DISPLAY",
- )
+ "WINDIR",
+ "_PYTHON_HOST_PLATFORM",
+ "_PYTHON_PROJECT_BASE",
+ "_PYTHON_SYSCONFIGDATA_NAME",
+ "__PYVENV_LAUNCHER__",
+ ))
for name, value in os.environ.items():
uname = name.upper()
if (uname in ENV_VARS
1
0