Mailman 3 June 2020 - Python-checkins

bpo-41142: Add support of non-ASCII paths for CAB files. (GH-21195)
by Serhiy Storchaka June 30, 2020

June 30, 2020

https://github.com/python/cpython/commit/ba67d7386edf20bcc0f878a518de0894cb… commit: ba67d7386edf20bcc0f878a518de0894cb574e9f branch: master author: Serhiy Storchaka <storchaka(a)gmail.com> committer: GitHub <noreply(a)github.com> date: 2020-06-30T11:56:03+03:00 summary: bpo-41142: Add support of non-ASCII paths for CAB files. (GH-21195) * The path to the CAB file can be non-ASCII. * Paths of added files can be non-ASCII. files: A Misc/NEWS.d/next/Windows/2020-06-28-12-40-41.bpo-… [View More]41142.jpZzzh.rst M Lib/test/test_msilib.py M PC/_msi.c diff --git a/Lib/test/test_msilib.py b/Lib/test/test_msilib.py index 4a233c3784e51..743bea7c14d0e 100644 --- a/Lib/test/test_msilib.py +++ b/Lib/test/test_msilib.py @@ -112,6 +112,16 @@ def test_getproperty_uninitialized_var(self): with self.assertRaises(msilib.MSIError): si.GetProperty(-1) + def test_FCICreate(self): + filepath = TESTFN + '.txt' + cabpath = TESTFN + '.cab' + self.addCleanup(unlink, filepath) + with open(filepath, 'wb'): + pass + self.addCleanup(unlink, cabpath) + msilib.FCICreate(cabpath, [(filepath, 'test.txt')]) + self.assertTrue(os.path.isfile(cabpath)) + class Test_make_id(unittest.TestCase): #http://msdn.microsoft.com/en-us/library/aa369212(v=vs.85).aspx diff --git a/Misc/NEWS.d/next/Windows/2020-06-28-12-40-41.bpo-41142.jpZzzh.rst b/Misc/NEWS.d/next/Windows/2020-06-28-12-40-41.bpo-41142.jpZzzh.rst new file mode 100644 index 0000000000000..91406da7a2544 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2020-06-28-12-40-41.bpo-41142.jpZzzh.rst @@ -0,0 +1,2 @@ +:mod:`msilib` now supports creating CAB files with non-ASCII file path and +adding files with non-ASCII file path to them. diff --git a/PC/_msi.c b/PC/_msi.c index 58c1cfd997bf8..60a0c3aebb1e7 100644 --- a/PC/_msi.c +++ b/PC/_msi.c @@ -41,21 +41,50 @@ uuidcreate(PyObject* obj, PyObject*args) } +/* Helper for converting file names from UTF-8 to wchat_t*. */ +static wchar_t * +utf8_to_wchar(const char *s, int *err) +{ + PyObject *obj = PyUnicode_FromString(s); + if (obj == NULL) { + if (PyErr_ExceptionMatches(PyExc_MemoryError)) { + *err = ENOMEM; + } + else { + *err = EINVAL; + } + PyErr_Clear(); + return NULL; + } + wchar_t *ws = PyUnicode_AsWideCharString(obj, NULL); + if (ws == NULL) { + *err = ENOMEM; + PyErr_Clear(); + } + Py_DECREF(obj); + return ws; +} + /* FCI callback functions */ static FNFCIALLOC(cb_alloc) { - return malloc(cb); + return PyMem_RawMalloc(cb); } static FNFCIFREE(cb_free) { - free(memory); + PyMem_RawFree(memory); } static FNFCIOPEN(cb_open) { - int result = _open(pszFile, oflag | O_NOINHERIT, pmode); + wchar_t *ws = utf8_to_wchar(pszFile, err); + if (ws == NULL) { + return -1; + } + int result = _wopen(ws, oflag | O_NOINHERIT, pmode); + PyMem_Free(ws); if (result == -1) *err = errno; return result; @@ -95,7 +124,12 @@ static FNFCISEEK(cb_seek) static FNFCIDELETE(cb_delete) { - int result = remove(pszFile); + wchar_t *ws = utf8_to_wchar(pszFile, err); + if (ws == NULL) { + return -1; + } + int result = _wremove(ws); + PyMem_Free(ws); if (result != 0) *err = errno; return result; @@ -159,15 +193,22 @@ static FNFCIGETOPENINFO(cb_getopeninfo) FILETIME filetime; HANDLE handle; + wchar_t *ws = utf8_to_wchar(pszName, err); + if (ws == NULL) { + return -1; + } + /* Need Win32 handle to get time stamps */ - handle = CreateFile(pszName, GENERIC_READ, FILE_SHARE_READ, NULL, + handle = CreateFileW(ws, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); - if (handle == INVALID_HANDLE_VALUE) + if (handle == INVALID_HANDLE_VALUE) { + PyMem_Free(ws); return -1; + } - if (GetFileInformationByHandle(handle, &bhfi) == FALSE) - { + if (GetFileInformationByHandle(handle, &bhfi) == FALSE) { CloseHandle(handle); + PyMem_Free(ws); return -1; } @@ -179,7 +220,9 @@ static FNFCIGETOPENINFO(cb_getopeninfo) CloseHandle(handle); - return _open(pszName, _O_RDONLY | _O_BINARY | O_NOINHERIT); + int result = _wopen(ws, _O_RDONLY | _O_BINARY | O_NOINHERIT); + PyMem_Free(ws); + return result; } static PyObject* fcicreate(PyObject* obj, PyObject* args) @@ -212,7 +255,7 @@ static PyObject* fcicreate(PyObject* obj, PyObject* args) ccab.setID = 0; ccab.szDisk[0] = '\0'; - for (i = 0, p = cabname; *p; p = CharNext(p)) + for (i = 0, p = cabname; *p; p++) if (*p == '\\' || *p == '/') i = p - cabname + 1; [View Less]

1 0

bpo-41123: Remove PyUnicode_AsUnicodeCopy in 3.10 (GH-21227)
by Miss Islington (bot) June 30, 2020

June 30, 2020

https://github.com/python/cpython/commit/41d6e3fbb8bcfd41db37782523caac47e7… commit: 41d6e3fbb8bcfd41db37782523caac47e7c8ad23 branch: 3.8 author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com> committer: GitHub <noreply(a)github.com> date: 2020-06-30T01:49:09-07:00 summary: bpo-41123: Remove PyUnicode_AsUnicodeCopy in 3.10 (GH-21227) (cherry picked from commit 2ea6a9928e4fa135888cc8f4733c28d93e642301) Co-authored-by: Inada Naoki <songofacandy(a)gmail.… [View More]

1 0

bpo-41158: IDLE: rewrite the code for handling file encoding (GH-21215)
by Serhiy Storchaka June 30, 2020

June 30, 2020

https://github.com/python/cpython/commit/694d31e714074176f0c324f95948b75dc7… commit: 694d31e714074176f0c324f95948b75dc768c091 branch: master author: Serhiy Storchaka <storchaka(a)gmail.com> committer: GitHub <noreply(a)github.com> date: 2020-06-30T09:33:22+03:00 summary: bpo-41158: IDLE: rewrite the code for handling file encoding (GH-21215) files: M Lib/idlelib/iomenu.py diff --git a/Lib/idlelib/iomenu.py b/Lib/idlelib/iomenu.py index 7f3f656ee2874..7641d866858a1 100644 --- a/… [View More]Lib/idlelib/iomenu.py +++ b/Lib/idlelib/iomenu.py @@ -1,10 +1,8 @@ -import codecs -from codecs import BOM_UTF8 import os -import re import shlex import sys import tempfile +import tokenize import tkinter.filedialog as tkFileDialog import tkinter.messagebox as tkMessageBox @@ -20,49 +18,6 @@ errors = 'surrogateescape' -coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) -blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) - -def coding_spec(data): - """Return the encoding declaration according to PEP 263. - - When checking encoded data, only the first two lines should be passed - in to avoid a UnicodeDecodeError if the rest of the data is not unicode. - The first two lines would contain the encoding specification. - - Raise a LookupError if the encoding is declared but unknown. - """ - if isinstance(data, bytes): - # This encoding might be wrong. However, the coding - # spec must be ASCII-only, so any non-ASCII characters - # around here will be ignored. Decoding to Latin-1 should - # never fail (except for memory outage) - lines = data.decode('iso-8859-1') - else: - lines = data - # consider only the first two lines - if '\n' in lines: - lst = lines.split('\n', 2)[:2] - elif '\r' in lines: - lst = lines.split('\r', 2)[:2] - else: - lst = [lines] - for line in lst: - match = coding_re.match(line) - if match is not None: - break - if not blank_re.match(line): - return None - else: - return None - name = match.group(1) - try: - codecs.lookup(name) - except LookupError: - # The standard encoding error does not indicate the encoding - raise LookupError("Unknown encoding: "+name) - return name - class IOBinding: # One instance per editor Window so methods know which to save, close. @@ -78,7 +33,7 @@ def __init__(self, editwin): self.save_as) self.__id_savecopy = self.text.bind("<<save-copy-of-window-as-file>>", self.save_a_copy) - self.fileencoding = None + self.fileencoding = 'utf-8' self.__id_print = self.text.bind("<<print-window>>", self.print_window) def close(self): @@ -165,34 +120,44 @@ def open(self, event=None, editFile=None): self.text.focus_set() return "break" - eol = r"(\r\n)|\n|\r" # \r\n (Windows), \n (UNIX), or \r (Mac) - eol_re = re.compile(eol) eol_convention = os.linesep # default def loadfile(self, filename): try: - # open the file in binary mode so that we can handle - # end-of-line convention ourselves. - with open(filename, 'rb') as f: - two_lines = f.readline() + f.readline() - f.seek(0) - bytes = f.read() - except OSError as msg: - tkMessageBox.showerror("I/O Error", str(msg), parent=self.text) + try: + with tokenize.open(filename) as f: + chars = f.read() + fileencoding = f.encoding + eol_convention = f.newlines + converted = False + except (UnicodeDecodeError, SyntaxError): + # Wait for the editor window to appear + self.editwin.text.update() + enc = askstring( + "Specify file encoding", + "The file's encoding is invalid for Python 3.x.\n" + "IDLE will convert it to UTF-8.\n" + "What is the current encoding of the file?", + initialvalue='utf-8', + parent=self.editwin.text) + with open(filename, encoding=enc) as f: + chars = f.read() + fileencoding = f.encoding + eol_convention = f.newlines + converted = True + except OSError as err: + tkMessageBox.showerror("I/O Error", str(err), parent=self.text) return False - chars, converted = self._decode(two_lines, bytes) - if chars is None: + except UnicodeDecodeError: tkMessageBox.showerror("Decoding Error", "File %s\nFailed to Decode" % filename, parent=self.text) return False - # We now convert all end-of-lines to '\n's - firsteol = self.eol_re.search(chars) - if firsteol: - self.eol_convention = firsteol.group(0) - chars = self.eol_re.sub(r"\n", chars) + self.text.delete("1.0", "end") self.set_filename(None) + self.fileencoding = fileencoding + self.eol_convention = eol_convention self.text.insert("1.0", chars) self.reset_undo() self.set_filename(filename) @@ -205,74 +170,6 @@ def loadfile(self, filename): self.updaterecentfileslist(filename) return True - def _decode(self, two_lines, bytes): - "Create a Unicode string." - chars = None - # Check presence of a UTF-8 signature first - if bytes.startswith(BOM_UTF8): - try: - chars = bytes[3:].decode("utf-8") - except UnicodeDecodeError: - # has UTF-8 signature, but fails to decode... - return None, False - else: - # Indicates that this file originally had a BOM - self.fileencoding = 'BOM' - return chars, False - # Next look for coding specification - try: - enc = coding_spec(two_lines) - except LookupError as name: - tkMessageBox.showerror( - title="Error loading the file", - message="The encoding '%s' is not known to this Python "\ - "installation. The file may not display correctly" % name, - parent = self.text) - enc = None - except UnicodeDecodeError: - return None, False - if enc: - try: - chars = str(bytes, enc) - self.fileencoding = enc - return chars, False - except UnicodeDecodeError: - pass - # Try ascii: - try: - chars = str(bytes, 'ascii') - self.fileencoding = None - return chars, False - except UnicodeDecodeError: - pass - # Try utf-8: - try: - chars = str(bytes, 'utf-8') - self.fileencoding = 'utf-8' - return chars, False - except UnicodeDecodeError: - pass - # Finally, try the locale's encoding. This is deprecated; - # the user should declare a non-ASCII encoding - try: - # Wait for the editor window to appear - self.editwin.text.update() - enc = askstring( - "Specify file encoding", - "The file's encoding is invalid for Python 3.x.\n" - "IDLE will convert it to UTF-8.\n" - "What is the current encoding of the file?", - initialvalue = encoding, - parent = self.editwin.text) - - if enc: - chars = str(bytes, enc) - self.fileencoding = None - return chars, True - except (UnicodeDecodeError, LookupError): - pass - return None, False # None on failure - def maybesave(self): if self.get_saved(): return "yes" @@ -360,38 +257,30 @@ def encode(self, chars): # text to us. Don't try to guess further. return chars # Preserve a BOM that might have been present on opening - if self.fileencoding == 'BOM': - return BOM_UTF8 + chars.encode("utf-8") + if self.fileencoding == 'utf-8-sig': + return chars.encode('utf-8-sig') # See whether there is anything non-ASCII in it. # If not, no need to figure out the encoding. try: return chars.encode('ascii') - except UnicodeError: + except UnicodeEncodeError: pass # Check if there is an encoding declared try: - # a string, let coding_spec slice it to the first two lines - enc = coding_spec(chars) - failed = None - except LookupError as msg: - failed = msg - enc = None - else: - if not enc: - # PEP 3120: default source encoding is UTF-8 - enc = 'utf-8' - if enc: - try: - return chars.encode(enc) - except UnicodeError: - failed = "Invalid encoding '%s'" % enc + encoded = chars.encode('ascii', 'replace') + enc, _ = tokenize.detect_encoding(io.BytesIO(encoded).readline) + return chars.encode(enc) + except SyntaxError as err: + failed = str(err) + except UnicodeEncodeError: + failed = "Invalid encoding '%s'" % enc tkMessageBox.showerror( "I/O Error", "%s.\nSaving as UTF-8" % failed, - parent = self.text) + parent=self.text) # Fallback: save as UTF-8, with BOM - ignoring the incorrect # declared encoding - return BOM_UTF8 + chars.encode("utf-8") + return chars.encode('utf-8-sig') def print_window(self, event): confirm = tkMessageBox.askokcancel( [View Less]

1 0

bpo-36346: Raise DeprecationWarning when creating legacy Unicode (GH-20933)
by Inada Naoki June 30, 2020

June 30, 2020

https://github.com/python/cpython/commit/038dd0f79dc89566b01ba66a5a018266b2… commit: 038dd0f79dc89566b01ba66a5a018266b2917a19 branch: master author: Inada Naoki <songofacandy(a)gmail.com> committer: GitHub <noreply(a)github.com> date: 2020-06-30T15:26:56+09:00 summary: bpo-36346: Raise DeprecationWarning when creating legacy Unicode (GH-20933) files: A Misc/NEWS.d/next/C API/2020-06-17-20-31-12.bpo-36346.mwIyxi.rst M Doc/whatsnew/3.10.rst M Lib/test/test_unicode.py M Objects/… [View More]unicodeobject.c diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 0674ce8cff177..a3b53ba48e9b7 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -213,6 +213,11 @@ Porting to Python 3.10 for historical reason. It is no longer allowed. (Contributed by Victor Stinner in :issue:`40839`.) +* ``PyUnicode_FromUnicode(NULL, size)`` and ``PyUnicode_FromStringAndSize(NULL, size)`` + raise ``DeprecationWarning`` now. Use :c:func:`PyUnicode_New` to allocate + Unicode object without initial data. + (Contributed by Inada Naoki in :issue:`36346`.) + Removed ------- diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py index 6e397161fd98d..59697935fe5cd 100644 --- a/Lib/test/test_unicode.py +++ b/Lib/test/test_unicode.py @@ -725,7 +725,9 @@ def test_isidentifier_legacy(self): import _testcapi u = '𝖀𝖓𝖎𝖈𝖔𝖉𝖊' self.assertTrue(u.isidentifier()) - self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier()) + with support.check_warnings(): + warnings.simplefilter('ignore', DeprecationWarning) + self.assertTrue(_testcapi.unicode_legacy_string(u).isidentifier()) def test_isprintable(self): self.assertTrue("".isprintable()) diff --git a/Misc/NEWS.d/next/C API/2020-06-17-20-31-12.bpo-36346.mwIyxi.rst b/Misc/NEWS.d/next/C API/2020-06-17-20-31-12.bpo-36346.mwIyxi.rst new file mode 100644 index 0000000000000..9b0400399beb9 --- /dev/null +++ b/Misc/NEWS.d/next/C API/2020-06-17-20-31-12.bpo-36346.mwIyxi.rst @@ -0,0 +1,2 @@ +Raises DeprecationWarning for ``PyUnicode_FromUnicode(NULL, size)`` and +``PyUnicode_FromStringAndSize(NULL, size)`` with ``size > 0``. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index db3f55e02b98b..fe46de2ae4743 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -2179,8 +2179,16 @@ unicode_char(Py_UCS4 ch) PyObject * PyUnicode_FromUnicode(const Py_UNICODE *u, Py_ssize_t size) { - if (u == NULL) + if (u == NULL) { + if (size > 0) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "PyUnicode_FromUnicode(NULL, size) is deprecated; " + "use PyUnicode_New() instead", 1) < 0) { + return NULL; + } + } return (PyObject*)_PyUnicode_New(size); + } if (size < 0) { PyErr_BadInternalCall(); @@ -2266,10 +2274,19 @@ PyUnicode_FromStringAndSize(const char *u, Py_ssize_t size) "Negative size passed to PyUnicode_FromStringAndSize"); return NULL; } - if (u != NULL) + if (u != NULL) { return PyUnicode_DecodeUTF8Stateful(u, size, NULL, NULL); - else + } + else { + if (size > 0) { + if (PyErr_WarnEx(PyExc_DeprecationWarning, + "PyUnicode_FromStringAndSize(NULL, size) is deprecated; " + "use PyUnicode_New() instead", 1) < 0) { + return NULL; + } + } return (PyObject *)_PyUnicode_New(size); + } } PyObject * [View Less]

1 0

bpo-36346: Prepare for removing the legacy Unicode C API (AC only). (GH-21223)
by Serhiy Storchaka June 30, 2020

June 30, 2020

https://github.com/python/cpython/commit/349f76c6aace5a4a2b57f6b442a532faf0… commit: 349f76c6aace5a4a2b57f6b442a532faf0027d6b branch: master author: Serhiy Storchaka <storchaka(a)gmail.com> committer: GitHub <noreply(a)github.com> date: 2020-06-30T09:03:15+03:00 summary: bpo-36346: Prepare for removing the legacy Unicode C API (AC only). (GH-21223) files: M Include/cpython/unicodeobject.h M Lib/test/clinic.test M Modules/clinic/_winapi.c.h M Modules/clinic/posixmodule.c.h M … [View More]Objects/unicodeobject.c M PC/clinic/winreg.c.h M Tools/clinic/clinic.py diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index 88a97a4cb5f71..0f19b2a14bcd0 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -11,6 +11,8 @@ /* --- Internal Unicode Operations ---------------------------------------- */ +#define USE_UNICODE_WCHAR_CACHE 1 + /* Since splitting on whitespace is an important use case, and whitespace in most situations is solely ASCII whitespace, we optimize for the common case by using a quick look-up table @@ -1169,4 +1171,7 @@ PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); and where the hash values are equal (i.e. a very probable match) */ PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *); +PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *); +PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); + PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *); diff --git a/Lib/test/clinic.test b/Lib/test/clinic.test index f2be61355cc97..07e13829d5db9 100644 --- a/Lib/test/clinic.test +++ b/Lib/test/clinic.test @@ -1813,13 +1813,26 @@ test_Py_UNICODE_converter(PyObject *module, PyObject *const *args, Py_ssize_t na const Py_UNICODE *e; Py_ssize_clean_t e_length; - if (!_PyArg_ParseStack(args, nargs, "uuZu#Z#:test_Py_UNICODE_converter", - &a, &b, &c, &d, &d_length, &e, &e_length)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&O&u#Z#:test_Py_UNICODE_converter", + _PyUnicode_WideCharString_Converter, &a, _PyUnicode_WideCharString_Converter, &b, _PyUnicode_WideCharString_Opt_Converter, &c, &d, &d_length, &e, &e_length)) { goto exit; } return_value = test_Py_UNICODE_converter_impl(module, a, b, c, d, d_length, e, e_length); exit: + /* Cleanup for a */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)a); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for b */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)b); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for c */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)c); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1830,7 +1843,7 @@ test_Py_UNICODE_converter_impl(PyObject *module, const Py_UNICODE *a, Py_ssize_clean_t d_length, const Py_UNICODE *e, Py_ssize_clean_t e_length) -/*[clinic end generated code: output=dd0a09a1b772e57b input=064a3b68ad7f04b0]*/ +/*[clinic end generated code: output=ef45e982fedf0b3d input=064a3b68ad7f04b0]*/ /*[clinic input] diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index e21f2bc2b6fd6..6022dfe0db4b2 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -367,13 +367,22 @@ _winapi_CreateProcess(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *current_directory; PyObject *startup_info; - if (!_PyArg_ParseStack(args, nargs, "ZOOOikOZO:CreateProcess", - &application_name, &command_line, &proc_attrs, &thread_attrs, &inherit_handles, &creation_flags, &env_mapping, &current_directory, &startup_info)) { + if (!_PyArg_ParseStack(args, nargs, "O&OOOikOO&O:CreateProcess", + _PyUnicode_WideCharString_Opt_Converter, &application_name, &command_line, &proc_attrs, &thread_attrs, &inherit_handles, &creation_flags, &env_mapping, _PyUnicode_WideCharString_Opt_Converter, &current_directory, &startup_info)) { goto exit; } return_value = _winapi_CreateProcess_impl(module, application_name, command_line, proc_attrs, thread_attrs, inherit_handles, creation_flags, env_mapping, current_directory, startup_info); exit: + /* Cleanup for application_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)application_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for current_directory */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)current_directory); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1097,4 +1106,4 @@ _winapi_GetFileType(PyObject *module, PyObject *const *args, Py_ssize_t nargs, P exit: return return_value; } -/*[clinic end generated code: output=f3897898ea1da99d input=a9049054013a1b77]*/ +/*[clinic end generated code: output=db87076a32fa7abe input=a9049054013a1b77]*/ diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index b691cfbc6edef..6533edfdb47d2 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1674,12 +1674,28 @@ os_system(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *k { PyObject *return_value = NULL; static const char * const _keywords[] = {"command", NULL}; - static _PyArg_Parser _parser = {"u:system", _keywords, 0}; + static _PyArg_Parser _parser = {NULL, _keywords, "system", 0}; + PyObject *argsbuf[1]; const Py_UNICODE *command; long _return_value; - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - &command)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("system", "argument 'command'", "str", args[0]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + command = _PyUnicode_AsUnicode(args[0]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + command = PyUnicode_AsWideCharString(args[0], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (command == NULL) { goto exit; } _return_value = os_system_impl(module, command); @@ -1689,6 +1705,11 @@ os_system(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *k return_value = PyLong_FromLong(_return_value); exit: + /* Cleanup for command */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)command); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -6998,19 +7019,47 @@ os_startfile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject { PyObject *return_value = NULL; static const char * const _keywords[] = {"filepath", "operation", NULL}; - static _PyArg_Parser _parser = {"O&|u:startfile", _keywords, 0}; + static _PyArg_Parser _parser = {NULL, _keywords, "startfile", 0}; + PyObject *argsbuf[2]; + Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; path_t filepath = PATH_T_INITIALIZE("startfile", "filepath", 0, 0); const Py_UNICODE *operation = NULL; - if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - path_converter, &filepath, &operation)) { + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &filepath)) { + goto exit; + } + if (!noptargs) { + goto skip_optional_pos; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("startfile", "argument 'operation'", "str", args[1]); goto exit; } + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + operation = _PyUnicode_AsUnicode(args[1]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + operation = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (operation == NULL) { + goto exit; + } +skip_optional_pos: return_value = os_startfile_impl(module, &filepath, operation); exit: /* Cleanup for filepath */ path_cleanup(&filepath); + /* Cleanup for operation */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)operation); + #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -8876,4 +8925,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=d7c1212a94613496 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ba3d4b35fda2c208 input=a9049054013a1b77]*/ diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 8eafdacf55974..db3f55e02b98b 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3273,6 +3273,80 @@ PyUnicode_AsWideCharString(PyObject *unicode, #endif /* HAVE_WCHAR_H */ +int +_PyUnicode_WideCharString_Converter(PyObject *obj, void *ptr) +{ + wchar_t **p = (wchar_t **)ptr; + if (obj == NULL) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(*p); +#endif /* USE_UNICODE_WCHAR_CACHE */ + *p = NULL; + return 1; + } + if (PyUnicode_Check(obj)) { +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + *p = (wchar_t *)_PyUnicode_AsUnicode(obj); + if (*p == NULL) { + return 0; + } + return 1; +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + *p = PyUnicode_AsWideCharString(obj, NULL); + if (*p == NULL) { + return 0; + } + return Py_CLEANUP_SUPPORTED; +#endif /* USE_UNICODE_WCHAR_CACHE */ + } + PyErr_Format(PyExc_TypeError, + "argument must be str, not %.50s", + obj->ob_type->tp_name); + return 0; +} + +int +_PyUnicode_WideCharString_Opt_Converter(PyObject *obj, void *ptr) +{ + wchar_t **p = (wchar_t **)ptr; + if (obj == NULL) { +#if !USE_UNICODE_WCHAR_CACHE + PyMem_Free(*p); +#endif /* USE_UNICODE_WCHAR_CACHE */ + *p = NULL; + return 1; + } + if (obj == Py_None) { + *p = NULL; + return 1; + } + if (PyUnicode_Check(obj)) { +#if USE_UNICODE_WCHAR_CACHE +_Py_COMP_DIAG_PUSH +_Py_COMP_DIAG_IGNORE_DEPR_DECLS + *p = (wchar_t *)_PyUnicode_AsUnicode(obj); + if (*p == NULL) { + return 0; + } + return 1; +_Py_COMP_DIAG_POP +#else /* USE_UNICODE_WCHAR_CACHE */ + *p = PyUnicode_AsWideCharString(obj, NULL); + if (*p == NULL) { + return 0; + } + return Py_CLEANUP_SUPPORTED; +#endif /* USE_UNICODE_WCHAR_CACHE */ + } + PyErr_Format(PyExc_TypeError, + "argument must be str or None, not %.50s", + obj->ob_type->tp_name); + return 0; +} + PyObject * PyUnicode_FromOrdinal(int ordinal) { diff --git a/PC/clinic/winreg.c.h b/PC/clinic/winreg.c.h index 5f37fcda0a9ab..5c97eaeee9e27 100644 --- a/PC/clinic/winreg.c.h +++ b/PC/clinic/winreg.c.h @@ -152,8 +152,30 @@ winreg_ConnectRegistry(PyObject *module, PyObject *const *args, Py_ssize_t nargs HKEY key; HKEY _return_value; - if (!_PyArg_ParseStack(args, nargs, "ZO&:ConnectRegistry", - &computer_name, clinic_HKEY_converter, &key)) { + if (!_PyArg_CheckPositional("ConnectRegistry", nargs, 2, 2)) { + goto exit; + } + if (args[0] == Py_None) { + computer_name = NULL; + } + else if (PyUnicode_Check(args[0])) { + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + computer_name = _PyUnicode_AsUnicode(args[0]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + computer_name = PyUnicode_AsWideCharString(args[0], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (computer_name == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("ConnectRegistry", "argument 1", "str or None", args[0]); + goto exit; + } + if (!clinic_HKEY_converter(args[1], &key)) { goto exit; } _return_value = winreg_ConnectRegistry_impl(module, computer_name, key); @@ -163,6 +185,11 @@ winreg_ConnectRegistry(PyObject *module, PyObject *const *args, Py_ssize_t nargs return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for computer_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)computer_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -199,8 +226,30 @@ winreg_CreateKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *sub_key; HKEY _return_value; - if (!_PyArg_ParseStack(args, nargs, "O&Z:CreateKey", - clinic_HKEY_converter, &key, &sub_key)) { + if (!_PyArg_CheckPositional("CreateKey", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + sub_key = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + sub_key = _PyUnicode_AsUnicode(args[1]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("CreateKey", "argument 2", "str or None", args[1]); goto exit; } _return_value = winreg_CreateKey_impl(module, key, sub_key); @@ -210,6 +259,11 @@ winreg_CreateKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -251,7 +305,7 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "reserved", "access", NULL}; - static _PyArg_Parser _parser = {"O&Z|ii:CreateKeyEx", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:CreateKeyEx", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; int reserved = 0; @@ -259,7 +313,7 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py HKEY _return_value; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &reserved, &access)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &reserved, &access)) { goto exit; } _return_value = winreg_CreateKeyEx_impl(module, key, sub_key, reserved, access); @@ -269,6 +323,11 @@ winreg_CreateKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -303,13 +362,35 @@ winreg_DeleteKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *sub_key; - if (!_PyArg_ParseStack(args, nargs, "O&u:DeleteKey", - clinic_HKEY_converter, &key, &sub_key)) { + if (!_PyArg_CheckPositional("DeleteKey", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("DeleteKey", "argument 2", "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + sub_key = _PyUnicode_AsUnicode(args[1]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { goto exit; } return_value = winreg_DeleteKey_impl(module, key, sub_key); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -351,19 +432,24 @@ winreg_DeleteKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "access", "reserved", NULL}; - static _PyArg_Parser _parser = {"O&u|ii:DeleteKeyEx", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:DeleteKeyEx", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; REGSAM access = KEY_WOW64_64KEY; int reserved = 0; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &access, &reserved)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Converter, &sub_key, &access, &reserved)) { goto exit; } return_value = winreg_DeleteKeyEx_impl(module, key, sub_key, access, reserved); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -391,13 +477,40 @@ winreg_DeleteValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *value; - if (!_PyArg_ParseStack(args, nargs, "O&Z:DeleteValue", - clinic_HKEY_converter, &key, &value)) { + if (!_PyArg_CheckPositional("DeleteValue", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + value = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + value = _PyUnicode_AsUnicode(args[1]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + value = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (value == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("DeleteValue", "argument 2", "str or None", args[1]); goto exit; } return_value = winreg_DeleteValue_impl(module, key, value); exit: + /* Cleanup for value */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)value); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -517,12 +630,29 @@ winreg_ExpandEnvironmentStrings(PyObject *module, PyObject *arg) PyObject *return_value = NULL; const Py_UNICODE *string; - if (!PyArg_Parse(arg, "u:ExpandEnvironmentStrings", &string)) { + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("ExpandEnvironmentStrings", "argument", "str", arg); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + string = _PyUnicode_AsUnicode(arg); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + string = PyUnicode_AsWideCharString(arg, NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (string == NULL) { goto exit; } return_value = winreg_ExpandEnvironmentStrings_impl(module, string); exit: + /* Cleanup for string */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)string); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -609,13 +739,54 @@ winreg_LoadKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *sub_key; const Py_UNICODE *file_name; - if (!_PyArg_ParseStack(args, nargs, "O&uu:LoadKey", - clinic_HKEY_converter, &key, &sub_key, &file_name)) { + if (!_PyArg_CheckPositional("LoadKey", nargs, 3, 3)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("LoadKey", "argument 2", "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + sub_key = _PyUnicode_AsUnicode(args[1]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { + goto exit; + } + if (!PyUnicode_Check(args[2])) { + _PyArg_BadArgument("LoadKey", "argument 3", "str", args[2]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + file_name = _PyUnicode_AsUnicode(args[2]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + file_name = PyUnicode_AsWideCharString(args[2], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (file_name == NULL) { goto exit; } return_value = winreg_LoadKey_impl(module, key, sub_key, file_name); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for file_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)file_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -650,7 +821,7 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "reserved", "access", NULL}; - static _PyArg_Parser _parser = {"O&Z|ii:OpenKey", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:OpenKey", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; int reserved = 0; @@ -658,7 +829,7 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje HKEY _return_value; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &reserved, &access)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &reserved, &access)) { goto exit; } _return_value = winreg_OpenKey_impl(module, key, sub_key, reserved, access); @@ -668,6 +839,11 @@ winreg_OpenKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -702,7 +878,7 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb { PyObject *return_value = NULL; static const char * const _keywords[] = {"key", "sub_key", "reserved", "access", NULL}; - static _PyArg_Parser _parser = {"O&Z|ii:OpenKeyEx", _keywords, 0}; + static _PyArg_Parser _parser = {"O&O&|ii:OpenKeyEx", _keywords, 0}; HKEY key; const Py_UNICODE *sub_key; int reserved = 0; @@ -710,7 +886,7 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb HKEY _return_value; if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, - clinic_HKEY_converter, &key, &sub_key, &reserved, &access)) { + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &reserved, &access)) { goto exit; } _return_value = winreg_OpenKeyEx_impl(module, key, sub_key, reserved, access); @@ -720,6 +896,11 @@ winreg_OpenKeyEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyOb return_value = PyHKEY_FromHKEY(_return_value); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -792,13 +973,40 @@ winreg_QueryValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *sub_key; - if (!_PyArg_ParseStack(args, nargs, "O&Z:QueryValue", - clinic_HKEY_converter, &key, &sub_key)) { + if (!_PyArg_CheckPositional("QueryValue", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + sub_key = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + sub_key = _PyUnicode_AsUnicode(args[1]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + sub_key = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (sub_key == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("QueryValue", "argument 2", "str or None", args[1]); goto exit; } return_value = winreg_QueryValue_impl(module, key, sub_key); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -831,13 +1039,40 @@ winreg_QueryValueEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *name; - if (!_PyArg_ParseStack(args, nargs, "O&Z:QueryValueEx", - clinic_HKEY_converter, &key, &name)) { + if (!_PyArg_CheckPositional("QueryValueEx", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (args[1] == Py_None) { + name = NULL; + } + else if (PyUnicode_Check(args[1])) { + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + name = _PyUnicode_AsUnicode(args[1]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + name = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (name == NULL) { + goto exit; + } + } + else { + _PyArg_BadArgument("QueryValueEx", "argument 2", "str or None", args[1]); goto exit; } return_value = winreg_QueryValueEx_impl(module, key, name); exit: + /* Cleanup for name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -875,13 +1110,35 @@ winreg_SaveKey(PyObject *module, PyObject *const *args, Py_ssize_t nargs) HKEY key; const Py_UNICODE *file_name; - if (!_PyArg_ParseStack(args, nargs, "O&u:SaveKey", - clinic_HKEY_converter, &key, &file_name)) { + if (!_PyArg_CheckPositional("SaveKey", nargs, 2, 2)) { + goto exit; + } + if (!clinic_HKEY_converter(args[0], &key)) { + goto exit; + } + if (!PyUnicode_Check(args[1])) { + _PyArg_BadArgument("SaveKey", "argument 2", "str", args[1]); + goto exit; + } + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + file_name = _PyUnicode_AsUnicode(args[1]); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + file_name = PyUnicode_AsWideCharString(args[1], NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if (file_name == NULL) { goto exit; } return_value = winreg_SaveKey_impl(module, key, file_name); exit: + /* Cleanup for file_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)file_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -929,13 +1186,18 @@ winreg_SetValue(PyObject *module, PyObject *const *args, Py_ssize_t nargs) const Py_UNICODE *value; Py_ssize_clean_t value_length; - if (!_PyArg_ParseStack(args, nargs, "O&Zku#:SetValue", - clinic_HKEY_converter, &key, &sub_key, &type, &value, &value_length)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&ku#:SetValue", + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &sub_key, &type, &value, &value_length)) { goto exit; } return_value = winreg_SetValue_impl(module, key, sub_key, type, value, value_length); exit: + /* Cleanup for sub_key */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)sub_key); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1000,13 +1262,18 @@ winreg_SetValueEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs) DWORD type; PyObject *value; - if (!_PyArg_ParseStack(args, nargs, "O&ZOkO:SetValueEx", - clinic_HKEY_converter, &key, &value_name, &reserved, &type, &value)) { + if (!_PyArg_ParseStack(args, nargs, "O&O&OkO:SetValueEx", + clinic_HKEY_converter, &key, _PyUnicode_WideCharString_Opt_Converter, &value_name, &reserved, &type, &value)) { goto exit; } return_value = winreg_SetValueEx_impl(module, key, value_name, reserved, type, value); exit: + /* Cleanup for value_name */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)value_name); + #endif /* USE_UNICODE_WCHAR_CACHE */ + return return_value; } @@ -1111,4 +1378,4 @@ winreg_QueryReflectionKey(PyObject *module, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=f4f996d40d06f14c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=fa5f21ea6a75d0e9 input=a9049054013a1b77]*/ diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index b1bf7826ebf9f..3a9f4c228c22b 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -3374,20 +3374,81 @@ def parse_arg(self, argname, displayname): displayname=displayname) return super().parse_arg(argname, displayname) +@add_legacy_c_converter('u') @add_legacy_c_converter('u#', zeroes=True) @add_legacy_c_converter('Z', accept={str, NoneType}) @add_legacy_c_converter('Z#', accept={str, NoneType}, zeroes=True) class Py_UNICODE_converter(CConverter): type = 'const Py_UNICODE *' default_type = (str, Null, NoneType) - format_unit = 'u' def converter_init(self, *, accept={str}, zeroes=False): format_unit = 'Z' if accept=={str, NoneType} else 'u' if zeroes: format_unit += '#' self.length = True - self.format_unit = format_unit + self.format_unit = format_unit + else: + self.accept = accept + if accept == {str}: + self.converter = '_PyUnicode_WideCharString_Converter' + elif accept == {str, NoneType}: + self.converter = '_PyUnicode_WideCharString_Opt_Converter' + else: + fail("Py_UNICODE_converter: illegal 'accept' argument " + repr(accept)) + + def cleanup(self): + if not self.length: + return """\ +#if !USE_UNICODE_WCHAR_CACHE +PyMem_Free((void *){name}); +#endif /* USE_UNICODE_WCHAR_CACHE */ +""".format(name=self.name) + + def parse_arg(self, argname, argnum): + if not self.length: + if self.accept == {str}: + return """ + if (!PyUnicode_Check({argname})) {{{{ + _PyArg_BadArgument("{{name}}", {argnum}, "str", {argname}); + goto exit; + }}}} + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + {paramname} = _PyUnicode_AsUnicode({argname}); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + {paramname} = PyUnicode_AsWideCharString({argname}, NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if ({paramname} == NULL) {{{{ + goto exit; + }}}} + """.format(argname=argname, paramname=self.name, argnum=argnum) + elif self.accept == {str, NoneType}: + return """ + if ({argname} == Py_None) {{{{ + {paramname} = NULL; + }}}} + else if (PyUnicode_Check({argname})) {{{{ + #if USE_UNICODE_WCHAR_CACHE + _Py_COMP_DIAG_PUSH + _Py_COMP_DIAG_IGNORE_DEPR_DECLS + {paramname} = _PyUnicode_AsUnicode({argname}); + _Py_COMP_DIAG_POP + #else /* USE_UNICODE_WCHAR_CACHE */ + {paramname} = PyUnicode_AsWideCharString({argname}, NULL); + #endif /* USE_UNICODE_WCHAR_CACHE */ + if ({paramname} == NULL) {{{{ + goto exit; + }}}} + }}}} + else {{{{ + _PyArg_BadArgument("{{name}}", {argnum}, "str or None", {argname}); + goto exit; + }}}} + """.format(argname=argname, paramname=self.name, argnum=argnum) + return super().parse_arg(argname, argnum) @add_legacy_c_converter('s*', accept={str, buffer}) @add_legacy_c_converter('z*', accept={str, buffer, NoneType}) [View Less]

1 0

bpo-41123: Remove PyUnicode_AsUnicodeCopy (GH-21209)
by Inada Naoki June 30, 2020

June 30, 2020

https://github.com/python/cpython/commit/b3332660adb02babb7e66e45310c66dc9a… commit: b3332660adb02babb7e66e45310c66dc9a9a94da branch: master author: Inada Naoki <songofacandy(a)gmail.com> committer: GitHub <noreply(a)github.com> date: 2020-06-30T12:23:07+09:00 summary: bpo-41123: Remove PyUnicode_AsUnicodeCopy (GH-21209) files: A Misc/NEWS.d/next/C API/2020-06-29-15-49-36.bpo-41123.wYY4E1.rst M Doc/c-api/unicode.rst M Doc/data/refcounts.dat M Doc/whatsnew/3.10.rst M Include/… [View More]cpython/unicodeobject.h M Objects/unicodeobject.c diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index b1787ed1ce89c..0748a1e319489 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -724,20 +724,6 @@ Extension modules can continue using them, as they will not be removed in Python .. versionadded:: 3.3 -.. c:function:: Py_UNICODE* PyUnicode_AsUnicodeCopy(PyObject *unicode) - - Create a copy of a Unicode string ending with a null code point. Return ``NULL`` - and raise a :exc:`MemoryError` exception on memory allocation failure, - otherwise return a new allocated buffer (use :c:func:`PyMem_Free` to free - the buffer). Note that the resulting :c:type:`Py_UNICODE*` string may - contain embedded null code points, which would cause the string to be - truncated when used in most C functions. - - .. versionadded:: 3.2 - - Please migrate to using :c:func:`PyUnicode_AsUCS4Copy` or similar new APIs. - - .. c:function:: Py_ssize_t PyUnicode_GetSize(PyObject *unicode) Return the size of the deprecated :c:type:`Py_UNICODE` representation, in diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat index 4d9aee370c61d..882d7d6d62fc3 100644 --- a/Doc/data/refcounts.dat +++ b/Doc/data/refcounts.dat @@ -2419,9 +2419,6 @@ PyUnicode_AsUnicodeAndSize:Py_UNICODE*::: PyUnicode_AsUnicodeAndSize:PyObject*:unicode:0: PyUnicode_AsUnicodeAndSize:Py_ssize_t*:size:: -PyUnicode_AsUnicodeCopy:Py_UNICODE*::: -PyUnicode_AsUnicodeCopy:PyObject*:unicode:0: - PyUnicode_GetSize:Py_ssize_t::: PyUnicode_GetSize:PyObject*:unicode:0: diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index a755d2672ae6d..0674ce8cff177 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -240,3 +240,7 @@ Removed * Removed ``PyLong_FromUnicode()``. Please migrate to :c:func:`PyLong_FromUnicodeObject`. (Contributed by Inada Naoki in :issue:`41103`.) + +* Removed ``PyUnicode_AsUnicodeCopy()``. Please use :c:func:`PyUnicode_AsUCS4Copy` or + :c:func:`PyUnicode_AsWideCharString` + (Contributed by Inada Naoki in :issue:`41103`.) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index c1a8564349b99..88a97a4cb5f71 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -1162,14 +1162,6 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha( PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int); -/* Create a copy of a unicode string ending with a nul character. Return NULL - and raise a MemoryError exception on memory allocation failure, otherwise - return a new allocated buffer (use PyMem_Free() to free the buffer). */ - -Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy( - PyObject *unicode - ); - /* Return an interned Unicode object for an Identifier; may fail if there is no memory.*/ PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); diff --git a/Misc/NEWS.d/next/C API/2020-06-29-15-49-36.bpo-41123.wYY4E1.rst b/Misc/NEWS.d/next/C API/2020-06-29-15-49-36.bpo-41123.wYY4E1.rst new file mode 100644 index 0000000000000..74ac45462773e --- /dev/null +++ b/Misc/NEWS.d/next/C API/2020-06-29-15-49-36.bpo-41123.wYY4E1.rst @@ -0,0 +1 @@ +Remove ``PyUnicode_AsUnicodeCopy``. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index c46ba4ae57dc6..8eafdacf55974 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -15862,39 +15862,6 @@ unicode_iter(PyObject *seq) return (PyObject *)it; } -Py_UNICODE* -PyUnicode_AsUnicodeCopy(PyObject *unicode) -{ - Py_UNICODE *u, *copy; - Py_ssize_t len, size; - - if (!PyUnicode_Check(unicode)) { - PyErr_BadArgument(); - return NULL; - } -_Py_COMP_DIAG_PUSH -_Py_COMP_DIAG_IGNORE_DEPR_DECLS - u = PyUnicode_AsUnicodeAndSize(unicode, &len); -_Py_COMP_DIAG_POP - if (u == NULL) - return NULL; - /* Ensure we won't overflow the size. */ - if (len > ((PY_SSIZE_T_MAX / (Py_ssize_t)sizeof(Py_UNICODE)) - 1)) { - PyErr_NoMemory(); - return NULL; - } - size = len + 1; /* copy the null character */ - size *= sizeof(Py_UNICODE); - copy = PyMem_Malloc(size); - if (copy == NULL) { - PyErr_NoMemory(); - return NULL; - } - memcpy(copy, u, size); - return copy; -} - - static int encode_wstr_utf8(wchar_t *wstr, char **str, const char *name) { [View Less]

1 0

bpo-41152: IDLE: always use UTF-8 for standard IO streams (GH-21214)
by Miss Islington (bot) June 30, 2020

June 30, 2020

https://github.com/python/cpython/commit/00fd04b9b7537c473c3f9396a861868b8d… commit: 00fd04b9b7537c473c3f9396a861868b8ddd3bb2 branch: 3.8 author: Miss Islington (bot) <31488909+miss-islington(a)users.noreply.github.com> committer: GitHub <noreply(a)github.com> date: 2020-06-29T17:39:02-07:00 summary: bpo-41152: IDLE: always use UTF-8 for standard IO streams (GH-21214) (cherry picked from commit 2515a28230b1a011205f30263da6b01c6bd167a3) Co-authored-by: Serhiy Storchaka <… [View More]storchaka(a)gmail.com> files: A Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst M Lib/idlelib/NEWS.txt M Lib/idlelib/idle_test/test_outwin.py M Lib/idlelib/iomenu.py M Lib/idlelib/outwin.py diff --git a/Lib/idlelib/NEWS.txt b/Lib/idlelib/NEWS.txt index 584fd4631fbc2..59b34b1519fdf 100644 --- a/Lib/idlelib/NEWS.txt +++ b/Lib/idlelib/NEWS.txt @@ -3,6 +3,9 @@ Released on 2020-07-03? ====================================== +bpo-41152: The encoding of ``stdin``, ``stdout`` and ``stderr`` in IDLE +is now always UTF-8. + bpo-41144: Make Open Module open a special module such as os.path. bpo-40723: Make test_idle pass when run after import. diff --git a/Lib/idlelib/idle_test/test_outwin.py b/Lib/idlelib/idle_test/test_outwin.py index cd099ecd841b3..e347bfca7f191 100644 --- a/Lib/idlelib/idle_test/test_outwin.py +++ b/Lib/idlelib/idle_test/test_outwin.py @@ -58,11 +58,6 @@ def test_write(self): get = self.text.get write = self.window.write - # Test bytes. - b = b'Test bytes.' - eq(write(b), len(b)) - eq(get('1.0', '1.end'), b.decode()) - # No new line - insert stays on same line. delete('1.0', 'end') test_text = 'test text' diff --git a/Lib/idlelib/iomenu.py b/Lib/idlelib/iomenu.py index 4b2833b8ca56f..7f3f656ee2874 100644 --- a/Lib/idlelib/iomenu.py +++ b/Lib/idlelib/iomenu.py @@ -13,52 +13,12 @@ import idlelib from idlelib.config import idleConf -if idlelib.testing: # Set True by test.test_idle to avoid setlocale. - encoding = 'utf-8' - errors = 'surrogateescape' +encoding = 'utf-8' +if sys.platform == 'win32': + errors = 'surrogatepass' else: - # Try setting the locale, so that we can find out - # what encoding to use - try: - import locale - locale.setlocale(locale.LC_CTYPE, "") - except (ImportError, locale.Error): - pass - - if sys.platform == 'win32': - encoding = 'utf-8' - errors = 'surrogateescape' - else: - try: - # Different things can fail here: the locale module may not be - # loaded, it may not offer nl_langinfo, or CODESET, or the - # resulting codeset may be unknown to Python. We ignore all - # these problems, falling back to ASCII - locale_encoding = locale.nl_langinfo(locale.CODESET) - if locale_encoding: - codecs.lookup(locale_encoding) - except (NameError, AttributeError, LookupError): - # Try getdefaultlocale: it parses environment variables, - # which may give a clue. Unfortunately, getdefaultlocale has - # bugs that can cause ValueError. - try: - locale_encoding = locale.getdefaultlocale()[1] - if locale_encoding: - codecs.lookup(locale_encoding) - except (ValueError, LookupError): - pass + errors = 'surrogateescape' - if locale_encoding: - encoding = locale_encoding.lower() - errors = 'strict' - else: - # POSIX locale or macOS - encoding = 'ascii' - errors = 'surrogateescape' - # Encoding is used in multiple files; locale_encoding nowhere. - # The only use of 'encoding' below is in _decode as initial value - # of deprecated block asking user for encoding. - # Perhaps use elsewhere should be reviewed. coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) diff --git a/Lib/idlelib/outwin.py b/Lib/idlelib/outwin.py index 90272b6feb4af..5ab08bbaf4bc9 100644 --- a/Lib/idlelib/outwin.py +++ b/Lib/idlelib/outwin.py @@ -6,7 +6,6 @@ from tkinter import messagebox from idlelib.editor import EditorWindow -from idlelib import iomenu file_line_pats = [ @@ -110,8 +109,7 @@ def write(self, s, tags=(), mark="insert"): Return: Length of text inserted. """ - if isinstance(s, bytes): - s = s.decode(iomenu.encoding, "replace") + assert isinstance(s, str) self.text.insert(mark, s, tags) self.text.see(mark) self.text.update() diff --git a/Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst b/Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst new file mode 100644 index 0000000000000..434be10b5309c --- /dev/null +++ b/Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst @@ -0,0 +1,2 @@ +The encoding of ``stdin``, ``stdout`` and ``stderr`` in IDLE is now always +UTF-8. [View Less]

1 0

bpo-41152: IDLE: always use UTF-8 for standard IO streams (GH-21214)
by Serhiy Storchaka June 30, 2020

June 30, 2020

https://github.com/python/cpython/commit/2515a28230b1a011205f30263da6b01c6b… commit: 2515a28230b1a011205f30263da6b01c6bd167a3 branch: master author: Serhiy Storchaka <storchaka(a)gmail.com> committer: GitHub <noreply(a)github.com> date: 2020-06-29T20:18:22-04:00 summary: bpo-41152: IDLE: always use UTF-8 for standard IO streams (GH-21214) files: A Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst M Lib/idlelib/NEWS.txt M Lib/idlelib/idle_test/test_outwin.py M Lib/… [View More]idlelib/iomenu.py M Lib/idlelib/outwin.py diff --git a/Lib/idlelib/NEWS.txt b/Lib/idlelib/NEWS.txt index c270fcbae2bd1..7ae29af0b30ce 100644 --- a/Lib/idlelib/NEWS.txt +++ b/Lib/idlelib/NEWS.txt @@ -3,6 +3,9 @@ Released on 2020-10-05? ====================================== +bpo-41152: The encoding of ``stdin``, ``stdout`` and ``stderr`` in IDLE +is now always UTF-8. + bpo-41144: Make Open Module open a special module such as os.path. bpo-40723: Make test_idle pass when run after import. diff --git a/Lib/idlelib/idle_test/test_outwin.py b/Lib/idlelib/idle_test/test_outwin.py index cd099ecd841b3..e347bfca7f191 100644 --- a/Lib/idlelib/idle_test/test_outwin.py +++ b/Lib/idlelib/idle_test/test_outwin.py @@ -58,11 +58,6 @@ def test_write(self): get = self.text.get write = self.window.write - # Test bytes. - b = b'Test bytes.' - eq(write(b), len(b)) - eq(get('1.0', '1.end'), b.decode()) - # No new line - insert stays on same line. delete('1.0', 'end') test_text = 'test text' diff --git a/Lib/idlelib/iomenu.py b/Lib/idlelib/iomenu.py index 4b2833b8ca56f..7f3f656ee2874 100644 --- a/Lib/idlelib/iomenu.py +++ b/Lib/idlelib/iomenu.py @@ -13,52 +13,12 @@ import idlelib from idlelib.config import idleConf -if idlelib.testing: # Set True by test.test_idle to avoid setlocale. - encoding = 'utf-8' - errors = 'surrogateescape' +encoding = 'utf-8' +if sys.platform == 'win32': + errors = 'surrogatepass' else: - # Try setting the locale, so that we can find out - # what encoding to use - try: - import locale - locale.setlocale(locale.LC_CTYPE, "") - except (ImportError, locale.Error): - pass - - if sys.platform == 'win32': - encoding = 'utf-8' - errors = 'surrogateescape' - else: - try: - # Different things can fail here: the locale module may not be - # loaded, it may not offer nl_langinfo, or CODESET, or the - # resulting codeset may be unknown to Python. We ignore all - # these problems, falling back to ASCII - locale_encoding = locale.nl_langinfo(locale.CODESET) - if locale_encoding: - codecs.lookup(locale_encoding) - except (NameError, AttributeError, LookupError): - # Try getdefaultlocale: it parses environment variables, - # which may give a clue. Unfortunately, getdefaultlocale has - # bugs that can cause ValueError. - try: - locale_encoding = locale.getdefaultlocale()[1] - if locale_encoding: - codecs.lookup(locale_encoding) - except (ValueError, LookupError): - pass + errors = 'surrogateescape' - if locale_encoding: - encoding = locale_encoding.lower() - errors = 'strict' - else: - # POSIX locale or macOS - encoding = 'ascii' - errors = 'surrogateescape' - # Encoding is used in multiple files; locale_encoding nowhere. - # The only use of 'encoding' below is in _decode as initial value - # of deprecated block asking user for encoding. - # Perhaps use elsewhere should be reviewed. coding_re = re.compile(r'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)', re.ASCII) blank_re = re.compile(r'^[ \t\f]*(?:[#\r\n]|$)', re.ASCII) diff --git a/Lib/idlelib/outwin.py b/Lib/idlelib/outwin.py index 90272b6feb4af..5ab08bbaf4bc9 100644 --- a/Lib/idlelib/outwin.py +++ b/Lib/idlelib/outwin.py @@ -6,7 +6,6 @@ from tkinter import messagebox from idlelib.editor import EditorWindow -from idlelib import iomenu file_line_pats = [ @@ -110,8 +109,7 @@ def write(self, s, tags=(), mark="insert"): Return: Length of text inserted. """ - if isinstance(s, bytes): - s = s.decode(iomenu.encoding, "replace") + assert isinstance(s, str) self.text.insert(mark, s, tags) self.text.see(mark) self.text.update() diff --git a/Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst b/Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst new file mode 100644 index 0000000000000..434be10b5309c --- /dev/null +++ b/Misc/NEWS.d/next/IDLE/2020-06-29-14-51-15.bpo-41152.d6mV0C.rst @@ -0,0 +1,2 @@ +The encoding of ``stdin``, ``stdout`` and ``stderr`` in IDLE is now always +UTF-8. [View Less]

1 0

bpo-23427: Add sys.orig_argv attribute (GH-20729)
by Victor Stinner June 29, 2020

June 29, 2020

https://github.com/python/cpython/commit/dd8a93e23b5c4f9290e1cea6183d97eb9b… commit: dd8a93e23b5c4f9290e1cea6183d97eb9b5e61c0 branch: master author: Victor Stinner <vstinner(a)python.org> committer: GitHub <noreply(a)github.com> date: 2020-06-30T00:49:03+02:00 summary: bpo-23427: Add sys.orig_argv attribute (GH-20729) Add sys.orig_argv attribute: the list of the original command line arguments passed to the Python executable. Rename also PyConfig._orig_argv to PyConfig.orig_argv … [View More]and document it. files: A Misc/NEWS.d/next/Library/2020-06-08-18-59-16.bpo-23427.ilg1Cz.rst M Doc/c-api/init_config.rst M Doc/library/sys.rst M Doc/whatsnew/3.10.rst M Include/cpython/initconfig.h M Lib/test/test_embed.py M Lib/test/test_sys.py M Python/initconfig.c M Python/sysmodule.c diff --git a/Doc/c-api/init_config.rst b/Doc/c-api/init_config.rst index 9b0728d962152..84064d93ea3b1 100644 --- a/Doc/c-api/init_config.rst +++ b/Doc/c-api/init_config.rst @@ -424,6 +424,8 @@ PyConfig :c:member:`~PyConfig.argv` is empty, an empty string is added to ensure that :data:`sys.argv` always exists and is never empty. + See also the :c:member:`~PyConfig.orig_argv` member. + .. c:member:: wchar_t* base_exec_prefix :data:`sys.base_exec_prefix`. @@ -586,6 +588,23 @@ PyConfig * 1: Remove assertions, set ``__debug__`` to ``False`` * 2: Strip docstrings + .. c:member:: PyWideStringList orig_argv + + The list of the original command line arguments passed to the Python + executable. + + If :c:member:`~PyConfig.orig_argv` list is empty and + :c:member:`~PyConfig.argv` is not a list only containing an empty + string, :c:func:`PyConfig_Read()` copies :c:member:`~PyConfig.argv` into + :c:member:`~PyConfig.orig_argv` before modifying + :c:member:`~PyConfig.argv` (if :c:member:`~PyConfig.parse_argv` is + non-zero). + + See also the :c:member:`~PyConfig.argv` member and the + :c:func:`Py_GetArgcArgv` function. + + .. versionadded:: 3.10 + .. c:member:: int parse_argv If non-zero, parse :c:member:`~PyConfig.argv` the same way the regular @@ -982,6 +1001,8 @@ Py_GetArgcArgv() Get the original command line arguments, before Python modified them. + See also :c:member:`PyConfig.orig_argv` member. + Multi-Phase Initialization Private Provisional API -------------------------------------------------- diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index 880f252f84aa0..d201d7061f980 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -66,6 +66,8 @@ always available. To loop over the standard input, or the list of files given on the command line, see the :mod:`fileinput` module. + See also :data:`sys.orig_argv`. + .. note:: On Unix, command line arguments are passed by bytes from OS. Python decodes them with filesystem encoding and "surrogateescape" error handler. @@ -1037,6 +1039,16 @@ always available. deleting essential items from the dictionary may cause Python to fail. +.. data:: orig_argv + + The list of the original command line arguments passed to the Python + executable. + + See also :data:`sys.argv`. + + .. versionadded:: 3.10 + + .. data:: path .. index:: triple: module; search; path diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst index 0c4ff026bd201..a755d2672ae6d 100644 --- a/Doc/whatsnew/3.10.rst +++ b/Doc/whatsnew/3.10.rst @@ -110,6 +110,13 @@ Added the *root_dir* and *dir_fd* parameters in :func:`~glob.glob` and :func:`~glob.iglob` which allow to specify the root directory for searching. (Contributed by Serhiy Storchaka in :issue:`38144`.) +sys +--- + +Add :data:`sys.orig_argv` attribute: the list of the original command line +arguments passed to the Python executable. +(Contributed by Victor Stinner in :issue:`23427`.) + Optimizations ============= @@ -150,10 +157,14 @@ C API Changes New Features ------------ - The result of :c:func:`PyNumber_Index` now always has exact type :class:`int`. +* The result of :c:func:`PyNumber_Index` now always has exact type :class:`int`. Previously, the result could have been an instance of a subclass of ``int``. (Contributed by Serhiy Storchaka in :issue:`40792`.) +* Add a new :c:member:`~PyConfig.orig_argv` member to the :c:type:`PyConfig` + structure: the list of the original command line arguments passed to the + Python executable. + (Contributed by Victor Stinner in :issue:`23427`.) Porting to Python 3.10 ---------------------- diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 5b05eab63bb46..bbe8387677715 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -408,13 +408,15 @@ typedef struct { Default: 0. */ int _isolated_interpreter; - /* Original command line arguments. If _orig_argv is empty and _argv is - not equal to [''], PyConfig_Read() copies the configuration 'argv' list - into '_orig_argv' list before modifying 'argv' list (if parse_argv - is non-zero). + /* The list of the original command line arguments passed to the Python + executable. + + If 'orig_argv' list is empty and 'argv' is not a list only containing an + empty string, PyConfig_Read() copies 'argv' into 'orig_argv' before + modifying 'argv' (if 'parse_argv is non-zero). _PyConfig_Write() initializes Py_GetArgcArgv() to this list. */ - PyWideStringList _orig_argv; + PyWideStringList orig_argv; } PyConfig; PyAPI_FUNC(void) PyConfig_InitPythonConfig(PyConfig *config); @@ -445,7 +447,7 @@ PyAPI_FUNC(PyStatus) PyConfig_SetWideStringList(PyConfig *config, /* Get the original command line arguments, before Python modified them. - See also PyConfig._orig_argv. */ + See also PyConfig.orig_argv. */ PyAPI_FUNC(void) Py_GetArgcArgv(int *argc, wchar_t ***argv); #endif /* !Py_LIMITED_API */ diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index fe47289777a42..174892a22b48b 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -365,7 +365,7 @@ class InitConfigTests(EmbeddingTestsMixin, unittest.TestCase): 'program_name': GET_DEFAULT_CONFIG, 'parse_argv': 0, 'argv': [""], - '_orig_argv': [], + 'orig_argv': [], 'xoptions': [], 'warnoptions': [], @@ -739,11 +739,11 @@ def test_init_from_config(self): 'pycache_prefix': 'conf_pycache_prefix', 'program_name': './conf_program_name', 'argv': ['-c', 'arg2'], - '_orig_argv': ['python3', - '-W', 'cmdline_warnoption', - '-X', 'cmdline_xoption', - '-c', 'pass', - 'arg2'], + 'orig_argv': ['python3', + '-W', 'cmdline_warnoption', + '-X', 'cmdline_xoption', + '-c', 'pass', + 'arg2'], 'parse_argv': 1, 'xoptions': [ 'config_xoption1=3', @@ -874,7 +874,7 @@ def test_preinit_parse_argv(self): } config = { 'argv': ['script.py'], - '_orig_argv': ['python3', '-X', 'dev', 'script.py'], + 'orig_argv': ['python3', '-X', 'dev', 'script.py'], 'run_filename': os.path.abspath('script.py'), 'dev_mode': 1, 'faulthandler': 1, @@ -896,7 +896,7 @@ def test_preinit_dont_parse_argv(self): "script.py"] config = { 'argv': argv, - '_orig_argv': argv, + 'orig_argv': argv, 'isolated': 0, } self.check_all_configs("test_preinit_dont_parse_argv", config, preconfig, @@ -975,9 +975,9 @@ def test_init_sys_add(self): 'ignore:::sysadd_warnoption', 'ignore:::config_warnoption', ], - '_orig_argv': ['python3', - '-W', 'ignore:::cmdline_warnoption', - '-X', 'cmdline_xoption'], + 'orig_argv': ['python3', + '-W', 'ignore:::cmdline_warnoption', + '-X', 'cmdline_xoption'], } self.check_all_configs("test_init_sys_add", config, api=API_PYTHON) @@ -986,7 +986,7 @@ def test_init_run_main(self): 'print(json.dumps(_testinternalcapi.get_configs()))') config = { 'argv': ['-c', 'arg2'], - '_orig_argv': ['python3', '-c', code, 'arg2'], + 'orig_argv': ['python3', '-c', code, 'arg2'], 'program_name': './python3', 'run_command': code + '\n', 'parse_argv': 1, @@ -998,9 +998,9 @@ def test_init_main(self): 'print(json.dumps(_testinternalcapi.get_configs()))') config = { 'argv': ['-c', 'arg2'], - '_orig_argv': ['python3', - '-c', code, - 'arg2'], + 'orig_argv': ['python3', + '-c', code, + 'arg2'], 'program_name': './python3', 'run_command': code + '\n', 'parse_argv': 1, @@ -1014,7 +1014,7 @@ def test_init_parse_argv(self): config = { 'parse_argv': 1, 'argv': ['-c', 'arg1', '-v', 'arg3'], - '_orig_argv': ['./argv0', '-E', '-c', 'pass', 'arg1', '-v', 'arg3'], + 'orig_argv': ['./argv0', '-E', '-c', 'pass', 'arg1', '-v', 'arg3'], 'program_name': './argv0', 'run_command': 'pass\n', 'use_environment': 0, @@ -1028,7 +1028,7 @@ def test_init_dont_parse_argv(self): config = { 'parse_argv': 0, 'argv': ['./argv0', '-E', '-c', 'pass', 'arg1', '-v', 'arg3'], - '_orig_argv': ['./argv0', '-E', '-c', 'pass', 'arg1', '-v', 'arg3'], + 'orig_argv': ['./argv0', '-E', '-c', 'pass', 'arg1', '-v', 'arg3'], 'program_name': './argv0', } self.check_all_configs("test_init_dont_parse_argv", config, pre_config, @@ -1316,9 +1316,9 @@ def test_init_warnoptions(self): 'faulthandler': 1, 'bytes_warning': 1, 'warnoptions': warnoptions, - '_orig_argv': ['python3', - '-Wignore:::cmdline1', - '-Wignore:::cmdline2'], + 'orig_argv': ['python3', + '-Wignore:::cmdline1', + '-Wignore:::cmdline2'], } self.check_all_configs("test_init_warnoptions", config, preconfig, api=API_PYTHON) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 194128e5c6bf2..aaba6630ff439 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -434,6 +434,11 @@ def g456(): def test_attributes(self): self.assertIsInstance(sys.api_version, int) self.assertIsInstance(sys.argv, list) + for arg in sys.argv: + self.assertIsInstance(arg, str) + self.assertIsInstance(sys.orig_argv, list) + for arg in sys.orig_argv: + self.assertIsInstance(arg, str) self.assertIn(sys.byteorder, ("little", "big")) self.assertIsInstance(sys.builtin_module_names, tuple) self.assertIsInstance(sys.copyright, str) @@ -930,6 +935,21 @@ def test__enablelegacywindowsfsencoding(self): out = out.decode('ascii', 'replace').rstrip() self.assertEqual(out, 'mbcs replace') + def test_orig_argv(self): + code = textwrap.dedent(''' + import sys + print(sys.argv) + print(sys.orig_argv) + ''') + args = [sys.executable, '-I', '-X', 'utf8', '-c', code, 'arg'] + proc = subprocess.run(args, check=True, capture_output=True, text=True) + expected = [ + repr(['-c', 'arg']), # sys.argv + repr(args), # sys.orig_argv + ] + self.assertEqual(proc.stdout.rstrip().splitlines(), expected, + proc) + @test.support.cpython_only class UnraisableHookTest(unittest.TestCase): diff --git a/Misc/NEWS.d/next/Library/2020-06-08-18-59-16.bpo-23427.ilg1Cz.rst b/Misc/NEWS.d/next/Library/2020-06-08-18-59-16.bpo-23427.ilg1Cz.rst new file mode 100644 index 0000000000000..37382975bb4fc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-06-08-18-59-16.bpo-23427.ilg1Cz.rst @@ -0,0 +1,2 @@ +Add :data:`sys.orig_argv` attribute: the list of the original command line +arguments passed to the Python executable. diff --git a/Python/initconfig.c b/Python/initconfig.c index 96169454506cb..86285c77e2307 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -601,7 +601,7 @@ PyConfig_Clear(PyConfig *config) CLEAR(config->run_filename); CLEAR(config->check_hash_pycs_mode); - _PyWideStringList_Clear(&config->_orig_argv); + _PyWideStringList_Clear(&config->orig_argv); #undef CLEAR } @@ -856,7 +856,7 @@ _PyConfig_Copy(PyConfig *config, const PyConfig *config2) COPY_ATTR(pathconfig_warnings); COPY_ATTR(_init_main); COPY_ATTR(_isolated_interpreter); - COPY_WSTRLIST(_orig_argv); + COPY_WSTRLIST(orig_argv); #undef COPY_ATTR #undef COPY_WSTR_ATTR @@ -957,7 +957,7 @@ config_as_dict(const PyConfig *config) SET_ITEM_INT(pathconfig_warnings); SET_ITEM_INT(_init_main); SET_ITEM_INT(_isolated_interpreter); - SET_ITEM_WSTRLIST(_orig_argv); + SET_ITEM_WSTRLIST(orig_argv); return dict; @@ -1864,8 +1864,8 @@ _PyConfig_Write(const PyConfig *config, _PyRuntimeState *runtime) preconfig->use_environment = config->use_environment; preconfig->dev_mode = config->dev_mode; - if (_Py_SetArgcArgv(config->_orig_argv.length, - config->_orig_argv.items) < 0) + if (_Py_SetArgcArgv(config->orig_argv.length, + config->orig_argv.items) < 0) { return _PyStatus_NO_MEMORY(); } @@ -2501,11 +2501,11 @@ PyConfig_Read(PyConfig *config) config_get_global_vars(config); - if (config->_orig_argv.length == 0 + if (config->orig_argv.length == 0 && !(config->argv.length == 1 && wcscmp(config->argv.items[0], L"") == 0)) { - if (_PyWideStringList_Copy(&config->_orig_argv, &config->argv) < 0) { + if (_PyWideStringList_Copy(&config->orig_argv, &config->argv) < 0) { return _PyStatus_NO_MEMORY(); } } @@ -2589,7 +2589,7 @@ PyConfig_Read(PyConfig *config) assert(config->check_hash_pycs_mode != NULL); assert(config->_install_importlib >= 0); assert(config->pathconfig_warnings >= 0); - assert(_PyWideStringList_CheckConsistency(&config->_orig_argv)); + assert(_PyWideStringList_CheckConsistency(&config->orig_argv)); status = _PyStatus_OK(); diff --git a/Python/sysmodule.c b/Python/sysmodule.c index f3b5a6afdf1e5..9fcdb5dbc49b1 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2931,6 +2931,7 @@ _PySys_InitMain(PyThreadState *tstate) } COPY_LIST("argv", config->argv); + COPY_LIST("orig_argv", config->orig_argv); COPY_LIST("warnoptions", config->warnoptions); PyObject *xoptions = sys_create_xoptions_dict(config); [View Less]

1 0

bpo-40924: Ensure importlib.resources.path returns an extant path (GH-20857)
by Jason R. Coombs June 29, 2020

June 29, 2020

https://github.com/python/cpython/commit/2fb5f038f2a2e91a7293d62dfd5601e6eb… commit: 2fb5f038f2a2e91a7293d62dfd5601e6eb500c55 branch: master author: Jason R. Coombs <jaraco(a)jaraco.com> committer: GitHub <noreply(a)github.com> date: 2020-06-29T22:59:22+02:00 summary: bpo-40924: Ensure importlib.resources.path returns an extant path (GH-20857) files: A Misc/NEWS.d/next/Library/2020-06-13-12-04-50.bpo-40924.SM_luS.rst M Lib/importlib/readers.py M Lib/test/test_importlib/test_path.… [View More]py diff --git a/Lib/importlib/readers.py b/Lib/importlib/readers.py index fb49ebe2b1642..6331e4daf4313 100644 --- a/Lib/importlib/readers.py +++ b/Lib/importlib/readers.py @@ -7,11 +7,19 @@ class FileReader(abc.TraversableResources): def __init__(self, loader): self.path = pathlib.Path(loader.path).parent + def resource_path(self, resource): + """ + Return the file system path to prevent + `resources.path()` from creating a temporary + copy. + """ + return str(self.path.joinpath(resource)) + def files(self): return self.path -class ZipReader(FileReader): +class ZipReader(abc.TraversableResources): def __init__(self, loader, module): _, _, name = module.rpartition('.') prefix = loader.prefix.replace('\\', '/') + name + '/' @@ -28,3 +36,6 @@ def is_resource(self, path): # for non-existent paths. target = self.files().joinpath(path) return target.is_file() and target.exists() + + def files(self): + return self.path diff --git a/Lib/test/test_importlib/test_path.py b/Lib/test/test_importlib/test_path.py index c4e7285411322..abf8086558158 100644 --- a/Lib/test/test_importlib/test_path.py +++ b/Lib/test/test_importlib/test_path.py @@ -27,6 +27,15 @@ def test_reading(self): class PathDiskTests(PathTests, unittest.TestCase): data = data01 + def test_natural_path(self): + """ + Guarantee the internal implementation detail that + file-system-backed resources do not get the tempdir + treatment. + """ + with resources.path(self.data, 'utf-8.file') as path: + assert 'data' in str(path) + class PathZipTests(PathTests, util.ZipSetup, unittest.TestCase): def test_remove_in_context_manager(self): diff --git a/Misc/NEWS.d/next/Library/2020-06-13-12-04-50.bpo-40924.SM_luS.rst b/Misc/NEWS.d/next/Library/2020-06-13-12-04-50.bpo-40924.SM_luS.rst new file mode 100644 index 0000000000000..4e4c6e88ac572 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-06-13-12-04-50.bpo-40924.SM_luS.rst @@ -0,0 +1,3 @@ +Ensure ``importlib.resources.path`` returns an extant path for the +SourceFileLoader's resource reader. Avoids the regression identified in +master while a long-term solution is devised. [View Less]

1 0