[Python-checkins] bpo-42658: Use LCMapStringEx in ntpath.normcase to match OS behaviour for case-folding (GH-93591)

zooba webhook-mailer at python.org
Fri Jun 10 06:14:57 EDT 2022


https://github.com/python/cpython/commit/927b5afee73218500a8fa80df86216cfdc24ef5a
commit: 927b5afee73218500a8fa80df86216cfdc24ef5a
branch: 3.11
author: Steve Dower <steve.dower at python.org>
committer: zooba <steve.dower at microsoft.com>
date: 2022-06-10T11:14:25+01:00
summary:

bpo-42658: Use LCMapStringEx in ntpath.normcase to match OS behaviour for case-folding (GH-93591)

* bpo-42658: Use LCMapStringEx in ntpath.normcase to match OS behaviour for case-folding (GH-32010)

* Use AsWideCharString to avoid memory leaks in deprecated unicode converter

Co-authored-by: AN Long <aisk at users.noreply.github.com>

files:
A Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst
M Lib/ntpath.py
M Lib/test/test_ntpath.py
M Modules/_winapi.c
M Modules/clinic/_winapi.c.h

diff --git a/Lib/ntpath.py b/Lib/ntpath.py
index 041ebc75cb127..73b1bd12ddca7 100644
--- a/Lib/ntpath.py
+++ b/Lib/ntpath.py
@@ -23,6 +23,7 @@
 import genericpath
 from genericpath import *
 
+
 __all__ = ["normcase","isabs","join","splitdrive","split","splitext",
            "basename","dirname","commonprefix","getsize","getmtime",
            "getatime","getctime", "islink","exists","lexists","isdir","isfile",
@@ -41,14 +42,39 @@ def _get_bothseps(path):
 # Other normalizations (such as optimizing '../' away) are not done
 # (this is done by normpath).
 
-def normcase(s):
-    """Normalize case of pathname.
-
-    Makes all characters lowercase and all slashes into backslashes."""
-    s = os.fspath(s)
-    if isinstance(s, bytes):
-        return s.replace(b'/', b'\\').lower()
-    else:
+try:
+    from _winapi import (
+        LCMapStringEx as _LCMapStringEx,
+        LOCALE_NAME_INVARIANT as _LOCALE_NAME_INVARIANT,
+        LCMAP_LOWERCASE as _LCMAP_LOWERCASE)
+
+    def normcase(s):
+        """Normalize case of pathname.
+
+        Makes all characters lowercase and all slashes into backslashes.
+        """
+        s = os.fspath(s)
+        if not s:
+            return s
+        if isinstance(s, bytes):
+            encoding = sys.getfilesystemencoding()
+            s = s.decode(encoding, 'surrogateescape').replace('/', '\\')
+            s = _LCMapStringEx(_LOCALE_NAME_INVARIANT,
+                               _LCMAP_LOWERCASE, s)
+            return s.encode(encoding, 'surrogateescape')
+        else:
+            return _LCMapStringEx(_LOCALE_NAME_INVARIANT,
+                                  _LCMAP_LOWERCASE,
+                                  s.replace('/', '\\'))
+except ImportError:
+    def normcase(s):
+        """Normalize case of pathname.
+
+        Makes all characters lowercase and all slashes into backslashes.
+        """
+        s = os.fspath(s)
+        if isinstance(s, bytes):
+            return os.fsencode(os.fsdecode(s).replace('/', '\\').lower())
         return s.replace('/', '\\').lower()
 
 
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py
index 7211ed861762b..ab3603bdd7301 100644
--- a/Lib/test/test_ntpath.py
+++ b/Lib/test/test_ntpath.py
@@ -852,6 +852,8 @@ def _check_function(self, func):
 
     def test_path_normcase(self):
         self._check_function(self.path.normcase)
+        if sys.platform == 'win32':
+            self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ')
 
     def test_path_isabs(self):
         self._check_function(self.path.isabs)
diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst
new file mode 100644
index 0000000000000..852cc77676a31
--- /dev/null
+++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst
@@ -0,0 +1,3 @@
+Support native Windows case-insensitive path comparisons by using
+``LCMapStringEx`` instead of :func:`str.lower` in :func:`ntpath.normcase`.
+Add ``LCMapStringEx`` to the :mod:`_winapi` module.
diff --git a/Modules/_winapi.c b/Modules/_winapi.c
index 3e24d512cac38..a3c30f2395545 100644
--- a/Modules/_winapi.c
+++ b/Modules/_winapi.c
@@ -1512,6 +1512,68 @@ _winapi_PeekNamedPipe_impl(PyObject *module, HANDLE handle, int size)
     }
 }
 
+/*[clinic input]
+_winapi.LCMapStringEx
+
+    locale: unicode
+    flags: DWORD
+    src: unicode
+
+[clinic start generated code]*/
+
+static PyObject *
+_winapi_LCMapStringEx_impl(PyObject *module, PyObject *locale, DWORD flags,
+                           PyObject *src)
+/*[clinic end generated code: output=8ea4c9d85a4a1f23 input=2fa6ebc92591731b]*/
+{
+    if (flags & (LCMAP_SORTHANDLE | LCMAP_HASH | LCMAP_BYTEREV |
+                 LCMAP_SORTKEY)) {
+        return PyErr_Format(PyExc_ValueError, "unsupported flags");
+    }
+
+    wchar_t *locale_ = PyUnicode_AsWideCharString(locale, NULL);
+    if (!locale_) {
+        return NULL;
+    }
+    wchar_t *src_ = PyUnicode_AsWideCharString(src, NULL);
+    if (!src_) {
+        PyMem_Free(locale_);
+        return NULL;
+    }
+
+    int dest_size = LCMapStringEx(locale_, flags, src_, -1, NULL, 0,
+                                  NULL, NULL, 0);
+    if (dest_size == 0) {
+        PyMem_Free(locale_);
+        PyMem_Free(src_);
+        return PyErr_SetFromWindowsErr(0);
+    }
+
+    wchar_t* dest = PyMem_NEW(wchar_t, dest_size);
+    if (dest == NULL) {
+        PyMem_Free(locale_);
+        PyMem_Free(src_);
+        return PyErr_NoMemory();
+    }
+
+    int nmapped = LCMapStringEx(locale_, flags, src_, -1, dest, dest_size,
+                                NULL, NULL, 0);
+    if (nmapped == 0) {
+        DWORD error = GetLastError();
+        PyMem_Free(locale_);
+        PyMem_Free(src_);
+        PyMem_DEL(dest);
+        return PyErr_SetFromWindowsErr(error);
+    }
+
+    PyObject *ret = PyUnicode_FromWideChar(dest, dest_size - 1);
+    PyMem_Free(locale_);
+    PyMem_Free(src_);
+    PyMem_DEL(dest);
+
+    return ret;
+}
+
 /*[clinic input]
 _winapi.ReadFile
 
@@ -2023,6 +2085,7 @@ static PyMethodDef winapi_functions[] = {
     _WINAPI_OPENFILEMAPPING_METHODDEF
     _WINAPI_OPENPROCESS_METHODDEF
     _WINAPI_PEEKNAMEDPIPE_METHODDEF
+    _WINAPI_LCMAPSTRINGEX_METHODDEF
     _WINAPI_READFILE_METHODDEF
     _WINAPI_SETNAMEDPIPEHANDLESTATE_METHODDEF
     _WINAPI_TERMINATEPROCESS_METHODDEF
@@ -2160,6 +2223,22 @@ static int winapi_exec(PyObject *m)
     WINAPI_CONSTANT(F_DWORD, FILE_TYPE_PIPE);
     WINAPI_CONSTANT(F_DWORD, FILE_TYPE_REMOTE);
 
+    WINAPI_CONSTANT("u", LOCALE_NAME_INVARIANT);
+    WINAPI_CONSTANT(F_DWORD, LOCALE_NAME_MAX_LENGTH);
+    WINAPI_CONSTANT("u", LOCALE_NAME_SYSTEM_DEFAULT);
+    WINAPI_CONSTANT("u", LOCALE_NAME_USER_DEFAULT);
+
+    WINAPI_CONSTANT(F_DWORD, LCMAP_FULLWIDTH);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_HALFWIDTH);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_HIRAGANA);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_KATAKANA);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_LINGUISTIC_CASING);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_LOWERCASE);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_SIMPLIFIED_CHINESE);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_TITLECASE);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE);
+    WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE);
+
     WINAPI_CONSTANT("i", NULL);
 
     return 0;
diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h
index 9c83d0ba2c545..87f624f9816de 100644
--- a/Modules/clinic/_winapi.c.h
+++ b/Modules/clinic/_winapi.c.h
@@ -840,6 +840,38 @@ _winapi_PeekNamedPipe(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
     return return_value;
 }
 
+PyDoc_STRVAR(_winapi_LCMapStringEx__doc__,
+"LCMapStringEx($module, /, locale, flags, src)\n"
+"--\n"
+"\n");
+
+#define _WINAPI_LCMAPSTRINGEX_METHODDEF    \
+    {"LCMapStringEx", _PyCFunction_CAST(_winapi_LCMapStringEx), METH_FASTCALL|METH_KEYWORDS, _winapi_LCMapStringEx__doc__},
+
+static PyObject *
+_winapi_LCMapStringEx_impl(PyObject *module, PyObject *locale, DWORD flags,
+                           PyObject *src);
+
+static PyObject *
+_winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
+{
+    PyObject *return_value = NULL;
+    static const char * const _keywords[] = {"locale", "flags", "src", NULL};
+    static _PyArg_Parser _parser = {"UkU:LCMapStringEx", _keywords, 0};
+    PyObject *locale;
+    DWORD flags;
+    PyObject *src;
+
+    if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
+        &locale, &flags, &src)) {
+        goto exit;
+    }
+    return_value = _winapi_LCMapStringEx_impl(module, locale, flags, src);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(_winapi_ReadFile__doc__,
 "ReadFile($module, /, handle, size, overlapped=False)\n"
 "--\n"
@@ -1184,4 +1216,4 @@ _winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=a4ede01aede352a4 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=dfbccec8f11b7433 input=a9049054013a1b77]*/



More information about the Python-checkins mailing list