[Python-checkins] bpo-32030: Add _Py_EncodeLocaleRaw() (#4961)

Victor Stinner webhook-mailer at python.org
Thu Dec 21 10:20:35 EST 2017


https://github.com/python/cpython/commit/9dd762013fd9fcf975ad51700b55d050ca9ed60e
commit: 9dd762013fd9fcf975ad51700b55d050ca9ed60e
branch: master
author: Victor Stinner <victor.stinner at gmail.com>
committer: GitHub <noreply at github.com>
date: 2017-12-21T16:20:32+01:00
summary:

bpo-32030: Add _Py_EncodeLocaleRaw() (#4961)

Replace Py_EncodeLocale() with _Py_EncodeLocaleRaw() in:

* _Py_wfopen()
* _Py_wreadlink()
* _Py_wrealpath()
* _Py_wstat()
* pymain_open_filename()

These functions are called early during Python initialization, so only
the RAW memory allocator must be used.

files:
M Include/fileutils.h
M Modules/getpath.c
M Modules/main.c
M Objects/unicodeobject.c
M Python/fileutils.c

diff --git a/Include/fileutils.h b/Include/fileutils.h
index 900c70faad7..b7b6cd26c00 100644
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@@ -13,10 +13,13 @@ PyAPI_FUNC(wchar_t *) Py_DecodeLocale(
 PyAPI_FUNC(char*) Py_EncodeLocale(
     const wchar_t *text,
     size_t *error_pos);
+
+PyAPI_FUNC(char*) _Py_EncodeLocaleRaw(
+    const wchar_t *text,
+    size_t *error_pos);
 #endif
 
 #ifndef Py_LIMITED_API
-
 PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
 
 #ifdef MS_WINDOWS
diff --git a/Modules/getpath.c b/Modules/getpath.c
index b4b33437b6f..494fa19bdf3 100644
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -140,13 +140,13 @@ _Py_wstat(const wchar_t* path, struct stat *buf)
 {
     int err;
     char *fname;
-    fname = Py_EncodeLocale(path, NULL);
+    fname = _Py_EncodeLocaleRaw(path, NULL);
     if (fname == NULL) {
         errno = EINVAL;
         return -1;
     }
     err = stat(fname, buf);
-    PyMem_Free(fname);
+    PyMem_RawFree(fname);
     return err;
 }
 
diff --git a/Modules/main.c b/Modules/main.c
index 1bf706b162c..dff7894bf35 100644
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -1490,14 +1490,14 @@ pymain_open_filename(_PyMain *pymain)
         char *cfilename_buffer;
         const char *cfilename;
         int err = errno;
-        cfilename_buffer = Py_EncodeLocale(pymain->filename, NULL);
+        cfilename_buffer = _Py_EncodeLocaleRaw(pymain->filename, NULL);
         if (cfilename_buffer != NULL)
             cfilename = cfilename_buffer;
         else
             cfilename = "<unprintable file name>";
         fprintf(stderr, "%ls: can't open file '%s': [Errno %d] %s\n",
                 pymain->config.program, cfilename, err, strerror(err));
-        PyMem_Free(cfilename_buffer);
+        PyMem_RawFree(cfilename_buffer);
         pymain->status = 2;
         return NULL;
     }
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index 716e352dea6..92a6ad6b979 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5158,7 +5158,8 @@ _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size, size_t *p_wlen)
    On memory allocation failure, return NULL and write (size_t)-1 into
    *error_pos (if error_pos is set). */
 char*
-_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
+_Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos,
+                               int raw_malloc)
 {
     const Py_ssize_t max_char_size = 4;
     Py_ssize_t len = wcslen(text);
@@ -5167,7 +5168,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
 
     char *bytes;
     if (len <= PY_SSIZE_T_MAX / max_char_size - 1) {
-        bytes = PyMem_Malloc((len + 1) * max_char_size);
+        if (raw_malloc) {
+            bytes = PyMem_RawMalloc((len + 1) * max_char_size);
+        }
+        else {
+            bytes = PyMem_Malloc((len + 1) * max_char_size);
+        }
     }
     else {
         bytes = NULL;
@@ -5221,7 +5227,13 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
     *p++ = '\0';
 
     size_t final_size = (p - bytes);
-    char *bytes2 = PyMem_Realloc(bytes, final_size);
+    char *bytes2;
+    if (raw_malloc) {
+        bytes2 = PyMem_RawRealloc(bytes, final_size);
+    }
+    else {
+        bytes2 = PyMem_Realloc(bytes, final_size);
+    }
     if (bytes2 == NULL) {
         if (error_pos != NULL) {
             *error_pos = (size_t)-1;
@@ -5231,7 +5243,12 @@ _Py_EncodeUTF8_surrogateescape(const wchar_t *text, size_t *error_pos)
     return bytes2;
 
  error:
-    PyMem_Free(bytes);
+    if (raw_malloc) {
+        PyMem_RawFree(bytes);
+    }
+    else {
+        PyMem_Free(bytes);
+    }
     return NULL;
 }
 
diff --git a/Python/fileutils.c b/Python/fileutils.c
index eeb5f2e89d2..1ccd4baa6d2 100644
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -23,7 +23,7 @@ extern int winerror_to_errno(int);
 extern wchar_t* _Py_DecodeUTF8_surrogateescape(const char *s, Py_ssize_t size,
                                                size_t *p_wlen);
 extern char* _Py_EncodeUTF8_surrogateescape(const wchar_t *text,
-                                            size_t *error_pos);
+                                            size_t *error_pos, int raw_malloc);
 
 #ifdef O_CLOEXEC
 /* Does open() support the O_CLOEXEC flag? Possible values:
@@ -183,7 +183,7 @@ check_force_ascii(void)
 }
 
 static char*
-encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
+encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos, int raw_malloc)
 {
     char *result = NULL, *out;
     size_t len, i;
@@ -194,7 +194,13 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
 
     len = wcslen(text);
 
-    result = PyMem_Malloc(len + 1);  /* +1 for NUL byte */
+    /* +1 for NUL byte */
+    if (raw_malloc) {
+        result = PyMem_RawMalloc(len + 1);
+    }
+    else {
+        result = PyMem_Malloc(len + 1);
+    }
     if (result == NULL)
         return NULL;
 
@@ -211,9 +217,15 @@ encode_ascii_surrogateescape(const wchar_t *text, size_t *error_pos)
             *out++ = (char)(ch - 0xdc00);
         }
         else {
-            if (error_pos != NULL)
+            if (error_pos != NULL) {
                 *error_pos = i;
-            PyMem_Free(result);
+            }
+            if (raw_malloc) {
+                PyMem_RawFree(result);
+            }
+            else {
+                PyMem_Free(result);
+            }
             return NULL;
         }
     }
@@ -423,7 +435,7 @@ Py_DecodeLocale(const char* arg, size_t *size)
 
 #if !defined(__APPLE__) && !defined(__ANDROID__)
 static char*
-encode_locale(const wchar_t *text, size_t *error_pos)
+encode_current_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
 {
     const size_t len = wcslen(text);
     char *result = NULL, *bytes = NULL;
@@ -455,8 +467,14 @@ encode_locale(const wchar_t *text, size_t *error_pos)
                 else
                     converted = wcstombs(NULL, buf, 0);
                 if (converted == (size_t)-1) {
-                    if (result != NULL)
-                        PyMem_Free(result);
+                    if (result != NULL) {
+                        if (raw_malloc) {
+                            PyMem_RawFree(result);
+                        }
+                        else {
+                            PyMem_Free(result);
+                        }
+                    }
                     if (error_pos != NULL)
                         *error_pos = i;
                     return NULL;
@@ -475,10 +493,16 @@ encode_locale(const wchar_t *text, size_t *error_pos)
         }
 
         size += 1; /* nul byte at the end */
-        result = PyMem_Malloc(size);
+        if (raw_malloc) {
+            result = PyMem_RawMalloc(size);
+        }
+        else {
+            result = PyMem_Malloc(size);
+        }
         if (result == NULL) {
-            if (error_pos != NULL)
+            if (error_pos != NULL) {
                 *error_pos = (size_t)-1;
+            }
             return NULL;
         }
         bytes = result;
@@ -487,6 +511,28 @@ encode_locale(const wchar_t *text, size_t *error_pos)
 }
 #endif
 
+static char*
+encode_locale(const wchar_t *text, size_t *error_pos, int raw_malloc)
+{
+#if defined(__APPLE__) || defined(__ANDROID__)
+    return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
+#else   /* __APPLE__ */
+    if (Py_UTF8Mode == 1) {
+        return _Py_EncodeUTF8_surrogateescape(text, error_pos, raw_malloc);
+    }
+
+#ifndef MS_WINDOWS
+    if (force_ascii == -1)
+        force_ascii = check_force_ascii();
+
+    if (force_ascii)
+        return encode_ascii_surrogateescape(text, error_pos, raw_malloc);
+#endif
+
+    return encode_current_locale(text, error_pos, raw_malloc);
+#endif   /* __APPLE__ or __ANDROID__ */
+}
+
 /* Encode a wide character string to the locale encoding with the
    surrogateescape error handler: surrogate characters in the range
    U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
@@ -502,23 +548,16 @@ encode_locale(const wchar_t *text, size_t *error_pos)
 char*
 Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
 {
-#if defined(__APPLE__) || defined(__ANDROID__)
-    return _Py_EncodeUTF8_surrogateescape(text, error_pos);
-#else   /* __APPLE__ */
-    if (Py_UTF8Mode == 1) {
-        return _Py_EncodeUTF8_surrogateescape(text, error_pos);
-    }
-
-#ifndef MS_WINDOWS
-    if (force_ascii == -1)
-        force_ascii = check_force_ascii();
+    return encode_locale(text, error_pos, 0);
+}
 
-    if (force_ascii)
-        return encode_ascii_surrogateescape(text, error_pos);
-#endif
 
-    return encode_locale(text, error_pos);
-#endif   /* __APPLE__ or __ANDROID__ */
+/* Similar to Py_EncodeLocale(), but result must be freed by PyMem_RawFree()
+   instead of PyMem_Free(). */
+char*
+_Py_EncodeLocaleRaw(const wchar_t *text, size_t *error_pos)
+{
+    return encode_locale(text, error_pos, 1);
 }
 
 
@@ -1029,11 +1068,12 @@ _Py_wfopen(const wchar_t *path, const wchar_t *mode)
         errno = EINVAL;
         return NULL;
     }
-    cpath = Py_EncodeLocale(path, NULL);
-    if (cpath == NULL)
+    cpath = _Py_EncodeLocaleRaw(path, NULL);
+    if (cpath == NULL) {
         return NULL;
+    }
     f = fopen(cpath, cmode);
-    PyMem_Free(cpath);
+    PyMem_RawFree(cpath);
 #else
     f = _wfopen(path, mode);
 #endif
@@ -1341,13 +1381,13 @@ _Py_wreadlink(const wchar_t *path, wchar_t *buf, size_t bufsiz)
     int res;
     size_t r1;
 
-    cpath = Py_EncodeLocale(path, NULL);
+    cpath = _Py_EncodeLocaleRaw(path, NULL);
     if (cpath == NULL) {
         errno = EINVAL;
         return -1;
     }
     res = (int)readlink(cpath, cbuf, Py_ARRAY_LENGTH(cbuf));
-    PyMem_Free(cpath);
+    PyMem_RawFree(cpath);
     if (res == -1)
         return -1;
     if (res == Py_ARRAY_LENGTH(cbuf)) {
@@ -1386,13 +1426,13 @@ _Py_wrealpath(const wchar_t *path,
     wchar_t *wresolved_path;
     char *res;
     size_t r;
-    cpath = Py_EncodeLocale(path, NULL);
+    cpath = _Py_EncodeLocaleRaw(path, NULL);
     if (cpath == NULL) {
         errno = EINVAL;
         return NULL;
     }
     res = realpath(cpath, cresolved_path);
-    PyMem_Free(cpath);
+    PyMem_RawFree(cpath);
     if (res == NULL)
         return NULL;
 



More information about the Python-checkins mailing list