[Python-checkins] r85582 - in python/branches/py3k: Include/fileutils.h Modules/main.c Modules/python.c Objects/unicodeobject.c Python/fileutils.c

victor.stinner python-checkins at python.org
Sun Oct 17 01:16:17 CEST 2010


Author: victor.stinner
Date: Sun Oct 17 01:16:16 2010
New Revision: 85582

Log:
Add an optional size argument to _Py_char2wchar()

_Py_char2wchar() callers usually need the length of the result in wide
characters. _Py_char2wchar() already knows this length when the conversion
finishes (O(1), whereas calling wcslen() afterwards is O(n)), so add an
optional output parameter to return it.


Modified:
   python/branches/py3k/Include/fileutils.h
   python/branches/py3k/Modules/main.c
   python/branches/py3k/Modules/python.c
   python/branches/py3k/Objects/unicodeobject.c
   python/branches/py3k/Python/fileutils.c

Modified: python/branches/py3k/Include/fileutils.h
==============================================================================
--- python/branches/py3k/Include/fileutils.h	(original)
+++ python/branches/py3k/Include/fileutils.h	Sun Oct 17 01:16:16 2010
@@ -6,7 +6,8 @@
 #endif
 
 PyAPI_FUNC(wchar_t *) _Py_char2wchar(
-    const char *arg);
+    const char *arg,
+    size_t *size);
 
 PyAPI_FUNC(char*) _Py_wchar2char(
     const wchar_t *text);

Modified: python/branches/py3k/Modules/main.c
==============================================================================
--- python/branches/py3k/Modules/main.c	(original)
+++ python/branches/py3k/Modules/main.c	Sun Oct 17 01:16:16 2010
@@ -486,10 +486,12 @@
             /* Use utf-8 on Mac OS X */
             unicode = PyUnicode_FromString(p);
 #else
-            wchar_t *wchar = _Py_char2wchar(p);
+            wchar_t *wchar;
+            size_t len;
+            wchar = _Py_char2wchar(p, &len);
             if (wchar == NULL)
                 continue;
-            unicode = PyUnicode_FromWideChar(wchar, wcslen(wchar));
+            unicode = PyUnicode_FromWideChar(wchar, len);
             PyMem_Free(wchar);
 #endif
             if (unicode == NULL)

Modified: python/branches/py3k/Modules/python.c
==============================================================================
--- python/branches/py3k/Modules/python.c	(original)
+++ python/branches/py3k/Modules/python.c	Sun Oct 17 01:16:16 2010
@@ -41,7 +41,7 @@
     oldloc = strdup(setlocale(LC_ALL, NULL));
     setlocale(LC_ALL, "");
     for (i = 0; i < argc; i++) {
-        argv_copy[i] = _Py_char2wchar(argv[i]);
+        argv_copy[i] = _Py_char2wchar(argv[i], NULL);
         if (!argv_copy[i])
             return 1;
         argv_copy2[i] = argv_copy[i];

Modified: python/branches/py3k/Objects/unicodeobject.c
==============================================================================
--- python/branches/py3k/Objects/unicodeobject.c	(original)
+++ python/branches/py3k/Objects/unicodeobject.c	Sun Oct 17 01:16:16 2010
@@ -1783,17 +1783,18 @@
         /* locale encoding with surrogateescape */
         wchar_t *wchar;
         PyObject *unicode;
+        size_t len;
 
         if (s[size] != '\0' || size != strlen(s)) {
             PyErr_SetString(PyExc_TypeError, "embedded NUL character");
             return NULL;
         }
 
-        wchar = _Py_char2wchar(s);
+        wchar = _Py_char2wchar(s, &len);
         if (wchar == NULL)
             return NULL;
 
-        unicode = PyUnicode_FromWideChar(wchar, -1);
+        unicode = PyUnicode_FromWideChar(wchar, len);
         PyMem_Free(wchar);
         return unicode;
     }

Modified: python/branches/py3k/Python/fileutils.c
==============================================================================
--- python/branches/py3k/Python/fileutils.c	(original)
+++ python/branches/py3k/Python/fileutils.c	Sun Oct 17 01:16:16 2010
@@ -13,11 +13,12 @@
 
    Use _Py_wchar2char() to encode the character string back to a byte string.
 
-   Return a pointer to a newly allocated (wide) character string (use
-   PyMem_Free() to free the memory), or NULL on error (conversion error or
-   memory error). */
+   Return a pointer to a newly allocated wide character string (use
+   PyMem_Free() to free the memory), or NULL on error (conversion error or
+   memory error). On success, if size is not NULL, write the number of wide
+   characters written, excluding the null character, into *size. */
 wchar_t*
-_Py_char2wchar(const char* arg)
+_Py_char2wchar(const char* arg, size_t *size)
 {
     wchar_t *res;
 #ifdef HAVE_BROKEN_MBSTOWCS
@@ -47,8 +48,11 @@
             for (tmp = res; *tmp != 0 &&
                          (*tmp < 0xd800 || *tmp > 0xdfff); tmp++)
                 ;
-            if (*tmp == 0)
+            if (*tmp == 0) {
+                if (size != NULL)
+                    *size = count;
                 return res;
+            }
         }
         PyMem_Free(res);
     }
@@ -113,6 +117,8 @@
             *out++ = 0xdc00 + *in++;
     *out = 0;
 #endif
+    if (size != NULL)
+        *size = out - res;
     return res;
 oom:
     fprintf(stderr, "out of memory\n");
@@ -325,12 +331,11 @@
         return -1;
     }
     cbuf[res] = '\0'; /* buf will be null terminated */
-    wbuf = _Py_char2wchar(cbuf);
+    wbuf = _Py_char2wchar(cbuf, &r1);
     if (wbuf == NULL) {
         errno = EINVAL;
         return -1;
     }
-    r1 = wcslen(wbuf);
     if (bufsiz <= r1) {
         PyMem_Free(wbuf);
         errno = EINVAL;
@@ -366,12 +371,11 @@
     if (res == NULL)
         return NULL;
 
-    wresolved_path = _Py_char2wchar(cresolved_path);
+    wresolved_path = _Py_char2wchar(cresolved_path, &r);
     if (wresolved_path == NULL) {
         errno = EINVAL;
         return NULL;
     }
-    r = wcslen(wresolved_path);
     if (resolved_path_size <= r) {
         PyMem_Free(wresolved_path);
         errno = EINVAL;
@@ -394,13 +398,14 @@
 #else
     char fname[PATH_MAX];
     wchar_t *wname;
+    size_t len;
 
     if (getcwd(fname, PATH_MAX) == NULL)
         return NULL;
-    wname = _Py_char2wchar(fname);
+    wname = _Py_char2wchar(fname, &len);
     if (wname == NULL)
         return NULL;
-    if (size <= wcslen(wname)) {
+    if (size <= len) {
         PyMem_Free(wname);
         return NULL;
     }


More information about the Python-checkins mailing list