[Python-checkins] cpython: Issues #15169, #14599: Make PyImport_ExecCodeModuleWithPathnames() use

brett.cannon python-checkins at python.org
Fri Jul 13 19:57:13 CEST 2012


http://hg.python.org/cpython/rev/9e164b404983
changeset:   78078:9e164b404983
user:        Brett Cannon <brett at python.org>
date:        Fri Jul 13 13:57:03 2012 -0400
summary:
  Issues #15169, #14599: Make PyImport_ExecCodeModuleWithPathnames() use
Lib/imp.py for imp.source_from_cache() instead of its own C version.

Also change PyImport_ExecCodeModuleObject() to not infer the source
path from the bytecode path like
PyImport_ExecCodeModuleWithPathnames() does. This makes the function
less magical.

This also has the side-effect of removing all uses of MAXPATHLEN in
Python/import.c which can cause failures on really long filenames.

files:
  Doc/c-api/import.rst        |     7 +-
  Lib/imp.py                  |    25 +-
  Lib/importlib/_bootstrap.py |    44 +
  Misc/NEWS                   |     9 +
  Python/import.c             |   195 +-
  Python/importlib.h          |  7865 +++++++++++-----------
  6 files changed, 4083 insertions(+), 4062 deletions(-)


diff --git a/Doc/c-api/import.rst b/Doc/c-api/import.rst
--- a/Doc/c-api/import.rst
+++ b/Doc/c-api/import.rst
@@ -163,9 +163,14 @@
 .. c:function:: PyObject* PyImport_ExecCodeModuleWithPathnames(char *name, PyObject *co, char *pathname, char *cpathname)
 
    Like :c:func:`PyImport_ExecCodeModuleObject`, but *name*, *pathname* and
-   *cpathname* are UTF-8 encoded strings.
+   *cpathname* are UTF-8 encoded strings. Attempts are also made to figure out
+   what the value for *pathname* should be from *cpathname* if the former is
+   set to ``NULL``.
 
    .. versionadded:: 3.2
+   .. versionchanged:: 3.3
+      Uses :func:`imp.source_from_cache()` in calculating the source path if
+      only the bytecode path is provided.
 
 
 .. c:function:: long PyImport_GetMagicNumber()
diff --git a/Lib/imp.py b/Lib/imp.py
--- a/Lib/imp.py
+++ b/Lib/imp.py
@@ -13,7 +13,7 @@
 
 # Directly exposed by this module
 from importlib._bootstrap import new_module
-from importlib._bootstrap import cache_from_source
+from importlib._bootstrap import cache_from_source, source_from_cache
 
 
 from importlib import _bootstrap
@@ -58,29 +58,6 @@
     return extensions + source + bytecode
 
 
-def source_from_cache(path):
-    """Given the path to a .pyc./.pyo file, return the path to its .py file.
-
-    The .pyc/.pyo file does not need to exist; this simply returns the path to
-    the .py file calculated to correspond to the .pyc/.pyo file.  If path does
-    not conform to PEP 3147 format, ValueError will be raised. If
-    sys.implementation.cache_tag is None then NotImplementedError is raised.
-
-    """
-    if sys.implementation.cache_tag is None:
-        raise NotImplementedError('sys.implementation.cache_tag is None')
-    head, pycache_filename = os.path.split(path)
-    head, pycache = os.path.split(head)
-    if pycache != _bootstrap._PYCACHE:
-        raise ValueError('{} not bottom-level directory in '
-                         '{!r}'.format(_bootstrap._PYCACHE, path))
-    if pycache_filename.count('.') != 2:
-        raise ValueError('expected only 2 dots in '
-                         '{!r}'.format(pycache_filename))
-    base_filename = pycache_filename.partition('.')[0]
-    return os.path.join(head, base_filename + machinery.SOURCE_SUFFIXES[0])
-
-
 class NullImporter:
 
     """Null import object."""
diff --git a/Lib/importlib/_bootstrap.py b/Lib/importlib/_bootstrap.py
--- a/Lib/importlib/_bootstrap.py
+++ b/Lib/importlib/_bootstrap.py
@@ -428,6 +428,50 @@
     return _path_join(head, _PYCACHE, filename)
 
 
+def source_from_cache(path):
+    """Given the path to a .pyc./.pyo file, return the path to its .py file.
+
+    The .pyc/.pyo file does not need to exist; this simply returns the path to
+    the .py file calculated to correspond to the .pyc/.pyo file.  If path does
+    not conform to PEP 3147 format, ValueError will be raised. If
+    sys.implementation.cache_tag is None then NotImplementedError is raised.
+
+    """
+    if sys.implementation.cache_tag is None:
+        raise NotImplementedError('sys.implementation.cache_tag is None')
+    head, pycache_filename = _path_split(path)
+    head, pycache = _path_split(head)
+    if pycache != _PYCACHE:
+        raise ValueError('{} not bottom-level directory in '
+                         '{!r}'.format(_PYCACHE, path))
+    if pycache_filename.count('.') != 2:
+        raise ValueError('expected only 2 dots in '
+                         '{!r}'.format(pycache_filename))
+    base_filename = pycache_filename.partition('.')[0]
+    return _path_join(head, base_filename + SOURCE_SUFFIXES[0])
+
+
+def _get_sourcefile(bytecode_path):
+    """Convert a bytecode file path to a source path (if possible).
+
+    This function exists purely for backwards-compatibility for
+    PyImport_ExecCodeModuleWithFilenames() in the C API.
+
+    """
+    if len(bytecode_path) == 0:
+        return None
+    rest, _, extension = bytecode_path.rparition('.')
+    if not rest or extension.lower()[-3:-1] != '.py':
+        return bytecode_path
+
+    try:
+        source_path = source_from_cache(bytecode_path)
+    except (NotImplementedError, ValueError):
+        source_path = bytcode_path[-1:]
+
+    return source_path if _path_isfile(source_stats) else bytecode_path
+
+
 def _verbose_message(message, *args):
     """Print the message to stderr if -v/PYTHONVERBOSE is turned on."""
     if sys.flags.verbose:
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -92,6 +92,15 @@
 - Issue 10924: Fixed mksalt() to use a RNG that is suitable for cryptographic
   purpose.
 
+C API
+-----
+
+- Issues #15169, #14599: Strip out the C implementation of
+  imp.source_from_cache() used by PyImport_ExecCodeModuleWithPathnames() and
+  used the Python code instead. Leads to PyImport_ExecCodeModuleObject() to not
+  try to infer the source path from the bytecode path as
+  PyImport_ExecCodeModuleWithPathnames() does.
+
 Extension Modules
 -----------------
 
diff --git a/Python/import.c b/Python/import.c
--- a/Python/import.c
+++ b/Python/import.c
@@ -630,8 +630,6 @@
                       "sys.modules failed");
 }
 
-static PyObject * get_sourcefile(PyObject *filename);
-static PyObject *make_source_pathname(PyObject *pathname);
 
 /* Execute a code object in a module and return the module object
  * WITH INCREMENTED REFERENCE COUNT.  If an error occurs, name is
@@ -668,18 +666,37 @@
     if (nameobj == NULL)
         return NULL;
 
+    if (cpathname != NULL) {
+        cpathobj = PyUnicode_DecodeFSDefault(cpathname);
+        if (cpathobj == NULL)
+            goto error;
+    }
+    else
+        cpathobj = NULL;
+
     if (pathname != NULL) {
         pathobj = PyUnicode_DecodeFSDefault(pathname);
         if (pathobj == NULL)
             goto error;
-    } else
+    }
+    else if (cpathobj != NULL) {
+        PyInterpreterState *interp = PyThreadState_GET()->interp;
+        _Py_IDENTIFIER(_get_sourcefile);
+
+        if (interp == NULL) {
+            Py_FatalError("PyImport_ExecCodeModuleWithPathnames: "
+                          "no interpreter!");
+        }
+
+        pathobj = _PyObject_CallMethodObjIdArgs(interp->importlib,
+                                                &PyId__get_sourcefile, cpathobj,
+                                                NULL);
+        if (pathobj == NULL)
+            PyErr_Clear();
+    }
+    else
         pathobj = NULL;
-    if (cpathname != NULL) {
-        cpathobj = PyUnicode_DecodeFSDefault(cpathname);
-        if (cpathobj == NULL)
-            goto error;
-    } else
-        cpathobj = NULL;
+
     m = PyImport_ExecCodeModuleObject(nameobj, co, pathobj, cpathobj);
 error:
     Py_DECREF(nameobj);
@@ -706,18 +723,13 @@
                                  PyEval_GetBuiltins()) != 0)
             goto error;
     }
-    /* Remember the filename as the __file__ attribute */
     if (pathname != NULL) {
-        v = get_sourcefile(pathname);
-        if (v == NULL)
-            PyErr_Clear();
+        v = pathname;
     }
-    else
-        v = NULL;
-    if (v == NULL) {
+    else {
         v = ((PyCodeObject *)co)->co_filename;
-        Py_INCREF(v);
     }
+    Py_INCREF(v);
     if (PyDict_SetItemString(d, "__file__", v) != 0)
         PyErr_Clear(); /* Not important enough to report */
     Py_DECREF(v);
@@ -752,100 +764,6 @@
 }
 
 
-/* Like rightmost_sep, but operate on unicode objects. */
-static Py_ssize_t
-rightmost_sep_obj(PyObject* o, Py_ssize_t start, Py_ssize_t end)
-{
-    Py_ssize_t found, i;
-    Py_UCS4 c;
-    for (found = -1, i = start; i < end; i++) {
-        c = PyUnicode_READ_CHAR(o, i);
-        if (c == SEP
-#ifdef ALTSEP
-            || c == ALTSEP
-#endif
-            )
-        {
-            found = i;
-        }
-    }
-    return found;
-}
-
-
-/* Given a pathname to a Python byte compiled file, return the path to the
-   source file, if the path matches the PEP 3147 format.  This does not check
-   for any file existence, however, if the pyc file name does not match PEP
-   3147 style, NULL is returned.  buf must be at least as big as pathname;
-   the resulting path will always be shorter.
-
-   (...)/__pycache__/foo.<tag>.pyc -> (...)/foo.py */
-
-static PyObject*
-make_source_pathname(PyObject *path)
-{
-    Py_ssize_t left, right, dot0, dot1, len;
-    Py_ssize_t i, j;
-    PyObject *result;
-    int kind;
-    void *data;
-
-    len = PyUnicode_GET_LENGTH(path);
-    if (len > MAXPATHLEN)
-        return NULL;
-
-    /* Look back two slashes from the end.  In between these two slashes
-       must be the string __pycache__ or this is not a PEP 3147 style
-       path.  It's possible for there to be only one slash.
-    */
-    right = rightmost_sep_obj(path, 0, len);
-    if (right == -1)
-        return NULL;
-    left = rightmost_sep_obj(path, 0, right);
-    if (left == -1)
-        left = 0;
-    else
-        left++;
-    if (right-left !=  sizeof(CACHEDIR)-1)
-        return NULL;
-    for (i = 0; i < sizeof(CACHEDIR)-1; i++)
-        if (PyUnicode_READ_CHAR(path, left+i) != CACHEDIR[i])
-            return NULL;
-
-    /* Now verify that the path component to the right of the last slash
-       has two dots in it.
-    */
-    dot0 = PyUnicode_FindChar(path, '.', right+1, len, 1);
-    if (dot0 < 0)
-        return NULL;
-    dot1 = PyUnicode_FindChar(path, '.', dot0+1, len, 1);
-    if (dot1 < 0)
-        return NULL;
-    /* Too many dots? */
-    if (PyUnicode_FindChar(path, '.', dot1+1, len, 1) != -1)
-        return NULL;
-
-    /* This is a PEP 3147 path.  Start by copying everything from the
-       start of pathname up to and including the leftmost slash.  Then
-       copy the file's basename, removing the magic tag and adding a .py
-       suffix.
-    */
-    result = PyUnicode_New(left + (dot0-right) + 2,
-                           PyUnicode_MAX_CHAR_VALUE(path));
-    if (!result)
-        return NULL;
-    kind = PyUnicode_KIND(result);
-    data = PyUnicode_DATA(result);
-    PyUnicode_CopyCharacters(result, 0, path, 0, (i = left));
-    PyUnicode_CopyCharacters(result, left, path, right+1,
-                             (j = dot0-right));
-    PyUnicode_WRITE(kind, data, i+j,   'p');
-    PyUnicode_WRITE(kind, data, i+j+1, 'y');
-    assert(_PyUnicode_CheckConsistency(result, 1));
-    return result;
-}
-
-
 static void
 update_code_filenames(PyCodeObject *co, PyObject *oldname, PyObject *newname)
 {
@@ -911,61 +829,6 @@
 }
 
 
-/* Get source file -> unicode or None
- * Returns the path to the py file if available, else the given path
- */
-static PyObject *
-get_sourcefile(PyObject *filename)
-{
-    Py_ssize_t len;
-    PyObject *py;
-    struct stat statbuf;
-    int err;
-    void *data;
-    unsigned int kind;
-
-    len = PyUnicode_GET_LENGTH(filename);
-    if (len == 0)
-        Py_RETURN_NONE;
-
-    /* don't match *.pyc or *.pyo? */
-    data = PyUnicode_DATA(filename);
-    kind = PyUnicode_KIND(filename);
-    if (len < 5
-        || PyUnicode_READ(kind, data, len-4) != '.'
-        || (PyUnicode_READ(kind, data, len-3) != 'p'
-            && PyUnicode_READ(kind, data, len-3) != 'P')
-        || (PyUnicode_READ(kind, data, len-2) != 'y'
-            && PyUnicode_READ(kind, data, len-2) != 'Y'))
-        goto unchanged;
-
-    /* Start by trying to turn PEP 3147 path into source path.  If that
-     * fails, just chop off the trailing character, i.e. legacy pyc path
-     * to py.
-     */
-    py = make_source_pathname(filename);
-    if (py == NULL) {
-        PyErr_Clear();
-        py = PyUnicode_Substring(filename, 0, len - 1);
-    }
-    if (py == NULL)
-        goto error;
-
-    err = _Py_stat(py, &statbuf);
-    if (err == -2)
-        goto error;
-    if (err == 0 && S_ISREG(statbuf.st_mode))
-        return py;
-    Py_DECREF(py);
-    goto unchanged;
-
-error:
-    PyErr_Clear();
-unchanged:
-    Py_INCREF(filename);
-    return filename;
-}
-
 /* Forward */
 static struct _frozen * find_frozen(PyObject *);
 
diff --git a/Python/importlib.h b/Python/importlib.h
--- a/Python/importlib.h
+++ b/Python/importlib.h
[stripped]

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list