[Python-checkins] python/nondist/sandbox/setuptools pkg_resources.py, 1.51, 1.52 setuptools.txt, 1.21, 1.22

Sun Jul 24 19:59:28 CEST 2005

Update of /cvsroot/python/python/nondist/sandbox/setuptools
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv8768

Modified Files:
	pkg_resources.py setuptools.txt 
Log Message:
Fix eager resource extraction. Add eager_resources setup() argument.  Add
support for obtaining project-level resources by making get_provider()
accept Requirement objects.


Index: pkg_resources.py
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/pkg_resources.py,v
retrieving revision 1.51
retrieving revision 1.52
diff -u -d -r1.51 -r1.52

--- pkg_resources.py	21 Jul 2005 16:11:34 -0000	1.51
+++ pkg_resources.py	24 Jul 2005 17:59:26 -0000	1.52
@@ -25,6 +25,7 @@
     'safe_name', 'safe_version', 'run_main', 'BINARY_DIST', 'run_script',
     'get_default_cache', 'EmptyProvider', 'empty_provider', 'normalize_path',
     'WorkingSet', 'working_set', 'add_activation_listener', 'CHECKOUT_DIST',
+    'list_resources', 'resource_exists', 'resource_isdir',
 ]
 
 import sys, os, zipimport, time, re, imp
@@ -38,7 +39,6 @@
 
 
 
-
 class ResolutionError(Exception):
     """Abstract base for dependency resolution errors"""
 
@@ -68,18 +68,18 @@
     """
     _provider_factories[loader_type] = provider_factory
 
-def get_provider(moduleName):
-    """Return an IResourceProvider for the named module"""
+def get_provider(moduleOrReq):
+    """Return an IResourceProvider for the named module or requirement"""
+    if isinstance(moduleOrReq,Requirement):
+        return working_set.find(moduleOrReq) or require(str(moduleOrReq))[0]
     try:
-        module = sys.modules[moduleName]
+        module = sys.modules[moduleOrReq]
     except KeyError:
-        __import__(moduleName)
-        module = sys.modules[moduleName]
+        __import__(moduleOrReq)
+        module = sys.modules[moduleOrReq]
     loader = getattr(module, '__loader__', None)
     return _find_adapter(_provider_factories, loader)(module)
 
-
-
 def _macosx_vers(_cache=[]):
     if not _cache:
         info = os.popen('/usr/bin/sw_vers').read().splitlines()
@@ -627,7 +627,7 @@
 
     def resource_isdir(self, package_name, resource_name):
         """Does the named resource exist in the named package?"""
-        return get_provider(package_name).resource_isdir(self, resource_name)
+        return get_provider(package_name).resource_isdir(resource_name)
 
     def resource_filename(self, package_name, resource_name):
         """Return a true filesystem path for specified resource"""
@@ -648,7 +648,7 @@
         )
 
     def list_resources(self,  package_name, resource_name):
-        return get_provider(package_name).resource_listdir(self, resource_name)
+        return get_provider(package_name).resource_listdir(resource_name)
 
 
 
@@ -913,8 +913,8 @@
 register_loader_type(object, NullProvider)
 
 
-class DefaultProvider(NullProvider):
-    """Provides access to package resources in the filesystem"""
+class EggProvider(NullProvider):
+    """Provider based on a virtual filesystem"""
 
     def __init__(self,module):
         NullProvider.__init__(self,module)
@@ -925,22 +925,28 @@
         # of multiple eggs; that's why we use module_path instead of .archive
         path = self.module_path
         old = None
-        self.prefix = []
         while path!=old:
             if path.lower().endswith('.egg'):
                 self.egg_name = os.path.basename(path)
                 self.egg_info = os.path.join(path, 'EGG-INFO')
+                self.egg_root = path
                 break
             old = path
             path, base = os.path.split(path)
-            self.prefix.append(base)
-        self.prefix.reverse()
 
-    def _has(self, path):
-        return os.path.exists(path)
 
 
 
+
+
+
+
+class DefaultProvider(EggProvider):
+    """Provides access to package resources in the filesystem"""
+
+    def _has(self, path):
+        return os.path.exists(path)
+
     def _isdir(self,path):
         return os.path.isdir(path)
 
@@ -976,67 +982,63 @@
 
 
 
-
-
-
-
-
-
-class ZipProvider(DefaultProvider):
+class ZipProvider(EggProvider):
     """Resource support for zips and eggs"""
 
     eagers = None
 
     def __init__(self, module):
-        DefaultProvider.__init__(self,module)
+        EggProvider.__init__(self,module)
         self.zipinfo = zipimport._zip_directory_cache[self.loader.archive]
         self.zip_pre = self.loader.archive+os.sep
 
-    def _short_name(self, path):
-        if path.startswith(self.zip_pre):
-            return path[len(self.zip_pre):]
-        return path
+    def _zipinfo_name(self, fspath):
+        # Convert a virtual filename (full path to file) into a zipfile subpath
+        # usable with the zipimport directory cache for our target archive
+        if fspath.startswith(self.zip_pre):
+            return fspath[len(self.zip_pre):]
+        raise AssertionError(
+            "%s is not a subpath of %s" % (fspath,self.zip_pre)
+        )
 
-    def get_resource_stream(self, manager, resource_name):
-        return StringIO(self.get_resource_string(manager, resource_name))
+    def _parts(self,zip_path):
+        # Convert a zipfile subpath into an egg-relative path part list
+        fspath = self.zip_pre+zip_path  # pseudo-fs path
+        if fspath.startswith(self.egg_root+os.sep):
+            return fspath[len(self.egg_root)+1:].split(os.sep)
+        raise AssertionError(
+            "%s is not a subpath of %s" % (fspath,self.egg_root)
+        )           
 
-    def get_resource_filename(self, manager, resource_name):
+    def get_resource_filename(self, manager, resource_name):       
         if not self.egg_name:
             raise NotImplementedError(
                 "resource_filename() only supported for .egg, not .zip"
             )
-
         # no need to lock for extraction, since we use temp names
+        zip_path = self._resource_to_zip(resource_name)
         eagers = self._get_eager_resources()
-        if resource_name in eagers:
+        if '/'.join(self._parts(zip_path)) in eagers:
             for name in eagers:
-                self._extract_resource(manager, name)
-
-        return self._extract_resource(manager, resource_name)
-
-    def _extract_directory(self, manager, resource_name):
-        if resource_name.endswith('/'):
-            resource_name = resource_name[:-1]
-        for resource in self.resource_listdir(resource_name):
-            last = self._extract_resource(manager, resource_name+'/'+resource)
-        return os.path.dirname(last)    # return the directory path
-
-
+                self._extract_resource(manager, self._eager_to_zip(name))
+        return self._extract_resource(manager, zip_path)
 
-    def _extract_resource(self, manager, resource_name):
-        if self.resource_isdir(resource_name):
-            return self._extract_directory(manager, resource_name)
+    def _extract_resource(self, manager, zip_path):
+        if zip_path in self._index():
+            for name in self._index()[zip_path]:
+                last = self._extract_resource(
+                    manager, os.path.join(zip_path, name)
+                )
+            return os.path.dirname(last)  # return the extracted directory name
 
-        parts = resource_name.split('/')
-        zip_path = os.path.join(self.module_path, *parts)
-        zip_stat = self.zipinfo[os.path.join(*self.prefix+parts)]
+        zip_stat = self.zipinfo[zip_path]
         t,d,size = zip_stat[5], zip_stat[6], zip_stat[3]
         date_time = (
             (d>>9)+1980, (d>>5)&0xF, d&0x1F,                      # ymd
             (t&0xFFFF)>>11, (t>>5)&0x3F, (t&0x1F) * 2, 0, 0, -1   # hms, etc.
         )
         timestamp = time.mktime(date_time)
-        real_path = manager.get_cache_path(self.egg_name, self.prefix+parts)
+        real_path = manager.get_cache_path(self.egg_name, self._parts(zip_path))
 
         if os.path.isfile(real_path):
             stat = os.stat(real_path)
@@ -1060,10 +1062,8 @@
                     # so we're done
                     return real_path
             raise
-
         return real_path
 
-
     def _get_eager_resources(self):
         if self.eagers is None:
             eagers = []
@@ -1077,12 +1077,9 @@
         try:
             return self._dirindex
         except AttributeError:
-            ind = {}; skip = len(self.prefix)
+            ind = {}
             for path in self.zipinfo:
                 parts = path.split(os.sep)
-                if parts[:skip] != self.prefix:
-                    continue    # only include items under our prefix
-                parts = parts[skip:]   # but don't include prefix in paths
                 while parts:
                     parent = '/'.join(parts[:-1])
                     if parent in ind:
@@ -1093,26 +1090,26 @@
             self._dirindex = ind
             return ind
 
-    def _has(self, path):
-        return self._short_name(path) in self.zipinfo or self._isdir(path)
+    def _has(self, fspath):
+        zip_path = self._zipinfo_name(fspath)
+        return zip_path in self.zipinfo or zip_path in self._index()
 
-    def _isdir(self,path):
-        return self._dir_name(path) in self._index()
+    def _isdir(self,fspath):
+        return self._zipinfo_name(fspath) in self._index()
 
-    def _listdir(self,path):
-        return list(self._index().get(self._dir_name(path), ()))
+    def _listdir(self,fspath):
+        return list(self._index().get(self._zipinfo_name(fspath), ()))
 
 
 
 
-    def _dir_name(self,path):
-        if path.startswith(self.module_path+os.sep):
-            path = path[len(self.module_path+os.sep):]
-        path = path.replace(os.sep,'/')
-        if path.endswith('/'): path = path[:-1]
-        return path
 
-    _get = NullProvider._get
+
+    def _eager_to_zip(self,resource_name):
+        return self._zipinfo_name(self._fn(self.egg_root,resource_name))
+
+    def _resource_to_zip(self,resource_name):
+        return self._zipinfo_name(self._fn(self.module_path,resource_name))
 
 register_loader_type(zipimport.zipimporter, ZipProvider)
 
@@ -1146,6 +1143,9 @@
 
 
 
+
+
+
 class PathMetadata(DefaultProvider):
     """Metadata provider for egg directories
 

Index: setuptools.txt
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/setuptools/setuptools.txt,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -d -r1.21 -r1.22
--- setuptools.txt	24 Jul 2005 02:41:43 -0000	1.21
+++ setuptools.txt	24 Jul 2005 17:59:26 -0000	1.22
@@ -180,6 +180,22 @@
     does not contain any code.  See the section below on `Namespace Packages`_
     for more information.
 
+``eager_resources``
+    A list of strings naming resources that should be extracted together, if
+    any of them is needed, or if any C extensions included in the project are
+    imported.  This argument is only useful if the project will be installed as
+    a zipfile, and there is a need to have all of the listed resources be
+    extracted to the filesystem *as a unit*.  Resources listed here
+    should be '/'-separated paths, relative to the source root, so to list a
+    resource ``foo.png`` in package ``bar.baz``, you would include the string
+    ``bar/baz/foo.png`` in this argument.
+
+    If you only need to obtain resources one at a time, or you don't have any C
+    extensions that access other files in the project (such as data files or
+    shared libraries), you probably do NOT need this argument and shouldn't
+    mess with it.  For more details on how this argument works, see the section
+    below on `Automatic Resource Extraction`_.
+
 
 Using ``find_packages()``
 -------------------------
@@ -414,6 +430,7 @@
 __ http://docs.python.org/dist/node11.html 
 
 
+
 Accessing Data Files at Runtime
 -------------------------------
 
@@ -432,6 +449,76 @@
 .. _Accessing Package Resources: http://peak.telecommunity.com/DevCenter/PythonEggs#accessing-package-resources
 
 
+Non-Package Data Files
+----------------------
+
+The ``distutils`` normally install general "data files" to a platform-specific
+location (e.g. ``/usr/share``).  This feature is intended to be used for things
+like documentation, example configuration files, and the like.  ``setuptools``
+does not install these data files in a separate location, however.  They are
+bundled inside the egg file or directory, alongside the Python modules and
+packages.  The data files can also be accessed using the `Resource Management
+API`_, by specifying a ``Requirement`` instead of a package name::
+
+    from pkg_resources import Requirement, resource_filename
+    filename = resource_filename(Requirement.parse("MyProject"),"sample.conf")
+
+The above code will obtain the filename of the "sample.conf" file in the data
+root of the "MyProject" distribution.
+
+Note, by the way, that this encapsulation of data files means that you can't
+actually install data files to some arbitrary location on a user's machine;
+this is a feature, not a bug.  You can always include a script in your
+distribution that extracts and copies your documentation or data files to
+a user-specified location, at their discretion.  If you put related data files
+in a single directory, you can use ``resource_filename()`` with the directory
+name to get a filesystem directory that then can be copied with the ``shutil``
+module.  (Even if your package is installed as a zipfile, calling
+``resource_filename()`` on a directory will return an actual filesystem
+directory, whose contents will be that entire subtree of your distribution.)
+
+(Of course, if you're writing a new package, you can just as easily place your
+data files or directories inside one of your packages, rather than using the
+distutils' approach.  However, if you're updating an existing application, it
+may be simpler not to change the way it currently specifies these data files.)
+
+
+Automatic Resource Extraction
+-----------------------------
+
+If you are using tools that expect your resources to be "real" files, or your
+project includes non-extension native libraries or other files that your C
+extensions expect to be able to access, you may need to list those files in
+the ``eager_resources`` argument to ``setup()``, so that the files will be
+extracted together, whenever a C extension in the project is imported.  This
+is especially important if your project includes shared libraries *other* than
+distutils-built C extensions.  Those shared libraries should be listed as
+``eager_resources``, because they need to be present in the filesystem when the
+C extensions that link to them are used.
+
+The ``pkg_resources`` runtime for compressed packages will automatically
+extract *all* C extensions and ``eager_resources`` at the same time, whenever
+*any* C extension or eager resource is requested via the ``resource_filename()``
+API.  (C extensions are imported using ``resource_filename()`` internally.)
+This ensures that C extensions will see all of the "real" files that they
+expect to see.
+
+Note also that you can list directory resource names in ``eager_resources`` as
+well, in which case the directory's contents (including subdirectories) will be
+extracted whenever any C extension or eager resource is requested.
+
+Please note that if you're not sure whether you need to use this argument, you
+don't!  It's really intended to support projects with lots of non-Python
+dependencies and as a last resort for crufty projects that can't otherwise
+handle being compressed.  If your package is pure Python, Python plus data
+files, or Python plus C, you really don't need this.  You've got to be using
+either C or an external program that needs "real" files in your project before
+there's any possibility of ``eager_resources`` being relevant to your project.
+
+
+
+
+
 "Development Mode"
 ==================
 
@@ -1396,14 +1483,32 @@
  * Fixed the ``--tag-svn-revision`` option of ``egg_info`` not finding the
    latest revision number; it was using the revision number of the directory
    containing ``setup.py``, not the highest revision number in the project.
+
+ * Added ``eager_resources`` setup argument
    
  * Fixed some problems using ``pkg_resources`` w/PEP 302 loaders other than
-   ``zipimport``.
-   
- * Fixed ``pkg_resources.resource_exists()`` not working correctly.
+   ``zipimport``, and the previously-broken "eager resource" support.
+
+ * Fixed ``pkg_resources.resource_exists()`` not working correctly, along with
+   some other resource API bugs.
+
 
  * Many ``pkg_resources`` API changes and enhancements:
 
+   * Resource API functions like ``resource_string()`` that accepted a package
+     name and resource name will now also accept a ``Requirement`` object in
+     place of the package name (to allow access to non-package data files in
+     an egg).
+
+   * ``get_provider()`` will now accept a ``Requirement`` instance or a module
+     name.  If it is given a ``Requirement``, it will return a corresponding
+     ``Distribution`` (by calling ``require()`` if a suitable distribution
+     isn't already in the working set), rather than returning a metadata and
+     resource provider for a specific module.  (The difference is in how
+     resource paths are interpreted; supplying a module name means resource
+     paths will be module-relative, rather than relative to the distribution's
+     root.)
+
    * ``Distribution`` objects now implement the ``IResourceProvider`` and
      ``IMetadataProvider`` interfaces, so you don't need to reference the (no
      longer available) ``metadata`` attribute to get at these interfaces.