[Python-checkins] r45510 - python/trunk/Lib/pkgutil.py python/trunk/Lib/pydoc.py

phillip.eby python-checkins at python.org
Tue Apr 18 02:59:56 CEST 2006


Author: phillip.eby
Date: Tue Apr 18 02:59:55 2006
New Revision: 45510

Modified:
   python/trunk/Lib/pkgutil.py
   python/trunk/Lib/pydoc.py
Log:
Second phase of refactoring for runpy, pkgutil, pydoc, and setuptools
to share common PEP 302 support code, as described here:

http://mail.python.org/pipermail/python-dev/2006-April/063724.html

pydoc now supports PEP 302 importers, by way of utility functions in
pkgutil, such as 'walk_packages()'.  It will properly document
modules that are in zip files, and is backward compatible with Python
2.3 (setuptools installs for Python <2.5 will bundle it so pydoc
doesn't break when used with eggs).

What has not changed is that pydoc command line options do not support
zip paths or other importer paths, and the webserver index does not
support sys.meta_path.  Those are probably okay as limitations.

Tasks remaining: write docs and Misc/NEWS entries for the pkgutil/pydoc
changes, update setuptools to use pkgutil wherever possible, and then
add setuptools to the stdlib.


Modified: python/trunk/Lib/pkgutil.py
==============================================================================
--- python/trunk/Lib/pkgutil.py	(original)
+++ python/trunk/Lib/pkgutil.py	Tue Apr 18 02:59:55 2006
@@ -11,6 +11,7 @@
 
 __all__ = [
     'get_importer', 'iter_importers', 'get_loader', 'find_loader',
+    'walk_packages', 'iter_modules',
     'ImpImporter', 'ImpLoader', 'read_code', 'extend_path',
 ]
 
@@ -27,6 +28,95 @@
     return marshal.load(stream)
 
 
+def simplegeneric(func):
+    """Make a trivial single-dispatch generic function"""
+    registry = {}
+    def wrapper(*args,**kw):
+        ob = args[0]
+        try:
+            cls = ob.__class__
+        except AttributeError:
+            cls = type(ob)
+        try:
+            mro = cls.__mro__
+        except AttributeError:
+            try:
+                class cls(cls,object): pass
+                mro = cls.__mro__[1:]
+            except TypeError:
+                mro = object,   # must be an ExtensionClass or some such  :(               
+        for t in mro:
+            if t in registry:
+                return registry[t](*args,**kw)
+        else:
+            return func(*args,**kw)
+    try:
+        wrapper.__name__ = func.__name__
+    except (TypeError,AttributeError):
+        pass    # Python 2.3 doesn't allow functions to be renamed
+
+    def register(typ, func=None):
+        if func is None:
+            return lambda f: register(typ, f)
+        registry[typ] = func
+        return func
+
+    wrapper.__dict__ = func.__dict__
+    wrapper.__doc__ = func.__doc__
+    wrapper.register = register
+    return wrapper        
+
+
+def walk_packages(path=None, prefix='', onerror=None):
+    """Yield submodule names+loaders recursively, for path or sys.path"""
+
+    def seen(p,m={}):
+        if p in m: return True
+        m[p] = True
+
+    for importer, name, ispkg in iter_modules(path, prefix):
+        yield importer, name, ispkg
+
+        if ispkg:
+            try:
+                __import__(name)
+            except ImportError:
+                if onerror is not None:
+                    onerror()
+            else:
+                path = getattr(sys.modules[name], '__path__', None) or []
+
+                # don't traverse path items we've seen before
+                path = [p for p in path if not seen(p)]
+
+                for item in walk_packages(path, name+'.'):
+                    yield item
+
+
+def iter_modules(path=None, prefix=''):
+    """Yield submodule names+loaders for path or sys.path"""
+    if path is None:
+        importers = iter_importers()
+    else:
+        importers = map(get_importer, path)
+
+    yielded = {}
+    for i in importers:
+        for name, ispkg in iter_importer_modules(i, prefix):
+            if name not in yielded:
+                yielded[name] = 1
+                yield i, name, ispkg
+
+
+#@simplegeneric
+def iter_importer_modules(importer, prefix=''):
+    if not hasattr(importer,'iter_modules'):
+        return []
+    return importer.iter_modules(prefix)
+
+iter_importer_modules = simplegeneric(iter_importer_modules)
+
+
 class ImpImporter:
     """PEP 302 Importer that wraps Python's "classic" import algorithm
 
@@ -49,13 +139,45 @@
         if self.path is None:
             path = None
         else:
-            path = [self.path]
+            path = [os.path.realpath(self.path)]
         try:
             file, filename, etc = imp.find_module(subname, path)
         except ImportError:
             return None
         return ImpLoader(fullname, file, filename, etc)
 
+    def iter_modules(self, prefix=''):
+        if self.path is None or not os.path.isdir(self.path):
+            return
+
+        yielded = {}
+        import inspect
+
+        filenames = os.listdir(self.path)
+        filenames.sort()  # handle packages before same-named modules
+
+        for fn in filenames:
+            modname = inspect.getmodulename(fn)
+            if modname=='__init__' or modname in yielded:
+                continue
+            
+            path = os.path.join(self.path, fn)
+            ispkg = False
+
+            if not modname and os.path.isdir(path) and '.' not in fn:
+                modname = fn
+                for fn in os.listdir(path):
+                    subname = inspect.getmodulename(fn)
+                    if subname=='__init__':
+                        ispkg = True
+                        break
+                else:
+                    continue    # not a package
+
+            if modname and '.' not in modname:
+                yielded[modname] = 1
+                yield prefix + modname, ispkg
+
 
 class ImpLoader:
     """PEP 302 Loader that wraps Python's "classic" import algorithm
@@ -97,7 +219,8 @@
                               "module %s" % (self.fullname, fullname))
         return fullname
 
-    def is_package(self):
+    def is_package(self, fullname):
+        fullname = self._fix_name(fullname)
         return self.etc[2]==imp.PKG_DIRECTORY
 
     def get_code(self, fullname=None):
@@ -136,6 +259,7 @@
                 self.source = self._get_delegate().get_source()
         return self.source
 
+
     def _get_delegate(self):
         return ImpImporter(self.filename).find_module('__init__')
 
@@ -149,6 +273,45 @@
         return None
 
 
+try:
+    import zipimport
+    from zipimport import zipimporter
+    
+    def iter_zipimport_modules(importer, prefix=''):
+        dirlist = zipimport._zip_directory_cache[importer.archive].keys()
+        dirlist.sort()
+        _prefix = importer.prefix
+        plen = len(_prefix)
+        yielded = {}
+        import inspect
+        for fn in dirlist:
+            if not fn.startswith(_prefix):
+                continue
+
+            fn = fn[plen:].split(os.sep)
+
+            if len(fn)==2 and fn[1].startswith('__init__.py'):
+                if fn[0] not in yielded:
+                    yielded[fn[0]] = 1
+                    yield fn[0], True
+
+            if len(fn)!=1:
+                continue
+
+            modname = inspect.getmodulename(fn[0])
+            if modname=='__init__':
+                continue
+
+            if modname and '.' not in modname and modname not in yielded:
+                yielded[modname] = 1
+                yield prefix + modname, False
+
+    iter_importer_modules.register(zipimporter, iter_zipimport_modules)
+
+except ImportError:
+    pass
+
+
 def get_importer(path_item):
     """Retrieve a PEP 302 importer for the given path item
 
@@ -183,7 +346,7 @@
     return importer
 
 
-def iter_importers(fullname):
+def iter_importers(fullname=""):
     """Yield PEP 302 importers for the given module name
 
     If fullname contains a '.', the importers will be for the package
@@ -224,7 +387,6 @@
     if '.' not in fullname:
         yield ImpImporter()
 
-
 def get_loader(module_or_name):
     """Get a PEP 302 "loader" object for module_or_name
 
@@ -250,7 +412,6 @@
         fullname = module_or_name
     return find_loader(fullname)
 
-
 def find_loader(fullname):
     """Find a PEP 302 "loader" object for fullname
 

Modified: python/trunk/Lib/pydoc.py
==============================================================================
--- python/trunk/Lib/pydoc.py	(original)
+++ python/trunk/Lib/pydoc.py	Tue Apr 18 02:59:55 2006
@@ -52,10 +52,16 @@
 #     the current directory is changed with os.chdir(), an incorrect
 #     path will be displayed.
 
-import sys, imp, os, re, types, inspect, __builtin__
+import sys, imp, os, re, types, inspect, __builtin__, pkgutil
 from repr import Repr
 from string import expandtabs, find, join, lower, split, strip, rfind, rstrip
-from collections import deque
+try:
+    from collections import deque
+except ImportError:
+    # Python 2.3 compatibility
+    class deque(list):
+        def popleft(self):
+            return self.pop(0)
 
 # --------------------------------------------------------- common routines
 
@@ -182,6 +188,23 @@
                 return True
     return False
 
+def source_synopsis(file):
+    line = file.readline()
+    while line[:1] == '#' or not strip(line):
+        line = file.readline()
+        if not line: break
+    line = strip(line)
+    if line[:4] == 'r"""': line = line[1:]
+    if line[:3] == '"""':
+        line = line[3:]
+        if line[-1:] == '\\': line = line[:-1]
+        while not strip(line):
+            line = file.readline()
+            if not line: break
+        result = strip(split(line, '"""')[0])
+    else: result = None
+    return result
+
 def synopsis(filename, cache={}):
     """Get the one-line summary out of a module file."""
     mtime = os.stat(filename).st_mtime
@@ -196,24 +219,11 @@
         if info and 'b' in info[2]: # binary modules have to be imported
             try: module = imp.load_module('__temp__', file, filename, info[1:])
             except: return None
-            result = split(module.__doc__ or '', '\n')[0]
+            result = (module.__doc__ or '').splitlines()[0]
             del sys.modules['__temp__']
         else: # text modules can be directly examined
-            line = file.readline()
-            while line[:1] == '#' or not strip(line):
-                line = file.readline()
-                if not line: break
-            line = strip(line)
-            if line[:4] == 'r"""': line = line[1:]
-            if line[:3] == '"""':
-                line = line[3:]
-                if line[-1:] == '\\': line = line[:-1]
-                while not strip(line):
-                    line = file.readline()
-                    if not line: break
-                result = strip(split(line, '"""')[0])
-            else: result = None
-        file.close()
+            result = source_synopsis(file)
+            file.close()
         cache[filename] = (mtime, result)
     return result
 
@@ -643,16 +653,8 @@
 
         if hasattr(object, '__path__'):
             modpkgs = []
-            modnames = []
-            for file in os.listdir(object.__path__[0]):
-                path = os.path.join(object.__path__[0], file)
-                modname = inspect.getmodulename(file)
-                if modname != '__init__':
-                    if modname and modname not in modnames:
-                        modpkgs.append((modname, name, 0, 0))
-                        modnames.append(modname)
-                    elif ispackage(path):
-                        modpkgs.append((file, name, 1, 0))
+            for importer, modname, ispkg in pkgutil.iter_modules(object.__path__):
+                modpkgs.append((modname, name, ispkg, 0))
             modpkgs.sort()
             contents = self.multicolumn(modpkgs, self.modpkglink)
             result = result + self.bigsection(
@@ -796,7 +798,10 @@
             tag += ':<br>\n'
 
             # Sort attrs by name.
-            attrs.sort(key=lambda t: t[0])
+            try:
+                attrs.sort(key=lambda t: t[0])
+            except TypeError:
+                attrs.sort(lambda t1, t2: cmp(t1[0], t2[0]))    # 2.3 compat
 
             # Pump out the attrs, segregated by kind.
             attrs = spill('Methods %s' % tag, attrs,
@@ -914,25 +919,9 @@
         """Generate an HTML index for a directory of modules."""
         modpkgs = []
         if shadowed is None: shadowed = {}
-        seen = {}
-        files = os.listdir(dir)
-
-        def found(name, ispackage,
-                  modpkgs=modpkgs, shadowed=shadowed, seen=seen):
-            if name not in seen:
-                modpkgs.append((name, '', ispackage, name in shadowed))
-                seen[name] = 1
-                shadowed[name] = 1
-
-        # Package spam/__init__.py takes precedence over module spam.py.
-        for file in files:
-            path = os.path.join(dir, file)
-            if ispackage(path): found(file, 1)
-        for file in files:
-            path = os.path.join(dir, file)
-            if os.path.isfile(path):
-                modname = inspect.getmodulename(file)
-                if modname: found(modname, 0)
+        for importer, name, ispkg in pkgutil.iter_modules([dir]):
+            modpkgs.append((name, '', ispkg, name in shadowed))
+            shadowed[name] = 1
 
         modpkgs.sort()
         contents = self.multicolumn(modpkgs, self.modpkglink)
@@ -1059,14 +1048,12 @@
 
         if hasattr(object, '__path__'):
             modpkgs = []
-            for file in os.listdir(object.__path__[0]):
-                path = os.path.join(object.__path__[0], file)
-                modname = inspect.getmodulename(file)
-                if modname != '__init__':
-                    if modname and modname not in modpkgs:
-                        modpkgs.append(modname)
-                    elif ispackage(path):
-                        modpkgs.append(file + ' (package)')
+            for importer, modname, ispkg in pkgutil.iter_modules(object.__path__):
+                if ispkg:
+                    modpkgs.append(modname + ' (package)')
+                else:
+                    modpkgs.append(modname)
+
             modpkgs.sort()
             result = result + self.section(
                 'PACKAGE CONTENTS', join(modpkgs, '\n'))
@@ -1490,20 +1477,9 @@
 def writedocs(dir, pkgpath='', done=None):
     """Write out HTML documentation for all modules in a directory tree."""
     if done is None: done = {}
-    for file in os.listdir(dir):
-        path = os.path.join(dir, file)
-        if ispackage(path):
-            writedocs(path, pkgpath + file + '.', done)
-        elif os.path.isfile(path):
-            modname = inspect.getmodulename(path)
-            if modname:
-                if modname == '__init__':
-                    modname = pkgpath[:-1] # remove trailing period
-                else:
-                    modname = pkgpath + modname
-                if modname not in done:
-                    done[modname] = 1
-                    writedoc(modname)
+    for importer, modname, ispkg in pkgutil.walk_packages([dir], pkgpath):
+        writedoc(modname)
+    return
 
 class Helper:
     keywords = {
@@ -1830,30 +1806,9 @@
             self.state.append((child, self.children(child)))
         return child
 
-class ModuleScanner(Scanner):
+
+class ModuleScanner:
     """An interruptible scanner that searches module synopses."""
-    def __init__(self):
-        roots = map(lambda dir: (dir, ''), pathdirs())
-        Scanner.__init__(self, roots, self.submodules, self.isnewpackage)
-        self.inodes = map(lambda (dir, pkg): os.stat(dir).st_ino, roots)
-
-    def submodules(self, (dir, package)):
-        children = []
-        for file in os.listdir(dir):
-            path = os.path.join(dir, file)
-            if ispackage(path):
-                children.append((path, package + (package and '.') + file))
-            else:
-                children.append((path, package))
-        children.sort() # so that spam.py comes before spam.pyc or spam.pyo
-        return children
-
-    def isnewpackage(self, (dir, package)):
-        inode = os.path.exists(dir) and os.stat(dir).st_ino
-        if not (os.path.islink(dir) and inode in self.inodes):
-            self.inodes.append(inode) # detect circular symbolic links
-            return ispackage(dir)
-        return False
 
     def run(self, callback, key=None, completer=None):
         if key: key = lower(key)
@@ -1870,22 +1825,31 @@
                     if find(lower(modname + ' - ' + desc), key) >= 0:
                         callback(None, modname, desc)
 
-        while not self.quit:
-            node = self.next()
-            if not node: break
-            path, package = node
-            modname = inspect.getmodulename(path)
-            if os.path.isfile(path) and modname:
-                modname = package + (package and '.') + modname
-                if not modname in seen:
-                    seen[modname] = 1 # if we see spam.py, skip spam.pyc
-                    if key is None:
-                        callback(path, modname, '')
+        for importer, modname, ispkg in pkgutil.walk_packages():
+            if self.quit:
+                break
+            if key is None:
+                callback(None, modname, '')
+            else:
+                loader = importer.find_module(modname)
+                if hasattr(loader,'get_source'):
+                    import StringIO
+                    desc = source_synopsis(
+                        StringIO.StringIO(loader.get_source(modname))
+                    ) or ''
+                    if hasattr(loader,'get_filename'):
+                        path = loader.get_filename(modname)
                     else:
-                        desc = synopsis(path) or ''
-                        if find(lower(modname + ' - ' + desc), key) >= 0:
-                            callback(path, modname, desc)
-        if completer: completer()
+                        path = None
+                else:
+                    module = loader.load_module(modname)
+                    desc = (module.__doc__ or '').splitlines()[0]
+                    path = getattr(module,'__file__',None)
+                if find(lower(modname + ' - ' + desc), key) >= 0:
+                    callback(path, modname, desc)
+
+        if completer:
+            completer()
 
 def apropos(key):
     """Print all the one-line module summaries that contain a substring."""
@@ -1950,7 +1914,7 @@
                     'Built-in Modules', '#ffffff', '#ee77aa', contents)]
 
                 seen = {}
-                for dir in pathdirs():
+                for dir in sys.path:
                     indices.append(html.index(dir, seen))
                 contents = heading + join(indices) + '''<p align=right>
 <font color="#909090" face="helvetica, arial"><strong>


More information about the Python-checkins mailing list