[Python-checkins] python/dist/src/Lib modulefinder.py,1.1,1.2

jvr@users.sourceforge.net jvr@users.sourceforge.net
Tue, 31 Dec 2002 08:33:03 -0800


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv23955/Lib

Modified Files:
	modulefinder.py 
Log Message:
patch attached to sf item #643711:
any_missing() returns fewer bogus missing modules.

- I've rewritten scan_code() more or less from scratch, and
factored bits and pieces out for readability.
- keep track of global assignments and failed imports per
module; use this to determine whether the Y in "from X
import Y" is a submodule or just a global name. This is not
100% doable: you can't tell which symbols are imported when
doing a star import of a non-Python module short of actually
importing it.
- added a new method to ModuleFinder: any_missing_maybe(),
which returns *two* lists, one with certain misses, one with
possible misses. The possible misses are *very* often false
alarms, so it's useful to keep this list separate.
any_missing() now simply returns the union of the two lists
returned by any_missing_maybe().

TODO: documentation, test_modulefinder.py


Index: modulefinder.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/modulefinder.py,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -d -r1.1 -r1.2
*** modulefinder.py	31 Dec 2002 16:27:33 -0000	1.1
--- modulefinder.py	31 Dec 2002 16:33:00 -0000	1.2
***************
*** 16,25 ****
      READ_MODE = "r"
  
  IMPORT_NAME = dis.opname.index('IMPORT_NAME')
- IMPORT_FROM = dis.opname.index('IMPORT_FROM')
  STORE_NAME = dis.opname.index('STORE_NAME')
- STORE_FAST = dis.opname.index('STORE_FAST')
  STORE_GLOBAL = dis.opname.index('STORE_GLOBAL')
! STORE_OPS = [STORE_NAME, STORE_FAST, STORE_GLOBAL]
  
  # Modulefinder does a good job at simulating Python's, but it can not
--- 16,24 ----
      READ_MODE = "r"
  
+ LOAD_CONST = dis.opname.index('LOAD_CONST')
  IMPORT_NAME = dis.opname.index('IMPORT_NAME')
  STORE_NAME = dis.opname.index('STORE_NAME')
  STORE_GLOBAL = dis.opname.index('STORE_GLOBAL')
! STORE_OPS = [STORE_NAME, STORE_GLOBAL]
  
  # Modulefinder does a good job at simulating Python's, but it can not
***************
*** 55,58 ****
--- 54,64 ----
          self.__path__ = path
          self.__code__ = None
+         # The set of global names that are assigned to in the module.
+         # This includes those names imported through starimports of
+         # Python modules.
+         self.globalnames = {}
+         # The set of starimports this module did that could not be
+         # resolved, ie. a starimport from a non-Python module.
+         self.starimports = {}
  
      def __repr__(self):
***************
*** 67,71 ****
  class ModuleFinder:
  
!     def __init__(self, path=None, debug=0, excludes = [], replace_paths = []):
          if path is None:
              path = sys.path
--- 73,77 ----
  class ModuleFinder:
  
!     def __init__(self, path=None, debug=0, excludes=[], replace_paths=[]):
          if path is None:
              path = sys.path
***************
*** 234,239 ****
          if self.badmodules.has_key(fqname):
              self.msgout(3, "import_module -> None")
-             if parent:
-                 self.badmodules[fqname][parent.__name__] = None
              return None
          try:
--- 240,243 ----
***************
*** 278,286 ****
          return m
  
      def scan_code(self, co, m):
          code = co.co_code
          n = len(code)
          i = 0
!         lastname = None
          while i < n:
              c = code[i]
--- 282,318 ----
          return m
  
+     def _add_badmodule(self, name, caller):
+         if name not in self.badmodules:
+             self.badmodules[name] = {}
+         self.badmodules[name][caller.__name__] = 1
+ 
+     def _safe_import_hook(self, name, caller, fromlist):
+         # wrapper for self.import_hook() that won't raise ImportError
+         if name in self.badmodules:
+             self._add_badmodule(name, caller)
+             return
+         try:
+             self.import_hook(name, caller)
+         except ImportError, msg:
+             self.msg(2, "ImportError:", str(msg))
+             self._add_badmodule(name, caller)
+         else:
+             if fromlist:
+                 for sub in fromlist:
+                     if sub in self.badmodules:
+                         self._add_badmodule(sub, caller)
+                         continue
+                     try:
+                         self.import_hook(name, caller, [sub])
+                     except ImportError, msg:
+                         self.msg(2, "ImportError:", str(msg))
+                         fullname = name + "." + sub
+                         self._add_badmodule(fullname, caller)
+ 
      def scan_code(self, co, m):
          code = co.co_code
          n = len(code)
          i = 0
!         fromlist = None
          while i < n:
              c = code[i]
***************
*** 290,320 ****
                  oparg = ord(code[i]) + ord(code[i+1])*256
                  i = i+2
!             if op == IMPORT_NAME:
!                 name = lastname = co.co_names[oparg]
!                 if not self.badmodules.has_key(lastname):
!                     try:
!                         self.import_hook(name, m)
!                     except ImportError, msg:
!                         self.msg(2, "ImportError:", str(msg))
!                         if not self.badmodules.has_key(name):
!                             self.badmodules[name] = {}
!                         self.badmodules[name][m.__name__] = None
!             elif op == IMPORT_FROM:
                  name = co.co_names[oparg]
!                 assert lastname is not None
!                 if not self.badmodules.has_key(lastname):
!                     try:
!                         self.import_hook(lastname, m, [name])
!                     except ImportError, msg:
!                         self.msg(2, "ImportError:", str(msg))
!                         fullname = lastname + "." + name
!                         if not self.badmodules.has_key(fullname):
!                             self.badmodules[fullname] = {}
!                         self.badmodules[fullname][m.__name__] = None
              elif op in STORE_OPS:
!                 # Skip; each IMPORT_FROM is followed by a STORE_* opcode
!                 pass
!             else:
!                 lastname = None
          for c in co.co_consts:
              if isinstance(c, type(co)):
--- 322,362 ----
                  oparg = ord(code[i]) + ord(code[i+1])*256
                  i = i+2
!             if op == LOAD_CONST:
!                 # An IMPORT_NAME is always preceded by a LOAD_CONST, it's
!                 # a tuple of "from" names, or None for a regular import.
!                 # The tuple may contain "*" for "from <mod> import *"
!                 fromlist = co.co_consts[oparg]
!             elif op == IMPORT_NAME:
!                 assert fromlist is None or type(fromlist) is tuple
                  name = co.co_names[oparg]
!                 have_star = 0
!                 if fromlist is not None:
!                     if "*" in fromlist:
!                         have_star = 1
!                     fromlist = [f for f in fromlist if f != "*"]
!                 self._safe_import_hook(name, m, fromlist)
!                 if have_star:
!                     # We've encountered an "import *". If it is a Python module,
!                     # the code has already been parsed and we can suck out the
!                     # global names.
!                     mm = None
!                     if m.__path__:
!                         # At this point we don't know whether 'name' is a
!                         # submodule of 'm' or a global module. Let's just try
!                         # the full name first.
!                         mm = self.modules.get(m.__name__ + "." + name)
!                     if mm is None:
!                         mm = self.modules.get(name)
!                     if mm is not None:
!                         m.globalnames.update(mm.globalnames)
!                         m.starimports.update(mm.starimports)
!                         if mm.__code__ is None:
!                             m.starimports[name] = 1
!                     else:
!                         m.starimports[name] = 1
              elif op in STORE_OPS:
!                 # keep track of all global names that are assigned to
!                 name = co.co_names[oparg]
!                 m.globalnames[name] = 1
          for c in co.co_consts:
              if isinstance(c, type(co)):
***************
*** 361,364 ****
--- 403,409 ----
  
      def report(self):
+         """Print a report to stdout, listing the found modules with their
+         paths, as well as modules that are missing, or seem to be missing.
+         """
          print
          print "  %-25s %s" % ("Name", "File")
***************
*** 368,371 ****
--- 413,417 ----
          keys.sort()
          for key in keys:
+             continue
              m = self.modules[key]
              if m.__path__:
***************
*** 376,406 ****
  
          # Print missing modules
!         keys = self.badmodules.keys()
!         keys.sort()
!         for key in keys:
!             # ... but not if they were explicitly excluded.
!             if key not in self.excludes:
!                 mods = self.badmodules[key].keys()
                  mods.sort()
!                 print "?", key, "from", ', '.join(mods)
  
      def any_missing(self):
!         keys = self.badmodules.keys()
          missing = []
!         for key in keys:
!             if key not in self.excludes:
!                 # Missing, and its not supposed to be
!                 missing.append(key)
!         return missing
  
      def replace_paths_in_code(self, co):
          new_filename = original_filename = os.path.normpath(co.co_filename)
!         for f,r in self.replace_paths:
              if original_filename.startswith(f):
!                 new_filename = r+original_filename[len(f):]
                  break
  
          if self.debug and original_filename not in self.processed_paths:
!             if new_filename!=original_filename:
                  self.msgout(2, "co_filename %r changed to %r" \
                                      % (original_filename,new_filename,))
--- 422,506 ----
  
          # Print missing modules
!         missing, maybe = self.any_missing_maybe()
!         if missing:
!             print
!             print "Missing modules:"
!             for name in missing:
!                 mods = self.badmodules[name].keys()
                  mods.sort()
!                 print "?", name, "imported from", ', '.join(mods)
!         # Print modules that may be missing, but then again, maybe not...
!         if maybe:
!             print
!             print "Submodules thay appear to be missing, but could also be",
!             print "global names in the parent package:"
!             for name in maybe:
!                 mods = self.badmodules[name].keys()
!                 mods.sort()
!                 print "?", name, "imported from", ', '.join(mods)
  
      def any_missing(self):
!         """Return a list of modules that appear to be missing. Use
!         any_missing_maybe() if you want to know which modules are
!         certain to be missing, and which *may* be missing.
!         """
!         missing, maybe = self.any_missing_maybe()
!         return missing + maybe
! 
!     def any_missing_maybe(self):
!         """Return two lists, one with modules that are certainly missing
!         and one with modules that *may* be missing. The latter names could
!         either be submodules *or* just global names in the package.
! 
!         The reason it can't always be determined is that it's impossible to
!         tell which names are imported when "from module import *" is done
!         with an extension module, short of actually importing it.
!         """
          missing = []
!         maybe = []
!         for name in self.badmodules:
!             if name in self.excludes:
!                 continue
!             i = name.rfind(".")
!             if i < 0:
!                 missing.append(name)
!                 continue
!             subname = name[i+1:]
!             pkgname = name[:i]
!             pkg = self.modules.get(pkgname)
!             if pkg is not None:
!                 if pkgname in self.badmodules[name]:
!                     # The package tried to import this module itself and
!                     # failed. It's definitely missing.
!                     missing.append(name)
!                 elif subname in pkg.globalnames:
!                     # It's a global in the package: definitely not missing.
!                     pass
!                 elif pkg.starimports:
!                     # It could be missing, but the package did an "import *"
!                     # from a non-Python module, so we simply can't be sure.
!                     maybe.append(name)
!                 else:
!                     # It's not a global in the package, the package didn't
!                     # do funny star imports, it's very likely to be missing.
!                     # The symbol could be inserted into the package from the
!                     # outside, but since that's not good style we simply list
!                     # it missing.
!                     missing.append(name)
!             else:
!                 missing.append(name)
!         missing.sort()
!         maybe.sort()
!         return missing, maybe
  
      def replace_paths_in_code(self, co):
          new_filename = original_filename = os.path.normpath(co.co_filename)
!         for f, r in self.replace_paths:
              if original_filename.startswith(f):
!                 new_filename = r + original_filename[len(f):]
                  break
  
          if self.debug and original_filename not in self.processed_paths:
!             if new_filename != original_filename:
                  self.msgout(2, "co_filename %r changed to %r" \
                                      % (original_filename,new_filename,))
***************
*** 478,486 ****
      mf.run_script(script)
      mf.report()
  
  
  if __name__ == '__main__':
      try:
!         test()
      except KeyboardInterrupt:
          print "\n[interrupt]"
--- 578,587 ----
      mf.run_script(script)
      mf.report()
+     return mf  # for -i debugging
  
  
  if __name__ == '__main__':
      try:
!         mf = test()
      except KeyboardInterrupt:
          print "\n[interrupt]"