[IPython-dev] pydoc and introspective features

Greg Novak novak at ucolick.org
Wed Feb 28 15:53:44 EST 2007


"Fernando Perez" <fperez.net at gmail.com> wrote:
> - search.  This is probably the biggest gripe everyone has with python
> vs. commercial interactive systems (such as Matlab or Mathematica).
> Tab-completion and 'foo?' work great, but if you don't even know where
> to begin looking for something, you're stuck.    A builtin indexing
> system that could be either exposed via a web browser or to a
> command-line program (such as ipython) would be very welcome by a lot
> of users.

While we're on the subject, I humbly submit my slow-as-a-slug but
fairly general code to recursively search python objects looking for
things.  

It looks inside modules, dicts, tuples, lists, and instances looking
for things based on name, value, or docstring.  It's also pretty easy
to extend it either to look inside different objects or else have a
different definition of a 'match.'  

It returns a list of strings that tell you how to get to the thing you
want.  A typical call would be:

aproposName('needle', compoundObject)

returns:
['arg[foo].bar[3]']
I.e., "There's something named 'needle' at index 3 of the attribute
named bar of the object stored under dict key foo in the object
passed as the argument."

I've posted this before--this version fixes major problems (i.e., some
things I thought worked didn't work in the previously posted version).

I've also attached test code.  

This is probably more useful as food for thought than for anything
practical.  On the other hand, it solves a somewhat more general
problem: it can look inside live objects, as opposed to searching
only docstrings.

Greg
-------------- next part --------------
import unittest;

import apropos as aproposModule
from apropos import *

class AproposTest(unittest.TestCase):
    """Unit tests for the apropos module.

    Covers the aproposXxx() wrappers, the Introspector classes and the
    searchXxx() match predicates.  _apropos() and apropos() are not
    called directly; they are only exercised through the wrappers.
    """

    def testAproposName(self):
        """Name search matches dict keys and attribute names, and never
        matches list or tuple elements (they are given no name)."""
        class Composite:
            def __init__(self):
                self.a = 1
                self.foo = 'bar'
                self.b = 3
        self.assertEqual(aproposName('foo', [1,'foo',2]),
                         [])
        self.assertEqual(aproposName('foo', (1,'foo',3)),
                         [])
        self.assertEqual(aproposName('foo', dict(a=1,foo='bar',b=3)),
                         ['arg[foo]'])
        self.assertEqual(aproposName('foo', Composite()),
                         ['arg.foo'])

        # exclude='_' keeps the search out of underscore-prefixed
        # names, so nothing below __builtins__ may show up.
        lst = aproposName('aproposName', aproposModule, exclude='_')
        self.assertTrue('apropos.aproposName' in lst)
        self.assertTrue('apropos.aproposNameRegexp' in lst)
        self.assertFalse('apropos.__builtins__[_ip].user_ns[aproposName]'
                         in lst)

        # An explicit name= replaces the default 'arg' prefix.
        self.assertEqual(aproposName('foo', Composite(), name='name'),
                         ['name.foo'])

    def testMaxDepth(self):
        """maxDepth limits how many container levels are descended."""
        lst = aproposName('foo', dict(foo=dict(foo=1, bar=2), b=3),
                          maxDepth=0)
        # NOTE(review): 'arg][' looks like a typo for 'arg['.  As
        # written these two strings can never be produced, so the
        # assertions cannot fail.  Confirm how maxDepth=0 is meant to
        # behave before tightening them.
        self.assertFalse('arg][foo][foo]' in lst)
        self.assertFalse('arg][foo]' in lst)

        lst = aproposName('foo', dict(foo=dict(foo=1, bar=2), b=3),
                          maxDepth=1)
        self.assertFalse('arg[foo][foo]' in lst)
        self.assertTrue('arg[foo]' in lst)

        lst = aproposName('foo', dict(foo=dict(foo=1, bar=2), b=3),
                          maxDepth=2)
        self.assertTrue('arg[foo][foo]' in lst)
        self.assertTrue('arg[foo]' in lst)

        # No maxDepth given: unlimited descent.
        lst = aproposName('foo', dict(foo=dict(foo=1, bar=2), b=3))
        self.assertTrue('arg[foo][foo]' in lst)
        self.assertTrue('arg[foo]' in lst)

    # FIXME -- Sometimes causes bus error?
    def disable_testModuleSearch(self):
        """Full, unfiltered search of the apropos module.  The name does
        not start with 'test', so the loader does not pick it up."""
        # Sequester the long-running test.
        lst = aproposName('aproposName', aproposModule)
        self.assertTrue('apropos.aproposName' in lst)
        self.assertTrue('apropos.aproposNameRegexp' in lst)
        self.assertTrue('apropos.__builtins__[_ip].user_ns[aproposName]'
                        in lst)

    def testSyntax(self):
        """Functionality has been tested... just make sure that these
        functions can be called"""
        class Composite:
            def __init__(self, str):
                self.__doc__ = str

        self.assertEqual(aproposValue('foo', dict(a=1, bar='foo')),
                         ['arg[bar]'])
        self.assertEqual(aproposDoc('foo', Composite('foo')),
                         ['arg'])
        self.assertEqual(aproposNameRegexp ('^foo', dict(foo=1, barfoo=2)),
                         ['arg[foo]'])
        self.assertEqual(aproposValueRegexp ('^foo', dict(bar='foo',
                                                          the='afoo')),
                         ['arg[bar]'])
        self.assertEqual(aproposDocRegexp ('^foo', Composite('foo')),
                         ['arg'])
        self.assertEqual(aproposDocRegexp ('^foo', Composite('theFoo')),
                         [])

    def testNullIntrospector(self):
        """NullIntrospector is its own iterator and is always empty."""
        i = NullIntrospector()
        # I think this is how this is supposed to work
        self.assertEqual(id(i), id(i.__iter__()))
        self.assertRaises(StopIteration, i.next)

        # make sure code doesn't freak out
        i = NullIntrospector(exclude='_')

    def testListIntrospector(self):
        """ListIntrospector yields (element, None, '[index]') tuples."""
        i = ListIntrospector([1,2])
        self.assertEqual(id(i), id(i.__iter__()))
        self.assertEqual(i.next(), (1, None, '[0]'))
        self.assertEqual(i.next(), (2, None, '[1]'))
        self.assertRaises(StopIteration, i.next)

        # make sure code doesn't freak out
        i = ListIntrospector([1,2], exclude='_')

    def testInstanceIntrospector(self):
        """InstanceIntrospector yields (value, name, '.name') for every
        name in dir(), minus those starting with the exclude prefix."""
        class Composite:
            pass

        c = Composite()
        c.a = 1
        c.b = 2

        lst = [el for el in InstanceIntrospector(c)]
        # depending on how I'm running the test, one or the other of
        # these should be in the list
        self.assertTrue(('test_apropos', '__module__', '.__module__') in lst
                        or ('__builtin__', '__module__', '.__module__') in lst)
        self.assertTrue((None, '__doc__', '.__doc__') in lst)
        self.assertTrue((1, 'a', '.a') in lst)
        self.assertTrue((2, 'b', '.b') in lst)
        self.assertEqual(len(lst), 4)

        lst = [el for el in InstanceIntrospector(c, exclude='_')]
        self.assertFalse(() in lst)
        self.assertFalse((None, None, '.__doc__') in lst)
        self.assertEqual(len(lst), 2)

    def testDictIntrospector(self):
        """DictIntrospector yields (value, key, '[key]') and honours the
        exclude prefix."""
        lst = [el for el in DictIntrospector(dict(a=1,_b=2))]

        self.assertEqual(len(lst), 2)
        self.assertTrue((1, 'a', '[a]') in lst)
        self.assertTrue((2, '_b', '[_b]') in lst)

        lst = [el for el in DictIntrospector(dict(a=1,_b=2), exclude='_')]
        self.assertEqual(len(lst), 1)
        self.assertTrue((1, 'a', '[a]') in lst)
        self.assertFalse((2, '_b', '[_b]') in lst)

    def testSearchName(self):
        """searchName matches needle anywhere inside the name."""
        self.assertTrue(searchName('needle', 'the needle', None))
        self.assertTrue(searchName('needle', 'needle more', None))
        self.assertTrue(searchName('needle', 'the needle more', None))

        # Make sure function doesn't freak out for no name
        self.assertFalse(searchName('needle', None, None))

    def testSearchValue(self):
        """searchValue matches needle anywhere inside str(obj), except
        for the built-in containers."""
        class Composite:
            def __init__(self, str):
                self._str = str
            def __repr__(self):
                return self._str
            def __str__(self):
                return self._str

        self.assertTrue(searchValue('needle', None,
                                    Composite('the needle')))
        self.assertTrue(searchValue('needle', None,
                                    Composite('needle more')))
        self.assertTrue(searchValue('needle', None,
                                    Composite('the needle more')))
        # These are not true because searchValue doesn't split
        # apart built-in containers
        self.assertFalse(searchValue('needle', None,
                                    ['needle', 2, 3]))
        self.assertFalse(searchValue('needle', None,
                                    ('needle', 2, 3)))
        self.assertFalse(searchValue('needle', None,
                                    dict(a='needle', b=2, c=3)))

    def testSearchDoc(self):
        """searchDoc matches needle anywhere inside obj.__doc__."""
        class Composite:
            def __init__(self, str):
                self.__doc__ = str

        self.assertTrue(searchDoc('needle', None,
                                  Composite('the needle')))
        self.assertTrue(searchDoc('needle', None,
                                  Composite('needle more')))
        self.assertTrue(searchDoc('needle', None,
                                  Composite('the needle more')))

        # Make sure search fn doesn't freak out
        self.assertFalse(searchDoc('needle', None,
                                   Composite(None)))

    def testSearchNameRegexp(self):
        """searchNameRegexp applies the regexp to the name."""
        self.assertFalse(searchNameRegexp('^needle', 'the needle', None))
        self.assertTrue(searchNameRegexp('^needle', 'needle more', None))
        self.assertFalse(searchNameRegexp('^needle', 'the needle more', None))

        # Make sure function doesn't freak out for no name.  This has to
        # call the regexp variant; calling searchName here would leave
        # the no-name path of searchNameRegexp untested.
        self.assertFalse(searchNameRegexp('^needle', None, None))

    def testSearchValueRegexp(self):
        """searchValueRegexp applies the regexp to str(obj), except for
        the built-in containers."""
        class Composite:
            def __init__(self, str):
                self._str = str
            def __repr__(self):
                return self._str
            def __str__(self):
                return self._str

        self.assertFalse(searchValueRegexp('^needle', None,
                                           Composite('the needle')))
        self.assertTrue(searchValueRegexp('^needle', None,
                                          Composite('needle more')))
        self.assertFalse(searchValueRegexp('^needle', None,
                                           Composite('the needle more')))

        # Make sure we don't search inside containers
        self.assertFalse(searchValueRegexp('needle', None,
                                           ['needle', 2, 3]))
        self.assertFalse(searchValueRegexp('needle', None,
                                           ('needle', 2, 3)))
        self.assertFalse(searchValueRegexp('needle', None,
                                           dict(a='needle', b=2, c=3)))

    def testSearchDocRegexp(self):
        """searchDocRegexp applies the regexp to obj.__doc__."""
        class Composite:
            def __init__(self, str):
                self.__doc__ = str

        self.assertFalse(searchDocRegexp('^needle', None,
                                         Composite('the needle')))
        self.assertTrue(searchDocRegexp('^needle', None,
                                        Composite('needle more')))
        self.assertFalse(searchDocRegexp('^needle', None,
                                         Composite('the needle more')))

        # Make sure function doesn't freak out if no doc
        self.assertFalse(searchDocRegexp('^needle', None,
                                         Composite(None)))
        
def suite():
    """Build a TestSuite holding every test of the listed TestCase
    classes (currently only AproposTest)."""
    collected = []
    for case in (AproposTest,):
        collected.append(unittest.TestLoader().loadTestsFromTestCase(case))
    return unittest.TestSuite(collected)

def test():
    """Run the whole suite with the plain-text unittest runner."""
    runner = unittest.TextTestRunner()
    runner.run(suite())

def itest():
    """Run the whole suite in debug mode, i.e. without collecting
    results, so that errors propagate to the caller."""
    everything = suite()
    everything.debug()
-------------- next part --------------
import types
import re

__version__ = 0.2
__author__ = "Greg Novak <novak at ucolick.org"
# Date: January 14, 2007
# Code released public domain.  Do whatever you want with it.

# The three lists below decide which objects apropos descends into,
# and how.  Membership is tested with the exact type
# (type(obj) in <list>), so subclasses are not picked up
# automatically; append your own types to the appropriate list if you
# want apropos to descend into them.
#
# introspect() also knows a per-object hook: if the type of an object
# is in none of the lists but the object has an __apropos__ attribute,
# that attribute is called with the extra keyword arguments given to
# apropos() (for example exclude) and must return an iterator.  The
# iterator should return the contents of the object, as tuples of
# (elementObject, nameString, accessString)
# NOTE(review): the recursive search only descends into objects whose
# type is in one of these lists, so the __apropos__ hook looks
# unreachable from apropos() itself -- confirm the intended behaviour.

# Must respond to __iter__ and [string].  Designed for things you
# access via [string]
dictTypes = [types.DictType]
# Must respond to __iter__().  Designed for things you access via
# [int]
listTypes = [types.ListType, types.TupleType]
# Must give sensible results to dir(), getattr().  Designed for things
# you access via .
# types.InstanceType is the type of instances of old-style classes
# only; instances of new-style classes are not covered by it.
instanceTypes = [types.InstanceType, types.ModuleType]

##################################################
## Interface

## Common Usage
def aproposName(needle, haystack=None, **kw):
    """Recursively search haystack for things whose name contains
    needle as a substring.  Any additional keyword arguments are
    handed on to apropos(); see its docstring.  Typical usage is
    aproposName('string', module).

    Return a list of strings, each showing the path to reach one
    matching object"""
    paths = apropos(needle, haystack, searchFn=searchName, **kw)
    return paths

def aproposValue(needle, haystack=None, **kw):
    """Recursively search haystack for things whose string
    representation contains needle as a substring.  Any additional
    keyword arguments are handed on to apropos(); see its docstring.
    Typical usage is aproposValue('string', module).

    Return a list of strings, each showing the path to reach one
    matching object"""
    paths = apropos(needle, haystack, searchFn=searchValue, **kw)
    return paths

def aproposDoc(needle, haystack=None, **kw):
    """Recursively search haystack for things whose documentation
    string contains needle as a substring.  Any additional keyword
    arguments are handed on to apropos(); see its docstring.  Typical
    usage is aproposDoc('string', module).

    Return a list of strings, each showing the path to reach one
    matching object"""
    paths = apropos(needle, haystack, searchFn=searchDoc, **kw)
    return paths

def aproposNameRegexp(needle, haystack=None, **kw):
    """Recursively search haystack for things whose name is matched by
    the regexp needle.  Any additional keyword arguments are handed on
    to apropos(); see its docstring.  Typical usage is
    aproposNameRegexp('string', module).

    Return a list of strings, each showing the path to reach one
    matching object"""
    paths = apropos(needle, haystack, searchFn=searchNameRegexp, **kw)
    return paths

def aproposValueRegexp(needle, haystack=None, **kw):
    """Recursively search haystack for things whose string
    representation is matched by the regexp needle.  Any additional
    keyword arguments are handed on to apropos(); see its docstring.
    Typical usage is aproposValueRegexp('string', module).

    Return a list of strings, each showing the path to reach one
    matching object"""
    paths = apropos(needle, haystack, searchFn=searchValueRegexp, **kw)
    return paths

def aproposDocRegexp(needle, haystack=None, **kw):
    """Recursively search haystack for things whose docstring is
    matched by the regexp needle.  Any additional keyword arguments
    are handed on to apropos(); see its docstring.  Typical usage is
    aproposDocRegexp('string', module).

    Return a list of strings, each showing the path to reach one
    matching object"""
    paths = apropos(needle, haystack, searchFn=searchDocRegexp, **kw)
    return paths

## Handles default values of arguments
def apropos(needle, haystack=None, name=None,
            searchFn=None, **kw):
    """Fill in default arguments, then search haystack recursively
    for needle.  Typical usage is apropos('string', module).

    haystack may be any python object; usually it is a module.  When
    it is omitted, the dict returned by globals() is searched instead
    (watch out, that is going to take a while) and the path prefix is
    the empty string, whatever name was passed.

    name is the prefix of the 'accessor' strings that are returned.
    When it is omitted, haystack.__name__ is used if haystack has
    one, otherwise 'arg'.

    searchFn(needle, name, obj) decides what counts as a match: it
    returns true if the object should be considered one.  When it is
    omitted, searchName is used, i.e. needle must be a substring of
    the name of the object.

    Any further keyword arguments are handed on to _apropos().

    Return a list of strings showing the path to reach the matching
    object"""
    if searchFn is None:
        searchFn = searchName

    if haystack is None:
        # No haystack: fall back to globals() with an empty prefix.
        return _apropos(needle, globals(), '', searchFn, **kw)

    if name is None:
        if hasattr(haystack, "__name__"):
            name = haystack.__name__
        else:
            name = 'arg'

    return _apropos(needle, haystack, name, searchFn, **kw)

##################################################
## Common search functions

def searchName(needle, name, obj):
    """Match if needle is a substring of name.  obj is ignored.

    A false name (None or the empty string) is returned unchanged,
    which callers see as 'no match'."""
    if not name:
        return name
    return needle in name

def searchValue(needle, name, obj):
    """Match if needle is a substring of str(obj).  name is ignored.

    Tuples, lists and dicts (exact types only) never match: their
    string representation includes the objects within them, so a hit
    there says nothing about the container itself.  The contents are
    tested individually once the search descends into the container.

    Return True or False."""
    # TODO What I really want to do is match the container if none of
    # its contents matched.
    # The builtins are used rather than the types.TupleType /
    # ListType / DictType aliases; they are the very same objects.
    if type(obj) in (tuple, list, dict):
        return False
    # NOTE -- should be repr()?
    return needle in str(obj)

def searchDoc(needle, name, obj):
    """Match if needle is a substring of obj.__doc__.  name is
    ignored.

    An object without __doc__ gives False; a false docstring (None or
    empty) is returned unchanged, which callers see as 'no match'."""
    if not hasattr(obj, '__doc__'):
        return False
    doc = obj.__doc__
    if not doc:
        return doc
    return needle in doc
    
def searchNameRegexp(needle, name, obj):
    """Match if the regexp needle is found in name (re.search).  obj
    is ignored.

    A false name (None or the empty string) is returned unchanged;
    otherwise the result of re.search is returned."""
    if not name:
        return name
    return re.search(needle, name)

def searchValueRegexp(needle, name, obj):
    """Match if the regexp needle is found in str(obj) (re.search).
    name is ignored.

    Tuples, lists and dicts (exact types only) never match, because
    their string representation includes the objects within them; the
    contents are tested individually once the search descends into
    the container.

    Return the result of re.search, or None for the containers."""
    # The builtins are used rather than the types.TupleType /
    # ListType / DictType aliases; they are the very same objects.
    if type(obj) in (tuple, list, dict):
        return None
    return re.search(needle, str(obj))

def searchDocRegexp(needle, name, obj):
    """Match if the regexp needle is found in obj.__doc__
    (re.search).  name is ignored.

    An object without __doc__ gives False; a false docstring (None or
    empty) is returned unchanged; otherwise the result of re.search
    is returned."""
    if not hasattr(obj, '__doc__'):
        return False
    doc = obj.__doc__
    if not doc:
        return doc
    return re.search(needle, doc)

##################################################
## The guts

def _apropos(needle, haystack, haystackName,
             searchFn, maxDepth=None, **kw):
    """Walk haystack recursively and collect the path of every match.

    needle       -- what to look for; it is only ever handed to
                    searchFn.
    haystack     -- the object to start from.
    haystackName -- name of the top level object.  It is passed to
                    searchFn as the name of haystack and is the first
                    bit of every returned 'accessor' string.
    searchFn     -- searchFn(needle, name, obj) returns true if the
                    object should be considered a match.
    maxDepth     -- how many levels may be descended.  None means no
                    limit.  0 means haystack itself is tested but
                    never taken apart; 1 also tests its direct
                    contents; and so on.
    **kw         -- handed on to introspect() for every object that
                    is taken apart (for example exclude).

    Return a list of strings showing the path to reach the matching
    object."""
    # An object is only taken apart if its exact type is listed here.
    searchTypes = dictTypes + listTypes + instanceTypes
    found = []
    # ids of the objects already taken apart; a set keeps the
    # membership test cheap however many objects have been visited.
    searchedIds = set()

    def search(haystack, haystackName, fullName, depth):
        '''Free variables: needle, searchFn, maxDepth, kw, found,
        searchedIds, searchTypes'''
        if searchFn(needle, haystackName, haystack):
            found.append(fullName)

        if type(haystack) not in searchTypes:
            return
        # Compare against None explicitly: maxDepth=0 is a real limit
        # ("do not descend at all"), not "no limit".
        if maxDepth is not None and depth >= maxDepth:
            return
        if id(haystack) in searchedIds:
            return

        # Prevent loops with circular references by recording the
        # object _before_ descending into its sub-objects.
        searchedIds.add(id(haystack))

        for hay, hayName, hayAccess in introspect(haystack, **kw):
            search(hay, hayName, fullName + hayAccess, depth + 1)

    search(haystack, haystackName, haystackName, 0)
    return found

def introspect(obj, **kw):
    if type(obj) in dictTypes:
        return DictIntrospector(obj, **kw)
    if type(obj) in listTypes:
        return ListIntrospector(obj, **kw)
    if type(obj) in instanceTypes:
        return InstanceIntrospector(obj, **kw)

    # User objects
    if hasattr(obj, '__apropos__'):
        return obj.__apropos__(**kw)

    # Stymied
    print "apropos.py: Warning, don't know how to deal with " + str(obj)
    return NullIntrospector()

# NOTE These introspectors simplify the code, but they seem to take about five
# times as long, very unfortunately.
class Introspector (object):
    """Base class of the introspectors.

    An introspector is its own iterator.  Subclasses override next()
    to return tuples of (elementObject, nameString, accessString) and
    to raise StopIteration when the contents are exhausted."""

    def __iter__(self):
        return self

    def next(self):
        # Returning None here would make iterating over an incomplete
        # subclass yield None forever; fail loudly instead.
        raise NotImplementedError(
            "Introspector subclasses must implement next()")

class NullIntrospector(Introspector):
    """Introspector without contents: iteration ends immediately."""

    def __init__(self, **kw):
        # Keyword arguments (such as exclude) are accepted so that the
        # call looks like that of the other introspectors; they are
        # ignored.
        return

    def next(self):
        raise StopIteration()

class DictIntrospector(Introspector):
    """Introspector for types that respond to __iter__ (giving the
    keys) and to obj[key] (giving a value).

    Only keys of type types.StringType are reported.  If exclude is
    given, keys starting with that prefix are skipped as well."""

    def __init__(self, dict, exclude=None):
        self.exclude = exclude
        self.dict = dict
        self.iter = self.dict.__iter__()

    def next(self):
        """Return the tuple (value, key, '[key]') of the next reported
        key.  StopIteration from the key iterator ends the
        iteration."""
        # FIXME -- completely skip non-string key entries
        while True:
            key = self.iter.next()
            if type(key) is not types.StringType:
                continue
            if self.exclude and key.startswith(self.exclude):
                continue
            return self.dict[key], key, '[' + key + ']'

class ListIntrospector(Introspector):
    """Introspector for types that respond to __iter__ and whose
    elements are reached by position.

    exclude is accepted so that the call looks like that of the other
    introspectors, but it is not used: the elements have no name."""

    def __init__(self, list, exclude=None):
        self.i = 0
        self.list = list
        self.iter = self.list.__iter__()

    def next(self):
        """Return the tuple (element, None, '[index]') of the next
        element.  StopIteration from the element iterator ends the
        iteration."""
        position = self.i
        # The counter is advanced before the element is fetched.
        self.i = position + 1
        element = self.iter.next()
        return element, None, '[' + str(position) + ']'

class InstanceIntrospector(Introspector):
    """Introspector for objects that respond to dir and getattr.

    Every name listed by dir() is reported, except names that are not
    of type types.StringType and, if exclude is given, names starting
    with that prefix."""

    def __init__(self, inst, exclude=None):
        self.exclude = exclude
        self.inst = inst
        self.iter = iter(dir(self.inst))

    def next(self):
        """Return the tuple (value, name, '.name') of the next reported
        attribute.  StopIteration from the name iterator ends the
        iteration."""
        # IPython structs allow non-string attributes.  Filter them
        # out because they cause problems.  That is, you have to
        # access them via obj[1], not getattr(obj, 1) or
        # getattr(obj, '1')
        # FIXME -- filter out non-string things that appear in dir()
        while True:
            attrName = self.iter.next()
            if type(attrName) is not types.StringType:
                continue
            if self.exclude and attrName.startswith(self.exclude):
                continue
            return getattr(self.inst, attrName), attrName, "." + attrName



More information about the IPython-dev mailing list