ANN: Python Language Reference

Fri Dec 26 17:56:08 EST 2003

Stephen Ferg wrote:
> An attempt to produce a complete, alphabetized reference of all of
> Python's language features. The purpose is to support developers, who
> need a quick way to look up information about a language feature.
> 
> The table of contents was extracted from:
>  * the index of the language reference 
>  * the index of the library reference 
>  * the global module index 
> 
> http://www.ferg.org/pyref/index.html

Neat.

I have some code I find to be very useful. It searches all the Python 
documentation trying to match a regular expression. HTML in the docs is 
ignored. The result is formatted in HTML and put where my browser can 
find it.

==============================================================
docsdata.py:

#! /usr/bin/env python
from __future__ import generators

import os, sys, time, re, htmllib, formatter, cStringIO, string, cPickle

""" Strips html from Python docs.

  ./docsdata.py <dir> <datafile>
"""

# Look at the source code for htmllib.
class Parser(htmllib.HTMLParser):
     def __init__(self, formatter, verbose=0):
         htmllib.HTMLParser.__init__(self, formatter, verbose)

     # Print nothing for </a>.
     def anchor_end(self):
         if self.anchor:
             self.anchor = None

# Look at the source code for formatter.
class StripWriter(formatter.DumbWriter):
     def __init__(self, f=None, maxcol=72):
         formatter.DumbWriter.__init__(self, f, maxcol)

     # Ignore horizontal rules.
     def send_hor_rule(self, *args, **kw):
         self.file.write('\n\n')
         self.col = 0
         self.atbreak = 0

     # Don't cut long lines into pieces.
     def send_flowing_data(self, data):
         formatter.DumbWriter.send_literal_data(self, data)

# Strip all the html from a piece of text.
def strip_file(textin):
     """Return (title, text) for an HTML string with the markup removed.

     title is the value the parser leaves in its title attribute after
     the input has been fed to it; text is everything the StripWriter
     wrote while parsing.
     """
     buf = cStringIO.StringIO()
     html_parser = Parser(formatter.AbstractFormatter(StripWriter(buf)))
     html_parser.feed(textin)
     # The title is read before the parser is closed.
     doc_title = html_parser.title
     html_parser.close()
     plain = buf.getvalue()
     buf.close()
     return doc_title, plain

def process_files(topdir, exts):
     """Strip the HTML from every matching file below topdir.

     topdir -- root of the directory tree to walk.
     exts   -- collection of lowercase extensions, dot included
               (for example ['.html', '.htm']), that select the files.

     Returns a dict mapping each file's path to [title, text].  The
     extra key None maps to topdir itself.  A file whose title is
     missing or blank gets its path as the title.  Progress is printed
     every 50 files and once more at the end.
     """
     bigdict = {None: topdir}
     count = 0
     for dirpath, dirnames, filenames in os.walk(topdir):
         for name in filenames:
             fullname = os.path.join(dirpath, name)
             if not os.path.isfile(fullname):
                 continue
             ext = os.path.splitext(fullname)[1]
             if ext.lower() not in exts:
                 continue
             # Close every file explicitly; a large tree would otherwise
             # leave the cleanup of each handle to the garbage collector.
             infile = open(fullname, 'r')
             try:
                 raw = infile.read()
             finally:
                 infile.close()
             title, text = strip_file(raw)
             # Squeeze every run of newlines down to a single newline.
             while '\n\n' in text:
                 text = text.replace('\n\n', '\n')
             if title is None or title.strip() == '':
                 title = fullname
             bigdict[fullname] = [title, text]
             count += 1
             if count % 50 == 0:
                 # One pre-formatted string prints the same text whether
                 # print is a statement or a function.
                 print('file count %d' % count)
     print('final count %d' % count)
     return bigdict

# Script body: strip every HTML file under <dir> and pickle the result
# into <datafile>.
if len(sys.argv) != 3:
     raise Exception('program must have exactly two arguments.')
topdir = sys.argv[1]
datafile = sys.argv[2]

bigdict = process_files(topdir, ['.html', '.htm'])
# Pickle protocol 1 is a binary format, so the file has to be opened in
# binary mode.  It is closed explicitly so the data is flushed to disk
# before the script exits.
outfile = open(datafile, 'wb')
try:
     cPickle.dump(bigdict, outfile, 1)
finally:
     outfile.close()

===================================================================
doc_search.py:

#! /usr/bin/env python
from __future__ import generators

import os, sys, time, re, htmllib, formatter, cStringIO, string, cPickle

""" Searches for text from Python documentation that matches a regex 
pattern.

      ./doc_search.py <datafile> <pattern>
where <datafile> has been output by "docsdata.py" and <pattern> is a 
regex pattern as defined in module "re". The output is a page of html 
which is put in NONCE_DIR. A link to the output is added to NONCE_FILE.
"""
# Directory in which each generated result page is written.
NONCE_DIR = '/home/edcjones/nonce_files'
# HTML page that accumulates a link to every result page.
NONCE_FILE = '/home/edcjones/bookmarks/nonce.html'
# Number of characters of context kept in front of each match.
BEFORE = 50
# Number of characters of context kept after each match.
AFTER = 50

def extract_lines(bigdict, pattern):
     """Collect a highlighted excerpt for every match of pattern.

     bigdict -- dict mapping a file path to [title, text].  The entry
                stored under the key None is not a [title, text] pair
                and is skipped.
     pattern -- regular expression source, compiled with re.compile.

     Returns a dict mapping each file path to [title, excerpt, ...].
     Each excerpt is the matched text wrapped in a red <font> tag, with
     up to BEFORE characters of context in front and AFTER characters
     behind, and every newline turned into '<br>\\n'.  Empty matches
     produce no excerpt.
     """
     compiled_pattern = re.compile(pattern)
     # The None key holds the top directory as a plain string; indexing
     # it like a [title, text] pair would search a single character or
     # raise IndexError for a one-character directory name.
     fullnames = [key for key in bigdict.keys() if key is not None]
     fullnames.sort()
     filedict = {}
     for fullname in fullnames:
         title = bigdict[fullname][0]
         text = bigdict[fullname][1]
         excerpts = [title]
         # finditer always moves forward, so a pattern that can match the
         # empty string no longer spins forever on the same position.
         for match_object in compiled_pattern.finditer(text):
             start, end = match_object.span()
             if start == end:
                 # Nothing to highlight in an empty match.
                 continue
             lo = max(start - BEFORE, 0)
             hi = min(end + AFTER, len(text))
             # Build only the excerpt instead of a marked-up copy of the
             # whole text for every match.
             output = (text[lo:start] + '<font color=red>' +
                       text[start:end] + '</font>' + text[end:hi])
             excerpts.append(output.replace('\n', '<br>\n'))
         filedict[fullname] = excerpts
     return filedict

def write_nonce(filename, nonce_file):
     """Add a link to filename to the bookmark page nonce_file.

     filename   -- path of the generated result page; used as the link
                   target, with its last path component as the link text.
     nonce_file -- existing file that collects the links; it must be
                   readable, otherwise the error from open() propagates.

     Nothing is written when some line of nonce_file already contains
     filename.
     """
     infile = open(nonce_file, 'r')
     try:
         lines = infile.readlines()
     finally:
         infile.close()
     for line in lines:
         if line.find(filename) != -1:
             # Already bookmarked.
             return
     tail = os.path.split(filename)[1]
     # Append the new link rather than truncating and rewriting the whole
     # file, so a failure part-way through cannot lose existing links.
     outfile = open(nonce_file, 'a')
     try:
         outfile.write('<br><a href="%s">%s</a>\n' % (filename, tail))
     finally:
         outfile.close()

def make_html(filedict, topdir):
     """Render the search results as one HTML page and return it.

     filedict -- dict mapping a file path to [title, excerpt, ...].
     topdir   -- accepted but not used by this function.

     Files are listed in sorted path order.  Each listed file gets a
     link (path as target, title as text) followed by a bullet list of
     its excerpts.  Entries with fewer than three items are left out.
     """
     html_lines = ['<html>', '<head></head>', '<body>']
     fullnames = list(filedict.keys())
     fullnames.sort()
     for fullname in fullnames:
         entry = filedict[fullname]
         # NOTE(review): besides files without matches this also drops
         # files with exactly one match -- confirm that is intended.
         if len(entry) < 3:
             continue
         html_lines.append('<p><a href="%s">%s</a>\n' % (fullname, entry[0]))
         html_lines.append('<ul>')
         for text in entry[1:]:
             html_lines.append('<li>')
             html_lines.append(text)
         html_lines.append('</ul>')
     # The closing body tag needs its '>' to be valid markup.
     html_lines.append('</body>\n</html>\n')
     return '\n'.join(html_lines)

# Script body: load the pickled documentation text, search it, write the
# result page into NONCE_DIR and bookmark it in NONCE_FILE.
if len(sys.argv) != 3:
     raise Exception('program must have exactly two arguments.')
datafile = sys.argv[1]
pattern = sys.argv[2]

# The data file is a binary pickle, so it has to be read in binary mode.
# NOTE: unpickling can run arbitrary code; only load data files that you
# produced yourself.
infile = open(datafile, 'rb')
try:
     bigdict = cPickle.load(infile)
finally:
     infile.close()
topdir = bigdict[None]
filedict = extract_lines(bigdict, pattern)
html_text = make_html(filedict, topdir)
# The page name carries a UTC timestamp with one-second resolution.
tail = time.strftime('docs.%Y.%b.%d.%H.%M.%S.html',
                     time.gmtime(time.time()))
filename = os.path.join(NONCE_DIR, tail)
# Close the page explicitly so it is complete on disk before it is
# bookmarked.
outfile = open(filename, 'w')
try:
     outfile.write(html_text)
finally:
     outfile.close()
write_nonce(filename, NONCE_FILE)