ANN: Python Language Reference
Edward C. Jones
edcjones at erols.com
Fri Dec 26 17:56:08 EST 2003
Stephen Ferg wrote:
> An attempt to produce a complete, alphabetized reference of all of
> Python's language features. The purpose is to support developers, who
> need a quick way to look up information about a language feature.
>
> The table of contents was extracted from:
> * the index of the language reference
> * the index of the library reference
> * the global module index
>
> http://www.ferg.org/pyref/index.html
Neat.
I have some code I find to be very useful. It searches all the Python
documentation trying to match a regular expression. HTML in the docs is
ignored. The result is formatted in HTML and put where my browser can
find it.
==============================================================
docsdata.py:
#! /usr/bin/env python
from __future__ import generators
import os, sys, time, re, htmllib, formatter, cStringIO, string, cPickle
# NOTE: this string follows the import statements, so Python treats it as
# a plain expression statement; it is not bound to the module's __doc__.
"""Strip the HTML from the Python documentation.

Usage: ./docsdata.py <dir> <datafile>

<dir> is walked recursively for .html/.htm files.  The stripped text of
every file found is collected in a dictionary which is pickled into
<datafile>.
"""
# Look at the source code for htmllib.
class Parser(htmllib.HTMLParser):
def __init__(self, formatter, verbose=0):
htmllib.HTMLParser.__init__(self, formatter, verbose)
# Print nothing for </a>.
def anchor_end(self):
if self.anchor:
self.anchor = None
# Look at the source code for formatter.
class StripWriter(formatter.DumbWriter):
    """DumbWriter variant used to produce plain, unwrapped text.

    Horizontal rules become a blank gap and flowing text is written
    exactly as received instead of being re-wrapped at maxcol.
    """

    def __init__(self, f=None, maxcol=72):
        formatter.DumbWriter.__init__(self, f, maxcol)

    def send_hor_rule(self, *args, **kw):
        """Write two newlines in place of a horizontal rule."""
        self.file.write('\n\n')
        # Both the column counter and the break flag are reset.
        self.col = self.atbreak = 0

    def send_flowing_data(self, data):
        """Route flowing text through the literal-data path.

        This keeps long lines in one piece.
        """
        formatter.DumbWriter.send_literal_data(self, data)
# Strip all the html from a piece of text.
def strip_file(textin):
    """Remove the HTML markup from textin.

    textin is fed through Parser, whose output is rendered by a
    StripWriter into an in-memory file.

    Returns a (title, text) pair: the parser's title attribute (it may
    be None; the caller checks for that) and the rendered plain text.
    """
    buf = cStringIO.StringIO()
    writer = StripWriter(buf)
    html_parser = Parser(formatter.AbstractFormatter(writer))
    html_parser.feed(textin)
    # The title is picked up after feed() and before close().
    page_title = html_parser.title
    html_parser.close()
    stripped = buf.getvalue()
    buf.close()
    return page_title, stripped
def process_files(topdir, exts):
    """Strip the HTML from every matching file below topdir.

    topdir -- directory that is walked recursively.
    exts   -- collection of lower-case file extensions (with the leading
              dot) to accept; the comparison lower-cases each file's
              extension first.

    Returns a dictionary.  The key None maps to topdir itself; every
    other key is the full path of a processed file and maps to the list
    [title, text], where text is the stripped content with runs of
    newlines collapsed to a single newline.  When a file has no title,
    or only a blank one, its full path is used as the title.

    A progress line is written to standard output after every 50 files,
    and the final count once the walk is complete.
    """
    # One regex pass collapses every run of newlines to a single newline.
    squeeze_newlines = re.compile(r'\n+').sub
    bigdict = {None: topdir}
    count = 0
    for dirpath, dirnames, filenames in os.walk(topdir):
        for name in filenames:
            fullname = os.path.join(dirpath, name)
            if not os.path.isfile(fullname):
                continue
            if os.path.splitext(fullname)[1].lower() not in exts:
                continue
            # Close each file explicitly; a large documentation tree
            # would otherwise leave many handles to the garbage collector.
            infile = open(fullname, 'r')
            try:
                raw = infile.read()
            finally:
                infile.close()
            title, text = strip_file(raw)
            text = squeeze_newlines('\n', text)
            if title is None or title.strip() == '':
                title = fullname
            bigdict[fullname] = [title, text]
            count += 1
            if count % 50 == 0:
                sys.stdout.write('file count %d\n' % count)
    sys.stdout.write('final count %d\n' % count)
    return bigdict
# Script body: ./docsdata.py <dir> <datafile>
if len(sys.argv) != 3:
    raise Exception('program must have exactly two arguments.')
topdir = sys.argv[1]
datafile = sys.argv[2]
bigdict = process_files(topdir, ['.html', '.htm'])
# Pickle protocol 1 is a binary format, so the file is opened in binary
# mode; a text-mode handle can corrupt the data on platforms that
# translate line endings.  The handle is closed explicitly so the data
# is flushed to disk before the script ends.
outfile = open(datafile, 'wb')
try:
    cPickle.dump(bigdict, outfile, 1)
finally:
    outfile.close()
===================================================================
doc_search.py:
#! /usr/bin/env python
from __future__ import generators
import os, sys, time, re, htmllib, formatter, cStringIO, string, cPickle
# NOTE: this string follows the import statements, so Python treats it as
# a plain expression statement; it is not bound to the module's __doc__.
"""Search the text of the Python documentation for a regex pattern.

Usage: ./doc_search.py <datafile> <pattern>

<datafile> must have been written by "docsdata.py" and <pattern> is a
regular expression as defined by the module "re".  The output is a page
of HTML which is written into NONCE_DIR.  A link to that page is added
to NONCE_FILE.
"""
# Directory into which each generated result page is written.
# Hard-coded to the author's home directory; adjust before use.
NONCE_DIR = '/home/edcjones/nonce_files'
# HTML page that receives a link to every newly written result page.
NONCE_FILE = '/home/edcjones/bookmarks/nonce.html'
# Number of characters of context shown before each match.
BEFORE = 50
# Number of characters of context shown after each match.
AFTER = 50
def extract_lines(bigdict, pattern, before=None, after=None):
    """Collect a highlighted snippet for every match of pattern.

    bigdict -- mapping of file name to [title, text].  The key None, if
               present, holds the top directory string instead of a
               document and is skipped.
    pattern -- regular expression source, compiled with re.compile().
    before  -- characters of context kept in front of a match; defaults
               to the module constant BEFORE.
    after   -- characters of context kept behind a match; defaults to
               the module constant AFTER.

    Returns a dictionary mapping each file name to the list
    [title, snippet, snippet, ...].  In a snippet the matched text is
    wrapped in '<font color=red>...</font>' and every newline is
    replaced by '<br>' followed by a newline.  Files without a match
    map to [title] alone.
    """
    if before is None:
        before = BEFORE
    if after is None:
        after = AFTER
    compiled_pattern = re.compile(pattern)
    # The None entry is metadata (the top directory), not a document.
    fullnames = [name for name in bigdict if name is not None]
    fullnames.sort()
    filedict = {}
    for fullname in fullnames:
        title = bigdict[fullname][0]
        text = bigdict[fullname][1]
        snippets = [title]
        # finditer() yields the same non-overlapping, left-to-right
        # matches as repeated search() calls, but it always advances, so
        # a pattern that can match the empty string cannot loop forever.
        for match_object in compiled_pattern.finditer(text):
            start, end = match_object.span()
            insert = '<font color=red>' + text[start:end] + '</font>'
            lo = max(start - before, 0)
            # Only the context window is sliced; the document is not
            # copied for every match.
            output = text[lo:start] + insert + text[end:end + after]
            snippets.append(output.replace('\n', '<br>\n'))
        filedict[fullname] = snippets
    return filedict
def write_nonce(filename, nonce_file):
    """Add a link to filename to the page nonce_file unless it has one.

    filename   -- path of the result page to link to.
    nonce_file -- path of the HTML page that collects the links.  It is
                  created if it does not exist yet.

    A line of nonce_file that contains filename as a substring counts as
    an existing link, in which case the page is left untouched.
    Otherwise the line '<br><a href="FILENAME">BASENAME</a>' is appended.
    """
    if os.path.exists(nonce_file):
        infile = open(nonce_file, 'r')
        try:
            lines = infile.readlines()
        finally:
            infile.close()
    else:
        # First use: start with an empty page rather than failing after
        # the result page has already been written.
        lines = []
    for line in lines:
        if filename in line:
            return
    tail = os.path.split(filename)[1]
    lines.append('<br><a href="%s">%s</a>\n' % (filename, tail))
    outfile = open(nonce_file, 'w')
    try:
        outfile.writelines(lines)
    finally:
        outfile.close()
def make_html(filedict, topdir):
    """Render the search results as one HTML page and return it.

    filedict -- mapping of file name to [title, snippet, snippet, ...].
                A None key, if present, is skipped.
    topdir   -- kept for interface compatibility; not used here.

    For each file, in sorted order, the page holds a link to the file
    labelled with its title, followed by a list with one item per
    snippet.  Snippets are inserted as-is, so the markup they contain is
    kept.
    """
    html_lines = ['<html>', '<head></head>', '<body>']
    fullnames = [name for name in filedict if name is not None]
    fullnames.sort()
    for fullname in fullnames:
        entry = filedict[fullname]
        # NOTE(review): an entry is the title plus one item per hit, so
        # this test also drops files with exactly one hit.  Confirm that
        # is intended; "< 2" would drop only files without any hit.
        if len(entry) < 3:
            continue
        html_lines.append('<p><a href="%s">%s</a>\n' % (fullname, entry[0]))
        html_lines.append('<ul>')
        for text in entry[1:]:
            html_lines.append('<li>')
            html_lines.append(text)
        html_lines.append('</ul>')
    # The closing body tag previously lacked its '>'.
    html_lines.append('</body>\n</html>\n')
    return '\n'.join(html_lines)
# Script body: ./doc_search.py <datafile> <pattern>
if len(sys.argv) != 3:
    raise Exception('program must have exactly two arguments.')
datafile = sys.argv[1]
pattern = sys.argv[2]
# NOTE: unpickling can execute arbitrary code; only load data files that
# were produced by docsdata.py and come from a trusted location.
# The pickle was written with binary protocol 1, so read it in binary mode.
infile = open(datafile, 'rb')
try:
    bigdict = cPickle.load(infile)
finally:
    infile.close()
topdir = bigdict[None]
filedict = extract_lines(bigdict, pattern)
html_text = make_html(filedict, topdir)
# The result page is named after the current UTC time.
tail = time.strftime('docs.%Y.%b.%d.%H.%M.%S.html',
                     time.gmtime(time.time()))
filename = os.path.join(NONCE_DIR, tail)
# Close the page before it is linked, so the link never points at a
# partially written file.
outfile = open(filename, 'w')
try:
    outfile.write(html_text)
finally:
    outfile.close()
write_nonce(filename, NONCE_FILE)
More information about the Python-list
mailing list