Markov.py
cpsoct at my-deja.com
cpsoct at my-deja.com
Sun Jan 21 04:43:22 EST 2001
I found the following script in the Mac standard distribution (see path
below). It seems to be Guido's, though I am not sure. I am at a loss
as to how to use it: I was unable to run it and there is no
documentation. In other words, I put on my detective hat but was still
unable to get the thing to run. Does anyone use this, and can you explain
to me how? The script is a bit too big and complex for my level of Python
know-how, but how sweet would it be to be able to make a Markov chain in
Python without having to re-invent the wheel. Particularly since I
wouldn't be able to in this case!
cheers,
kevin parks
seoul, korea
cpsoct at lycos.com
----- cut here ------
#found in apps:Python 2.0:Demo:scripts:markov.py
#
#! /usr/bin/env python
class Markov:
    """Markov chain over sliceable sequences (e.g. strings or tuples).

    ``histsize`` is the maximum number of trailing elements used as the
    lookup state.  ``choice`` is a callable that picks one element from a
    non-empty list (for example ``random.choice``).

    ``trans`` maps a state to the list of successors observed after it:
      * the key ``None`` holds the empty prefix of every sequence fed in,
        and is used to start generation;
      * every other key is a slice of at most ``histsize`` elements;
      * a successor is a one-element slice, or ``None`` to mark the end
        of a sequence.
    """

    def __init__(self, histsize, choice):
        self.histsize = histsize
        self.choice = choice
        self.trans = {}

    def add(self, state, next):
        """Record that ``next`` was observed following ``state``."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence into the transition table."""
        n = self.histsize
        add = self.add
        # Start marker: None -> empty slice of the same type as seq.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # End marker.  The start index must be clamped at 0: a negative
        # index would select only a suffix of a sequence shorter than
        # histsize, whereas get() looks up the whole sequence in that case.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence by walking the table.

        Raises KeyError if nothing has been fed in with put() yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while 1:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            if not successor:
                # None marks the end of a sequence.
                break
            seq = seq + successor
        return seq
def _wrap_words(words, limit=72):
    """Greedily pack words into space-separated lines and return the lines.

    A new line is started whenever adding the next word would push the
    running width (word lengths plus one separator each) past ``limit``.
    """
    lines = []
    current = []
    width = 0
    for word in words:
        if width + len(word) > limit:
            lines.append(' '.join(current))
            current = []
            width = 0
        current.append(word)
        width = width + len(word) + 1
    lines.append(' '.join(current))
    return lines


def test():
    """Command-line driver: train a Markov chain on text and emit output.

    Options are read from sys.argv (see the usage text below).  Each input
    file is split into paragraphs on blank lines; each paragraph is fed to
    the chain either as a string of characters or as a tuple of words.
    After training, generated paragraphs are printed in an endless loop
    until the process is interrupted.
    """
    import getopt
    import random
    import sys

    args = sys.argv[1:]
    try:
        # 'q' must be listed here, otherwise the documented -q option is
        # rejected as unknown.
        opts, args = getopt.getopt(args, '0123456789cdqw')
    except getopt.error:
        for line in (
            'Usage: markov [-#] [-cddqw] [file] ...',
            'Options:',
            '-#: 1-digit history size (default 2)',
            '-c: characters (default)',
            '-w: words',
            '-d: more debugging output',
            '-q: no debugging output',
            'Input files (default stdin) are split in paragraphs',
            'separated blank lines and each paragraph is split',
            'in words by whitespace, then reconcatenated with',
            'exactly one space separating words.',
            'Output consists of paragraphs separated by blank',
            'lines, where lines are no longer than 72 characters.',
        ):
            print(line)
        # Without this return, execution would continue with 'opts' unbound.
        return

    histsize = 2
    do_words = 0
    debug = 1
    for o, a in opts:
        if '-0' <= o <= '-9':
            # Single-digit option; int() avoids eval() on argument text.
            histsize = int(o[1:])
        elif o == '-c':
            do_words = 0
        elif o == '-d':
            debug = debug + 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = 1
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                f = sys.stdin
                if f.isatty():
                    print('Sorry, need stdin from file')
                    continue
            else:
                f = open(filename, 'r')
            if debug:
                print('processing %s ...' % filename)
            try:
                text = f.read()
            finally:
                f.close()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')

    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')
    if debug > 1:
        # Show the start entries and the states shorter than the full
        # history.  max(histsize, 1) also shows the empty state when
        # histsize is 0, whether that state is '' or ().
        for key in m.trans.keys():
            if key is None or len(key) < max(histsize, 1):
                print('%r %r' % (key, m.trans[key]))
        print('')

    # Generate paragraphs forever; the user stops the program by
    # interrupting it.
    while 1:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        print('\n'.join(_wrap_words(words)))
        print('')
def tuple(list):
    """Return a tuple holding the elements of the sequence ``list``.

    The input must support len(), indexing and slicing.  The sequence is
    split in half recursively and the partial tuples are concatenated.
    Note that this definition shadows the built-in of the same name
    within this module.
    """
    if len(list) == 0:
        return ()
    if len(list) == 1:
        return (list[0],)
    # Floor division keeps the midpoint an integer on Python 3 as well;
    # a float slice index would raise TypeError.
    i = len(list) // 2
    return tuple(list[:i]) + tuple(list[i:])
# Run the command-line driver only when executed as a script, so that
# importing this module does not start reading input.
if __name__ == '__main__':
    test()
Sent via Deja.com
http://www.deja.com/
More information about the Python-list
mailing list