[Tutor] Question on a example in the book "Learn to program using python" by Alan Gauld

Klas Marteleur klas.martelleur at telia.com
Sun Aug 1 17:10:24 CEST 2004


Hi

First of all thanks for a interesting mailing list, i think i learn a lot by 
reading all good questions and answers.

I bought a copy of the great book "Learn o program using python" 2001, i have 
read it back and forth a couple of times and now i am going thru testing and 
trying to understand all examples (...i think i am slowly starting to learn 
:) )

I got stuck on the case studie for the very useful :) program "Grammar 
counter" though. 
No matter what i try i get the error "Error analyzing file....."

I type "python document.py example_file.txt" in a console.

Can somebody of you professionals point me to what i am doing wrong?

Many thanks
Klas Marteleur

code:

#!/usr/bin/python
import sys,string, re

class Document:
    def __init__(self, filename):
        self.filename = filename
        self.para_count = 1
        self.line_count, self.sentence_count = 0,0
        self.clause_count, self.word_count = 0,0
        self.alphas = string.letters + string.digits
        self.stop_tokens = ['.','?','!']
        self.punctuation_chars = ['&','(',')','-',
                                  ';',':',','] + \
                                 self.stop_tokens
        self.punctuation_counts = {}
        self.groups = []
        for c in self.punctuation_chars:
            self.punctuation_counts[c] = 0
        self.format = """%s contains:
        %d paragraphs, %d lines and %d sentences.
        These in turn contain %d clauses and a total of %d words."""

    def getCharGroups(self):
        try:
            f = open(self.filename,"r")
            for line in f.readlines():
                self.line_count = self.line_count + 1
                if len(line) == 1:
                    self.para_count = self.para_count + 1
                else:
                    self.groups = self.groups + string.split(line)
        except:
            print "Failed to read file", self.filename
            sys.exit()

    def getWords(self):
        for i in range(len(self.groups)):
            self.groups[i] = self.ltrim(self.groups)
            self.groups[i] = self.rtrim(self.groups[i])
        self.removeExceptions()

    def removeExceptions(self):
        pass

    def ltrim(self,word):
        return word

    def rtrim(self,word):
        return word

    def generateStats(self):
        self.word_count = len(self.groups)
        for c in self.stop_tokens:
            sentence_count = sentence_count + \
                             self.punctuation_counts[c]
        for c in self.punctuation_counts.keys():
            clause_count = clause_count + \
                           self.punctuation_counts[c]

    def printStats(self):
        print self.format % (self.filename, self.para_count, self.line_count, 
self.sentence_count, self.clause_count, self.words_count)
        print "The following punctuation characters were used:"
        for i in self.punctuation_counts.keys():
            print "\t%s\t:\t%4d" % \
                  (i,self.punctuation_counts[i])

    def Analyze(self):
        self.getCharGroups()
        self.getWords()
        self.generateStats()

class TextDocument(Document):
    def ltrim(self, word):
        while (len(word) > 0) and \
              (word[0] not in self.alphas):
            ch = word[0]
            if ch in self.punctuation_counts.keys():
                self.punctuation_counts[ch] = \
                                            self.punctuation_counts[ch]+1
            word = word[1:]
        return word

    def rtrim(self,word):
        while (len(word) > 0) and \
              (word[-1] not in self.alphas):
            ch = word[-1]
            if ch in self.punctuation_counts.keys():
                self.punctuation_counts[ch] = \
                                            self.punctuation_counts[ch]+1
            word = word[:-1]
        return word

    def removeExceptions(self):
        top = len(self.groups)
        i = 0
        while i < top:
            if (len(self.groups[i]) == 0):
                del(self.groups[i])
                top = top - 1
            i = i + 1

class HTMLDocument(Document):
    def getCharGroups(self):
        tag = re.compile("<.+?>")
        para = re.compile("<[pP]>")
        self.para_count = 0
        f = open(self.filename, "r")
        lines = f.readlines()
        n = 0
        while n < len(lines):
            if len(lines[n]) > 1:
                if para.search(lines[n]):
                    self.para_count = self.para_count + 1
                lines[n] = tag.sub('',lines[n])
                if len(lines[n]) <= 1:
                    del(lines)[n]
                else:
                    self.groups = self.groups + string.split(lines[n])
                    n = n + 1
            else:
                n = n + 1
        self.line_count = len(lines)

if __name__ == "__main__":
    if len (sys.argv) <> 2:
        print "Usage: python document.py"
        sys.exit()
    else:
        try:
            D = HTMLDocument(sys.argv[1])
            D.Analyze()
            D.printStats()
        except:
            print "Error analyzing file: %s" % sys.argv[1]
-------------- next part --------------
A non-text attachment was scrubbed...
Name: document.py
Type: text/x-python
Size: 4448 bytes
Desc: not available
Url : http://mail.python.org/pipermail/tutor/attachments/20040801/5bbe1a1c/document.py


More information about the Tutor mailing list