[Tutor] Here is the whole script I'm having issues with.

Wed, 21 Aug 2002 18:26:28 -0700

Hi Everyone-

As promised, I would send the whole script I'm having trouble with to see if someone else can figure out what is going wrong here:

import re
import urllib, urllister
import sys
import os
import StringIO
import htmllib
import formatter
import string

class Html:
     def __init__(self, address):
     self.address = address
     #connection
     def connect(self):
     sock = urllib.urlopen("http://" + address)

     #parse main doc
     def parser(self):
     parser = urllister.URLLister()
     parser.feed(self.sock.read())
     parser.close()
     self.sock.close()

     #search main doc for story links
     def linkSearch(self):
     source = self.parser.urls
     link = r'''\/story\/0,\d+,6\d+,\d+,00\.html'''
     sch = re.findall(link, string.join(source))
     sch = string.join(sch)
     return sch

     #compare story links to "news.fox"
     def compare(self):
     t = time.strftime("%j_%H%M%S_%Y", time.localtime())
     folder_contents = os.listdir("/Users/montana/News/Fox/")
     l = len(folder_contents)
     for i in folder_contents:
     if i[-3:] == "fox":
     newsin = open("/Users/montana/News/Fox/"+i, "rb")
     newsfile = newsin.read()
     if self.sch != newsin: print "News is being updated ..."
     else:
     print "Nothing to update. Bye."
     exit
     newsout = open("/Users/montana/News/Fox/news"+t+".fox", "wb")
     newsout.write(self.sch)
     newsout.close()

     #from story links, download story files
     def stories(self):
     sch = string.split(self.sch)
     l = len(sch)
     for i in range(l):
     lsock = urllib.open(self.address + sch[i], "rb")
     links = lsock.read()
     lsock.close()
     return links
     output = open(sch[i].html, "wb")
     output.write(string.join(self.links))
     output.close()

class TextExtractor:
     def __init__(self, html, t):
     self.html = html
     self.t = t
     #search and crop text from html
#     def crop(self, start="<!--Storytext-->", end="<!--/Storytext-->"):
#     story = '(?sx)%s.+%s' % (start, end)
     def crop(self):
     story = "(?sx)%s.+%s" % ("<!--Storytext-->", "<!--/Storytext-->")
        newhtml = re.findall(story, self.html)

    #format text from html
      def text(self):
     data = StringIO.StringIO()
        fmt = formatter.AbstractFormatter(formatter.DumbWriter(data))
        parser = htmllib.HTMLParser(fmt)
        parser.feed("".join(self.html))
        text = data.getvalue()

    #save fromatted text to final file
     def finalSave(self):
     final = open(self.address + self.t + ".news", "ab")
     final.append(self.text)
     final.close()

Any weird indenting is due solely to this email program. I went through the script already to check indentation. Ok.

When importing this module in idle I get the following error:

Python 2.2.1 (#1, 07/27/02, 23:05:03) 
[GCC Apple devkit-based CPP 6.02] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import update
Traceback (most recent call last):
  File "<stdin>", line 1, in ?
  File "update.py", line 63, in ?
    class TextExtractor:
  File "update.py", line 72, in TextExtractor
    newhtml = re.findall(story, self.html)
NameError: name 'story' is not defined
>>> 

I can't figure out why it is saying that story is not defined when it is clearly defined in the script. Any ideas?

Sorry if this is long.

Thanks.
SA

"I can do everything on my Mac that I used to do on my PC, plus alot more ..."

-Me

------------------------------------------------------------
Free, BeOS-friendly email accounts: http://BeMail.org/
BeOS News and Community: http://www.BeGroovy.com/

---------------------------------------------------------------------
Express yourself with a super cool email address from BigMailBox.com.
Hundreds of choices. It's free!
http://www.bigmailbox.com
---------------------------------------------------------------------