[Tutor] Here is the whole script I'm having issues with.
S A
buc40@bemail.org
Wed, 21 Aug 2002 18:26:28 -0700
Hi Everyone-
As promised, I would send the whole script I'm having trouble with to see if someone else can figure out what is going wrong here:
import re
import urllib, urllister
import sys
import os
import StringIO
import htmllib
import formatter
import string
class Html:
def __init__(self, address):
self.address = address
#connection
def connect(self):
sock = urllib.urlopen("http://" + address)
#parse main doc
def parser(self):
parser = urllister.URLLister()
parser.feed(self.sock.read())
parser.close()
self.sock.close()
#search main doc for story links
def linkSearch(self):
source = self.parser.urls
link = r'''\/story\/0,\d+,6\d+,\d+,00\.html'''
sch = re.findall(link, string.join(source))
sch = string.join(sch)
return sch
#compare story links to "news.fox"
def compare(self):
t = time.strftime("%j_%H%M%S_%Y", time.localtime())
folder_contents = os.listdir("/Users/montana/News/Fox/")
l = len(folder_contents)
for i in folder_contents:
if i[-3:] == "fox":
newsin = open("/Users/montana/News/Fox/"+i, "rb")
newsfile = newsin.read()
if self.sch != newsin: print "News is being updated ..."
else:
print "Nothing to update. Bye."
exit
newsout = open("/Users/montana/News/Fox/news"+t+".fox", "wb")
newsout.write(self.sch)
newsout.close()
#from story links, download story files
def stories(self):
sch = string.split(self.sch)
l = len(sch)
for i in range(l):
lsock = urllib.open(self.address + sch[i], "rb")
links = lsock.read()
lsock.close()
return links
output = open(sch[i].html, "wb")
output.write(string.join(self.links))
output.close()
class TextExtractor:
def __init__(self, html, t):
self.html = html
self.t = t
#search and crop text from html
# def crop(self, start="<!--Storytext-->", end="<!--/Storytext-->"):
# story = '(?sx)%s.+%s' % (start, end)
def crop(self):
story = "(?sx)%s.+%s" % ("<!--Storytext-->", "<!--/Storytext-->")
newhtml = re.findall(story, self.html)
#format text from html
def text(self):
data = StringIO.StringIO()
fmt = formatter.AbstractFormatter(formatter.DumbWriter(data))
parser = htmllib.HTMLParser(fmt)
parser.feed("".join(self.html))
text = data.getvalue()
#save fromatted text to final file
def finalSave(self):
final = open(self.address + self.t + ".news", "ab")
final.append(self.text)
final.close()
Any weird indenting is due solely to this email program. I went through the script already to check indentation. Ok.
When importing this module in idle I get the following error:
Python 2.2.1 (#1, 07/27/02, 23:05:03)
[GCC Apple devkit-based CPP 6.02] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import update
Traceback (most recent call last):
File "<stdin>", line 1, in ?
File "update.py", line 63, in ?
class TextExtractor:
File "update.py", line 72, in TextExtractor
newhtml = re.findall(story, self.html)
NameError: name 'story' is not defined
>>>
I can't figure out why it is saying that story is not defined when it is clearly defined in the script. Any ideas?
Sorry if this is long.
Thanks.
SA
"I can do everything on my Mac that I used to do on my PC, plus alot more ..."
-Me
------------------------------------------------------------
Free, BeOS-friendly email accounts: http://BeMail.org/
BeOS News and Community: http://www.BeGroovy.com/
---------------------------------------------------------------------
Express yourself with a super cool email address from BigMailBox.com.
Hundreds of choices. It's free!
http://www.bigmailbox.com
---------------------------------------------------------------------