Parsing an HTML a tag
Leo Jay
python.leojay at gmail.com
Sat Sep 24 15:53:49 EDT 2005
You may define a start_a method in your parser class (MyHTMLParser).
For example, using htmllib:
import htmllib
import formatter
class HTML_Parser(htmllib.HTMLParser):
def __init__(self):
htmllib.HTMLParser.__init__(self,
formatter.AbstractFormatter(formatter.NullWriter()))
def start_a(self, args):
for key, value in args:
if key.lower() == 'href':
print value
# Feed the contents of the local file a.htm to the parser.
html = HTML_Parser()
page = open(r'a.htm', 'r')
try:
    # Make sure the file handle is released even if reading or parsing fails.
    html.feed(page.read())
finally:
    page.close()
html.close()
On 24 Sep 2005 10:13:30 -0700, George <buffer_88 at hotmail.com> wrote:
> How can I parse an HTML file and collect only the A tags? I have a
> start for the code but am unable to figure out how to finish the code.
> HTML_parse gets the data from the URL document. Thanks for the help
>
> def HTML_parse(data):
> from HTMLParser import HTMLParser
> parser = MyHTMLParser()
>
> parser.feed(data)
>
> class MyHTMLParser(HTMLParser):
>
> def handle_starttag(self, tag, attrs):
>
> def handle_endtag(self, tag):
>
> def read_page(URL):
> "this function returns the entire content of the specified URL
> document"
> import urllib
> connect = urllib.urlopen(url)
> data = connect.read()
> connect.close()
> return data
>
> --
> http://mail.python.org/mailman/listinfo/python-list
>
--
Best Regards,
Leo Jay
More information about the Python-list
mailing list