a quick program to download tv listings
Rain Dog
raindog at no.spam.net
Tue Sep 16 00:52:43 EDT 2003
In article <1f0bdf30.0309102237.d29f0de at posting.google.com>,
prochak at netzero.net (Erik Lechak) wrote:
> Is there anyone out there that has written anything in python to
> download tv listings (no XML)? ... I wrote the test
> program below and it works. I am just curious if anyone has a more
> robust python implementation before I take the time to add all the
> bells and whistles.
Here's a version that uses an HTMLParser subclass and does its own
filtering by channel, rather than setting a cookie.
Some sample output:
% tvsearch "college football" news
College Football ABC 7 12:30 PM Sat Sep 13
College Football CBS 2 12:30 PM Sat Sep 13
Eyewitness News ABC 7 4:00 PM Sat Sep 13
CBS 2 News at 5:00 CBS 2 5:00 PM Sat Sep 13
College Football ABC 7 5:00 PM Sat Sep 13
Channel 4 News NBC 4 5:00 PM Sat Sep 13
CBS Evening News CBS 2 5:30 PM Sat Sep 13
---------- 8< ---------- 8< ---------- 8< ---------- 8< ----------
#!/usr/bin/env python
import formatter, re, time, urllib
from htmllib import HTMLParser
# Channel numbers to report on.  An empty list means "search every channel".
CHANNELS = [
    2, 3, 4, 5, 7, 9, 11, 13, 18, 22, 30, 32, 34, 35, 36, 39,
    40, 41, 42, 43, 44, 46, 50, 57, 62,
]

# Yahoo location (lineup) code that is spliced into the listing URL.
LOCATION = 'us_CA57315'

# Yahoo TV listing URL template.  The %(epoch)d placeholder is left in
# place here and filled in later with the grid's starting time.
YAHOO_TV_URL = ''.join([
    'http://tv.yahoo.com/grid?lineup=',
    LOCATION,
    '&starttime=%(epoch)d&.intl=us',
])
class Show(object):
    '''Plain record describing one program in the listings.

    Fields are supplied as keyword arguments.  Only the names listed in
    __slots__ are accepted; any other keyword raises AttributeError.
    '''
    __slots__ = ('name', 'channel', 'station', 'start', 'end')

    def __init__(self, **kwargs):
        '''Store every keyword argument as an attribute of the same name.'''
        for field in kwargs:
            setattr(self, field, kwargs[field])

    def __str__(self):
        '''Return one formatted line: name, station, channel, start time.

        The start attribute is converted with time.localtime(), so the
        rendered time is in the local time zone.
        '''
        stamp = time.strftime('%I:%M %p %a %b %d',
                              time.localtime(self.start))
        # Replace the zero padding of a single-digit hour with a blank.
        if stamp.startswith('0'):
            stamp = ' ' + stamp[1:]
        fields = (self.name, self.station, self.channel, stamp)
        return '%-35s %8s %-4d %-s' % fields
class YahooTVParser(HTMLParser):
    '''Minimal HTML parser for Yahoo TV listings.

    Feed it the HTML of a listings page.  Every <A> tag whose HREF looks
    like a program link and that encloses some text is turned into a Show
    and appended to self.shows.
    '''
    # Recognises the HREF of a program link.
    showRE = re.compile(r'\/tvpdb\?d=tvp&id=(.*)')
    # Pulls the show details out of such an HREF.  Groups used below:
    # (3) station name, (4) channel number, (5) progutn digits, which are
    # stored as the show's start time.
    showInfoRE = re.compile(r'(\d*)&cf.*channels=us_([^&]*).*'
                            r'&chname=([^\+]*)\+(\d+)&progutn=(\d*)')

    def __init__(self):
        '''Create a parser with an empty show list and no anchor open.'''
        HTMLParser.__init__(self, formatter.NullFormatter())
        self.shows = []
        self.inShow = 0
        # Per-anchor state; initialised here so every handler can rely on
        # the attributes existing even before the first <A> tag is seen.
        self.newShow = None
        self.showName = ''

    def start_a(self, attrs):           # <A> handler
        '''If the tag's HREF matches showRE, record the show info.'''
        self.newShow = None
        self.showName = ''
        # Forget any earlier anchor that was never closed, so its flag
        # cannot be paired with this anchor's (possibly empty) show.
        self.inShow = 0
        # Check if the HREF matches a show.
        for k, v in attrs:
            if k == 'href':
                # HREF values may be wrapped across lines; rejoin them.
                url = ''.join(v.split('\n'))
                if self.showRE.search(url):
                    m = self.showInfoRE.search(url)
                    # The progutn group may match an empty string, which
                    # float() cannot convert; skip such links instead of
                    # aborting the whole parse.
                    if m and m.group(5):
                        # Create a new Show--its name isn't known yet.
                        self.newShow = Show(start=float(m.group(5)),
                                            channel=int(m.group(4)),
                                            station=m.group(3))
                        self.inShow = 1
                break

    def end_a(self):                    # </A> handler
        '''If done with a show, record its name and add it to the list.'''
        if self.inShow and self.showName and self.newShow is not None:
            self.newShow.name = self.showName
            self.shows.append(self.newShow)
        # The anchor is closed whether or not a show was recorded.
        self.inShow = 0

    def handle_data(self, text):
        '''Handle the data between, e.g., <A> and </A> tags.'''
        if self.inShow:
            self.showName += text
def getGrid(epoch):
    '''Download and parse the listings grid starting at *epoch*.

    epoch -- grid start time as a number; it is substituted into the
             %(epoch)d placeholder of YAHOO_TV_URL.

    Returns the list of Show objects collected by YahooTVParser.
    Errors raised while fetching or parsing the page propagate to the
    caller.
    '''
    # Name the substitution explicitly instead of relying on vars(), which
    # silently depends on the parameter name and on the local namespace.
    url = YAHOO_TV_URL % {'epoch': epoch}
    response = urllib.urlopen(url)
    try:
        page = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    parser = YahooTVParser()
    parser.feed(page)
    parser.close()
    return parser.shows
def findShows(patterns):
    '''Print the shows of the coming week that match *patterns*.

    patterns -- sequence of regular-expression strings.  A show matches
                when any of them is found in its name, ignoring case.
                An empty sequence matches every name.

    When CHANNELS is non-empty, only shows on those channels are printed.
    The listings are fetched with getGrid() in three-hour steps starting
    from the current time; within each step the matching shows are
    printed in order of starting time.
    '''
    nameRE = None
    if patterns:
        nameRE = re.compile('|'.join(['(%s)' % n for n in patterns]), re.I)

    def isMatchingShow(show):
        # Apply whichever of the two filters (channel, name) is active.
        if CHANNELS and show.channel not in CHANNELS:
            return False
        if nameRE is not None and nameRE.search(show.name) is None:
            return False
        return True

    THREE_HOURS = 3 * 60 * 60
    ONE_WEEK = THREE_HOURS * 8 * 7
    startTime = int(time.time())
    endTime = startTime + ONE_WEEK
    for h in range(startTime, endTime, THREE_HOURS):
        shows = [s for s in getGrid(h) if isMatchingShow(s)]
        # Sort on the start time only.  The sort is stable, so shows that
        # start together keep their page order instead of being ordered
        # by comparing the Show objects themselves.
        shows.sort(key=lambda s: s.start)
        for s in shows:
            print(s)
def main():
    '''Command-line entry point.

    Every command-line argument is passed to findShows() as a pattern.
    If '-h' is among the arguments, a usage line is written to stderr
    and the process exits with status 1.  Otherwise the process exits
    with status 0 once the search finishes or is interrupted with
    Ctrl-C.
    '''
    import os.path
    import sys

    patterns = sys.argv[1:]
    if '-h' in patterns:
        progName = os.path.basename(sys.argv[0])
        sys.stderr.write("Usage: %s [PATTERN]...\n" % progName)
        sys.exit(1)

    try:
        findShows(patterns)
    except KeyboardInterrupt:
        # An interrupted search still counts as a normal exit.
        pass
    sys.exit(0)


if __name__ == '__main__':
    main()
More information about the Python-list
mailing list