nerdnews.py

Sun Jun 20 16:45:30 EDT 1999

Creates an HTML page with today's Dilbert strip and
headlines from Linux Today and Slashdot.

-- 
Fredrik Henbjork

Email: frehe491 at student.liu.se
WWW: http://o112.ryd.student.liu.se
-------------- next part --------------
#! /usr/bin/python
#
# Fredrik Henbjork <frehe491 at student.liu.se> 1999
# License: GNU GPL <http://www.gnu.org/copyleft/gpl.html>
#
# Why not take a look at the PyApache homepage <http://www.msg.com.mx/pyapache/>
#

import re
import urllib

def get_dilbert():
    """Return the URL of today's Dilbert strip.

    Downloads the Dilbert index page and pulls the identifier of the
    current strip out of the link that wraps the strip image.  The whole
    procedure is attempted up to nine times.

    Returns the image URL as a string, or '' when no attempt produced a
    page containing the expected link.
    """
    index_url = 'http://www.dilbert.com/comics/dilbert/index.html'
    image_url = 'http://www.dilbert.com/comics/dilbert/archive/images/dilbert%s.gif'
    # The strip identifier sits between these two pieces of markup.  Both
    # must be present for a match; otherwise the attempt counts as failed
    # instead of yielding a URL built from the remainder of the page.
    strip_id = re.compile(
        '<A HREF="/comics/dilbert/archive/images/dilbert'
        '(.*?)'
        r'\.gif"> <IMG SRC="/comics/dilbert/archive/images/dilbert',
        re.DOTALL)
    max_tries = 9

    for attempt in range(max_tries):
        try:
            page = urllib.urlopen(index_url)
            try:
                html = page.read()
            finally:
                # Release the connection even when the read fails part-way.
                page.close()
        except Exception:
            # Best effort: any download problem just costs one attempt.
            # Unlike a bare "except:", this does not swallow
            # KeyboardInterrupt or SystemExit on Python 2.5 and later.
            continue
        # Single quotes are removed before matching, as the original code
        # did; the reason is not recorded.
        match = strip_id.search(re.sub("'", '', html))
        if match:
            return image_url % match.group(1)
    return ''

def get_linux_today():
    """Fetch the current headlines from linuxtoday.com.

    Returns a list with one entry per story.  Each entry is a list of up
    to three strings in the order (headline, url, date).  An empty list
    is returned when the feed cannot be downloaded.
    """
    try:
        feed = urllib.urlopen('http://linuxtoday.com/lthead.txt')
        try:
            text = feed.read()
        finally:
            # Release the connection even when the read fails part-way.
            feed.close()
    except Exception:
        # Best effort: a missing feed simply means no headlines.  Unlike a
        # bare "except:", this does not swallow KeyboardInterrupt or
        # SystemExit on Python 2.5 and later.
        return []
    # Stories are separated by '&&'; the text before the first separator
    # is discarded.  Within a story the remainder of the separator line is
    # skipped and the next three lines are kept.
    stories = text.split('&&')[1:]
    return [story.split('\n')[1:4] for story in stories]

def get_slashdot():
    """Fetch the current headlines from slashdot.org.

    Returns a list with one entry per story.  Each entry is a list of up
    to three strings in the order (headline, url, date).  An empty list
    is returned when the feed cannot be downloaded.
    """
    try:
        feed = urllib.urlopen('http://slashdot.org/ultramode.txt')
        try:
            text = feed.read()
        finally:
            # Release the connection even when the read fails part-way.
            feed.close()
    except Exception:
        # Best effort: a missing feed simply means no headlines.  Unlike a
        # bare "except:", this does not swallow KeyboardInterrupt or
        # SystemExit on Python 2.5 and later.
        return []
    # Stories are separated by '%%'; the pieces before the first and after
    # the last separator are discarded.  Within a story the remainder of
    # the separator line is skipped and the next three lines are kept.
    stories = text.split('%%')[1:-1]
    return [story.split('\n')[1:4] for story in stories]

def _escape_html(text):
    """Return text with &, <, > and " replaced by HTML entities."""
    # '&' is handled first so the entities added afterwards are not
    # escaped a second time.
    for char, entity in (('&', '&amp;'), ('<', '&lt;'),
                         ('>', '&gt;'), ('"', '&quot;')):
        text = text.replace(char, entity)
    return text


def _print_section_heading(title, site_url):
    """Print the title row of a section and the row linking to its site."""
    print('        <tr><td>%s</td></tr>' % title)
    print('        <tr><td><small>(<a href="%s">%s</a>)</small></td></tr>'
          % (site_url, site_url))


def _print_section_spacer():
    """Print the blank table row that separates two sections."""
    # An explicit non-breaking space keeps the row from collapsing; a cell
    # holding only whitespace has no height with cellpadding="0".
    print('        <tr><td>&nbsp;</td></tr>')


def _print_headlines(title, site_url, headlines):
    """Print one section of linked headlines, or nothing if none are usable."""
    # An entry needs at least a headline and a url to be rendered; shorter
    # entries are skipped instead of aborting the page half-way through.
    usable = [item for item in headlines if len(item) >= 2]
    if not usable:
        return
    _print_section_heading(title, site_url)
    for item in usable:
        # The fields come from a remote site, so they are escaped before
        # being placed into the markup.
        print('        <tr><td><a href="%s">%s</a></td></tr>'
              % (_escape_html(item[1]), _escape_html(item[0])))
    _print_section_spacer()


def nerd_news():
    """Print the complete Nerd News HTML page on standard output.

    The page holds one table with up to three sections, in this order:
    Linux Today headlines, Slashdot headlines and today's Dilbert strip.
    A section whose data could not be fetched is left out.  Each source
    is fetched right before its section is printed.
    """
    print('<html>')
    print('  <head>')
    print('    <title>Nerd News</title>')
    print('  </head>')
    print('  <body bgcolor="#ffffff" text="#000000" link="#64284c" alink="#64284c" vlink="#2c507c">')
    print('    <center>')
    print('      <br><br>')
    print('      <table border="0" cellspacing="0" cellpadding="0">')

    # news from Linux Today
    _print_headlines('Linux Today', 'http://linuxtoday.com', get_linux_today())

    # news from Slashdot
    _print_headlines('Slashdot', 'http://slashdot.org', get_slashdot())

    # today's Dilbert strip
    dilbert_url = get_dilbert()
    if dilbert_url:
        _print_section_heading('Dilbert', 'http://www.dilbert.com/comics/dilbert/')
        print('        <tr><td><img src="%s"></td></tr>' % _escape_html(dilbert_url))
        _print_section_spacer()

    print('      </table>')
    print('    </center>')
    print('  </body>')
    print('</html>')

# Script entry point: print the page on standard output when run directly.
if __name__ == '__main__':
# To use this as a CGI script, uncomment the two lines below so that the
# content-type header and the blank line ending it precede the page.
#	print "Content-type: text/html"
#	print
	nerd_news()