[ANN] slashbox - class to get/display slashdot.org (and others) stories

richard offer richard at whitequeen.com
Sat Jul 8 20:46:18 EDT 2000


Hi,

Here's a quick hack of a class to get and format news stories from a
number of sites.

It was designed with mod_python in mind, so all you need to do under
mod_python is

import slashbox
...

def handler(req):
    ...
    req.write(str(slashbox.Slashdot()))

To add the latest stories from slashdot to your page.


However, there's nothing mod_python-specific about it....


A number of other sites are included; it's easy to add new ones simply
by sub-classing slashbox. The trick is to find the site's .rss/.rdf file.


You'll need the latest PyXML code.

I'm in the process of re-implementing my web server, so I have no url to
give you....


richard.
-------------- next part --------------

# $Id: slashbox.py,v 1.3 2000/07/09 00:34:38 richard Exp $
# Copyright (c) 2000 Richard Offer <richard at whitequeen.com>. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 
# Richard Offer BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# 
# Except as contained in this notice, the name of Richard Offer shall not be
# used in advertising or otherwise to promote the sale, use or other dealings
# in this Software without prior written authorization from Richard Offer.



#
#
# A python class to grab the Slashdot (and other sites using RSS/RDF) stories.


import stat
import xmllib
import urllib
import urlparse
import os
import time
import string
import sys
from xml.sax import saxlib,saxexts

class _myrdf(saxlib.HandlerBase):
	"""SAX document handler that collects the <item> entries of an RSS/RDF feed.

	For every <item> element the text of its <title>, <link> and
	<description> children is gathered, has its whitespace collapsed to
	single spaces, and is stored in a dict with those three keys.  The
	finished dicts are returned, in document order, by stories().

	Elements with the same names that appear outside an <item> are
	collected into a scratch dict that is never reported as a story.
	"""

	# Names of the elements whose character data is captured.
	_FIELDS = ("title", "link", "description")

	def __init__(self):
		# _in[name] is 1 while the parser is inside a <name> element.
		self._in = {}
		for name in self._FIELDS:
			self._in[name] = 0

		# Text gathered for the story currently being read.
		self._txt = self._blank()

		# Completed stories, one dict per <item>.
		self._stories = []

	def _blank(self):
		"""Return a new story dict with every captured field empty."""
		txt = {}
		for name in self._FIELDS:
			txt[name] = ""
		return txt

	def startElement(self, ele, attr):
		"""Begin a new story on <item>, or start capturing a known field."""
		if ele == 'item':
			self._txt = self._blank()
		elif self._in.has_key(ele):
			self._in[ele] = 1

	def endElement(self, ele):
		"""Finish a captured field, or store the story when </item> is seen."""
		if self._in.has_key(ele):
			self._in[ele] = 0
			# split + join collapses runs of whitespace and trims the ends.
			self._txt[ele] = string.join(string.split(self._txt[ele]))
		elif ele == 'item':
			self._stories.append(self._txt)
			# Switch to a fresh scratch dict.  Without this, a <title>,
			# <link> or <description> that follows the item at channel
			# level would be appended to the story that was just stored,
			# because that story would still be self._txt.
			self._txt = self._blank()

	def characters(self, ch, start, length):
		"""Append the character data to every field that is currently open."""
		for name in self._in.keys():
			if self._in[name] == 1:
				self._txt[name] = self._txt[name] + ch[start:start+length]

	def stories(self):
		"""Return the list of story dicts collected so far."""
		return self._stories

# class Site()

class slashbox:
	"""
	Create a single slashbox: the headlines of one RSS/RDF feed.

	  slashbox(url,freq)
			url is the url of the RSS/RDF file, freq is the update period
			(mins).  The freq cannot be less than 30; smaller values are
			raised to 30.
	  stories()
			returns a list of stories (each story is stored in a dict with
			the keys 'title', 'link' and 'description').
	  raw()
			same as stories().
	  __str__()
			returns a HTML table containing the stories.

	The feed is downloaded to a cache file under /tmp and only fetched
	again once that file is older than freq minutes.

	Several subclasses are available, these already have the url defined,
	so to get the headlines from slashdot.org simply do


	>>> print Slashdot()

	The list of built-in sites is

		Slashdot()
		Freshmeat()
		SecurityFocus()
		Parnassus()
		LWN()
		Fool()
		LinuxNetNews()
		SourceForgeReleases()
		SourceForgeNews()


	If you are using mod_python, you can embed a slashbox simply by

	import slashbox
	...

		req.write(str(slashbox.Slashdot()))

	"""
	def __init__(self, url='http://slashdot.org/slashdot.rdf', freq=60):
		self._url = url
		# Enforce the minimum update period of 30 minutes.
		if freq < 30:
			freq = 30
		self._freq = freq

		_u = urlparse.urlparse(self._url)
		basename = os.path.basename(url)
		# Name the cache file after the host and the feed file; fall back
		# to the feed file alone when the url has no host part.
		if _u[1]:
			cache_name = _u[1] + "-" + basename
		else:
			cache_name = basename
		# NOTE(review): a predictable file name in a world-writable
		# directory can be pre-created or symlinked by another local user.
		self._file = "/tmp/slashbox-" + cache_name

		self.site = _u[0] + "://" + _u[1] + "/"
		self.name = _u[1]

		if self._cache_is_stale():
			urllib.urlretrieve(self._url, self._file)

		self.dh = _myrdf()
		xmlp = saxexts.make_parser()
		xmlp.setDocumentHandler(self.dh)

		fp = open(self._file)
		try:
			xmlp.parseFile(fp)
		finally:
			# Always release the file handle, even if parsing fails.
			fp.close()

	def _cache_is_stale(self):
		"""Return true if the cache file is missing or older than the update period."""
		try:
			mtime = os.stat(self._file)[stat.ST_MTIME]
		except OSError:
			# No usable cache file yet.
			return 1
		return mtime < (time.time() - (self._freq * 60))

	def stories(self):
		"""Return the list of story dicts read from the feed."""
		return self.dh.stories()

	# The class documentation has always advertised raw(); keep it working.
	raw = stories

	def __str__(self):
		"""Return the stories as a HTML table.

		All text taken from the feed is escaped before it is placed in
		the markup, since it comes from a remote site.  The description
		line is left out for stories that have no description.
		"""
		from xml.sax.saxutils import escape

		# Attribute values are double-quoted, so quotes must be escaped too.
		attr_entities = {'"': "&quot;"}

		parts = ["<SMALL>\n<TABLE BORDER=2 WIDTH=200>\n"]
		parts.append('<TH><A HREF="%s">%s</A></TH>\n' % (
				escape(self.site, attr_entities), escape(self.name)))
		parts.append("<TR><TD>\n  <TABLE><TBODY>\n")

		for story in self.dh.stories():
			parts.append('    <TR><TD><A HREF="%s">%s</A>' % (
					escape(story['link'], attr_entities),
					escape(story['title'])))
			description = story.get('description')
			if description:
				parts.append("<BR><SMALL>%s</SMALL>" % escape(description))
			parts.append('</TD></TR>\n')

		parts.append("  </TBODY></TABLE></TD></TR>\n</TABLE></SMALL>\n\n")
		return "".join(parts)

	def setName(self,name):
		"""Set the name shown in the heading of the HTML table."""
		self.name = name



class Slashdot(slashbox):
	"""Ready-made slashbox showing the Slashdot.org headlines."""
	def __init__(self):
		# Only the feed location and the display name differ per site.
		slashbox.__init__(self, 'http://slashdot.org/slashdot.rdf')
		self.setName('Slashdot')

class Freshmeat(slashbox):
	"""Ready-made slashbox showing the Freshmeat.net headlines."""
	def __init__(self):
		# Only the feed location and the display name differ per site.
		slashbox.__init__(self, 'http://freshmeat.net/backend/fm.rdf')
		self.setName('Freshmeat')

class SecurityFocus(slashbox):
	"""Ready-made slashbox showing the SecurityFocus.com headlines."""
	def __init__(self):
		# Only the feed location and the display name differ per site.
		slashbox.__init__(self, 'http://www.securityfocus.com/topnews-rss.html')
		self.setName('Security Focus')

class Parnassus(slashbox):
	"""Ready-made slashbox showing the Vaults of Parnassus headlines."""
	def __init__(self):
		# Only the feed location and the display name differ per site.
		slashbox.__init__(self, 'http://www.vex.net/parnassus/parnassus.rss')
		self.setName('Vaults of Parnassus')

class LWN(slashbox):
	"""Ready-made slashbox showing the Linux Weekly News headlines."""
	def __init__(self):
		# Only the feed location and the display name differ per site.
		slashbox.__init__(self, 'http://lwn.net/headlines/rss')
		self.setName('Linux Weekly News')


class Fool(slashbox):
	"""Ready-made slashbox showing the Motley Fool headlines."""
	def __init__(self):
		# Only the feed location and the display name differ per site.
		slashbox.__init__(self, 'http://www.fool.com/About/headlines/rss_headlines.asp')
		self.setName('The Motley Fool')

class LinuxNetNews(slashbox):
	"""Ready-made slashbox showing the Linux Net News headlines."""
	def __init__(self):
		# Only the feed location and the display name differ per site.
		slashbox.__init__(self, 'http://www.netnews.opensrc.org/index.rdf')
		self.setName('Linux Net News')

class SourceForgeReleases(slashbox):
	"""Ready-made slashbox listing the new releases on SourceForge."""
	def __init__(self):
		# Only the feed location and the display name differ per site.
		slashbox.__init__(self, 'http://sourceforge.net/export/rss_sfnewreleases.php')
		self.setName('Sourceforge New Releases')

class SourceForgeNews(slashbox):
	"""Ready-made slashbox listing the project news at SourceForge."""
	def __init__(self):
		# Only the feed location and the display name differ per site.
		slashbox.__init__(self, 'http://sourceforge.net/export/rss_sfnews.php')
		self.setName('Sourceforge News')



if __name__ == "__main__":
	# Run as a script: write the Slashdot box, as HTML, to standard output.
	box = Slashdot()
	sys.stdout.write(str(box) + "\n")


More information about the Python-list mailing list