Is c.l.py becoming less friendly?

mk mrkafk at gmail.com
Thu Feb 5 11:00:05 EST 2009


(duck)

542 comp.lang.python rtfm

467 comp.lang.python shut+up

263 comp.lang.perl rtfm

45 comp.lang.perl shut+up




Code:

import urllib2
import re
import time

def fillurlfmt(args):
    """Build the search descriptor for one (group, keyword) pair.

    ``args`` is a 3-item sequence ``(urlfmt, ggroup, gkw)``.  ``urlfmt`` is a
    %-format string with two placeholders; the keyword is substituted first
    and the group second.

    Returns a dict with the keys ``'group'``, ``'keyword'`` and ``'url'``.
    """
    template, group_name, search_term = args
    query_url = template % (search_term, group_name)
    return dict(group=group_name, keyword=search_term, url=query_url)

def consqurls(args):
    """Build one search descriptor per keyword for a single newsgroup.

    ``args`` is a pair ``(ggroup, gkeywords)``: the group name and an
    iterable of keywords to search for in that group.

    Returns a list with one ``fillurlfmt`` result (a dict with the keys
    ``'group'``, ``'keyword'`` and ``'url'``) per keyword, in keyword order.
    """
    ggroup, gkeywords = args
    # The template is split into adjacent literals (joined by the parser into
    # one string) so that no literal is long enough to be line-wrapped in
    # transit.  The first %s receives the keyword, the second the group name.
    urlfmt = (
        'http://groups.google.com/groups/search?as_q=%s&as_epq=&as_oq=&as_eq='
        '&num=10&scoring=&lr=&as_sitesearch=&as_drrb=q&as_qdr='
        '&as_mind=1&as_minm=1&as_miny=1999'
        '&as_maxd=1&as_maxm=1&as_maxy=2009'
        '&as_ugroup=%s&as_usubject=&as_uauthors=&safe=off'
    )
    # A list comprehension always yields a real list, which flatten_list()
    # needs in order to recognise and unpack the per-group results.
    return [fillurlfmt((urlfmt, ggroup, gkw)) for gkw in gkeywords]

def flatten_list(x):
    """Return a new flat list with every nested ``list`` in ``x`` unpacked.

    Only ``list`` instances (including subclasses) are descended into; any
    other element, such as a tuple or a dict, is kept as a single item.
    Element order is preserved.
    """
    flat = []
    for item in x:
        if not isinstance(item, list):
            flat.append(item)
            continue
        # Nested list: splice in its flattened contents.
        flat += flatten_list(item)
    return flat

def ggsearch(urldict):
    """Fetch the page at ``urldict['url']`` and return it with its descriptor.

    ``urldict`` is a dict that must contain a ``'url'`` key.

    Returns a new dict holding the response body under ``'result'`` plus
    every entry of ``urldict``.  The entries of ``urldict`` are applied last,
    so they win if ``urldict`` itself already has a ``'result'`` key.
    Errors raised by opening or reading the URL propagate to the caller.
    """
    opener = urllib2.build_opener()
    # Send a browser-style User-agent header.  The value is written as
    # adjacent literals so it forms a single string without line breaks.
    opener.addheaders = [(
        'User-agent',
        'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.20) '
        'Gecko/20081217 (CK-IBM) Firefox/2.0.0.20',
    )]
    time.sleep(0.1)  # pause 0.1 s before every request
    urlf = opener.open(urldict['url'])
    try:
        resdict = {'result': urlf.read()}
    finally:
        # Release the connection even if reading the body fails.
        urlf.close()
    resdict.update(urldict)
    return resdict

def extrclosure(resregexp, groupno):
    """Return a function that reduces a fetched page to one regex group.

    ``resregexp`` is a compiled regular expression and ``groupno`` the group
    to extract from its first match.

    The returned function takes a dict with a ``'result'`` entry, replaces
    that entry in place with the matched group and returns the same dict.
    It raises ``ValueError`` when the pattern is not found in the page.
    """
    def extrres(resdict):
        txtgr = resregexp.search(resdict['result'])
        if txtgr is None:
            # search() returns None on no match; fail with a message that
            # says which query was affected rather than an AttributeError.
            raise ValueError(
                'result pattern not found in page for group=%r keyword=%r'
                % (resdict.get('group'), resdict.get('keyword'))
            )
        resdict['result'] = txtgr.group(groupno)
        return resdict
    return extrres

def delcomma(x):
    """Strip every comma from ``x['result']`` in place and return ``x``.

    ``x`` is a dict whose ``'result'`` entry is a string, for example a
    count formatted as ``'1,234'``.
    """
    with_commas = x['result']
    x['result'] = with_commas.replace(',', '')
    return x

if __name__ == "__main__":
    # Count the search hits for each keyword in each newsgroup and print one
    # "<count> <group> <keyword>" line per combination.
    gkeywords = ['rtfm', 'shut+up']
    ggroups = ['comp.lang.python', 'comp.lang.perl']

    # consqurls() yields one batch of search descriptors per group; each
    # batch is forced to a list so flatten_list() can merge them into a
    # single flat list of descriptors.
    qurls = flatten_list(
        [list(consqurls((ggroup, gkeywords))) for ggroup in ggroups]
    )
    gresults = [ggsearch(qurl) for qurl in qurls]

    # Raw-string form of the original pattern, kept on one logical line; the
    # captured group is the total in "Results 1 - N of about <total>".
    resre = re.compile(
        r'Results \<b\>1\</b\> - \<b\>.+?\</b\> of about '
        r'\<b\>(.+?)\</b\>'
    )
    gextrsearchresult = extrclosure(resre, 1)
    gresults = [delcomma(gextrsearchresult(res)) for res in gresults]

    for el in gresults:
        # A single pre-formatted string prints identically whether print is
        # a statement or a function.
        print('%s %s %s' % (el['result'], el['group'], el['keyword']))
        print('')


This was inspired by 
http://mail.python.org/pipermail/python-list/2002-November/172466.html

Regards,
mk




More information about the Python-list mailing list