Python multithreading problem

abhinav abhinavduggal at gmail.com
Sun Mar 26 22:23:23 CEST 2006


//A CRAWLER IMPLEMENTATION
Please run this program from the shell, and also under the control of
the debugger. When the program is run normally, it does not terminate:
the condition `if c<5:` never stops it, so the program continues
indefinitely.
But if the program is run under the control of the debugger, it
terminates when the condition `if c<5:` becomes false.
I think this problem may be due to multithreading. Please help.


from sgmllib import SGMLParser
import threading
import re
import urllib
import pdb
import time
class urlist(SGMLParser):
    def reset(self):
        SGMLParser.reset(self)
        self.list=[]

    def start_a(self,attr):
        href=[v for k,v in attr if k=="href"]
        if href:
            self.list.extend(href)
# Shared crawler state, read and updated by mythread.run().
c = 0    # number of pages fetched and saved so far
mid = 2  # id given to the next worker thread that gets spawned
class mythread(threading.Thread):
	 stdmutex=threading.Lock()
	 global threads
	 threads=[]
	 def __init__(self,u,myid):
		self.u=u
		self.myid=myid
		threading.Thread.__init__(self)
	 def run(self):
		global c
		global mid
		if c<5:
		        self.stdmutex.acquire()
			self.usock=urllib.urlopen(self.u)
			self.p=urlist()
			self.s=self.usock.read()
			self.p.feed(self.s)
			self.usock.close()
			self.p.close()
			c=c+1
			fname="/root/" + str(c) + ".txt"
			self.f=open(fname,"w")
			self.f.write(self.s)
			self.f.close()
			print c
			print self.p.list
			print self.u
			print self.myid
			for j in self.p.list:
				k=re.search("^https?:",j)
				if k:
				   i=mythread(j,mid)
				   i.start()
				   threads.append(i)
				   mid=mid+1
                        self.stdmutex.release()






if __name__=="__main__":
    thread=mythread("http://www.google.co.in/",1)
    thread.start()
    threads.append(thread)
    for thread in threads:
	  thread.join()
    print "main thread exits"




































































































































































































































































More information about the Python-list mailing list