Simple Python web proxy stalls for some web sites

Carl Waldbieser waldbie at verizon.net
Thu Oct 7 03:33:14 CEST 2004


**** Post for FREE via your newsreader at post.usenet.com ****

I have written a simple web proxy using the Python standard library
BaseHTTPRequestHandler.  Right now, all it does is log the web traffic
passing through it to the console.  I have been testing it by setting my
browser's proxy setting to localhost:8077 and browsing to various web
sites.  Some web sites work fine (e.g. www.python.org).  However, some web
sites simply seem to stall indefinitely (e.g. www.google.com).  If I set
the same browser to connect directly to the Internet, the site comes up
close to immediately.

If anybody has any ideas about why this happens, or any coding mistakes I
may have made, I would appreciate the feedback.

Thanks,
Carl Waldbieser

--------- Python code below ------------------------------------
#############################################################
# Simple proxy
#############################################################

from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from urlparse import urlparse
import urllib2
import re

def multiple_replace(adict, text):
    """Substitute every key of adict appearing in text with its value.

    All substitutions happen in a single regex pass, so replacement
    text is never itself re-scanned for further matches.
    """
    pattern = "|".join(re.escape(key) for key in adict.keys())
    compiled = re.compile(pattern)

    def substitute(match):
        return adict[match.group(0)]

    return compiled.sub(substitute, text)

def htmlencode(s):
    """Return s with the HTML-special characters &, <, >, ' and "
    escaped as character entities, safe for embedding in markup."""
    table = {
        "&": "&amp;",
        "<": "&lt;",
        ">": "&gt;",
        "'": "&apos;",
        '"': "&quot;",
    }
    return multiple_replace(table, s)

class WebProxyRequestHandler(BaseHTTPRequestHandler):
    """A subclass of BaseHTTPRequestHandler that acts as a web proxy
    server and can be chained with other web proxies.
    """
    def do_GET(self):
        """Handles an HTTP GET.
        """
        print "do_GET()"
        self.get_post_impl()
    def do_POST(self):
        print "do_POST()"
        length = self.headers["Content-Length"]
        d = self.rfile.read(int(length))
        self.get_post_impl(d)
        
    def get_post_impl(self, data=None):
        print "client host: %s\nclient port %d" % self.client_address
        print "command: %s" % self.command
        print "path: %s" % self.path
        print "request_version: %s" % self.request_version
        print "\n-- headers --"
        for header in self.headers.keys():
            print "%s: %s" % (header, self.headers.getheaders(header))
        print "-- end headers --\n"
        #Forward the request.
        server = self.server
        if server.proxy_addr:
            (scheme, netloc, path, parameters, query, fragment) =
urlparse(self.path)
            print "scheme: %s\nnetloc: %s\npath: %s\nparameters: %s\nquery:
%s\nfragment: %s" % (scheme, netloc, path, parameters, query, fragment)
            url = "%s:%d" % server.proxy_addr
            print "Proxy URL: %s" % url
            self.retrieve_request(data, {'http':url})
        else:
            self.retrieve_request(data)

    def retrieve_request(self, data, proxies={}):
        request = urllib2.Request(self.path)
        for header in self.headers.keys():
            if header.lower() != "host" and header.lower() != "user-agent":
                values = self.headers.getheaders(header)
                value_string = " ".join(values)
                request.add_header(header, value_string)
        for proxy_type in proxies:
            print "setting proxy: (%s, %s)" % (proxies[proxy_type],
proxy_type)
            request.set_proxy(proxies[proxy_type], proxy_type)
        if data != None:
            request.add_data(data)
        print "Attempting to open %s ..." % self.path
        try:
            f = urllib2.urlopen(request)
        except urllib2.HTTPError, e:
            self.send_response(e.code)
        except Exception, e:
            print "Exception: %s" % str(e)
            self.write_error(e)
        else:
            print "Successfully opened %s" % self.path
            
            self.send_response(200) #OK
            print "-- Response Info --" 
            for item in f.info().keys():
                print "%s: %s" % (item, f.info()[item])
                self.send_header(item, f.info()[item])
            print "-- end Response Info --"
            self.end_headers()
            print "Reading..."
            s = f.read()
            print "Read successful."
            f.close()
            print "Writing..."
            self.wfile.write(s)
            print "Write successful."
            #self.wfile.close()
    def write_error(self, error):
        self.send_response(200)
        self.wfile.write("""<html>
            <head>
                <title>Error</title>
            </head>
            <body>
                An error occured connecting to the address given.
                <br/>
                %s
            </body>
            </html>""" % htmlencode(str(error)))
        self.wfile.close()

class WebProxy(HTTPServer):
    """HTTP server that serves WebProxyRequestHandler on server_addr.

    When proxy_addr is given as a (host, port) tuple, it names the next
    proxy in the chain; the handler forwards requests through it.
    """

    def __init__(self, server_addr, proxy_addr=None):
        """server_addr -- (host, port) to listen on.
        proxy_addr  -- optional (host, port) of an upstream proxy.
        """
        HTTPServer.__init__(self, server_addr, WebProxyRequestHandler)
        # Stored for the request handler, which reads self.server.proxy_addr.
        self.proxy_addr = proxy_addr

if __name__ == "__main__":
    import sys
    def usage():
        print "Usage: %s [port [proxy addr proxy port]]" % sys.argv[1]
        
    if len(sys.argv) >= 2:
        try:
            port = int(sys.argv[1])
        except:
            print "Port error."
            usage()
            sys.exit()
    else:
        port = 8077
    proxy_addr = ""
    if len(sys.argv) >= 4:
        proxy_addr = sys.argv[2]
        try:
            proxy_port = int(sys.argv[3])
        except:
            print "Proxy port error."
            usage()
            sys.exit()
    if proxy_addr:
        proxy = WebProxy(("localhost", port), (proxy_addr, proxy_port))
        print "Listening on %s:%d\nForwarding to %s:%d" % ("localhost",
port, proxy_addr, proxy_port)
    else:
        proxy = WebProxy(("localhost", port))
        print "Listening on %s:%d" % ("localhost", port)
        
    proxy.serve_forever()

-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
 *** Usenet.com - The #1 Usenet Newsgroup Service on The Planet! ***
                      http://www.usenet.com
Unlimited Download - 19 Separate Servers - 90,000 groups - Uncensored
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=



More information about the Python-list mailing list