<html>
<body>
<font color="#800000">I was playing with some simple code I found for a
proxy server, made some mods to trap exceptions it had etc, but still
have odd behavior; one that (with Firefox at least) when the browser
proxy is set to 127.0.0.1:8080, DNS seems to be cached, or ?.<br>
I run the script locally and browse through it to a web site - mostly OK.
But sometimes going to a new site just brings up the home page of the
last site, ie, the base URL. I tried forcing gc, but no help there. The
script seems to take a request for a new URL and simple return the host
of the last URL.<br><br>
Also, craigslist.org fails entirely.<br>
Suggestions?<br>
Links to better proxy code to study?<br><br>
-Ray<br><br>
<br><br>
</font><font size=2>==============================================<br>
# -*- coding: cp1252 -*-<br>
# <PythonProxy.py><br>
#<br>
#Copyright (c) <2009> <Fábio Domingues - fnds3000 in
gmail.com><br>
#<br>
#Permission is hereby granted, free of charge, to any person<br>
#obtaining a copy of this software and associated documentation<br>
#files (the "Software"), to deal in the Software without<br>
#restriction, including without limitation the rights to use,<br>
#copy, modify, merge, publish, distribute, sublicense, and/or sell<br>
#copies of the Software, and to permit persons to whom the<br>
#Software is furnished to do so, subject to the following<br>
#conditions:<br>
#<br>
#The above copyright notice and this permission notice shall be<br>
#included in all copies or substantial portions of the Software.<br>
#<br>
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
KIND,<br>
#EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES<br>
#OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND<br>
#NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT<br>
#HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,<br>
#WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING<br>
#FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR<br>
#OTHER DEALINGS IN THE SOFTWARE.<br><br>
"""\<br>
Copyright (c) <2009> <Fábio Domingues - fnds3000 in
gmail.com> <MIT Licence><br><br>
**************************************<br>
*** Python Proxy - A Fast HTTP proxy ***<br>
**************************************<br><br>
Neste momento este proxy é um Elie Proxy.<br><br>
Suporta os métodos HTTP:<br>
- OPTIONS;<br>
- GET;<br>
- HEAD;<br>
- POST;<br>
- PUT;<br>
- DELETE;<br>
- TRACE;<br>
- CONENCT.<br><br>
Suporta:<br>
- Conexões dos cliente em IPv4 ou IPv6;<br>
- Conexões ao alvo em IPv4 e IPv6;<br>
- Conexões todo o tipo de transmissão de dados TCP (CONNECT
tunneling),<br>
p.e. ligações SSL, como é o caso do
HTTPS.<br><br>
A fazer:<br>
- Verificar se o input vindo do cliente está correcto;<br>
- Enviar os devidos HTTP erros se não, ou simplesmente
quebrar a ligação;<br>
- Criar um gestor de erros;<br>
- Criar ficheiro log de erros;<br>
- Colocar excepções nos sítios onde é previsível a ocorrência de
erros,<br>
p.e.sockets e ficheiros;<br>
- Rever tudo e melhorar a estrutura do programar e colocar nomes
adequados nas<br>
variáveis e métodos;<br>
- Comentar o programa decentemente;<br>
- Doc Strings.<br><br>
Funcionalidades futuras:<br>
- Adiconar a funcionalidade de proxy anónimo e transparente;<br>
- Suportar FTP?.<br><br>
<br>
(!) Atenção o que se segue só tem efeito em conexões não CONNECT, para
estas o<br>
proxy é sempre Elite.<br><br>
Qual a diferença entre um proxy Elite, Anónimo e Transparente?<br>
- Um proxy elite é totalmente anónimo, o servidor que o recebe não
consegue ter<br>
conhecimento da existência do proxy e não recebe
o endereço IP do cliente;<br>
- Quando é usado um proxy anónimo o servidor sabe que o cliente
está a usar um<br>
proxy mas não sabe o endereço IP do
cliente;<br>
É enviado o cabeçalho HTTP
"Proxy-agent".<br>
- Um proxy transparente fornece ao servidor o IP do cliente e um
informação que<br>
se está a usar um proxy.<br>
São enviados os cabeçalhos HTTP
"Proxy-agent" e "HTTP_X_FORWARDED_FOR".<br><br>
"""<br><br>
import socket, thread, select, sys, gc<br><br>
__version__ = '0.1.0 Draft 1'<br>
BUFLEN = 4196 #8192<br>
VERSION = 'Python Proxy/'+__version__<br>
HTTPVER = 'HTTP/1.1'<br><br>
class ConnectionHandler:<br>
def __init__(self, connection, address, timeout):<br>
self.client = connection<br>
self.client_buffer = ''<br>
self.timeout = timeout<br>
self.method, self.path,
self.protocol = self.get_base_header()<br>
if
self.method=='CONNECT':<br>
self.method_CONNECT()<br>
elif self.method in
('OPTIONS', 'GET', 'HEAD', 'POST', 'PUT',<br>
'DELETE', 'TRACE'):<br>
self.method_others()<br>
#print ' closing
client', self.client<br>
self.client.close()<br>
#print ' closing
target', self.target<br>
self.target.close()<br>
#print ' closed'#,
self.client, self.target<br><br>
def get_base_header(self):<br>
while 1:<br>
self.client_buffer += self.client.recv(BUFLEN)<br>
end =
self.client_buffer.find('\n')<br>
if
end!=-1:<br>
break<br>
#print
'%s'%self.client_buffer[:end]#debug<br>
print ' bs
%s...'%self.client_buffer[:40]#debug<br>
data =
(self.client_buffer[:end+1]).split()<br>
self.client_buffer =
self.client_buffer[end+1:]<br>
return data<br><br>
def method_CONNECT(self):<br>
self._connect_target(self.path)<br>
self.client.send(HTTPVER+' 200
Connection established\n'+<br>
'Proxy-agent: %s\n\n'%VERSION)<br>
self.client_buffer = ''<br>
print "CONNECT",
self.path[:35]<br>
self._read_write() <br><br>
def method_others(self):<br>
self.path = self.path[7:]<br>
i = self.path.find('/')<br>
host =
self.path[:i] <br>
path = self.path[i:]<br>
self._connect_target(host)<br>
self.target.send('%s %s
%s\n'%(self.method, path, self.protocol)+<br>
self.client_buffer)<br>
self.client_buffer = ''<br>
#print " M",
self.method, host, 'pth:', self.path[:10]<br>
self._read_write()<br><br>
def _connect_target(self, host):<br>
i = host.find(':')<br>
if i!=-1:<br>
port =
int(host[i+1:])<br>
host =
host[:i]<br>
else:<br>
port =
80<br>
(soc_family, _, _, _, address)
= socket.getaddrinfo(host, port)[0]<br>
self.target =
socket.socket(soc_family)<br>
self.target.connect(address)<br><br>
def _read_write(self):<br>
time_out_max =
self.timeout#/3<br>
socs = [self.client,
self.target]<br>
count = 0<br>
data = ''<br>
while 1:<br>
count
+= 1<br>
(recv,
_, error) = select.select(socs, [], socs, 3)<br>
if
error:<br>
break<br>
if
recv:<br>
for in_ in recv:<br>
try:<br>
data = in_.recv(BUFLEN)<br>
#print " OK:recv data:'%s'..., len:%d,
BUF:%d" % \<br>
# (data[11:35], len(data), BUFLEN)<br>
except socket.error, (errno, strerror):<br>
if errno!=10053 and errno!=10054: ## 'Software caused connection abort'
or reset<br>
print " recv error", (errno, strerror)<br>
print " data:'%s'..., len:%d,
BUF:%d" % \<br>
(data[11:35], len(data), BUFLEN)<br>
pass<br>
else:<br>
break<br>
except:<br>
print ' ERR', sys.exc_info()[0]<br>
if in_ is self.client:<br>
out = self.target<br>
else:<br>
out = self.client<br>
if data:<br>
try:<br>
out.send(data)<br>
except socket.error, (errno, strerror):<br>
if errno!=10053 and errno!=10054: ## 'Software caused connection abort'
or reset<br>
print " send error", (errno, strerror)<br>
print " data:'%s'..., len:%d" %
\<br>
(data[:35], len(data))<br>
pass<br>
else:<br>
break<br>
except:<br>
print ' ERR', sys.exc_info()[0]<br>
count = 0<br>
if
count == time_out_max:<br>
break<br><br>
def start_server(host='localhost', port=8080, IPv6=False,
timeout=10,<br>
handler=ConnectionHandler):<br>
gc.enable()<br>
if IPv6==True:<br>
soc_type=socket.AF_INET6<br>
else:<br>
soc_type=socket.AF_INET<br>
socket.setdefaulttimeout(3) <br>
soc = socket.socket(soc_type)<br>
soc.setblocking(1) <br>
#print dir(soc)<br>
soc.bind((host, port))<br>
print "Serving on %s:%d."%(host,
port)#debug<br>
soc.listen(0)<br>
while 1:<br>
print 'new thread, collected',
gc.collect(), gc.garbage<br>
thread.start_new_thread(handler, soc.accept()+(timeout,))<br><br>
if __name__ == '__main__':<br>
start_server()<br>
</font></body>
</html>