[Jython-checkins] jython: Import latest urllib.py from cpython 2.7
alan.kennedy
jython-checkins at python.org
Sat Feb 2 14:54:28 CET 2013
http://hg.python.org/jython/rev/52f8fb53c414
changeset: 6977:52f8fb53c414
user: Alan Kennedy <alan at xhaus.com>
date: Sat Feb 02 13:46:56 2013 +0000
summary:
Import latest urllib.py from cpython 2.7: http://hg.python.org/cpython/file/b6b707063991/Lib/urllib.py
files:
Lib/urllib.py | 226 ++++++++++++++-----------------------
1 files changed, 89 insertions(+), 137 deletions(-)
diff --git a/Lib/urllib.py b/Lib/urllib.py
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -27,6 +27,8 @@
import os
import time
import sys
+import base64
+
from urlparse import urljoin as basejoin
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
@@ -42,9 +44,7 @@
MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
# Helper for non-unix systems
-if os.name == 'mac':
- from macurl2path import url2pathname, pathname2url
-elif (os._name if sys.platform.startswith('java') else os.name) == 'nt':
+if os.name == 'nt':
from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
from rourl2path import url2pathname, pathname2url
@@ -94,7 +94,7 @@
def urlcleanup():
if _urlopener:
_urlopener.cleanup()
- _safemaps.clear()
+ _safe_quoters.clear()
ftpcache.clear()
# check for SSL
@@ -177,8 +177,8 @@
def open(self, fullurl, data=None):
"""Use URLopener().open(file) instead of open(file, 'r')."""
fullurl = unwrap(toBytes(fullurl))
- # percent encode url. fixing lame server errors like space within url
- # parts
+ # percent encode url, fixing lame server errors for e.g, like space
+ # within url paths.
fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
if self.tempcache and fullurl in self.tempcache:
filename, headers = self.tempcache[fullurl]
@@ -232,9 +232,9 @@
try:
fp = self.open_local_file(url1)
hdrs = fp.info()
- del fp
+ fp.close()
return url2pathname(splithost(url1)[1]), hdrs
- except IOError, msg:
+ except IOError:
pass
fp = self.open(url, data)
try:
@@ -259,9 +259,9 @@
size = -1
read = 0
blocknum = 0
+ if "content-length" in headers:
+ size = int(headers["Content-Length"])
if reporthook:
- if "content-length" in headers:
- size = int(headers["Content-Length"])
reporthook(blocknum, bs, size)
while 1:
block = fp.read(bs)
@@ -276,8 +276,6 @@
tfp.close()
finally:
fp.close()
- del fp
- del tfp
# raise exception if actual size does not match content-length header
if size >= 0 and read < size:
@@ -322,13 +320,13 @@
if not host: raise IOError, ('http error', 'no host given')
if proxy_passwd:
- import base64
+ proxy_passwd = unquote(proxy_passwd)
proxy_auth = base64.b64encode(proxy_passwd).strip()
else:
proxy_auth = None
if user_passwd:
- import base64
+ user_passwd = unquote(user_passwd)
auth = base64.b64encode(user_passwd).strip()
else:
auth = None
@@ -343,9 +341,7 @@
if auth: h.putheader('Authorization', 'Basic %s' % auth)
if realhost: h.putheader('Host', realhost)
for args in self.addheaders: h.putheader(*args)
- h.endheaders()
- if data is not None:
- h.send(data)
+ h.endheaders(data)
errcode, errmsg, headers = h.getreply()
fp = h.getfile()
if errcode == -1:
@@ -380,7 +376,6 @@
def http_error_default(self, url, fp, errcode, errmsg, headers):
"""Default error handler: close the connection and raise IOError."""
- void = fp.read()
fp.close()
raise IOError, ('http error', errcode, errmsg, headers)
@@ -415,12 +410,12 @@
#print "proxy via https:", host, selector
if not host: raise IOError, ('https error', 'no host given')
if proxy_passwd:
- import base64
+ proxy_passwd = unquote(proxy_passwd)
proxy_auth = base64.b64encode(proxy_passwd).strip()
else:
proxy_auth = None
if user_passwd:
- import base64
+ user_passwd = unquote(user_passwd)
auth = base64.b64encode(user_passwd).strip()
else:
auth = None
@@ -438,9 +433,7 @@
if auth: h.putheader('Authorization', 'Basic %s' % auth)
if realhost: h.putheader('Host', realhost)
for args in self.addheaders: h.putheader(*args)
- h.endheaders()
- if data is not None:
- h.send(data)
+ h.endheaders(data)
errcode, errmsg, headers = h.getreply()
fp = h.getfile()
if errcode == -1:
@@ -491,6 +484,8 @@
urlfile = file
if file[:1] == '/':
urlfile = 'file://' + file
+ elif file[:2] == './':
+ raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
return addinfourl(open(localname, 'rb'),
headers, urlfile)
host, port = splitport(host)
@@ -519,8 +514,8 @@
if user: user, passwd = splitpasswd(user)
else: passwd = None
host = unquote(host)
- user = unquote(user or '')
- passwd = unquote(passwd or '')
+ user = user or ''
+ passwd = passwd or ''
host = socket.gethostbyname(host)
if not port:
import ftplib
@@ -598,7 +593,6 @@
time.gmtime(time.time())))
msg.append('Content-type: %s' % type)
if encoding == 'base64':
- import base64
data = base64.decodestring(data)
else:
data = unquote(data)
@@ -648,7 +642,6 @@
newurl = headers['uri']
else:
return
- void = fp.read()
fp.close()
# In case the server sent a relative URL, join with original:
newurl = basejoin(self.type + ":" + url, newurl)
@@ -785,7 +778,7 @@
else:
return self.open(newurl, data)
- def get_user_passwd(self, host, realm, clear_cache = 0):
+ def get_user_passwd(self, host, realm, clear_cache=0):
key = realm + '@' + host.lower()
if key in self.auth_cache:
if clear_cache:
@@ -858,13 +851,16 @@
"""Class used by open_ftp() for cache of open FTP connections."""
def __init__(self, user, passwd, host, port, dirs,
- timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+ timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+ persistent=True):
self.user = user
self.passwd = passwd
self.host = host
self.port = port
self.dirs = dirs
self.timeout = timeout
+ self.refcount = 0
+ self.keepalive = persistent
self.init()
def init(self):
@@ -891,7 +887,7 @@
# Try to retrieve as a file
try:
cmd = 'RETR ' + file
- conn = self.ftp.ntransfercmd(cmd)
+ conn, retrlen = self.ftp.ntransfercmd(cmd)
except ftplib.error_perm, reason:
if str(reason)[:3] != '550':
raise IOError, ('ftp error', reason), sys.exc_info()[2]
@@ -911,11 +907,14 @@
cmd = 'LIST ' + file
else:
cmd = 'LIST'
- conn = self.ftp.ntransfercmd(cmd)
+ conn, retrlen = self.ftp.ntransfercmd(cmd)
self.busy = 1
+ ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
+ self.refcount += 1
+ conn.close()
# Pass back both a suitably decorated object and a retrieval length
- return (addclosehook(conn[0].makefile('rb'),
- self.endtransfer), conn[1])
+ return (ftpobj, retrlen)
+
def endtransfer(self):
if not self.busy:
return
@@ -926,6 +925,17 @@
pass
def close(self):
+ self.keepalive = False
+ if self.refcount <= 0:
+ self.real_close()
+
+ def file_close(self):
+ self.endtransfer()
+ self.refcount -= 1
+ if self.refcount <= 0 and not self.keepalive:
+ self.real_close()
+
+ def real_close(self):
self.endtransfer()
try:
self.ftp.close()
@@ -970,11 +980,11 @@
self.hookargs = hookargs
def close(self):
- addbase.close(self)
if self.closehook:
self.closehook(*self.hookargs)
self.closehook = None
self.hookargs = None
+ addbase.close(self)
class addinfo(addbase):
"""class to add an info() method to an open file."""
@@ -1072,7 +1082,6 @@
_hostprog = re.compile('^//([^/?]*)(.*)$')
match = _hostprog.match(url)
- # http://bugs.python.org/issue4493
if match:
host_port = match.group(1)
path = match.group(2)
@@ -1090,7 +1099,7 @@
_userprog = re.compile('^(.*)@(.*)$')
match = _userprog.match(host)
- if match: return map(unquote, match.group(1, 2))
+ if match: return match.group(1, 2)
return None, host
_passwdprog = None
@@ -1099,7 +1108,7 @@
global _passwdprog
if _passwdprog is None:
import re
- _passwdprog = re.compile('^([^:]*):(.*)$')
+ _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
match = _passwdprog.match(user)
if match: return match.group(1, 2)
@@ -1182,21 +1191,29 @@
if match: return match.group(1, 2)
return attr, None
+# urlparse contains a duplicate of this method to avoid a circular import. If
+# you update this method, also update the copy in urlparse. This code
+# duplication does not exist in Python3.
+
_hexdig = '0123456789ABCDEFabcdef'
-_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
+_hextochr = dict((a + b, chr(int(a + b, 16)))
+ for a in _hexdig for b in _hexdig)
def unquote(s):
"""unquote('abc%20def') -> 'abc def'."""
res = s.split('%')
- for i in xrange(1, len(res)):
- item = res[i]
+ # fastpath
+ if len(res) == 1:
+ return s
+ s = res[0]
+ for item in res[1:]:
try:
- res[i] = _hextochr[item[:2]] + item[2:]
+ s += _hextochr[item[:2]] + item[2:]
except KeyError:
- res[i] = '%' + item
+ s += '%' + item
except UnicodeDecodeError:
- res[i] = unichr(int(item[:2], 16)) + item[2:]
- return "".join(res)
+ s += unichr(int(item[:2], 16)) + item[2:]
+ return s
def unquote_plus(s):
"""unquote('%7e/abc+def') -> '~/abc def'"""
@@ -1206,9 +1223,12 @@
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
'abcdefghijklmnopqrstuvwxyz'
'0123456789' '_.-')
-_safemaps = {}
+_safe_map = {}
+for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
+ _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
+_safe_quoters = {}
-def quote(s, safe = '/'):
+def quote(s, safe='/'):
"""quote('abc def') -> 'abc%20def'
Each part of a URL, e.g. the path info, the query, etc., has a
@@ -1229,27 +1249,32 @@
called on a path where the existing slash characters are used as
reserved characters.
"""
+ # fastpath
+ if not s:
+ if s is None:
+ raise TypeError('None object cannot be quoted')
+ return s
cachekey = (safe, always_safe)
try:
- safe_map = _safemaps[cachekey]
+ (quoter, safe) = _safe_quoters[cachekey]
except KeyError:
- safe += always_safe
- safe_map = {}
- for i in range(256):
- c = chr(i)
- safe_map[c] = (c in safe) and c or ('%%%02X' % i)
- _safemaps[cachekey] = safe_map
- res = map(safe_map.__getitem__, s)
- return ''.join(res)
+ safe_map = _safe_map.copy()
+ safe_map.update([(c, c) for c in safe])
+ quoter = safe_map.__getitem__
+ safe = always_safe + safe
+ _safe_quoters[cachekey] = (quoter, safe)
+ if not s.rstrip(safe):
+ return s
+ return ''.join(map(quoter, s))
-def quote_plus(s, safe = ''):
+def quote_plus(s, safe=''):
"""Quote the query fragment of a URL; replacing ' ' with '+'"""
if ' ' in s:
s = quote(s, safe + ' ')
return s.replace(' ', '+')
return quote(s, safe)
-def urlencode(query,doseq=0):
+def urlencode(query, doseq=0):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
If any values in the query arg are sequences and doseq is true, each
@@ -1301,7 +1326,7 @@
else:
try:
# is this a sufficient test for sequence-ness?
- x = len(v)
+ len(v)
except TypeError:
# not a sequence
v = quote_plus(str(v))
@@ -1342,7 +1367,8 @@
# strip port off host
hostonly, port = splitport(host)
# check if the host ends with any of the DNS suffixes
- for name in no_proxy.split(','):
+ no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
+ for name in no_proxy_list:
if name and (hostonly.endswith(name) or host.endswith(name)):
return 1
# otherwise, don't bypass
@@ -1401,7 +1427,7 @@
else:
mask = int(mask[1:])
- mask = 32 - mask
+ mask = 32 - mask
if (hostIP >> mask) == (base >> mask):
return True
@@ -1411,7 +1437,6 @@
return False
-
def getproxies_macosx_sysconf():
"""Return a dictionary of scheme -> proxy server URL mappings.
@@ -1420,8 +1445,6 @@
"""
return _get_proxies()
-
-
def proxy_bypass(host):
if getproxies_environment():
return proxy_bypass_environment(host)
@@ -1525,18 +1548,11 @@
# '<local>' string by the localhost entry and the corresponding
# canonical entry.
proxyOverride = proxyOverride.split(';')
- i = 0
- while i < len(proxyOverride):
- if proxyOverride[i] == '<local>':
- proxyOverride[i:i+1] = ['localhost',
- '127.0.0.1',
- socket.gethostname(),
- socket.gethostbyname(
- socket.gethostname())]
- i += 1
- # print proxyOverride
# now check if we match one of the registry values.
for test in proxyOverride:
+ if test == '<local>':
+ if '.' not in rawHost:
+ return 1
test = test.replace(".", r"\.") # mask dots
test = test.replace("*", r".*") # change glob sequence
test = test.replace("?", r".") # change glob char
@@ -1584,67 +1600,3 @@
# Report during remote transfers
print "Block number: %d, Block size: %d, Total size: %d" % (
blocknum, blocksize, totalsize)
-
-# Test program
-def test(args=[]):
- if not args:
- args = [
- '/etc/passwd',
- 'file:/etc/passwd',
- 'file://localhost/etc/passwd',
- 'ftp://ftp.gnu.org/pub/README',
- 'http://www.python.org/index.html',
- ]
- if hasattr(URLopener, "open_https"):
- args.append('https://synergy.as.cmu.edu/~geek/')
- try:
- for url in args:
- print '-'*10, url, '-'*10
- fn, h = urlretrieve(url, None, reporthook)
- print fn
- if h:
- print '======'
- for k in h.keys(): print k + ':', h[k]
- print '======'
- fp = open(fn, 'rb')
- data = fp.read()
- del fp
- if '\r' in data:
- table = string.maketrans("", "")
- data = data.translate(table, "\r")
- print data
- fn, h = None, None
- print '-'*40
- finally:
- urlcleanup()
-
-def main():
- import getopt, sys
- try:
- opts, args = getopt.getopt(sys.argv[1:], "th")
- except getopt.error, msg:
- print msg
- print "Use -h for help"
- return
- t = 0
- for o, a in opts:
- if o == '-t':
- t = t + 1
- if o == '-h':
- print "Usage: python urllib.py [-t] [url ...]"
- print "-t runs self-test;",
- print "otherwise, contents of urls are printed"
- return
- if t:
- if t > 1:
- test1()
- test(args)
- else:
- if not args:
- print "Use -h for help"
- for url in args:
- print urlopen(url).read(),
-
-# Run test program when run as a script
-if __name__ == '__main__':
- main()
--
Repository URL: http://hg.python.org/jython
More information about the Jython-checkins mailing list