[Jython-checkins] jython: Import latest urllib.py from cpython 2.7:

alan.kennedy jython-checkins at python.org
Sat Feb 2 14:54:28 CET 2013


http://hg.python.org/jython/rev/52f8fb53c414
changeset:   6977:52f8fb53c414
user:        Alan Kennedy <alan at xhaus.com>
date:        Sat Feb 02 13:46:56 2013 +0000
summary:
  Import latest urllib.py from cpython 2.7: http://hg.python.org/cpython/file/b6b707063991/Lib/urllib.py

files:
  Lib/urllib.py |  226 ++++++++++++++-----------------------
  1 files changed, 89 insertions(+), 137 deletions(-)


diff --git a/Lib/urllib.py b/Lib/urllib.py
--- a/Lib/urllib.py
+++ b/Lib/urllib.py
@@ -27,6 +27,8 @@
 import os
 import time
 import sys
+import base64
+
 from urlparse import urljoin as basejoin
 
 __all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
@@ -42,9 +44,7 @@
 MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
 
 # Helper for non-unix systems
-if os.name == 'mac':
-    from macurl2path import url2pathname, pathname2url
-elif (os._name if sys.platform.startswith('java') else os.name) == 'nt':
+if os.name == 'nt':
     from nturl2path import url2pathname, pathname2url
 elif os.name == 'riscos':
     from rourl2path import url2pathname, pathname2url
@@ -94,7 +94,7 @@
 def urlcleanup():
     if _urlopener:
         _urlopener.cleanup()
-    _safemaps.clear()
+    _safe_quoters.clear()
     ftpcache.clear()
 
 # check for SSL
@@ -177,8 +177,8 @@
     def open(self, fullurl, data=None):
         """Use URLopener().open(file) instead of open(file, 'r')."""
         fullurl = unwrap(toBytes(fullurl))
-        # percent encode url. fixing lame server errors like space within url
-        # parts
+        # percent encode url, fixing lame server errors (e.g. space within
+        # url paths)
         fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
         if self.tempcache and fullurl in self.tempcache:
             filename, headers = self.tempcache[fullurl]
@@ -232,9 +232,9 @@
             try:
                 fp = self.open_local_file(url1)
                 hdrs = fp.info()
-                del fp
+                fp.close()
                 return url2pathname(splithost(url1)[1]), hdrs
-            except IOError, msg:
+            except IOError:
                 pass
         fp = self.open(url, data)
         try:
@@ -259,9 +259,9 @@
                 size = -1
                 read = 0
                 blocknum = 0
+                if "content-length" in headers:
+                    size = int(headers["Content-Length"])
                 if reporthook:
-                    if "content-length" in headers:
-                        size = int(headers["Content-Length"])
                     reporthook(blocknum, bs, size)
                 while 1:
                     block = fp.read(bs)
@@ -276,8 +276,6 @@
                 tfp.close()
         finally:
             fp.close()
-        del fp
-        del tfp
 
         # raise exception if actual size does not match content-length header
         if size >= 0 and read < size:
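The two hunks above make urlretrieve() read Content-Length whether or not a
reporthook was supplied, so the existing short-read check below can raise
ContentTooShortError for plain downloads too, and the temporary file objects
are now closed explicitly instead of being del'd. A minimal usage sketch (the
URL below is hypothetical):

    import urllib

    def progress(blocknum, blocksize, totalsize):
        # totalsize comes from Content-Length when the server sends it,
        # otherwise it stays -1
        if totalsize > 0:
            print "%d of %d bytes" % (min(blocknum * blocksize, totalsize),
                                      totalsize)

    try:
        fn, headers = urllib.urlretrieve("http://www.example.com/file.txt",
                                         "file.txt", progress)
    except urllib.ContentTooShortError:
        print "body was shorter than the Content-Length header promised"
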
@@ -322,13 +320,13 @@
         if not host: raise IOError, ('http error', 'no host given')
 
         if proxy_passwd:
-            import base64
+            proxy_passwd = unquote(proxy_passwd)
             proxy_auth = base64.b64encode(proxy_passwd).strip()
         else:
             proxy_auth = None
 
         if user_passwd:
-            import base64
+            user_passwd = unquote(user_passwd)
             auth = base64.b64encode(user_passwd).strip()
         else:
             auth = None
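Credentials embedded in the URL are now percent-decoded here, right before
they are base64-encoded for the Basic auth header, instead of inside
splituser() (see the splituser()/splitpasswd() hunks further down). Roughly,
for a hypothetical user name containing an encoded '@':

    import base64
    from urllib import unquote

    user_passwd = 'bob%40work:s%3Acret'   # as split out of the URL, still quoted
    auth = base64.b64encode(unquote(user_passwd)).strip()
    # open_http() then sends:  Authorization: Basic <auth>
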
@@ -343,9 +341,7 @@
         if auth: h.putheader('Authorization', 'Basic %s' % auth)
         if realhost: h.putheader('Host', realhost)
         for args in self.addheaders: h.putheader(*args)
-        h.endheaders()
-        if data is not None:
-            h.send(data)
+        h.endheaders(data)
         errcode, errmsg, headers = h.getreply()
         fp = h.getfile()
         if errcode == -1:
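The endheaders()/send() pair collapses into a single call: since Python 2.7,
httplib's endheaders() takes an optional message body, and passing data=None
behaves like the old body-less path. A rough sketch against a hypothetical
host:

    import httplib

    h = httplib.HTTP('www.example.com')
    h.putrequest('POST', '/submit')
    h.putheader('Content-Length', '11')
    h.endheaders('hello world')             # headers and body in one shot
    errcode, errmsg, headers = h.getreply()
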
@@ -380,7 +376,6 @@
 
     def http_error_default(self, url, fp, errcode, errmsg, headers):
         """Default error handler: close the connection and raise IOError."""
-        void = fp.read()
         fp.close()
         raise IOError, ('http error', errcode, errmsg, headers)
 
@@ -415,12 +410,12 @@
                 #print "proxy via https:", host, selector
             if not host: raise IOError, ('https error', 'no host given')
             if proxy_passwd:
-                import base64
+                proxy_passwd = unquote(proxy_passwd)
                 proxy_auth = base64.b64encode(proxy_passwd).strip()
             else:
                 proxy_auth = None
             if user_passwd:
-                import base64
+                user_passwd = unquote(user_passwd)
                 auth = base64.b64encode(user_passwd).strip()
             else:
                 auth = None
@@ -438,9 +433,7 @@
             if auth: h.putheader('Authorization', 'Basic %s' % auth)
             if realhost: h.putheader('Host', realhost)
             for args in self.addheaders: h.putheader(*args)
-            h.endheaders()
-            if data is not None:
-                h.send(data)
+            h.endheaders(data)
             errcode, errmsg, headers = h.getreply()
             fp = h.getfile()
             if errcode == -1:
@@ -491,6 +484,8 @@
             urlfile = file
             if file[:1] == '/':
                 urlfile = 'file://' + file
+            elif file[:2] == './':
+                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
             return addinfourl(open(localname, 'rb'),
                               headers, urlfile)
         host, port = splitport(host)
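Relative file: URLs (file:./spam) used to be accepted silently; they are now
rejected with a ValueError. For example (assuming relative.txt exists in the
current directory):

    import urllib

    try:
        urllib.urlopen('file:./relative.txt')
    except ValueError, e:
        print e    # local file url may start with / or file: ...
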
@@ -519,8 +514,8 @@
         if user: user, passwd = splitpasswd(user)
         else: passwd = None
         host = unquote(host)
-        user = unquote(user or '')
-        passwd = unquote(passwd or '')
+        user = user or ''
+        passwd = passwd or ''
         host = socket.gethostbyname(host)
         if not port:
             import ftplib
@@ -598,7 +593,6 @@
                                             time.gmtime(time.time())))
         msg.append('Content-type: %s' % type)
         if encoding == 'base64':
-            import base64
             data = base64.decodestring(data)
         else:
             data = unquote(data)
@@ -648,7 +642,6 @@
             newurl = headers['uri']
         else:
             return
-        void = fp.read()
         fp.close()
         # In case the server sent a relative URL, join with original:
         newurl = basejoin(self.type + ":" + url, newurl)
@@ -785,7 +778,7 @@
         else:
             return self.open(newurl, data)
 
-    def get_user_passwd(self, host, realm, clear_cache = 0):
+    def get_user_passwd(self, host, realm, clear_cache=0):
         key = realm + '@' + host.lower()
         if key in self.auth_cache:
             if clear_cache:
@@ -858,13 +851,16 @@
     """Class used by open_ftp() for cache of open FTP connections."""
 
     def __init__(self, user, passwd, host, port, dirs,
-                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
+                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
+                 persistent=True):
         self.user = user
         self.passwd = passwd
         self.host = host
         self.port = port
         self.dirs = dirs
         self.timeout = timeout
+        self.refcount = 0
+        self.keepalive = persistent
         self.init()
 
     def init(self):
@@ -891,7 +887,7 @@
             # Try to retrieve as a file
             try:
                 cmd = 'RETR ' + file
-                conn = self.ftp.ntransfercmd(cmd)
+                conn, retrlen = self.ftp.ntransfercmd(cmd)
             except ftplib.error_perm, reason:
                 if str(reason)[:3] != '550':
                     raise IOError, ('ftp error', reason), sys.exc_info()[2]
@@ -911,11 +907,14 @@
                 cmd = 'LIST ' + file
             else:
                 cmd = 'LIST'
-            conn = self.ftp.ntransfercmd(cmd)
+            conn, retrlen = self.ftp.ntransfercmd(cmd)
         self.busy = 1
+        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
+        self.refcount += 1
+        conn.close()
         # Pass back both a suitably decorated object and a retrieval length
-        return (addclosehook(conn[0].makefile('rb'),
-                             self.endtransfer), conn[1])
+        return (ftpobj, retrlen)
+
     def endtransfer(self):
         if not self.busy:
             return
@@ -926,6 +925,17 @@
             pass
 
     def close(self):
+        self.keepalive = False
+        if self.refcount <= 0:
+            self.real_close()
+
+    def file_close(self):
+        self.endtransfer()
+        self.refcount -= 1
+        if self.refcount <= 0 and not self.keepalive:
+            self.real_close()
+
+    def real_close(self):
         self.endtransfer()
         try:
             self.ftp.close()
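The new persistent/refcount/keepalive trio lets several callers share one
cached FTP connection: every file object handed out by retrfile() registers
file_close() as its close hook and bumps refcount, close() merely drops the
keepalive flag, and the socket is only torn down by real_close() once no
readers remain. A toy sketch of the same idea (not the real class):

    class RefCountedConn(object):
        def __init__(self):
            self.refcount = 0
            self.keepalive = True     # the cache wants the connection kept open

        def open_reader(self):
            self.refcount += 1        # one more outstanding file object
            return self

        def file_close(self):         # hook run when a reader is closed
            self.refcount -= 1
            if self.refcount <= 0 and not self.keepalive:
                self.real_close()

        def close(self):              # cache eviction / urlcleanup() path
            self.keepalive = False
            if self.refcount <= 0:
                self.real_close()

        def real_close(self):
            print "underlying connection closed"
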
@@ -970,11 +980,11 @@
         self.hookargs = hookargs
 
     def close(self):
-        addbase.close(self)
         if self.closehook:
             self.closehook(*self.hookargs)
             self.closehook = None
             self.hookargs = None
+        addbase.close(self)
 
 class addinfo(addbase):
     """class to add an info() method to an open file."""
@@ -1072,7 +1082,6 @@
         _hostprog = re.compile('^//([^/?]*)(.*)$')
 
     match = _hostprog.match(url)
-    # http://bugs.python.org/issue4493
     if match:
         host_port = match.group(1)
         path = match.group(2)
@@ -1090,7 +1099,7 @@
         _userprog = re.compile('^(.*)@(.*)$')
 
     match = _userprog.match(host)
-    if match: return map(unquote, match.group(1, 2))
+    if match: return match.group(1, 2)
     return None, host
 
 _passwdprog = None
@@ -1099,7 +1108,7 @@
     global _passwdprog
     if _passwdprog is None:
         import re
-        _passwdprog = re.compile('^([^:]*):(.*)$')
+        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)
 
     match = _passwdprog.match(user)
     if match: return match.group(1, 2)
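splituser() now hands back the still-quoted 'user:password' string (the
callers above decode it when they need to), and splitpasswd() compiles its
pattern with re.S so a password containing a newline still splits on the
first ':'. For instance:

    from urllib import splituser, splitpasswd

    print splituser('bob%40work:[email protected]')
    # ('bob%40work:secret', 'ftp.example.com')  -- no longer unquoted here
    print splitpasswd('bob:pa\nss')
    # ('bob', 'pa\nss')
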
@@ -1182,21 +1191,29 @@
     if match: return match.group(1, 2)
     return attr, None
 
+# urlparse contains a duplicate of this method to avoid a circular import.  If
+# you update this method, also update the copy in urlparse.  This code
+# duplication does not exist in Python3.
+
 _hexdig = '0123456789ABCDEFabcdef'
-_hextochr = dict((a+b, chr(int(a+b,16))) for a in _hexdig for b in _hexdig)
+_hextochr = dict((a + b, chr(int(a + b, 16)))
+                 for a in _hexdig for b in _hexdig)
 
 def unquote(s):
     """unquote('abc%20def') -> 'abc def'."""
     res = s.split('%')
-    for i in xrange(1, len(res)):
-        item = res[i]
+    # fastpath
+    if len(res) == 1:
+        return s
+    s = res[0]
+    for item in res[1:]:
         try:
-            res[i] = _hextochr[item[:2]] + item[2:]
+            s += _hextochr[item[:2]] + item[2:]
         except KeyError:
-            res[i] = '%' + item
+            s += '%' + item
         except UnicodeDecodeError:
-            res[i] = unichr(int(item[:2], 16)) + item[2:]
-    return "".join(res)
+            s += unichr(int(item[:2], 16)) + item[2:]
+    return s
 
 def unquote_plus(s):
     """unquote('%7e/abc+def') -> '~/abc def'"""
@@ -1206,9 +1223,12 @@
 always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                'abcdefghijklmnopqrstuvwxyz'
                '0123456789' '_.-')
-_safemaps = {}
+_safe_map = {}
+for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
+    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
+_safe_quoters = {}
 
-def quote(s, safe = '/'):
+def quote(s, safe='/'):
     """quote('abc def') -> 'abc%20def'
 
     Each part of a URL, e.g. the path info, the query, etc., has a
@@ -1229,27 +1249,32 @@
     called on a path where the existing slash characters are used as
     reserved characters.
     """
+    # fastpath
+    if not s:
+        if s is None:
+            raise TypeError('None object cannot be quoted')
+        return s
     cachekey = (safe, always_safe)
     try:
-        safe_map = _safemaps[cachekey]
+        (quoter, safe) = _safe_quoters[cachekey]
     except KeyError:
-        safe += always_safe
-        safe_map = {}
-        for i in range(256):
-            c = chr(i)
-            safe_map[c] = (c in safe) and c or ('%%%02X' % i)
-        _safemaps[cachekey] = safe_map
-    res = map(safe_map.__getitem__, s)
-    return ''.join(res)
+        safe_map = _safe_map.copy()
+        safe_map.update([(c, c) for c in safe])
+        quoter = safe_map.__getitem__
+        safe = always_safe + safe
+        _safe_quoters[cachekey] = (quoter, safe)
+    if not s.rstrip(safe):
+        return s
+    return ''.join(map(quoter, s))
 
-def quote_plus(s, safe = ''):
+def quote_plus(s, safe=''):
     """Quote the query fragment of a URL; replacing ' ' with '+'"""
     if ' ' in s:
         s = quote(s, safe + ' ')
         return s.replace(' ', '+')
     return quote(s, safe)
 
-def urlencode(query,doseq=0):
+def urlencode(query, doseq=0):
     """Encode a sequence of two-element tuples or dictionary into a URL query string.
 
     If any values in the query arg are sequences and doseq is true, each
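quote() now builds one shared _safe_map for all 256 byte values at import
time and caches a per-safe-string quoter in _safe_quoters, with an early
return for empty strings and for strings made up entirely of safe characters
(and an explicit TypeError for None). The observable behaviour:

    from urllib import quote, quote_plus

    print quote('abc def')              # 'abc%20def'
    print quote('/~user/index.html')    # '/' is safe by default: '/%7Euser/index.html'
    print quote('/~user/', safe='/~')   # extra safe characters: '/~user/'
    print quote_plus('key=a value&x')   # query style: 'key%3Da+value%26x'
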
@@ -1301,7 +1326,7 @@
             else:
                 try:
                     # is this a sufficient test for sequence-ness?
-                    x = len(v)
+                    len(v)
                 except TypeError:
                     # not a sequence
                     v = quote_plus(str(v))
@@ -1342,7 +1367,8 @@
     # strip port off host
     hostonly, port = splitport(host)
     # check if the host ends with any of the DNS suffixes
-    for name in no_proxy.split(','):
+    no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
+    for name in no_proxy_list:
         if name and (hostonly.endswith(name) or host.endswith(name)):
             return 1
     # otherwise, don't bypass
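proxy_bypass_environment() now strips whitespace around each no_proxy entry,
so a value written as 'localhost, .example.com' works the same as one without
spaces. For example:

    import os, urllib

    os.environ['no_proxy'] = 'localhost, .example.com'
    print urllib.proxy_bypass_environment('www.example.com')  # 1 (suffix match)
    print urllib.proxy_bypass_environment('www.python.org')   # 0
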
@@ -1401,7 +1427,7 @@
 
                 else:
                     mask = int(mask[1:])
-                    mask = 32 - mask
+                mask = 32 - mask
 
                 if (hostIP >> mask) == (base >> mask):
                     return True
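De-indenting 'mask = 32 - mask' makes the prefix-to-shift conversion apply to
exclusion entries without an explicit /prefix as well, so partial addresses in
the OS X proxy exceptions list compare the right bits. For an entry of '10'
(hypothetical values):

    # mask = 8 * ('10'.count('.') + 1) = 8       derived prefix length
    # mask = 32 - mask = 24                      shift amount, now always applied
    # bypass if (hostIP >> 24) == (base >> 24)   i.e. the host is inside 10.0.0.0/8
    # previously the shift stayed 8, so the comparison looked at the wrong bits
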
@@ -1411,7 +1437,6 @@
 
         return False
 
-
     def getproxies_macosx_sysconf():
         """Return a dictionary of scheme -> proxy server URL mappings.
 
@@ -1420,8 +1445,6 @@
         """
         return _get_proxies()
 
-
-
     def proxy_bypass(host):
         if getproxies_environment():
             return proxy_bypass_environment(host)
@@ -1525,18 +1548,11 @@
         # '<local>' string by the localhost entry and the corresponding
         # canonical entry.
         proxyOverride = proxyOverride.split(';')
-        i = 0
-        while i < len(proxyOverride):
-            if proxyOverride[i] == '<local>':
-                proxyOverride[i:i+1] = ['localhost',
-                                        '127.0.0.1',
-                                        socket.gethostname(),
-                                        socket.gethostbyname(
-                                            socket.gethostname())]
-            i += 1
-        # print proxyOverride
         # now check if we match one of the registry values.
         for test in proxyOverride:
+            if test == '<local>':
+                if '.' not in rawHost:
+                    return 1
             test = test.replace(".", r"\.")     # mask dots
             test = test.replace("*", r".*")     # change glob sequence
             test = test.replace("?", r".")      # change glob char
@@ -1584,67 +1600,3 @@
     # Report during remote transfers
     print "Block number: %d, Block size: %d, Total size: %d" % (
         blocknum, blocksize, totalsize)
-
-# Test program
-def test(args=[]):
-    if not args:
-        args = [
-            '/etc/passwd',
-            'file:/etc/passwd',
-            'file://localhost/etc/passwd',
-            'ftp://ftp.gnu.org/pub/README',
-            'http://www.python.org/index.html',
-            ]
-        if hasattr(URLopener, "open_https"):
-            args.append('https://synergy.as.cmu.edu/~geek/')
-    try:
-        for url in args:
-            print '-'*10, url, '-'*10
-            fn, h = urlretrieve(url, None, reporthook)
-            print fn
-            if h:
-                print '======'
-                for k in h.keys(): print k + ':', h[k]
-                print '======'
-            fp = open(fn, 'rb')
-            data = fp.read()
-            del fp
-            if '\r' in data:
-                table = string.maketrans("", "")
-                data = data.translate(table, "\r")
-            print data
-            fn, h = None, None
-        print '-'*40
-    finally:
-        urlcleanup()
-
-def main():
-    import getopt, sys
-    try:
-        opts, args = getopt.getopt(sys.argv[1:], "th")
-    except getopt.error, msg:
-        print msg
-        print "Use -h for help"
-        return
-    t = 0
-    for o, a in opts:
-        if o == '-t':
-            t = t + 1
-        if o == '-h':
-            print "Usage: python urllib.py [-t] [url ...]"
-            print "-t runs self-test;",
-            print "otherwise, contents of urls are printed"
-            return
-    if t:
-        if t > 1:
-            test1()
-        test(args)
-    else:
-        if not args:
-            print "Use -h for help"
-        for url in args:
-            print urlopen(url).read(),
-
-# Run test program when run as a script
-if __name__ == '__main__':
-    main()

-- 
Repository URL: http://hg.python.org/jython

