[Python-checkins] cpython (merge 3.1 -> 3.2): merge from 3.1

senthil.kumaran python-checkins at python.org
Fri Apr 15 12:22:35 CEST 2011


http://hg.python.org/cpython/rev/5423aaefadbb
changeset:   69367:5423aaefadbb
branch:      3.2
parent:      69363:40656d8ae2c6
parent:      69366:495d12196487
user:        Senthil Kumaran <orsenthil at gmail.com>
date:        Fri Apr 15 18:21:26 2011 +0800
summary:
  merge from 3.1

files:
  Lib/test/test_urlparse.py |   5 +++++
  Lib/urllib/parse.py       |  15 ++++++++++-----
  Misc/NEWS                 |   3 +++
  3 files changed, 18 insertions(+), 5 deletions(-)


diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -228,6 +228,11 @@
         #self.checkJoin(RFC1808_BASE, 'http:g', 'http:g')
         #self.checkJoin(RFC1808_BASE, 'http:', 'http:')
 
+    def test_RFC2368(self):
+        # Issue 11467: path that starts with a number is not parsed correctly
+        self.assertEqual(urllib.parse.urlparse('mailto:1337@example.org'),
+                ('mailto', '', '1337@example.org', '', '', ''))
+
     def test_RFC2396(self):
         # cases from RFC 2396
 
diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py
--- a/Lib/urllib/parse.py
+++ b/Lib/urllib/parse.py
@@ -340,12 +340,17 @@
             v = SplitResult(scheme, netloc, url, query, fragment)
             _parse_cache[key] = v
             return _coerce_result(v)
-        if url.endswith(':') or not url[i+1].isdigit():
-            for c in url[:i]:
-                if c not in scheme_chars:
-                    break
-            else:
+        for c in url[:i]:
+            if c not in scheme_chars:
+                break
+        else:
+            try:
+                # make sure "url" is not actually a port number (in which case
+                # "scheme" is really part of the path)
+                _testportnum = int(url[i+1:])
+            except ValueError:
                 scheme, url = url[:i].lower(), url[i+1:]
+
     if url[:2] == '//':
         netloc, url = _splitnetloc(url, 2)
         if (('[' in netloc and ']' not in netloc) or
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -56,6 +56,9 @@
 Library
 -------
 
+- Issue #11467: Fix urlparse behavior when handling URLs whose scheme-specific
+  part contains only digits. Patch by Santoso Wijaya.
+
 - Issue #11474: Fix the bug with url2pathname() handling of '/C|/' on Windows.
   Patch by Santoso Wijaya.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list