[Python-checkins] CVS: python/dist/src/Lib urlparse.py,1.30,1.31

Thu, 15 Nov 2001 18:52:59 -0800

Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv17345/Lib

Modified Files:
	urlparse.py 
Log Message:
Fix parsing of parameters from a URL; urlparse() split the parameters at the
first ';' anywhere in the path instead of only within the last path segment.
Introduces two new functions, urlsplit() and urlunsplit(), that do the simpler
job of splitting and recombining a URL without monkeying around with the
parameters field, since that field was not being handled properly.
This closes bug #478038.


Index: urlparse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/urlparse.py,v
retrieving revision 1.30
retrieving revision 1.31
diff -C2 -d -r1.30 -r1.31
*** urlparse.py	2001/08/13 14:38:50	1.30
--- urlparse.py	2001/11/16 02:52:57	1.31
***************
*** 44,48 ****
  
  
! def urlparse(url, scheme = '', allow_fragments = 1):
      """Parse a URL into 6 components:
      <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
--- 44,48 ----
  
  
! def urlparse(url, scheme='', allow_fragments=1):
      """Parse a URL into 6 components:
      <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
***************
*** 50,53 ****
--- 50,76 ----
      Note that we don't break the components up in smaller bits
      (e.g. netloc is a single string) and we don't expand % escapes."""
+     tuple = urlsplit(url, scheme, allow_fragments)
+     scheme, netloc, url, query, fragment = tuple
+     if scheme in uses_params and ';' in url:
+         url, params = _splitparams(url)
+     else:
+         params = ''
+     return scheme, netloc, url, params, query, fragment
+ 
+ def _splitparams(url):
+     if '/'  in url:
+         i = url.find(';', url.rfind('/'))
+         if i < 0:
+             return url, ''
+     else:
+         i = url.find(';')
+     return url[:i], url[i+1:]
+ 
+ def urlsplit(url, scheme='', allow_fragments=1):
+     """Parse a URL into 5 components:
+     <scheme>://<netloc>/<path>?<query>#<fragment>
+     Return a 5-tuple: (scheme, netloc, path, query, fragment).
+     Note that we don't break the components up in smaller bits
+     (e.g. netloc is a single string) and we don't expand % escapes."""
      key = url, scheme, allow_fragments
      cached = _parse_cache.get(key, None)
***************
*** 56,60 ****
      if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
          clear_cache()
!     netloc = params = query = fragment = ''
      i = url.find(':')
      if i > 0:
--- 79,83 ----
      if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
          clear_cache()
!     netloc = query = fragment = ''
      i = url.find(':')
      if i > 0:
***************
*** 68,85 ****
                  netloc = url[2:i]
                  url = url[i:]
!             if allow_fragments:
!                 i = url.rfind('#')
!                 if i >= 0:
!                     fragment = url[i+1:]
!                     url = url[:i]
!             i = url.find('?')
!             if i >= 0:
!                 query = url[i+1:]
!                 url = url[:i]
!             i = url.find(';')
!             if i >= 0:
!                 params = url[i+1:]
!                 url = url[:i]
!             tuple = scheme, netloc, url, params, query, fragment
              _parse_cache[key] = tuple
              return tuple
--- 91,99 ----
                  netloc = url[2:i]
                  url = url[i:]
!             if allow_fragments and '#' in url:
!                 url, fragment = url.split('#', 1)
!             if '?' in url:
!                 url, query = url.split('?', 1)
!             tuple = scheme, netloc, url, query, fragment
              _parse_cache[key] = tuple
              return tuple
***************
*** 95,111 ****
                  i = len(url)
              netloc, url = url[2:i], url[i:]
!     if allow_fragments and scheme in uses_fragment:
!         i = url.rfind('#')
!         if i >= 0:
!             url, fragment = url[:i], url[i+1:]
!     if scheme in uses_query:
!         i = url.find('?')
!         if i >= 0:
!             url, query = url[:i], url[i+1:]
!     if scheme in uses_params:
!         i = url.find(';')
!         if i >= 0:
!             url, params = url[:i], url[i+1:]
!     tuple = scheme, netloc, url, params, query, fragment
      _parse_cache[key] = tuple
      return tuple
--- 109,117 ----
                  i = len(url)
              netloc, url = url[2:i], url[i:]
!     if allow_fragments and scheme in uses_fragment and '#' in url:
!         url, fragment = url.split('#', 1)
!     if scheme in uses_query and '?' in url:
!         url, query = url.split('?', 1)
!     tuple = scheme, netloc, url, query, fragment
      _parse_cache[key] = tuple
      return tuple
***************
*** 116,119 ****
--- 122,130 ----
      originally had redundant delimiters, e.g. a ? with an empty query
      (the draft states that these are equivalent)."""
+     if params:
+         url = "%s;%s" % (url, params)
+     return urlunsplit((scheme, netloc, url, query, fragment))
+ 
+ def urlunsplit((scheme, netloc, url, query, fragment)):
      if netloc or (scheme in uses_netloc and url[:2] == '//'):
          if url and url[:1] != '/': url = '/' + url
***************
*** 121,126 ****
      if scheme:
          url = scheme + ':' + url
-     if params:
-         url = url + ';' + params
      if query:
          url = url + '?' + query
--- 132,135 ----
***************
*** 188,194 ****
      empty string.
      """
!     s, n, p, a, q, frag = urlparse(url)
!     defrag = urlunparse((s, n, p, a, q, ''))
!     return defrag, frag
  
  
--- 197,206 ----
      empty string.
      """
!     if '#' in url:
!         s, n, p, a, q, frag = urlparse(url)
!         defrag = urlunparse((s, n, p, a, q, ''))
!         return defrag, frag
!     else:
!         return url, ''