[Python-checkins] r42088 - sandbox/trunk/setuptools/setuptools/package_index.py

phillip.eby python-checkins at python.org
Tue Jan 17 20:57:00 CET 2006


Author: phillip.eby
Date: Tue Jan 17 20:56:59 2006
New Revision: 42088

Modified:
   sandbox/trunk/setuptools/setuptools/package_index.py
Log:
Scrape-proof Sourceforge mirror processing!
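
The patch below drops the old mirror-page scraping path (the removed
_download_sourceforge method and the SourceForge special case in
_download_html) and instead normalizes SourceForge URLs up front: process_url
now passes every URL through the new fix_sf_url() helper, which rewrites
prdownloads.sourceforge.net links to point directly at dl.sourceforge.net.
Purely as an illustration (not part of the commit; the sample URLs are made
up), the effect of the helper is:

    >>> from setuptools.package_index import fix_sf_url
    >>> fix_sf_url('http://prdownloads.sourceforge.net/someproject/pkg-1.0.tar.gz')
    'http://dl.sourceforge.net/sourceforge/someproject/pkg-1.0.tar.gz'
    >>> fix_sf_url('http://www.python.org/ftp/python/')  # non-SourceForge URLs pass through
    'http://www.python.org/ftp/python/'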


Modified: sandbox/trunk/setuptools/setuptools/package_index.py
==============================================================================
--- sandbox/trunk/setuptools/setuptools/package_index.py	(original)
+++ sandbox/trunk/setuptools/setuptools/package_index.py	Tue Jan 17 20:56:59 2006
@@ -134,9 +134,9 @@
 
     def process_url(self, url, retrieve=False):
         """Evaluate a URL as a possible download, and maybe retrieve it"""
+        url = fix_sf_url(url)
         if url in self.scanned_urls and not retrieve:
             return
-
         self.scanned_urls[url] = True
         if not URL_SCHEME(url):
             # process filenames or directories
@@ -296,6 +296,36 @@
                     "; possible download problem?"
                 )
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
     def download(self, spec, tmpdir):
         """Locate and/or download `spec` to `tmpdir`, returning a local path
 
@@ -502,8 +532,6 @@
 
 
     def _download_html(self, url, headers, filename, tmpdir):
-        # Check for a sourceforge URL
-        sf_url = url.startswith('http://prdownloads.')
         file = open(filename)
         for line in file:
             if line.strip():
@@ -513,13 +541,6 @@
                     file.close()
                     os.unlink(filename)
                     return self._download_svn(url, filename)
-                # Check for a SourceForge header
-                elif sf_url:
-                    page = ''.join(list(file))
-                    if '?use_mirror=' in page:
-                        file.close()
-                        os.unlink(filename)
-                        return self._download_sourceforge(url, page, tmpdir)
                 break   # not an index page
         file.close()
         os.unlink(filename)
@@ -541,43 +562,42 @@
         log.warn(msg, *args)
 
 
+def fix_sf_url(url):
+    scheme, server, path, param, query, frag = urlparse.urlparse(url)
+    if server!='prdownloads.sourceforge.net':
+        return url
+    return urlparse.urlunparse(
+        (scheme, 'dl.sourceforge.net', 'sourceforge'+path, param, '', frag)
+    )
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
 
-    def _download_sourceforge(self, source_url, sf_page, tmpdir):
-        """Download package from randomly-selected SourceForge mirror"""
 
-        self.debug("Processing SourceForge mirror page")
 
-        mirror_regex = re.compile(r'HREF="?(/.*?\?use_mirror=[^">]*)', re.I)
-        urls = [m.group(1) for m in mirror_regex.finditer(sf_page)]
-        if not urls:
-            raise DistutilsError(
-                "URL looks like a Sourceforge mirror page, but no URLs found"
-            )
 
-        import random
-        url = urlparse.urljoin(source_url, random.choice(urls))
 
-        self.info(
-            "Requesting redirect to (randomly selected) %r mirror",
-            url.split('=',1)[-1]
-        )
 
-        f = self.open_url(url)
-        match = re.search(
-            r'(?i)<META HTTP-EQUIV="refresh" content=".*?URL=(.*?)"',
-            f.read()
-        )
-        f.close()
 
-        if match:
-            download_url = match.group(1)
-            scheme = URL_SCHEME(download_url)
-            return self._download_url(scheme.group(1), download_url, tmpdir)
-        else:
-            raise DistutilsError(
-                'No META HTTP-EQUIV="refresh" found in Sourceforge page at %s'
-                % url
-            )
 
 
 
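For context, the deleted code fetched the prdownloads HTML page, regex-scraped
its ?use_mirror= links, picked a mirror at random, and then followed a META
refresh to reach the actual file. With the URL rewritten to dl.sourceforge.net
before any request is made, a SourceForge download becomes an ordinary HTTP
fetch. A minimal standalone sketch of that idea (assuming Python 2's urllib2
and a made-up package URL; the real code path still goes through the existing
open_url()/_download_url() machinery rather than this snippet):

    # Sketch only: with fix_sf_url() applied up front, there is no mirror
    # page left to scrape -- the rewritten URL serves the file directly.
    import urllib2
    from setuptools.package_index import fix_sf_url

    url = fix_sf_url('http://prdownloads.sourceforge.net/someproject/pkg-1.0.tar.gz')
    data = urllib2.urlopen(url).read()            # plain HTTP GET, no HTML parsing
    open('pkg-1.0.tar.gz', 'wb').write(data)      # hypothetical local filename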

