[Python-checkins] distutils2: Fix bad behavior while browsing the simple index.
tarek.ziade
python-checkins at python.org
Sun Aug 8 11:50:46 CEST 2010
tarek.ziade pushed 45e7efdb6985 to distutils2:
http://hg.python.org/distutils2/rev/45e7efdb6985
changeset: 451:45e7efdb6985
user: Alexis Metaireau <ametaireau at gmail.com>
date: Wed Jul 21 11:26:42 2010 +0200
summary: Fix bad behavior while browsing the simple index.
files: src/distutils2/index/dist.py, src/distutils2/index/simple.py, src/distutils2/tests/test_index_simple.py
diff --git a/src/distutils2/index/dist.py b/src/distutils2/index/dist.py
--- a/src/distutils2/index/dist.py
+++ b/src/distutils2/index/dist.py
@@ -123,6 +123,9 @@
raise TypeError("cannot compare %s and %s"
% (self.name, other.name))
+ def __repr__(self):
+ return "<%s %s>" %(self.name, self.version)
+
def __eq__(self, other):
self._check_is_comparable(other)
return self.version == other.version
diff --git a/src/distutils2/index/simple.py b/src/distutils2/index/simple.py
--- a/src/distutils2/index/simple.py
+++ b/src/distutils2/index/simple.py
@@ -14,7 +14,7 @@
from distutils2.index.base import IndexClient
from distutils2.index.dist import (ReleasesList, EXTENSIONS,
- get_infos_from_url)
+ get_infos_from_url, MD5_HASH)
from distutils2.index.errors import (IndexError, DownloadError,
UnableToDownload)
from distutils2.index.mirrors import get_mirrors
@@ -30,9 +30,6 @@
# -- Regexps -------------------------------------------------
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
-PYPI_MD5 = re.compile(
- '<a href="([^"#]+)">([^<]+)</a>\n\s+\\(<a (?:title="MD5 hash"\n\s+)'
- 'href="[^?]+\?:action=show_md5&digest=([0-9a-f]{32})">md5</a>\\)')
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# This pattern matches a character entity reference (a decimal numeric
@@ -238,6 +235,9 @@
else:
return self._default_link_matcher
+ def _get_full_url(self, url, base_url):
+ return urlparse.urljoin(base_url, self._htmldecode(url))
+
def _simple_link_matcher(self, content, base_url):
"""Yield all links with a rel="download" or rel="homepage".
@@ -245,23 +245,27 @@
If follow_externals is set to False, don't yield the external
urls.
"""
+ for match in HREF.finditer(content):
+ url = self._get_full_url(match.group(1), base_url)
+ if MD5_HASH.match(url):
+ yield (url, True)
+
for match in REL.finditer(content):
+ # search for rel links.
tag, rel = match.groups()
rels = map(str.strip, rel.lower().split(','))
if 'homepage' in rels or 'download' in rels:
for match in HREF.finditer(tag):
- url = urlparse.urljoin(base_url,
- self._htmldecode(match.group(1)))
+ url = self._get_full_url(match.group(1), base_url)
if 'download' in rels or self._is_browsable(url):
# yield a list of (url, is_download)
- yield (urlparse.urljoin(base_url, url),
- 'download' in rels)
+ yield (url, 'download' in rels)
def _default_link_matcher(self, content, base_url):
"""Yield all links found on the page.
"""
for match in HREF.finditer(content):
- url = urlparse.urljoin(base_url, self._htmldecode(match.group(1)))
+ url = self._get_full_url(match.group(1), base_url)
if self._is_browsable(url):
yield (url, False)
diff --git a/src/distutils2/tests/test_index_simple.py b/src/distutils2/tests/test_index_simple.py
--- a/src/distutils2/tests/test_index_simple.py
+++ b/src/distutils2/tests/test_index_simple.py
@@ -239,24 +239,30 @@
# returns false for it.
# 3. one link that must be followed cause it's a homepage that is
# browsable
+ # 4. one link that must be followed, because it contains an md5 hash
self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
self.assertFalse(crawler._is_browsable("http://dl-link2"))
content = """
<a href="http://dl-link1" rel="download">download_link1</a>
<a href="http://dl-link2" rel="homepage">homepage_link1</a>
- <a href="%stest" rel="homepage">homepage_link2</a>
- """ % crawler.index_url
+ <a href="%(index_url)stest" rel="homepage">homepage_link2</a>
+ <a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef>download_link2</a>
+ """ % {'index_url': crawler.index_url }
# Test that the simple link matcher yield the good links.
generator = crawler._simple_link_matcher(content, crawler.index_url)
+ self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' % crawler.index_url,
+ True), generator.next())
self.assertEqual(('http://dl-link1', True), generator.next())
self.assertEqual(('%stest' % crawler.index_url, False),
generator.next())
self.assertRaises(StopIteration, generator.next)
- # Follow the external links is possible
+ # Following the external links is possible (e.g. homepages)
crawler.follow_externals = True
generator = crawler._simple_link_matcher(content, crawler.index_url)
+ self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' % crawler.index_url,
+ True), generator.next())
self.assertEqual(('http://dl-link1', True), generator.next())
self.assertEqual(('http://dl-link2', False), generator.next())
self.assertEqual(('%stest' % crawler.index_url, False),
--
Repository URL: http://hg.python.org/distutils2
More information about the Python-checkins
mailing list