r83238 - in python/branches/py3k: Lib/test/test_robotparser.py Lib/urllib/robotparser.py Misc/NEWS
Author: georg.brandl
Date: Thu Jul 29 19:55:01 2010
New Revision: 83238

Log:
#4108: the first default entry (User-agent: *) wins.

Modified:
   python/branches/py3k/Lib/test/test_robotparser.py
   python/branches/py3k/Lib/urllib/robotparser.py
   python/branches/py3k/Misc/NEWS

Modified: python/branches/py3k/Lib/test/test_robotparser.py
==============================================================================
--- python/branches/py3k/Lib/test/test_robotparser.py (original)
+++ python/branches/py3k/Lib/test/test_robotparser.py Thu Jul 29 19:55:01 2010
@@ -216,6 +216,20 @@
 
 RobotTest(14, doc, good, bad)
 
+# 15. For issue #4108 (obey first * entry)
+doc = """
+User-agent: *
+Disallow: /some/path
+
+User-agent: *
+Disallow: /another/path
+"""
+
+good = ['/another/path']
+bad = ['/some/path']
+
+RobotTest(15, doc, good, bad)
+
 
 class NetworkTestCase(unittest.TestCase):
 

Modified: python/branches/py3k/Lib/urllib/robotparser.py
==============================================================================
--- python/branches/py3k/Lib/urllib/robotparser.py (original)
+++ python/branches/py3k/Lib/urllib/robotparser.py Thu Jul 29 19:55:01 2010
@@ -66,7 +66,9 @@
     def _add_entry(self, entry):
         if "*" in entry.useragents:
             # the default entry is considered last
-            self.default_entry = entry
+            if self.default_entry is None:
+                # the first default entry wins
+                self.default_entry = entry
         else:
             self.entries.append(entry)
 
@@ -118,7 +120,7 @@
                         entry.rulelines.append(RuleLine(line[1], True))
                         state = 2
         if state == 2:
-            self.entries.append(entry)
+            self._add_entry(entry)
 
 
     def can_fetch(self, useragent, url):

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS (original)
+++ python/branches/py3k/Misc/NEWS Thu Jul 29 19:55:01 2010
@@ -475,6 +475,9 @@
 Library
 -------
 
+- Issue #4108: In urllib.robotparser, if there are multiple 'User-agent: *'
+  entries, consider the first one.
+
 - Issue #6630: Allow customizing regex flags when subclassing the
   string.Template class.
 
participants (1)
-
georg.brandl