[Python-checkins] bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (GH-4529) (#4533)

Raymond Hettinger webhook-mailer at python.org
Thu Nov 23 18:58:01 EST 2017


https://github.com/python/cpython/commit/ff847d1ac7e6a8ee1fb6f8883cfb4aec4b4a9b03
commit: ff847d1ac7e6a8ee1fb6f8883cfb4aec4b4a9b03
branch: 3.6
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: Raymond Hettinger <rhettinger at users.noreply.github.com>
date: 2017-11-23T15:57:58-08:00
summary:

bpo-31325: Fix usage of namedtuple in RobotFileParser.parse() (GH-4529) (#4533)

(cherry picked from commit 3df02dbc8e197053105f9dffeae40b04ec66766e)

files:
A Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst
M Doc/library/urllib.robotparser.rst
M Lib/test/test_robotparser.py
M Lib/urllib/robotparser.py

diff --git a/Doc/library/urllib.robotparser.rst b/Doc/library/urllib.robotparser.rst
index 7d31932f965..e3b90e673ca 100644
--- a/Doc/library/urllib.robotparser.rst
+++ b/Doc/library/urllib.robotparser.rst
@@ -69,10 +69,10 @@ structure of :file:`robots.txt` files, see http://www.robotstxt.org/orig.html.
    .. method:: request_rate(useragent)
 
       Returns the contents of the ``Request-rate`` parameter from
-      ``robots.txt`` in the form of a :func:`~collections.namedtuple`
-      ``(requests, seconds)``.  If there is no such parameter or it doesn't
-      apply to the *useragent* specified or the ``robots.txt`` entry for this
-      parameter has invalid syntax, return ``None``.
+      ``robots.txt`` as a :term:`named tuple` ``RequestRate(requests, seconds)``.
+      If there is no such parameter or it doesn't apply to the *useragent*
+      specified or the ``robots.txt`` entry for this parameter has invalid
+      syntax, return ``None``.
 
       .. versionadded:: 3.6
 
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
index 0f64ba8b060..e47344c1195 100644
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -2,7 +2,6 @@
 import os
 import unittest
 import urllib.robotparser
-from collections import namedtuple
 from test import support
 from http.server import BaseHTTPRequestHandler, HTTPServer
 try:
@@ -90,6 +89,10 @@ def test_request_rate(self):
                         self.parser.crawl_delay(agent), self.crawl_delay
                     )
                 if self.request_rate:
+                    self.assertIsInstance(
+                        self.parser.request_rate(agent),
+                        urllib.robotparser.RequestRate
+                    )
                     self.assertEqual(
                         self.parser.request_rate(agent).requests,
                         self.request_rate.requests
@@ -111,7 +114,7 @@ class CrawlDelayAndRequestRateTest(BaseRequestRateTest, unittest.TestCase):
 Disallow: /%7ejoe/index.html
     """
     agent = 'figtree'
-    request_rate = namedtuple('req_rate', 'requests seconds')(9, 30)
+    request_rate = urllib.robotparser.RequestRate(9, 30)
     crawl_delay = 3
     good = [('figtree', '/foo.html')]
     bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
@@ -240,7 +243,7 @@ class DefaultEntryTest(BaseRequestRateTest, unittest.TestCase):
 Request-rate: 3/15
 Disallow: /cyberworld/map/
     """
-    request_rate = namedtuple('req_rate', 'requests seconds')(3, 15)
+    request_rate = urllib.robotparser.RequestRate(3, 15)
     crawl_delay = 1
     good = ['/', '/test.html']
     bad = ['/cyberworld/map/index.html']
diff --git a/Lib/urllib/robotparser.py b/Lib/urllib/robotparser.py
index 9dab4c1c3a8..daac29c68dc 100644
--- a/Lib/urllib/robotparser.py
+++ b/Lib/urllib/robotparser.py
@@ -16,6 +16,9 @@
 
 __all__ = ["RobotFileParser"]
 
+RequestRate = collections.namedtuple("RequestRate", "requests seconds")
+
+
 class RobotFileParser:
     """ This class provides a set of methods to read, parse and answer
     questions about a single robots.txt file.
@@ -136,11 +139,7 @@ def parse(self, lines):
                         # check if all values are sane
                         if (len(numbers) == 2 and numbers[0].strip().isdigit()
                             and numbers[1].strip().isdigit()):
-                            req_rate = collections.namedtuple('req_rate',
-                                                              'requests seconds')
-                            entry.req_rate = req_rate
-                            entry.req_rate.requests = int(numbers[0])
-                            entry.req_rate.seconds = int(numbers[1])
+                            entry.req_rate = RequestRate(int(numbers[0]), int(numbers[1]))
                         state = 2
         if state == 2:
             self._add_entry(entry)
diff --git a/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst b/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst
new file mode 100644
index 00000000000..89a193c9ef5
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-23-22-12-11.bpo-31325.8jAUxN.rst
@@ -0,0 +1,5 @@
+Fix wrong usage of :func:`collections.namedtuple` in
+the :meth:`RobotFileParser.parse() <urllib.robotparser.RobotFileParser.parse>`
+method.
+
+Initial patch by Robin Wellner.



More information about the Python-checkins mailing list