[Python-checkins] cpython (merge 3.6 -> default): Issue #28151: Merge from 3.6
berker.peksag
python-checkins at python.org
Sun Sep 18 04:21:23 EDT 2016
https://hg.python.org/cpython/rev/4bfd91a45c81
changeset: 103914:4bfd91a45c81
parent: 103912:b86effa7e656
parent: 103913:83bca958adc9
user: Berker Peksag <berker.peksag at gmail.com>
date: Sun Sep 18 11:22:29 2016 +0300
summary:
Issue #28151: Merge from 3.6
files:
Lib/test/test_robotparser.py | 43 ++++++++++++++++++++---
1 files changed, 36 insertions(+), 7 deletions(-)
diff --git a/Lib/test/test_robotparser.py b/Lib/test/test_robotparser.py
--- a/Lib/test/test_robotparser.py
+++ b/Lib/test/test_robotparser.py
@@ -1,4 +1,5 @@
import io
+import os
import unittest
import urllib.robotparser
from collections import namedtuple
@@ -272,14 +273,42 @@
class NetworkTestCase(unittest.TestCase):
- def testPythonOrg(self):
+ base_url = 'http://www.pythontest.net/'
+ robots_txt = '{}elsewhere/robots.txt'.format(base_url)
+
+ @classmethod
+ def setUpClass(cls):
support.requires('network')
- with support.transient_internet('www.python.org'):
- parser = urllib.robotparser.RobotFileParser(
- "http://www.python.org/robots.txt")
- parser.read()
- self.assertTrue(
- parser.can_fetch("*", "http://www.python.org/robots.txt"))
+ with support.transient_internet(cls.base_url):
+ cls.parser = urllib.robotparser.RobotFileParser(cls.robots_txt)
+ cls.parser.read()
+
+ def url(self, path):
+ return '{}{}{}'.format(
+ self.base_url, path, '/' if not os.path.splitext(path)[1] else ''
+ )
+
+ def test_basic(self):
+ self.assertFalse(self.parser.disallow_all)
+ self.assertFalse(self.parser.allow_all)
+ self.assertGreater(self.parser.mtime(), 0)
+ self.assertFalse(self.parser.crawl_delay('*'))
+ self.assertFalse(self.parser.request_rate('*'))
+
+ def test_can_fetch(self):
+ self.assertTrue(self.parser.can_fetch('*', self.url('elsewhere')))
+ self.assertFalse(self.parser.can_fetch('Nutch', self.base_url))
+ self.assertFalse(self.parser.can_fetch('Nutch', self.url('brian')))
+ self.assertFalse(self.parser.can_fetch('Nutch', self.url('webstats')))
+ self.assertFalse(self.parser.can_fetch('*', self.url('webstats')))
+ self.assertTrue(self.parser.can_fetch('*', self.base_url))
+
+ def test_read_404(self):
+ parser = urllib.robotparser.RobotFileParser(self.url('i-robot.txt'))
+ parser.read()
+ self.assertTrue(parser.allow_all)
+ self.assertFalse(parser.disallow_all)
+ self.assertEqual(parser.mtime(), 0)
if __name__=='__main__':
unittest.main()
--
Repository URL: https://hg.python.org/cpython
More information about the Python-checkins mailing list