[Python-checkins] cpython (merge 3.3 -> default): Fixes issue #16409: The reporthook callback made by the legacy

gregory.p.smith python-checkins at python.org
Sat Nov 10 22:44:59 CET 2012


http://hg.python.org/cpython/rev/8c48eb0239ca
changeset:   80341:8c48eb0239ca
parent:      80339:398f8770bf0d
parent:      80340:19e6e32db5ec
user:        Gregory P. Smith <greg at krypto.org>
date:        Sat Nov 10 13:44:50 2012 -0800
summary:
  Fixes issue #16409: The reporthook callback made by the legacy
urllib.request.urlretrieve API now properly supplies a constant
non-zero block_size as it did in Python 3.2 and 2.7.  This matches the
behavior of urllib.request.URLopener.retrieve.

files:
  Doc/library/urllib.request.rst |   3 +-
  Lib/test/test_urllibnet.py     |  34 +++++++++++++++++++--
  Lib/urllib/request.py          |   4 +-
  Misc/NEWS                      |   5 +++
  4 files changed, 39 insertions(+), 7 deletions(-)


diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -1305,7 +1305,8 @@
        *filename* is not given, the filename is the output of :func:`tempfile.mktemp`
        with a suffix that matches the suffix of the last path component of the input
        URL.  If *reporthook* is given, it must be a function accepting three numeric
-       parameters.  It will be called after each chunk of data is read from the
+       parameters: A chunk number, the maximum size chunks are read in and the total size of the download
+       (-1 if unknown).  It will be called once at the start and after each chunk of data is read from the
        network.  *reporthook* is ignored for local URLs.
 
        If the *url* uses the :file:`http:` scheme identifier, the optional *data*
diff --git a/Lib/test/test_urllibnet.py b/Lib/test/test_urllibnet.py
--- a/Lib/test/test_urllibnet.py
+++ b/Lib/test/test_urllibnet.py
@@ -137,10 +137,10 @@
     """Tests urllib.request.urlretrieve using the network."""
 
     @contextlib.contextmanager
-    def urlretrieve(self, *args):
+    def urlretrieve(self, *args, **kwargs):
         resource = args[0]
         with support.transient_internet(resource):
-            file_location, info = urllib.request.urlretrieve(*args)
+            file_location, info = urllib.request.urlretrieve(*args, **kwargs)
             try:
                 yield file_location, info
             finally:
@@ -170,9 +170,10 @@
             self.assertIsInstance(info, email.message.Message,
                                   "info is not an instance of email.message.Message")
 
+    logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png"
+
     def test_data_header(self):
-        logo = "http://www.python.org/community/logos/python-logo-master-v3-TM.png"
-        with self.urlretrieve(logo) as (file_location, fileheaders):
+        with self.urlretrieve(self.logo) as (file_location, fileheaders):
             datevalue = fileheaders.get('Date')
             dateformat = '%a, %d %b %Y %H:%M:%S GMT'
             try:
@@ -180,6 +181,31 @@
             except ValueError:
                 self.fail('Date value not in %r format', dateformat)
 
+    def test_reporthook(self):
+        records = []
+        def recording_reporthook(blocks, block_size, total_size):
+            records.append((blocks, block_size, total_size))
+
+        with self.urlretrieve(self.logo, reporthook=recording_reporthook) as (
+                file_location, fileheaders):
+            expected_size = int(fileheaders['Content-Length'])
+
+        records_repr = repr(records)  # For use in error messages.
+        self.assertGreater(len(records), 1, msg="There should always be two "
+                           "calls; the first one before the transfer starts.")
+        self.assertEqual(records[0][0], 0)
+        self.assertGreater(records[0][1], 0,
+                           msg="block size can't be 0 in %s" % records_repr)
+        self.assertEqual(records[0][2], expected_size)
+        self.assertEqual(records[-1][2], expected_size)
+
+        block_sizes = {block_size for _, block_size, _ in records}
+        self.assertEqual({records[0][1]}, block_sizes,
+                         msg="block sizes in %s must be equal" % records_repr)
+        self.assertGreaterEqual(records[-1][0]*records[0][1], expected_size,
+                                msg="number of blocks * block size must be"
+                                " >= total size in %s" % records_repr)
+
 
 def test_main():
     support.requires('network')
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -208,7 +208,7 @@
                 size = int(headers["Content-Length"])
 
             if reporthook:
-                reporthook(blocknum, 0, size)
+                reporthook(blocknum, bs, size)
 
             while True:
                 block = fp.read(bs)
@@ -218,7 +218,7 @@
                 tfp.write(block)
                 blocknum += 1
                 if reporthook:
-                    reporthook(blocknum, len(block), size)
+                    reporthook(blocknum, bs, size)
 
     if size >= 0 and read < size:
         raise ContentTooShortError(
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -113,6 +113,11 @@
 Library
 -------
 
+- Issue #16409: The reporthook callback made by the legacy
+  urllib.request.urlretrieve API now properly supplies a constant non-zero
+  block_size as it did in Python 3.2 and 2.7.  This matches the behavior of
+  urllib.request.URLopener.retrieve.
+
 - Issue #16431: Use the type information when constructing a Decimal subtype
   from a Decimal argument.
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list