[Python-checkins] cpython: Issue #16423: urllib.request now has support for ``data:`` URLs.
antoine.pitrou
python-checkins at python.org
Sat Nov 24 18:00:13 CET 2012
http://hg.python.org/cpython/rev/a182367eac5a
changeset: 80582:a182367eac5a
user: Antoine Pitrou <solipsis at pitrou.net>
date: Sat Nov 24 17:59:08 2012 +0100
summary:
Issue #16423: urllib.request now has support for ``data:`` URLs.
Patch by Mathias Panzenböck.
files:
Doc/library/urllib.request.rst | 26 +++++++-
Lib/test/test_urllib.py | 74 ++++++++++++++++++++++
Lib/urllib/request.py | 38 ++++++++++-
Misc/ACKS | 1 +
Misc/NEWS | 3 +
5 files changed, 137 insertions(+), 5 deletions(-)
diff --git a/Doc/library/urllib.request.rst b/Doc/library/urllib.request.rst
--- a/Doc/library/urllib.request.rst
+++ b/Doc/library/urllib.request.rst
@@ -121,7 +121,7 @@
instances of them or subclasses of them: :class:`ProxyHandler`,
:class:`UnknownHandler`, :class:`HTTPHandler`, :class:`HTTPDefaultErrorHandler`,
:class:`HTTPRedirectHandler`, :class:`FTPHandler`, :class:`FileHandler`,
- :class:`HTTPErrorProcessor`.
+ :class:`HTTPErrorProcessor`, :class:`DataHandler`.
If the Python installation has SSL support (i.e., if the :mod:`ssl` module
can be imported), :class:`HTTPSHandler` will also be added.
@@ -346,6 +346,11 @@
Open local files.
+.. class:: DataHandler()
+
+ Open data URLs.
+
+ .. versionadded:: 3.4
.. class:: FTPHandler()
@@ -972,6 +977,21 @@
hostname is given, an :exc:`URLError` is raised.
+.. _data-handler-objects:
+
+DataHandler Objects
+-------------------
+
+.. method:: DataHandler.data_open(req)
+
+ Read a data URL. This kind of URL contains the content encoded in the URL
+ itself. The data URL syntax is specified in :rfc:`2397`. This implementation
+ ignores white spaces in base64 encoded data URLs so the URL may be wrapped
+ in whatever source file it comes from. But even though some browsers don't
+ mind about a missing padding at the end of a base64 encoded data URL, this
+ implementation will raise an :exc:`ValueError` in that case.
+
+
.. _ftp-handler-objects:
FTPHandler Objects
@@ -1374,7 +1394,9 @@
pair: FTP; protocol
* Currently, only the following protocols are supported: HTTP (versions 0.9 and
- 1.0), FTP, and local files.
+ 1.0), FTP, local files, and data URLs.
+
+ .. versionchanged:: 3.4 Added support for data URLs.
* The caching feature of :func:`urlretrieve` has been disabled until someone
finds the time to hack proper processing of Expiration time headers.
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -337,6 +337,79 @@
with support.check_warnings(('',DeprecationWarning)):
urllib.request.URLopener()
+class urlopen_DataTests(unittest.TestCase):
+ """Test urlopen() opening a data URL."""
+
+ def setUp(self):
+ # text containing URL special- and unicode-characters
+ self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
+ # 2x1 pixel RGB PNG image with one black and one white pixel
+ self.image = (
+ b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
+ b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
+ b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
+ b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')
+
+ self.text_url = (
+ "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
+ "D%26%20%C3%B6%20%C3%84%20")
+ self.text_url_base64 = (
+ "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
+ "sJT0mIPYgxCA%3D")
+ # base64 encoded data URL that contains ignorable spaces,
+ # such as "\n", " ", "%0A", and "%20".
+ self.image_url = (
+ "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
+ "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
+ "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")
+
+ self.text_url_resp = urllib.request.urlopen(self.text_url)
+ self.text_url_base64_resp = urllib.request.urlopen(
+ self.text_url_base64)
+ self.image_url_resp = urllib.request.urlopen(self.image_url)
+
+ def test_interface(self):
+ # Make sure object returned by urlopen() has the specified methods
+ for attr in ("read", "readline", "readlines",
+ "close", "info", "geturl", "getcode", "__iter__"):
+ self.assertTrue(hasattr(self.text_url_resp, attr),
+ "object returned by urlopen() lacks %s attribute" %
+ attr)
+
+ def test_info(self):
+ self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
+ self.assertEqual(self.text_url_base64_resp.info().get_params(),
+ [('text/plain', ''), ('charset', 'ISO-8859-1')])
+ self.assertEqual(self.image_url_resp.info()['content-length'],
+ str(len(self.image)))
+ self.assertEqual(urllib.request.urlopen("data:,").info().get_params(),
+ [('text/plain', ''), ('charset', 'US-ASCII')])
+
+ def test_geturl(self):
+ self.assertEqual(self.text_url_resp.geturl(), self.text_url)
+ self.assertEqual(self.text_url_base64_resp.geturl(),
+ self.text_url_base64)
+ self.assertEqual(self.image_url_resp.geturl(), self.image_url)
+
+ def test_read_text(self):
+ self.assertEqual(self.text_url_resp.read().decode(
+ dict(self.text_url_resp.info().get_params())['charset']), self.text)
+
+ def test_read_text_base64(self):
+ self.assertEqual(self.text_url_base64_resp.read().decode(
+ dict(self.text_url_base64_resp.info().get_params())['charset']),
+ self.text)
+
+ def test_read_image(self):
+ self.assertEqual(self.image_url_resp.read(), self.image)
+
+ def test_missing_comma(self):
+ self.assertRaises(ValueError,urllib.request.urlopen,'data:text/plain')
+
+ def test_invalid_base64_data(self):
+ # missing padding character
+ self.assertRaises(ValueError,urllib.request.urlopen,'data:;base64,Cg=')
+
class urlretrieve_FileTests(unittest.TestCase):
"""Test urllib.urlretrieve() on local files"""
@@ -1313,6 +1386,7 @@
support.run_unittest(
urlopen_FileTests,
urlopen_HttpTests,
+ urlopen_DataTests,
urlretrieve_FileTests,
urlretrieve_HttpTests,
ProxyTests,
diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py
--- a/Lib/urllib/request.py
+++ b/Lib/urllib/request.py
@@ -103,7 +103,8 @@
from urllib.parse import (
urlparse, urlsplit, urljoin, unwrap, quote, unquote,
splittype, splithost, splitport, splituser, splitpasswd,
- splitattr, splitquery, splitvalue, splittag, to_bytes, urlunparse)
+ splitattr, splitquery, splitvalue, splittag, to_bytes,
+ unquote_to_bytes, urlunparse)
from urllib.response import addinfourl, addclosehook
# check for SSL
@@ -121,7 +122,7 @@
'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
'AbstractBasicAuthHandler', 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler',
'AbstractDigestAuthHandler', 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler',
- 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler',
+ 'HTTPHandler', 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
'UnknownHandler', 'HTTPErrorProcessor',
# Functions
'urlopen', 'install_opener', 'build_opener',
@@ -535,7 +536,8 @@
opener = OpenerDirector()
default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
HTTPDefaultErrorHandler, HTTPRedirectHandler,
- FTPHandler, FileHandler, HTTPErrorProcessor]
+ FTPHandler, FileHandler, HTTPErrorProcessor,
+ DataHandler]
if hasattr(http.client, "HTTPSConnection"):
default_classes.append(HTTPSHandler)
skip = set()
@@ -1541,6 +1543,36 @@
self.cache.clear()
self.timeout.clear()
+class DataHandler(BaseHandler):
+ def data_open(self, req):
+ # data URLs as specified in RFC 2397.
+ #
+ # ignores POSTed data
+ #
+ # syntax:
+ # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
+ # mediatype := [ type "/" subtype ] *( ";" parameter )
+ # data := *urlchar
+ # parameter := attribute "=" value
+ url = req.full_url
+
+ scheme, data = url.split(":",1)
+ mediatype, data = data.split(",",1)
+
+ # even base64 encoded data URLs might be quoted so unquote in any case:
+ data = unquote_to_bytes(data)
+ if mediatype.endswith(";base64"):
+ data = base64.decodebytes(data)
+ mediatype = mediatype[:-7]
+
+ if not mediatype:
+ mediatype = "text/plain;charset=US-ASCII"
+
+ headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" %
+ (mediatype, len(data)))
+
+ return addinfourl(io.BytesIO(data), headers, url)
+
# Code move from the old urllib module
diff --git a/Misc/ACKS b/Misc/ACKS
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -884,6 +884,7 @@
Todd R. Palmer
Juan David Ibáñez Palomar
Jan Palus
+Mathias Panzenböck
M. Papillon
Peter Parente
Alexandre Parenteau
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -138,6 +138,9 @@
Library
-------
+- Issue #16423: urllib.request now has support for ``data:`` URLs. Patch by
+ Mathias Panzenböck.
+
- Issue #4473: Add a POP3.stls() to switch a clear-text POP3 session into
an encrypted POP3 session, on supported servers. Patch by Lorenzo Catucci.
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list