[Python-checkins] cpython (2.7): #7311: fix HTMLParser to accept non-ASCII attribute values.
ezio.melotti
python-checkins at python.org
Tue Apr 5 19:40:55 CEST 2011
http://hg.python.org/cpython/rev/7d4dea76c476
changeset: 69160:7d4dea76c476
branch: 2.7
parent: 69153:c10d55c51d81
user: Ezio Melotti
date: Tue Apr 05 20:40:52 2011 +0300
summary:
#7311: fix HTMLParser to accept non-ASCII attribute values.
files:
Lib/HTMLParser.py | 2 +-
Lib/test/test_htmlparser.py | 17 +++++++++++++++++
Misc/NEWS | 2 ++
3 files changed, 20 insertions(+), 1 deletions(-)
diff --git a/Lib/HTMLParser.py b/Lib/HTMLParser.py
--- a/Lib/HTMLParser.py
+++ b/Lib/HTMLParser.py
@@ -26,7 +26,7 @@
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
attrfind = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
- r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
+ r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -208,6 +208,23 @@
("starttag", "a", [("href", "mailto:xyz@example.com")]),
])
+ def test_attr_nonascii(self):
+ # see issue 7311
+ self._run_check(u"<img src=/foo/bar.png alt=\u4e2d\u6587>", [
+ ("starttag", "img", [("src", "/foo/bar.png"),
+ ("alt", u"\u4e2d\u6587")]),
+ ])
+ self._run_check(u"<a title='\u30c6\u30b9\u30c8' "
+ u"href='\u30c6\u30b9\u30c8.html'>", [
+ ("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
+ ("href", u"\u30c6\u30b9\u30c8.html")]),
+ ])
+ self._run_check(u'<a title="\u30c6\u30b9\u30c8" '
+ u'href="\u30c6\u30b9\u30c8.html">', [
+ ("starttag", "a", [("title", u"\u30c6\u30b9\u30c8"),
+ ("href", u"\u30c6\u30b9\u30c8.html")]),
+ ])
+
def test_attr_entity_replacement(self):
self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [
("starttag", "a", [("b", "&><\"'")]),
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -47,6 +47,8 @@
Library
-------
+- Issue #7311: fix HTMLParser to accept non-ASCII attribute values.
+
- Issue #10963: Ensure that subprocess.communicate() never raises EPIPE.
- Issue #11662: Make urllib and urllib2 ignore redirections if the
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins mailing list