[Python-checkins] cpython: #21047: set the default value for the *convert_charrefs* argument of HTMLParser
ezio.melotti
python-checkins at python.org
Sat Aug 2 17:36:35 CEST 2014
http://hg.python.org/cpython/rev/4425024f2e01
changeset: 91963:4425024f2e01
user: Ezio Melotti <ezio.melotti at gmail.com>
date: Sat Aug 02 18:36:12 2014 +0300
summary:
#21047: set the default value for the *convert_charrefs* argument of HTMLParser to True. Patch by Berker Peksag.
files:
Doc/library/html.parser.rst | 9 +++++----
Lib/html/parser.py | 10 ++--------
Lib/test/test_htmlparser.py | 7 ++-----
Misc/NEWS | 3 +++
4 files changed, 12 insertions(+), 17 deletions(-)
diff --git a/Doc/library/html.parser.rst b/Doc/library/html.parser.rst
--- a/Doc/library/html.parser.rst
+++ b/Doc/library/html.parser.rst
@@ -16,15 +16,13 @@
This module defines a class :class:`HTMLParser` which serves as the basis for
parsing text files formatted in HTML (HyperText Mark-up Language) and XHTML.
-.. class:: HTMLParser(*, convert_charrefs=False)
+.. class:: HTMLParser(*, convert_charrefs=True)
Create a parser instance able to parse invalid markup.
- If *convert_charrefs* is ``True`` (default: ``False``), all character
+ If *convert_charrefs* is ``True`` (the default), all character
references (except the ones in ``script``/``style`` elements) are
automatically converted to the corresponding Unicode characters.
- The use of ``convert_charrefs=True`` is encouraged and will become
- the default in Python 3.5.
An :class:`.HTMLParser` instance is fed HTML data and calls handler methods
when start tags, end tags, text, comments, and other markup elements are
@@ -37,6 +35,9 @@
.. versionchanged:: 3.4
*convert_charrefs* keyword argument added.
+ .. versionchanged:: 3.5
+ The default value for argument *convert_charrefs* is now ``True``.
+
Example HTML Parser Application
-------------------------------
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -59,7 +59,6 @@
endtagfind = re.compile('</\s*([a-zA-Z][-.a-zA-Z0-9:_]*)\s*>')
-_default_sentinel = object()
class HTMLParser(_markupbase.ParserBase):
"""Find tags and other markup and call handler functions.
@@ -85,17 +84,12 @@
CDATA_CONTENT_ELEMENTS = ("script", "style")
- def __init__(self, *, convert_charrefs=_default_sentinel):
+ def __init__(self, *, convert_charrefs=True):
"""Initialize and reset this instance.
- If convert_charrefs is True (default: False), all character references
+ If convert_charrefs is True (the default), all character references
are automatically converted to the corresponding Unicode characters.
"""
- if convert_charrefs is _default_sentinel:
- convert_charrefs = False # default
- warnings.warn("The value of convert_charrefs will become True in "
- "3.5. You are encouraged to set the value explicitly.",
- DeprecationWarning, stacklevel=2)
self.convert_charrefs = convert_charrefs
self.reset()
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -346,7 +346,8 @@
self._run_check(html, expected)
def test_convert_charrefs(self):
- collector = lambda: EventCollectorCharrefs(convert_charrefs=True)
+ # default value for convert_charrefs is now True
+ collector = lambda: EventCollectorCharrefs()
self.assertTrue(collector().convert_charrefs)
charrefs = ['&quot;', '&#34;', '&#x22;', '&quot', '&#34', '&#x22']
# check charrefs in the middle of the text/attributes
@@ -383,10 +384,6 @@
self._run_check('no charrefs here', [('data', 'no charrefs here')],
collector=collector())
- def test_deprecation_warnings(self):
- with self.assertWarns(DeprecationWarning):
- EventCollector() # convert_charrefs not passed explicitly
-
# the remaining tests were for the "tolerant" parser (which is now
# the default), and check various kind of broken markup
def test_tolerant_parsing(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -121,6 +121,9 @@
Library
-------
+- Issue #21047: set the default value for the *convert_charrefs* argument
+ of HTMLParser to True. Patch by Berker Peksag.
+
- Add an __all__ to html.entities.
- Issue #15114: the strict mode and argument of HTMLParser, HTMLParser.error,
--
Repository URL: http://hg.python.org/cpython
More information about the Python-checkins
mailing list