r87542 - in python/branches/py3k/Lib: html/parser.py test/test_htmlparser.py

Author: senthil.kumaran
Date: Tue Dec 28 16:55:16 2010
New Revision: 87542

Log:
Fix Issue10759 - html.parser.unescape() fails on HTML entities with incorrect syntax

Modified:
   python/branches/py3k/Lib/html/parser.py
   python/branches/py3k/Lib/test/test_htmlparser.py

Modified: python/branches/py3k/Lib/html/parser.py
==============================================================================
--- python/branches/py3k/Lib/html/parser.py (original)
+++ python/branches/py3k/Lib/html/parser.py Tue Dec 28 16:55:16 2010
@@ -434,13 +434,16 @@
             return s
         def replaceEntities(s):
             s = s.groups()[0]
-            if s[0] == "#":
-                s = s[1:]
-                if s[0] in ['x','X']:
-                    c = int(s[1:], 16)
-                else:
-                    c = int(s)
-                return chr(c)
+            try:
+                if s[0] == "#":
+                    s = s[1:]
+                    if s[0] in ['x','X']:
+                        c = int(s[1:], 16)
+                    else:
+                        c = int(s)
+                    return chr(c)
+            except ValueError:
+                return '&#'+ s +';'
             else:
                 # Cannot use name2codepoint directly, because HTMLParser
                 # supports apos, which is not part of HTML 4

Modified: python/branches/py3k/Lib/test/test_htmlparser.py
==============================================================================
--- python/branches/py3k/Lib/test/test_htmlparser.py (original)
+++ python/branches/py3k/Lib/test/test_htmlparser.py Tue Dec 28 16:55:16 2010
@@ -356,6 +356,11 @@
                                 [('action', 'bogus|()value')])],
                         collector = self.collector)
 
+    def test_unescape_function(self):
+        p = html.parser.HTMLParser()
+        self.assertEqual(p.unescape('&#bad;'),'&#bad;')
+        self.assertEqual(p.unescape('&#0038;'),'&')
+
 
 def test_main():
     support.run_unittest(HTMLParserTestCase, HTMLParserTolerantTestCase)
participants (1)
-
senthil.kumaran