r87544 - in python/branches/release27-maint/Lib: HTMLParser.py test/test_htmlparser.py

Author: senthil.kumaran Date: Tue Dec 28 17:05:07 2010 New Revision: 87544 Log: Fix Issue10759 - HTMLParser.unescape() to handle malformed charrefs. Modified: python/branches/release27-maint/Lib/HTMLParser.py python/branches/release27-maint/Lib/test/test_htmlparser.py Modified: python/branches/release27-maint/Lib/HTMLParser.py ============================================================================== --- python/branches/release27-maint/Lib/HTMLParser.py (original) +++ python/branches/release27-maint/Lib/HTMLParser.py Tue Dec 28 17:05:07 2010 @@ -367,13 +367,16 @@ return s def replaceEntities(s): s = s.groups()[0] - if s[0] == "#": - s = s[1:] - if s[0] in ['x','X']: - c = int(s[1:], 16) - else: - c = int(s) - return unichr(c) + try: + if s[0] == "#": + s = s[1:] + if s[0] in ['x','X']: + c = int(s[1:], 16) + else: + c = int(s) + return unichr(c) + except ValueError: + return '&#'+s+';' else: # Cannot use name2codepoint directly, because HTMLParser supports apos, # which is not part of HTML 4 Modified: python/branches/release27-maint/Lib/test/test_htmlparser.py ============================================================================== --- python/branches/release27-maint/Lib/test/test_htmlparser.py (original) +++ python/branches/release27-maint/Lib/test/test_htmlparser.py Tue Dec 28 17:05:07 2010 @@ -320,6 +320,11 @@ ("endtag", "p"), ]) + def test_unescape_function(self): + parser = HTMLParser.HTMLParser() + self.assertEqual(parser.unescape('&#bad;'),'&#bad;') + self.assertEqual(parser.unescape('&#0038;'),'&') + def test_main(): test_support.run_unittest(HTMLParserTestCase)
participants (1)
-
senthil.kumaran