[Python-checkins] CVS: python/dist/src/Lib HTMLParser.py,1.4,1.5
Fred L. Drake
fdrake@users.sourceforge.net
Mon, 20 Aug 2001 14:24:21 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv22193/Lib
Modified Files:
HTMLParser.py
Log Message:
Deal more appropriately with bare ampersands and pointy brackets; this
module has to deal with "classic" HTML-as-deployed as well as XHTML, so we
cannot be as strict as XHTML allows.
This closes SF bug #453059, but uses a different fix than suggested in
the bug comments.
Index: HTMLParser.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/HTMLParser.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** HTMLParser.py 2001/08/03 19:50:59 1.4
--- HTMLParser.py 2001/08/20 21:24:19 1.5
***************
*** 16,20 ****
interesting_normal = re.compile('[&<]')
interesting_cdata = re.compile(r'<(/|\Z)')
! incomplete = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*|#[0-9]*)?')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
--- 16,21 ----
interesting_normal = re.compile('[&<]')
interesting_cdata = re.compile(r'<(/|\Z)')
! incomplete = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*'
! '|#([0-9]*|[xX][0-9a-fA-F]*))?')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
***************
*** 186,194 ****
k = self.parse_declaration(i)
else:
! if i < n-1:
! raise HTMLParseError(
! "invalid '<' construct: %s" % `rawdata[i:i+2]`,
! self.getpos())
! k = -1
if k < 0:
if end:
--- 187,192 ----
k = self.parse_declaration(i)
else:
! self.handle_data("<")
! k = i + 1
if k < 0:
if end:
***************
*** 204,208 ****
k = match.end()
if rawdata[k-1] != ';':
! k = k-1
i = self.updatepos(i, k)
continue
--- 202,206 ----
k = match.end()
if rawdata[k-1] != ';':
! k = k - 1
i = self.updatepos(i, k)
continue
***************
*** 213,227 ****
k = match.end()
if rawdata[k-1] != ';':
! k = k-1
i = self.updatepos(i, k)
continue
! if incomplete.match(rawdata, i):
! if end:
raise HTMLParseError(
"EOF in middle of entity or char ref",
self.getpos())
return -1 # incomplete
! raise HTMLParseError("'&' not part of entity or char ref",
! self.getpos())
else:
assert 0, "interesting.search() lied"
--- 211,227 ----
k = match.end()
if rawdata[k-1] != ';':
! k = k - 1
i = self.updatepos(i, k)
continue
! match = incomplete.match(rawdata, i)
! if match:
! rest = rawdata[i:]
! if end and rest != "&" and match.group() == rest:
raise HTMLParseError(
"EOF in middle of entity or char ref",
self.getpos())
return -1 # incomplete
! self.handle_data("&")
! i = self.updatepos(i, i + 1)
else:
assert 0, "interesting.search() lied"