[Python-checkins] CVS: python/dist/src/Lib sgmllib.py,1.28,1.29
Fred L. Drake
fdrake@users.sourceforge.net
Fri, 16 Mar 2001 12:05:00 -0800
- Previous message: [Python-checkins] CVS: python/nondist/peps pep-0237.txt,1.2,1.3 pep-0238.txt,1.1,1.2 pep-0239.txt,1.1,1.2 pep-0240.txt,1.1,1.2
- Next message: [Python-checkins] CVS: python/dist/src/Doc/lib libsgmllib.tex,1.20,1.21
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv20502
Modified Files:
sgmllib.py
Log Message:
Change RuntimeError to SGMLParseError, which subclasses RuntimeError
for backward compatibility.
Add support for SGML declaration syntax (<!....>) to some reasonable
degree. This does not support everything allowed in SGML, but should
work with "real" HTML (internal subset in a DOCTYPE is not handled).
The content of the declaration is passed to the .handle_decl() method,
which can be overridden by subclasses.
Index: sgmllib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sgmllib.py,v
retrieving revision 1.28
retrieving revision 1.29
diff -C2 -r1.28 -r1.29
*** sgmllib.py 2001/03/14 16:18:56 1.28
--- sgmllib.py 2001/03/16 20:04:57 1.29
***************
*** 40,44 ****
--- 40,52 ----
r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
+ declname = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*')
+ declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*')
+
+ class SGMLParseError(RuntimeError):
+ """Exception raised for all parse errors; subclasses RuntimeError for backward compatibility."""
+ pass
+
+
# SGML parser base class -- find tags and call handler functions.
# Usage: p = SGMLParser(); p.feed(data); ...; p.close().
***************
*** 145,149 ****
i = i+1
continue
! i = match.end(0)
continue
elif rawdata[i] == '&':
--- 153,162 ----
i = i+1
continue
! # This is some sort of declaration; in "HTML as
! # deployed," this should only be the document type
! # declaration ("<!DOCTYPE html...>").
! k = self.parse_declaration(i)
! if k < 0: break
! i = k
continue
elif rawdata[i] == '&':
***************
*** 163,167 ****
continue
else:
! raise RuntimeError, 'neither < nor & ??'
# We get here only if incomplete matches but
# nothing else
--- 176,180 ----
continue
else:
! raise SGMLParserError('neither < nor & ??')
# We get here only if incomplete matches but
# nothing else
***************
*** 187,191 ****
rawdata = self.rawdata
if rawdata[i:i+4] != '<!--':
! raise RuntimeError, 'unexpected call to handle_comment'
match = commentclose.search(rawdata, i+4)
if not match:
--- 200,204 ----
rawdata = self.rawdata
if rawdata[i:i+4] != '<!--':
! raise SGMLParseError('unexpected call to parse_comment()')
match = commentclose.search(rawdata, i+4)
if not match:
***************
*** 196,204 ****
return j-i
# Internal -- parse processing instr, return length or -1 if not terminated
def parse_pi(self, i):
rawdata = self.rawdata
if rawdata[i:i+2] != '<?':
! raise RuntimeError, 'unexpected call to handle_pi'
match = piclose.search(rawdata, i+2)
if not match:
--- 209,248 ----
return j-i
+     # Internal -- parse declaration, return index just past '>' or -1 if not terminated
+     def parse_declaration(self, i):
+         rawdata = self.rawdata
+         j = i + 2
+         # in practice, this should look like: ((name|stringlit) S*)+ '>'
+         while 1:
+             c = rawdata[j:j+1]
+             if c == ">":
+                 # end of declaration syntax
+                 self.handle_decl(rawdata[i+2:j])
+                 return j + 1
+             if not c:
+                 # end of buffer between tokens (checked before the "in" tests)
+                 return -1
+             if c in "\"'":
+                 m = declstringlit.match(rawdata, j)
+                 if not m:
+                     # incomplete or an error?
+                     return -1
+                 j = m.end()
+             elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
+                 m = declname.match(rawdata, j)
+                 if not m:
+                     # incomplete or an error?
+                     return -1
+                 j = m.end()
+             else:
+                 raise SGMLParseError(
+                     "unexpected char in declaration: %s" % `rawdata[j]`)
+         assert 0, "can't get here!"
+
# Internal -- parse processing instr, return length or -1 if not terminated
def parse_pi(self, i):
rawdata = self.rawdata
if rawdata[i:i+2] != '<?':
! raise SGMLParseError('unexpected call to parse_pi()')
match = piclose.search(rawdata, i+2)
if not match:
***************
*** 247,251 ****
match = tagfind.match(rawdata, i+1)
if not match:
! raise RuntimeError, 'unexpected call to parse_starttag'
k = match.end(0)
tag = rawdata[i+1:k].lower()
--- 291,295 ----
match = tagfind.match(rawdata, i+1)
if not match:
! raise SGMLParseError('unexpected call to parse_starttag')
k = match.end(0)
tag = rawdata[i+1:k].lower()
***************
*** 382,385 ****
--- 426,433 ----
# Example -- handle comment, could be overridden; the default does nothing
def handle_comment(self, data):
+ pass
+
+ # Example -- handle declaration text (between "<!" and ">"), could be overridden
+ def handle_decl(self, decl):
pass
- Previous message: [Python-checkins] CVS: python/nondist/peps pep-0237.txt,1.2,1.3 pep-0238.txt,1.1,1.2 pep-0239.txt,1.1,1.2 pep-0240.txt,1.1,1.2
- Next message: [Python-checkins] CVS: python/dist/src/Doc/lib libsgmllib.tex,1.20,1.21
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]