[Python-checkins] CVS: python/dist/src/Lib sgmllib.py,1.28,1.29

Fred L. Drake fdrake@users.sourceforge.net
Fri, 16 Mar 2001 12:05:00 -0800


Update of /cvsroot/python/python/dist/src/Lib
In directory usw-pr-cvs1:/tmp/cvs-serv20502

Modified Files:
	sgmllib.py 
Log Message:

Change RuntimeError to SGMLParseError, which subclasses RuntimeError
for backward compatibility.

Add support for SGML declaration syntax (<!....>) to some reasonable
degree.  This does not support everything allowed in SGML, but should
work with "real" HTML (internal subset in a DOCTYPE is not handled).
The content of the declaration is passed to the .handle_decl() method,
which can be overridden by subclasses.


Index: sgmllib.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sgmllib.py,v
retrieving revision 1.28
retrieving revision 1.29
diff -C2 -r1.28 -r1.29
*** sgmllib.py	2001/03/14 16:18:56	1.28
--- sgmllib.py	2001/03/16 20:04:57	1.29
***************
*** 40,44 ****
--- 40,52 ----
      r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./:;+*%?!&$\(\)_#=~]*))?')
  
+ declname = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*')
+ declstringlit = re.compile(r'(\'[^\']*\'|"[^"]*")\s*')
  
+ 
+ class SGMLParseError(RuntimeError):
+     """Exception raised for all parse errors (a RuntimeError subclass)."""
+     pass
+ 
+ 
  # SGML parser base class -- find tags and call handler functions.
  # Usage: p = SGMLParser(); p.feed(data); ...; p.close().
***************
*** 145,149 ****
                          i = i+1
                          continue
!                     i = match.end(0)
                      continue
              elif rawdata[i] == '&':
--- 153,162 ----
                          i = i+1
                          continue
!                     # This is some sort of declaration; in "HTML as
!                     # deployed," this should only be the document type
!                     # declaration ("<!DOCTYPE html...>").
!                     k = self.parse_declaration(i)
!                     if k < 0: break
!                     i = k
                      continue
              elif rawdata[i] == '&':
***************
*** 163,167 ****
                      continue
              else:
!                 raise RuntimeError, 'neither < nor & ??'
              # We get here only if incomplete matches but
              # nothing else
--- 176,180 ----
                      continue
              else:
!                 raise SGMLParseError('neither < nor & ??')
              # We get here only if incomplete matches but
              # nothing else
***************
*** 187,191 ****
          rawdata = self.rawdata
          if rawdata[i:i+4] != '<!--':
!             raise RuntimeError, 'unexpected call to handle_comment'
          match = commentclose.search(rawdata, i+4)
          if not match:
--- 200,204 ----
          rawdata = self.rawdata
          if rawdata[i:i+4] != '<!--':
!             raise SGMLParseError('unexpected call to parse_comment()')
          match = commentclose.search(rawdata, i+4)
          if not match:
***************
*** 196,204 ****
          return j-i
  
      # Internal -- parse processing instr, return length or -1 if not terminated
      def parse_pi(self, i):
          rawdata = self.rawdata
          if rawdata[i:i+2] != '<?':
!             raise RuntimeError, 'unexpected call to handle_pi'
          match = piclose.search(rawdata, i+2)
          if not match:
--- 209,248 ----
          return j-i
  
+     # Internal -- parse declaration, return new index or -1 if not terminated
+     def parse_declaration(self, i):
+         rawdata = self.rawdata
+         j = i + 2
+         # in practice, this should look like: ((name|stringlit) S*)+ '>'
+         while 1:
+             c = rawdata[j:j+1]
+             if c == ">":
+                 # end of declaration; pass the text between '<!' and '>' on
+                 self.handle_decl(rawdata[i+2:j])
+                 return j + 1
+             if not c:
+                 # end of buffer between tokens (test before any "c in ...")
+                 return -1
+             if c in "\"'":
+                 m = declstringlit.match(rawdata, j)
+                 if not m:
+                     # incomplete or an error?
+                     return -1
+                 j = m.end()
+             elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ":
+                 m = declname.match(rawdata, j)
+                 if not m:
+                     # incomplete or an error?
+                     return -1
+                 j = m.end()
+             else:
+                 raise SGMLParseError(
+                     "unexpected char in declaration: %s" % `rawdata[j]`)
+         assert 0, "can't get here!"
+ 
      # Internal -- parse processing instr, return length or -1 if not terminated
      def parse_pi(self, i):
          rawdata = self.rawdata
          if rawdata[i:i+2] != '<?':
!             raise SGMLParseError('unexpected call to parse_pi()')
          match = piclose.search(rawdata, i+2)
          if not match:
***************
*** 247,251 ****
              match = tagfind.match(rawdata, i+1)
              if not match:
!                 raise RuntimeError, 'unexpected call to parse_starttag'
              k = match.end(0)
              tag = rawdata[i+1:k].lower()
--- 291,295 ----
              match = tagfind.match(rawdata, i+1)
              if not match:
!                 raise SGMLParseError('unexpected call to parse_starttag')
              k = match.end(0)
              tag = rawdata[i+1:k].lower()
***************
*** 382,385 ****
--- 426,433 ----
      # Example -- handle comment, could be overridden
      def handle_comment(self, data):
+         pass
+ 
+     # Example -- handle declaration (text inside "<!...>"), could be overridden
+     def handle_decl(self, decl):
          pass