[XML-SIG] sgmlop and html parsing
Alexandre Fayolle
Alexandre.Fayolle at logilab.fr
Wed Jan 14 05:49:06 EST 2004
Here's the patch I came up with. It fixes the bug that was reported on
Debian (http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=227219), but
I'd like to get some feedback before committing the change to CVS.
diff -u -r1.7 drv_sgmlop.py
--- xml/sax/drivers2/drv_sgmlop.py 21 Jan 2003 12:42:28 -0000 1.7
+++ xml/sax/drivers2/drv_sgmlop.py 14 Jan 2004 10:40:00 -0000
@@ -99,6 +99,29 @@
if self._lexical_handler is not None:
self._lexical_handler.comment(to_xml_string(data,self._encoding))
+ def handle_charref(self, name):
+ try:
+ if name[0] == 'x':
+ n = int(name[1:], 16)
+ else:
+ n = int(name)
+ except ValueError:
+ self.unknown_charref(name)
+ return
+ try:
+ unichar = unichr(n)
+ except NameError:
+ if not 0 <= n <= 255:
+ self.unknown_charref(name)
+ return
+ self.handle_data(chr(n))
+ else:
+ prev_encoding = self.getProperty(handler.property_encoding)
+ self.setProperty(handler.property_encoding, 'utf-8')
+ self.handle_data(unichar.encode('utf-8'))
+ self.setProperty(handler.property_encoding, prev_encoding)
+
+
def setProperty(self,name,value):
if name == handler.property_lexical_handler:
self._lexical_handler = value
@@ -113,6 +136,7 @@
return self._encoding
raise SAXNotRecognizedException("Property '%s' not recognized"
% name)
+
## def getFeature(self, name):
## if name == handler.feature_namespaces:
## return self._namespaces
--
Alexandre Fayolle
LOGILAB, Paris (France).
http://www.logilab.com http://www.logilab.fr http://www.logilab.org
Développement logiciel avancé - Intelligence Artificielle - Formations
More information about the XML-SIG mailing list