UnicodeError: ASCII encoding error: ordinal not in range(128)
Hasan Diwan
hdiwan at localhost.
Sat Mar 1 19:17:45 EST 2003
I have a script that parses RSS feeds into HTML. Some of these RSS feeds
contain characters outside the 7-bit ASCII range, and the page is set to
display as UTF-8. My question is how to force the script below to output
strings in UTF-8 rather than ASCII:
#!/sw/bin/python
from xml.dom import minidom
import string
import urllib
import time
import sys
import re
import zlib
#Change these to reflect your reality
class news:
    """Helpers for loading an RSS document and pulling text out of it.

    The methods operate on ``xml.dom.minidom`` nodes: ``load`` parses a
    feed from a URL, ``getElementsByTagName`` / ``first`` locate elements
    by tag name across a list of candidate namespaces, and ``textOf``
    returns the character data directly inside an element.
    """

    # Namespace URIs tried, in order, when looking up core RSS elements.
    DEFAULT_NAMESPACES = (
        None,  # RSS 0.91, 0.92, 0.93, 0.94, 2.0
        'http://purl.org/rss/1.0/',  # RSS 1.0
        'http://my.netscape.com/rdf/simple/0.9/',  # RSS 0.90
    )

    # Namespace URIs to pass as possibleNamespaces for Dublin Core elements.
    DUBLIN_CORE = ('http://purl.org/dc/elements/1.1/',)

    def load(self, url):
        """Fetch and parse the XML document at ``url``.

        Returns the parsed minidom document, or ``None`` when the URL
        cannot be opened or its content cannot be parsed; in that case
        ``'unreachable URL <url>'`` is written to standard output.
        """
        try:
            # urllib.urlopen is the Python 2 API this script is written for.
            stream = urllib.urlopen(url)
            try:
                return minidom.parse(stream)
            finally:
                # Release the connection even when parsing fails.
                stream.close()
        except Exception:
            # Best effort: report the failure and let the caller see None.
            # The trailing space stands in for the separator that the
            # original "print ...," statement left before the next output.
            sys.stdout.write('unreachable URL ' + url + ' ')
            return None

    def getElementsByTagName(self, node, tagName,
                             possibleNamespaces=DEFAULT_NAMESPACES):
        """Return the descendants of ``node`` named ``tagName``.

        Each namespace in ``possibleNamespaces`` is tried in order with a
        namespace-aware lookup, so prefixed elements such as ``dc:date``
        are found by their local name. The first non-empty result wins.
        If no namespace matches, a plain tag-name lookup is used as a
        fallback. Returns an empty list when nothing matches at all.
        """
        for namespace in possibleNamespaces:
            children = node.getElementsByTagNameNS(namespace, tagName)
            if children:
                return children
        # Fallback keeps elements in namespaces that were not listed.
        children = node.getElementsByTagName(tagName)
        if children:
            return children
        return []

    def first(self, node, tagName, possibleNamespaces=DEFAULT_NAMESPACES):
        """Return the first descendant of ``node`` named ``tagName``.

        The lookup follows the same namespace rules as
        ``getElementsByTagName``. Returns ``None`` when there is no match.
        """
        children = self.getElementsByTagName(node, tagName, possibleNamespaces)
        if children:
            return children[0]
        return None

    def textOf(self, node):
        """Return the text held directly inside ``node``.

        Only text and CDATA children contribute; child elements, comments
        and other node types are skipped. Returns ``""`` for a false
        ``node`` (for example the ``None`` that ``first`` returns).
        """
        if not node:
            return ""
        text_types = (node.TEXT_NODE, node.CDATA_SECTION_NODE)
        return "".join([child.data for child in node.childNodes
                        if child.nodeType in text_types])
--
Hasan Diwan <hdiwan at mac.com> OpenPGP keyID: 0x7EE3855B
http://ibn.com/~hdiwan
Fingerprint: 42F0 5758 C3EB BA1F ABD2 ED49 3390 CCF0 7EE3 855B
More information about the Python-list
mailing list