UnicodeError: ASCII encoding error: ordinal not in range(128)

Hasan Diwan hdiwan at localhost.
Sat Mar 1 19:17:45 EST 2003


I have a script that parses RSS feeds into HTML. Some of these RSS feeds
contain characters outside the 7-bit ASCII range, and the page is set to
display as UTF-8. My question is how to force the script below to output
strings encoded as UTF-8 rather than ASCII:
#!/sw/bin/python
from xml.dom import minidom
import string
import urllib
import time
import sys
import re
import zlib
#Change these to reflect your reality
class news:
    """Helpers for fetching an RSS feed and reading values out of its DOM.

    All lookups operate on xml.dom.minidom nodes. The methods keep no
    instance state, so a single instance can be reused for any number of
    feeds.
    """

    # Namespace URIs tried, in order, when a plain tag-name lookup finds
    # nothing.
    DEFAULT_NAMESPACES = (
        None,  # RSS 0.91, 0.92, 0.93, 0.94, 2.0
        'http://purl.org/rss/1.0/',  # RSS 1.0
        'http://my.netscape.com/rdf/simple/0.9/',  # RSS 0.90
    )

    def load(self, url):
        """Fetch ``url`` and parse it as XML.

        Returns the parsed minidom Document, or None when the resource
        cannot be opened or parsed; in that case 'unreachable URL <url>'
        is written to standard output.
        """
        try:
            opener = urllib.urlopen  # Python 2 location
        except AttributeError:
            # Python 3 moved urlopen into urllib.request.
            from urllib.request import urlopen as opener
        doc = None
        try:
            stream = opener(url)
            try:
                doc = minidom.parse(stream)
            finally:
                # Release the connection whether or not parsing succeeded.
                stream.close()
        except Exception:
            # Stay best-effort for any fetch/parse failure, but let
            # KeyboardInterrupt and SystemExit propagate.
            # sys.stdout.write keeps this valid on Python 2 and 3; the
            # trailing space stands in for the separator the former
            # ``print ...,`` left before the next printed item.
            sys.stdout.write('unreachable URL ' + url + ' ')
        return doc

    def getElementsByTagName(self, node, tagName,
                             possibleNamespaces=DEFAULT_NAMESPACES):
        """Return the descendants of ``node`` named ``tagName``.

        The qualified-name lookup is tried first, so ``tagName`` may be
        given exactly as it is written in the document. If that finds
        nothing, each URI in ``possibleNamespaces`` is tried in order,
        with ``tagName`` treated as a local name, and the first non-empty
        result wins. Returns an empty list when nothing matches.
        """
        children = node.getElementsByTagName(tagName)
        if len(children):
            return children
        for namespace in possibleNamespaces:
            children = node.getElementsByTagNameNS(namespace, tagName)
            if len(children):
                return children
        return []

    def first(self, node, tagName, possibleNamespaces=DEFAULT_NAMESPACES):
        """Return the first descendant found by getElementsByTagName.

        Takes the same arguments as getElementsByTagName and returns None
        when nothing matches.
        """
        children = self.getElementsByTagName(node, tagName,
                                             possibleNamespaces)
        if len(children):
            return children[0]
        return None

    def textOf(self, node):
        """Return the concatenated text directly inside ``node``.

        Only immediate text and CDATA children contribute; child
        elements, comments and processing instructions are skipped.
        A false ``node`` (e.g. None from first()) yields "".

        On Python 2 the DOM text is unicode; encode the result explicitly
        (for example ``.encode('utf-8')``) before writing it to a byte
        stream, otherwise the implicit ASCII conversion fails on
        non-ASCII characters.
        """
        if not node:
            return ""
        text_types = (minidom.Node.TEXT_NODE,
                      minidom.Node.CDATA_SECTION_NODE)
        return "".join([child.data for child in node.childNodes
                        if child.nodeType in text_types])
# Namespace URI for Dublin Core metadata elements, wrapped in a one-item tuple.
# NOTE(review): presumably meant to be passed as possibleNamespaces to the
# news lookup helpers; no use is visible in this excerpt -- confirm.
DUBLIN_CORE = ('http://purl.org/dc/elements/1.1/',)

-- 
Hasan Diwan <hdiwan at mac.com> OpenPGP keyID: 0x7EE3855B
http://ibn.com/~hdiwan 
Fingerprint: 42F0 5758 C3EB BA1F ABD2  ED49 3390 CCF0 7EE3 855B




More information about the Python-list mailing list