UnicodeError: ASCII decoding error: ordinal not in range(128)
eugene kim
eugene1977 at hotmail.com
Fri Jul 19 13:38:20 EDT 2002
hi..
i'm trying to parse a xml file and feed into postgresql...
the xml file contains korean..
is this python problem or postgres problem?
and of course how do i solve this??
thanks a lot
xml file format
----------------
<item title="Free Blonde Centerfold Gallery"
url="http://xfreebies.com/devon/index.html" first_\
time="1026674445" last_time="1026674445" visits="1"/>
<item title="Hotmail Attachments"
url="http://lw12fd.law12.hotmail.msn.com/cgi-bin/attach" firs\
t_time="1026873675" last_time="1026881258" visits="2"/>
<item title="2000 MP3 : MP3
검색엔ì§&\
#132;"
url="http://music.2000mp3.com/artist_viewk.php3?mode=abcd&name=A&lg="
first_time="\
1026491502" last_time="1026491502" visits="1"/>
--------------------
error msg
--------------
self.url: http://music.2000mp3.com/artist_viewk.php3?mode=abcd&name=A&lg=
self.title: 2000 MP3 : MP3 검색엔진
Traceback (most recent call last):
File "second.py", line 78, in ?
parser.parse("smallhistory2.xml")
File "/usr/lib/python2.2/xml/sax/expatreader.py", line 90, in parse
xmlreader.IncrementalParser.parse(self, source)
File "/usr/lib/python2.2/xml/sax/xmlreader.py", line 123, in parse
self.feed(buffer)
File "/usr/lib/python2.2/xml/sax/expatreader.py", line 143, in feed
self._parser.Parse(data, isFinal)
File "/usr/lib/python2.2/xml/sax/expatreader.py", line 217, in
start_element
self._cont_handler.startElement(name, AttributesImpl(attrs))
File "second.py", line 67, in startElement
sqlclause = 'INSERT INTO history VALUES (' + tmp + self.url + tmp +","+
tmp + self.title + tmp + "," + self.firsttime + "," + self.lasttime + "," +
self.visits + " )"
UnicodeError: ASCII decoding error: ordinal not in range(128)
---------------------
code
----------------
#!/usr/bin/python
import sys
import string
import xml.sax.handler
import _pg
user=_pg.set_defuser('postgres')
db=_pg.connect('test','localhost')
def converter( oldString):
newString = oldString
for aChar in HistoryHandler.specialCharacters:
if ( aChar in oldString):
newString=string.replace(oldString, aChar, "\\"+aChar)
return newString
class HistoryHandler(xml.sax.handler.ContentHandler):
specialCharacters = [ '"', "'", "`"]
# def __init__(self):
# self.inItem =0
# self.mapping={}
# self.count=0
def startElement(self, name, attributes):
if name=="item":
self.url=attributes["url"].encode('utf-8')
self.title = attributes["title"].encode('utf-8')
for aChar in self.specialCharacters:
if ( aChar in self.url):
if aChar == "'":
print "yo"
print "before " + self.url
self.url=converter(self.url)
print "after " +self.url
if ( aChar in self.title):
self.title=converter(self.title)
self.firsttime=attributes["first_time"]
self.lasttime=attributes["last_time"]
self.visits=attributes["visits"]
tmp = "\""
sqlselect = 'SELECT url, visits FROM history WHERE url = \'' +
self.url + "';"
result = db.query(sqlselect).getresult()
if(result):
existUrl = str(result[0][0])
existUrl = converter(existUrl)
newVisits=str(result[0][1]+ int(self.visits))
sqlclause = 'UPDATE history SET visits =' + newVisits + '
WHERE url = \'' + existUrl + '\''
else:
print "self.url: " + self.url
print "self.title: " + self.title
sqlclause = 'INSERT INTO history VALUES (' + tmp + self.url
+ tmp +","+ tmp + self.title + tmp + "," + self.firsttime + "," +
self.lasttime + "," + self.visits + " )"
print sqlclause
db.query(sqlclause)
# import pprint
parser = xml.sax.make_parser( )
handler = HistoryHandler( )
parser.setContentHandler(handler)
parser.parse("smallhistory2.xml")
# pprint.pprint(handler.mapping)
More information about the Python-list
mailing list