UnicodeError: ASCII decoding error: ordinal not in range(128)

eugene kim eugene1977 at hotmail.com
Fri Jul 19 13:38:20 EDT 2002


hi..
i'm trying to parse a xml file and feed into postgresql...
the xml file contains korean..
is this python problem or postgres problem?
and of course how do i solve this??
thanks a lot

xml file format
----------------
 <item title="Free Blonde Centerfold Gallery" 
url="http://xfreebies.com/devon/index.html" first_\
time="1026674445" last_time="1026674445" visits="1"/>
  <item title="Hotmail Attachments" 
url="http://lw12fd.law12.hotmail.msn.com/cgi-bin/attach" firs\
t_time="1026873675" last_time="1026881258" visits="2"/>
  <item title="2000 MP3 : MP3 
검색엔ì§&\
#132;" 
url="http://music.2000mp3.com/artist_viewk.php3?mode=abcd&name=A&lg=" 
first_time="\
1026491502" last_time="1026491502" visits="1"/>
--------------------

error msg
--------------
self.url: http://music.2000mp3.com/artist_viewk.php3?mode=abcd&name=A&lg=
self.title: 2000 MP3 : MP3 검색엔진
Traceback (most recent call last):
  File "second.py", line 78, in ?
    parser.parse("smallhistory2.xml")
  File "/usr/lib/python2.2/xml/sax/expatreader.py", line 90, in parse
    xmlreader.IncrementalParser.parse(self, source)
  File "/usr/lib/python2.2/xml/sax/xmlreader.py", line 123, in parse
    self.feed(buffer)
  File "/usr/lib/python2.2/xml/sax/expatreader.py", line 143, in feed
    self._parser.Parse(data, isFinal)
  File "/usr/lib/python2.2/xml/sax/expatreader.py", line 217, in 
start_element
    self._cont_handler.startElement(name, AttributesImpl(attrs))
  File "second.py", line 67, in startElement
    sqlclause = 'INSERT INTO history VALUES (' + tmp + self.url + tmp +","+  
tmp + self.title + tmp + "," + self.firsttime + "," + self.lasttime + "," + 
self.visits + " )"
UnicodeError: ASCII decoding error: ordinal not in range(128)
---------------------

code
----------------
#!/usr/bin/python
import sys

import string
import xml.sax.handler
import _pg
user=_pg.set_defuser('postgres')
db=_pg.connect('test','localhost')

def converter( oldString):

    newString = oldString
    for aChar in HistoryHandler.specialCharacters:

        if ( aChar in oldString):
            newString=string.replace(oldString, aChar, "\\"+aChar)

    return newString

class HistoryHandler(xml.sax.handler.ContentHandler):
    specialCharacters = [ '"', "'", "`"]
#     def __init__(self):
#         self.inItem =0
#         self.mapping={}
#         self.count=0

    def startElement(self, name, attributes):
        if name=="item":


            self.url=attributes["url"].encode('utf-8')
            self.title = attributes["title"].encode('utf-8')
            for aChar in self.specialCharacters:
                
                if ( aChar in self.url):
                    if aChar == "'":
                        print "yo"

                    print "before " + self.url
                    self.url=converter(self.url)
                    print "after " +self.url
                
                if ( aChar in self.title):
                    self.title=converter(self.title)

            self.firsttime=attributes["first_time"]
            self.lasttime=attributes["last_time"]
            self.visits=attributes["visits"]
            tmp = "\""
            sqlselect = 'SELECT url, visits FROM history WHERE url = \'' + 
self.url + "';"

            result = db.query(sqlselect).getresult()


            if(result):
                existUrl = str(result[0][0])
                existUrl = converter(existUrl)
                newVisits=str(result[0][1]+ int(self.visits))
                sqlclause = 'UPDATE history SET visits =' + newVisits + ' 
WHERE url = \'' + existUrl + '\''
            else:
                print "self.url: " + self.url
                print "self.title: " + self.title
                sqlclause = 'INSERT INTO history VALUES (' + tmp + self.url 
+ tmp +","+  tmp + self.title + tmp + "," + self.firsttime + "," + 
self.lasttime + "," + self.visits + " )"

            print sqlclause
            db.query(sqlclause)
            

# import pprint
parser = xml.sax.make_parser(  )
handler = HistoryHandler(  )
parser.setContentHandler(handler)
parser.parse("smallhistory2.xml")
# pprint.pprint(handler.mapping)
            



More information about the Python-list mailing list