encoding problems (é and è)

bussiere bussiere bussiere at gmail.com
Thu Mar 23 12:07:31 CET 2006


hi i'am making a program for formatting string,
or
i've added :
#!/usr/bin/python
# -*- coding: utf-8 -*-

in the begining of my script but

 str = str.replace('Ç', 'C')
        str = str.replace('é', 'E')
        str = str.replace('É', 'E')
        str = str.replace('è', 'E')
        str = str.replace('È', 'E')
        str = str.replace('ê', 'E')


doesn't work it put me " and , instead of remplacing é by E


if someone have an idea it could be great

regards
Bussiere
ps : i've added the whole script under :






__________________________________________________________________________




#!/usr/bin/python
# -*- coding: utf-8 -*-
import fileinput, glob, string, sys, os, re

fichA=raw_input("Entrez le nom du fichier d'entree : ")
print ("\n")
fichC=raw_input("Entrez le nom du fichier de sortie : ")
print ("\n")
normalisation1 = raw_input("Normaliser les adresses 1 (ex : Avenue->
AV) (O/N) ou A pour tout normaliser \n")
normalisation1 = normalisation1.upper()

if normalisation1 != "A":
    print ("\n")
    normalisation2 = raw_input("Normaliser les civilités (ex :
Docteur-> DR) (O/N) \n")
    normalisation2 = normalisation2.upper()
    print ("\n")
    normalisation3 = raw_input("Normaliser les Adresses 2 (ex :
Place-> PL) (O/N) \n")
    normalisation3 = normalisation3.upper()


    normalisation4 = raw_input("Normaliser les caracteres / et - (ex :
/ ->   ) (O/N) \n" )
    normalisation4 = normalisation4.upper()

if normalisation1 == "A":
    normalisation1 = "O"
    normalisation2 = "O"
    normalisation3 = "O"
    normalisation4 = "O"


fiA=open(fichA,"r")
fiC=open(fichC,"w")


compteur = 0

while 1:

    ligneA=fiA.readline()



    if ligneA == "":

        break

    if ligneA != "":
        str = ligneA
        str = str.replace('a', 'A')
        str = str.replace('b', 'B')
        str = str.replace('c', 'C')
        str = str.replace('d', 'D')
        str = str.replace('e', 'E')
        str = str.replace('f', 'F')
        str = str.replace('g', 'G')
        str = str.replace('h', 'H')
        str = str.replace('i', 'I')
        str = str.replace('j', 'J')
        str = str.replace('k', 'K')
        str = str.replace('l', 'L')
        str = str.replace('m', 'M')
        str = str.replace('n', 'N')
        str = str.replace('o', 'O')
        str = str.replace('p', 'P')
        str = str.replace('q', 'Q')
        str = str.replace('r', 'R')
        str = str.replace('s', 'S')
        str = str.replace('t', 'T')
        str = str.replace('u', 'U')
        str = str.replace('v', 'V')
        str = str.replace('w', 'W')
        str = str.replace('x', 'X')
        str = str.replace('y', 'Y')
        str = str.replace('z', 'Z')

        str = str.replace('ç', 'C')
        str = str.replace('Ç', 'C')
        str = str.replace('é', 'E')
        str = str.replace('É', 'E')
        str = str.replace('è', 'E')
        str = str.replace('È', 'E')
        str = str.replace('ê', 'E')
        str = str.replace('Ê', 'E')
        str = str.replace('ë', 'E')
        str = str.replace('Ë', 'E')
        str = str.replace('ä', 'A')
        str = str.replace('Ä', 'A')
        str = str.replace('à', 'A')
        str = str.replace('À', 'A')
        str = str.replace('Á', 'A')
        str = str.replace('Â', 'A')
        str = str.replace('Ä', 'A')
        str = str.replace('Ã', 'A')
        str = str.replace('â', 'A')
        str = str.replace('Ä', 'A')
        str = str.replace('ï', 'I')
        str = str.replace('Ï', 'I')
        str = str.replace('î', 'I')
        str = str.replace('Î', 'I')
        str = str.replace('ô', 'O')
        str = str.replace('Ô', 'O')
        str = str.replace('ö', 'O')
        str = str.replace('Ö', 'O')
        str = str.replace('Ú','U')
        str = str.replace('  ', ' ')
        str = str.replace('   ', ' ')
        str = str.replace('    ', ' ')



        if normalisation1 == "O":
            str = str.replace('AVENUE', 'AV')
            str = str.replace('BOULEVARD', 'BD')
            str = str.replace('FAUBOURG', 'FBG')
            str = str.replace('GENERAL', 'GAL')
            str = str.replace('COMMANDANT', 'CMDT')
            str = str.replace('MARECHAL', 'MAL')
            str = str.replace('PRESIDENT', 'PRDT')
            str = str.replace('SAINT', 'ST')
            str = str.replace('SAINTE', 'STE')
            str = str.replace('LOTISSEMENT', 'LOT')
            str = str.replace('RESIDENCE', 'RES')
            str = str.replace('IMMEUBLE', 'IMM')
            str = str.replace('IMEUBLE', 'IMM')
            str = str.replace('BATIMENT', 'BAT')

        if normalisation2 == "O":
            str = str.replace('MONSIEUR', 'M')
            str = str.replace('MR', 'M')
            str = str.replace('MADAME', 'MME')
            str = str.replace('MADEMOISELLE', 'MLLE')
            str = str.replace('DOCTEUR', 'DR')
            str = str.replace('PROFESSEUR', 'PR')
            str = str.replace('MONSEIGNEUR', 'MGR')
            str = str.replace('M ME','MME')


        if normalisation3 == "O":
            str = str.replace('PLACE', 'PL')
            str = str.replace('IMPASSE', 'IMP')
            str = str.replace('ESPLANADE', 'ESP')
            str = str.replace('ROND POINT', 'RPT')
            str = str.replace('ROUTE', 'RTE')
            str = str.replace('PASSAGE', 'PAS')
            str = str.replace('SQUARE', 'SQ')
            str = str.replace('ALLEE', 'ALL')
            str = str.replace('ESCALIER', 'ESC')
            str = str.replace('ETAGE', 'ETG')
            str = str.replace('PORTE', 'PTE')
            str = str.replace('APPARTEMENT', 'APT')
            str = str.replace('APARTEMENT', 'APT')
            str = str.replace('AVENUE', 'AV')
            str = str.replace('BOULEVARD', 'BD')
            str = str.replace('ZONE D ACTIVITE', 'ZA')
            str = str.replace('ZONE D ACTIVITEE', 'ZA')
            str = str.replace('ZONE D AMENAGEMENT CONCERTE', 'ZAC')
            str = str.replace('ZONE D AMENAGEMENT CONCERTEE', 'ZAC')
            str = str.replace('ZONE INDUSTRELLE', 'ZI')
            str = str.replace('CENTRE COMMERCIAL', 'CCAL')
            str = str.replace('CENTRE', 'CTRE')
            str = str.replace('C.CIAL','CCAL')
            str = str.replace('CTRE CIAL','CCAL')
            str = str.replace('CTRE CCAL','CCAL')
            str = str.replace('GALERIE','GAL')
            str = str.replace('MARTYR', 'M')
            str = str.replace('ANCIENS', 'AC')
            str = str.replace('ANCIEN', 'AC')
            str = str.replace('REVEREND PERE','R P')

        if normalisation4 == "O":
            str = str.replace(';\"', ' ')
            str = str.replace('\"', ' ')
            str = str.replace('\'', ' ')
            str = str.replace('-', ' ')
            str = str.replace(',', ' ')
            str = str.replace('\\', ' ')
            str = str.replace('\/', ' ')
            str = str.replace('&', ' ')
            str = str.replace('%', ' ')
            str = str.replace('*', ' ')
            str = str.replace('  ', ' ')
            str = str.replace('.', ' ')
            str = str.replace('_', ' ')
            str = str.replace('   ', ' ')
            str = str.replace('    ', ' ')
            str = str.replace('?', ' ')
            str = str.replace('%', ' ')
            str = str.replace('|', ' ')












        str = str.replace('  ', ' ')
        str = str.replace('   ', ' ')
        str = str.replace('    ', ' ')
        fiC.write(str)
        compteur += 1
        print compteur, "\n"


print "FINIT"
fiA.close()
fiC.close()



More information about the Python-list mailing list