encoding problems (é and è)
bussiere bussiere
bussiere at gmail.com
Thu Mar 23 06:07:31 EST 2006
hi i'am making a program for formatting string,
or
i've added :
#!/usr/bin/python
# -*- coding: utf-8 -*-
in the begining of my script but
str = str.replace('Ç', 'C')
str = str.replace('é', 'E')
str = str.replace('É', 'E')
str = str.replace('è', 'E')
str = str.replace('È', 'E')
str = str.replace('ê', 'E')
doesn't work it put me " and , instead of remplacing é by E
if someone have an idea it could be great
regards
Bussiere
ps : i've added the whole script under :
__________________________________________________________________________
#!/usr/bin/python
# -*- coding: utf-8 -*-
import fileinput, glob, string, sys, os, re
fichA=raw_input("Entrez le nom du fichier d'entree : ")
print ("\n")
fichC=raw_input("Entrez le nom du fichier de sortie : ")
print ("\n")
normalisation1 = raw_input("Normaliser les adresses 1 (ex : Avenue->
AV) (O/N) ou A pour tout normaliser \n")
normalisation1 = normalisation1.upper()
if normalisation1 != "A":
print ("\n")
normalisation2 = raw_input("Normaliser les civilités (ex :
Docteur-> DR) (O/N) \n")
normalisation2 = normalisation2.upper()
print ("\n")
normalisation3 = raw_input("Normaliser les Adresses 2 (ex :
Place-> PL) (O/N) \n")
normalisation3 = normalisation3.upper()
normalisation4 = raw_input("Normaliser les caracteres / et - (ex :
/ -> ) (O/N) \n" )
normalisation4 = normalisation4.upper()
if normalisation1 == "A":
normalisation1 = "O"
normalisation2 = "O"
normalisation3 = "O"
normalisation4 = "O"
fiA=open(fichA,"r")
fiC=open(fichC,"w")
compteur = 0
while 1:
ligneA=fiA.readline()
if ligneA == "":
break
if ligneA != "":
str = ligneA
str = str.replace('a', 'A')
str = str.replace('b', 'B')
str = str.replace('c', 'C')
str = str.replace('d', 'D')
str = str.replace('e', 'E')
str = str.replace('f', 'F')
str = str.replace('g', 'G')
str = str.replace('h', 'H')
str = str.replace('i', 'I')
str = str.replace('j', 'J')
str = str.replace('k', 'K')
str = str.replace('l', 'L')
str = str.replace('m', 'M')
str = str.replace('n', 'N')
str = str.replace('o', 'O')
str = str.replace('p', 'P')
str = str.replace('q', 'Q')
str = str.replace('r', 'R')
str = str.replace('s', 'S')
str = str.replace('t', 'T')
str = str.replace('u', 'U')
str = str.replace('v', 'V')
str = str.replace('w', 'W')
str = str.replace('x', 'X')
str = str.replace('y', 'Y')
str = str.replace('z', 'Z')
str = str.replace('ç', 'C')
str = str.replace('Ç', 'C')
str = str.replace('é', 'E')
str = str.replace('É', 'E')
str = str.replace('è', 'E')
str = str.replace('È', 'E')
str = str.replace('ê', 'E')
str = str.replace('Ê', 'E')
str = str.replace('ë', 'E')
str = str.replace('Ë', 'E')
str = str.replace('ä', 'A')
str = str.replace('Ä', 'A')
str = str.replace('à', 'A')
str = str.replace('À', 'A')
str = str.replace('Á', 'A')
str = str.replace('Â', 'A')
str = str.replace('Ä', 'A')
str = str.replace('Ã', 'A')
str = str.replace('â', 'A')
str = str.replace('Ä', 'A')
str = str.replace('ï', 'I')
str = str.replace('Ï', 'I')
str = str.replace('î', 'I')
str = str.replace('Î', 'I')
str = str.replace('ô', 'O')
str = str.replace('Ô', 'O')
str = str.replace('ö', 'O')
str = str.replace('Ö', 'O')
str = str.replace('Ú','U')
str = str.replace(' ', ' ')
str = str.replace(' ', ' ')
str = str.replace(' ', ' ')
if normalisation1 == "O":
str = str.replace('AVENUE', 'AV')
str = str.replace('BOULEVARD', 'BD')
str = str.replace('FAUBOURG', 'FBG')
str = str.replace('GENERAL', 'GAL')
str = str.replace('COMMANDANT', 'CMDT')
str = str.replace('MARECHAL', 'MAL')
str = str.replace('PRESIDENT', 'PRDT')
str = str.replace('SAINT', 'ST')
str = str.replace('SAINTE', 'STE')
str = str.replace('LOTISSEMENT', 'LOT')
str = str.replace('RESIDENCE', 'RES')
str = str.replace('IMMEUBLE', 'IMM')
str = str.replace('IMEUBLE', 'IMM')
str = str.replace('BATIMENT', 'BAT')
if normalisation2 == "O":
str = str.replace('MONSIEUR', 'M')
str = str.replace('MR', 'M')
str = str.replace('MADAME', 'MME')
str = str.replace('MADEMOISELLE', 'MLLE')
str = str.replace('DOCTEUR', 'DR')
str = str.replace('PROFESSEUR', 'PR')
str = str.replace('MONSEIGNEUR', 'MGR')
str = str.replace('M ME','MME')
if normalisation3 == "O":
str = str.replace('PLACE', 'PL')
str = str.replace('IMPASSE', 'IMP')
str = str.replace('ESPLANADE', 'ESP')
str = str.replace('ROND POINT', 'RPT')
str = str.replace('ROUTE', 'RTE')
str = str.replace('PASSAGE', 'PAS')
str = str.replace('SQUARE', 'SQ')
str = str.replace('ALLEE', 'ALL')
str = str.replace('ESCALIER', 'ESC')
str = str.replace('ETAGE', 'ETG')
str = str.replace('PORTE', 'PTE')
str = str.replace('APPARTEMENT', 'APT')
str = str.replace('APARTEMENT', 'APT')
str = str.replace('AVENUE', 'AV')
str = str.replace('BOULEVARD', 'BD')
str = str.replace('ZONE D ACTIVITE', 'ZA')
str = str.replace('ZONE D ACTIVITEE', 'ZA')
str = str.replace('ZONE D AMENAGEMENT CONCERTE', 'ZAC')
str = str.replace('ZONE D AMENAGEMENT CONCERTEE', 'ZAC')
str = str.replace('ZONE INDUSTRELLE', 'ZI')
str = str.replace('CENTRE COMMERCIAL', 'CCAL')
str = str.replace('CENTRE', 'CTRE')
str = str.replace('C.CIAL','CCAL')
str = str.replace('CTRE CIAL','CCAL')
str = str.replace('CTRE CCAL','CCAL')
str = str.replace('GALERIE','GAL')
str = str.replace('MARTYR', 'M')
str = str.replace('ANCIENS', 'AC')
str = str.replace('ANCIEN', 'AC')
str = str.replace('REVEREND PERE','R P')
if normalisation4 == "O":
str = str.replace(';\"', ' ')
str = str.replace('\"', ' ')
str = str.replace('\'', ' ')
str = str.replace('-', ' ')
str = str.replace(',', ' ')
str = str.replace('\\', ' ')
str = str.replace('\/', ' ')
str = str.replace('&', ' ')
str = str.replace('%', ' ')
str = str.replace('*', ' ')
str = str.replace(' ', ' ')
str = str.replace('.', ' ')
str = str.replace('_', ' ')
str = str.replace(' ', ' ')
str = str.replace(' ', ' ')
str = str.replace('?', ' ')
str = str.replace('%', ' ')
str = str.replace('|', ' ')
str = str.replace(' ', ' ')
str = str.replace(' ', ' ')
str = str.replace(' ', ' ')
fiC.write(str)
compteur += 1
print compteur, "\n"
print "FINIT"
fiA.close()
fiC.close()
More information about the Python-list
mailing list