encoding problems (é and è)
Larry Bates
larry.bates at websafe.com
Thu Mar 23 10:55:55 EST 2006
Seems to work fine for me.
>>> x="éÇ"
>>> x=x.replace('é','E')
>>> x
'E\xc7'
>>> x=x.replace('Ç','C')
>>> x
'E\xc7'
>>> x=x.replace('Ç','C')
>>> x
'EC'
You should also be able to use the .upper() method to
uppercase everything in the string in a single statement:
tstr=ligneA.upper()
Note: you should never use 'str' as a variable name, as
it will mask the built-in str type.
-Larry Bates
bussiere bussiere wrote:
> hi, i'm making a program for formatting strings,
> however
> i've added :
> #!/usr/bin/python
> # -*- coding: utf-8 -*-
>
> at the beginning of my script but
>
> str = str.replace('Ç', 'C')
> str = str.replace('é', 'E')
> str = str.replace('É', 'E')
> str = str.replace('è', 'E')
> str = str.replace('È', 'E')
> str = str.replace('ê', 'E')
>
>
> doesn't work: it gives me " and , instead of replacing é with E
>
>
> if someone has an idea it would be great
>
> regards
> Bussiere
> ps : i've added the whole script under :
>
>
>
>
>
>
> __________________________________________________________________________
>
>
>
>
> #!/usr/bin/python
> # -*- coding: utf-8 -*-
> import fileinput, glob, string, sys, os, re
>
> fichA=raw_input("Entrez le nom du fichier d'entree : ")
> print ("\n")
> fichC=raw_input("Entrez le nom du fichier de sortie : ")
> print ("\n")
> normalisation1 = raw_input("Normaliser les adresses 1 (ex : Avenue->
> AV) (O/N) ou A pour tout normaliser \n")
> normalisation1 = normalisation1.upper()
>
> if normalisation1 != "A":
> print ("\n")
> normalisation2 = raw_input("Normaliser les civilités (ex :
> Docteur-> DR) (O/N) \n")
> normalisation2 = normalisation2.upper()
> print ("\n")
> normalisation3 = raw_input("Normaliser les Adresses 2 (ex :
> Place-> PL) (O/N) \n")
> normalisation3 = normalisation3.upper()
>
>
> normalisation4 = raw_input("Normaliser les caracteres / et - (ex :
> / -> ) (O/N) \n" )
> normalisation4 = normalisation4.upper()
>
> if normalisation1 == "A":
> normalisation1 = "O"
> normalisation2 = "O"
> normalisation3 = "O"
> normalisation4 = "O"
>
>
> fiA=open(fichA,"r")
> fiC=open(fichC,"w")
>
>
> compteur = 0
>
> while 1:
>
> ligneA=fiA.readline()
>
>
>
> if ligneA == "":
>
> break
>
> if ligneA != "":
> str = ligneA
> str = str.replace('a', 'A')
> str = str.replace('b', 'B')
> str = str.replace('c', 'C')
> str = str.replace('d', 'D')
> str = str.replace('e', 'E')
> str = str.replace('f', 'F')
> str = str.replace('g', 'G')
> str = str.replace('h', 'H')
> str = str.replace('i', 'I')
> str = str.replace('j', 'J')
> str = str.replace('k', 'K')
> str = str.replace('l', 'L')
> str = str.replace('m', 'M')
> str = str.replace('n', 'N')
> str = str.replace('o', 'O')
> str = str.replace('p', 'P')
> str = str.replace('q', 'Q')
> str = str.replace('r', 'R')
> str = str.replace('s', 'S')
> str = str.replace('t', 'T')
> str = str.replace('u', 'U')
> str = str.replace('v', 'V')
> str = str.replace('w', 'W')
> str = str.replace('x', 'X')
> str = str.replace('y', 'Y')
> str = str.replace('z', 'Z')
>
> str = str.replace('ç', 'C')
> str = str.replace('Ç', 'C')
> str = str.replace('é', 'E')
> str = str.replace('É', 'E')
> str = str.replace('è', 'E')
> str = str.replace('È', 'E')
> str = str.replace('ê', 'E')
> str = str.replace('Ê', 'E')
> str = str.replace('ë', 'E')
> str = str.replace('Ë', 'E')
> str = str.replace('ä', 'A')
> str = str.replace('Ä', 'A')
> str = str.replace('à', 'A')
> str = str.replace('À', 'A')
> str = str.replace('Á', 'A')
> str = str.replace('Â', 'A')
> str = str.replace('Ä', 'A')
> str = str.replace('Ã', 'A')
> str = str.replace('â', 'A')
> str = str.replace('Ä', 'A')
> str = str.replace('ï', 'I')
> str = str.replace('Ï', 'I')
> str = str.replace('î', 'I')
> str = str.replace('Î', 'I')
> str = str.replace('ô', 'O')
> str = str.replace('Ô', 'O')
> str = str.replace('ö', 'O')
> str = str.replace('Ö', 'O')
> str = str.replace('Ú','U')
> str = str.replace(' ', ' ')
> str = str.replace(' ', ' ')
> str = str.replace(' ', ' ')
>
>
>
> if normalisation1 == "O":
> str = str.replace('AVENUE', 'AV')
> str = str.replace('BOULEVARD', 'BD')
> str = str.replace('FAUBOURG', 'FBG')
> str = str.replace('GENERAL', 'GAL')
> str = str.replace('COMMANDANT', 'CMDT')
> str = str.replace('MARECHAL', 'MAL')
> str = str.replace('PRESIDENT', 'PRDT')
> str = str.replace('SAINT', 'ST')
> str = str.replace('SAINTE', 'STE')
> str = str.replace('LOTISSEMENT', 'LOT')
> str = str.replace('RESIDENCE', 'RES')
> str = str.replace('IMMEUBLE', 'IMM')
> str = str.replace('IMEUBLE', 'IMM')
> str = str.replace('BATIMENT', 'BAT')
>
> if normalisation2 == "O":
> str = str.replace('MONSIEUR', 'M')
> str = str.replace('MR', 'M')
> str = str.replace('MADAME', 'MME')
> str = str.replace('MADEMOISELLE', 'MLLE')
> str = str.replace('DOCTEUR', 'DR')
> str = str.replace('PROFESSEUR', 'PR')
> str = str.replace('MONSEIGNEUR', 'MGR')
> str = str.replace('M ME','MME')
>
>
> if normalisation3 == "O":
> str = str.replace('PLACE', 'PL')
> str = str.replace('IMPASSE', 'IMP')
> str = str.replace('ESPLANADE', 'ESP')
> str = str.replace('ROND POINT', 'RPT')
> str = str.replace('ROUTE', 'RTE')
> str = str.replace('PASSAGE', 'PAS')
> str = str.replace('SQUARE', 'SQ')
> str = str.replace('ALLEE', 'ALL')
> str = str.replace('ESCALIER', 'ESC')
> str = str.replace('ETAGE', 'ETG')
> str = str.replace('PORTE', 'PTE')
> str = str.replace('APPARTEMENT', 'APT')
> str = str.replace('APARTEMENT', 'APT')
> str = str.replace('AVENUE', 'AV')
> str = str.replace('BOULEVARD', 'BD')
> str = str.replace('ZONE D ACTIVITE', 'ZA')
> str = str.replace('ZONE D ACTIVITEE', 'ZA')
> str = str.replace('ZONE D AMENAGEMENT CONCERTE', 'ZAC')
> str = str.replace('ZONE D AMENAGEMENT CONCERTEE', 'ZAC')
> str = str.replace('ZONE INDUSTRELLE', 'ZI')
> str = str.replace('CENTRE COMMERCIAL', 'CCAL')
> str = str.replace('CENTRE', 'CTRE')
> str = str.replace('C.CIAL','CCAL')
> str = str.replace('CTRE CIAL','CCAL')
> str = str.replace('CTRE CCAL','CCAL')
> str = str.replace('GALERIE','GAL')
> str = str.replace('MARTYR', 'M')
> str = str.replace('ANCIENS', 'AC')
> str = str.replace('ANCIEN', 'AC')
> str = str.replace('REVEREND PERE','R P')
>
> if normalisation4 == "O":
> str = str.replace(';\"', ' ')
> str = str.replace('\"', ' ')
> str = str.replace('\'', ' ')
> str = str.replace('-', ' ')
> str = str.replace(',', ' ')
> str = str.replace('\\', ' ')
> str = str.replace('\/', ' ')
> str = str.replace('&', ' ')
> str = str.replace('%', ' ')
> str = str.replace('*', ' ')
> str = str.replace(' ', ' ')
> str = str.replace('.', ' ')
> str = str.replace('_', ' ')
> str = str.replace(' ', ' ')
> str = str.replace(' ', ' ')
> str = str.replace('?', ' ')
> str = str.replace('%', ' ')
> str = str.replace('|', ' ')
>
>
>
>
>
>
>
>
>
>
>
>
> str = str.replace(' ', ' ')
> str = str.replace(' ', ' ')
> str = str.replace(' ', ' ')
> fiC.write(str)
> compteur += 1
> print compteur, "\n"
>
>
> print "FINIT"
> fiA.close()
> fiC.close()
More information about the Python-list
mailing list