[Mailman-i18n] translation checker
Simone Piunno
pioppo@ferrara.linux.it
Fri, 24 May 2002 23:23:44 +0200
--jI8keyz6grp/JLjh
Content-Type: text/plain; charset=us-ascii
Content-Disposition: inline
Hi everyone,
During the Italian translation of Mailman I made many typos
which, in the worst case, caused Mailman to misbehave. I think the
real problem is that I couldn't find any editor or tool which performs
the following checks:
- when translating a text template, if a given %s or %(var)s is
in the original file it probably should appear the same number
of times in the translated text.
- in the translated text you shouldn't have a %(var)s if that
%(var)s wasn't in the original text.
- when translating an html template, the same concept should
apply for <MM-*> tags.
- when translating the .po file, the same concept should apply
for each msgid/msgstr pair.
So, to ease the process, I've written a small script to check my
translation whenever I make an update. This script has been
generalized and now you can use it for your language too.
Just to give an idea of how effective the tool can be, this
simple bash script:
-----------------------------------------------
for i in big5 cs de es fi fr hu it ja ko no ru;
do
echo -ne "$i:\t";
transcheck -q $i;
done
-----------------------------------------------
applied to the last Mailman-CVS has reported:
big5: 33 warnings in 9 files
cs: 192 warnings in 3 files
de: 136 warnings in 6 files
es: 115 warnings in 12 files
fi: 312 warnings in 6 files
fr: 58 warnings in 4 files
hu: 105 warnings in 6 files
it: 1 warnings in 1 files
ja: 182 warnings in 3 files
ko: 231 warnings in 8 files
no: 30 warnings in 3 files
ru: 341 warnings in 13 files
Before using my script, the Italian translation had about 70
warnings. The one still reported is actually not an error: a
%(var)s appears twice in the original English text, but in
Italian we intentionally have only one.
Using the script without the -q switch, you can see a detailed
report.
The script isn't perfect and can be improved in many ways:
- better regexp to search for Python %(var)s
- better .po parser
- better exception handling for strange situations
- generalizations to use it in other python projects
but it's already good (at least for italian).
Feel free to use it and to report any feedback.
Cheers,
Simone
--
Simone Piunno, FerraraLUG - http://members.ferrara.linux.it/pioppo
--jI8keyz6grp/JLjh
Content-Type: text/plain; charset=us-ascii
Content-Disposition: attachment; filename=transcheck
#!/usr/bin/python
#
# transcheck - (c) 2002 by Simone Piunno <pioppo@ferrara.linux.it>
#
# This program is free software; you can redistribute it and/or modify it
# under the terms of the version 2.0 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
"""
Check a given Mailman translation, making sure that variables and
tags referenced in translation are the same variables and tags in
the original templates and catalog.
Usage:
cd $MAILMAN_DIR
%(program)s [-q] <lang>
Where <lang> is your country code (e.g. 'it' for Italy) and -q is
to ask for a brief summary.
"""
import sys
import re
import os
import getopt
class TransChecker:
    """Check a translation by comparing it with the original string.

    Every match of ``regexp`` found in the original text is "checked in"
    (its counter is incremented); every match found in the translated text
    is "checked out" (its counter is decremented).  After both passes a
    counter different from zero means the translation uses that placeholder
    a different number of times than the original.

    Attributes:
        dict:   maps each matched placeholder to its outstanding count.
        errs:   list of human-readable error descriptions collected so far.
        regexp: compiled pattern used to find the placeholders.
    """

    def __init__(self, regexp):
        """Create a checker; ``regexp`` is the pattern (string) to compile."""
        self.dict = {}
        self.errs = []
        self.regexp = re.compile(regexp)

    def checkin(self, string):
        "scan a string from the original file"
        for key in self.regexp.findall(string):
            self.dict[key] = self.dict.get(key, 0) + 1

    def checkout(self, string):
        "scan a translated string"
        for key in self.regexp.findall(string):
            if key in self.dict:
                self.dict[key] -= 1
            else:
                # The placeholder never appeared in the original text.
                self.errs.append("%(key)s was not found" % {'key': key})

    def computeErrors(self):
        """check for differences between checked in and checked out

        Appends one message per unbalanced placeholder to ``self.errs`` and
        returns that same list.  The messages are appended on every call, so
        call ``reset()`` before starting a new comparison.
        """
        for key, count in self.dict.items():
            if count < 0:
                self.errs.append("Too much %(key)s" % {'key': key})
            elif count > 0:
                self.errs.append("Too few %(key)s" % {'key': key})
        return self.errs

    def status(self):
        """Return "FAILED" if any error has been recorded, "OK" otherwise."""
        if self.errs:
            return "FAILED"
        return "OK"

    def errorsAsString(self):
        """Return all recorded errors concatenated, each prefixed by " - "."""
        return "".join([" - %(err)s" % {'err': err} for err in self.errs])

    def reset(self):
        """Forget every counter and every recorded error."""
        self.dict = {}
        self.errs = []
class POParser:
    """parse a .po file extracting msgids and msgstrs

    The file is consumed one character at a time by a small state machine.
    Each successful call to ``parse()`` leaves the next entry in:

        files:  words found on the ``#:`` reference lines preceding the entry
        msgid:  the original string (continuation lines concatenated,
                ``\\n``, ``\\r`` and ``\\t`` escapes decoded)
        msgstr: the translated string, decoded the same way
        line:   running line counter, usable for "near line N" messages
    """

    def __init__(self, filename=""):
        """Create a parser and, if ``filename`` is given, open that file."""
        self.status = 0
        self.files = []
        self.msgid = ""
        self.msgstr = ""
        self.line = 1
        self.f = None
        # escape sequences that decode to a control character; any other
        # escaped character stands for itself (e.g. \" and \\)
        self.esc = {"n": "\n", "r": "\r", "t": "\t"}
        if filename:
            self.f = open(filename)

    def open(self, filename):
        """Open ``filename`` as the file to parse."""
        self.f = open(filename)

    def close(self):
        """Close the underlying file, if one was opened."""
        if self.f is not None:
            self.f.close()

    def _read_quoted(self):
        """Read the rest of a double-quoted string (opening quote consumed).

        Returns a ``(text, complete)`` pair: ``text`` is the decoded content
        and ``complete`` is 0 if EOF was hit before the closing quote.
        """
        chunks = []
        while 1:
            c = self.f.read(1)
            if not c:
                # EOF inside the string
                return "".join(chunks), 0
            if c == "\\":
                # a quoted char: decode it if known, else keep it verbatim
                c = self.f.read(1)
                chunks.append(self.esc.get(c, c))
                continue
            if c == "\"":
                # end of string found
                return "".join(chunks), 1
            # a normal char, add it
            chunks.append(c)

    def parse(self):
        """States table for the finite-states-machine parser:
        0 idle
        1 filename-or-comment
        2 msgid
        3 msgstr
        4 end

        Returns 1 when an entry has been read into ``files``, ``msgid`` and
        ``msgstr``, 0 when there is nothing more to read.  Input that does
        not follow the expected msgid/msgstr layout raises AssertionError.
        """
        # each time we can safely re-initialize those vars
        self.files = []
        self.msgid = ""
        self.msgstr = ""
        # can't continue if status == 4, this is a dead status
        if self.status == 4:
            return 0
        while 1:
            # continue scanning, char-by-char
            c = self.f.read(1)
            if not c:
                # EOF -> maybe we have a msgstr to save?
                self.status = 4
                if self.msgstr:
                    return 1
                return 0
            # keep the line count up-to-date
            if c == "\n":
                self.line += 1

            # a pound was detected the previous char...
            if self.status == 1:
                if c == ":":
                    # was a line of filenames
                    row = self.f.readline()
                    self.files += row.split()
                    self.line += 1
                elif c == "\n":
                    # was a single pound on the line
                    pass
                else:
                    # was a comment... discard
                    self.f.readline()
                    self.line += 1
                # in every case, we switch to idle status
                self.status = 0
                continue

            # in idle status we search for a '#' or for a 'm'
            if self.status == 0:
                if c == "#":
                    # this could be a comment or a filename
                    self.status = 1
                elif c == "m":
                    # this should be a msgid start...
                    s = self.f.read(4)
                    assert s == "sgid", \
                        "line %d: expected 'msgid'" % self.line
                    # so now we search for a '"'
                    self.status = 2
                else:
                    # in idle only those other chars are possible
                    assert c in ["\n", " ", "\t"], \
                        "line %d: unexpected character %r" % (self.line, c)
                continue

            # searching for the msgid string
            if self.status == 2:
                if c == "\n":
                    # a double LF is not possible here
                    c = self.f.read(1)
                    assert c != "\n", \
                        "line %d: blank line inside msgid" % self.line
                if c == "\"":
                    # ok, this is the start of the string,
                    # now search for the end
                    text, complete = self._read_quoted()
                    self.msgid += text
                    if not complete:
                        # EOF, bailout
                        self.status = 4
                        return 0
                elif c == "m":
                    # this should be a msgstr identifier
                    s = self.f.read(5)
                    assert s == "sgstr", \
                        "line %d: expected 'msgstr'" % self.line
                    # ok, now search for the msgstr string
                    self.status = 3
                continue

            # searching for the msgstr string
            if self.status == 3:
                if c == "\n":
                    # a double LF is the end of the msgstr!
                    c = self.f.read(1)
                    if c == "\n":
                        # ok, time to go idle and return
                        self.status = 0
                        self.line += 1
                        return 1
                if c == "\"":
                    # start of string found; its content (escapes
                    # included) belongs to msgstr, not to msgid
                    text, complete = self._read_quoted()
                    self.msgstr += text
                    if not complete:
                        # EOF, bail out
                        self.status = 4
                        return 1
def check_file(translatedFile, originalFile, html=0, quiet=0):
    """check a translated template against the original one
    search also <MM-*> tags if html is not zero

    Both files are scanned line by line for %(var)s / %(var)d placeholders
    (and <MM-*> tags when ``html`` is true).  Returns the number of
    differences found; a file that cannot be opened counts as one.
    Details are printed unless ``quiet`` is true.
    """
    # raw strings: "\(" is not a valid escape in a normal string literal
    if html:
        c = TransChecker(r"(%\([^)]+\)[0-9]*[sd]|</?MM-[^>]+>)")
    else:
        c = TransChecker(r"(%\([^)]+\)[0-9]*[sd])")

    try:
        f = open(originalFile)
    except IOError:
        if not quiet:
            print(" - Can'open original file " + originalFile)
        return 1
    try:
        for line in f:
            c.checkin(line)
    finally:
        # make sure the handle is released even if scanning fails
        f.close()

    try:
        f = open(translatedFile)
    except IOError:
        if not quiet:
            print(" - Can'open translated file " + translatedFile)
        return 1
    try:
        for line in f:
            c.checkout(line)
    finally:
        f.close()

    errors = c.computeErrors()
    if not quiet:
        for desc in errors:
            print(" - %(desc)s" % {'desc': desc})
    return len(errors)
def check_po(file, quiet=0):
    """scan the po file comparing msgids with msgstrs

    For every msgid/msgstr pair returned by the parser, the %(var)s-style
    and plain %s/%d/%u placeholders of the two strings are compared.
    Returns the total number of differences; each one is printed, with the
    parser's current line and source references, unless ``quiet`` is true.
    """
    n = 0
    p = POParser(file)
    # raw string: "\(" is not a valid escape in a normal string literal
    c = TransChecker(r"(%\([^)]+\)[0-9]*[sdu]|%[0-9]*[sdu])")
    try:
        while p.parse():
            c.reset()
            c.checkin(p.msgid)
            c.checkout(p.msgstr)
            for desc in c.computeErrors():
                n += 1
                if not quiet:
                    # p.line is where the parser stopped, i.e. just past
                    # the entry, hence "near line"
                    print(" - near line %(line)d %(file)s: %(desc)s" % {
                        'line': p.line,
                        'file': p.files,
                        'desc': desc
                    })
    finally:
        # close the catalog even if parsing raised
        p.close()
    return n
def __main__():
    """Command-line entry point: ``transcheck [-q] <lang>``.

    Checks every .html and .txt file in templates/<lang>/ against the file
    of the same name in templates/en/, then checks
    messages/<lang>/LC_MESSAGES/mailman.po.  With -q only a one-line
    summary is printed; without it each check and each warning is printed.
    Exits with status 1 on a usage error or if the template directory
    cannot be listed.
    """
    usage = "Usage: %s [-q] <lang>" % sys.argv[0]
    try:
        optlist, args = getopt.getopt(sys.argv[1:], "q")
    except getopt.GetoptError:
        # getopt itself rejects unknown options and stray option arguments
        print(str(sys.exc_info()[1]))
        print(usage)
        sys.exit(1)
    if not args:
        print(usage)
        sys.exit(1)
    lang = args[0]

    quiet = 0
    for o, a in optlist:
        if o == "-q":
            quiet = 1

    numerrors = 0
    numfiles = 0

    try:
        files = os.listdir("templates/" + lang + "/")
    except OSError:
        # without the listing there is nothing to iterate over
        print("can't open templates/%s/" % lang)
        sys.exit(1)

    for name in files:
        if name.endswith(".html"):
            label = "HTML"
            html = 1
        elif name.endswith(".txt"):
            label = "TXT"
            html = 0
        else:
            # not a template we know how to check
            continue
        original = "templates/en/" + name
        translated = "templates/" + lang + "/" + name
        if not quiet:
            print(label + " checking " + translated + "... ")
        n = check_file(translated, original, html=html, quiet=quiet)
        if n:
            numerrors += n
            numfiles += 1

    catalog = "messages/" + lang + "/LC_MESSAGES/mailman.po"
    if not quiet:
        print("PO checking " + catalog + "... ")
    n = check_po(catalog, quiet=quiet)
    if n:
        numerrors += n
        numfiles += 1

    if quiet:
        print("%(errs)u warnings in %(files)u files" % {
            'errs': numerrors,
            'files': numfiles
        })


# run the checker only when executed as a script, not when imported
if __name__ == '__main__':
    __main__()
--jI8keyz6grp/JLjh--