[I18n-sig] plural forms (patch)

Juan David Ibáñez Palomar j-david@noos.fr
Mon, 04 Nov 2002 23:44:47 +0100


This is a multi-part message in MIME format.
--------------060608070102070508020001
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 8bit


Hi,

Attached is the patch to gettext.py and a new test_gettext.py
file. The file "test/output/test_gettext" should be removed.

The ugliest part is the transformation of the plural expression
from C to Python (see the function c2py), if somebody wants
to look at it. Anyway, the tests pass.

There is, however, one issue: as far as I know, no tool is able
to parse plural forms in Python code, though I only tried xgettext.

The documentation still needs to be updated; I'll do that by the
end of this month.

By the way, I would like to get more involved in the development
of Python, where should I ask for CVS write access and subscription
to the python-dev mailing list? Maybe I could commit the gettext
changes myself.


Best regards,

-- 
J. David Ibáñez, http://www.j-david.net
Software Engineer / Ingénieur Logiciel / Ingeniero de Software


--------------060608070102070508020001
Content-Type: text/plain;
 name="gettext.diff"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="gettext.diff"

34a35,36
> # J. David Ibanez implemented plural forms.
> #
46,49c48,49
< import os
< import sys
< import struct
< import copy
---
> 
> import copy, os, re, struct, sys
51a52
> 
57a59,108
> def test(condition, true, false):
>     """
>     Implements the C expression:
> 
>       condition ? true : false
> 
>     Required to correctly interpret plural forms.
>     """
>     if condition:
>         return true
>     else:
>         return false
> 
> 
> def c2py(plural):
>     """
>     Gets a C expression as used in PO files for plural forms and
>     returns a Python lambda function that implements an equivalent
>     expression.  Raises ValueError on an unbalanced ')'.
>     """
>     plural = plural.replace('&&', ' and ')
>     plural = plural.replace('||', ' or ')
> 
>     expr = re.compile(r'\!([^=])')  # keep the char after '!' (not '!=')
>     plural = expr.sub(' not \\1', plural)
> 
>     # Regular expression and replacement function used to transform
>     # "a?b:c" to "test(a,b,c)".
>     expr = re.compile(r'(.*?)\?(.*?):(.*)')
>     def repl(x):
>         return "test(%s, %s, %s)" % (x.group(1), x.group(2),
>                                      expr.sub(repl, x.group(3)))
> 
>     # Code to transform the plural expression, taking care of parentheses
>     stack = ['']
>     for c in plural:
>         if c == '(':
>             stack.append('')
>         elif c == ')':
>             if len(stack) == 1:  # only the outermost level left: stray ')'
>                 raise ValueError, 'unbalanced parenthesis in plural form'
>             s = expr.sub(repl, stack.pop())
>             stack[-1] += '(%s)' % s
>         else:
>             stack[-1] += c
>     plural = expr.sub(repl, stack.pop())
> 
>     return eval('lambda n: int(%s)' % plural)  # NOTE: evals catalog text
> 
> 
123a175,182
>     def ngettext(self, msgid1, msgid2, n):
>         if self._fallback:
>             return self._fallback.ngettext(msgid1, msgid2, n)
>         if n == 1:
>             return msgid1
>         else:
>             return msgid2
> 
128a188,195
>     def ungettext(self, msgid1, msgid2, n):
>         if self._fallback:
>             return self._fallback.ungettext(msgid1, msgid2, n)
>         if n == 1:
>             return unicode(msgid1)
>         else:
>             return unicode(msgid2)
> 
171a239
>                 msg = buf[moff:mend]
173c241,248
<                 catalog[buf[moff:mend]] = tmsg
---
>                 if msg.find('\x00') >= 0:
>                     # Plural forms
>                     msgid1, msgid2 = msg.split('\x00')
>                     tmsg = tmsg.split('\x00')
>                     for i in range(len(tmsg)):
>                         catalog[(msgid1, i)] = tmsg[i]
>                 else:
>                     catalog[msg] = tmsg
188a264,269
>                     elif k == 'plural-forms':
>                         v = v.split(';')
> ##                        nplurals = v[0].split('nplurals=')[1]
> ##                        nplurals = int(nplurals.strip())
>                         plural = v[1].split('plural=')[1]
>                         self.plural = c2py(plural)
200a282,294
> 
>     def ngettext(self, msgid1, msgid2, n):
>         try:
>             return self._catalog[(msgid1, self.plural(n))]
>         except KeyError:
>             if self._fallback:
>                 return self._fallback.ngettext(msgid1, msgid2, n)
>             if n == 1:
>                 return msgid1
>             else:
>                 return msgid2
> 
> 
210a305,316
>     def ungettext(self, msgid1, msgid2, n):
>         try:
>             tmsg = self._catalog[(msgid1, self.plural(n))]
>         except KeyError:
>             if self._fallback:
>                 return self._fallback.ungettext(msgid1, msgid2, n)
>             if n == 1:
>                 tmsg = msgid1
>             else:
>                 tmsg = msgid2
>         return unicode(tmsg, self._charset)
> 
313a420,430
> def dngettext(domain, msgid1, msgid2, n):
>     try:
>         t = translation(domain, _localedirs.get(domain, None))
>     except IOError:
>         if n == 1:
>             return msgid1
>         else:
>             return msgid2
>     return t.ngettext(msgid1, msgid2, n)
> 
> 
317a435,438
> def ngettext(msgid1, msgid2, n):
>     return dngettext(_current_domain, msgid1, msgid2, n)
> 
> 

--------------060608070102070508020001
Content-Type: text/plain;
 name="test_gettext.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="test_gettext.py"

import os
import base64
import gettext

import unittest
from unittest import TestCase

# TODO:
#  - Add new tests, for example for "dgettext"
#  - Remove dummy tests; for example, testing single and double quotes
#    makes no sense here. It would if we were testing a parser such as
#    pygettext.
#  - Tests should have only one assert.


GNU_MO_DATA = '''\
3hIElQAAAAAGAAAAHAAAAEwAAAALAAAAfAAAAAAAAACoAAAAFQAAAKkAAAAjAAAAvwAAAKEAAADj
AAAABwAAAIUBAAALAAAAjQEAAEUBAACZAQAAFgAAAN8CAAAeAAAA9gIAAKEAAAAVAwAABQAAALcD
AAAJAAAAvQMAAAEAAAADAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAEAAAABQAAAAYAAAACAAAAAFJh
eW1vbmQgTHV4dXJ5IFlhY2gtdABUaGVyZSBpcyAlcyBmaWxlAFRoZXJlIGFyZSAlcyBmaWxlcwBU
aGlzIG1vZHVsZSBwcm92aWRlcyBpbnRlcm5hdGlvbmFsaXphdGlvbiBhbmQgbG9jYWxpemF0aW9u
CnN1cHBvcnQgZm9yIHlvdXIgUHl0aG9uIHByb2dyYW1zIGJ5IHByb3ZpZGluZyBhbiBpbnRlcmZh
Y2UgdG8gdGhlIEdOVQpnZXR0ZXh0IG1lc3NhZ2UgY2F0YWxvZyBsaWJyYXJ5LgBtdWxsdXNrAG51
ZGdlIG51ZGdlAFByb2plY3QtSWQtVmVyc2lvbjogMi4wClBPLVJldmlzaW9uLURhdGU6IDIwMDAt
MDgtMjkgMTI6MTktMDQ6MDAKTGFzdC1UcmFuc2xhdG9yOiBKLiBEYXZpZCBJYsOhw7FleiA8ai1k
YXZpZEBub29zLmZyPgpMYW5ndWFnZS1UZWFtOiBYWCA8cHl0aG9uLWRldkBweXRob24ub3JnPgpN
SU1FLVZlcnNpb246IDEuMApDb250ZW50LVR5cGU6IHRleHQvcGxhaW47IGNoYXJzZXQ9aXNvLTg4
NTktMQpDb250ZW50LVRyYW5zZmVyLUVuY29kaW5nOiBub25lCkdlbmVyYXRlZC1CeTogcHlnZXR0
ZXh0LnB5IDEuMQpQbHVyYWwtRm9ybXM6IG5wbHVyYWxzPTI7IHBsdXJhbD1uIT0xOwoAVGhyb2F0
d29iYmxlciBNYW5ncm92ZQBIYXkgJXMgZmljaGVybwBIYXkgJXMgZmljaGVyb3MAR3V2ZiB6YnFo
eXIgY2ViaXZxcmYgdmFncmVhbmd2YmFueXZtbmd2YmEgbmFxIHlicG55dm1uZ3ZiYQpmaGNjYmVn
IHNiZSBsYmhlIENsZ3ViYSBjZWJ0ZW56ZiBvbCBjZWJpdnF2YXQgbmEgdmFncmVzbnByIGdiIGd1
ciBUQUgKdHJnZ3JrZyB6cmZmbnRyIHBuZ255YnQgeXZvZW5lbC4AYmFjb24Ad2luayB3aW5rAA==
'''


# Relative location of the test catalog: language 'xx', domain 'gettext'.
# setup() writes the catalog to MOFILE and teardown() removes it again.
LOCALEDIR = os.path.join('xx', 'LC_MESSAGES')
MOFILE = os.path.join(LOCALEDIR, 'gettext.mo')

def setup():
    """
    Write the test catalog to disk and select the 'xx' language.

    Decodes GNU_MO_DATA from base64 into MOFILE, creating LOCALEDIR
    first when it does not exist yet, then sets the LANGUAGE
    environment variable to 'xx'.
    """
    # A previous run that crashed may have left the directory behind;
    # os.makedirs() raises OSError if the leaf directory already exists.
    if not os.path.isdir(LOCALEDIR):
        os.makedirs(LOCALEDIR)
    fp = open(MOFILE, 'wb')
    try:
        fp.write(base64.decodestring(GNU_MO_DATA))
    finally:
        # Close the file even when decoding or writing fails.
        fp.close()
    os.environ['LANGUAGE'] = 'xx'

def teardown():
    """
    Undo setup(): set LANGUAGE to 'en' and delete the test catalog.

    Each removal is skipped when its target is missing.  The __main__
    block calls this function from a ``finally`` clause even when
    setup() failed part-way, and an OSError raised here would replace
    the original error.
    """
    os.environ['LANGUAGE'] = 'en'
    if os.path.exists(MOFILE):
        os.unlink(MOFILE)
    if os.path.isdir(LOCALEDIR):
        # Removes LC_MESSAGES and then any parent left empty ('xx').
        os.removedirs(LOCALEDIR)


class GettextTestCase1(TestCase):
    def setUp(self):
        self.localedir = os.curdir
        self.mofile = MOFILE

        gettext.install('gettext', self.localedir)


    def test_some_translations(self):
        # test some translations
        assert _('albatross') == 'albatross'
        assert _(u'mullusk') == 'bacon'
        assert _(r'Raymond Luxury Yach-t') == 'Throatwobbler Mangrove'
        assert _(ur'nudge nudge') == 'wink wink'


    def test_double_quotes(self):
        # double quotes
        assert _("albatross") == 'albatross'
        assert _(u"mullusk") == 'bacon'
        assert _(r"Raymond Luxury Yach-t") == 'Throatwobbler Mangrove'
        assert _(ur"nudge nudge") == 'wink wink'


    def test_triple_single_quotes(self):
        # triple single quotes
        assert _('''albatross''') == 'albatross'
        assert _(u'''mullusk''') == 'bacon'
        assert _(r'''Raymond Luxury Yach-t''') == 'Throatwobbler Mangrove'
        assert _(ur'''nudge nudge''') == 'wink wink'


    def test_triple_double_quotes(self):
        # triple double quotes
        assert _("""albatross""") == 'albatross'
        assert _(u"""mullusk""") == 'bacon'
        assert _(r"""Raymond Luxury Yach-t""") == 'Throatwobbler Mangrove'
        assert _(ur"""nudge nudge""") == 'wink wink'


    def test_multiline_strings(self):
        # multiline strings
        assert _('''This module provides internationalization and localization
support for your Python programs by providing an interface to the GNU
gettext message catalog library.''') == '''Guvf zbqhyr cebivqrf vagreangvbanyvmngvba naq ybpnyvmngvba
fhccbeg sbe lbhe Clguba cebtenzf ol cebivqvat na vagresnpr gb gur TAH
trggrkg zrffntr pngnybt yvoenel.'''


    def test_the_alternative_interface(self):
        # test the alternative interface
        fp = open(os.path.join(self.mofile), 'rb')
        t = gettext.GNUTranslations(fp)
        fp.close()

        t.install()

        assert _('nudge nudge') == 'wink wink'

        # try unicode return type
        t.install(unicode=1)

        assert _('mullusk') == 'bacon'


class GettextTestCase2(TestCase):
    def setUp(self):
        self.localedir = os.curdir

        gettext.bindtextdomain('gettext', self.localedir)
        gettext.textdomain('gettext')

        self._ = gettext.gettext


    def test_bindtextdomain(self):
        assert gettext.bindtextdomain('gettext') == self.localedir


    def test_textdomain(self):
        assert gettext.textdomain() == 'gettext'


    def test_some_translations(self):
        # test some translations
        assert self._('albatross') == 'albatross'
        assert self._(u'mullusk') == 'bacon'
        assert self._(r'Raymond Luxury Yach-t') == 'Throatwobbler Mangrove'
        assert self._(ur'nudge nudge') == 'wink wink'


    def test_double_quotes(self):
        # double quotes
        assert self._("albatross") == 'albatross'
        assert self._(u"mullusk") == 'bacon'
        assert self._(r"Raymond Luxury Yach-t") == 'Throatwobbler Mangrove'
        assert self._(ur"nudge nudge") == 'wink wink'


    def test_triple_single_quotes(self):
        # triple single quotes
        assert self._('''albatross''') == 'albatross'
        assert self._(u'''mullusk''') == 'bacon'
        assert self._(r'''Raymond Luxury Yach-t''') == 'Throatwobbler Mangrove'
        assert self._(ur'''nudge nudge''') == 'wink wink'


    def test_triple_double_quotes(self):
        # triple double quotes
        assert self._("""albatross""") == 'albatross'
        assert self._(u"""mullusk""") == 'bacon'
        assert self._(r"""Raymond Luxury Yach-t""") == 'Throatwobbler Mangrove'
        assert self._(ur"""nudge nudge""") == 'wink wink'


    def test_multiline_strings(self):
        # multiline strings
        assert self._('''This module provides internationalization and localization
support for your Python programs by providing an interface to the GNU
gettext message catalog library.''') == '''Guvf zbqhyr cebivqrf vagreangvbanyvmngvba naq ybpnyvmngvba
fhccbeg sbe lbhe Clguba cebtenzf ol cebivqvat na vagresnpr gb gur TAH
trggrkg zrffntr pngnybt yvoenel.'''




class PluralFormsTestCase(TestCase):
    """
    Tests plural-form lookups (ngettext) and the C-to-Python plural
    expression translator gettext.c2py().

    The c2py tests evaluate the translated expression for n = 0..199 and
    compare the concatenated results with the expected digits.  Each
    expected string is written as twenty adjacent 10-character literals,
    one per decade of n; Python joins adjacent literals into one string.

    The checks use assertEqual instead of bare ``assert`` statements,
    which are compiled away under ``python -O``.
    """

    def setUp(self):
        self.localedir = os.curdir
        self.mofile = MOFILE

        # test_plural_forms1 calls the module-level gettext.ngettext(),
        # which looks up the current global domain.  Bind it here rather
        # than relying on GettextTestCase2.setUp having run earlier in
        # the same process.
        gettext.bindtextdomain('gettext', self.localedir)
        gettext.textdomain('gettext')

    def test_plural_forms1(self):
        eq = self.assertEqual
        x = gettext.ngettext('There is %s file', 'There are %s files', 1)
        eq(x, 'Hay %s fichero')

        x = gettext.ngettext('There is %s file', 'There are %s files', 2)
        eq(x, 'Hay %s ficheros')


    def test_plural_forms2(self):
        eq = self.assertEqual
        fp = open(self.mofile, 'rb')
        try:
            t = gettext.GNUTranslations(fp)
        finally:
            # Do not leak the handle if the catalog fails to parse.
            fp.close()

        x = t.ngettext('There is %s file', 'There are %s files', 1)
        eq(x, 'Hay %s fichero')

        x = t.ngettext('There is %s file', 'There are %s files', 2)
        eq(x, 'Hay %s ficheros')


    def test_hu(self):
        f = gettext.c2py('0')
        s = ''.join([ str(f(x)) for x in range(200) ])
        self.assertEqual(s,
            "0000000000" "0000000000" "0000000000" "0000000000" "0000000000"
            "0000000000" "0000000000" "0000000000" "0000000000" "0000000000"
            "0000000000" "0000000000" "0000000000" "0000000000" "0000000000"
            "0000000000" "0000000000" "0000000000" "0000000000" "0000000000")


    def test_de(self):
        f = gettext.c2py('n != 1')
        s = ''.join([ str(f(x)) for x in range(200) ])
        self.assertEqual(s,
            "1011111111" "1111111111" "1111111111" "1111111111" "1111111111"
            "1111111111" "1111111111" "1111111111" "1111111111" "1111111111"
            "1111111111" "1111111111" "1111111111" "1111111111" "1111111111"
            "1111111111" "1111111111" "1111111111" "1111111111" "1111111111")


    def test_fr(self):
        f = gettext.c2py('n>1')
        s = ''.join([ str(f(x)) for x in range(200) ])
        self.assertEqual(s,
            "0011111111" "1111111111" "1111111111" "1111111111" "1111111111"
            "1111111111" "1111111111" "1111111111" "1111111111" "1111111111"
            "1111111111" "1111111111" "1111111111" "1111111111" "1111111111"
            "1111111111" "1111111111" "1111111111" "1111111111" "1111111111")


    def test_gd(self):
        f = gettext.c2py('n==1 ? 0 : n==2 ? 1 : 2')
        s = ''.join([ str(f(x)) for x in range(200) ])
        self.assertEqual(s,
            "2012222222" "2222222222" "2222222222" "2222222222" "2222222222"
            "2222222222" "2222222222" "2222222222" "2222222222" "2222222222"
            "2222222222" "2222222222" "2222222222" "2222222222" "2222222222"
            "2222222222" "2222222222" "2222222222" "2222222222" "2222222222")


    def test_gd2(self):
        # Tests the combination of parentheses and "?:"
        f = gettext.c2py('n==1 ? 0 : (n==2 ? 1 : 2)')
        s = ''.join([ str(f(x)) for x in range(200) ])
        self.assertEqual(s,
            "2012222222" "2222222222" "2222222222" "2222222222" "2222222222"
            "2222222222" "2222222222" "2222222222" "2222222222" "2222222222"
            "2222222222" "2222222222" "2222222222" "2222222222" "2222222222"
            "2222222222" "2222222222" "2222222222" "2222222222" "2222222222")


    def test_lt(self):
        f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2')
        s = ''.join([ str(f(x)) for x in range(200) ])
        self.assertEqual(s,
            "2011111111" "2222222222" "2011111111" "2011111111" "2011111111"
            "2011111111" "2011111111" "2011111111" "2011111111" "2011111111"
            "2011111111" "2222222222" "2011111111" "2011111111" "2011111111"
            "2011111111" "2011111111" "2011111111" "2011111111" "2011111111")


    def test_ru(self):
        f = gettext.c2py('n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2')
        s = ''.join([ str(f(x)) for x in range(200) ])
        self.assertEqual(s,
            "2011122222" "2222222222" "2011122222" "2011122222" "2011122222"
            "2011122222" "2011122222" "2011122222" "2011122222" "2011122222"
            "2011122222" "2222222222" "2011122222" "2011122222" "2011122222"
            "2011122222" "2011122222" "2011122222" "2011122222" "2011122222")


    def test_pl(self):
        f = gettext.c2py('n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2')
        s = ''.join([ str(f(x)) for x in range(200) ])
        self.assertEqual(s,
            "2011122222" "2222222222" "2211122222" "2211122222" "2211122222"
            "2211122222" "2211122222" "2211122222" "2211122222" "2211122222"
            "2211122222" "2222222222" "2211122222" "2211122222" "2211122222"
            "2211122222" "2211122222" "2211122222" "2211122222" "2211122222")


    def test_sl(self):
        f = gettext.c2py('n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3')
        s = ''.join([ str(f(x)) for x in range(200) ])
        self.assertEqual(s,
            "3012233333" "3333333333" "3333333333" "3333333333" "3333333333"
            "3333333333" "3333333333" "3333333333" "3333333333" "3333333333"
            "3012233333" "3333333333" "3333333333" "3333333333" "3333333333"
            "3333333333" "3333333333" "3333333333" "3333333333" "3333333333")



if __name__ == '__main__':
    # Run the tests against a freshly written catalog.  The ``finally``
    # clause calls teardown() however the ``try`` body is left, including
    # when setup() itself raises.
    try:
        setup()
        unittest.main()
    finally:
        teardown()




# For reference, here's the .po file used to create the .mo data above.
#
# The original version was automatically generated from the sources with
# pygettext. Later it was manually modified to add plural forms support.

'''
# Dummy translation for Python's test_gettext.py module.
# Copyright (C) 2001 Python Software Foundation
# Barry Warsaw <barry@python.org>, 2000.
#
msgid ""
msgstr ""
"Project-Id-Version: 2.0\n"
"PO-Revision-Date: 2000-08-29 12:19-04:00\n"
"Last-Translator: J. David Ibanez <j-david@noos.fr>\n"
"Language-Team: XX <python-dev@python.org>\n"
"MIME-Version: 1.0\n"
"Content-Type: text/plain; charset=iso-8859-1\n"
"Content-Transfer-Encoding: 8bit\n"
"Generated-By: pygettext.py 1.1\n"
"Plural-Forms: nplurals=2; plural=n!=1;\n"

#: test_gettext.py:19 test_gettext.py:25 test_gettext.py:31 test_gettext.py:37
#: test_gettext.py:51 test_gettext.py:80 test_gettext.py:86 test_gettext.py:92
#: test_gettext.py:98
msgid "nudge nudge"
msgstr "wink wink"

#: test_gettext.py:16 test_gettext.py:22 test_gettext.py:28 test_gettext.py:34
#: test_gettext.py:77 test_gettext.py:83 test_gettext.py:89 test_gettext.py:95
msgid "albatross"
msgstr ""

#: test_gettext.py:18 test_gettext.py:24 test_gettext.py:30 test_gettext.py:36
#: test_gettext.py:79 test_gettext.py:85 test_gettext.py:91 test_gettext.py:97
msgid "Raymond Luxury Yach-t"
msgstr "Throatwobbler Mangrove"

#: test_gettext.py:17 test_gettext.py:23 test_gettext.py:29 test_gettext.py:35
#: test_gettext.py:56 test_gettext.py:78 test_gettext.py:84 test_gettext.py:90
#: test_gettext.py:96
msgid "mullusk"
msgstr "bacon"

#: test_gettext.py:40 test_gettext.py:101
msgid ""
"This module provides internationalization and localization\n"
"support for your Python programs by providing an interface to the GNU\n"
"gettext message catalog library."
msgstr ""
"Guvf zbqhyr cebivqrf vagreangvbanyvmngvba naq ybpnyvmngvba\n"
"fhccbeg sbe lbhe Clguba cebtenzf ol cebivqvat na vagresnpr gb gur TAH\n"
"trggrkg zrffntr pngnybt yvoenel."

# Manually added, as neither pygettext nor xgettext support plural forms
# in Python.
msgid "There is %s file"
msgid_plural "There are %s files"
msgstr[0] "Hay %s fichero"
msgstr[1] "Hay %s ficheros"
'''

--------------060608070102070508020001--