[Python-checkins] CVS: python/dist/src/Lib gettext.py,1.3,1.4
Barry Warsaw
python-dev@python.org
Tue, 29 Aug 2000 20:30:01 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory slayer.i.sourceforge.net:/tmp/cvs-serv22945
Modified Files:
gettext.py
Log Message:
Finalize this module for Python 2.0 based on feedback and input from
Martin von Loewis, Peter Funk, James Henstridge, Francois Pinard, and
Marc-Andre Lemburg.
Index: gettext.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/gettext.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** gettext.py 2000/08/25 20:26:43 1.3
--- gettext.py 2000/08/30 03:29:58 1.4
***************
*** 7,88 ****
I18N refers to the operation by which a program is made aware of multiple
languages. L10N refers to the adaptation of your program, once
! internationalized, to the local language and cultural habits. In order to
! provide multilingual messages for your Python programs, you need to take the
! following steps:
!
! - prepare your program by specially marking translatable strings
! - run a suite of tools over your marked program files to generate raw
! message catalogs
! - create language specific translations of the message catalogs
! - use this module so that message strings are properly translated
!
! In order to prepare your program for I18N, you need to look at all the strings
! in your program. Any string that needs to be translated should be marked by
! wrapping it in _('...') -- i.e. a call to the function `_'. For example:
!
! filename = 'mylog.txt'
! message = _('writing a log message')
! fp = open(filename, 'w')
! fp.write(message)
! fp.close()
!
! In this example, the string `writing a log message' is marked as a candidate
! for translation, while the strings `mylog.txt' and `w' are not.
!
! The GNU gettext package provides a tool, called xgettext, that scans C and C++
! source code looking for these specially marked strings. xgettext generates
! what are called `.pot' files, essentially structured human readable files
! which contain every marked string in the source code. These .pot files are
! copied and handed over to translators who write language-specific versions for
! every supported language.
!
! For I18N Python programs however, xgettext won't work; it doesn't understand
! the myriad of string types supported by Python. The standard Python
! distribution provides a tool called pygettext that does though (found in the
! Tools/i18n directory). This is a command line script that supports a similar
! interface as xgettext; see its documentation for details. Once you've used
! pygettext to create your .pot files, you can use the standard GNU gettext
! tools to generate your machine-readable .mo files, which are what's used by
! this module.
!
! In the simple case, to use this module then, you need only add the following
! bit of code to the main driver file of your application:
!
! import gettext
! gettext.install()
!
! This sets everything up so that your _('...') function calls Just Work. In
! other words, it installs `_' in the builtins namespace for convenience. You
! can skip this step and do it manually by the equivalent code:
!
! import gettext
! import __builtin__
! __builtin__.__dict__['_'] = gettext.gettext
!
! Once you've done this, you probably want to call bindtextdomain() and
! textdomain() to get the domain set up properly. Again, for convenience, you
! can pass the domain and localedir to install to set everything up in one fell
! swoop:
!
! import gettext
! gettext.install('mydomain', '/my/locale/dir')
!
! If your program needs to support many languages at the same time, you will
! want to create Translations objects explicitly, like so:
!
! import gettext
! gettext.install()
!
! lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo'))
! lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo'))
! lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo'))
!
! gettext.set(lang1)
! # all _() will now translate to language 1
! gettext.set(lang2)
! # all _() will now translate to language 2
- Currently, only GNU gettext format binary .mo files are supported.
-
"""
--- 7,12 ----
I18N refers to the operation by which a program is made aware of multiple
languages. L10N refers to the adaptation of your program, once
! internationalized, to the local language and cultural habits.
"""
***************
*** 105,123 ****
# Barry Warsaw integrated these modules, wrote the .install() API and code,
# and conformed all C and Python code to Python's coding standards.
import os
import sys
import struct
! from UserDict import UserDict
!
!
!
! # globals
! _translations = {}
! _current_translation = None
! _current_domain = 'messages'
! # Domain to directory mapping, for use by bindtextdomain()
! _localedirs = {}
--- 29,53 ----
# Barry Warsaw integrated these modules, wrote the .install() API and code,
# and conformed all C and Python code to Python's coding standards.
+ #
+ # Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
+ # module.
+ #
+ # TODO:
+ # - Lazy loading of .mo files. Currently the entire catalog is loaded into
+ # memory, but that's probably bad for large translated programs. Instead,
+ # the lexical sort of original strings in GNU .mo files should be exploited
+ # to do binary searches and lazy initializations. Or you might want to use
+ # the undocumented double-hash algorithm for .mo files with hash tables, but
+ # you'll need to study the GNU gettext code to do this.
+ #
+ # - Support Solaris .mo file formats. Unfortunately, we've been unable to
+ # find this format documented anywhere.
import os
import sys
import struct
! from errno import ENOENT
! _default_localedir = os.path.join(sys.prefix, 'share', 'locale')
***************
*** 166,180 ****
! class GNUTranslations(UserDict):
! # Magic number of .mo files
! MAGIC = 0x950412de
! def __init__(self, fp):
! if fp is None:
! d = {}
! else:
! d = self._parse(fp)
! UserDict.__init__(self, d)
def _parse(self, fp):
"""Override this method to support alternative .mo formats."""
--- 96,131 ----
! class NullTranslations:
! def __init__(self, fp=None):
! self._info = {}
! self._charset = None
! if fp:
! self._parse(fp)
! def _parse(self, fp):
! pass
!
! def gettext(self, message):
! return message
!
! def ugettext(self, message):
! return unicode(message)
!
! def info(self):
! return self._info
!
! def charset(self):
! return self._charset
!
! def install(self, unicode=0):
! import __builtin__
! __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
+
+ class GNUTranslations(NullTranslations):
+ # Magic number of .mo files
+ LE_MAGIC = 0x950412de
+ BE_MAGIC = struct.unpack('>i', struct.pack('<i', LE_MAGIC))[0]
+
def _parse(self, fp):
"""Override this method to support alternative .mo formats."""
***************
*** 183,191 ****
# Parse the .mo file header, which consists of 5 little endian 32
# bit words.
! catalog = {}
buf = fp.read()
! magic, version, msgcount, masteridx, transidx = unpack(
! '<5i', buf[:20])
! if magic <> self.MAGIC:
raise IOError(0, 'Bad magic number', filename)
#
--- 134,148 ----
# Parse the .mo file header, which consists of 5 little endian 32
# bit words.
! self._catalog = catalog = {}
buf = fp.read()
! # Are we big endian or little endian?
! magic = unpack('<i', buf[:4])[0]
! if magic == self.LE_MAGIC:
! version, msgcount, masteridx, transidx = unpack('<4i', buf[4:20])
! ii = '<ii'
! elif magic == self.BE_MAGIC:
! version, msgcount, masteridx, transidx = unpack('>4i', buf[4:20])
! ii = '>ii'
! else:
raise IOError(0, 'Bad magic number', filename)
#
***************
*** 193,231 ****
# dictionary.
for i in xrange(0, msgcount):
! mstart = unpack('<i', buf[masteridx+4:masteridx+8])[0]
! mend = mstart + unpack('<i', buf[masteridx:masteridx+4])[0]
! tstart = unpack('<i', buf[transidx+4:transidx+8])[0]
! tend = tstart + unpack('<i', buf[transidx:transidx+4])[0]
if mend < len(buf) and tend < len(buf):
! catalog[buf[mstart:mend]] = buf[tstart:tend]
else:
raise IOError(0, 'File is corrupt', filename)
! #
# advance to next entry in the seek tables
masteridx += 8
transidx += 8
- return catalog
!
! # By default, use GNU gettext format .mo files
! Translations = GNUTranslations
# Locate a .mo file using the gettext strategy
! def _find(localedir=None, languages=None, domain=None):
! global _current_domain
! global _localedirs
# Get some reasonable defaults for arguments that were not supplied
- if domain is None:
- domain = _current_domain
if localedir is None:
! localedir = _localedirs.get(
! domain,
! # TBD: The default localedir is actually system dependent. I
! # don't know of a good platform-consistent and portable way to
! # default it, so instead, we'll just use sys.prefix. Most
! # programs should be calling bindtextdomain() or such explicitly
! # anyway.
! os.path.join(sys.prefix, 'share', 'locale'))
if languages is None:
languages = []
--- 150,193 ----
# dictionary.
for i in xrange(0, msgcount):
! mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
! mend = moff + mlen
! tlen, toff = unpack(ii, buf[transidx:transidx+8])
! tend = toff + tlen
if mend < len(buf) and tend < len(buf):
! tmsg = buf[toff:tend]
! catalog[buf[moff:mend]] = tmsg
else:
raise IOError(0, 'File is corrupt', filename)
! # See if we're looking at GNU .mo conventions for metadata
! if mlen == 0 and tmsg.lower().startswith('project-id-version:'):
! # Catalog description
! for item in tmsg.split('\n'):
! item = item.strip()
! if not item:
! continue
! k, v = item.split(':', 1)
! k = k.strip().lower()
! v = v.strip()
! self._info[k] = v
! if k == 'content-type':
! self._charset = v.split('charset=')[1]
# advance to next entry in the seek tables
masteridx += 8
transidx += 8
+ def gettext(self, message):
+ return self._catalog.get(message, message)
! def ugettext(self, message):
! tmsg = self._catalog.get(message, message)
! return unicode(tmsg, self._charset)
!
+
# Locate a .mo file using the gettext strategy
! def find(domain, localedir=None, languages=None):
# Get some reasonable defaults for arguments that were not supplied
if localedir is None:
! localedir = _default_localedir
if languages is None:
languages = []
***************
*** 248,318 ****
break
mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
! # see if it's in the cache
! mo = _translations.get(mofile)
! if mo:
! return mo
! fp = None
! try:
! try:
! fp = open(mofile, 'rb')
! t = Translations(fp)
! _translations[mofile] = t
! return t
! except IOError:
! pass
! finally:
! if fp:
! fp.close()
! return {}
! def bindtextdomain(domain=None, localedir=None):
! """Bind domain to a file in the specified directory."""
! global _localedirs
! if domain is None:
! return None
! if localedir is None:
! return _localedirs.get(domain, _localedirs.get('C'))
! _localedirs[domain] = localedir
! return localedir
def textdomain(domain=None):
- """Change or query the current global domain."""
global _current_domain
! if domain is None:
! return _current_domain
! else:
_current_domain = domain
! return domain
! def gettext(message):
! """Return localized version of a message."""
! return _find().get(message, message)
def dgettext(domain, message):
! """Like gettext(), but look up message in specified domain."""
! return _find(domain=domain).get(message, message)
!
!
! # A higher level API
! def set(translation):
! global _current_translation
! _current_translation = translation
! def get():
! global _current_translation
! return _current_translation
! def install(domain=None, localedir=None):
! import __builtin__
! __builtin__.__dict__['_'] = gettext
! if domain is not None:
! bindtextdomain(domain, localedir)
! textdomain(domain)
--- 210,285 ----
break
mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
! if os.path.exists(mofile):
! return mofile
! return None
! # a mapping between absolute .mo file path and Translation object
! _translations = {}
!
! def translation(domain, localedir=None, languages=None, class_=None):
! if class_ is None:
! class_ = GNUTranslations
! mofile = find(domain, localedir, languages)
! if mofile is None:
! raise IOError(ENOENT, 'No translation file found for domain', domain)
! key = os.path.abspath(mofile)
! # TBD: do we need to worry about the file pointer getting collected?
! t = _translations.setdefault(key, class_(open(mofile, 'rb')))
! return t
!
!
!
! def install(domain, localedir=None, unicode=0):
! translation(domain, localedir).install(unicode)
!
!
!
! # a mapping b/w domains and locale directories
! _localedirs = {}
! # current global domain, `messages' used for compatibility w/ GNU gettext
! _current_domain = 'messages'
def textdomain(domain=None):
global _current_domain
! if domain is not None:
_current_domain = domain
! return _current_domain
! def bindtextdomain(domain, localedir=None):
! global _localedirs
! if localedir is not None:
! _localedirs[domain] = localedir
! return _localedirs.get(domain, _default_localedir)
def dgettext(domain, message):
! try:
! t = translation(domain, _localedirs.get(domain, None))
! except IOError:
! return message
! return t.gettext(message)
!
! def gettext(message):
! return dgettext(_current_domain, message)
! # dcgettext() has been deemed unnecessary and is not implemented.
+ # James Henstridge's Catalog constructor from GNOME gettext. Documented usage
+ # was:
+ #
+ # import gettext
+ # cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
+ # _ = cat.gettext
+ # print _('Hello World')
+
+ # The resulting catalog object currently doesn't support access through a
+ # dictionary API, which was supported (but apparently unused) in GNOME
+ # gettext.
! Catalog = translation