[Python-checkins] CVS: python/dist/src/Lib gettext.py,1.3,1.4

Tue, 29 Aug 2000 20:30:01 -0700

Update of /cvsroot/python/python/dist/src/Lib
In directory slayer.i.sourceforge.net:/tmp/cvs-serv22945

Modified Files:
	gettext.py 
Log Message:
Finalize this module for Python 2.0 based on feedback and input from
Martin von Loewis, Peter Funk, James Henstridge, Francois Pinard, and
Marc-Andre Lemburg.

Index: gettext.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/gettext.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** gettext.py	2000/08/25 20:26:43	1.3
--- gettext.py	2000/08/30 03:29:58	1.4
***************
*** 7,88 ****
  I18N refers to the operation by which a program is made aware of multiple
  languages.  L10N refers to the adaptation of your program, once
! internationalized, to the local language and cultural habits.  In order to
! provide multilingual messages for your Python programs, you need to take the
! following steps:
! 
!     - prepare your program by specially marking translatable strings
!     - run a suite of tools over your marked program files to generate raw
!       messages catalogs
!     - create language specific translations of the message catalogs
!     - use this module so that message strings are properly translated
! 
! In order to prepare your program for I18N, you need to look at all the strings
! in your program.  Any string that needs to be translated should be marked by
! wrapping it in _('...') -- i.e. a call to the function `_'.  For example:
! 
!     filename = 'mylog.txt'
!     message = _('writing a log message')
!     fp = open(filename, 'w')
!     fp.write(message)
!     fp.close()
! 
! In this example, the string `writing a log message' is marked as a candidate
! for translation, while the strings `mylog.txt' and `w' are not.
! 
! The GNU gettext package provides a tool, called xgettext, that scans C and C++
! source code looking for these specially marked strings.  xgettext generates
! what are called `.pot' files, essentially structured human readable files
! which contain every marked string in the source code.  These .pot files are
! copied and handed over to translators who write language-specific versions for
! every supported language.
! 
! For I18N Python programs however, xgettext won't work; it doesn't understand
! the myriad of string types support by Python.  The standard Python
! distribution provides a tool called pygettext that does though (found in the
! Tools/i18n directory).  This is a command line script that supports a similar
! interface as xgettext; see its documentation for details.  Once you've used
! pygettext to create your .pot files, you can use the standard GNU gettext
! tools to generate your machine-readable .mo files, which are what's used by
! this module.
! 
! In the simple case, to use this module then, you need only add the following
! bit of code to the main driver file of your application:
! 
!     import gettext
!     gettext.install()
! 
! This sets everything up so that your _('...') function calls Just Work.  In
! other words, it installs `_' in the builtins namespace for convenience.  You
! can skip this step and do it manually by the equivalent code:
! 
!     import gettext
!     import __builtin__
!     __builtin__['_'] = gettext.gettext
! 
! Once you've done this, you probably want to call bindtextdomain() and
! textdomain() to get the domain set up properly.  Again, for convenience, you
! can pass the domain and localedir to install to set everything up in one fell
! swoop:
! 
!     import gettext
!     gettext.install('mydomain', '/my/locale/dir')
! 
! If your program needs to support many languages at the same time, you will
! want to create Translation objects explicitly, like so:
! 
!     import gettext
!     gettext.install()
! 
!     lang1 = gettext.Translations(open('/path/to/my/lang1/messages.mo'))
!     lang2 = gettext.Translations(open('/path/to/my/lang2/messages.mo'))
!     lang3 = gettext.Translations(open('/path/to/my/lang3/messages.mo'))
! 
!     gettext.set(lang1)
!     # all _() will now translate to language 1
!     gettext.set(lang2)
!     # all _() will now translate to language 2

- Currently, only GNU gettext format binary .mo files are supported.
- 
  """

--- 7,12 ----
  I18N refers to the operation by which a program is made aware of multiple
  languages.  L10N refers to the adaptation of your program, once
! internationalized, to the local language and cultural habits.

  """

***************
*** 105,123 ****
  # Barry Warsaw integrated these modules, wrote the .install() API and code,
  # and conformed all C and Python code to Python's coding standards.

  import os
  import sys
  import struct
! from UserDict import UserDict
! 
! 
! 
! # globals
! _translations = {}
! _current_translation = None
! _current_domain = 'messages'

! # Domain to directory mapping, for use by bindtextdomain()
! _localedirs = {}

--- 29,53 ----
  # Barry Warsaw integrated these modules, wrote the .install() API and code,
  # and conformed all C and Python code to Python's coding standards.
+ #
+ # Francois Pinard and Marc-Andre Lemburg also contributed valuably to this
+ # module.
+ #
+ # TODO:
+ # - Lazy loading of .mo files.  Currently the entire catalog is loaded into
+ #   memory, but that's probably bad for large translated programs.  Instead,
+ #   the lexical sort of original strings in GNU .mo files should be exploited
+ #   to do binary searches and lazy initializations.  Or you might want to use
+ #   the undocumented double-hash algorithm for .mo files with hash tables, but
+ #   you'll need to study the GNU gettext code to do this.
+ #
+ # - Support Solaris .mo file formats.  Unfortunately, we've been unable to
+ #   find this format documented anywhere.

  import os
  import sys
  import struct
! from errno import ENOENT

! _default_localedir = os.path.join(sys.prefix, 'share', 'locale')

***************
*** 166,180 ****

! class GNUTranslations(UserDict):
!     # Magic number of .mo files
!     MAGIC = 0x950412de

!     def __init__(self, fp):
!         if fp is None:
!             d = {}
!         else:
!             d = self._parse(fp)
!         UserDict.__init__(self, d)

      def _parse(self, fp):
          """Override this method to support alternative .mo formats."""
--- 96,131 ----

! class NullTranslations:
!     def __init__(self, fp=None):
!         self._info = {}
!         self._charset = None
!         if fp:
!             self._parse(fp)

!     def _parse(self, fp):
!         pass
! 
!     def gettext(self, message):
!         return message
! 
!     def ugettext(self, message):
!         return unicode(message)
! 
!     def info(self):
!         return self._info
! 
!     def charset(self):
!         return self._charset
! 
!     def install(self, unicode=0):
!         import __builtin__
!         __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext

+ 
+ class GNUTranslations(NullTranslations):
+     # Magic number of .mo files
+     LE_MAGIC = 0x950412de
+     BE_MAGIC = struct.unpack('>i', struct.pack('<i', LE_MAGIC))[0]
+ 
      def _parse(self, fp):
          """Override this method to support alternative .mo formats."""
***************
*** 183,191 ****
          # Parse the .mo file header, which consists of 5 little endian 32
          # bit words.
!         catalog = {}
          buf = fp.read()
!         magic, version, msgcount, masteridx, transidx = unpack(
!             '<5i', buf[:20])
!         if magic <> self.MAGIC:
              raise IOError(0, 'Bad magic number', filename)
          #
--- 134,148 ----
          # Parse the .mo file header, which consists of 5 little endian 32
          # bit words.
!         self._catalog = catalog = {}
          buf = fp.read()
!         # Are we big endian or little endian?
!         magic = unpack('<i', buf[:4])[0]
!         if magic == self.LE_MAGIC:
!             version, msgcount, masteridx, transidx = unpack('<4i', buf[4:20])
!             ii = '<ii'
!         elif magic == self.BE_MAGIC:
!             version, msgcount, masteridx, transidx = unpack('>4i', buf[4:20])
!             ii = '>ii'
!         else:
              raise IOError(0, 'Bad magic number', filename)
          #
***************
*** 193,231 ****
          # dictionary.
          for i in xrange(0, msgcount):
!             mstart = unpack('<i', buf[masteridx+4:masteridx+8])[0]
!             mend = mstart + unpack('<i', buf[masteridx:masteridx+4])[0]
!             tstart = unpack('<i', buf[transidx+4:transidx+8])[0]
!             tend = tstart + unpack('<i', buf[transidx:transidx+4])[0]
              if mend < len(buf) and tend < len(buf):
!                 catalog[buf[mstart:mend]] = buf[tstart:tend]
              else:
                  raise IOError(0, 'File is corrupt', filename)
!             #
              # advance to next entry in the seek tables
              masteridx += 8
              transidx += 8
-         return catalog

! 
! # By default, use GNU gettext format .mo files
! Translations = GNUTranslations

  # Locate a .mo file using the gettext strategy
! def _find(localedir=None, languages=None, domain=None):
!     global _current_domain
!     global _localedirs
      # Get some reasonable defaults for arguments that were not supplied
-     if domain is None:
-         domain = _current_domain
      if localedir is None:
!         localedir = _localedirs.get(
!             domain,
!             # TBD: The default localedir is actually system dependent.  I
!             # don't know of a good platform-consistent and portable way to
!             # default it, so instead, we'll just use sys.prefix.  Most
!             # programs should be calling bindtextdomain() or such explicitly
!             # anyway.
!             os.path.join(sys.prefix, 'share', 'locale'))
      if languages is None:
          languages = []
--- 150,193 ----
          # dictionary.
          for i in xrange(0, msgcount):
!             mlen, moff = unpack(ii, buf[masteridx:masteridx+8])
!             mend = moff + mlen
!             tlen, toff = unpack(ii, buf[transidx:transidx+8])
!             tend = toff + tlen
              if mend < len(buf) and tend < len(buf):
!                 tmsg = buf[toff:tend]
!                 catalog[buf[moff:mend]] = tmsg
              else:
                  raise IOError(0, 'File is corrupt', filename)
!             # See if we're looking at GNU .mo conventions for metadata
!             if mlen == 0 and tmsg.lower().startswith('project-id-version:'):
!                 # Catalog description
!                 for item in tmsg.split('\n'):
!                     item = item.strip()
!                     if not item:
!                         continue
!                     k, v = item.split(':', 1)
!                     k = k.strip().lower()
!                     v = v.strip()
!                     self._info[k] = v
!                     if k == 'content-type':
!                         self._charset = v.split('charset=')[1]
              # advance to next entry in the seek tables
              masteridx += 8
              transidx += 8

+     def gettext(self, message):
+         return self._catalog.get(message, message)

!     def ugettext(self, message):
!         tmsg = self._catalog.get(message, message)
!         return unicode(tmsg, self._charset)
! 

+ 
  # Locate a .mo file using the gettext strategy
! def find(domain, localedir=None, languages=None):
      # Get some reasonable defaults for arguments that were not supplied
      if localedir is None:
!         localedir = _default_localedir
      if languages is None:
          languages = []
***************
*** 248,318 ****
              break
          mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
!         # see if it's in the cache
!         mo = _translations.get(mofile)
!         if mo:
!             return mo
!         fp = None
!         try:
!             try:
!                 fp = open(mofile, 'rb')
!                 t = Translations(fp)
!                 _translations[mofile] = t
!                 return t
!             except IOError:
!                 pass
!         finally:
!             if fp:
!                 fp.close()
!     return {}

! def bindtextdomain(domain=None, localedir=None):
!     """Bind domain to a file in the specified directory."""
!     global _localedirs
!     if domain is None:
!         return None
!     if localedir is None:
!         return _localedirs.get(domain, _localedirs.get('C'))
!     _localedirs[domain] = localedir
!     return localedir

  def textdomain(domain=None):
-     """Change or query the current global domain."""
      global _current_domain
!     if domain is None:
!         return _current_domain
!     else:
          _current_domain = domain
!         return domain

! def gettext(message):
!     """Return localized version of a message."""
!     return _find().get(message, message)

  def dgettext(domain, message):
!     """Like gettext(), but look up message in specified domain."""
!     return _find(domain=domain).get(message, message)
! 

! 
! # A higher level API
! def set(translation):
!     global _current_translation
!     _current_translation = translation

! def get():
!     global _current_translation
!     return _current_translation

! def install(domain=None, localedir=None):
!     import __builtin__
!     __builtin__.__dict__['_'] = gettext
!     if domain is not None:
!         bindtextdomain(domain, localedir)
!         textdomain(domain)
--- 210,285 ----
              break
          mofile = os.path.join(localedir, lang, 'LC_MESSAGES', '%s.mo' % domain)
!         if os.path.exists(mofile):
!             return mofile
!     return None

! # a mapping between absolute .mo file path and Translation object
! _translations = {}
! 
! def translation(domain, localedir=None, languages=None, class_=None):
!     if class_ is None:
!         class_ = GNUTranslations
!     mofile = find(domain, localedir, languages)
!     if mofile is None:
!         raise IOError(ENOENT, 'No translation file found for domain', domain)
!     key = os.path.abspath(mofile)
!     # TBD: do we need to worry about the file pointer getting collected?
!     t = _translations.setdefault(key, class_(open(mofile, 'rb')))
!     return t
! 
! 
! 
! def install(domain, localedir=None, unicode=0):
!     translation(domain, localedir).install(unicode)
! 
! 
! 
! # a mapping b/w domains and locale directories
! _localedirs = {}
! # current global domain, `messages' used for compatibility w/ GNU gettext
! _current_domain = 'messages'

  def textdomain(domain=None):
      global _current_domain
!     if domain is not None:
          _current_domain = domain
!     return _current_domain

! def bindtextdomain(domain, localedir=None):
!     global _localedirs
!     if localedir is not None:
!         _localedirs[domain] = localedir
!     return _localedirs.get(domain, _default_localedir)

  def dgettext(domain, message):
!     try:
!         t = translation(domain, _localedirs.get(domain, None))
!     except IOError:
!         return message
!     return t.gettext(message)
!     

! def gettext(message):
!     return dgettext(_current_domain, message)

! # dcgettext() has been deemed unnecessary and is not implemented.

+ # James Henstridge's Catalog constructor from GNOME gettext.  Documented usage
+ # was:
+ #
+ #    import gettext
+ #    cat = gettext.Catalog(PACKAGE, localedir=LOCALEDIR)
+ #    _ = cat.gettext
+ #    print _('Hello World')
+ 
+ # The resulting catalog object currently don't support access through a
+ # dictionary API, which was supported (but apparently unused) in GNOME
+ # gettext.

! Catalog = translation