[Python-checkins] python/dist/src/Lib gettext.py,1.19,1.20

Thu, 24 Apr 2003 11:13:42 -0700

Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1:/tmp/cvs-serv20337

Modified Files:
	gettext.py 
Log Message:
GNUTranslations:

    __init__(): Removed since we no longer need the coerce flag.
    Message ids and strings are now always coerced to Unicode, /if/
    the catalog specified a charset parameter.

    gettext(), ngettext(): Since the message strings are stored as Unicode
    in the catalog, coerce back to encoded 8-bit strings on return.

    ugettext(), ungettext(): Coerce the message ids to Unicode when
    there's no entry for the id in the catalog.

Minor code cleanups; use booleans where appropriate.

Index: gettext.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/gettext.py,v
retrieving revision 1.19
retrieving revision 1.20
diff -C2 -d -r1.19 -r1.20
*** gettext.py	11 Apr 2003 20:26:47 -0000	1.19
--- gettext.py	24 Apr 2003 18:13:39 -0000	1.20
***************
*** 74,81 ****

  def c2py(plural):
!     """
!     Gets a C expression as used in PO files for plural forms and
!     returns a Python lambda function that implements an equivalent
!     expression.
      """
      # Security check, allow only the "n" identifier
--- 74,79 ----

  def c2py(plural):
!     """Gets a C expression as used in PO files for plural forms and returns a
!     Python lambda function that implements an equivalent expression.
      """
      # Security check, allow only the "n" identifier
***************
*** 84,88 ****
      tokens = tokenize.generate_tokens(StringIO(plural).readline)
      try:
!         danger = [ x for x in tokens if x[0] == token.NAME and x[1] != 'n' ]
      except tokenize.TokenError:
          raise ValueError, \
--- 82,86 ----
      tokens = tokenize.generate_tokens(StringIO(plural).readline)
      try:
!         danger = [x for x in tokens if x[0] == token.NAME and x[1] != 'n']
      except tokenize.TokenError:
          raise ValueError, \
***************
*** 219,223 ****
          return self._charset

!     def install(self, unicode=0):
          import __builtin__
          __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
--- 217,221 ----
          return self._charset

!     def install(self, unicode=False):
          import __builtin__
          __builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
***************
*** 229,238 ****
      BE_MAGIC = 0xde120495L

-     def __init__(self, fp=None, coerce=False):
-         # Set this attribute before calling the base class constructor, since
-         # the latter calls _parse() which depends on self._coerce.
-         self._coerce = coerce
-         NullTranslations.__init__(self, fp)
- 
      def _parse(self, fp):
          """Override this method to support alternative .mo formats."""
--- 227,230 ----
***************
*** 282,294 ****
                      elif k == 'plural-forms':
                          v = v.split(';')
- ##                        nplurals = v[0].split('nplurals=')[1]
- ##                        nplurals = int(nplurals.strip())
                          plural = v[1].split('plural=')[1]
                          self.plural = c2py(plural)
              if msg.find('\x00') >= 0:
                  # Plural forms
                  msgid1, msgid2 = msg.split('\x00')
                  tmsg = tmsg.split('\x00')
!                 if self._coerce:
                      msgid1 = unicode(msgid1, self._charset)
                      tmsg = [unicode(x, self._charset) for x in tmsg]
--- 274,293 ----
                      elif k == 'plural-forms':
                          v = v.split(';')
                          plural = v[1].split('plural=')[1]
                          self.plural = c2py(plural)
+             # Note: we unconditionally convert both msgids and msgstrs to
+             # Unicode using the character encoding specified in the charset
+             # parameter of the Content-Type header.  The gettext documentation
+             # strongly encourages msgids to be us-ascii, but some applications
+             # require alternative encodings (e.g. Zope's ZCML and ZPT).  For
+             # traditional gettext applications, the msgid conversion will
+             # cause no problems since us-ascii should always be a subset of
+             # the charset encoding.  We may want to fall back to 8-bit msgids
+             # if the Unicode conversion fails.
              if msg.find('\x00') >= 0:
                  # Plural forms
                  msgid1, msgid2 = msg.split('\x00')
                  tmsg = tmsg.split('\x00')
!                 if self._charset:
                      msgid1 = unicode(msgid1, self._charset)
                      tmsg = [unicode(x, self._charset) for x in tmsg]
***************
*** 296,300 ****
                      catalog[(msgid1, i)] = tmsg[i]
              else:
!                 if self._coerce:
                      msg = unicode(msg, self._charset)
                      tmsg = unicode(tmsg, self._charset)
--- 295,299 ----
                      catalog[(msgid1, i)] = tmsg[i]
              else:
!                 if self._charset:
                      msg = unicode(msg, self._charset)
                      tmsg = unicode(tmsg, self._charset)
***************
*** 305,318 ****

      def gettext(self, message):
!         try:
!             return self._catalog[message]
!         except KeyError:
              if self._fallback:
                  return self._fallback.gettext(message)
              return message

      def ngettext(self, msgid1, msgid2, n):
          try:
!             return self._catalog[(msgid1, self.plural(n))]
          except KeyError:
              if self._fallback:
--- 304,324 ----

      def gettext(self, message):
!         missing = object()
!         tmsg = self._catalog.get(message, missing)
!         if tmsg is missing:
              if self._fallback:
                  return self._fallback.gettext(message)
              return message
+         # Encode the Unicode tmsg back to an 8-bit string, if possible
+         if self._charset:
+             return tmsg.encode(self._charset)
+         return tmsg

      def ngettext(self, msgid1, msgid2, n):
          try:
!             tmsg = self._catalog[(msgid1, self.plural(n))]
!             if self._charset:
!                 return tmsg.encode(self._charset)
!             return tmsg
          except KeyError:
              if self._fallback:
***************
*** 329,336 ****
              if self._fallback:
                  return self._fallback.ugettext(message)
!             tmsg = message
!         if not self._coerce:
!             return unicode(tmsg, self._charset)
!         # The msgstr is already coerced to Unicode
          return tmsg

--- 335,339 ----
              if self._fallback:
                  return self._fallback.ugettext(message)
!             return unicode(message)
          return tmsg

***************
*** 342,351 ****
                  return self._fallback.ungettext(msgid1, msgid2, n)
              if n == 1:
!                 tmsg = msgid1
              else:
!                 tmsg = msgid2
!         if not self._coerce:
!             return unicode(tmsg, self._charset)
!         # The msgstr is already coerced to Unicode
          return tmsg

--- 345,351 ----
                  return self._fallback.ungettext(msgid1, msgid2, n)
              if n == 1:
!                 tmsg = unicode(msgid1)
              else:
!                 tmsg = unicode(msgid2)
          return tmsg

***************
*** 393,401 ****

  def translation(domain, localedir=None, languages=None,
!                 class_=None, fallback=0):
      if class_ is None:
          class_ = GNUTranslations
      mofiles = find(domain, localedir, languages, all=1)
!     if len(mofiles)==0:
          if fallback:
              return NullTranslations()
--- 393,401 ----

  def translation(domain, localedir=None, languages=None,
!                 class_=None, fallback=False):
      if class_ is None:
          class_ = GNUTranslations
      mofiles = find(domain, localedir, languages, all=1)
!     if not mofiles:
          if fallback:
              return NullTranslations()
***************
*** 420,425 ****

! def install(domain, localedir=None, unicode=0):
!     translation(domain, localedir, fallback=1).install(unicode)

--- 420,425 ----

! def install(domain, localedir=None, unicode=False):
!     translation(domain, localedir, fallback=True).install(unicode)