[Python-checkins] r46030 - in python/trunk: Doc/lib/liblocale.tex Lib/locale.py Lib/test/test_locale.py Misc/NEWS

georg.brandl python-checkins at python.org
Wed May 17 17:51:18 CEST 2006


Author: georg.brandl
Date: Wed May 17 17:51:16 2006
New Revision: 46030

Modified:
   python/trunk/Doc/lib/liblocale.tex
   python/trunk/Lib/locale.py
   python/trunk/Lib/test/test_locale.py
   python/trunk/Misc/NEWS
Log:
Patch #1180296: improve locale string formatting functions



Modified: python/trunk/Doc/lib/liblocale.tex
==============================================================================
--- python/trunk/Doc/lib/liblocale.tex	(original)
+++ python/trunk/Doc/lib/liblocale.tex	Wed May 17 17:51:16 2006
@@ -61,7 +61,7 @@
   Returns the database of the local conventions as a dictionary.
   This dictionary has the following strings as keys:
 
-  \begin{tableiii}{l|l|p{3in}}{constant}{Key}{Category}{Meaning}
+  \begin{tableiii}{l|l|p{3in}}{constant}{Category}{Key}{Meaning}
     \lineiii{LC_NUMERIC}{\code{'decimal_point'}}
             {Decimal point character.}
     \lineiii{}{\code{'grouping'}}
@@ -76,8 +76,20 @@
             {International currency symbol.}
     \lineiii{}{\code{'currency_symbol'}}
             {Local currency symbol.}
+    \lineiii{}{\code{'p_cs_precedes/n_cs_precedes'}}
+            {Whether the currency symbol precedes the value (for positive and
+             negative values, respectively).}
+    \lineiii{}{\code{'p_sep_by_space/n_sep_by_space'}}
+            {Whether the currency symbol is separated from the value
+             by a space (for positive and negative values, respectively).}
     \lineiii{}{\code{'mon_decimal_point'}}
             {Decimal point used for monetary values.}
+    \lineiii{}{\code{'frac_digits'}}
+            {Number of fractional digits used in local formatting
+             of monetary values.}
+    \lineiii{}{\code{'int_frac_digits'}}
+            {Number of fractional digits used in international
+             formatting of monetary values.}
     \lineiii{}{\code{'mon_thousands_sep'}}
             {Group separator used for monetary values.}
     \lineiii{}{\code{'mon_grouping'}}
@@ -87,13 +99,12 @@
             {Symbol used to annotate a positive monetary value.}
     \lineiii{}{\code{'negative_sign'}}
             {Symbol used to annotate a negative monetary value.}
-    \lineiii{}{\code{'frac_digits'}}
-            {Number of fractional digits used in local formatting
-             of monetary values.}
-    \lineiii{}{\code{'int_frac_digits'}}
-            {Number of fractional digits used in international
-             formatting of monetary values.}
+    \lineiii{}{\code{'p_sign_posn/n_sign_posn'}}
+            {The position of the sign (for positive and negative values, respectively); see below.}
   \end{tableiii}
+  
+  All numeric values can be set to \constant{CHAR_MAX} to indicate that
+  there is no value specified in this locale.
 
   The possible values for \code{'p_sign_posn'} and
   \code{'n_sign_posn'} are given below.
@@ -104,7 +115,7 @@
     \lineii{2}{The sign should follow the value and currency symbol.}
     \lineii{3}{The sign should immediately precede the value.}
     \lineii{4}{The sign should immediately follow the value.}
-    \lineii{\constant{LC_MAX}}{Nothing is specified in this locale.}
+    \lineii{\constant{CHAR_MAX}}{Nothing is specified in this locale.}
   \end{tableii}
 \end{funcdesc}
 
@@ -206,12 +217,44 @@
   strings.
 \end{funcdesc}
 
-\begin{funcdesc}{format}{format, val\optional{, grouping}}
+\begin{funcdesc}{format}{format, val\optional{, grouping\optional{, monetary}}}
   Formats a number \var{val} according to the current
   \constant{LC_NUMERIC} setting.  The format follows the conventions
   of the \code{\%} operator.  For floating point values, the decimal
   point is modified if appropriate.  If \var{grouping} is true, also
   takes the grouping into account.
+
+  If \var{monetary} is true, the conversion uses monetary thousands
+  separator and grouping strings.
+
+  Please note that this function will only work for exactly one \%char
+  specifier. For whole format strings, use \function{format_string()}.
+
+  \versionchanged[Added the \var{monetary} parameter]{2.5}
+\end{funcdesc}
+
+\begin{funcdesc}{format_string}{format, val\optional{, grouping}}
+  Processes formatting specifiers as in \code{format \% val},
+  but takes the current locale settings into account.
+
+  \versionadded{2.5}
+\end{funcdesc}
+
+\begin{funcdesc}{currency}{val\optional{, symbol\optional{, grouping\optional{, international}}}}
+  Formats a number \var{val} according to the current \constant{LC_MONETARY}
+  settings. 
+  
+  The returned string includes the currency symbol if \var{symbol} is true,
+  which is the default.
+  If \var{grouping} is true (which is not the default), the digits of the
+  value are grouped.
+  If \var{international} is true (which is not the default), the international
+  currency symbol is used.
+
+  Note that this function will not work with the `C' locale, so you have to set
+  a locale via \function{setlocale()} first.
+
+  \versionadded{2.5}
 \end{funcdesc}
 
 \begin{funcdesc}{str}{float}

Modified: python/trunk/Lib/locale.py
==============================================================================
--- python/trunk/Lib/locale.py	(original)
+++ python/trunk/Lib/locale.py	Wed May 17 17:51:16 2006
@@ -88,13 +88,16 @@
 ### Number formatting APIs
 
 # Author: Martin von Loewis
+# improved by Georg Brandl
 
 #perform the grouping from right to left
-def _group(s):
-    conv=localeconv()
-    grouping=conv['grouping']
-    if not grouping:return (s, 0)
-    result=""
+def _group(s, monetary=False):
+    conv = localeconv()
+    thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
+    grouping = conv[monetary and 'mon_grouping' or 'grouping']
+    if not grouping:
+        return (s, 0)
+    result = ""
     seps = 0
     spaces = ""
     if s[-1] == ' ':
@@ -103,63 +106,142 @@
         s = s[:sp]
     while s and grouping:
         # if grouping is -1, we are done
-        if grouping[0]==CHAR_MAX:
+        if grouping[0] == CHAR_MAX:
             break
         # 0: re-use last group ad infinitum
-        elif grouping[0]!=0:
+        elif grouping[0] != 0:
             #process last group
-            group=grouping[0]
-            grouping=grouping[1:]
+            group = grouping[0]
+            grouping = grouping[1:]
         if result:
-            result=s[-group:]+conv['thousands_sep']+result
+            result = s[-group:] + thousands_sep + result
             seps += 1
         else:
-            result=s[-group:]
-        s=s[:-group]
+            result = s[-group:]
+        s = s[:-group]
         if s and s[-1] not in "0123456789":
             # the leading string is only spaces and signs
-            return s+result+spaces,seps
+            return s + result + spaces, seps
     if not result:
-        return s+spaces,seps
+        return s + spaces, seps
     if s:
-        result=s+conv['thousands_sep']+result
+        result = s + thousands_sep + result
         seps += 1
-    return result+spaces,seps
+    return result + spaces, seps
 
-def format(f,val,grouping=0):
-    """Formats a value in the same way that the % formatting would use,
+def format(percent, value, grouping=False, monetary=False, *additional):
+    """Returns the locale-aware substitution of a %? specifier
+    (percent).
+    
+    additional is for format strings which contain one or more
+    '*' modifiers."""
+    # this is only for one-percent-specifier strings and this should be checked
+    if percent[0] != '%':
+        raise ValueError("format() must be given exactly one %char "
+                         "format specifier")
+    if additional:
+        formatted = percent % ((value,) + additional)
+    else:
+        formatted = percent % value
+    # floats and decimal ints need special action!
+    if percent[-1] in 'eEfFgG':
+        seps = 0
+        parts = formatted.split('.')
+        if grouping:
+            parts[0], seps = _group(parts[0], monetary=monetary)
+        decimal_point = localeconv()[monetary and 'mon_decimal_point'
+                                              or 'decimal_point']
+        formatted = decimal_point.join(parts)
+        while seps:
+            sp = formatted.find(' ')
+            if sp == -1: break
+            formatted = formatted[:sp] + formatted[sp+1:]
+            seps -= 1
+    elif percent[-1] in 'diu':
+        if grouping:
+            formatted = _group(formatted, monetary=monetary)[0]
+    return formatted
+
+import re, operator
+_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
+                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
+
+def format_string(f, val, grouping=False):
+    """Formats a string in the same way that the % formatting would use,
     but takes the current locale into account.
     Grouping is applied if the third parameter is true."""
-    result = f % val
-    fields = result.split(".")
-    seps = 0
-    if grouping:
-        fields[0],seps=_group(fields[0])
-    if len(fields)==2:
-        result = fields[0]+localeconv()['decimal_point']+fields[1]
-    elif len(fields)==1:
-        result = fields[0]
+    percents = list(_percent_re.finditer(f))
+    new_f = _percent_re.sub('%s', f)
+
+    if isinstance(val, tuple):
+        new_val = list(val)
+        i = 0
+        for perc in percents:
+            starcount = perc.group('modifiers').count('*')
+            new_val[i] = format(perc.group(), new_val[i], grouping, False, *new_val[i+1:i+1+starcount])
+            del new_val[i+1:i+1+starcount]
+            i += (1 + starcount)
+        val = tuple(new_val)
+    elif operator.isMappingType(val):
+        for perc in percents:
+            key = perc.group("key")
+            val[key] = format(perc.group(), val[key], grouping)
     else:
-        raise Error, "Too many decimal points in result string"
+        # val is a single value
+        val = format(percents[0].group(), val, grouping)
+
+    return new_f % val
 
-    while seps:
-        # If the number was formatted for a specific width, then it
-        # might have been filled with spaces to the left or right. If
-        # so, kill as much spaces as there where separators.
-        # Leading zeroes as fillers are not yet dealt with, as it is
-        # not clear how they should interact with grouping.
-        sp = result.find(" ")
-        if sp==-1:break
-        result = result[:sp]+result[sp+1:]
-        seps -= 1
+def currency(val, symbol=True, grouping=False, international=False):
+    """Formats val according to the currency settings
+    in the current locale."""
+    conv = localeconv()
+
+    # check for illegal values
+    digits = conv[international and 'int_frac_digits' or 'frac_digits']
+    if digits == 127:
+        raise ValueError("Currency formatting is not possible using "
+                         "the 'C' locale.")
+
+    s = format('%%.%if' % digits, abs(val), grouping, monetary=True)
+    # '<' and '>' are markers if the sign must be inserted between symbol and value
+    s = '<' + s + '>'
+
+    if symbol:
+        smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
+        precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
+        separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
+
+        if precedes:
+            s = smb + (separated and ' ' or '') + s
+        else:
+            s = s + (separated and ' ' or '') + smb
+
+    sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
+    sign = conv[val<0 and 'negative_sign' or 'positive_sign']
+
+    if sign_pos == 0:
+        s = '(' + s + ')'
+    elif sign_pos == 1:
+        s = sign + s
+    elif sign_pos == 2:
+        s = s + sign
+    elif sign_pos == 3:
+        s = s.replace('<', sign)
+    elif sign_pos == 4:
+        s = s.replace('>', sign)
+    else:
+        # the default if nothing specified;
+        # this should be the most fitting sign position
+        s = sign + s
 
-    return result
+    return s.replace('<', '').replace('>', '')
 
 def str(val):
     """Convert float to integer, taking the locale into account."""
-    return format("%.12g",val)
+    return format("%.12g", val)
 
-def atof(string,func=float):
+def atof(string, func=float):
     "Parses a string as a float according to the locale settings."
     #First, get rid of the grouping
     ts = localeconv()['thousands_sep']
@@ -179,10 +261,10 @@
 def _test():
     setlocale(LC_ALL, "")
     #do grouping
-    s1=format("%d", 123456789,1)
+    s1 = format("%d", 123456789,1)
     print s1, "is", atoi(s1)
     #standard formatting
-    s1=str(3.14)
+    s1 = str(3.14)
     print s1, "is", atof(s1)
 
 ### Locale name aliasing engine

Modified: python/trunk/Lib/test/test_locale.py
==============================================================================
--- python/trunk/Lib/test/test_locale.py	(original)
+++ python/trunk/Lib/test/test_locale.py	Wed May 17 17:51:16 2006
@@ -20,14 +20,14 @@
 else:
     raise ImportError, "test locale not supported (tried %s)"%(', '.join(tlocs))
 
-def testformat(formatstr, value, grouping = 0, output=None):
+def testformat(formatstr, value, grouping = 0, output=None, func=locale.format):
     if verbose:
         if output:
             print "%s %% %s =? %s ..." %\
                 (repr(formatstr), repr(value), repr(output)),
         else:
             print "%s %% %s works? ..." % (repr(formatstr), repr(value)),
-    result = locale.format(formatstr, value, grouping = grouping)
+    result = func(formatstr, value, grouping = grouping)
     if output and result != output:
         if verbose:
             print 'no'
@@ -49,6 +49,27 @@
     testformat("%-10.f", 4200, grouping=1, output='4%s200     ' % sep)
     # Invoke getpreferredencoding to make sure it does not cause exceptions,
     locale.getpreferredencoding()
+
+    # === Test format_string() with more complex formatting strings
+    # test if grouping is independent from other characters in formatting string
+    testformat("One million is %i", 1000000, grouping=1, output='One million is 1,000,000',
+               func=locale.format_string)
+    testformat("One  million is %i", 1000000, grouping=1, output='One  million is 1,000,000',
+               func=locale.format_string)
+    # test dots in formatting string
+    testformat(".%f.", 1000.0, output='.1000.000000.', func=locale.format_string)
+    # test floats
+    testformat("--> %10.2f", 1000.0, grouping=1, output='-->   1,000.00',
+               func=locale.format_string)
+    # test asterisk formats
+    testformat("%10.*f", (2, 1000.0), grouping=0, output='   1000.00',
+               func=locale.format_string)
+    testformat("%*.*f", (10, 2, 1000.0), grouping=1, output='  1,000.00',
+               func=locale.format_string)
+    # test more-in-one
+    testformat("int %i float %.2f str %s", (1000, 1000.0, 'str'), grouping=1,
+               output='int 1,000 float 1,000.00 str str', func=locale.format_string)
+
 finally:
     locale.setlocale(locale.LC_NUMERIC, oldlocale)
 

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Wed May 17 17:51:16 2006
@@ -45,6 +45,10 @@
 Library
 -------
 
+- Patch #1180296: Two new functions were added to the locale module:
+  format_string() to get the effect of  "format % items" but locale-aware,
+  and currency() to format a monetary number with currency sign.
+
 - Patch #1486962: Several bugs in the turtle Tk demo module were fixed
   and several features added, such as speed and geometry control.
 


More information about the Python-checkins mailing list