[Python-checkins] r71460 - in python/branches/py3k-short-float-repr: Include/bytesobject.h Include/unicodeobject.h Lib/test/test_types.py Objects/bytesobject.c Objects/stringlib/formatter.h Objects/stringlib/localeutil.h Python/pystrtod.c

eric.smith python-checkins at python.org
Sat Apr 11 07:44:20 CEST 2009


Author: eric.smith
Date: Sat Apr 11 07:44:19 2009
New Revision: 71460

Log:
Added locale-aware formatting back in, and added ',' formatting for ints and floats.
Some bugs remain, but all existing tests pass. Once we're done with the merge to py3k (and the bug tracker is back up) I'll add test cases and fix bugs.

Modified:
   python/branches/py3k-short-float-repr/Include/bytesobject.h
   python/branches/py3k-short-float-repr/Include/unicodeobject.h
   python/branches/py3k-short-float-repr/Lib/test/test_types.py
   python/branches/py3k-short-float-repr/Objects/bytesobject.c
   python/branches/py3k-short-float-repr/Objects/stringlib/formatter.h
   python/branches/py3k-short-float-repr/Objects/stringlib/localeutil.h
   python/branches/py3k-short-float-repr/Python/pystrtod.c

Modified: python/branches/py3k-short-float-repr/Include/bytesobject.h
==============================================================================
--- python/branches/py3k-short-float-repr/Include/bytesobject.h	(original)
+++ python/branches/py3k-short-float-repr/Include/bytesobject.h	Sat Apr 11 07:44:19 2009
@@ -91,24 +91,22 @@
    into the string pointed to by buffer.  For the argument descriptions,
    see Objects/stringlib/localeutil.h */
 
-PyAPI_FUNC(int) _PyBytes_InsertThousandsGroupingLocale(char *buffer,
-						  Py_ssize_t n_buffer,
-						  Py_ssize_t n_digits,
-						  Py_ssize_t buf_size,
-						  Py_ssize_t *count,
-						  int append_zero_char);
+PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGroupingLocale(char *buffer,
+                                                   Py_ssize_t n_buffer,
+                                                   char *digits,
+                                                   Py_ssize_t n_digits,
+                                                   Py_ssize_t min_width);
 
 /* Using explicit passed-in values, insert the thousands grouping
    into the string pointed to by buffer.  For the argument descriptions,
    see Objects/stringlib/localeutil.h */
-PyAPI_FUNC(int) _PyBytes_InsertThousandsGrouping(char *buffer,
-						 Py_ssize_t n_buffer,
-						 Py_ssize_t n_digits,
-						 Py_ssize_t buf_size,
-						 Py_ssize_t *count,
-                                                 int append_zero_char,
-                                                 const char *grouping,
-                                                 const char *thousands_sep);
+PyAPI_FUNC(Py_ssize_t) _PyBytes_InsertThousandsGrouping(char *buffer,
+                                                   Py_ssize_t n_buffer,
+                                                   char *digits,
+                                                   Py_ssize_t n_digits,
+                                                   Py_ssize_t min_width,
+                                                   const char *grouping,
+                                                   const char *thousands_sep);
 
 /* Flags used by string formatting */
 #define F_LJUST (1<<0)

Modified: python/branches/py3k-short-float-repr/Include/unicodeobject.h
==============================================================================
--- python/branches/py3k-short-float-repr/Include/unicodeobject.h	(original)
+++ python/branches/py3k-short-float-repr/Include/unicodeobject.h	Sat Apr 11 07:44:19 2009
@@ -1482,24 +1482,22 @@
    into the string pointed to by buffer.  For the argument descriptions,
    see Objects/stringlib/localeutil.h */
 
-PyAPI_FUNC(int) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
-						  Py_ssize_t n_buffer,
-						  Py_ssize_t n_digits,
-						  Py_ssize_t buf_size,
-						  Py_ssize_t *count,
-						  int append_zero_char);
+PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
+                                                   Py_ssize_t n_buffer,
+                                                   Py_UNICODE *digits,
+                                                   Py_ssize_t n_digits,
+                                                   Py_ssize_t min_width);
 
 /* Using explicit passed-in values, insert the thousands grouping
    into the string pointed to by buffer.  For the argument descriptions,
    see Objects/stringlib/localeutil.h */
-PyAPI_FUNC(int) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
-						 Py_ssize_t n_buffer,
-						 Py_ssize_t n_digits,
-						 Py_ssize_t buf_size,
-						 Py_ssize_t *count,
-                                                 int append_zero_char,
-                                                 const char *grouping,
-                                                 const char *thousands_sep);
+PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
+                                                   Py_ssize_t n_buffer,
+                                                   Py_UNICODE *digits,
+                                                   Py_ssize_t n_digits,
+                                                   Py_ssize_t min_width,
+                                                   const char *grouping,
+                                                   const char *thousands_sep);
 /* === Characters Type APIs =============================================== */
 
 /* Helper array used by Py_UNICODE_ISSPACE(). */

Modified: python/branches/py3k-short-float-repr/Lib/test/test_types.py
==============================================================================
--- python/branches/py3k-short-float-repr/Lib/test/test_types.py	(original)
+++ python/branches/py3k-short-float-repr/Lib/test/test_types.py	Sat Apr 11 07:44:19 2009
@@ -361,6 +361,8 @@
         self.assertRaises(TypeError, 3 .__format__, 0)
         # can't have ',' with 'n'
         self.assertRaises(ValueError, 3 .__format__, ",n")
+        # can't have ',' with 'c'
+        self.assertRaises(ValueError, 3 .__format__, ",c")
 
         # ensure that only int and float type specifiers work
         for format_spec in ([chr(x) for x in range(ord('a'), ord('z')+1)] +
@@ -467,16 +469,14 @@
                 self.assertEqual(value.__format__(format_spec),
                                  float(value).__format__(format_spec))
 
-    # XXX: needs to be put back in when 'n' formatting is complete
-    #  after the py3k-short-float-repr merge
-#    @run_with_locale('LC_NUMERIC', 'en_US.UTF8')
-#    def test_float__format__locale(self):
-#        # test locale support for __format__ code 'n'
-#
-#        for i in range(-10, 10):
-#            x = 1234567890.0 * (10.0 ** i)
-#            self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
-#            self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
+    @run_with_locale('LC_NUMERIC', 'en_US.UTF8')
+    def test_float__format__locale(self):
+        # test locale support for __format__ code 'n'
+
+        for i in range(-10, 10):
+            x = 1234567890.0 * (10.0 ** i)
+            self.assertEqual(locale.format('%g', x, grouping=True), format(x, 'n'))
+            self.assertEqual(locale.format('%.10g', x, grouping=True), format(x, '.10n'))
 
     @run_with_locale('LC_NUMERIC', 'en_US.UTF8')
     def test_int__format__locale(self):
@@ -552,9 +552,7 @@
         # a totaly empty format specifier means something else.
         # So, just use a sign flag
         test(1e200, '+g', '+1e+200')
-
-        # XXX this is a change from 3.0. Needs to be vetted.
-#        test(1e200, '+', '+1.0e+200')
+        test(1e200, '+', '+1e+200')
 
         test(1.1e200, '+g', '+1.1e+200')
         test(1.1e200, '+', '+1.1e+200')

Modified: python/branches/py3k-short-float-repr/Objects/bytesobject.c
==============================================================================
--- python/branches/py3k-short-float-repr/Objects/bytesobject.c	(original)
+++ python/branches/py3k-short-float-repr/Objects/bytesobject.c	Sat Apr 11 07:44:19 2009
@@ -562,6 +562,7 @@
 /* -------------------------------------------------------------------- */
 /* Methods */
 
+#include "stringlib/stringdefs.h"
 #define STRINGLIB_CHAR char
 
 #define STRINGLIB_CMP memcmp

Modified: python/branches/py3k-short-float-repr/Objects/stringlib/formatter.h
==============================================================================
--- python/branches/py3k-short-float-repr/Objects/stringlib/formatter.h	(original)
+++ python/branches/py3k-short-float-repr/Objects/stringlib/formatter.h	Sat Apr 11 07:44:19 2009
@@ -230,6 +230,7 @@
         ++ptr;
     }
 
+    /* XXX other types like xobXOB also invalid */
     if (format->type == 'n' && format->thousands_separators) {
         PyErr_Format(PyExc_ValueError, "Cannot specify ',' with 'n'.");
         return 0;
@@ -243,6 +244,25 @@
 /*********** common routines for numeric formatting *********************/
 /************************************************************************/
 
+/* Locale type codes. */
+#define LT_USE_LOCALE 0
+#define LT_DEFAULT_LOCALE 1
+#define LT_NO_LOCALE 2
+
+/* Locale info needed for formatting integers and the part of floats
+   before and including the decimal. Note that locales only support
+   8-bit chars, not unicode. */
+typedef struct {
+    int type;       /* One of the LT_* codes. Having this here is just
+                       an optimization for the common case of not
+                       using any locale info (LT_NO_LOCALE). It could
+                       really be inferred just by looking at the
+                       following fields.*/
+    char *decimal_point;
+    char *thousands_sep;
+    char *grouping;
+} LocaleInfo;
+
 /* describes the layout for an integer, see the comment in
    calc_number_widths() for details */
 typedef struct {
@@ -251,31 +271,79 @@
     Py_ssize_t n_spadding;
     Py_ssize_t n_rpadding;
     char sign;
-    Py_ssize_t n_sign;
+    Py_ssize_t n_sign;      /* number of chars needed for sign (0/1) */
+    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
+                                    any grouping chars. */
+    Py_ssize_t n_decimal;   /* 0 if only an integer */
+    Py_ssize_t n_remainder; /* digits in decimal and/or exponent part,
+                               excluding the decimal itself, if present */
     Py_ssize_t n_total; /* just a convenience, it's derivable from the
                            other fields */
+
+    Py_ssize_t n_digits; /* The number of digits before a decimal or
+                            exponent. */
+    Py_ssize_t n_min_width; /* The min_width we used when we computed
+                               the n_grouped_digits width. */
 } NumberFieldWidths;
 
+/* Given a number of the form:
+   digits[remainder]
+   where ptr points to the start and len is its length, find where
+    the integer part ends. What follows could be a decimal, an
+    exponent, both, or neither.
+   If a decimal point is present, set *has_decimal and skip past it;
+    *n_remainder receives the count of characters that follow.
+   Results are undefined (but shouldn't crash) for improperly
+    formatted strings.
+*/
+static void
+parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
+             Py_ssize_t *n_remainder, int *has_decimal)
+{
+    STRINGLIB_CHAR *end = ptr + len;
+    STRINGLIB_CHAR *remainder;
+
+    while (ptr<end && isdigit(*ptr))
+        ++ptr;
+    remainder = ptr;
+
+    /* Does remainder start with a decimal point? */
+    *has_decimal = ptr<end && *remainder == '.';
+
+    /* Skip the decimal point. */
+    if (*has_decimal)
+        remainder++;
+
+    *n_remainder = end - remainder;
+}
+
 /* not all fields of format are used.  for example, precision is
    unused.  should this take discrete params in order to be more clear
    about what it does?  or is passing a single format parameter easier
    and more efficient enough to justify a little obfuscation? */
 static void
-calc_number_widths(NumberFieldWidths *spec, STRINGLIB_CHAR actual_sign,
-                   Py_ssize_t n_prefix, Py_ssize_t n_digits, int has_decimal,
-                   Py_ssize_t n_rest, const InternalFormatSpec *format)
+calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
+                   STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
+                   Py_ssize_t n_number, Py_ssize_t n_remainder,
+                   int has_decimal, const LocaleInfo *locale,
+                   const InternalFormatSpec *format)
 {
+    Py_ssize_t n_non_digit_non_padding;
+
+    spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
     spec->n_lpadding = 0;
-    spec->n_prefix = 0;
+    spec->n_prefix = n_prefix;
+    spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
+    spec->n_remainder = n_remainder;
     spec->n_spadding = 0;
     spec->n_rpadding = 0;
     spec->sign = '\0';
     spec->n_sign = 0;
 
     /* the output will look like:
-       |                                                                            |
-       | <lpadding> <sign> <prefix> <spadding> <digits> <decimal> <rest> <rpadding> |
-       |                                                                            |
+       |                                                                                         |
+       | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
+       |                                                                                         |
 
        sign is computed from format->sign and the actual
        sign of the number
@@ -295,29 +363,49 @@
     if (format->sign == '+') {
         /* always put a + or - */
         spec->n_sign = 1;
-        spec->sign = (actual_sign == '-' ? '-' : '+');
+        spec->sign = (sign_char == '-' ? '-' : '+');
     }
     else if (format->sign == ' ') {
         spec->n_sign = 1;
-        spec->sign = (actual_sign == '-' ? '-' : ' ');
+        spec->sign = (sign_char == '-' ? '-' : ' ');
     }
     else {
         /* non specified, or the default (-) */
-        if (actual_sign == '-') {
+        if (sign_char == '-') {
             spec->n_sign = 1;
             spec->sign = '-';
         }
     }
 
-    spec->n_prefix = n_prefix;
+    /* The number of chars used for non-digit and non-padding. */
+    n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
+        spec->n_remainder;
+
+    /* min_width can go negative, that's okay. format->width == -1 means
+       we don't care. */
+    if (format->fill_char == '0')
+        spec->n_min_width = format->width - n_non_digit_non_padding;
+    else
+        spec->n_min_width = 0;
+
+    if (spec->n_digits == 0)
+        /* This case only occurs when using 'c' formatting, we need
+           to special case it because the grouping code always wants
+           to have at least one character. */
+        spec->n_grouped_digits = 0;
+    else
+        spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
+                                                    spec->n_digits,
+                                                    spec->n_min_width,
+                                                    locale->grouping,
+                                                    locale->thousands_sep);
 
-    /* now the number of padding characters */
     if (format->width == -1) {
         /* no padding at all, nothing to do */
     }
     else {
         /* see if any padding is needed */
-        if (spec->n_sign + n_digits +
+        if (spec->n_sign + spec->n_grouped_digits +
                 spec->n_prefix >= format->width) {
             /* no padding needed, we're already bigger than the
                requested width */
@@ -326,8 +414,7 @@
             /* determine which of left, space, or right padding is
                needed */
             Py_ssize_t padding = format->width -
-                                    (spec->n_sign + spec->n_prefix +
-                                     n_digits);
+                (spec->n_sign + spec->n_grouped_digits + n_prefix);
             if (format->align == '<')
                 spec->n_rpadding = padding;
             else if (format->align == '>')
@@ -343,117 +430,123 @@
         }
     }
     spec->n_total = spec->n_lpadding + spec->n_sign + spec->n_prefix +
-            spec->n_spadding + n_digits + spec->n_rpadding;
+        spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
+        spec->n_remainder + spec->n_rpadding;
 }
 
 /* Fill in the digit parts of a numbers's string representation,
-   as determined in calc_number_widths(). */
+   as determined in calc_number_widths().
+   No error checking, since we know the buffer is the correct size. */
 static void
-fill_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
-            STRINGLIB_CHAR *p_digits, Py_ssize_t n_digits)
-{
-    memmove(p_buf +
-               (spec->n_lpadding + spec->n_sign + spec->n_spadding),
-            p_digits,
-            n_digits * sizeof(STRINGLIB_CHAR));
-}
-
-/* fill in the non-digit parts of a numbers's string representation,
-   as determined in calc_number_widths().  returns the pointer to
-   where the digits go. */
-static STRINGLIB_CHAR *
-fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
-                STRINGLIB_CHAR *prefix, Py_ssize_t n_digits,
-                STRINGLIB_CHAR fill_char)
+fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
+            STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
+            STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
+            LocaleInfo *locale, int toupper)
 {
-    STRINGLIB_CHAR *p_digits;
+    /* Used to keep track of digits, decimal, and remainder. */
+    STRINGLIB_CHAR *p = digits;
+
+#ifndef NDEBUG
+    Py_ssize_t r;
+#endif
 
     if (spec->n_lpadding) {
-        STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
-        p_buf += spec->n_lpadding;
+        STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
+        buf += spec->n_lpadding;
     }
     if (spec->n_sign == 1) {
-        *p_buf++ = spec->sign;
+        *buf++ = spec->sign;
     }
     if (spec->n_prefix) {
-        memmove(p_buf,
+        memmove(buf,
                 prefix,
                 spec->n_prefix * sizeof(STRINGLIB_CHAR));
-        p_buf += spec->n_prefix;
+        if (toupper) {
+            Py_ssize_t t;
+            for (t = 0; t < spec->n_prefix; ++t)
+                buf[t] = STRINGLIB_TOUPPER(buf[t]);
+        }
+        buf += spec->n_prefix;
     }
     if (spec->n_spadding) {
-        STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
-        p_buf += spec->n_spadding;
+        STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
+        buf += spec->n_spadding;
     }
-    p_digits = p_buf;
-    p_buf += n_digits;
+
+    /* Skip grouping in the type 'c' special case, which has no digits. */
+    if (spec->n_digits != 0) {
+        /* Fill the digits with InsertThousandsGrouping. */
+#ifndef NDEBUG
+        r =
+#endif
+            STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
+                               spec->n_digits, spec->n_min_width,
+                               locale->grouping, locale->thousands_sep);
+#ifndef NDEBUG
+        assert(r == spec->n_grouped_digits);
+#endif
+        p += spec->n_digits;
+    }
+    if (toupper) {
+        Py_ssize_t t;
+        for (t = 0; t < spec->n_grouped_digits; ++t)
+            buf[t] = STRINGLIB_TOUPPER(buf[t]);
+    }
+    buf += spec->n_grouped_digits;
+
+    if (spec->n_decimal) {
+        Py_ssize_t t;
+        for (t = 0; t < spec->n_decimal; ++t)
+            buf[t] = locale->decimal_point[t];
+        buf += spec->n_decimal;
+        p += 1;
+    }
+
+    if (spec->n_remainder) {
+        memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
+        buf += spec->n_remainder;
+        p += spec->n_remainder;
+    }
+
     if (spec->n_rpadding) {
-        STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
-        p_buf += spec->n_rpadding;
+        STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
+        buf += spec->n_rpadding;
     }
-    return p_digits;
 }
 
+static char no_grouping[1] = {CHAR_MAX};
+
 /* Find the decimal point character(s?), thousands_separator(s?), and
-   grouping description, either for the current locale if use_locale
-   is GFI_USE_LOCALE, a hard-coded locale if GFI_DEFAULT, or none if
-   GFI_NONE */
-#define GFI_USE_LOCALE 0
-#define GFI_DEFAULT_LOCALE 1
-#define GFI_NO_LOCALE 2
+   grouping description, either for the current locale if type is
+   LT_USE_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or none if
+   LT_NO_LOCALE. */
 static void
-get_formatting_info(int locale, char **decimal_point, char **thousands_sep,
-                    char **grouping)
+get_locale_info(int type, LocaleInfo *locale_info)
 {
-    if (locale == GFI_USE_LOCALE) {
+    locale_info->type = type;
+    switch (type) {
+    case LT_USE_LOCALE: {
         struct lconv *locale_data = localeconv();
-        *decimal_point = locale_data->decimal_point;
-        *thousands_sep = locale_data->thousands_sep;
-        *grouping = locale_data->grouping;
-    } else if (locale == GFI_DEFAULT_LOCALE) {
-        *decimal_point = ".";
-        *thousands_sep = ",";
-        *grouping = "\3"; /* every 3 characters, trailing 0 means repeat */
-    } else {
-        *decimal_point = ".";
-        *thousands_sep = "";
-        *grouping = "";
+        locale_info->decimal_point = locale_data->decimal_point;
+        locale_info->thousands_sep = locale_data->thousands_sep;
+        locale_info->grouping = locale_data->grouping;
+        break;
+    }
+    case LT_DEFAULT_LOCALE:
+        locale_info->decimal_point = ".";
+        locale_info->thousands_sep = ",";
+        locale_info->grouping = "\3"; /* every 3 characters, trailing 0 means repeat */
+        break;
+    case LT_NO_LOCALE:
+        locale_info->decimal_point = ".";
+        locale_info->thousands_sep = "";
+        locale_info->grouping = no_grouping;
+        break;
+    default:
+        assert(0);
     }
 }
 
-/* Given a number of the form:
-   [+-]digits[rest]
-   where ptr points to the start and end points to the end, parse
-    the number into its integer part and then everything else (which
-    could be a decimal, an exponent, both, or neither.
-   This is compatible with the format returned from
-    PyOS_double_to_string().
-   Results are undefined (but shouldn't crash) for improperly
-    formatted strings.
-   Consider moving this to pystrtod.c
-*/
-static void
-find_number_parts(STRINGLIB_CHAR *ptr, Py_ssize_t len,
-                  STRINGLIB_CHAR **sign,
-                  STRINGLIB_CHAR **integer_start,
-                  STRINGLIB_CHAR **rest_start,
-                  int *has_decimal)
-{
-    STRINGLIB_CHAR *end = ptr + len;
-
-    if (ptr<end && (*ptr == '+' || *ptr == '-'))
-        *sign = ptr++;
-    else
-        *sign = NULL;
-
-    *integer_start = ptr;
-    while (ptr<end && isdigit(*ptr))
-        ++ptr;
-    *rest_start = ptr;
-    *has_decimal = ptr<end && **rest_start == '.';
-}
-
-
 #endif /* FORMAT_FLOAT || FORMAT_LONG */
 
 /************************************************************************/
@@ -571,19 +664,21 @@
     PyObject *tmp = NULL;
     STRINGLIB_CHAR *pnumeric_chars;
     STRINGLIB_CHAR numeric_char;
-    STRINGLIB_CHAR sign = '\0';
+    STRINGLIB_CHAR sign_char = '\0';
     STRINGLIB_CHAR *p;
     Py_ssize_t n_digits;       /* count of digits need from the computed
                                   string */
-    Py_ssize_t n_leading_chars;
-    Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
-                                        allocate, used for 'n'
-                                        formatting. */
+    Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
+                                   produces non-digits */
     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
     STRINGLIB_CHAR *prefix = NULL;
     NumberFieldWidths spec;
     long x;
 
+    /* Locale settings, either from the actual locale or
+       from a hard-coded pseudo-locale */
+    LocaleInfo locale;
+
     /* no precision allowed on integers */
     if (format->precision != -1) {
         PyErr_SetString(PyExc_ValueError,
@@ -602,6 +697,14 @@
             goto done;
         }
 
+        /* Error to specify a comma. */
+        if (format->thousands_separators) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Thousands separators not allowed with integer"
+                            " format specifier 'c'");
+            goto done;
+        }
+
         /* taken from unicodeobject.c formatchar() */
         /* Integer input truncated to a character */
 /* XXX: won't work for int */
@@ -626,6 +729,13 @@
         numeric_char = (STRINGLIB_CHAR)x;
         pnumeric_chars = &numeric_char;
         n_digits = 1;
+
+        /* As a sort-of hack, we tell calc_number_widths that we only
+           have "remainder" characters. calc_number_widths thinks
+           these are characters that don't get formatted, only copied
+           into the output string. We do this for 'c' formatting,
+           because the characters are likely to be non-digits. */
+        n_remainder = 1;
     }
     else {
         int base;
@@ -677,8 +787,8 @@
 
         /* Is a sign character present in the output?  If so, remember it
            and skip it */
-        sign = pnumeric_chars[0];
-        if (sign == '-') {
+        if (pnumeric_chars[0] == '-') {
+            sign_char = pnumeric_chars[0];
             ++prefix;
             ++leading_chars_to_skip;
         }
@@ -688,20 +798,15 @@
         pnumeric_chars += leading_chars_to_skip;
     }
 
-    if (format->type == 'n')
-            /* Compute how many additional chars we need to allocate
-               to hold the thousands grouping. */
-            STRINGLIB_GROUPING_LOCALE(NULL, n_digits, n_digits,
-                               0, &n_grouping_chars, 0);
-    if (format->thousands_separators)
-            /* Compute how many additional chars we need to allocate
-               to hold the thousands grouping. */
-            STRINGLIB_GROUPING(NULL, n_digits, n_digits,
-                               0, &n_grouping_chars, 0, "\3", ",");
+    /* Determine the grouping, separator, and decimal point, if any. */
+    get_locale_info(format->type == 'n' ? LT_USE_LOCALE :
+                    (format->thousands_separators ? LT_DEFAULT_LOCALE :
+                     LT_NO_LOCALE),
+                    &locale);
 
     /* Calculate the widths of the various leading and trailing parts */
-    calc_number_widths(&spec, sign, n_prefix, n_digits + n_grouping_chars,
-                       0, 0, format);
+    calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars,
+                       n_digits, n_remainder, 0, &locale, format);
 
     /* Allocate a new string to hold the result */
     result = STRINGLIB_NEW(NULL, spec.n_total);
@@ -709,65 +814,9 @@
         goto done;
     p = STRINGLIB_STR(result);
 
-    /* XXX There is too much magic here regarding the internals of
-       spec and the location of the prefix and digits.  It would be
-       better if calc_number_widths returned a number of logical
-       offsets into the buffer, and those were used.  Maybe in a
-       future code cleanup. */
-
-    /* Fill in the digit parts */
-    n_leading_chars = spec.n_lpadding + spec.n_sign +
-            spec.n_prefix + spec.n_spadding;
-    memmove(p + n_leading_chars,
-            pnumeric_chars,
-            n_digits * sizeof(STRINGLIB_CHAR));
-
-    /* If type is 'X', convert the filled in digits to uppercase */
-    if (format->type == 'X') {
-        Py_ssize_t t;
-        for (t = 0; t < n_digits; ++t)
-            p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
-    }
-
-    /* Insert the grouping, if any, after the uppercasing of the digits, so
-       we can ensure that grouping chars won't be affected. */
-    if (n_grouping_chars) {
-            /* We know this can't fail, since we've already
-               reserved enough space. */
-            STRINGLIB_CHAR *pstart = p + n_leading_chars;
-#ifndef NDEBUG
-            int r;
-#endif
-            if (format->type == 'n')
-#ifndef NDEBUG
-                r =
-#endif
-                    STRINGLIB_GROUPING_LOCALE(pstart, n_digits, n_digits,
-                           spec.n_total+n_grouping_chars-n_leading_chars,
-                           NULL, 0);
-            else
-#ifndef NDEBUG
-                r =
-#endif
-                    STRINGLIB_GROUPING(pstart, n_digits, n_digits,
-                           spec.n_total+n_grouping_chars-n_leading_chars,
-                           NULL, 0, "\3", ",");
-            assert(r);
-    }
-
-    /* Fill in the non-digit parts (padding, sign, etc.) */
-    fill_non_digits(p, &spec, prefix, n_digits + n_grouping_chars,
-                    format->fill_char == '\0' ? ' ' : format->fill_char);
-
-    /* If type is 'X', uppercase the prefix.  This has to be done after the
-       prefix is filled in by fill_non_digits */
-    if (format->type == 'X') {
-        Py_ssize_t t;
-        for (t = 0; t < n_prefix; ++t)
-            p[t + spec.n_lpadding + spec.n_sign] =
-                    STRINGLIB_TOUPPER(p[t + spec.n_lpadding + spec.n_sign]);
-    }
-
+    fill_number(p, &spec, pnumeric_chars, n_digits, prefix,
+                format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
+                format->type == 'X');
 
 done:
     Py_XDECREF(tmp);
@@ -797,33 +846,26 @@
 {
     char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
     Py_ssize_t n_digits;
+    Py_ssize_t n_remainder;
+    int has_decimal;
     double val;
     Py_ssize_t precision = format->precision;
     STRINGLIB_CHAR type = format->type;
-    STRINGLIB_CHAR sign_char = '\0';
     int add_pct = 0;
-    int use_locale;
     STRINGLIB_CHAR *p;
     NumberFieldWidths spec;
     int flags = 0;
     PyObject *result = NULL;
-
-    /* the various parts of the raw formatted number */
-    STRINGLIB_CHAR *sign;
-    STRINGLIB_CHAR *integer_start;
-    STRINGLIB_CHAR *rest_start;
-    int has_decimal = 0;
-
-    /* locale settings, either from the actual locale or
-       from a hard-code pseudo-locale */
-    char *l_decimal_point;
-    char *l_thousands_sep;
-    char *l_grouping;
+    STRINGLIB_CHAR sign_char = '\0';
 
 #if STRINGLIB_IS_UNICODE
     Py_UNICODE *unicode_tmp = NULL;
 #endif
 
+    /* Locale settings, either from the actual locale or
+       from a hard-coded pseudo-locale */
+    LocaleInfo locale;
+
     /* alternate is not allowed on floats. */
     if (format->alternate) {
         PyErr_SetString(PyExc_ValueError,
@@ -838,20 +880,14 @@
         type = 'g';
         flags |= Py_DTSF_ADD_DOT_0;
     }
-    if (type == 'n') {
+    if (type == 'n')
         type = 'g';
-        use_locale = GFI_USE_LOCALE;
-    } else if (format->thousands_separators)
-        use_locale = GFI_DEFAULT_LOCALE;
-    else
-        use_locale = GFI_NO_LOCALE;
 
     /* 'F' is the same as 'f', per the PEP */
     if (type == 'F')
         type = 'f';
 
     val = PyFloat_AsDouble(value);
-
     if (val == -1.0 && PyErr_Occurred())
         goto done;
 
@@ -900,37 +936,37 @@
     p = buf;
 #endif
 
-    find_number_parts(p, n_digits,
-                      &sign,
-                      &integer_start,
-                      &rest_start,
-                      &has_decimal);
-
-    /* is a sign character present in the output?  if so, remember it
+    /* Is a sign character present in the output?  If so, remember it
        and skip it */
-    if (sign) {
-        sign_char = *sign;
+    if (*p == '-') {
+        sign_char = *p;
         ++p;
         --n_digits;
     }
 
     /* Determine the grouping, separator, and decimal point, if any. */
-    get_formatting_info(use_locale, &l_decimal_point, &l_thousands_sep, &l_grouping);
+    get_locale_info(type == 'n' ? LT_USE_LOCALE :
+                    (format->thousands_separators ? LT_DEFAULT_LOCALE :
+                     LT_NO_LOCALE),
+                    &locale);
+
+    /* Determine if we have any "remainder" (after the digits, might include
+       decimal or exponent or both (or neither)) */
+    parse_number(p, n_digits, &n_remainder, &has_decimal);
 
     /* Calculate how much space we'll need. */
-    calc_number_widths(&spec, sign_char, 0, n_digits, has_decimal, 0, format);
+    calc_number_widths(&spec, 0, sign_char, p, n_digits, n_remainder,
+                       has_decimal, &locale, format);
 
     /* Allocate that space. */
     result = STRINGLIB_NEW(NULL, spec.n_total);
     if (result == NULL)
         goto done;
 
-    /* Fill in the non-digit parts (padding, sign, etc.) */
-    fill_non_digits(STRINGLIB_STR(result), &spec, NULL, n_digits,
-                    format->fill_char == '\0' ? ' ' : format->fill_char);
-
-    /* Fill in the digit parts. */
-    fill_digits(STRINGLIB_STR(result), &spec, p, n_digits);
+    /* Populate the space. */
+    fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
+                format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
+                0);
 
 done:
     PyMem_Free(buf);

Modified: python/branches/py3k-short-float-repr/Objects/stringlib/localeutil.h
==============================================================================
--- python/branches/py3k-short-float-repr/Objects/stringlib/localeutil.h	(original)
+++ python/branches/py3k-short-float-repr/Objects/stringlib/localeutil.h	Sat Apr 11 07:44:19 2009
@@ -5,161 +5,208 @@
 
 #include <locale.h>
 
+#define MAX(x, y) ((x) < (y) ? (y) : (x))
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+
+typedef struct {
+    const char *grouping;
+    char previous;
+    Py_ssize_t i; /* Where we're currently pointing in grouping. */
+} GroupGenerator;
+
+static void
+_GroupGenerator_init(GroupGenerator *self, const char *grouping)
+{
+    self->grouping = grouping;
+    self->i = 0;
+    self->previous = 0;
+}
+
+/* Returns the next group size, repeating the last one forever; 0 means stop. */
+static Py_ssize_t
+_GroupGenerator_next(GroupGenerator *self)
+{
+    /* Note that we don't really do much error checking here. If a
+       grouping string contains just CHAR_MAX, for example, then just
+       terminate the generator. That shouldn't happen, but at least we
+       fail gracefully. */
+    switch (self->grouping[self->i]) {
+    case 0:
+        return self->previous;
+    case CHAR_MAX:
+        /* Stop the generator. */
+        return 0;
+    default: {
+        char ch = self->grouping[self->i];
+        self->previous = ch;
+        self->i++;
+        return (Py_ssize_t)ch;
+    }
+    }
+}
+
+/* Fill in some digits, leading zeros, and thousands separator. All
+   are optional, depending on when we're called. */
+static void
+fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
+     Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep,
+     Py_ssize_t thousands_sep_len)
+{
+#if STRINGLIB_IS_UNICODE
+    Py_ssize_t i;
+#endif
+
+    if (thousands_sep) {
+        *buffer_end -= thousands_sep_len;
+
+        /* Copy the thousands_sep chars into the buffer. */
+#if STRINGLIB_IS_UNICODE
+        /* Convert from the char's of the thousands_sep from
+           the locale into unicode. */
+        for (i = 0; i < thousands_sep_len; ++i)
+            (*buffer_end)[i] = thousands_sep[i];
+#else
+        /* No conversion, just memcpy the thousands_sep. */
+        memcpy(*buffer_end, thousands_sep, thousands_sep_len);
+#endif
+    }
+
+    *buffer_end -= n_chars;
+    *digits_end -= n_chars;
+    memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
+
+    *buffer_end -= n_zeros;
+    STRINGLIB_FILL(*buffer_end, '0', n_zeros);
+}
+
 /**
  * _Py_InsertThousandsGrouping:
  * @buffer: A pointer to the start of a string.
- * @n_buffer: The length of the string.
+ * @n_buffer: Number of characters in @buffer.
+ * @digits: A pointer to the digits we're reading from. If @buffer
+ *          is NULL (counting mode), this is unused.
  * @n_digits: The number of digits in the string, in which we want
  *            to put the grouping chars.
- * @buf_size: The maximum size of the buffer pointed to by buffer.
- * @count: If non-NULL, points to a variable that will receive the
- *         number of characters we need to insert (and no formatting
- *         will actually occur).
- * @append_zero_char: If non-zero, put a trailing zero at the end of
- *         of the resulting string, if and only if we modified the
- *         string.
+ * @min_width: The minimum width of the digits in the output string.
+ *             Output will be zero-padded on the left to fill.
  * @grouping: see definition in localeconv().
  * @thousands_sep: see definition in localeconv().
  *
+ * There are 2 modes: counting and filling. If @buffer is NULL,
+ *  we are in counting mode, else filling mode.
+ * If counting, the required buffer size is returned.
+ * If filling, we know the buffer will be large enough, so we don't
+ *  need to pass in the buffer size.
  * Inserts thousand grouping characters (as defined by grouping and
  *  thousands_sep) into the string between buffer and buffer+n_digits.
- *  If count is non-NULL, don't do any formatting, just count the
- *  number of characters to insert.  This is used by the caller to
- *  appropriately resize the buffer, if needed.  If count is non-NULL,
- *  buffer can be NULL (it is not dereferenced at all in that case).
  *
  * Return value: 0 on error, else 1.  Note that no error can occur if
  *  count is non-NULL.
  *
  * This name won't be used, the includer of this file should define
  *  it to be the actual function name, based on unicode or string.
+ *
+ * As closely as possible, this code mimics the logic in decimal.py's
+ *  _insert_thousands_sep().
  **/
-int
+Py_ssize_t
 _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
                             Py_ssize_t n_buffer,
+                            STRINGLIB_CHAR *digits,
                             Py_ssize_t n_digits,
-                            Py_ssize_t buf_size,
-                            Py_ssize_t *count,
-                            int append_zero_char,
+                            Py_ssize_t min_width,
                             const char *grouping,
                             const char *thousands_sep)
 {
-        Py_ssize_t thousands_sep_len = strlen(thousands_sep);
-        STRINGLIB_CHAR *pend = NULL; /* current end of buffer */
-        STRINGLIB_CHAR *pmax = NULL; /* max of buffer */
-        char current_grouping;
-        Py_ssize_t remaining = n_digits; /* Number of chars remaining to
-                                            be looked at */
-
-        /* Initialize the character count, if we're just counting. */
-        if (count)
-                *count = 0;
-        else {
-                /* We're not just counting, we're modifying buffer */
-                pend = buffer + n_buffer;
-                pmax = buffer + buf_size;
+    Py_ssize_t count = 0;
+    Py_ssize_t n_zeros;
+    int loop_broken = 0;
+    int use_separator = 0; /* First time through, don't append the
+                              separator. They only go between
+                              groups. */
+    STRINGLIB_CHAR *buffer_end = NULL;
+    STRINGLIB_CHAR *digits_end;
+    Py_ssize_t l;
+    Py_ssize_t n_chars;
+    Py_ssize_t thousands_sep_len = strlen(thousands_sep);
+    Py_ssize_t remaining = n_digits; /* Number of chars remaining to
+                                        be looked at */
+    /* A generator that returns all of the grouping widths, until it
+       returns 0. */
+    GroupGenerator groupgen;
+    _GroupGenerator_init(&groupgen, grouping);
+
+    if (buffer) {
+        buffer_end = buffer + n_buffer;
+        digits_end = digits + n_digits;
+    }
+
+    while ((l = _GroupGenerator_next(&groupgen)) > 0) {
+        l = MIN(l, MAX(MAX(remaining, min_width), 1));
+        n_zeros = MAX(0, l - remaining);
+        n_chars = MAX(0, MIN(remaining, l));
+
+        /* Use n_zeros zeros and n_chars chars */
+
+        /* Always accumulate the count; copying happens only when filling. */
+        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+
+        if (buffer) {
+            /* Copy into the output buffer. */
+            fill(&digits_end, &buffer_end, n_chars, n_zeros,
+                 use_separator ? thousands_sep : NULL, thousands_sep_len);
         }
 
-        /* Starting at the end and working right-to-left, keep track of
-           what grouping needs to be added and insert that. */
-        current_grouping = *grouping++;
-
-        /* If the first character is 0, perform no grouping at all. */
-        if (current_grouping == 0)
-                return 1;
-
-        while (remaining > current_grouping) {
-                /* Always leave buffer and pend valid at the end of this
-                   loop, since we might leave with a return statement. */
-
-                remaining -= current_grouping;
-                if (count) {
-                        /* We're only counting, not touching the memory. */
-                        *count += thousands_sep_len;
-                }
-                else {
-                        /* Do the formatting. */
-
-                        STRINGLIB_CHAR *plast = buffer + remaining;
-
-                        /* Is there room to insert thousands_sep_len chars? */
-                        if (pmax - pend < thousands_sep_len)
-                                /* No room. */
-                                return 0;
-
-                        /* Move the rest of the string down. */
-                        memmove(plast + thousands_sep_len,
-                                plast,
-                                (pend - plast) * sizeof(STRINGLIB_CHAR));
-                        /* Copy the thousands_sep chars into the buffer. */
-#if STRINGLIB_IS_UNICODE
-                        /* Convert from the char's of the thousands_sep from
-                           the locale into unicode. */
-                        {
-                                Py_ssize_t i;
-                                for (i = 0; i < thousands_sep_len; ++i)
-                                        plast[i] = thousands_sep[i];
-                        }
-#else
-                        /* No conversion, just memcpy the thousands_sep. */
-                        memcpy(plast, thousands_sep, thousands_sep_len);
-#endif
-                }
+        /* Use a separator next time. */
+        use_separator = 1;
 
-                /* Adjust end pointer. */
-                pend += thousands_sep_len;
+        remaining -= n_chars;
+        min_width -= l;
 
-                /* Move to the next grouping character, unless we're
-                   repeating (which is designated by a grouping of 0). */
-                if (*grouping != 0) {
-                        current_grouping = *grouping++;
-                        if (current_grouping == CHAR_MAX)
-                                /* We're done. */
-                                break;
-                }
+        if (remaining <= 0 && min_width <= 0) {
+            loop_broken = 1;
+            break;
         }
-        if (append_zero_char) {
-                /* Append a zero character to mark the end of the string,
-                   if there's room. */
-                if (pend - (buffer + remaining) < 1)
-                        /* No room, error. */
-                        return 0;
-                *pend = 0;
+        min_width -= thousands_sep_len;
+    }
+    if (!loop_broken) {
+        /* We left the loop without using a break statement. */
+
+        l = MAX(MAX(remaining, min_width), 1);
+        n_zeros = MAX(0, l - remaining);
+        n_chars = MAX(0, MIN(remaining, l));
+
+        /* Use n_zeros zeros and n_chars chars */
+        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+        if (buffer) {
+            /* Copy into the output buffer. */
+            fill(&digits_end, &buffer_end, n_chars, n_zeros,
+                 use_separator ? thousands_sep : NULL, thousands_sep_len);
         }
-        return 1;
+    }
+    return count;
 }
 
 /**
  * _Py_InsertThousandsGroupingLocale:
  * @buffer: A pointer to the start of a string.
- * @n_buffer: The length of the string.
  * @n_digits: The number of digits in the string, in which we want
  *            to put the grouping chars.
- * @buf_size: The maximum size of the buffer pointed to by buffer.
- * @count: If non-NULL, points to a variable that will receive the
- *         number of characters we need to insert (and no formatting
- *         will actually occur).
- * @append_zero_char: If non-zero, put a trailing zero at the end of
- *         of the resulting string, if and only if we modified the
- *         string.
  *
  * Reads thee current locale and calls _Py_InsertThousandsGrouping().
  **/
-int
+Py_ssize_t
 _Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer,
                                   Py_ssize_t n_buffer,
+                                  STRINGLIB_CHAR *digits,
                                   Py_ssize_t n_digits,
-                                  Py_ssize_t buf_size,
-                                  Py_ssize_t *count,
-                                  int append_zero_char)
+                                  Py_ssize_t min_width)
 {
         struct lconv *locale_data = localeconv();
         const char *grouping = locale_data->grouping;
         const char *thousands_sep = locale_data->thousands_sep;
 
-        return _Py_InsertThousandsGrouping(buffer, n_buffer, n_digits,
-                                           buf_size, count,
-                                           append_zero_char, grouping,
-                                           thousands_sep);
+        return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits,
+                                           min_width, grouping, thousands_sep);
 }
 #endif /* STRINGLIB_LOCALEUTIL_H */

Modified: python/branches/py3k-short-float-repr/Python/pystrtod.c
==============================================================================
--- python/branches/py3k-short-float-repr/Python/pystrtod.c	(original)
+++ python/branches/py3k-short-float-repr/Python/pystrtod.c	Sat Apr 11 07:44:19 2009
@@ -357,6 +357,7 @@
 Py_LOCAL_INLINE(int)
 add_thousands_grouping(char* buffer, size_t buf_size)
 {
+#if 0
 	Py_ssize_t len = strlen(buffer);
 	struct lconv *locale_data = localeconv();
 	const char *decimal_point = locale_data->decimal_point;
@@ -379,7 +380,9 @@
 	   want to format.  We need to add the grouping string for the
 	   characters between buffer and p. */
 	return _PyBytes_InsertThousandsGroupingLocale(buffer, len, p-buffer,
-						buf_size, NULL, 1);
+						      buf_size, NULL, 1, 1);
+#endif
+	return 1;
 }
 
 /* see FORMATBUFLEN in unicodeobject.c */


More information about the Python-checkins mailing list