[Python-checkins] r70366 - in python/branches/py3k: Lib/test/test_unicode.py Misc/NEWS Objects/stringlib/string_format.h

eric.smith python-checkins at python.org
Sat Mar 14 13:29:34 CET 2009


Author: eric.smith
Date: Sat Mar 14 13:29:34 2009
New Revision: 70366

Log:
Merged revisions 70364 via svnmerge from 
svn+ssh://pythondev@svn.python.org/python/trunk

........
  r70364 | eric.smith | 2009-03-14 07:57:26 -0400 (Sat, 14 Mar 2009) | 17 lines
  
  Issue 5237, Allow auto-numbered replacement fields in str.format() strings.
  
  For simple uses for str.format(), this makes the typing easier. Hopefully this
  will help in the adoption of str.format().
  
  For example:
  'The {} is {}'.format('sky', 'blue')
  
  You can mix and match auto-numbering and named replacement fields:
  'The {} is {color}'.format('sky', color='blue')
  
  But you can't mix and match auto-numbering and specified numbering:
  'The {0} is {}'.format('sky', 'blue')
  ValueError: cannot switch from manual field specification to automatic field numbering
  
  Will port to 3.1.
........


Modified:
   python/branches/py3k/   (props changed)
   python/branches/py3k/Lib/test/test_unicode.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Objects/stringlib/string_format.h

Modified: python/branches/py3k/Lib/test/test_unicode.py
==============================================================================
--- python/branches/py3k/Lib/test/test_unicode.py	(original)
+++ python/branches/py3k/Lib/test/test_unicode.py	Sat Mar 14 13:29:34 2009
@@ -683,9 +683,9 @@
         self.assertRaises(ValueError, "{0!}".format, 0)
         self.assertRaises(ValueError, "{0!rs}".format, 0)
         self.assertRaises(ValueError, "{!}".format)
-        self.assertRaises(ValueError, "{:}".format)
-        self.assertRaises(ValueError, "{:s}".format)
-        self.assertRaises(ValueError, "{}".format)
+        self.assertRaises(IndexError, "{:}".format)
+        self.assertRaises(IndexError, "{:s}".format)
+        self.assertRaises(IndexError, "{}".format)
 
         # can't have a replacement on the field name portion
         self.assertRaises(TypeError, '{0[{1}]}'.format, 'abcdefg', 4)
@@ -704,6 +704,36 @@
         self.assertRaises(ValueError, format, '', '#')
         self.assertRaises(ValueError, format, '', '#20')
 
+    def test_format_auto_numbering(self):
+        class C:
+            def __init__(self, x=100):
+                self._x = x
+            def __format__(self, spec):
+                return spec
+
+        self.assertEqual('{}'.format(10), '10')
+        self.assertEqual('{:5}'.format('s'), 's    ')
+        self.assertEqual('{!r}'.format('s'), "'s'")
+        self.assertEqual('{._x}'.format(C(10)), '10')
+        self.assertEqual('{[1]}'.format([1, 2]), '2')
+        self.assertEqual('{[a]}'.format({'a':4, 'b':2}), '4')
+        self.assertEqual('a{}b{}c'.format(0, 1), 'a0b1c')
+
+        self.assertEqual('a{:{}}b'.format('x', '^10'), 'a    x     b')
+        self.assertEqual('a{:{}x}b'.format(20, '#'), 'a0x14b')
+
+        # can't mix and match numbering and auto-numbering
+        self.assertRaises(ValueError, '{}{1}'.format, 1, 2)
+        self.assertRaises(ValueError, '{1}{}'.format, 1, 2)
+        self.assertRaises(ValueError, '{:{1}}'.format, 1, 2)
+        self.assertRaises(ValueError, '{0:{}}'.format, 1, 2)
+
+        # can mix and match auto-numbering and named
+        self.assertEqual('{f}{}'.format(4, f='test'), 'test4')
+        self.assertEqual('{}{f}'.format(4, f='test'), '4test')
+        self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
+        self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
+
     def test_formatting(self):
         string_tests.MixinStrUnicodeUserStringTest.test_formatting(self)
         # Testing Unicode formatting strings...

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Sat Mar 14 13:29:34 2009
@@ -12,6 +12,9 @@
 Core and Builtins
 -----------------
 
+- Issue #5237: Allow auto-numbered fields in str.format(). For
+  example: '{} {}'.format(1, 2) == '1 2'.
+
 - Issue #5392: when a very low recursion limit was set, the interpreter would
   abort with a fatal error after the recursion limit was hit twice.
 
@@ -33,6 +36,7 @@
 Core and Builtins
 -----------------
 
+=======
 - The io module has been reimplemented in C for speed.
 
 - Give dict views an informative __repr__.

Modified: python/branches/py3k/Objects/stringlib/string_format.h
==============================================================================
--- python/branches/py3k/Objects/stringlib/string_format.h	(original)
+++ python/branches/py3k/Objects/stringlib/string_format.h	Sat Mar 14 13:29:34 2009
@@ -31,10 +31,23 @@
 } SubString;
 
 
+typedef enum {
+    ANS_INIT,
+    ANS_AUTO,
+    ANS_MANUAL,
+} AutoNumberState;   /* Keep track if we're auto-numbering fields */
+
+/* Keeps track of our auto-numbering state, and which number field we're on */
+typedef struct {
+    AutoNumberState an_state;
+    int an_field_number;
+} AutoNumber;
+
+
 /* forward declaration for recursion */
 static PyObject *
 build_string(SubString *input, PyObject *args, PyObject *kwargs,
-             int recursion_depth);
+             int recursion_depth, AutoNumber *auto_number);
 
 
 
@@ -42,6 +55,13 @@
 /**************************  Utility  functions  ************************/
 /************************************************************************/
 
+static void
+AutoNumber_Init(AutoNumber *auto_number)
+{
+    auto_number->an_state = ANS_INIT;
+    auto_number->an_field_number = 0;
+}
+
 /* fill in a SubString from a pointer and length */
 Py_LOCAL_INLINE(void)
 SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
@@ -74,6 +94,32 @@
     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
 }
 
+/* Return 1 if an error has been detected switching between automatic
+   field numbering and manual field specification, else return 0. Set
+   ValueError on error. */
+static int
+autonumber_state_error(AutoNumberState state, int field_name_is_empty)
+{
+    if (state == ANS_MANUAL) {
+        if (field_name_is_empty) {
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "manual field specification to "
+                            "automatic field numbering");
+            return 1;
+        }
+    }
+    else {
+        if (!field_name_is_empty) {
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "automatic field numbering to "
+                            "manual field specification");
+            return 1;
+        }
+    }
+    return 0;
+}
+
+
 /************************************************************************/
 /***********    Output string management functions       ****************/
 /************************************************************************/
@@ -352,11 +398,14 @@
 */
 static int
 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
-                 Py_ssize_t *first_idx, FieldNameIterator *rest)
+                 Py_ssize_t *first_idx, FieldNameIterator *rest,
+                 AutoNumber *auto_number)
 {
     STRINGLIB_CHAR c;
     STRINGLIB_CHAR *p = ptr;
     STRINGLIB_CHAR *end = ptr + len;
+    int field_name_is_empty;
+    int using_numeric_index;
 
     /* find the part up until the first '.' or '[' */
     while (p < end) {
@@ -380,15 +429,41 @@
     /* see if "first" is an integer, in which case it's used as an index */
     *first_idx = get_integer(first);
 
-    /* zero length string is an error */
-    if (first->ptr >= first->end) {
-        PyErr_SetString(PyExc_ValueError, "empty field name");
-        goto error;
+    field_name_is_empty = first->ptr >= first->end;
+
+    /* If the field name is omitted or if we have a numeric index
+       specified, then we're doing numeric indexing into args. */
+    using_numeric_index = field_name_is_empty || *first_idx != -1;
+
+    /* We always get here exactly one time for each field we're
+       processing. And we get here in field order (counting by left
+       braces). So this is the perfect place to handle automatic field
+       numbering if the field name is omitted. */
+
+    /* Check if we need to do the auto-numbering. It's not needed if
+       we're called from string.Format routines, because it's handled
+       in that class by itself. */
+    if (auto_number) {
+        /* Initialize our auto numbering state if this is the first
+           time we're either auto-numbering or manually numbering. */
+        if (auto_number->an_state == ANS_INIT && using_numeric_index)
+            auto_number->an_state = field_name_is_empty ?
+                ANS_AUTO : ANS_MANUAL;
+
+        /* Make sure our state is consistent with what we're doing
+           this time through. Only check if we're using a numeric
+           index. */
+        if (using_numeric_index)
+            if (autonumber_state_error(auto_number->an_state,
+                                       field_name_is_empty))
+                return 0;
+        /* Zero length field means we want to do auto-numbering of the
+           fields. */
+        if (field_name_is_empty)
+            *first_idx = (auto_number->an_field_number)++;
     }
 
     return 1;
-error:
-    return 0;
 }
 
 
@@ -398,7 +473,8 @@
     the entire input string.
 */
 static PyObject *
-get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
+get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
+                 AutoNumber *auto_number)
 {
     PyObject *obj = NULL;
     int ok;
@@ -409,7 +485,7 @@
     FieldNameIterator rest;
 
     if (!field_name_split(input->ptr, input->end - input->ptr, &first,
-                          &index, &rest)) {
+                          &index, &rest, auto_number)) {
         goto error;
     }
 
@@ -548,14 +624,18 @@
 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
             STRINGLIB_CHAR *conversion)
 {
+    /* Note this function works if the field name is zero length,
+       which is good.  Zero length field names are handled later, in
+       field_name_split. */
+
     STRINGLIB_CHAR c = 0;
 
     /* initialize these, as they may be empty */
     *conversion = '\0';
     SubString_init(format_spec, NULL, 0);
 
-    /* search for the field name.  it's terminated by the end of the
-       string, or a ':' or '!' */
+    /* Search for the field name.  it's terminated by the end of
+       the string, or a ':' or '!' */
     field_name->ptr = str->ptr;
     while (str->ptr < str->end) {
         switch (c = *(str->ptr++)) {
@@ -598,15 +678,12 @@
                 }
             }
         }
-
-        return 1;
-
     }
-    else {
+    else
         /* end of string, there's no format_spec or conversion */
         field_name->end = str->ptr;
-        return 1;
-    }
+
+    return 1;
 }
 
 /************************************************************************/
@@ -633,8 +710,8 @@
    string (or something to be expanded) */
 static int
 MarkupIterator_next(MarkupIterator *self, SubString *literal,
-                    SubString *field_name, SubString *format_spec,
-                    STRINGLIB_CHAR *conversion,
+                    int *field_present, SubString *field_name,
+                    SubString *format_spec, STRINGLIB_CHAR *conversion,
                     int *format_spec_needs_expanding)
 {
     int at_end;
@@ -650,6 +727,7 @@
     SubString_init(format_spec, NULL, 0);
     *conversion = '\0';
     *format_spec_needs_expanding = 0;
+    *field_present = 0;
 
     /* No more input, end of iterator.  This is the normal exit
        path. */
@@ -711,6 +789,7 @@
     /* this is markup, find the end of the string by counting nested
        braces.  note that this prohibits escaped braces, so that
        format_specs cannot have braces in them. */
+    *field_present = 1;
     count = 1;
 
     start = self->str.ptr;
@@ -735,13 +814,6 @@
                 if (parse_field(&s, field_name, format_spec, conversion) == 0)
                     return 0;
 
-                /* a zero length field_name is an error */
-                if (field_name->ptr == field_name->end) {
-                    PyErr_SetString(PyExc_ValueError, "zero length field name "
-                                    "in format");
-                    return 0;
-                }
-
                 /* success */
                 return 2;
             }
@@ -793,13 +865,17 @@
    compute the result and write it to output.
    format_spec_needs_expanding is an optimization.  if it's false,
    just output the string directly, otherwise recursively expand the
-   format_spec string. */
+   format_spec string.
+
+   field_name is allowed to be zero length, in which case we
+   are doing auto field numbering.
+*/
 
 static int
 output_markup(SubString *field_name, SubString *format_spec,
               int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
               OutputString *output, PyObject *args, PyObject *kwargs,
-              int recursion_depth)
+              int recursion_depth, AutoNumber *auto_number)
 {
     PyObject *tmp = NULL;
     PyObject *fieldobj = NULL;
@@ -808,7 +884,7 @@
     int result = 0;
 
     /* convert field_name to an object */
-    fieldobj = get_field_object(field_name, args, kwargs);
+    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
     if (fieldobj == NULL)
         goto done;
 
@@ -825,7 +901,8 @@
 
     /* if needed, recurively compute the format_spec */
     if (format_spec_needs_expanding) {
-        tmp = build_string(format_spec, args, kwargs, recursion_depth-1);
+        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
+                           auto_number);
         if (tmp == NULL)
             goto done;
 
@@ -859,26 +936,28 @@
 */
 static int
 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
-          OutputString *output, int recursion_depth)
+          OutputString *output, int recursion_depth, AutoNumber *auto_number)
 {
     MarkupIterator iter;
     int format_spec_needs_expanding;
     int result;
+    int field_present;
     SubString literal;
     SubString field_name;
     SubString format_spec;
     STRINGLIB_CHAR conversion;
 
     MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
-    while ((result = MarkupIterator_next(&iter, &literal, &field_name,
-                                         &format_spec, &conversion,
+    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
+                                         &field_name, &format_spec,
+                                         &conversion,
                                          &format_spec_needs_expanding)) == 2) {
         if (!output_data(output, literal.ptr, literal.end - literal.ptr))
             return 0;
-        if (field_name.ptr != field_name.end)
+        if (field_present)
             if (!output_markup(&field_name, &format_spec,
                                format_spec_needs_expanding, conversion, output,
-                               args, kwargs, recursion_depth))
+                               args, kwargs, recursion_depth, auto_number))
                 return 0;
     }
     return result;
@@ -891,7 +970,7 @@
 */
 static PyObject *
 build_string(SubString *input, PyObject *args, PyObject *kwargs,
-             int recursion_depth)
+             int recursion_depth, AutoNumber *auto_number)
 {
     OutputString output;
     PyObject *result = NULL;
@@ -913,7 +992,8 @@
                            INITIAL_SIZE_INCREMENT))
         goto done;
 
-    if (!do_markup(input, args, kwargs, &output, recursion_depth)) {
+    if (!do_markup(input, args, kwargs, &output, recursion_depth,
+                   auto_number)) {
         goto done;
     }
 
@@ -947,8 +1027,11 @@
     */
     int recursion_depth = 2;
 
+    AutoNumber auto_number;
+
+    AutoNumber_Init(&auto_number);
     SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
-    return build_string(&input, args, kwargs, recursion_depth);
+    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
 }
 
 
@@ -993,8 +1076,9 @@
     SubString format_spec;
     STRINGLIB_CHAR conversion;
     int format_spec_needs_expanding;
-    int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
-                                     &format_spec, &conversion,
+    int field_present;
+    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
+                                     &field_name, &format_spec, &conversion,
                                      &format_spec_needs_expanding);
 
     /* all of the SubString objects point into it->str, so no
@@ -1009,7 +1093,6 @@
         PyObject *format_spec_str = NULL;
         PyObject *conversion_str = NULL;
         PyObject *tuple = NULL;
-        int has_field = field_name.ptr != field_name.end;
 
         literal_str = SubString_new_object(&literal);
         if (literal_str == NULL)
@@ -1021,7 +1104,7 @@
 
         /* if field_name is non-zero length, return a string for
            format_spec (even if zero length), else return None */
-        format_spec_str = (has_field ?
+        format_spec_str = (field_present ?
                            SubString_new_object_or_empty :
                            SubString_new_object)(&format_spec);
         if (format_spec_str == NULL)
@@ -1245,9 +1328,11 @@
     Py_INCREF(self);
     it->str = self;
 
+    /* Pass in auto_number = NULL. We'll return an empty string for
+       first_obj in that case. */
     if (!field_name_split(STRINGLIB_STR(self),
                           STRINGLIB_LEN(self),
-                          &first, &first_idx, &it->it_field))
+                          &first, &first_idx, &it->it_field, NULL))
         goto done;
 
     /* first becomes an integer, if possible; else a string */


More information about the Python-checkins mailing list