[Python-checkins] cpython (3.3): Issue #17768: Support newline fill character in decimal.py and NUL fill

stefan.krah python-checkins at python.org
Wed May 29 15:48:41 CEST 2013


http://hg.python.org/cpython/rev/9156c663d6aa
changeset:   83974:9156c663d6aa
branch:      3.3
parent:      83971:30128355f53b
user:        Stefan Krah <skrah at bytereef.org>
date:        Wed May 29 15:45:38 2013 +0200
summary:
  Issue #17768: Support newline fill character in decimal.py and NUL fill
character in _decimal.c.

files:
  Lib/decimal.py                         |   2 +-
  Lib/test/test_decimal.py               |   4 +
  Modules/_decimal/_decimal.c            |  64 ++++++++++++-
  Modules/_decimal/tests/deccheck.py     |   4 +-
  Modules/_decimal/tests/formathelper.py |   6 +-
  5 files changed, 64 insertions(+), 16 deletions(-)


diff --git a/Lib/decimal.py b/Lib/decimal.py
--- a/Lib/decimal.py
+++ b/Lib/decimal.py
@@ -6140,7 +6140,7 @@
 (?:\.(?P<precision>0|(?!0)\d+))?
 (?P<type>[eEfFgGn%])?
 \Z
-""", re.VERBOSE)
+""", re.VERBOSE|re.DOTALL)
 
 del re
 
diff --git a/Lib/test/test_decimal.py b/Lib/test/test_decimal.py
--- a/Lib/test/test_decimal.py
+++ b/Lib/test/test_decimal.py
@@ -1021,6 +1021,10 @@
             ('/=10', '-45.6', '-/////45.6'),
             ('/=+10', '45.6', '+/////45.6'),
             ('/= 10', '45.6', ' /////45.6'),
+            ('\x00=10', '-inf', '-\x00Infinity'),
+            ('\x00^16', '-inf', '\x00\x00\x00-Infinity\x00\x00\x00\x00'),
+            ('\x00>10', '1.2345', '\x00\x00\x00\x001.2345'),
+            ('\x00<10', '1.2345', '1.2345\x00\x00\x00\x00'),
 
             # thousands separator
             (',', '1234567', '1,234,567'),
diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c
--- a/Modules/_decimal/_decimal.c
+++ b/Modules/_decimal/_decimal.c
@@ -3096,6 +3096,29 @@
     return res;
 }
 
+/* Return a duplicate of the first size bytes of src, including any embedded null characters. */
+static char *
+dec_strdup(const char *src, Py_ssize_t size)
+{
+    char *dest = PyMem_Malloc(size+1);
+    if (dest == NULL) {
+        return NULL;
+    }
+
+    memcpy(dest, src, size);
+    dest[size] = '\0';
+    return dest;
+}
+
+static void
+dec_replace_fillchar(char *dest)
+{
+     while (*dest != '\0') {
+         if (*dest == '\xff') *dest = '\0';
+         dest++;
+     }
+}
+
 /* Convert decimal_point or thousands_sep, which may be multibyte or in
    the range [128, 255], to a UTF8 string. */
 static PyObject *
@@ -3131,13 +3154,14 @@
     PyObject *dot = NULL;
     PyObject *sep = NULL;
     PyObject *grouping = NULL;
-    PyObject *fmt = NULL;
     PyObject *fmtarg;
     PyObject *context;
     mpd_spec_t spec;
-    char *decstring= NULL;
+    char *fmt;
+    char *decstring = NULL;
     uint32_t status = 0;
-    size_t n;
+    int replace_fillchar = 0;
+    Py_ssize_t size;
 
 
     CURRENT_CONTEXT(context);
@@ -3146,10 +3170,20 @@
     }
 
     if (PyUnicode_Check(fmtarg)) {
-        fmt = PyUnicode_AsUTF8String(fmtarg);
+        fmt = PyUnicode_AsUTF8AndSize(fmtarg, &size);
         if (fmt == NULL) {
             return NULL;
         }
+        if (size > 0 && fmt[0] == '\0') {
+            /* NUL fill character: must be replaced with a valid UTF-8 char
+               before calling mpd_parse_fmt_str(). */
+            replace_fillchar = 1;
+            fmt = dec_strdup(fmt, size);
+            if (fmt == NULL) {
+                return NULL;
+            }
+            fmt[0] = '_';
+        }
     }
     else {
         PyErr_SetString(PyExc_TypeError,
@@ -3157,12 +3191,19 @@
         return NULL;
     }
 
-    if (!mpd_parse_fmt_str(&spec, PyBytes_AS_STRING(fmt),
-                           CtxCaps(context))) {
+    if (!mpd_parse_fmt_str(&spec, fmt, CtxCaps(context))) {
         PyErr_SetString(PyExc_ValueError,
             "invalid format string");
         goto finish;
     }
+    if (replace_fillchar) {
+        /* In order to avoid clobbering parts of UTF-8 thousands separators or
+           decimal points when the substitution is reversed later, the actual
+           placeholder must be an invalid UTF-8 byte. */
+        spec.fill[0] = '\xff';
+        spec.fill[1] = '\0';
+    }
+
     if (override) {
         /* Values for decimal_point, thousands_sep and grouping can
            be explicitly specified in the override dict. These values
@@ -3199,7 +3240,7 @@
         }
     }
     else {
-        n = strlen(spec.dot);
+        size_t n = strlen(spec.dot);
         if (n > 1 || (n == 1 && !isascii((uchar)spec.dot[0]))) {
             /* fix locale dependent non-ascii characters */
             dot = dotsep_as_utf8(spec.dot);
@@ -3231,14 +3272,19 @@
         }
         goto finish;
     }
-    result = PyUnicode_DecodeUTF8(decstring, strlen(decstring), NULL);
+    size = strlen(decstring);
+    if (replace_fillchar) {
+        dec_replace_fillchar(decstring);
+    }
+
+    result = PyUnicode_DecodeUTF8(decstring, size, NULL);
 
 
 finish:
     Py_XDECREF(grouping);
     Py_XDECREF(sep);
     Py_XDECREF(dot);
-    Py_XDECREF(fmt);
+    if (replace_fillchar) PyMem_Free(fmt);
     if (decstring) mpd_free(decstring);
     return result;
 }
diff --git a/Modules/_decimal/tests/deccheck.py b/Modules/_decimal/tests/deccheck.py
--- a/Modules/_decimal/tests/deccheck.py
+++ b/Modules/_decimal/tests/deccheck.py
@@ -891,7 +891,7 @@
 def test_format(method, prec, exp_range, restricted_range, itr, stat):
     """Iterate the __format__ method through many test cases."""
     for op in all_unary(prec, exp_range, itr):
-        fmt1 = rand_format(chr(random.randrange(32, 128)), 'EeGgn')
+        fmt1 = rand_format(chr(random.randrange(0, 128)), 'EeGgn')
         fmt2 = rand_locale()
         for fmt in (fmt1, fmt2):
             fmtop = (op[0], fmt)
@@ -904,7 +904,7 @@
             except VerifyError as err:
                 log(err)
     for op in all_unary(prec, 9999, itr):
-        fmt1 = rand_format(chr(random.randrange(32, 128)), 'Ff%')
+        fmt1 = rand_format(chr(random.randrange(0, 128)), 'Ff%')
         fmt2 = rand_locale()
         for fmt in (fmt1, fmt2):
             fmtop = (op[0], fmt)
diff --git a/Modules/_decimal/tests/formathelper.py b/Modules/_decimal/tests/formathelper.py
--- a/Modules/_decimal/tests/formathelper.py
+++ b/Modules/_decimal/tests/formathelper.py
@@ -215,8 +215,6 @@
         c = chr(i)
         c.encode('utf-8').decode()
         format(P.Decimal(0), c + '<19g')
-        if c in ("'", '"', '\\'):
-            return None
         return c
     except:
         return None
@@ -224,14 +222,14 @@
 # Generate all unicode characters that are accepted as
 # fill characters by decimal.py.
 def all_fillchars():
-    for i in range(32, 0x110002):
+    for i in range(0, 0x110002):
         c = check_fillchar(i)
         if c: yield c
 
 # Return random fill character.
 def rand_fillchar():
     while 1:
-        i = random.randrange(32, 0x110002)
+        i = random.randrange(0, 0x110002)
         c = check_fillchar(i)
         if c: return c
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list