[Python-checkins] cpython: Issue #25384: Use _PyBytesWriter API in binascii

victor.stinner python-checkins at python.org
Tue Oct 13 04:52:50 EDT 2015


https://hg.python.org/cpython/rev/d6fcda2b9b5e
changeset:   98723:d6fcda2b9b5e
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Tue Oct 13 10:51:47 2015 +0200
summary:
  Issue #25384: Use _PyBytesWriter API in binascii

This API avoids a final call to _PyBytes_Resize() for output smaller than 512
bytes.

Small optimization: disable overallocation in binascii.rledecode_hqx() for the
last write.

files:
  Modules/binascii.c |  194 ++++++++++++++------------------
  1 files changed, 83 insertions(+), 111 deletions(-)


diff --git a/Modules/binascii.c b/Modules/binascii.c
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -346,9 +346,10 @@
     int leftbits = 0;
     unsigned char this_ch;
     unsigned int leftchar = 0;
-    PyObject *rv;
-    Py_ssize_t bin_len;
+    Py_ssize_t bin_len, out_len;
+    _PyBytesWriter writer;
 
+    _PyBytesWriter_Init(&writer);
     bin_data = data->buf;
     bin_len = data->len;
     if ( bin_len > 45 ) {
@@ -358,9 +359,10 @@
     }
 
     /* We're lazy and allocate to much (fixed up later) */
-    if ( (rv=PyBytes_FromStringAndSize(NULL, 2 + (bin_len+2)/3*4)) == NULL )
+    out_len = 2 + (bin_len + 2) / 3 * 4;
+    ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
+    if (ascii_data == NULL)
         return NULL;
-    ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
 
     /* Store the length */
     *ascii_data++ = ' ' + (bin_len & 077);
@@ -382,12 +384,7 @@
     }
     *ascii_data++ = '\n';       /* Append a courtesy newline */
 
-    if (_PyBytes_Resize(&rv,
-                       (ascii_data -
-                        (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
-        Py_CLEAR(rv);
-    }
-    return rv;
+    return _PyBytesWriter_Finish(&writer, ascii_data);
 }
 
 
@@ -433,9 +430,9 @@
     int leftbits = 0;
     unsigned char this_ch;
     unsigned int leftchar = 0;
-    PyObject *rv;
     Py_ssize_t ascii_len, bin_len;
     int quad_pos = 0;
+    _PyBytesWriter writer;
 
     ascii_data = data->buf;
     ascii_len = data->len;
@@ -447,11 +444,12 @@
 
     bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
 
+    _PyBytesWriter_Init(&writer);
+
     /* Allocate the buffer */
-    if ( (rv=PyBytes_FromStringAndSize(NULL, bin_len)) == NULL )
+    bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
+    if (bin_data == NULL)
         return NULL;
-    bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
-    bin_len = 0;
 
     for( ; ascii_len > 0; ascii_len--, ascii_data++) {
         this_ch = *ascii_data;
@@ -496,31 +494,17 @@
         if ( leftbits >= 8 ) {
             leftbits -= 8;
             *bin_data++ = (leftchar >> leftbits) & 0xff;
-            bin_len++;
             leftchar &= ((1 << leftbits) - 1);
         }
     }
 
     if (leftbits != 0) {
         PyErr_SetString(Error, "Incorrect padding");
-        Py_DECREF(rv);
+        _PyBytesWriter_Dealloc(&writer);
         return NULL;
     }
 
-    /* And set string size correctly. If the result string is empty
-    ** (because the input was all invalid) return the shared empty
-    ** string instead; _PyBytes_Resize() won't do this for us.
-    */
-    if (bin_len > 0) {
-        if (_PyBytes_Resize(&rv, bin_len) < 0) {
-            Py_CLEAR(rv);
-        }
-    }
-    else {
-        Py_DECREF(rv);
-        rv = PyBytes_FromStringAndSize("", 0);
-    }
-    return rv;
+    return _PyBytesWriter_Finish(&writer, bin_data);
 }
 
 
@@ -542,11 +526,12 @@
     int leftbits = 0;
     unsigned char this_ch;
     unsigned int leftchar = 0;
-    PyObject *rv;
     Py_ssize_t bin_len, out_len;
+    _PyBytesWriter writer;
 
     bin_data = data->buf;
     bin_len = data->len;
+    _PyBytesWriter_Init(&writer);
 
     assert(bin_len >= 0);
 
@@ -561,9 +546,9 @@
     out_len = bin_len*2 + 2;
     if (newline)
         out_len++;
-    if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL )
+    ascii_data = _PyBytesWriter_Alloc(&writer, out_len);
+    if (ascii_data == NULL)
         return NULL;
-    ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
 
     for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
         /* Shift the data into our buffer */
@@ -588,12 +573,7 @@
     if (newline)
         *ascii_data++ = '\n';       /* Append a courtesy newline */
 
-    if (_PyBytes_Resize(&rv,
-                       (ascii_data -
-                        (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
-        Py_CLEAR(rv);
-    }
-    return rv;
+    return _PyBytesWriter_Finish(&writer, ascii_data);
 }
 
 /*[clinic input]
@@ -613,12 +593,14 @@
     int leftbits = 0;
     unsigned char this_ch;
     unsigned int leftchar = 0;
-    PyObject *rv;
+    PyObject *res;
     Py_ssize_t len;
     int done = 0;
+    _PyBytesWriter writer;
 
     ascii_data = data->buf;
     len = data->len;
+    _PyBytesWriter_Init(&writer);
 
     assert(len >= 0);
 
@@ -628,9 +610,9 @@
     /* Allocate a string that is too big (fixed later)
        Add two to the initial length to prevent interning which
        would preclude subsequent resizing.  */
-    if ( (rv=PyBytes_FromStringAndSize(NULL, len+2)) == NULL )
+    bin_data = _PyBytesWriter_Alloc(&writer, len + 2);
+    if (bin_data == NULL)
         return NULL;
-    bin_data = (unsigned char *)PyBytes_AS_STRING(rv);
 
     for( ; len > 0 ; len--, ascii_data++ ) {
         /* Get the byte and look it up */
@@ -639,7 +621,7 @@
             continue;
         if ( this_ch == FAIL ) {
             PyErr_SetString(Error, "Illegal char");
-            Py_DECREF(rv);
+            _PyBytesWriter_Dealloc(&writer);
             return NULL;
         }
         if ( this_ch == DONE ) {
@@ -661,21 +643,14 @@
     if ( leftbits && !done ) {
         PyErr_SetString(Incomplete,
                         "String has incomplete number of bytes");
-        Py_DECREF(rv);
+        _PyBytesWriter_Dealloc(&writer);
         return NULL;
     }
-    if (_PyBytes_Resize(&rv,
-                       (bin_data -
-                        (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
-        Py_CLEAR(rv);
-    }
-    if (rv) {
-        PyObject *rrv = Py_BuildValue("Oi", rv, done);
-        Py_DECREF(rv);
-        return rrv;
-    }
 
-    return NULL;
+    res = _PyBytesWriter_Finish(&writer, bin_data);
+    if (res == NULL)
+        return NULL;
+    return Py_BuildValue("Ni", res, done);
 }
 
 
@@ -693,10 +668,11 @@
 /*[clinic end generated code: output=0905da344dbf0648 input=e1f1712447a82b09]*/
 {
     unsigned char *in_data, *out_data;
-    PyObject *rv;
     unsigned char ch;
     Py_ssize_t in, inend, len;
+    _PyBytesWriter writer;
 
+    _PyBytesWriter_Init(&writer);
     in_data = data->buf;
     len = data->len;
 
@@ -706,9 +682,9 @@
         return PyErr_NoMemory();
 
     /* Worst case: output is twice as big as input (fixed later) */
-    if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
+    out_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
+    if (out_data == NULL)
         return NULL;
-    out_data = (unsigned char *)PyBytes_AS_STRING(rv);
 
     for( in=0; in<len; in++) {
         ch = in_data[in];
@@ -734,12 +710,8 @@
             }
         }
     }
-    if (_PyBytes_Resize(&rv,
-                       (out_data -
-                        (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
-        Py_CLEAR(rv);
-    }
-    return rv;
+
+    return _PyBytesWriter_Finish(&writer, out_data);
 }
 
 
@@ -760,11 +732,12 @@
     int leftbits = 0;
     unsigned char this_ch;
     unsigned int leftchar = 0;
-    PyObject *rv;
     Py_ssize_t len;
+    _PyBytesWriter writer;
 
     bin_data = data->buf;
     len = data->len;
+    _PyBytesWriter_Init(&writer);
 
     assert(len >= 0);
 
@@ -772,9 +745,9 @@
         return PyErr_NoMemory();
 
     /* Allocate a buffer that is at least large enough */
-    if ( (rv=PyBytes_FromStringAndSize(NULL, len*2+2)) == NULL )
+    ascii_data = _PyBytesWriter_Alloc(&writer, len * 2 + 2);
+    if (ascii_data == NULL)
         return NULL;
-    ascii_data = (unsigned char *)PyBytes_AS_STRING(rv);
 
     for( ; len > 0 ; len--, bin_data++ ) {
         /* Shift into our buffer, and output any 6bits ready */
@@ -791,12 +764,8 @@
         leftchar <<= (6-leftbits);
         *ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
     }
-    if (_PyBytes_Resize(&rv,
-                       (ascii_data -
-                        (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
-        Py_CLEAR(rv);
-    }
-    return rv;
+
+    return _PyBytesWriter_Finish(&writer, ascii_data);
 }
 
 
@@ -815,11 +784,12 @@
 {
     unsigned char *in_data, *out_data;
     unsigned char in_byte, in_repeat;
-    PyObject *rv;
     Py_ssize_t in_len, out_len, out_len_left;
+    _PyBytesWriter writer;
 
     in_data = data->buf;
     in_len = data->len;
+    _PyBytesWriter_Init(&writer);
 
     assert(in_len >= 0);
 
@@ -830,45 +800,49 @@
         return PyErr_NoMemory();
 
     /* Allocate a buffer of reasonable size. Resized when needed */
-    out_len = in_len*2;
-    if ( (rv=PyBytes_FromStringAndSize(NULL, out_len)) == NULL )
+    out_len = in_len * 2;
+    out_data = _PyBytesWriter_Alloc(&writer, out_len);
+    if (out_data == NULL)
         return NULL;
-    out_len_left = out_len;
-    out_data = (unsigned char *)PyBytes_AS_STRING(rv);
+
+    /* Use overallocation */
+    writer.overallocate = 1;
+    out_len_left = writer.allocated;
 
     /*
     ** We need two macros here to get/put bytes and handle
     ** end-of-buffer for input and output strings.
     */
-#define INBYTE(b) \
-    do { \
-             if ( --in_len < 0 ) { \
-                       PyErr_SetString(Incomplete, ""); \
-                       Py_DECREF(rv); \
-                       return NULL; \
-             } \
-             b = *in_data++; \
+#define INBYTE(b)                                                       \
+    do {                                                                \
+         if ( --in_len < 0 ) {                                          \
+           PyErr_SetString(Incomplete, "");                             \
+           goto error;                                                  \
+         }                                                              \
+         b = *in_data++;                                                \
     } while(0)
 
-#define OUTBYTE(b) \
-    do { \
-             if ( --out_len_left < 0 ) { \
-                      if ( out_len > PY_SSIZE_T_MAX / 2) return PyErr_NoMemory(); \
-                      if (_PyBytes_Resize(&rv, 2*out_len) < 0) \
-                        { Py_XDECREF(rv); return NULL; } \
-                      out_data = (unsigned char *)PyBytes_AS_STRING(rv) \
-                                                             + out_len; \
-                      out_len_left = out_len-1; \
-                      out_len = out_len * 2; \
-             } \
-             *out_data++ = b; \
+#define OUTBYTE(b)                                                      \
+    do {                                                                \
+         if ( --out_len_left < 0 ) {                                    \
+             if (in_len <= 0) {                                         \
+                 /* We are done after this write, no need to            \
+                    overallocate the buffer anymore */                  \
+                 writer.overallocate = 0;                               \
+             }                                                          \
+             out_data = _PyBytesWriter_Prepare(&writer, out_data, 1);   \
+             if (out_data == NULL)                                      \
+                 goto error;                                            \
+             out_len_left = writer.allocated;                           \
+         }                                                              \
+         *out_data++ = b;                                               \
     } while(0)
 
-        /*
-        ** Handle first byte separately (since we have to get angry
-        ** in case of an orphaned RLE code).
-        */
-        INBYTE(in_byte);
+    /*
+    ** Handle first byte separately (since we have to get angry
+    ** in case of an orphaned RLE code).
+    */
+    INBYTE(in_byte);
 
     if (in_byte == RUNCHAR) {
         INBYTE(in_repeat);
@@ -877,8 +851,7 @@
             ** of the string only). This is a programmer error.
             */
             PyErr_SetString(Error, "Orphaned RLE code at start");
-            Py_DECREF(rv);
-            return NULL;
+            goto error;
         }
         OUTBYTE(RUNCHAR);
     } else {
@@ -904,12 +877,11 @@
             OUTBYTE(in_byte);
         }
     }
-    if (_PyBytes_Resize(&rv,
-                       (out_data -
-                        (unsigned char *)PyBytes_AS_STRING(rv))) < 0) {
-        Py_CLEAR(rv);
-    }
-    return rv;
+    return _PyBytesWriter_Finish(&writer, out_data);
+
+error:
+    _PyBytesWriter_Dealloc(&writer);
+    return NULL;
 }
 
 

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list