[Python-Dev] Re: [Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.31,2.32

Sjoerd Mullender sjoerd@oratrix.nl
Fri, 30 Jun 2000 16:21:53 +0200


Why was the change that occurred in revision 2.31 reverted?  Accident?

The change log said:
Jack Jansen: Use include "" instead of <>; and staticforward declarations

On Fri, Jun 30 2000 "M.-A. Lemburg" wrote:

> Update of /cvsroot/python/python/dist/src/Objects
> In directory slayer.i.sourceforge.net:/tmp/cvs-serv25442/Objects
> 
> Modified Files:
> 	unicodeobject.c 
> Log Message:
> Marc-Andre Lemburg <mal@lemburg.com>:
> New buffer overflow checks for formatting strings.
> 
> By Trent Mick.
> 
> Index: unicodeobject.c
> ===================================================================
> RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
> retrieving revision 2.31
> retrieving revision 2.32
> diff -C2 -r2.31 -r2.32
> *** unicodeobject.c	2000/06/29 00:06:39	2.31
> --- unicodeobject.c	2000/06/30 10:29:57	2.32
> ***************
> *** 67,71 ****
>   #include "mymath.h"
>   #include "unicodeobject.h"
> ! #include "ucnhash.h"
>   
>   #if defined(HAVE_LIMITS_H)
> --- 67,71 ----
>   #include "mymath.h"
>   #include "unicodeobject.h"
> ! #include <ucnhash.h>
>   
>   #if defined(HAVE_LIMITS_H)
> ***************
> *** 1245,1249 ****
>   ucnFallthrough:
>               /* fall through on purpose */
> !         default:
>               *p++ = '\\';
>               *p++ = (unsigned char)s[-1];
> --- 1245,1249 ----
>   ucnFallthrough:
>               /* fall through on purpose */
> ! 		default:
>               *p++ = '\\';
>               *p++ = (unsigned char)s[-1];
> ***************
> *** 1252,1256 ****
>       }
>       if (_PyUnicode_Resize(v, (int)(p - buf)))
> ! 	goto onError;
>       return (PyObject *)v;
>       
> --- 1252,1256 ----
>       }
>       if (_PyUnicode_Resize(v, (int)(p - buf)))
> ! 		goto onError;
>       return (PyObject *)v;
>       
> ***************
> *** 4374,4377 ****
> --- 4374,4378 ----
>   static int
>   formatfloat(Py_UNICODE *buf,
> + 	    size_t buflen,
>   	    int flags,
>   	    int prec,
> ***************
> *** 4379,4382 ****
> --- 4380,4385 ----
>   	    PyObject *v)
>   {
> +     /* fmt = '%#.' + `prec` + `type`
> +        worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
>       char fmt[20];
>       double x;
> ***************
> *** 4387,4395 ****
>       if (prec < 0)
>   	prec = 6;
> -     if (prec > 50)
> - 	prec = 50; /* Arbitrary limitation */
>       if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
>   	type = 'g';
>       sprintf(fmt, "%%%s.%d%c", (flags & F_ALT) ? "#" : "", prec, type);
>       return usprintf(buf, fmt, x);
>   }
> --- 4390,4408 ----
>       if (prec < 0)
>   	prec = 6;
>       if (type == 'f' && (fabs(x) / 1e25) >= 1e25)
>   	type = 'g';
>       sprintf(fmt, "%%%s.%d%c", (flags & F_ALT) ? "#" : "", prec, type);
> +     /* worst case length calc to ensure no buffer overrun:
> +          fmt = %#.<prec>g
> +          buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
> +             for any double rep.)
> +          len = 1 + prec + 1 + 2 + 5 = 9 + prec
> +        If prec=0 the effective precision is 1 (the leading digit is
> +        always given), therefore increase by one to 10+prec. */
> +     if (buflen <= (size_t)10 + (size_t)prec) {
> + 	PyErr_SetString(PyExc_OverflowError,
> + 	    "formatted float is too long (precision too long?)");
> + 	return -1;
> +     }
>       return usprintf(buf, fmt, x);
>   }
> ***************
> *** 4397,4400 ****
> --- 4410,4414 ----
>   static int
>   formatint(Py_UNICODE *buf,
> + 	  size_t buflen,
>   	  int flags,
>   	  int prec,
> ***************
> *** 4402,4405 ****
> --- 4416,4421 ----
>   	  PyObject *v)
>   {
> +     /* fmt = '%#.' + `prec` + 'l' + `type`
> +        worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
>       char fmt[20];
>       long x;
> ***************
> *** 4410,4413 ****
> --- 4426,4436 ----
>       if (prec < 0)
>   	prec = 1;
> +     /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
> +        worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
> +     if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
> +         PyErr_SetString(PyExc_OverflowError,
> +             "formatted integer is too long (precision too long?)");
> +         return -1;
> +     }
>       sprintf(fmt, "%%%s.%dl%c", (flags & F_ALT) ? "#" : "", prec, type);
>       return usprintf(buf, fmt, x);
> ***************
> *** 4416,4421 ****
>   static int
>   formatchar(Py_UNICODE *buf,
> ! 	   PyObject *v)
>   {
>       if (PyUnicode_Check(v)) {
>   	if (PyUnicode_GET_SIZE(v) != 1)
> --- 4439,4446 ----
>   static int
>   formatchar(Py_UNICODE *buf,
> !            size_t buflen,
> !            PyObject *v)
>   {
> +     /* presume that the buffer is at least 2 characters long */
>       if (PyUnicode_Check(v)) {
>   	if (PyUnicode_GET_SIZE(v) != 1)
> ***************
> *** 4447,4450 ****
> --- 4472,4485 ----
>   }
>   
> + /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
> + 
> +    FORMATBUFLEN is the length of the buffer in which the floats, ints, &
> +    chars are formatted. XXX This is a magic number. Each formatting
> +    routine does bounds checking to ensure no overflow, but a better
> +    solution may be to malloc a buffer of appropriate size for each
> +    format. For now, the current solution is sufficient.
> + */
> + #define FORMATBUFLEN (size_t)120
> + 
>   PyObject *PyUnicode_Format(PyObject *format,
>   			   PyObject *args)
> ***************
> *** 4506,4513 ****
>   	    PyObject *v = NULL;
>   	    PyObject *temp = NULL;
> ! 	    Py_UNICODE *buf;
>   	    Py_UNICODE sign;
>   	    int len;
> ! 	    Py_UNICODE tmpbuf[120]; /* For format{float,int,char}() */
>   
>   	    fmt++;
> --- 4541,4548 ----
>   	    PyObject *v = NULL;
>   	    PyObject *temp = NULL;
> ! 	    Py_UNICODE *pbuf;
>   	    Py_UNICODE sign;
>   	    int len;
> ! 	    Py_UNICODE formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
>   
>   	    fmt++;
> ***************
> *** 4659,4664 ****
>   
>   	    case '%':
> ! 		buf = tmpbuf;
> ! 		buf[0] = '%';
>   		len = 1;
>   		break;
> --- 4694,4700 ----
>   
>   	    case '%':
> ! 		pbuf = formatbuf;
> ! 		/* presume that buffer length is at least 1 */
> ! 		pbuf[0] = '%';
>   		len = 1;
>   		break;
> ***************
> *** 4696,4700 ****
>   			goto onError;
>   		}
> ! 		buf = PyUnicode_AS_UNICODE(temp);
>   		len = PyUnicode_GET_SIZE(temp);
>   		if (prec >= 0 && len > prec)
> --- 4732,4736 ----
>   			goto onError;
>   		}
> ! 		pbuf = PyUnicode_AS_UNICODE(temp);
>   		len = PyUnicode_GET_SIZE(temp);
>   		if (prec >= 0 && len > prec)
> ***************
> *** 4710,4715 ****
>   		if (c == 'i')
>   		    c = 'd';
> ! 		buf = tmpbuf;
> ! 		len = formatint(buf, flags, prec, c, v);
>   		if (len < 0)
>   		    goto onError;
> --- 4746,4752 ----
>   		if (c == 'i')
>   		    c = 'd';
> ! 		pbuf = formatbuf;
> ! 		len = formatint(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
> ! 			flags, prec, c, v);
>   		if (len < 0)
>   		    goto onError;
> ***************
> *** 4719,4725 ****
>   		    if ((flags&F_ALT) &&
>   			(c == 'x' || c == 'X') &&
> ! 			buf[0] == '0' && buf[1] == c) {
> ! 			*res++ = *buf++;
> ! 			*res++ = *buf++;
>   			rescnt -= 2;
>   			len -= 2;
> --- 4756,4762 ----
>   		    if ((flags&F_ALT) &&
>   			(c == 'x' || c == 'X') &&
> ! 			pbuf[0] == '0' && pbuf[1] == c) {
> ! 			*res++ = *pbuf++;
> ! 			*res++ = *pbuf++;
>   			rescnt -= 2;
>   			len -= 2;
> ***************
> *** 4736,4741 ****
>   	    case 'g':
>   	    case 'G':
> ! 		buf = tmpbuf;
> ! 		len = formatfloat(buf, flags, prec, c, v);
>   		if (len < 0)
>   		    goto onError;
> --- 4773,4779 ----
>   	    case 'g':
>   	    case 'G':
> ! 		pbuf = formatbuf;
> ! 		len = formatfloat(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE),
> ! 			flags, prec, c, v);
>   		if (len < 0)
>   		    goto onError;
> ***************
> *** 4746,4751 ****
>   
>   	    case 'c':
> ! 		buf = tmpbuf;
> ! 		len = formatchar(buf, v);
>   		if (len < 0)
>   		    goto onError;
> --- 4784,4789 ----
>   
>   	    case 'c':
> ! 		pbuf = formatbuf;
> ! 		len = formatchar(pbuf, sizeof(formatbuf)/sizeof(Py_UNICODE), v);
>   		if (len < 0)
>   		    goto onError;
> ***************
> *** 4759,4764 ****
>   	    }
>   	    if (sign) {
> ! 		if (*buf == '-' || *buf == '+') {
> ! 		    sign = *buf++;
>   		    len--;
>   		}
> --- 4797,4802 ----
>   	    }
>   	    if (sign) {
> ! 		if (*pbuf == '-' || *pbuf == '+') {
> ! 		    sign = *pbuf++;
>   		    len--;
>   		}
> ***************
> *** 4796,4800 ****
>   	    if (sign && fill == ' ')
>   		*res++ = sign;
> ! 	    memcpy(res, buf, len * sizeof(Py_UNICODE));
>   	    res += len;
>   	    rescnt -= len;
> --- 4834,4838 ----
>   	    if (sign && fill == ' ')
>   		*res++ = sign;
> ! 	    memcpy(res, pbuf, len * sizeof(Py_UNICODE));
>   	    res += len;
>   	    rescnt -= len;
> 
> 

-- Sjoerd Mullender <sjoerd.mullender@oratrix.com>