[Patches] Fix for bug PR#341

Trent Mick trentm@activestate.com
Thu, 1 Jun 2000 02:34:28 -0700


Discussion:

This patch fixes the string formatting overflow problem. It tries to do a
little better than MAL's magic nunumber (50) check. If this looks good then I
can do the same for unicodeobject.c


[Tim P on MAL's original patch]
> but I'll join Fred in objecting to the code
> it's mimicking:  not only do magic numbers suck, but these particular magic
> numbers implicitly rely on PyString_Format's tmpbuf vector being declared of
> another magical size larger than them.  As usual, flaky code gets flakier.

My patch still uses the magic number for the temporary buffer. This seems to me
a good practical limit. With the patch this buffer can no longer overflow (as
well, it is faster than malloc'ing a perfect sized buffer every time). 

[MAL]
> A redesign would, of course, use a malloced buffer, the n-variants
> of printf() and add long support ;-) ... maybe for 1.7.

No long support in this patch :(

[Guido on MAL's original patch]
> Having read the patch and the discussion about magic numbers, I agree
> with Marc-Andre: let's apply the quick fix now, worry about
> correctness later.

Maybe this patch is preferable.


Legal:

I confirm that, to the best of my knowledge and belief, this
contribution is free of any claims of third parties under
copyright, patent or other rights or interests ("claims").  To
the extent that I have any such claims, I hereby grant to CNRI a
nonexclusive, irrevocable, royalty-free, worldwide license to
reproduce, distribute, perform and/or display publicly, prepare
derivative versions, and otherwise use this contribution as part
of the Python software and its related documentation, or any
derivative versions thereof, at no cost to CNRI or its licensed
users, and to authorize others to do so.

I acknowledge that CNRI may, at its sole discretion, decide
whether or not to incorporate this contribution in the Python
software and its related documentation.  I further grant CNRI
permission to use my name and other identifying information
provided to CNRI by me for use in connection with the Python
software and its related documentation.


Patch (use 'patch -p8'):

*** /home/trentm/main/contrib/python/dist/src/Objects/stringobject.c	Thu Jun  1 00:13:40 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/stringobject.c	Wed May 31 23:54:19 2000
***************
*** 237,245 ****
  string_repr(op)
  	register PyStringObject *op;
  {
! 	/* XXX overflow? */
! 	int newsize = 2 + 4 * op->ob_size * sizeof(char);
! 	PyObject *v = PyString_FromStringAndSize((char *)NULL, newsize);
  	if (v == NULL) {
  		return NULL;
  	}
--- 242,254 ----
  string_repr(op)
  	register PyStringObject *op;
  {
! 	size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
! 	PyObject *v;
! 	if (newsize > INT_MAX) {
! 		PyErr_SetString(PyExc_OverflowError,
! 			"string is too large to make repr");
! 	}
! 	v = PyString_FromStringAndSize((char *)NULL, newsize);
  	if (v == NULL) {
  		return NULL;
  	}
***************
*** 2317,2352 ****
  #define F_ZERO	(1<<4)
  
  static int
! formatfloat(buf, flags, prec, type, v)
  	char *buf;
  	int flags;
  	int prec;
  	int type;
  	PyObject *v;
  {
  	char fmt[20];
  	double x;
  	if (!PyArg_Parse(v, "d;float argument required", &x))
  		return -1;
  	if (prec < 0)
  		prec = 6;
- 	if (prec > 50)
- 		prec = 50; /* Arbitrary limitation */
  	if (type == 'f' && fabs(x)/1e25 >= 1e25)
  		type = 'g';
  	sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
  	sprintf(buf, fmt, x);
  	return strlen(buf);
  }
  
  static int
! formatint(buf, flags, prec, type, v)
  	char *buf;
  	int flags;
  	int prec;
  	int type;
  	PyObject *v;
  {
  	char fmt[20];
  	long x;
  	if (!PyArg_Parse(v, "l;int argument required", &x))
--- 2326,2377 ----
  #define F_ZERO	(1<<4)
  
  static int
! formatfloat(buf, buflen, flags, prec, type, v)
  	char *buf;
+ 	size_t buflen;
  	int flags;
  	int prec;
  	int type;
  	PyObject *v;
  {
+ 	/* fmt = '%#.' + `prec` + `type`
+ 	   worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
  	char fmt[20];
  	double x;
  	if (!PyArg_Parse(v, "d;float argument required", &x))
  		return -1;
  	if (prec < 0)
  		prec = 6;
  	if (type == 'f' && fabs(x)/1e25 >= 1e25)
  		type = 'g';
  	sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
+ 	/* worst case length calc to ensure no buffer overrun:
+ 	     fmt = %#.<prec>g
+ 	     buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
+ 	        for any double rep.) 
+ 	     len = 1 + prec + 1 + 2 + 5 = 9 + prec
+ 	   If prec=0 the effective precision is 1 (the leading digit is
+ 	   always given), therefore increase by one to 10+prec. */
+ 	if (buflen <= (size_t)10 + (size_t)prec) {
+ 		PyErr_SetString(PyExc_OverflowError,
+ 			"formatted float is too long (precision too long?)");
+ 		return -1;
+ 	}
  	sprintf(buf, fmt, x);
  	return strlen(buf);
  }
  
  static int
! formatint(buf, buflen, flags, prec, type, v)
  	char *buf;
+ 	size_t buflen;
  	int flags;
  	int prec;
  	int type;
  	PyObject *v;
  {
+ 	/* fmt = '%#.' + `prec` + 'l' + `type`
+ 	   worst case length = 3 + 10 (len of INT_MAX) + 1 + 1 = 15 (use 20)*/
  	char fmt[20];
  	long x;
  	if (!PyArg_Parse(v, "l;int argument required", &x))
***************
*** 2354,2368 ****
  	if (prec < 0)
  		prec = 1;
  	sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
  	sprintf(buf, fmt, x);
  	return strlen(buf);
  }
  
  static int
! formatchar(buf, v)
  	char *buf;
  	PyObject *v;
  {
  	if (PyString_Check(v)) {
  		if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
  			return -1;
--- 2379,2402 ----
  	if (prec < 0)
  		prec = 1;
  	sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
+ 	/* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec,len(x in octal))
+ 	   worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
+ 	if (buflen <= 13 || buflen <= (size_t)2+(size_t)prec) {
+ 		PyErr_SetString(PyExc_OverflowError,
+ 			"formatted integer is too long (precision too long?)");
+ 		return -1;
+ 	}
  	sprintf(buf, fmt, x);
  	return strlen(buf);
  }
  
  static int
! formatchar(buf, buflen, v)
  	char *buf;
+ 	size_t buflen;
  	PyObject *v;
  {
+ 	/* presume that the buffer is at least 2 characters long */
  	if (PyString_Check(v)) {
  		if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
  			return -1;
***************
*** 2436,2442 ****
  			char *buf;
  			int sign;
  			int len;
! 			char tmpbuf[120]; /* For format{float,int,char}() */
  			char *fmt_start = fmt;
  			
  			fmt++;
--- 2470,2477 ----
  			char *buf;
  			int sign;
  			int len;
! #define TMPBUFLEN (size_t)120
! 			char tmpbuf[TMPBUFLEN]; /* For format{float,int,char}() */
  			char *fmt_start = fmt;
  			
  			fmt++;
***************
*** 2618,2624 ****
  				if (c == 'i')
  					c = 'd';
  				buf = tmpbuf;
! 				len = formatint(buf, flags, prec, c, v);
  				if (len < 0)
  					goto error;
  				sign = (c == 'd');
--- 2653,2659 ----
  				if (c == 'i')
  					c = 'd';
  				buf = tmpbuf;
! 				len = formatint(buf, TMPBUFLEN, flags, prec, c, v);
  				if (len < 0)
  					goto error;
  				sign = (c == 'd');
***************
*** 2643,2649 ****
  			case 'g':
  			case 'G':
  				buf = tmpbuf;
! 				len = formatfloat(buf, flags, prec, c, v);
  				if (len < 0)
  					goto error;
  				sign = 1;
--- 2678,2684 ----
  			case 'g':
  			case 'G':
  				buf = tmpbuf;
! 				len = formatfloat(buf, TMPBUFLEN, flags, prec, c, v);
  				if (len < 0)
  					goto error;
  				sign = 1;
***************
*** 2652,2658 ****
  				break;
  			case 'c':
  				buf = tmpbuf;
! 				len = formatchar(buf, v);
  				if (len < 0)
  					goto error;
  				break;
--- 2687,2693 ----
  				break;
  			case 'c':
  				buf = tmpbuf;
! 				len = formatchar(buf, TMPBUFLEN, v);
  				if (len < 0)
  					goto error;
  				break;

-- 
Trent Mick
trentm@activestate.com