[Patches] largefile support for Win64 (and some other fixes)

Trent Mick trentm@activestate.com
Thu, 1 Jun 2000 14:19:45 -0700


Discussion:

This patch adds largefile support for Win64 and Linux64 (the latter already
mostly worked, I think), and fixes some possible buffer overflows on all
systems with largefile support.

NOTE: this patch depends on my earlier patch to PC/config.h and configure.in
for SIZEOF_OFF_T and SIZEOF_FPOS_T. (see: "[Patches] changes to configure.in
and PC/config.h for 64-bit systems")

Win64 largefile support involved fixing file_seek, file_tell, and
file_truncate to properly handle large indices and to use the proper Win64
system APIs. Win64 does not have 64-bit capable versions of ftell and fseek;
this is worked around with fgetpos() and fsetpos() (and _telli64(), a
64-bit capable tell()). _portable_ftell() and _portable_fseek() were written
to hold the platform-dependent logic. You are still restricted to 32 bits for
single reads and writes.

Previously _chsize was used blindly as the replacement for ftruncate() in
Win32. In fact, _chsize() is not 64-bit capable, so an appropriate overflow
check was added.

There are some type histrionics involved because off_t is only 32 bits on
Win64. fpos_t is 64 bits, however, so fpos_t is used for Win64.

As well, the patch adds some necessary overflow checks (raising OverflowError
when an overflow is detected). See file_read for example.

A forthcoming patch adds a test to the test suite for this largefile support.


Legal:

I confirm that, to the best of my knowledge and belief, this
contribution is free of any claims of third parties under
copyright, patent or other rights or interests ("claims").  To
the extent that I have any such claims, I hereby grant to CNRI a
nonexclusive, irrevocable, royalty-free, worldwide license to
reproduce, distribute, perform and/or display publicly, prepare
derivative versions, and otherwise use this contribution as part
of the Python software and its related documentation, or any
derivative versions thereof, at no cost to CNRI or its licensed
users, and to authorize others to do so.

I acknowledge that CNRI may, at its sole discretion, decide
whether or not to incorporate this contribution in the Python
software and its related documentation.  I further grant CNRI
permission to use my name and other identifying information
provided to CNRI by me for use in connection with the Python
software and its related documentation.


Patch (use 'patch -p8'):

*** /home/trentm/main/contrib/python/dist/src/Objects/fileobject.c	Thu Jun  1 00:13:40 2000
--- /home/trentm/main/Apps/Perlium/Python/dist/src/Objects/fileobject.c	Thu Jun  1 13:53:50 2000
***************
*** 43,50 ****
  #endif
  
  #ifdef MS_WIN32
- #define ftruncate _chsize
  #define fileno _fileno
  #define HAVE_FTRUNCATE
  #endif
  
--- 43,50 ----
  #endif
  
  #ifdef MS_WIN32
  #define fileno _fileno
+ /* can (almost fully) duplicate with _chsize, see file_truncate */
  #define HAVE_FTRUNCATE
  #endif
  
***************
*** 68,73 ****
--- 68,79 ----
  #include <errno.h>
  #endif
  
+ /* define the appropriate 64-bit capable tell() function */
+ #ifdef MS_WIN64
+ #	define TELL64 _telli64
+ #endif
+ 
+ 
  typedef struct {
  	PyObject_HEAD
  	FILE *f_fp;
***************
*** 256,261 ****
--- 262,338 ----
  	return Py_None;
  }
  
+ 
+ /* a portable fseek() function
+    return 0 on success, non-zero on failure (with errno set) */
+ int
+ _portable_fseek(fp, offset, whence)
+ 	FILE* fp;
+ #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
+ 	fpos_t offset;
+ #else
+ 	off_t offset;
+ #endif
+ 	int whence;
+ {
+ #if defined(HAVE_FSEEKO)
+ 	return fseeko(fp, offset, whence);
+ #elif defined(HAVE_FSEEK64)
+ 	return fseek64(fp, offset, whence);
+ #elif defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_FPOS_T >= 8 
+ 	/* lacking a 64-bit capable fseek() (as Win64 does) use a 64-bit capable
+ 		fsetpos() and tell() to implement fseek()*/
+ 	fpos_t pos;
+ 	switch (whence) {
+ 		case SEEK_CUR:
+ 			if (fgetpos(fp, &pos) != 0)
+ 				return -1;
+ 			offset += pos;
+ 			break;
+ 		case SEEK_END:
+ 			/* do a "no-op" seek first to sync the buffering so that
+ 			   the low-level tell() can be used correctly */
+ 			if (fseek(fp, 0, SEEK_END) != 0)
+ 				return -1;
+ 			if ((pos = TELL64(fileno(fp))) == -1L)
+ 				return -1;
+ 			offset += pos;
+ 			break;
+ 		/* case SEEK_SET: break; */
+ 	}
+ 	return fsetpos(fp, &offset);
+ #else
+ 	return fseek(fp, offset, whence);
+ #endif
+ }
+ 
+ 
+ /* a portable ftell() function
+    Return -1 on failure with errno set appropriately, current file
+    position on success */
+ #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
+ fpos_t
+ #else
+ off_t
+ #endif
+ _portable_ftell(fp)
+ 	FILE* fp;
+ {
+ #if defined(HAVE_FTELLO) && defined(HAVE_LARGEFILE_SUPPORT)
+ 	return ftello(fp);
+ #elif defined(HAVE_FTELL64) && defined(HAVE_LARGEFILE_SUPPORT)
+ 	return ftell64(fp);
+ #elif SIZEOF_FPOS_T >= 8 && defined(HAVE_LARGEFILE_SUPPORT)
+ 	fpos_t pos;
+ 	if (fgetpos(fp, &pos) != 0)
+ 		return -1;
+ 	return pos;
+ #else
+ 	return ftell(fp);
+ #endif
+ }
+ 
+ 
  static PyObject *
  file_seek(f, args)
  	PyFileObject *f;
***************
*** 263,269 ****
--- 340,350 ----
  {
  	int whence;
  	int ret;
+ #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
+ 	fpos_t offset, pos;
+ #else
  	off_t offset;
+ #endif /* !MS_WIN64 */
  	PyObject *offobj;
  	
  	if (f->f_fp == NULL)
***************
*** 279,294 ****
  #endif
  	if (PyErr_Occurred())
  		return NULL;
  	Py_BEGIN_ALLOW_THREADS
  	errno = 0;
! #if defined(HAVE_FSEEKO)
! 	ret = fseeko(f->f_fp, offset, whence);
! #elif defined(HAVE_FSEEK64)
! 	ret = fseek64(f->f_fp, offset, whence);
! #else
! 	ret = fseek(f->f_fp, offset, whence);
! #endif
  	Py_END_ALLOW_THREADS
  	if (ret != 0) {
  		PyErr_SetFromErrno(PyExc_IOError);
  		clearerr(f->f_fp);
--- 360,371 ----
  #endif
  	if (PyErr_Occurred())
  		return NULL;
+ 	
  	Py_BEGIN_ALLOW_THREADS
  	errno = 0;
! 	ret = _portable_fseek(f->f_fp, offset, whence);
  	Py_END_ALLOW_THREADS
+ 
  	if (ret != 0) {
  		PyErr_SetFromErrno(PyExc_IOError);
  		clearerr(f->f_fp);
***************
*** 298,303 ****
--- 375,381 ----
  	return Py_None;
  }
  
+ 
  #ifdef HAVE_FTRUNCATE
  static PyObject *
  file_truncate(f, args)
***************
*** 305,311 ****
--- 383,393 ----
  	PyObject *args;
  {
  	int ret;
+ #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
+ 	fpos_t newsize;
+ #else
  	off_t newsize;
+ #endif
  	PyObject *newsizeobj;
  	
  	if (f->f_fp == NULL)
***************
*** 327,339 ****
  		/* Default to current position*/
  		Py_BEGIN_ALLOW_THREADS
  		errno = 0;
! #if defined(HAVE_FTELLO) && defined(HAVE_LARGEFILE_SUPPORT)
! 		newsize =  ftello(f->f_fp);
! #elif defined(HAVE_FTELL64) && defined(HAVE_LARGEFILE_SUPPORT)
! 		newsize =  ftell64(f->f_fp);
! #else
! 		newsize =  ftell(f->f_fp);
! #endif
  		Py_END_ALLOW_THREADS
  		if (newsize == -1) {
  		        PyErr_SetFromErrno(PyExc_IOError);
--- 409,415 ----
  		/* Default to current position*/
  		Py_BEGIN_ALLOW_THREADS
  		errno = 0;
! 		newsize = _portable_ftell(f->f_fp);
  		Py_END_ALLOW_THREADS
  		if (newsize == -1) {
  		        PyErr_SetFromErrno(PyExc_IOError);
***************
*** 345,363 ****
  	errno = 0;
  	ret = fflush(f->f_fp);
  	Py_END_ALLOW_THREADS
! 	if (ret == 0) {
! 	        Py_BEGIN_ALLOW_THREADS
  		errno = 0;
! 		ret = ftruncate(fileno(f->f_fp), newsize);
  		Py_END_ALLOW_THREADS
  	}
! 	if (ret != 0) {
! 		PyErr_SetFromErrno(PyExc_IOError);
! 		clearerr(f->f_fp);
! 		return NULL;
! 	}
  	Py_INCREF(Py_None);
  	return Py_None;
  }
  #endif /* HAVE_FTRUNCATE */
  
--- 421,457 ----
  	errno = 0;
  	ret = fflush(f->f_fp);
  	Py_END_ALLOW_THREADS
! 	if (ret != 0) goto onioerror;
! 
! #ifdef MS_WIN32
! 	/* can use _chsize; if, however, the newsize overflows 32-bits then
! 	   _chsize is *not* adequate; in this case, an OverflowError is raised */
! 	if (newsize > LONG_MAX) {
! 		PyErr_SetString(PyExc_OverflowError,
! 			"the new size is too long for _chsize (it is limited to 32-bit values)");
! 		return NULL;
! 	} else {
! 		Py_BEGIN_ALLOW_THREADS
  		errno = 0;
! 		ret = _chsize(fileno(f->f_fp), newsize);
  		Py_END_ALLOW_THREADS
+ 		if (ret != 0) goto onioerror;
  	}
! #else
! 	Py_BEGIN_ALLOW_THREADS
! 	errno = 0;
! 	ret = ftruncate(fileno(f->f_fp), newsize);
! 	Py_END_ALLOW_THREADS
! 	if (ret != 0) goto onioerror;
! #endif /* !MS_WIN32 */
! 	
  	Py_INCREF(Py_None);
  	return Py_None;
+ 
+ onioerror:
+ 	PyErr_SetFromErrno(PyExc_IOError);
+ 	clearerr(f->f_fp);
+ 	return NULL;
  }
  #endif /* HAVE_FTRUNCATE */
  
***************
*** 366,395 ****
  	PyFileObject *f;
  	PyObject *args;
  {
! 	off_t offset;
  	if (f->f_fp == NULL)
  		return err_closed();
  	if (!PyArg_NoArgs(args))
  		return NULL;
  	Py_BEGIN_ALLOW_THREADS
  	errno = 0;
! #if defined(HAVE_FTELLO) && defined(HAVE_LARGEFILE_SUPPORT)
! 	offset = ftello(f->f_fp);
! #elif defined(HAVE_FTELL64) && defined(HAVE_LARGEFILE_SUPPORT)
! 	offset = ftell64(f->f_fp);
! #else
! 	offset = ftell(f->f_fp);
! #endif
  	Py_END_ALLOW_THREADS
! 	if (offset == -1) {
  		PyErr_SetFromErrno(PyExc_IOError);
  		clearerr(f->f_fp);
  		return NULL;
  	}
  #if !defined(HAVE_LARGEFILE_SUPPORT)
! 	return PyInt_FromLong(offset);
  #else
! 	return PyLong_FromLongLong(offset);
  #endif
  }
  
--- 460,488 ----
  	PyFileObject *f;
  	PyObject *args;
  {
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
! 	fpos_t pos;
! #else
! 	off_t pos;
! #endif
! 
  	if (f->f_fp == NULL)
  		return err_closed();
  	if (!PyArg_NoArgs(args))
  		return NULL;
  	Py_BEGIN_ALLOW_THREADS
  	errno = 0;
! 	pos = _portable_ftell(f->f_fp);
  	Py_END_ALLOW_THREADS
! 	if (pos == -1) {
  		PyErr_SetFromErrno(PyExc_IOError);
  		clearerr(f->f_fp);
  		return NULL;
  	}
  #if !defined(HAVE_LARGEFILE_SUPPORT)
! 	return PyInt_FromLong(pos);
  #else
! 	return PyLong_FromLongLong(pos);
  #endif
  }
  
***************
*** 530,535 ****
--- 623,633 ----
  		buffersize = new_buffersize(f, (size_t)0);
  	else
  		buffersize = bytesrequested;
+ 	if (buffersize > INT_MAX) {
+ 		PyErr_SetString(PyExc_OverflowError,
+ 			"requested number of bytes is more than a Python string can hold");
+ 		return NULL;
+ 	}
  	v = PyString_FromStringAndSize((char *)NULL, buffersize);
  	if (v == NULL)
  		return NULL;
***************
*** 568,574 ****
  	PyObject *args;
  {
  	char *ptr;
! 	int ntodo, ndone, nnow;
  	
  	if (f->f_fp == NULL)
  		return err_closed();
--- 666,672 ----
  	PyObject *args;
  {
  	char *ptr;
! 	size_t ntodo, ndone, nnow;
  	
  	if (f->f_fp == NULL)
  		return err_closed();
***************
*** 590,596 ****
  		ndone += nnow;
  		ntodo -= nnow;
  	}
! 	return PyInt_FromLong(ndone);
  }
  
  
--- 688,694 ----
  		ndone += nnow;
  		ntodo -= nnow;
  	}
! 	return PyInt_FromLong((long)ndone);
  }
  
  
***************
*** 609,615 ****
  	register FILE *fp;
  	register int c;
  	register char *buf, *end;
! 	int n1, n2;
  	PyObject *v;
  
  	fp = f->f_fp;
--- 707,713 ----
  	register FILE *fp;
  	register int c;
  	register char *buf, *end;
! 	size_t n1, n2;
  	PyObject *v;
  
  	fp = f->f_fp;
***************
*** 648,653 ****
--- 746,756 ----
  				break;
  			n1 = n2;
  			n2 += 1000;
+ 			if (n2 > INT_MAX) {
+ 				PyErr_SetString(PyExc_OverflowError,
+ 					"line is longer than a Python string can hold");
+ 				return NULL;
+ 			}
  			Py_BLOCK_THREADS
  			if (_PyString_Resize(&v, n2) < 0)
  				return NULL;
***************
*** 793,798 ****
--- 896,906 ----
  			/* Need a larger buffer to fit this line */
  			nfilled += nread;
  			buffersize *= 2;
+ 			if (buffersize > INT_MAX) {
+ 				PyErr_SetString(PyExc_OverflowError,
+ 					"line is too long for a Python string");
+ 				goto error;
+ 			}
  			if (big_buffer == NULL) {
  				/* Create the big buffer */
  				big_buffer = PyString_FromStringAndSize(

-- 
Trent Mick
trentm@activestate.com