[Python-checkins] python/dist/src/Objects fileobject.c,2.157,2.158

Sun, 14 Apr 2002 13:12:43 -0700

Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv1748/Objects

Modified Files:
	fileobject.c 
Log Message:
Mass checkin of universal newline support.
Highlights: import and friends will understand any of \r, \n and \r\n
as end of line. Python file input will do the same if you use mode 'U'.
Everything can be disabled by configuring with --without-universal-newlines.

See PEP278 for details. 

Index: fileobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v
retrieving revision 2.157
retrieving revision 2.158
diff -C2 -d -r2.157 -r2.158
*** fileobject.c	12 Apr 2002 02:43:31 -0000	2.157
--- fileobject.c	14 Apr 2002 20:12:40 -0000	2.158
***************
*** 38,41 ****
--- 38,58 ----
  #endif

+ #ifdef HAVE_GETC_UNLOCKED	/* read loops lock the stream once, then use the unlocked getc */
+ #define GETC(f) getc_unlocked(f)
+ #define FLOCKFILE(f) flockfile(f)
+ #define FUNLOCKFILE(f) funlockfile(f)
+ #else	/* no getc_unlocked(): plain getc(), and the lock macros expand to nothing */
+ #define GETC(f) getc(f)
+ #define FLOCKFILE(f)
+ #define FUNLOCKFILE(f)
+ #endif /* HAVE_GETC_UNLOCKED */
+ 
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ /* Bits in f_newlinetypes; the readers OR them together as each kind is seen */
+ #define NEWLINE_UNKNOWN	0	/* No newline seen, yet */
+ #define NEWLINE_CR 1		/* \r newline seen */
+ #define NEWLINE_LF 2		/* \n newline seen */
+ #define NEWLINE_CRLF 4		/* \r\n newline seen */
+ #endif /* WITH_UNIVERSAL_NEWLINES */

  FILE *
***************
*** 100,103 ****
--- 117,125 ----
  	f->f_softspace = 0;
  	f->f_binary = strchr(mode,'b') != NULL;
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ 	f->f_univ_newline = (strchr(mode, 'U') != NULL);
+ 	f->f_newlinetypes = NEWLINE_UNKNOWN;
+ 	f->f_skipnextlf = 0;
+ #endif

  	if (f->f_name == NULL || f->f_mode == NULL)
***************
*** 135,138 ****
--- 157,171 ----
  	{
  		Py_BEGIN_ALLOW_THREADS
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ 		if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
+ 			mode = "rb";
+ #else
+ 		/* Compatibility: specifying U in a Python without universal
+ 		** newlines is allowed, and the file is opened as a normal text
+ 		** file.
+ 		*/
+ 		if (strcmp(mode, "U") == 0 || strcmp(mode, "rU") == 0)
+ 			mode = "r";
+ #endif
  		f->f_fp = fopen(name, mode);
  		Py_END_ALLOW_THREADS
***************
*** 395,398 ****
--- 428,434 ----
  		return NULL;
  	}
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ 	f->f_skipnextlf = 0;
+ #endif
  	Py_INCREF(Py_None);
  	return Py_None;
***************
*** 535,538 ****
--- 571,584 ----
  		return NULL;
  	}
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ 	if (f->f_skipnextlf) {
+ 		int c;
+ 		c = GETC(f->f_fp);
+ 		if (c == '\n') {
+ 			pos++;
+ 			f->f_skipnextlf = 0;
+ 		} else if (c != EOF) ungetc(c, f->f_fp);
+ 	}
+ #endif
  #if !defined(HAVE_LARGEFILE_SUPPORT)
  	return PyInt_FromLong(pos);
***************
*** 666,671 ****
  		Py_BEGIN_ALLOW_THREADS
  		errno = 0;
! 		chunksize = fread(BUF(v) + bytesread, 1,
! 				  buffersize - bytesread, f->f_fp);
  		Py_END_ALLOW_THREADS
  		if (chunksize == 0) {
--- 712,717 ----
  		Py_BEGIN_ALLOW_THREADS
  		errno = 0;
! 		chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
! 				  buffersize - bytesread, f->f_fp, (PyObject *)f);
  		Py_END_ALLOW_THREADS
  		if (chunksize == 0) {
***************
*** 706,710 ****
  		Py_BEGIN_ALLOW_THREADS
  		errno = 0;
! 		nnow = fread(ptr+ndone, 1, ntodo, f->f_fp);
  		Py_END_ALLOW_THREADS
  		if (nnow == 0) {
--- 752,756 ----
  		Py_BEGIN_ALLOW_THREADS
  		errno = 0;
! 		nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp, (PyObject *)f);
  		Py_END_ALLOW_THREADS
  		if (nnow == 0) {
***************
*** 935,948 ****
  */

- #ifdef HAVE_GETC_UNLOCKED
- #define GETC(f) getc_unlocked(f)
- #define FLOCKFILE(f) flockfile(f)
- #define FUNLOCKFILE(f) funlockfile(f)
- #else
- #define GETC(f) getc(f)
- #define FLOCKFILE(f)
- #define FUNLOCKFILE(f)
- #endif
- 
  static PyObject *
  get_line(PyFileObject *f, int n)
--- 981,984 ----
***************
*** 955,961 ****
  	size_t increment;       /* amount to increment the buffer */
  	PyObject *v;

! #ifdef USE_FGETS_IN_GETLINE
  	if (n <= 0)
  		return getline_via_fgets(fp);
  #endif
--- 991,1006 ----
  	size_t increment;       /* amount to increment the buffer */
  	PyObject *v;
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ 	int newlinetypes = f->f_newlinetypes;
+ 	int skipnextlf = f->f_skipnextlf;
+ 	int univ_newline = f->f_univ_newline;
+ #endif

! #if defined(USE_FGETS_IN_GETLINE)
! #ifdef WITH_UNIVERSAL_NEWLINES
! 	if (n <= 0 && !univ_newline )
! #else
  	if (n <= 0)
+ #endif
  		return getline_via_fgets(fp);
  #endif
***************
*** 970,973 ****
--- 1015,1047 ----
  		Py_BEGIN_ALLOW_THREADS
  		FLOCKFILE(fp);
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ 		if (univ_newline) {
+ 			c = 'x'; /* Shut up gcc warning */
+ 			while ( buf != end && (c = GETC(fp)) != EOF ) {
+ 				if (skipnextlf ) {
+ 					skipnextlf = 0;
+ 					if (c == '\n') {
+ 						/* Seeing a \n here with skipnextlf true
+ 						** means we saw a \r before.
+ 						*/
+ 						newlinetypes |= NEWLINE_CRLF;
+ 						c = GETC(fp);
+ 						if (c == EOF) break;
+ 					} else {
+ 						newlinetypes |= NEWLINE_CR;
+ 					}
+ 				}
+ 				if (c == '\r') {
+ 					skipnextlf = 1;
+ 					c = '\n';
+ 				} else if ( c == '\n')
+ 					newlinetypes |= NEWLINE_LF;
+ 				*buf++ = c;
+ 				if (c == '\n') break;
+ 			}
+ 			if ( c == EOF && skipnextlf )
+ 				newlinetypes |= NEWLINE_CR;
+ 		} else /* If not universal newlines use the normal loop */
+ #endif
  		while ((c = GETC(fp)) != EOF &&
  		       (*buf++ = c) != '\n' &&
***************
*** 976,979 ****
--- 1050,1057 ----
  		FUNLOCKFILE(fp);
  		Py_END_ALLOW_THREADS
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ 		f->f_newlinetypes = newlinetypes;
+ 		f->f_skipnextlf = skipnextlf;
+ #endif
  		if (c == '\n')
  			break;
***************
*** 1151,1156 ****
  			Py_BEGIN_ALLOW_THREADS
  			errno = 0;
! 			nread = fread(buffer+nfilled, 1,
! 				      buffersize-nfilled, f->f_fp);
  			Py_END_ALLOW_THREADS
  			shortread = (nread < buffersize-nfilled);
--- 1229,1234 ----
  			Py_BEGIN_ALLOW_THREADS
  			errno = 0;
! 			nread = Py_UniversalNewlineFread(buffer+nfilled, 
! 				buffersize-nfilled, f->f_fp, (PyObject *)f);
  			Py_END_ALLOW_THREADS
  			shortread = (nread < buffersize-nfilled);
***************
*** 1189,1193 ****
  			else {
  				/* Grow the big buffer */
! 				_PyString_Resize(&big_buffer, buffersize);
  				buffer = PyString_AS_STRING(big_buffer);
  			}
--- 1267,1272 ----
  			else {
  				/* Grow the big buffer */
! 				if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
! 					goto error;
  				buffer = PyString_AS_STRING(big_buffer);
  			}
***************
*** 1504,1510 ****
--- 1583,1620 ----
  	return PyBool_FromLong((long)(f->f_fp == 0));
  }
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ static PyObject *
+ get_newlines(PyFileObject *f, void *closure)	/* getter for the "newlines" attribute; closure is not used */
+ {
+ 	switch (f->f_newlinetypes) {	/* OR of the NEWLINE_* bits recorded while reading */
+ 	case NEWLINE_UNKNOWN:	/* no newline seen yet: return None */
+ 		Py_INCREF(Py_None);
+ 		return Py_None;
+ 	case NEWLINE_CR:	/* exactly one convention seen: return it as a string */
+ 		return PyString_FromString("\r");
+ 	case NEWLINE_LF:
+ 		return PyString_FromString("\n");
+ 	case NEWLINE_CR|NEWLINE_LF:	/* several conventions seen: return a tuple of them */
+ 		return Py_BuildValue("(ss)", "\r", "\n");
+ 	case NEWLINE_CRLF:
+ 		return PyString_FromString("\r\n");
+ 	case NEWLINE_CR|NEWLINE_CRLF:
+ 		return Py_BuildValue("(ss)", "\r", "\r\n");
+ 	case NEWLINE_LF|NEWLINE_CRLF:
+ 		return Py_BuildValue("(ss)", "\n", "\r\n");
+ 	case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
+ 		return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
+ 	default:	/* bits outside the three NEWLINE_* flags are set */
+ 		PyErr_Format(PyExc_SystemError, "Unknown newlines value 0x%x\n", f->f_newlinetypes);
+ 		return NULL;
+ 	}
+ }
+ #endif

  static PyGetSetDef file_getsetlist[] = {
  	{"closed", (getter)get_closed, NULL, "True if the file is closed"},
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ 	{"newlines", (getter)get_newlines, NULL, "end-of-line convention used in this file"},
+ #endif
  	{0},
  };
***************
*** 1806,1807 ****
--- 1916,2084 ----
  	return fd;
  }
+ 
+ #ifdef WITH_UNIVERSAL_NEWLINES
+ /* From here on we need access to the real fgets and fread */
+ #undef fgets
+ #undef fread
+ 
+ /*
+ ** Py_UniversalNewlineFgets is an fgets variation that understands
+ ** all of the \r, \n and \r\n conventions; each is stored in buf as \n.
+ ** The stream should be opened in binary mode.
+ ** If fobj is NULL the routine always does newline conversion, and
+ ** it may peek one char ahead to gobble the second char in \r\n.
+ ** If fobj is non-NULL it must be a PyFileObject. In this case there
+ ** is no readahead; instead the f_skipnextlf flag is used to skip a
+ ** following \n on the next read. Also, if f_univ_newline is not set
+ ** the whole conversion is skipped and fgets() is called. Finally, the
+ ** routine records the types of newlines seen in f_newlinetypes.
+ ** Returns buf, or NULL if nothing was stored. Note that we need no
+ ** error handling: fgets() treats error and eof identically.
+ */
+ char *
+ Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
+ {
+ 	char *p = buf;
+ 	int c;
+ 	int newlinetypes = 0;
+ 	int skipnextlf = 0;
+ 	int univ_newline = 1;
+ 	
+ 	if (fobj) {
+ 		if (!PyFile_Check(fobj)) {
+ 			errno = ENXIO;	/* What can you do... */
+ 			return NULL;
+ 		}
+ 		univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
+ 		if ( !univ_newline )
+ 			return fgets(buf, n, stream);
+ 		newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
+ 		skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
+ 	}
+ 	FLOCKFILE(stream);
+ 	c = 'x'; /* Shut up gcc warning */
+ 	while (--n > 0 && (c = GETC(stream)) != EOF ) {	/* --n keeps room for the final '\0' */
+ 		if (skipnextlf ) {
+ 			skipnextlf = 0;
+ 			if (c == '\n') {
+ 				/* Seeing a \n here with skipnextlf true
+ 				** means we saw a \r before.
+ 				*/
+ 				newlinetypes |= NEWLINE_CRLF;
+ 				c = GETC(stream);
+ 				if (c == EOF) break;
+ 			} else {
+ 				/*
+ 				** The pending \r was followed by something
+ 				** other than \n, so it was a lone CR. (A \r
+ 				** at EOF is handled after the loop.)
+ 				*/
+ 				newlinetypes |= NEWLINE_CR;
+ 			}
+ 		}
+ 		if (c == '\r') {
+ 			/* A \r is translated into a \n, and we skip
+ 			** an adjacent \n, if any. We don't set the
+ 			** newlinetypes flag until we've seen the next char.
+ 			*/
+ 			skipnextlf = 1;
+ 			c = '\n';
+ 		} else if ( c == '\n') {
+ 			newlinetypes |= NEWLINE_LF;
+ 		}
+ 		*p++ = c;
+ 		if (c == '\n') break;	/* end of line (a \r was already turned into \n) */
+ 	}
+ 	if ( c == EOF && skipnextlf )	/* input ended right after a \r: a lone CR */
+ 		newlinetypes |= NEWLINE_CR;
+ 	FUNLOCKFILE(stream);
+ 	*p = '\0';
+ 	if (fobj) {
+ 		((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
+ 		((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
+ 	} else if ( skipnextlf ) {
+ 		/* If we have no file object we cannot save the
+ 		** skipnextlf flag. We have to readahead, which
+ 		** will cause a pause if we're reading from an
+ 		** interactive stream, but that is very unlikely
+ 		** unless we're doing something silly like
+ 		** execfile("/dev/tty").
+ 		*/
+ 		c = GETC(stream);
+ 		if ( c != '\n' )	/* not the LF of a CR LF pair: put it back */
+ 			ungetc(c, stream);
+ 	}
+ 	if (p == buf)	/* nothing was stored in buf */
+ 		return NULL;
+ 	return buf;
+ }
+ 
+ /*
+ ** Py_UniversalNewlineFread is an fread variation that understands
+ ** all of the \r, \n and \r\n conventions; each one is stored as \n.
+ ** The stream should be opened in binary mode.
+ ** fobj must be a PyFileObject; if not, errno is set to ENXIO and 0 is
+ ** returned. There is no readahead: f_skipnextlf remembers a trailing \r
+ ** so a following \n is skipped on the next read. If f_univ_newline is
+ ** not set the conversion is skipped and plain fread() is used. Returns
+ ** the number of chars stored in buf (less than n only at EOF or on
+ ** error); the newline types seen are recorded in f_newlinetypes.
+ */
+ size_t
+ Py_UniversalNewlineFread(void *buf, size_t n,
+ 			 FILE *stream, PyObject *fobj)
+ {
+ 	char *dst = buf;
+ 	PyFileObject *f = (PyFileObject *)fobj;
+ 	int newlinetypes, skipnextlf;
+ 	
+ 	if (!fobj || !PyFile_Check(fobj)) {
+ 		errno = ENXIO;	/* What can you do... */
+ 		return 0;	/* size_t has no -1; callers test for 0 */
+ 	}
+ 	if (!f->f_univ_newline)
+ 		return fread(buf, 1, n, stream);
+ 	newlinetypes = f->f_newlinetypes;
+ 	skipnextlf = f->f_skipnextlf;
+ 	/* Invariant: n is the number of chars that may still be stored. */
+ 	while (n > 0) {
+ 		char *src = dst;
+ 		size_t nread = fread(dst, 1, n, stream);
+ 		int shortread = (nread < n);	/* true iff EOF or error */
+ 		n -= nread;	/* assume one char out per char in; fixed up below */
+ 		while (nread-- > 0) {
+ 			char c = *src++;
+ 			if (skipnextlf) {
+ 				skipnextlf = 0;
+ 				if (c == '\n') {
+ 					/* Second half of CR LF: store nothing and
+ 					** give the unused room back to n.
+ 					*/
+ 					newlinetypes |= NEWLINE_CRLF;
+ 					n++;
+ 					continue;
+ 				}
+ 				/* The pending CR was not followed by LF. */
+ 				newlinetypes |= NEWLINE_CR;
+ 			}
+ 			if (c == '\r') {
+ 				/* Save CR as LF and set flag to skip next LF. */
+ 				skipnextlf = 1;
+ 				c = '\n';
+ 			} else if (c == '\n') {
+ 				newlinetypes |= NEWLINE_LF;
+ 			}
+ 			*dst++ = c;
+ 		}
+ 		if (shortread) {
+ 			/* A CR that is the last char of the file stands alone. */
+ 			if (skipnextlf && feof(stream))
+ 				newlinetypes |= NEWLINE_CR;
+ 			break;
+ 		}
+ 	}
+ 	f->f_newlinetypes = newlinetypes;
+ 	f->f_skipnextlf = skipnextlf;
+ 	return (size_t)(dst - (char *)buf);
+ }
+ #endif