[Numpy-svn] r3741 - in trunk/numpy/core: include/numpy src tests
numpy-svn at scipy.org
numpy-svn at scipy.org
Thu May 10 14:14:38 EDT 2007
Author: cookedm
Date: 2007-05-10 13:14:29 -0500 (Thu, 10 May 2007)
New Revision: 3741
Modified:
trunk/numpy/core/include/numpy/ndarrayobject.h
trunk/numpy/core/src/arraytypes.inc.src
trunk/numpy/core/src/multiarraymodule.c
trunk/numpy/core/tests/test_multiarray.py
Log:
Improvement of separator handling for fromstring and fromfile.
* fromstring and fromfile should behave identically on text.
* added more test cases for fromstring
* the dtype gets passed to the C code doing the type-specific string
conversions. We don't use it, but someone making their own dtype could.
* separator handling for fromfile is moved out of the type-specific conversion.
I've left the argument in for backwards compatibility; when the API version
is next bumped up, it can be removed.
* separator handling in fromfile is now safe (no fscanf(fp, sep) anymore)
Modified: trunk/numpy/core/include/numpy/ndarrayobject.h
===================================================================
--- trunk/numpy/core/include/numpy/ndarrayobject.h 2007-05-10 17:26:20 UTC (rev 3740)
+++ trunk/numpy/core/include/numpy/ndarrayobject.h 2007-05-10 18:14:29 UTC (rev 3741)
@@ -9,17 +9,18 @@
extern "C" CONFUSE_EMACS
#undef CONFUSE_EMACS
#undef CONFUSE_EMACS2
-/* ... otherwise a semi-smart idententer (like emacs) tries to indent
+/* ... otherwise a semi-smart indenter (like emacs) tries to indent
everything when you're typing */
#endif
/* This is auto-generated by the installer */
#include "config.h"
-/* There are several places in the code where an array of dimensions is */
-/* allocated statically. This is the size of that static allocation. */
-/* The array creation itself could have arbitrary dimensions but
- * all the places where static allocation is used would need to
- * be changed to dynamic (including inside of several structures)
+/* There are several places in the code where an array of dimensions is
+ * allocated statically. This is the size of that static allocation.
+ *
+ * The array creation itself could have arbitrary dimensions but
+ * all the places where static allocation is used would need to
+ * be changed to dynamic (including inside of several structures)
*/
#define NPY_MAXDIMS 32
@@ -1004,6 +1005,8 @@
#define PyDimMem_RENEW(ptr,size) \
((npy_intp *)PyArray_realloc(ptr,size*sizeof(npy_intp)))
+/* forward declaration */
+struct _PyArray_Descr;
/* These must deal with unaligned and swapped data if necessary */
typedef PyObject * (PyArray_GetItemFunc) (void *, void *);
@@ -1028,8 +1031,12 @@
typedef void (PyArray_VectorUnaryFunc)(void *, void *, npy_intp, void *,
void *);
-typedef int (PyArray_ScanFunc)(FILE *, void *, void *, void *);
-typedef int (PyArray_FromStrFunc)(char *, void *, char **, void *);
+/* XXX the ignore argument should be removed next time the API version
+ is bumped. It used to be the separator. */
+typedef int (PyArray_ScanFunc)(FILE *fp, void *dptr,
+ char *ignore, struct _PyArray_Descr *);
+typedef int (PyArray_FromStrFunc)(char *s, void *dptr, char **endptr,
+ struct _PyArray_Descr *);
typedef int (PyArray_FillFunc)(void *, npy_intp, void *);
@@ -1157,7 +1164,7 @@
PyDataType_FLAGCHK(dtype, NPY_ITEM_REFCOUNT)
/* Change dtype hasobject to 32-bit in 1.1 and change its name */
-typedef struct {
+typedef struct _PyArray_Descr {
PyObject_HEAD
PyTypeObject *typeobj; /* the type object representing an
instance of this type -- should not
Modified: trunk/numpy/core/src/arraytypes.inc.src
===================================================================
--- trunk/numpy/core/src/arraytypes.inc.src 2007-05-10 17:26:20 UTC (rev 3740)
+++ trunk/numpy/core/src/arraytypes.inc.src 2007-05-10 18:14:29 UTC (rev 3741)
@@ -11,7 +11,7 @@
if (mylong == NULL) return (longlong) -1;
vv = mylong;
}
- else Py_INCREF(vv);
+ else Py_INCREF(vv);
ret = PyLong_AsLongLong(vv);
Py_DECREF(vv);
@@ -867,18 +867,9 @@
/****************** scan *************************************/
-#define _ENDSCAN \
- if (num != 1) { \
- if (num == 0) return -3; \
- if (num == EOF) return -4; \
- return -5; \
- } \
- if (sep != NULL) { \
- num = fscanf(fp, sep); \
- if (num == 0) return 0; \
- if (num == EOF) return -1; \
- } \
- return 0
+/* The first ignore argument is for backwards compatibility.
+ Should be removed when the API version is bumped up.
+ */
/**begin repeat
@@ -887,11 +878,9 @@
#format="hd","hu","d","u","ld","lu",LONGLONG_FMT,ULONGLONG_FMT,"f","lf","Lf"#
*/
static int
-@fname@_scan (FILE *fp, @type@ *ip, char *sep, void *ignore)
+@fname@_scan (FILE *fp, @type@ *ip, void *ignore, PyArray_Descr *ignore2)
{
- int num;
- num = fscanf(fp, "%"@format@, ip);
- _ENDSCAN;
+ return fscanf(fp, "%"@format@, ip);
}
/**end repeat**/
@@ -903,24 +892,24 @@
#format="d","u"#
*/
static int
-@fname@_scan (FILE *fp, @type@ *ip, char *sep, void *ignore)
+@fname@_scan (FILE *fp, @type@ *ip, void *ignore, PyArray_Descr *ignore2)
{
@btype@ temp;
int num;
num = fscanf(fp, "%"@format@, &temp);
*ip = (@type@) temp;
- _ENDSCAN;
+ return num;
}
/**end repeat**/
static int
-BOOL_scan (FILE *fp, Bool *ip, char *sep, void *ignore)
+BOOL_scan (FILE *fp, Bool *ip, void *ignore, PyArray_Descr *ignore2)
{
int temp;
int num;
num = fscanf(fp, "%d", &temp);
*ip = (Bool) (temp != 0);
- _ENDSCAN;
+ return num;
}
/**begin repeat
@@ -929,8 +918,6 @@
#define @fname@_scan NULL
/**end repeat**/
-#undef _ENDSCAN
-
/****************** fromstr *************************************/
/**begin repeat
@@ -940,7 +927,7 @@
#btype=(long,ulong)*5#
*/
static int
-@fname@_fromstr(char *str, @type@ *ip, char **endptr, void *ignore)
+@fname@_fromstr(char *str, @type@ *ip, char **endptr, PyArray_Descr *ignore)
{
@btype@ result;
@@ -956,7 +943,7 @@
*/
#if (PY_VERSION_HEX >= 0x02040000) || defined(PyOS_ascii_strtod)
static int
-@fname@_fromstr(char *str, @type@ *ip, char **endptr, void *ignore)
+@fname@_fromstr(char *str, @type@ *ip, char **endptr, PyArray_Descr *ignore)
{
double result;
Modified: trunk/numpy/core/src/multiarraymodule.c
===================================================================
--- trunk/numpy/core/src/multiarraymodule.c 2007-05-10 17:26:20 UTC (rev 3740)
+++ trunk/numpy/core/src/multiarraymodule.c 2007-05-10 18:14:29 UTC (rev 3741)
@@ -5828,34 +5828,237 @@
return Py_None;
}
+
+/* Reading from a file or a string.
+
+ As much as possible, we try to use the same code for both files and strings,
+ so the semantics for fromstring and fromfile are the same, especially with
+ regards to the handling of text representations.
+ */
+
+
+typedef int (*next_element)(void **, void *, PyArray_Descr *, void *);
+typedef int (*skip_separator)(void **, const char *, void *);
+
static int
-_skip_sep(char **ptr, char *sep)
+fromstr_next_element(char **s, void *dptr, PyArray_Descr *dtype,
+ const char *end)
{
- char *a;
- int n;
- n = strlen(sep);
- a = *ptr;
- while(*a != '\0' && (strncmp(a, sep, n) != 0))
- a++;
- if (*a == '\0') return -1;
- *ptr = a+strlen(sep);
- return 0;
+ int r = dtype->f->fromstr(*s, dptr, s, dtype);
+ if (end != NULL && *s > end) {
+ return -1;
+ }
+ return r;
}
-/* steals a reference to dtype -- accepts NULL */
-/*OBJECT_API*/
+static int
+fromfile_next_element(FILE **fp, void *dptr, PyArray_Descr *dtype,
+ void *stream_data)
+{
+ /* the NULL argument is for backwards-compatibility */
+ return dtype->f->scanfunc(*fp, dptr, NULL, dtype);
+}
+
+/* Remove multiple whitespace from the separator, and add a space to the
+ beginning and end. This simplifies the separator-skipping code below.
+*/
+static char *
+swab_separator(char *sep)
+{
+ int skip_space = 0;
+ char *s, *start;
+ s = start = malloc(strlen(sep)+3);
+ /* add space to front if there isn't one */
+ if (*sep != '\0' && !isspace(*sep)) {
+ *s = ' '; s++;
+ }
+ while (*sep != '\0') {
+ if (isspace(*sep)) {
+ if (skip_space) {
+ sep++;
+ } else {
+ *s = ' ';
+ s++; sep++;
+ skip_space = 1;
+ }
+ } else {
+ *s = *sep;
+ s++; sep++;
+ skip_space = 0;
+ }
+ }
+ /* add space to end if there isn't one */
+ if (s != start && s[-1] == ' ') {
+ *s = ' ';
+ s++;
+ }
+ *s = '\0';
+ return start;
+}
+
+/* Assuming that the separator is the next bit in the string (file), skip it.
+
+ Single spaces in the separator are matched to arbitrarily long sequences
+ of whitespace in the input.
+
+ If we can't match the separator, return -2.
+ If we hit the end of the string (file), return -1.
+ Otherwise, return 0.
+ */
+
+static int
+fromstr_skip_separator(char **s, const char *sep, const char *end)
+{
+ char *string = *s;
+ int result = 0;
+ while (1) {
+ char c = *string;
+ if (c == '\0' || (end != NULL && string >= end)) {
+ result = -1;
+ break;
+ } else if (*sep == '\0') {
+ /* matched separator */
+ result = 0;
+ break;
+ } else if (*sep == ' ') {
+ if (!isspace(c)) {
+ sep++;
+ continue;
+ }
+ } else if (*sep != c) {
+ result = -2;
+ break;
+ } else {
+ sep++;
+ }
+ string++;
+ }
+ *s = string;
+ return result;
+}
+
+static int
+fromfile_skip_separator(FILE **fp, const char *sep, void *stream_data)
+{
+ int result = 0;
+ while (1) {
+ int c = fgetc(*fp);
+ if (c == EOF) {
+ result = -1;
+ break;
+ } else if (*sep == '\0') {
+ /* matched separator */
+ ungetc(c, *fp);
+ result = 0;
+ break;
+ } else if (*sep == ' ') {
+ if (!isspace(c)) {
+ sep++;
+ ungetc(c, *fp);
+ }
+ } else if (*sep != c) {
+ ungetc(c, *fp);
+ result = -2;
+ break;
+ } else {
+ sep++;
+ }
+ }
+ return result;
+}
+
+/* Create an array by reading from the given stream, using the passed
+ next_element and skip_separator functions.
+ */
+
+#define FROM_BUFFER_SIZE 4096
+static PyArrayObject *
+array_from_text(PyArray_Descr *dtype, intp num, char *sep, size_t *nread,
+ void *stream, next_element next, skip_separator skip_sep,
+ void *stream_data)
+{
+ PyArrayObject *r;
+ intp i;
+ char *dptr, *clean_sep;
+
+ intp thisbuf = 0;
+ intp size;
+ intp bytes, totalbytes;
+
+ size = (num >= 0) ? num : FROM_BUFFER_SIZE;
+
+ r = (PyArrayObject *)
+ PyArray_NewFromDescr(&PyArray_Type,
+ dtype,
+ 1, &size,
+ NULL, NULL,
+ 0, NULL);
+ if (r == NULL) return NULL;
+ clean_sep = swab_separator(sep);
+ NPY_BEGIN_ALLOW_THREADS;
+ totalbytes = bytes = size * dtype->elsize;
+ dptr = r->data;
+ for (i=0; num < 0 || i < num; i++) {
+ if (next(&stream, dptr, dtype, stream_data) < 0)
+ break;
+ *nread += 1;
+ thisbuf += 1;
+ dptr += dtype->elsize;
+ if (num < 0 && thisbuf == size) {
+ totalbytes += bytes;
+ r->data = PyDataMem_RENEW(r->data, totalbytes);
+ dptr = r->data + (totalbytes - bytes);
+ thisbuf = 0;
+ }
+ if (skip_sep(&stream, clean_sep, stream_data) < 0)
+ break;
+ }
+ if (num < 0) {
+ r->data = PyDataMem_RENEW(r->data, (*nread)*dtype->elsize);
+ PyArray_DIM(r,0) = *nread;
+ }
+ NPY_END_ALLOW_THREADS;
+ free(clean_sep);
+ if (PyErr_Occurred()) {
+ Py_DECREF(r);
+ return NULL;
+ }
+ return r;
+}
+#undef FROM_BUFFER_SIZE
+
+/*OBJECT_API
+
+ Given a pointer to a string ``data``, a string length ``slen``, and
+ a ``PyArray_Descr``, return an array corresponding to the data
+ encoded in that string.
+
+ If the dtype is NULL, the default array type is used (double).
+ If non-null, the reference is stolen.
+
+ If ``slen`` is < 0, then the end of string is used for text data.
+ It is an error for ``slen`` to be < 0 for binary data (since embedded NULLs
+ would be the norm).
+
+ The number of elements to read is given as ``num``; if it is < 0, then
+ as many as possible are read.
+
+ If ``sep`` is NULL or empty, then binary data is assumed, else
+ text data, with ``sep`` as the separator between elements. Whitespace in
+ the separator matches any length of whitespace in the text, and a match
+ for whitespace around the separator is added.
+ */
static PyObject *
PyArray_FromString(char *data, intp slen, PyArray_Descr *dtype,
- intp n, char *sep)
+ intp num, char *sep)
{
int itemsize;
PyArrayObject *ret;
Bool binary;
-
if (dtype == NULL)
dtype=PyArray_DescrFromType(PyArray_DEFAULT);
-
+
if (PyDataType_FLAGCHK(dtype, NPY_ITEM_IS_POINTER)) {
PyErr_SetString(PyExc_ValueError,
"Cannot create an object array from" \
@@ -5874,7 +6077,7 @@
binary = ((sep == NULL) || (strlen(sep) == 0));
if (binary) {
- if (n < 0 ) {
+ if (num < 0 ) {
if (slen % itemsize != 0) {
PyErr_SetString(PyExc_ValueError,
"string size must be a "\
@@ -5882,9 +6085,9 @@
Py_DECREF(dtype);
return NULL;
}
- n = slen/itemsize;
+ num = slen/itemsize;
} else {
- if (slen < n*itemsize) {
+ if (slen < num*itemsize) {
PyErr_SetString(PyExc_ValueError,
"string is smaller than " \
"requested size");
@@ -5893,111 +6096,40 @@
}
}
- if ((ret = (PyArrayObject *)\
- PyArray_NewFromDescr(&PyArray_Type, dtype,
- 1, &n, NULL, NULL,
- 0, NULL)) == NULL)
- return NULL;
- memcpy(ret->data, data, n*dtype->elsize);
- return (PyObject *)ret;
- }
- else { /* read from character-based string */
- char *ptr;
- PyArray_FromStrFunc *fromstr;
- char *dptr;
- intp nread=0;
- intp index;
-
- fromstr = dtype->f->fromstr;
- if (fromstr == NULL) {
+ ret = (PyArrayObject *)
+ PyArray_NewFromDescr(&PyArray_Type, dtype,
+ 1, &num, NULL, NULL,
+ 0, NULL);
+ if (ret == NULL) return NULL;
+ memcpy(ret->data, data, num*dtype->elsize);
+ } else {
+ /* read from character-based string */
+ size_t nread = 0;
+ char *end;
+ if (dtype->f->scanfunc == NULL) {
PyErr_SetString(PyExc_ValueError,
"don't know how to read " \
- "character strings for given " \
+ "character strings with that " \
"array type");
Py_DECREF(dtype);
return NULL;
}
-
- if (n!=-1) {
- ret = (PyArrayObject *) \
- PyArray_NewFromDescr(&PyArray_Type,
- dtype, 1, &n, NULL,
- NULL, 0, NULL);
- if (ret == NULL) return NULL;
- NPY_BEGIN_ALLOW_THREADS
- ptr = data;
- dptr = ret->data;
- for (index=0; index < n; index++) {
- if (fromstr(ptr, dptr, &ptr, ret) < 0)
- break;
- nread += 1;
- dptr += dtype->elsize;
- if (_skip_sep(&ptr, sep) < 0)
- break;
- }
- if (nread < n) {
- fprintf(stderr, "%ld items requested but "\
- "only %ld read\n",
- (long) n, (long) nread);
- ret->data = \
- PyDataMem_RENEW(ret->data,
- nread * \
- ret->descr->elsize);
- PyArray_DIM(ret,0) = nread;
-
- }
- NPY_END_ALLOW_THREADS
+ if (slen < 0) {
+ end = NULL;
+ } else {
+ end = data + slen;
}
- else {
-#define _FILEBUFNUM 4096
- intp thisbuf=0;
- intp size = _FILEBUFNUM;
- intp bytes;
- intp totalbytes;
- char *end;
- int val;
-
- ret = (PyArrayObject *)\
- PyArray_NewFromDescr(&PyArray_Type,
- dtype,
- 1, &size,
- NULL, NULL,
- 0, NULL);
- if (ret==NULL) return NULL;
- NPY_BEGIN_ALLOW_THREADS
- totalbytes = bytes = size * dtype->elsize;
- dptr = ret->data;
- ptr = data;
- end = data+slen;
- while (ptr < end) {
- val = fromstr(ptr, dptr, &ptr, ret);
- if (val < 0) break;
- nread += 1;
- val = _skip_sep(&ptr, sep);
- if (val < 0) break;
- thisbuf += 1;
- dptr += dtype->elsize;
- if (thisbuf == size) {
- totalbytes += bytes;
- ret->data = PyDataMem_RENEW(ret->data,
- totalbytes);
- dptr = ret->data + \
- (totalbytes - bytes);
- thisbuf = 0;
- }
- }
- ret->data = PyDataMem_RENEW(ret->data,
- nread*ret->descr->elsize);
- PyArray_DIM(ret,0) = nread;
-#undef _FILEBUFNUM
- NPY_END_ALLOW_THREADS
- }
+ ret = array_from_text(dtype, num, sep, &nread,
+ data,
+ (next_element) fromstr_next_element,
+ (skip_separator) fromstr_skip_separator,
+ end);
}
return (PyObject *)ret;
}
static PyObject *
-array_fromString(PyObject *ignored, PyObject *args, PyObject *keywds)
+array_fromstring(PyObject *ignored, PyObject *args, PyObject *keywds)
{
char *data;
Py_ssize_t nin=-1;
@@ -6018,6 +6150,148 @@
}
+
+static PyArrayObject *
+array_fromfile_binary(FILE *fp, PyArray_Descr *dtype, intp num, size_t *nread)
+{
+ PyArrayObject *r;
+ intp start, numbytes;
+
+ if (num < 0) {
+ int fail=0;
+ start = (intp )ftell(fp);
+ if (start < 0) fail=1;
+ if (fseek(fp, 0, SEEK_END) < 0) fail=1;
+ numbytes = (intp) ftell(fp);
+ if (numbytes < 0) fail=1;
+ numbytes -= start;
+ if (fseek(fp, start, SEEK_SET) < 0) fail=1;
+ if (fail) {
+ PyErr_SetString(PyExc_IOError,
+ "could not seek in file");
+ Py_DECREF(dtype);
+ return NULL;
+ }
+ num = numbytes / dtype->elsize;
+ }
+ r = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
+ dtype,
+ 1, &num,
+ NULL, NULL,
+ 0, NULL);
+ if (r==NULL) return NULL;
+ NPY_BEGIN_ALLOW_THREADS;
+ *nread = fread(r->data, dtype->elsize, num, fp);
+ NPY_END_ALLOW_THREADS;
+ return r;
+}
+
+/*OBJECT_API
+
+ Given a ``FILE *`` pointer ``fp``, and a ``PyArray_Descr``, return an
+ array corresponding to the data encoded in that file.
+
+ If the dtype is NULL, the default array type is used (double).
+ If non-null, the reference is stolen.
+
+ The number of elements to read is given as ``num``; if it is < 0, then
+ as many as possible are read.
+
+ If ``sep`` is NULL or empty, then binary data is assumed, else
+ text data, with ``sep`` as the separator between elements. Whitespace in
+ the separator matches any length of whitespace in the text, and a match
+ for whitespace around the separator is added.
+
+ For memory-mapped files, use the buffer interface. No more data than
+ necessary is read by this routine.
+*/
+static PyObject *
+PyArray_FromFile(FILE *fp, PyArray_Descr *dtype, intp num, char *sep)
+{
+ PyArrayObject *ret;
+ size_t nread = 0;
+
+ if (PyDataType_REFCHK(dtype)) {
+ PyErr_SetString(PyExc_ValueError,
+ "cannot read into object array");
+ Py_DECREF(dtype);
+ return NULL;
+ }
+ if (dtype->elsize == 0) {
+ PyErr_SetString(PyExc_ValueError, "0-sized elements.");
+ Py_DECREF(dtype);
+ return NULL;
+ }
+
+ if ((sep == NULL) || (strlen(sep) == 0)) {
+ ret = array_fromfile_binary(fp, dtype, num, &nread);
+ } else {
+ if (dtype->f->scanfunc == NULL) {
+ PyErr_SetString(PyExc_ValueError,
+ "don't know how to read " \
+ "character files with that " \
+ "array type");
+ Py_DECREF(dtype);
+ return NULL;
+ }
+ ret = array_from_text(dtype, num, sep, &nread,
+ fp,
+ (next_element) fromfile_next_element,
+ (skip_separator) fromfile_skip_separator,
+ NULL);
+ }
+ if (((intp) nread) < num) {
+ fprintf(stderr, "%ld items requested but only %ld read\n",
+ (long) num, (long) nread);
+ ret->data = PyDataMem_RENEW(ret->data,
+ nread * ret->descr->elsize);
+ PyArray_DIM(ret,0) = nread;
+ }
+ return (PyObject *)ret;
+}
+
+static PyObject *
+array_fromfile(PyObject *ignored, PyObject *args, PyObject *keywds)
+{
+ PyObject *file=NULL, *ret;
+ FILE *fp;
+ char *sep="";
+ Py_ssize_t nin=-1;
+ static char *kwlist[] = {"file", "dtype", "count", "sep", NULL};
+ PyArray_Descr *type=NULL;
+
+ if (!PyArg_ParseTupleAndKeywords(args, keywds,
+ "O|O&" NPY_SSIZE_T_PYFMT "s",
+ kwlist,
+ &file,
+ PyArray_DescrConverter, &type,
+ &nin, &sep)) {
+ return NULL;
+ }
+
+ if (type == NULL) type = PyArray_DescrFromType(PyArray_DEFAULT);
+
+ if (PyString_Check(file) || PyUnicode_Check(file)) {
+ file = PyObject_CallFunction((PyObject *)&PyFile_Type,
+ "Os", file, "rb");
+ if (file==NULL) return NULL;
+ }
+ else {
+ Py_INCREF(file);
+ }
+ fp = PyFile_AsFile(file);
+ if (fp == NULL) {
+ PyErr_SetString(PyExc_IOError,
+ "first argument must be an open file");
+ Py_DECREF(file);
+ return NULL;
+ }
+ ret = PyArray_FromFile(fp, type, (intp) nin, sep);
+ Py_DECREF(file);
+ return ret;
+}
+
+
/* steals a reference to dtype (which cannot be NULL) */
/*OBJECT_API */
static PyObject *
@@ -6108,7 +6382,7 @@
}
static PyObject *
-array_fromIter(PyObject *ignored, PyObject *args, PyObject *keywds)
+array_fromiter(PyObject *ignored, PyObject *args, PyObject *keywds)
{
PyObject *iter;
Py_ssize_t nin=-1;
@@ -6128,210 +6402,8 @@
}
-
-
-/* This needs an open file object and reads it in directly.
- memory-mapped files handled differently through buffer interface.
-
-file pointer number in resulting 1d array
-(can easily reshape later, -1 for to end of file)
-type of array
-sep is a separator string for character-based data (or NULL for binary)
- " " means whitespace
-*/
-
/*OBJECT_API*/
static PyObject *
-PyArray_FromFile(FILE *fp, PyArray_Descr *typecode, intp num, char *sep)
-{
- PyArrayObject *r;
- size_t nread = 0;
- PyArray_ScanFunc *scan;
- Bool binary;
-
- if (PyDataType_REFCHK(typecode)) {
- PyErr_SetString(PyExc_ValueError, "cannot read into"
- "object array");
- Py_DECREF(typecode);
- return NULL;
- }
- if (typecode->elsize == 0) {
- PyErr_SetString(PyExc_ValueError, "0-sized elements.");
- Py_DECREF(typecode);
- return NULL;
- }
-
- binary = ((sep == NULL) || (strlen(sep) == 0));
- if (num == -1 && binary) { /* Get size for binary file*/
- intp start, numbytes;
- int fail=0;
- start = (intp )ftell(fp);
- if (start < 0) fail=1;
- if (fseek(fp, 0, SEEK_END) < 0) fail=1;
- numbytes = (intp) ftell(fp);
- if (numbytes < 0) fail=1;
- numbytes -= start;
- if (fseek(fp, start, SEEK_SET) < 0) fail=1;
- if (fail) {
- PyErr_SetString(PyExc_IOError,
- "could not seek in file");
- Py_DECREF(typecode);
- return NULL;
- }
- num = numbytes / typecode->elsize;
- }
-
- if (binary) { /* binary data */
- r = (PyArrayObject *)PyArray_NewFromDescr(&PyArray_Type,
- typecode,
- 1, &num,
- NULL, NULL,
- 0, NULL);
- if (r==NULL) return NULL;
- NPY_BEGIN_ALLOW_THREADS
- nread = fread(r->data, typecode->elsize, num, fp);
- NPY_END_ALLOW_THREADS
- }
- else { /* character reading */
- intp i;
- char *dptr;
- int done=0;
-
- scan = typecode->f->scanfunc;
- if (scan == NULL) {
- PyErr_SetString(PyExc_ValueError,
- "don't know how to read " \
- "character files with that " \
- "array type");
- Py_DECREF(typecode);
- return NULL;
- }
-
- if (num != -1) { /* number to read is known */
- r = (PyArrayObject *)\
- PyArray_NewFromDescr(&PyArray_Type,
- typecode,
- 1, &num,
- NULL, NULL,
- 0, NULL);
- if (r==NULL) return NULL;
- NPY_BEGIN_ALLOW_THREADS
- dptr = r->data;
- for (i=0; i < num; i++) {
- if (done) break;
- done = scan(fp, dptr, sep, NULL);
- if (done < -2) break;
- nread += 1;
- dptr += r->descr->elsize;
- }
- NPY_END_ALLOW_THREADS
- if (PyErr_Occurred()) {
- Py_DECREF(r);
- return NULL;
- }
- }
- else { /* we have to watch for the end of the file and
- reallocate at the end */
-#define _FILEBUFNUM 4096
- intp thisbuf=0;
- intp size = _FILEBUFNUM;
- intp bytes;
- intp totalbytes;
-
- r = (PyArrayObject *)\
- PyArray_NewFromDescr(&PyArray_Type,
- typecode,
- 1, &size,
- NULL, NULL,
- 0, NULL);
- if (r==NULL) return NULL;
- NPY_BEGIN_ALLOW_THREADS
- totalbytes = bytes = size * typecode->elsize;
- dptr = r->data;
- while (!done) {
- done = scan(fp, dptr, sep, NULL);
-
- /* end of file reached trying to
- scan value. done is 1 or 2
- if end of file reached trying to
- scan separator. Still good value.
- */
- if (done < -2) break;
- thisbuf += 1;
- nread += 1;
- dptr += r->descr->elsize;
- if (!done && thisbuf == size) {
- totalbytes += bytes;
- r->data = PyDataMem_RENEW(r->data,
- totalbytes);
- dptr = r->data + (totalbytes - bytes);
- thisbuf = 0;
- }
- }
- r->data = PyDataMem_RENEW(r->data, nread*r->descr->elsize);
- PyArray_DIM(r,0) = nread;
- num = nread;
- NPY_END_ALLOW_THREADS
-#undef _FILEBUFNUM
- }
- if (PyErr_Occurred()) {
- Py_DECREF(r);
- return NULL;
- }
-
- }
- if (((intp) nread) < num) {
- fprintf(stderr, "%ld items requested but only %ld read\n",
- (long) num, (long) nread);
- r->data = PyDataMem_RENEW(r->data, nread * r->descr->elsize);
- PyArray_DIM(r,0) = nread;
- }
- return (PyObject *)r;
-}
-
-static PyObject *
-array_fromfile(PyObject *ignored, PyObject *args, PyObject *keywds)
-{
- PyObject *file=NULL, *ret;
- FILE *fp;
- char *sep="";
- Py_ssize_t nin=-1;
- static char *kwlist[] = {"file", "dtype", "count", "sep", NULL};
- PyArray_Descr *type=NULL;
-
- if (!PyArg_ParseTupleAndKeywords(args, keywds,
- "O|O&" NPY_SSIZE_T_PYFMT "s",
- kwlist,
- &file,
- PyArray_DescrConverter, &type,
- &nin, &sep)) {
- return NULL;
- }
-
- if (type == NULL) type = PyArray_DescrFromType(PyArray_DEFAULT);
-
- if (PyString_Check(file) || PyUnicode_Check(file)) {
- file = PyObject_CallFunction((PyObject *)&PyFile_Type,
- "Os", file, "rb");
- if (file==NULL) return NULL;
- }
- else {
- Py_INCREF(file);
- }
- fp = PyFile_AsFile(file);
- if (fp == NULL) {
- PyErr_SetString(PyExc_IOError,
- "first argument must be an open file");
- Py_DECREF(file);
- return NULL;
- }
- ret = PyArray_FromFile(fp, type, (intp) nin, sep);
- Py_DECREF(file);
- return ret;
-}
-
-/*OBJECT_API*/
-static PyObject *
PyArray_FromBuffer(PyObject *buf, PyArray_Descr *type,
intp count, intp offset)
{
@@ -7213,9 +7285,9 @@
METH_VARARGS | METH_KEYWORDS, NULL},
{"putmask", (PyCFunction)array_putmask,
METH_VARARGS | METH_KEYWORDS, NULL},
- {"fromstring",(PyCFunction)array_fromString,
+ {"fromstring",(PyCFunction)array_fromstring,
METH_VARARGS|METH_KEYWORDS, NULL},
- {"fromiter",(PyCFunction)array_fromIter,
+ {"fromiter",(PyCFunction)array_fromiter,
METH_VARARGS|METH_KEYWORDS, NULL},
{"concatenate", (PyCFunction)array_concatenate,
METH_VARARGS|METH_KEYWORDS, NULL},
Modified: trunk/numpy/core/tests/test_multiarray.py
===================================================================
--- trunk/numpy/core/tests/test_multiarray.py 2007-05-10 17:26:20 UTC (rev 3740)
+++ trunk/numpy/core/tests/test_multiarray.py 2007-05-10 18:14:29 UTC (rev 3741)
@@ -116,9 +116,28 @@
a = fromstring('\x00\x00\x80?\x00\x00\x00@\x00\x00@@\x00\x00\x80@',dtype='<f4')
assert_array_equal(a, array([1,2,3,4]))
+ def check_string(self):
+ a = fromstring('1,2,3,4', sep=',')
+ assert_array_equal(a, [1., 2., 3., 4.])
+
+ def check_counted_string(self):
+ a = fromstring('1,2,3,4', count=4, sep=',')
+ assert_array_equal(a, [1., 2., 3., 4.])
+ a = fromstring('1,2,3,4', count=3, sep=',')
+ assert_array_equal(a, [1., 2., 3.])
+
+ def check_string_with_ws(self):
+ a = fromstring('1 2 3 4 ', dtype=int, sep=' ')
+ assert_array_equal(a, [1, 2, 3, 4])
+
+ def check_counted_string_with_ws(self):
+ a = fromstring('1 2 3 4 ', count=3, dtype=int, sep=' ')
+ assert_array_equal(a, [1, 2, 3])
+
def check_ascii(self):
- a = fromstring('1 , 2 , 3 , 4',sep=',')
- b = fromstring('1,2,3,4',dtype=float,sep=',')
+ a = fromstring('1 , 2 , 3 , 4', sep=',')
+ b = fromstring('1,2,3,4', dtype=float, sep=',')
+ assert_array_equal(a, [1.,2.,3.,4.])
assert_array_equal(a,b)
class test_zero_rank(NumpyTestCase):
More information about the Numpy-svn
mailing list