[Python-checkins] python/nondist/sandbox/csv _csv.c,1.2,1.3
davecole@users.sourceforge.net
davecole@users.sourceforge.net
Thu, 30 Jan 2003 05:16:09 -0800
Update of /cvsroot/python/python/nondist/sandbox/csv
In directory sc8-pr-cvs1:/tmp/cvs-serv24039
Modified Files:
_csv.c
Log Message:
* More formatting changes to bring code closer to the Guido style.
* Changed all internal parser settings to match those in the PEP.
* Added the PEP settings so that csv.py can use _csv. The new parameters
(skipinitialspace, lineterminator, quoting) are stored but not acted on yet.
* Removed overloading of quotechar and escapechar values by introducing
have_quotechar and have_escapechar attributes.
Only the barest minimum of testing has been done.
Index: _csv.c
===================================================================
RCS file: /cvsroot/python/python/nondist/sandbox/csv/_csv.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -C2 -d -r1.2 -r1.3
*** _csv.c 29 Jan 2003 11:21:34 -0000 1.2
--- _csv.c 30 Jan 2003 13:16:06 -0000 1.3
***************
*** 1,2 ****
--- 1,17 ----
+ /* TODO:
+ + Add reader() and writer() functions which return CSV
+ reader/writer objects which implement the PEP interface:
+
+ csvreader = csv.reader(file("blah.csv", "rb"), kwargs)
+ for row in csvreader:
+ process(row)
+
+ csvwriter = csv.writer(file("some.csv", "wb"), kwargs)
+ for row in someiter:
+ csvwriter.write(row)
+
+ + Add CsvWriter.writelines(someiter)
+ */
+
#include "Python.h"
#include "structmember.h"
***************
*** 9,24 ****
} ParserState;
typedef struct {
PyObject_HEAD
! int ms_double_quote; /* is " represented by ""? */
! char field_sep; /* field separator */
! char quote_char; /* quote character */
! char escape_char; /* escape character */
ParserState state; /* current CSV parse state */
PyObject *fields; /* field list for current record */
! int auto_clear; /* should fields be cleared on next
parse() after exception? */
int strict; /* raise exception on bad CSV */
--- 24,48 ----
} ParserState;
+ typedef enum {
+ QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC
+ } QuoteStyle;
+
typedef struct {
PyObject_HEAD
! int doublequote; /* is " represented by ""? */
! char delimiter; /* field separator */
! int have_quotechar; /* is a quotechar defined */
! char quotechar; /* quote character */
! int have_escapechar; /* is an escapechar defined */
! char escapechar; /* escape character */
! int skipinitialspace; /* ignore spaces following delimiter? */
! PyObject *lineterminator; /* string to write between records */
! QuoteStyle quoting; /* style of quoting to write */
ParserState state; /* current CSV parse state */
PyObject *fields; /* field list for current record */
! int autoclear; /* should fields be cleared on next
parse() after exception? */
int strict; /* raise exception on bad CSV */
***************
*** 78,82 ****
self->field_size = 4096;
self->field = PyMem_Malloc(self->field_size);
! } else {
self->field_size *= 2;
self->field = PyMem_Realloc(self->field, self->field_size);
--- 102,107 ----
self->field_size = 4096;
self->field = PyMem_Malloc(self->field_size);
! }
! else {
self->field_size *= 2;
self->field = PyMem_Realloc(self->field, self->field_size);
***************
*** 125,138 ****
parse_save_field(self);
self->state = START_RECORD;
! } else if (c == self->quote_char) {
/* start quoted field */
self->state = IN_QUOTED_FIELD;
! } else if (c == self->escape_char) {
/* possible escaped character */
self->state = ESCAPED_CHAR;
! } else if (c == self->field_sep) {
/* save empty field */
parse_save_field(self);
! } else {
/* begin new unquoted field */
parse_add_char(self, c);
--- 150,167 ----
parse_save_field(self);
self->state = START_RECORD;
! }
! else if (c == self->quotechar) {
/* start quoted field */
self->state = IN_QUOTED_FIELD;
! }
! else if (c == self->escapechar) {
/* possible escaped character */
self->state = ESCAPED_CHAR;
! }
! else if (c == self->delimiter) {
/* save empty field */
parse_save_field(self);
! }
! else {
/* begin new unquoted field */
parse_add_char(self, c);
***************
*** 142,148 ****
case ESCAPED_CHAR:
! if (c != self->escape_char && c != self->field_sep &&
! c != self->quote_char)
! parse_add_char(self, self->escape_char);
parse_add_char(self, c);
self->state = IN_FIELD;
--- 171,177 ----
case ESCAPED_CHAR:
! if (c != self->escapechar && c != self->delimiter &&
! c != self->quotechar)
! parse_add_char(self, self->escapechar);
parse_add_char(self, c);
self->state = IN_FIELD;
***************
*** 155,166 ****
parse_save_field(self);
self->state = START_RECORD;
! } else if (c == self->escape_char) {
/* possible escaped character */
self->state = ESCAPED_CHAR;
! } else if (c == self->field_sep) {
/* save field - wait for new field */
parse_save_field(self);
self->state = START_FIELD;
! } else {
/* normal character - save in field */
parse_add_char(self, c);
--- 184,198 ----
parse_save_field(self);
self->state = START_RECORD;
! }
! else if (c == self->escapechar) {
/* possible escaped character */
self->state = ESCAPED_CHAR;
! }
! else if (c == self->delimiter) {
/* save field - wait for new field */
parse_save_field(self);
self->state = START_FIELD;
! }
! else {
/* normal character - save in field */
parse_add_char(self, c);
***************
*** 173,188 ****
/* end of line - save '\n' in field */
parse_add_char(self, '\n');
! } else if (c == self->escape_char) {
/* Possible escape character */
self->state = ESCAPE_IN_QUOTED_FIELD;
! } else if (c == self->quote_char) {
! if (self->ms_double_quote) {
/* microsoft style double quotes; " represented by "" */
self->state = QUOTE_IN_QUOTED_FIELD;
! } else {
/* end of quote part of field */
self->state = IN_FIELD;
}
! } else {
/* normal character - save in field */
parse_add_char(self, c);
--- 205,224 ----
/* end of line - save '\n' in field */
parse_add_char(self, '\n');
! }
! else if (c == self->escapechar) {
/* Possible escape character */
self->state = ESCAPE_IN_QUOTED_FIELD;
! }
! else if (c == self->quotechar) {
! if (self->doublequote) {
/* microsoft style double quotes; " represented by "" */
self->state = QUOTE_IN_QUOTED_FIELD;
! }
! else {
/* end of quote part of field */
self->state = IN_FIELD;
}
! }
! else {
/* normal character - save in field */
parse_add_char(self, c);
***************
*** 191,197 ****
case ESCAPE_IN_QUOTED_FIELD:
! if (c != self->escape_char && c != self->field_sep &&
! c != self->quote_char)
! parse_add_char(self, self->escape_char);
parse_add_char(self, c);
self->state = IN_QUOTED_FIELD;
--- 227,233 ----
case ESCAPE_IN_QUOTED_FIELD:
! if (c != self->escapechar && c != self->delimiter &&
! c != self->quotechar)
! parse_add_char(self, self->escapechar);
parse_add_char(self, c);
self->state = IN_QUOTED_FIELD;
***************
*** 200,223 ****
case QUOTE_IN_QUOTED_FIELD:
/* microsoft double quotes - seen a quote in an quoted field */
! if (self->quote_char && c == self->quote_char) {
/* save "" as " */
parse_add_char(self, c);
self->state = IN_QUOTED_FIELD;
! } else if (c == self->field_sep) {
/* save field - wait for new field */
parse_save_field(self);
self->state = START_FIELD;
! } else if (c == '\0') {
/* end of line - return [fields] */
parse_save_field(self);
self->state = START_RECORD;
! } else if (!self->strict) {
parse_add_char(self, c);
self->state = IN_FIELD;
! } else {
/* illegal */
self->had_parse_error = 1;
raise_exception("%c expected after %c",
! self->field_sep, self->quote_char);
}
break;
--- 236,263 ----
case QUOTE_IN_QUOTED_FIELD:
/* microsoft double quotes - seen a quote in an quoted field */
! if (self->have_quotechar && c == self->quotechar) {
/* save "" as " */
parse_add_char(self, c);
self->state = IN_QUOTED_FIELD;
! }
! else if (c == self->delimiter) {
/* save field - wait for new field */
parse_save_field(self);
self->state = START_FIELD;
! }
! else if (c == '\0') {
/* end of line - return [fields] */
parse_save_field(self);
self->state = START_RECORD;
! }
! else if (!self->strict) {
parse_add_char(self, c);
self->state = IN_FIELD;
! }
! else {
/* illegal */
self->had_parse_error = 1;
raise_exception("%c expected after %c",
! self->delimiter, self->quotechar);
}
break;
***************
*** 257,261 ****
return NULL;
! if (self->auto_clear && self->had_parse_error)
clear_fields_and_status(self);
--- 297,301 ----
return NULL;
! if (self->autoclear && self->had_parse_error)
clear_fields_and_status(self);
***************
*** 350,354 ****
if (self->num_fields > 0) {
if (copy_phase)
! self->rec[rec_len] = self->field_sep;
rec_len++;
}
--- 390,394 ----
if (self->num_fields > 0) {
if (copy_phase)
! self->rec[rec_len] = self->delimiter;
rec_len++;
}
***************
*** 356,360 ****
*/
if (copy_phase && *quoted) {
! self->rec[rec_len] = self->quote_char;
rec_len++;
}
--- 396,400 ----
*/
if (copy_phase && *quoted) {
! self->rec[rec_len] = self->quotechar;
rec_len++;
}
***************
*** 367,373 ****
* quote.
*/
! if (c == self->quote_char && self->ms_double_quote) {
if (copy_phase)
! self->rec[rec_len] = self->quote_char;
*quoted = 1;
rec_len++;
--- 407,413 ----
* quote.
*/
! if (c == self->have_quotechar && self->doublequote) {
if (copy_phase)
! self->rec[rec_len] = self->quotechar;
*quoted = 1;
rec_len++;
***************
*** 378,388 ****
*/
if (!*quoted
! && (c == self->field_sep || c == self->escape_char
|| c == '\n' || c == '\r')) {
! if (self->quote_char)
*quoted = 1;
! else if (self->escape_char) {
if (copy_phase)
! self->rec[rec_len] = self->escape_char;
rec_len++;
}
--- 418,428 ----
*/
if (!*quoted
! && (c == self->delimiter || c == self->escapechar
|| c == '\n' || c == '\r')) {
! if (self->have_quotechar)
*quoted = 1;
! else if (self->escapechar) {
if (copy_phase)
! self->rec[rec_len] = self->escapechar;
rec_len++;
}
***************
*** 397,401 ****
/* If field is empty check if it needs to be quoted.
*/
! if (i == 0 && quote_empty && self->quote_char)
*quoted = 1;
--- 437,441 ----
/* If field is empty check if it needs to be quoted.
*/
! if (i == 0 && quote_empty && self->have_quotechar)
*quoted = 1;
***************
*** 404,408 ****
if (*quoted) {
if (copy_phase)
! self->rec[rec_len] = self->quote_char;
else
/* Didn't know about leading quote until we found it
--- 444,448 ----
if (*quoted) {
if (copy_phase)
! self->rec[rec_len] = self->quotechar;
else
/* Didn't know about leading quote until we found it
***************
*** 429,433 ****
self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
self->rec = PyMem_Malloc(self->rec_size);
! } else {
char *old_rec = self->rec;
--- 469,474 ----
self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
self->rec = PyMem_Malloc(self->rec_size);
! }
! else {
char *old_rec = self->rec;
***************
*** 488,495 ****
append_ok = join_append(self, PyString_AsString(field), len == 1);
Py_DECREF(field);
! } else if (field == Py_None) {
append_ok = join_append(self, "", len == 1);
Py_DECREF(field);
! } else {
PyObject *str;
--- 529,538 ----
append_ok = join_append(self, PyString_AsString(field), len == 1);
Py_DECREF(field);
! }
! else if (field == Py_None) {
append_ok = join_append(self, "", len == 1);
Py_DECREF(field);
! }
! else {
PyObject *str;
***************
*** 524,530 ****
if (self->field)
free(self->field);
! if (self->fields) {
! Py_XDECREF(self->fields);
! }
if (self->rec)
free(self->rec);
--- 567,573 ----
if (self->field)
free(self->field);
! Py_XDECREF(self->fields);
! Py_XDECREF(self->lineterminator);
!
if (self->rec)
free(self->rec);
***************
*** 536,549 ****
static struct memberlist Parser_memberlist[] = {
! { "ms_double_quote", T_INT, OFF(ms_double_quote) },
! { "fields", T_OBJECT, OFF(fields) },
! { "field_sep", T_CHAR, OFF(field_sep) },
! { "quote_char", T_CHAR, OFF(quote_char) },
! { "escape_char", T_CHAR, OFF(escape_char) },
! { "auto_clear", T_INT, OFF(auto_clear) },
! { "strict", T_INT, OFF(strict) },
! { "had_parse_error", T_INT, OFF(had_parse_error), RO },
!
! { NULL } /* Sentinel */
};
--- 579,594 ----
static struct memberlist Parser_memberlist[] = {
! { "quotechar", T_CHAR, OFF(quotechar) },
! { "delimiter", T_CHAR, OFF(delimiter) },
! { "escapechar", T_CHAR, OFF(escapechar) },
! { "skipinitialspace", T_INT, OFF(skipinitialspace) },
! { "lineterminator", T_OBJECT, OFF(lineterminator) },
! { "quoting", T_INT, OFF(quoting) },
! { "doublequote", T_INT, OFF(doublequote) },
! { "fields", T_OBJECT, OFF(fields) },
! { "autoclear", T_INT, OFF(autoclear) },
! { "strict", T_INT, OFF(strict) },
! { "had_parse_error", T_INT, OFF(had_parse_error), RO },
! { NULL }
};
***************
*** 561,565 ****
static int
! _set_char_attr(char *attr, PyObject *v)
{
/* Special case for constructor - NULL == use default.
--- 606,610 ----
static int
! _set_char_attr(char *attr, int *have_attr, PyObject *v)
{
/* Special case for constructor - NULL == use default.
***************
*** 570,581 ****
if (v == Py_None) {
*attr = 0;
return 0;
! } else if (PyInt_Check(v) && PyInt_AsLong(v) == 0) {
*attr = 0;
return 0;
! } else if (PyString_Check(v) && PyString_Size(v) == 1) {
*attr = PyString_AsString(v)[0];
return 0;
! } else {
PyErr_BadArgument();
return -1;
--- 615,632 ----
if (v == Py_None) {
*attr = 0;
+ *have_attr = 0;
return 0;
! }
! else if (PyInt_Check(v) && PyInt_AsLong(v) == 0) {
! *have_attr = 0;
*attr = 0;
return 0;
! }
! else if (PyString_Check(v) && PyString_Size(v) == 1) {
*attr = PyString_AsString(v)[0];
+ *have_attr = 1;
return 0;
! }
! else {
PyErr_BadArgument();
return -1;
***************
*** 590,599 ****
return -1;
}
! if (strcmp(name, "field_sep") == 0)
! return _set_char_attr(&self->field_sep, v);
! else if (strcmp(name, "quote_char") == 0)
! return _set_char_attr(&self->quote_char, v);
! else if (strcmp(name, "escape_char") == 0)
! return _set_char_attr(&self->escape_char, v);
else
return PyMember_Set((char *)self, Parser_memberlist, name, v);
--- 641,656 ----
return -1;
}
! if (strcmp(name, "delimiter") == 0) {
! int have_delimiter;
!
! return _set_char_attr(&self->delimiter,
! &have_delimiter, v);
! }
! else if (strcmp(name, "quotechar") == 0)
! return _set_char_attr(&self->quotechar,
! &self->have_quotechar, v);
! else if (strcmp(name, "escapechar") == 0)
! return _set_char_attr(&self->escapechar,
! &self->have_escapechar, v);
else
return PyMember_Set((char *)self, Parser_memberlist, name, v);
***************
*** 630,647 ****
PyDoc_STRVAR(csv_parser_doc,
! "parser(ms_double_quote = 1, field_sep = ',',\n"
! " auto_clear = 1, strict = 0,\n"
! " quote_char = '\"', escape_char = None) -> Parser\n"
! "\n"
! "Constructs a CSV parser object. \n"
"\n"
! " ms_double_quote\n"
! " When True, quotes in a fields must be doubled up.\n"
"\n"
! " field_sep\n"
" Defines the character that will be used to separate\n"
" fields in the CSV record.\n"
"\n"
! " auto_clear\n"
" When True, calling parse() will automatically call\n"
" the clear() method if the previous call to parse() raised an\n"
--- 687,723 ----
PyDoc_STRVAR(csv_parser_doc,
! "parser(delimiter=',', quotechar='\"', escapechar=None,\n"
! " doublequote=1, lineterminator='\\r\\n', quoting='minimal',\n"
! " autoclear=1, strict=0) -> Parser\n"
"\n"
! "Constructs a CSV parser object.\n"
"\n"
! " delimiter\n"
" Defines the character that will be used to separate\n"
" fields in the CSV record.\n"
"\n"
! " quotechar\n"
! " Defines the character used to quote fields that\n"
! " contain the field separator or newlines. If set to None\n"
! " special characters will be escaped using the escapechar.\n"
! "\n"
! " escapechar\n"
! " Defines the character used to escape special\n"
! " characters. Only used if quotechar is None.\n"
! "\n"
! " doublequote\n"
! " When True, quotes in a fields must be doubled up.\n"
! "\n"
! " skipinitialspace\n"
! " When True spaces following the delimiter are ignored.\n"
! "\n"
! " lineterminator\n"
! " The string used to terminate records.\n"
! "\n"
! " quoting\n"
! " Controls the generation of quotes around fields when writing\n"
! " records. This is only used when quotechar is not None.\n"
! "\n"
! " autoclear\n"
" When True, calling parse() will automatically call\n"
" the clear() method if the previous call to parse() raised an\n"
***************
*** 651,664 ****
" When True, the parser will raise an exception on\n"
" malformed fields rather than attempting to guess the right\n"
! " behavior.\n"
! "\n"
! " quote_char\n"
! " Defines the character used to quote fields that\n"
! " contain the field separator or newlines. If set to None\n"
! " special characters will be escaped using the escape_char.\n"
! "\n"
! " escape_char\n"
! " Defines the character used to escape special\n"
! " characters. Only used if quote_char is None.\n");
static PyObject *
--- 727,731 ----
" When True, the parser will raise an exception on\n"
" malformed fields rather than attempting to guess the right\n"
! " behavior.\n");
static PyObject *
***************
*** 666,673 ****
{
static char *keywords[] = {
! "ms_double_quote", "field_sep", "auto_clear", "strict",
! "quote_char", "escape_char", NULL
};
! PyObject *quote_char, *escape_char;
ParserObj *self = PyObject_NEW(ParserObj, &Parser_Type);
--- 733,742 ----
{
static char *keywords[] = {
! "quotechar", "delimiter", "escapechar", "skipinitialspace",
! "lineterminator", "quoting", "doublequote",
! "autoclear", "strict",
! NULL
};
! PyObject *quotechar, *escapechar;
ParserObj *self = PyObject_NEW(ParserObj, &Parser_Type);
***************
*** 675,690 ****
return NULL;
! self->state = START_RECORD;
! self->fields = PyList_New(0);
! self->ms_double_quote = 1;
! self->auto_clear = 1;
self->strict = 0;
- self->field_sep = ',';
- quote_char = escape_char = NULL;
- self->quote_char = '"';
- self->escape_char = '\0';
self->had_parse_error = 0;
-
self->field = NULL;
self->field_size = 0;
--- 744,762 ----
return NULL;
! self->quotechar = '"';
! self->have_quotechar = 1;
! self->delimiter = ',';
! self->escapechar = '\0';
! self->have_escapechar = 0;
! self->skipinitialspace = 0;
! self->lineterminator = PyString_FromString("\r\n");
! self->quoting = QUOTE_MINIMAL;
! self->doublequote = 1;
! self->autoclear = 1;
self->strict = 0;
+ self->state = START_RECORD;
+ self->fields = PyList_New(0);
self->had_parse_error = 0;
self->field = NULL;
self->field_size = 0;
***************
*** 696,707 ****
self->num_fields = 0;
! if (PyArg_ParseTupleAndKeywords(args, keyword_args, "|iciiOO",
keywords,
! &self->ms_double_quote,
! &self->field_sep,
! &self->auto_clear, &self->strict,
! "e_char, &escape_char)
! && !_set_char_attr(&self->quote_char, quote_char)
! && !_set_char_attr(&self->escape_char, escape_char))
return (PyObject*)self;
--- 768,788 ----
self->num_fields = 0;
! if (self->lineterminator == NULL || self->fields == NULL) {
! Py_DECREF(self);
! return NULL;
! }
!
! quotechar = escapechar = NULL;
! if (PyArg_ParseTupleAndKeywords(args, keyword_args, "|OcOisiiii",
keywords,
! "echar, &self->delimiter,
! &escapechar, &self->skipinitialspace,
! &self->lineterminator, &self->quoting,
! &self->doublequote,
! &self->autoclear, &self->strict)
! && !_set_char_attr(&self->quotechar,
! &self->have_quotechar, quotechar)
! && !_set_char_attr(&self->escapechar,
! &self->have_escapechar, escapechar))
return (PyObject*)self;
***************
*** 722,726 ****
"following methods:\n"
" clear()\n"
! " Discards all fields parsed so far. If auto_clear is set to\n"
" zero. You should call this after a parser exception.\n"
"\n"
--- 803,807 ----
"following methods:\n"
" clear()\n"
! " Discards all fields parsed so far. If autoclear is set to\n"
" zero. You should call this after a parser exception.\n"
"\n"