[pypy-commit] pypy default: Finish the reader logic. Mostly untested so far, apart from the one test in test_reader.
arigo
noreply at buildbot.pypy.org
Mon Sep 24 18:10:22 CEST 2012
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r57498:47511fcae6a9
Date: 2012-09-24 15:32 +0200
http://bitbucket.org/pypy/pypy/changeset/47511fcae6a9/
Log: Finish the reader logic. Mostly untested so far, apart from the one
test in test_reader.
diff --git a/pypy/module/_csv/interp_reader.py b/pypy/module/_csv/interp_reader.py
--- a/pypy/module/_csv/interp_reader.py
+++ b/pypy/module/_csv/interp_reader.py
@@ -1,7 +1,10 @@
+from pypy.rlib.rstring import StringBuilder
from pypy.interpreter.baseobjspace import Wrappable
from pypy.interpreter.error import OperationError
from pypy.interpreter.gateway import NoneNotWrapped
from pypy.interpreter.typedef import TypeDef, interp2app
+from pypy.interpreter.typedef import interp_attrproperty_w, interp_attrproperty
+from pypy.module._csv.interp_csv import _build_dialect
from pypy.module._csv.interp_csv import (QUOTE_MINIMAL, QUOTE_ALL,
QUOTE_NONNUMERIC, QUOTE_NONE)
@@ -15,6 +18,9 @@
w_error = space.getattr(w_module, space.wrap('Error'))
raise OperationError(w_error, space.wrap(msg))
+def new_field_builder():
+    """Return a new, empty StringBuilder used to accumulate one CSV field.
+
+    The builder is created with an initial size hint of 64.
+    """
+    initial_size = 64
+    return StringBuilder(initial_size)
+
class W_Reader(Wrappable):
@@ -27,35 +33,58 @@
def iter_w(self):
return self.space.wrap(self)
+    def save_field(self, field_builder):
+        """Finish the current field and append it to self.fields_w.
+
+        The text accumulated in 'field_builder' is built into a string.
+        If self.numeric_field is set, the flag is reset and the text is
+        converted with string_to_float() and wrapped as a float;
+        otherwise the text itself is wrapped.
+
+        Raises OperationError(w_ValueError) carrying the parser's message
+        if the numeric conversion fails.
+        """
+        # Bind the space locally: the error path below needs it, and the
+        # method has no other 'space' name in scope.
+        space = self.space
+        field = field_builder.build()
+        if self.numeric_field:
+            from pypy.objspace.std.strutil import ParseStringError
+            from pypy.objspace.std.strutil import string_to_float
+            # reset the flag before converting, so that it is cleared
+            # even if the conversion below raises
+            self.numeric_field = False
+            try:
+                ff = string_to_float(field)
+            except ParseStringError, e:
+                raise OperationError(space.w_ValueError,
+                                     space.wrap(e.msg))
+            w_obj = space.wrap(ff)
+        else:
+            w_obj = space.wrap(field)
+        self.fields_w.append(w_obj)
+
def next_w(self):
space = self.space
dialect = self.dialect
self.fields_w = []
self.numeric_field = False
- field = ''
+ field_builder = None # valid iff state not in [START_RECORD, EAT_CRNL]
state = START_RECORD
#
while True:
try:
w_line = space.next(self.w_iter)
except OperationError, e:
- if e.match(space, space.w_StopIteration) and len(field) > 0:
- raise error("newline inside string")
+ if e.match(space, space.w_StopIteration):
+ if field_builder is not None:
+ raise error("newline inside string")
raise
self.line_num += 1
line = space.str_w(w_line)
for c in line:
+ if c == '\0':
+ raise error("line contains NULL byte")
+
if state == START_RECORD:
if c == '\n' or c == '\r':
state = EAT_CRNL
continue
# normal character - handle as START_FIELD
state = START_FIELD
+ # fall-through to the next case
+
if state == START_FIELD:
+ field_builder = new_field_builder()
# expecting field
if c == '\n' or c == '\r':
# save empty field
- assert len(field) == 0; self.save_field('')
+ self.save_field(field_builder)
state = EAT_CRNL
elif (c == dialect.quotechar and
dialect.quoting != QUOTE_NONE):
@@ -69,12 +98,101 @@
pass
elif c == dialect.delimiter:
# save empty field
- assert len(field) == 0; self.save_field('')
+ self.save_field(field_builder)
else:
# begin new unquoted field
if dialect.quoting == QUOTE_NONNUMERIC:
self.numeric_field = True
- field += .....
+ field_builder.append(c)
+ state = IN_FIELD
+
+ elif state == ESCAPED_CHAR:
+ field_builder.append(c)
+ state = IN_FIELD
+
+ elif state == IN_FIELD:
+ # in unquoted field
+ if c == '\n' or c == '\r':
+ # end of line
+ self.save_field(field_builder)
+ state = EAT_CRNL
+ elif c == dialect.escapechar:
+ # possible escaped character
+ state = ESCAPED_CHAR
+ elif c == dialect.delimiter:
+ # save field - wait for new field
+ self.save_field(field_builder)
+ state = START_FIELD
+ else:
+ # normal character - save in field
+ field_builder.append(c)
+
+ elif state == IN_QUOTED_FIELD:
+ # in quoted field
+ if c == dialect.escapechar:
+ # Possible escape character
+ state = ESCAPE_IN_QUOTED_FIELD
+ elif (c == dialect.quotechar and
+ dialect.quoting != QUOTE_NONE):
+ if dialect.doublequote:
+ # doublequote; " represented by ""
+ state = QUOTE_IN_QUOTED_FIELD
+ else:
+ # end of quote part of field
+ state = IN_FIELD
+ else:
+ # normal character - save in field
+ field_builder.append(c)
+
+ elif state == ESCAPE_IN_QUOTED_FIELD:
+ field_builder.append(c)
+ state = IN_QUOTED_FIELD
+
+ elif state == QUOTE_IN_QUOTED_FIELD:
+ # doublequote - seen a quote in an quoted field
+ if (dialect.quoting != QUOTE_NONE and
+ c == dialect.quotechar):
+ # save "" as "
+ field_builder.append(c)
+ state = IN_QUOTED_FIELD
+ elif c == dialect.delimiter:
+ # save field - wait for new field
+ self.save_field(field_builder)
+ state = START_FIELD
+ elif c == '\n' or c == '\r':
+ # end of line
+ self.save_field(field_builder)
+ state = EAT_CRNL
+ elif not dialect.strict:
+ field_builder.append(c)
+ state = IN_FIELD
+ else:
+ # illegal
+ raise error("'%s' expected after '%s'" %
+ dialect.delimiter,
+ dialect.quotechar)
+
+ elif state == EAT_CRNL:
+ if not (c == '\n' or c == '\r'):
+ raise error("new-line character seen in unquoted "
+ "field - do you need to open the file "
+ "in universal-newline mode?")
+
+ if (state == START_FIELD or
+ state == IN_FIELD or
+ state == QUOTE_IN_QUOTED_FIELD):
+ self.save_field()
+ break
+ elif state == ESCAPED_CHAR:
+ field_builder.append('\n')
+ state = IN_FIELD
+ elif state == IN_QUOTED_FIELD:
+ pass
+ elif state == ESCAPE_IN_QUOTED_FIELD:
+ field_builder.append('\n')
+ state = IN_QUOTED_FIELD
+ else:
+ break
#
w_result = space.newlist(self.fields_w)
self.fields_w = None
More information about the pypy-commit
mailing list