[pypy-commit] pypy default: Adding an RPython-level _csv module.
arigo
noreply at buildbot.pypy.org
Mon Sep 24 18:10:20 CEST 2012
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r57496:90f77542fc0e
Date: 2012-09-23 14:35 +0200
http://bitbucket.org/pypy/pypy/changeset/90f77542fc0e/
Log: Adding an RPython-level _csv module.
diff --git a/pypy/module/_csv/__init__.py b/pypy/module/_csv/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/__init__.py
@@ -0,0 +1,84 @@
+from pypy.interpreter.mixedmodule import MixedModule
+
+
class Module(MixedModule):
    """CSV parsing and writing.

This module provides classes that assist in the reading and writing
of Comma Separated Value (CSV) files, and implements the interface
described by PEP 305. Although many CSV files are simple to parse,
the format is not formally defined by a stable specification and
is subtle enough that parsing lines of a CSV file with something
like line.split(\",\") is bound to fail. The module supports three
basic APIs: reading, writing, and registration of dialects.


DIALECT REGISTRATION:

Readers and writers support a dialect argument, which is a convenient
handle on a group of settings. When the dialect argument is a string,
it identifies one of the dialects previously registered with the module.
If it is a class or instance, the attributes of the argument are used as
the settings for the reader or writer:

    class excel:
        delimiter = ','
        quotechar = '\"'
        escapechar = None
        doublequote = True
        skipinitialspace = False
        lineterminator = '\\r\\n'
        quoting = QUOTE_MINIMAL

SETTINGS:

    * quotechar - specifies a one-character string to use as the
        quoting character. It defaults to '\"'.
    * delimiter - specifies a one-character string to use as the
        field separator. It defaults to ','.
    * skipinitialspace - specifies how to interpret whitespace which
        immediately follows a delimiter. It defaults to False, which
        means that whitespace immediately following a delimiter is part
        of the following field.
    * lineterminator - specifies the character sequence which should
        terminate rows.
    * quoting - controls when quotes should be generated by the writer.
        It can take on any of the following module constants:

        csv.QUOTE_MINIMAL means only when required, for example, when a
            field contains either the quotechar or the delimiter
        csv.QUOTE_ALL means that quotes are always placed around fields.
        csv.QUOTE_NONNUMERIC means that quotes are always placed around
            fields which do not parse as integers or floating point
            numbers.
        csv.QUOTE_NONE means that quotes are never placed around fields.
    * escapechar - specifies a one-character string used to escape
        the delimiter when quoting is set to QUOTE_NONE.
    * doublequote - controls the handling of quotes inside fields. When
        True, two consecutive quotes are interpreted as one during read,
        and when writing, each quote character embedded in the data is
        written as two quotes.
"""

    # Public name -> 'app_csv.<attribute>'.  Every target named here is
    # defined in app_csv.py (the dialect registry functions, the registry
    # dict itself, and the Error exception class).
    appleveldefs = {
        'register_dialect': 'app_csv.register_dialect',
        'unregister_dialect': 'app_csv.unregister_dialect',
        'get_dialect': 'app_csv.get_dialect',
        'list_dialects': 'app_csv.list_dialects',
        '_dialects': 'app_csv._dialects',

        'Error': 'app_csv.Error',
    }

    # Public name -> source string referring to interp_csv / interp_reader.
    # NOTE(review): how and when MixedModule evaluates these strings is
    # defined outside this file -- confirm against MixedModule.
    # NOTE(review): interp_reader is not part of the diff shown with this
    # file -- confirm it exists before 'Reader' is accessed.
    interpleveldefs = {
        '__version__': 'space.wrap("1.0")',

        'QUOTE_MINIMAL': 'space.wrap(interp_csv.QUOTE_MINIMAL)',
        'QUOTE_ALL': 'space.wrap(interp_csv.QUOTE_ALL)',
        'QUOTE_NONNUMERIC': 'space.wrap(interp_csv.QUOTE_NONNUMERIC)',
        'QUOTE_NONE': 'space.wrap(interp_csv.QUOTE_NONE)',

        'Dialect': 'interp_csv.W_Dialect',

        'Reader': 'interp_reader.W_Reader',
    }
diff --git a/pypy/module/_csv/app_csv.py b/pypy/module/_csv/app_csv.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/app_csv.py
@@ -0,0 +1,33 @@
+import _csv
+
class Error(Exception):
    """Exception type of the csv module.

    In this file it is raised with the message "unknown dialect" when a
    dialect name is looked up or removed but was never registered.
    """
+
+
+_dialects = {}
+
def register_dialect(name, dialect=None, **kwargs):
    """Create a mapping from a string name to a dialect object.

    The object stored is ``_csv.Dialect(dialect, **kwargs)``; it replaces
    any entry already registered under the same name.

    Raises TypeError when *name* is not a ``basestring`` instance.  The
    name check runs first, so a bad name never reaches the Dialect
    constructor.
    """
    if isinstance(name, basestring):
        _dialects[name] = _csv.Dialect(dialect, **kwargs)
        return
    raise TypeError("dialect name must be a string or unicode")
+
def unregister_dialect(name):
    """Delete the name/dialect mapping associated with a string name.

    Raises Error("unknown dialect") when nothing is registered under
    *name*.
    """
    try:
        # pop() with no default raises KeyError for a missing key, exactly
        # like the ``del`` statement would.
        _dialects.pop(name)
    except KeyError:
        raise Error("unknown dialect")
+
def get_dialect(name):
    """Return the dialect instance associated with name.

    Raises Error("unknown dialect") when nothing is registered under
    *name*.
    """
    try:
        registered = _dialects[name]
    except KeyError:
        raise Error("unknown dialect")
    return registered
+
def list_dialects():
    """Return a list of all known dialect names.

    The result is a new list built from the keys of the registry, so the
    caller may modify it freely.
    """
    return [dialect_name for dialect_name in _dialects]
diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/interp_csv.py
@@ -0,0 +1,146 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.typedef import TypeDef, interp_attrproperty
+from pypy.interpreter.typedef import GetSetProperty
+from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped
+
+
# Quoting policies.  The values are the consecutive integers 0..3; the
# range check on "quoting" in W_Dialect___new__ (0 <= value < 4) relies
# on that.
QUOTE_MINIMAL = 0
QUOTE_ALL = 1
QUOTE_NONNUMERIC = 2
QUOTE_NONE = 3
+
+
class W_Dialect(Wrappable):
    """Interpreter-level object holding the settings of one CSV dialect.

    The class body defines nothing itself: the attributes (delimiter,
    doublequote, escapechar, lineterminator, quotechar, quoting,
    skipinitialspace, strict) are assigned by W_Dialect___new__.
    """
+
+
def _fetch(space, w_dialect, name):
    """Look up the attribute called *name* on w_dialect via space.findattr.

    NOTE(review): callers test the result with ``is None``, so findattr
    presumably returns None for a missing attribute -- confirm.
    """
    w_name = space.wrap(name)
    return space.findattr(w_dialect, w_name)
+
+def _get_bool(space, w_src, default):
+ if w_src is None:
+ return default
+ return space.is_true(w_src)
+
+def _get_int(space, w_src, default):
+ if w_src is None:
+ return default
+ return space.int_w(w_src)
+
+def _get_str(space, w_src, default):
+ if w_src is None:
+ return default
+ return space.str_w(w_src)
+
+def _get_char(space, w_src, default, name):
+ if w_src is None:
+ return default
+ if space.is_w(w_src, space.w_None):
+ return '\0'
+ src = space.str_w(w_src)
+ if len(src) == 1:
+ return src[0]
+ if len(src) == 0:
+ return '\0'
+ raise operationerrfmt(space.w_TypeError,
+ '"%s" must be a 1-character string', name)
+
def W_Dialect___new__(space, w_subtype, w_dialect=NoneNotWrapped,
                      w_delimiter=NoneNotWrapped,
                      w_doublequote=NoneNotWrapped,
                      w_escapechar=NoneNotWrapped,
                      w_lineterminator=NoneNotWrapped,
                      w_quotechar=NoneNotWrapped,
                      w_quoting=NoneNotWrapped,
                      w_skipinitialspace=NoneNotWrapped,
                      w_strict=NoneNotWrapped,
                      ):
    # Build a Dialect from an optional base dialect plus keyword overrides.
    # Every argument is tested with ``is None`` to mean "not supplied".
    # NOTE(review): this assumes the gateway hands over an interp-level
    # None for arguments left at NoneNotWrapped -- confirm.
    #
    # NOTE(review): the nesting below was reconstructed.  All lookups on
    # w_dialect sit under the ``is not None`` guard, since they would
    # otherwise run against a missing dialect when only keywords are given.
    if w_dialect is not None:
        # A string names a registered dialect: resolve it via get_dialect.
        if space.isinstance_w(w_dialect, space.w_basestring):
            w_module = space.getbuiltinmodule('_csv')
            w_dialect = space.call_method(w_module, 'get_dialect', w_dialect)

        no_overrides = (w_delimiter is None and
                        w_doublequote is None and
                        w_escapechar is None and
                        w_lineterminator is None and
                        w_quotechar is None and
                        w_quoting is None and
                        w_skipinitialspace is None and
                        w_strict is None)
        # Nothing to override and already exactly the requested type:
        # hand the very same object back instead of copying it.
        if no_overrides and space.is_w(w_subtype, space.type(w_dialect)):
            return w_dialect

        # Any setting not given explicitly is taken from the base dialect.
        if w_delimiter is None:
            w_delimiter = _fetch(space, w_dialect, 'delimiter')
        if w_doublequote is None:
            w_doublequote = _fetch(space, w_dialect, 'doublequote')
        if w_escapechar is None:
            w_escapechar = _fetch(space, w_dialect, 'escapechar')
        if w_lineterminator is None:
            w_lineterminator = _fetch(space, w_dialect, 'lineterminator')
        if w_quotechar is None:
            w_quotechar = _fetch(space, w_dialect, 'quotechar')
        if w_quoting is None:
            w_quoting = _fetch(space, w_dialect, 'quoting')
        if w_skipinitialspace is None:
            w_skipinitialspace = _fetch(space, w_dialect, 'skipinitialspace')
        if w_strict is None:
            w_strict = _fetch(space, w_dialect, 'strict')

    # Convert each setting, falling back to the built-in default when it is
    # still None.  The conversions run in a fixed order, which decides which
    # error is reported first when several settings are invalid.
    result = space.allocate_instance(W_Dialect, w_subtype)
    result.delimiter = _get_char(space, w_delimiter, ',', 'delimiter')
    result.doublequote = _get_bool(space, w_doublequote, True)
    result.escapechar = _get_char(space, w_escapechar, '\0', 'escapechar')
    result.lineterminator = _get_str(space, w_lineterminator, '\r\n')
    result.quotechar = _get_char(space, w_quotechar, '"', 'quotechar')
    quoting = _get_int(space, w_quoting, QUOTE_MINIMAL)
    result.skipinitialspace = _get_bool(space, w_skipinitialspace, False)
    result.strict = _get_bool(space, w_strict, False)

    # validate options
    if quoting < 0 or quoting >= 4:
        raise OperationError(space.w_TypeError,
                             space.wrap('bad "quoting" value'))

    if result.delimiter == '\0':
        raise OperationError(space.w_TypeError,
                             space.wrap('delimiter must be set'))

    # quotechar explicitly None with no quoting given: disable quoting.
    if space.is_w(w_quotechar, space.w_None) and w_quoting is None:
        quoting = QUOTE_NONE
    if quoting != QUOTE_NONE and result.quotechar == '\0':
        raise OperationError(space.w_TypeError,
                    space.wrap('quotechar must be set if quoting enabled'))
    result.quoting = quoting

    return space.wrap(result)
+
+
def _get_escapechar(space, dialect):
    """Getter for Dialect.escapechar: wrapped None when it is '\\0'."""
    char = dialect.escapechar
    if char != '\0':
        return space.wrap(char)
    return space.w_None
+
def _get_quotechar(space, dialect):
    """Getter for Dialect.quotechar: wrapped None when it is '\\0'."""
    char = dialect.quotechar
    if char != '\0':
        return space.wrap(char)
    return space.w_None
+
+
# App-level type description for W_Dialect.  Most settings map straight to
# the instance attribute of the same name; escapechar and quotechar use
# getter functions, which turn the internal '\0' value into None.
W_Dialect.typedef = TypeDef(
    'Dialect',
    __module__='_csv',
    __new__=interp2app(W_Dialect___new__),

    delimiter=interp_attrproperty('delimiter', W_Dialect),
    doublequote=interp_attrproperty('doublequote', W_Dialect),
    escapechar=GetSetProperty(_get_escapechar, cls=W_Dialect),
    lineterminator=interp_attrproperty('lineterminator', W_Dialect),
    quotechar=GetSetProperty(_get_quotechar, cls=W_Dialect),
    quoting=interp_attrproperty('quoting', W_Dialect),
    skipinitialspace=interp_attrproperty('skipinitialspace', W_Dialect),
    strict=interp_attrproperty('strict', W_Dialect),

    __doc__="""CSV dialect

The Dialect type records CSV parsing and generation options.
""")
diff --git a/pypy/module/_csv/test/test_dialect.py b/pypy/module/_csv/test/test_dialect.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/test/test_dialect.py
@@ -0,0 +1,107 @@
+from pypy.conftest import gettestobjspace
+
+
class AppTestDialect(object):
    # Application-level tests for the dialect registry and the Dialect type
    # of the _csv module.  ``raises`` is not defined in this file; it is
    # expected to be supplied by the test framework.

    def setup_class(cls):
        cls.space = gettestobjspace(usemodules=['_csv'])

    def test_register_dialect(self):
        # Change one setting at a time; all the others must keep their
        # default value.
        import _csv

        defaults = [('delimiter', ','),
                    ('doublequote', True),
                    ('escapechar', None),
                    ('lineterminator', '\r\n'),
                    ('quotechar', '"'),
                    ('quoting', _csv.QUOTE_MINIMAL),
                    ('skipinitialspace', False),
                    ('strict', False),
                    ]
        changes = [('delimiter', ':'),
                   ('doublequote', False),
                   ('escapechar', '/'),
                   ('lineterminator', '---\n'),
                   ('quotechar', '%'),
                   ('quoting', _csv.QUOTE_NONNUMERIC),
                   ('skipinitialspace', True),
                   ('strict', True),
                   ]

        for changeattr, newvalue in changes:
            kwargs = {changeattr: newvalue}
            _csv.register_dialect('foo1', **kwargs)
            d = _csv.get_dialect('foo1')
            assert d.__class__.__name__ == 'Dialect'
            for attr, default in defaults:
                expected = newvalue if attr == changeattr else default
                assert getattr(d, attr) == expected

    def test_register_dialect_base_1(self):
        # The base dialect is given by its registered name.
        import _csv
        _csv.register_dialect('foo1', escapechar='!')
        _csv.register_dialect('foo2', 'foo1', strict=True)
        d1 = _csv.get_dialect('foo1')
        assert d1.escapechar == '!'
        assert d1.strict == False
        d2 = _csv.get_dialect('foo2')
        assert d2.escapechar == '!'
        assert d2.strict == True

    def test_register_dialect_base_2(self):
        # The base dialect is an arbitrary class carrying the attributes.
        import _csv

        class Foo1:
            escapechar = '?'

        _csv.register_dialect('foo2', Foo1, strict=True)
        d2 = _csv.get_dialect('foo2')
        assert d2.escapechar == '?'
        assert d2.strict == True

    def test_typeerror(self):
        # Each tuple is (setting name, bad value, bad value, ...).
        import _csv
        attempts = [("delimiter", '', 123),
                    ("escapechar", Ellipsis, 'foo', 0),
                    ("lineterminator", -132),
                    ("quotechar", '', 25),
                    ("quoting", 4, '', '\x00'),
                    ]
        for attempt in attempts:
            name, bad_values = attempt[0], attempt[1:]
            for value in bad_values:
                kwargs = {name: value}
                raises(TypeError, _csv.register_dialect, 'foo1', **kwargs)

    def test_bool_arg(self):
        # boolean arguments take *any* object and use its truth-value
        import _csv
        _csv.register_dialect('foo1', doublequote=[])
        assert _csv.get_dialect('foo1').doublequote == False
        _csv.register_dialect('foo1', skipinitialspace=2)
        assert _csv.get_dialect('foo1').skipinitialspace == True
        _csv.register_dialect('foo1', strict=_csv)    # :-/
        assert _csv.get_dialect('foo1').strict == True

    def test_line_terminator(self):
        # lineterminator can be the empty string
        import _csv
        _csv.register_dialect('foo1', lineterminator='')
        assert _csv.get_dialect('foo1').lineterminator == ''

    def test_unregister_dialect(self):
        # Once removed, both lookup and a second removal must fail.
        import _csv
        _csv.register_dialect('foo1')
        _csv.unregister_dialect('foo1')
        raises(_csv.Error, _csv.get_dialect, 'foo1')
        raises(_csv.Error, _csv.unregister_dialect, 'foo1')

    def test_list_dialects(self):
        # A name shows up in the list only while it is registered.
        import _csv
        lst = _csv.list_dialects()
        assert type(lst) is list
        assert 'neverseen' not in lst
        _csv.register_dialect('neverseen')
        lst = _csv.list_dialects()
        assert 'neverseen' in lst
        _csv.unregister_dialect('neverseen')
        lst = _csv.list_dialects()
        assert 'neverseen' not in lst
More information about the pypy-commit
mailing list