[pypy-commit] pypy default: Adding an RPython-level _csv module.

Mon Sep 24 18:10:20 CEST 2012

Author: Armin Rigo <arigo at tunes.org>
Branch: 
Changeset: r57496:90f77542fc0e
Date: 2012-09-23 14:35 +0200
http://bitbucket.org/pypy/pypy/changeset/90f77542fc0e/

Log:	Adding an RPython-level _csv module.

diff --git a/pypy/module/_csv/__init__.py b/pypy/module/_csv/__init__.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/__init__.py
@@ -0,0 +1,84 @@
+from pypy.interpreter.mixedmodule import MixedModule
+
+
+class Module(MixedModule):
+    """CSV parsing and writing.
+
+This module provides classes that assist in the reading and writing
+of Comma Separated Value (CSV) files, and implements the interface
+described by PEP 305.  Although many CSV files are simple to parse,
+the format is not formally defined by a stable specification and
+is subtle enough that parsing lines of a CSV file with something
+like line.split(\",\") is bound to fail.  The module supports three
+basic APIs: reading, writing, and registration of dialects.
+
+
+DIALECT REGISTRATION:
+
+Readers and writers support a dialect argument, which is a convenient
+handle on a group of settings.  When the dialect argument is a string,
+it identifies one of the dialects previously registered with the module.
+If it is a class or instance, the attributes of the argument are used as
+the settings for the reader or writer:
+
+    class excel:
+        delimiter = ','
+        quotechar = '\"'
+        escapechar = None
+        doublequote = True
+        skipinitialspace = False
+        lineterminator = '\\r\\n'
+        quoting = QUOTE_MINIMAL
+
+SETTINGS:
+
+    * quotechar - specifies a one-character string to use as the 
+        quoting character.  It defaults to '\"'.
+    * delimiter - specifies a one-character string to use as the 
+        field separator.  It defaults to ','.
+    * skipinitialspace - specifies how to interpret whitespace which
+        immediately follows a delimiter.  It defaults to False, which
+        means that whitespace immediately following a delimiter is part
+        of the following field.
+    * lineterminator - specifies the character sequence which should
+        terminate rows.
+    * quoting - controls when quotes should be generated by the writer.
+        It can take on any of the following module constants:
+
+        csv.QUOTE_MINIMAL means only when required, for example, when a
+            field contains either the quotechar or the delimiter
+        csv.QUOTE_ALL means that quotes are always placed around fields.
+        csv.QUOTE_NONNUMERIC means that quotes are always placed around
+            fields which do not parse as integers or floating point
+            numbers.
+        csv.QUOTE_NONE means that quotes are never placed around fields.
+    * escapechar - specifies a one-character string used to escape 
+        the delimiter when quoting is set to QUOTE_NONE.
+    * doublequote - controls the handling of quotes inside fields.  When
+        True, two consecutive quotes are interpreted as one during read,
+        and when writing, each quote character embedded in the data is
+        written as two quotes.
+"""
+
+    appleveldefs = {
+        'register_dialect':   'app_csv.register_dialect',
+        'unregister_dialect': 'app_csv.unregister_dialect',
+        'get_dialect':        'app_csv.get_dialect',
+        'list_dialects':      'app_csv.list_dialects',
+        '_dialects':          'app_csv._dialects',
+
+        'Error':              'app_csv.Error',
+        }
+
+    interpleveldefs = {
+        '__version__':      'space.wrap("1.0")',
+
+        'QUOTE_MINIMAL':    'space.wrap(interp_csv.QUOTE_MINIMAL)',
+        'QUOTE_ALL':        'space.wrap(interp_csv.QUOTE_ALL)',
+        'QUOTE_NONNUMERIC': 'space.wrap(interp_csv.QUOTE_NONNUMERIC)',
+        'QUOTE_NONE':       'space.wrap(interp_csv.QUOTE_NONE)',
+
+        'Dialect': 'interp_csv.W_Dialect',
+
+        'Reader': 'interp_reader.W_Reader',
+        }
diff --git a/pypy/module/_csv/app_csv.py b/pypy/module/_csv/app_csv.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/app_csv.py
@@ -0,0 +1,33 @@
+import _csv
+
+class Error(Exception):
+    pass
+
+
+_dialects = {}
+
+def register_dialect(name, dialect=None, **kwargs):
+    """Create a mapping from a string name to a dialect instance."""
+    if not isinstance(name, basestring):
+        raise TypeError("dialect name must be a string or unicode")
+
+    dialect = _csv.Dialect(dialect, **kwargs)
+    _dialects[name] = dialect
+
+def unregister_dialect(name):
+    """Delete the name/dialect mapping associated with a string name."""
+    try:
+        del _dialects[name]
+    except KeyError:
+        raise Error("unknown dialect")
+
+def get_dialect(name):
+    """Return the dialect instance associated with name."""
+    try:
+        return _dialects[name]
+    except KeyError:
+        raise Error("unknown dialect")
+
+def list_dialects():
+    """Return a list of all known dialect names."""
+    return list(_dialects)
diff --git a/pypy/module/_csv/interp_csv.py b/pypy/module/_csv/interp_csv.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/interp_csv.py
@@ -0,0 +1,146 @@
+from pypy.interpreter.baseobjspace import Wrappable
+from pypy.interpreter.error import OperationError, operationerrfmt
+from pypy.interpreter.typedef import TypeDef, interp_attrproperty
+from pypy.interpreter.typedef import GetSetProperty
+from pypy.interpreter.gateway import interp2app, unwrap_spec, NoneNotWrapped
+
+
+QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE = range(4)
+
+
+class W_Dialect(Wrappable):
+    pass
+
+
+def _fetch(space, w_dialect, name):
+    return space.findattr(w_dialect, space.wrap(name))
+
+def _get_bool(space, w_src, default):
+    if w_src is None:
+        return default
+    return space.is_true(w_src)
+
+def _get_int(space, w_src, default):
+    if w_src is None:
+        return default
+    return space.int_w(w_src)
+
+def _get_str(space, w_src, default):
+    if w_src is None:
+        return default
+    return space.str_w(w_src)
+
+def _get_char(space, w_src, default, name):
+    if w_src is None:
+        return default
+    if space.is_w(w_src, space.w_None):
+        return '\0'
+    src = space.str_w(w_src)
+    if len(src) == 1:
+        return src[0]
+    if len(src) == 0:
+        return '\0'
+    raise operationerrfmt(space.w_TypeError,
+                          '"%s" must be a 1-character string', name)
+
+def W_Dialect___new__(space, w_subtype, w_dialect = NoneNotWrapped,
+                      w_delimiter        = NoneNotWrapped,
+                      w_doublequote      = NoneNotWrapped,
+                      w_escapechar       = NoneNotWrapped,
+                      w_lineterminator   = NoneNotWrapped,
+                      w_quotechar        = NoneNotWrapped,
+                      w_quoting          = NoneNotWrapped,
+                      w_skipinitialspace = NoneNotWrapped,
+                      w_strict           = NoneNotWrapped,
+                      ):
+    if w_dialect is not None:
+        if space.isinstance_w(w_dialect, space.w_basestring):
+            w_module = space.getbuiltinmodule('_csv')
+            w_dialect = space.call_method(w_module, 'get_dialect', w_dialect)
+
+        if (w_delimiter is None and
+            w_doublequote is None and
+            w_escapechar is None and
+            w_lineterminator is None and
+            w_quotechar is None and
+            w_quoting is None and
+            w_skipinitialspace is None and
+            w_strict is None and
+            space.is_w(w_subtype, space.type(w_dialect))):
+            return w_dialect
+
+        if w_delimiter is None:
+            w_delimiter = _fetch(space, w_dialect, 'delimiter')
+        if w_doublequote is None:
+            w_doublequote = _fetch(space, w_dialect, 'doublequote')
+        if w_escapechar is None:
+            w_escapechar = _fetch(space, w_dialect, 'escapechar')
+        if w_lineterminator is None:
+            w_lineterminator = _fetch(space, w_dialect, 'lineterminator')
+        if w_quotechar is None:
+            w_quotechar = _fetch(space, w_dialect, 'quotechar')
+        if w_quoting is None:
+            w_quoting = _fetch(space, w_dialect, 'quoting')
+        if w_skipinitialspace is None:
+            w_skipinitialspace = _fetch(space, w_dialect, 'skipinitialspace')
+        if w_strict is None:
+            w_strict = _fetch(space, w_dialect, 'strict')
+
+    dialect = space.allocate_instance(W_Dialect, w_subtype)
+    dialect.delimiter = _get_char(space, w_delimiter, ',', 'delimiter')
+    dialect.doublequote = _get_bool(space, w_doublequote, True)
+    dialect.escapechar = _get_char(space, w_escapechar, '\0', 'escapechar')
+    dialect.lineterminator = _get_str(space, w_lineterminator, '\r\n')
+    dialect.quotechar = _get_char(space, w_quotechar, '"', 'quotechar')
+    tmp_quoting = _get_int(space, w_quoting, QUOTE_MINIMAL)
+    dialect.skipinitialspace = _get_bool(space, w_skipinitialspace, False)
+    dialect.strict = _get_bool(space, w_strict, False)
+
+    # validate options
+    if not (0 <= tmp_quoting < 4):
+        raise OperationError(space.w_TypeError,
+                             space.wrap('bad "quoting" value'))
+
+    if dialect.delimiter == '\0':
+        raise OperationError(space.w_TypeError,
+                             space.wrap('delimiter must be set'))
+
+    if space.is_w(w_quotechar, space.w_None) and w_quoting is None:
+        tmp_quoting = QUOTE_NONE
+    if tmp_quoting != QUOTE_NONE and dialect.quotechar == '\0':
+        raise OperationError(space.w_TypeError,
+                        space.wrap('quotechar must be set if quoting enabled'))
+    dialect.quoting = tmp_quoting
+
+    return space.wrap(dialect)
+
+
+def _get_escapechar(space, dialect):
+    if dialect.escapechar == '\0':
+        return space.w_None
+    return space.wrap(dialect.escapechar)
+
+def _get_quotechar(space, dialect):
+    if dialect.quotechar == '\0':
+        return space.w_None
+    return space.wrap(dialect.quotechar)
+
+
+W_Dialect.typedef = TypeDef(
+        'Dialect',
+        __module__ = '_csv',
+        __new__ = interp2app(W_Dialect___new__),
+
+        delimiter        = interp_attrproperty('delimiter', W_Dialect),
+        doublequote      = interp_attrproperty('doublequote', W_Dialect),
+        escapechar       = GetSetProperty(_get_escapechar, cls=W_Dialect),
+        lineterminator   = interp_attrproperty('lineterminator', W_Dialect),
+        quotechar        = GetSetProperty(_get_quotechar, cls=W_Dialect),
+        quoting          = interp_attrproperty('quoting', W_Dialect),
+        skipinitialspace = interp_attrproperty('skipinitialspace', W_Dialect),
+        strict           = interp_attrproperty('strict', W_Dialect),
+
+        __doc__ = """CSV dialect
+
+The Dialect type records CSV parsing and generation options.
+""")
diff --git a/pypy/module/_csv/test/test_dialect.py b/pypy/module/_csv/test/test_dialect.py
new file mode 100644
--- /dev/null
+++ b/pypy/module/_csv/test/test_dialect.py
@@ -0,0 +1,107 @@
+from pypy.conftest import gettestobjspace
+
+
+class AppTestDialect(object):
+    def setup_class(cls):
+        cls.space = gettestobjspace(usemodules=['_csv'])
+
+    def test_register_dialect(self):
+        import _csv
+
+        attrs = [('delimiter', ','),
+                 ('doublequote', True),
+                 ('escapechar', None),
+                 ('lineterminator', '\r\n'),
+                 ('quotechar', '"'),
+                 ('quoting', _csv.QUOTE_MINIMAL),
+                 ('skipinitialspace', False),
+                 ('strict', False),
+                 ]
+
+        for changeattr, newvalue in [('delimiter', ':'),
+                                     ('doublequote', False),
+                                     ('escapechar', '/'),
+                                     ('lineterminator', '---\n'),
+                                     ('quotechar', '%'),
+                                     ('quoting', _csv.QUOTE_NONNUMERIC),
+                                     ('skipinitialspace', True),
+                                     ('strict', True)]:
+            kwargs = {changeattr: newvalue}
+            _csv.register_dialect('foo1', **kwargs)
+            d = _csv.get_dialect('foo1')
+            assert d.__class__.__name__ == 'Dialect'
+            for attr, default in attrs:
+                if attr == changeattr:
+                    expected = newvalue
+                else:
+                    expected = default
+                assert getattr(d, attr) == expected
+
+    def test_register_dialect_base_1(self):
+        import _csv
+        _csv.register_dialect('foo1', escapechar='!')
+        _csv.register_dialect('foo2', 'foo1', strict=True)
+        d1 = _csv.get_dialect('foo1')
+        assert d1.escapechar == '!'
+        assert d1.strict == False
+        d2 = _csv.get_dialect('foo2')
+        assert d2.escapechar == '!'
+        assert d2.strict == True
+
+    def test_register_dialect_base_2(self):
+        import _csv
+        class Foo1:
+            escapechar = '?'
+        _csv.register_dialect('foo2', Foo1, strict=True)
+        d2 = _csv.get_dialect('foo2')
+        assert d2.escapechar == '?'
+        assert d2.strict == True
+
+    def test_typeerror(self):
+        import _csv
+        attempts = [("delimiter", '', 123),
+                    ("escapechar", Ellipsis, 'foo', 0),
+                    ("lineterminator", -132),
+                    ("quotechar", '', 25),
+                    ("quoting", 4, '', '\x00'),
+                    ]
+        for attempt in attempts:
+            name = attempt[0]
+            for value in attempt[1:]:
+                kwargs = {name: value}
+                raises(TypeError, _csv.register_dialect, 'foo1', **kwargs)
+
+    def test_bool_arg(self):
+        # boolean arguments take *any* object and use its truth-value
+        import _csv
+        _csv.register_dialect('foo1', doublequote=[])
+        assert _csv.get_dialect('foo1').doublequote == False
+        _csv.register_dialect('foo1', skipinitialspace=2)
+        assert _csv.get_dialect('foo1').skipinitialspace == True
+        _csv.register_dialect('foo1', strict=_csv)    # :-/
+        assert _csv.get_dialect('foo1').strict == True
+
+    def test_line_terminator(self):
+        # lineterminator can be the empty string
+        import _csv
+        _csv.register_dialect('foo1', lineterminator='')
+        assert _csv.get_dialect('foo1').lineterminator == ''
+
+    def test_unregister_dialect(self):
+        import _csv
+        _csv.register_dialect('foo1')
+        _csv.unregister_dialect('foo1')
+        raises(_csv.Error, _csv.get_dialect, 'foo1')
+        raises(_csv.Error, _csv.unregister_dialect, 'foo1')
+
+    def test_list_dialects(self):
+        import _csv
+        lst = _csv.list_dialects()
+        assert type(lst) is list
+        assert 'neverseen' not in lst
+        _csv.register_dialect('neverseen')
+        lst = _csv.list_dialects()
+        assert 'neverseen' in lst
+        _csv.unregister_dialect('neverseen')
+        lst = _csv.list_dialects()
+        assert 'neverseen' not in lst