[Python-checkins] cpython: Issue #27842: The csv.DictReader now returns rows of type OrderedDict.

Tue Aug 30 15:35:56 EDT 2016

https://hg.python.org/cpython/rev/bb3e2a5be31b
changeset:   102953:bb3e2a5be31b
user:        Raymond Hettinger <python at rcn.com>
date:        Tue Aug 30 12:35:50 2016 -0700
summary:
  Issue #27842: The csv.DictReader now returns rows of type OrderedDict.

files:
  Doc/library/csv.rst  |  39 +++++++++++++++++++------------
  Lib/csv.py           |   3 +-
  Lib/test/test_csv.py |  16 +++++++++++++
  Misc/NEWS            |   3 ++
  4 files changed, 45 insertions(+), 16 deletions(-)

diff --git a/Doc/library/csv.rst b/Doc/library/csv.rst
--- a/Doc/library/csv.rst
+++ b/Doc/library/csv.rst
@@ -149,18 +149,25 @@
 .. class:: DictReader(csvfile, fieldnames=None, restkey=None, restval=None, \
                       dialect='excel', *args, **kwds)
 
-   Create an object which operates like a regular reader but maps the
-   information read into a dict whose keys are given by the optional
-   *fieldnames* parameter.  The *fieldnames* parameter is a :mod:`sequence
-   <collections.abc>` whose elements are associated with the fields of the
-   input data in order. These elements become the keys of the resulting
-   dictionary.  If the *fieldnames* parameter is omitted, the values in the
-   first row of the *csvfile* will be used as the fieldnames.  If the row read
-   has more fields than the fieldnames sequence, the remaining data is added as
-   a sequence keyed by the value of *restkey*.  If the row read has fewer
-   fields than the fieldnames sequence, the remaining keys take the value of
-   the optional *restval* parameter.  Any other optional or keyword arguments
-   are passed to the underlying :class:`reader` instance.
+   Create an object that operates like a regular reader but maps the
+   information in each row to an :mod:`OrderedDict <collections.OrderedDict>`
+   whose keys are given by the optional *fieldnames* parameter.
+
+   The *fieldnames* parameter is a :term:`sequence`.  If *fieldnames* is
+   omitted, the values in the first row of the *csvfile* will be used as the
+   fieldnames.  Regardless of how the fieldnames are determined, the ordered
+   dictionary preserves their original ordering.
+
+   If a row has more fields than fieldnames, the remaining data is put in a
+   list and stored with the fieldname specified by *restkey* (which defaults
+   to ``None``).  If a non-blank row has fewer fields than fieldnames, the
+   missing values are filled-in with ``None``.
+
+   All other optional or keyword arguments are passed to the underlying
+   :class:`reader` instance.
+
+   .. versionchanged:: 3.6
+      Returned rows are now of type :class:`OrderedDict`.
 
    A short usage example::
 
@@ -170,9 +177,11 @@
        ...     for row in reader:
        ...         print(row['first_name'], row['last_name'])
        ...
-       Baked Beans
-       Lovely Spam
-       Wonderful Spam
+       Eric Idle
+       John Cleese
+
+       >>> print(row)
+       OrderedDict([('first_name', 'John'), ('last_name', 'Cleese')])
 
 
 .. class:: DictWriter(csvfile, fieldnames, restval='', extrasaction='raise', \
diff --git a/Lib/csv.py b/Lib/csv.py
--- a/Lib/csv.py
+++ b/Lib/csv.py
@@ -11,6 +11,7 @@
                  __doc__
 from _csv import Dialect as _Dialect
 
+from collections import OrderedDict
 from io import StringIO
 
 __all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
@@ -116,7 +117,7 @@
         # values
         while row == []:
             row = next(self.reader)
-        d = dict(zip(self.fieldnames, row))
+        d = OrderedDict(zip(self.fieldnames, row))
         lf = len(self.fieldnames)
         lr = len(row)
         if lf < lr:
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -10,6 +10,7 @@
 import gc
 import pickle
 from test import support
+from itertools import permutations
 
 class Test_Csv(unittest.TestCase):
     """
@@ -1092,6 +1093,21 @@
             fileobj.seek(0)
             self.assertEqual(fileobj.read(), expected)
 
+class KeyOrderingTest(unittest.TestCase):
+
+    def test_ordering_for_the_dict_reader_and_writer(self):
+        resultset = set()
+        for keys in permutations("abcde"):
+            with TemporaryFile('w+', newline='', encoding="utf-8") as fileobject:
+                dw = csv.DictWriter(fileobject, keys)
+                dw.writeheader()
+                fileobject.seek(0)
+                dr = csv.DictReader(fileobject)
+                kt = tuple(dr.fieldnames)
+                self.assertEqual(keys, kt)
+                resultset.add(kt)
+        # Final sanity check: were all permutations unique?
+        self.assertEqual(len(resultset), 120, "Key ordering: some key permutations not collected (expected 120)")
 
 class MiscTestCase(unittest.TestCase):
     def test__all__(self):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -64,6 +64,9 @@
   match ``math.inf`` and ``math.nan``, and also ``cmath.infj`` and
   ``cmath.nanj`` to match the format used by complex repr.
 
+- Issue #27842: The csv.DictReader now returns rows of type OrderedDict.
+  (Contributed by Steve Holden.)
+
 - Issue #27861: Fixed a crash in sqlite3.Connection.cursor() when a factory
   creates not a cursor.  Patch by Xiang Zhang.
 

-- 
Repository URL: https://hg.python.org/cpython