[Python-checkins] bpo-38200: Add itertools.pairwise() (GH-23549)

rhettinger webhook-mailer at python.org
Mon Nov 30 23:43:00 EST 2020


https://github.com/python/cpython/commit/cc061d0e6fb2569efa91531686f75b89e94ec865
commit: cc061d0e6fb2569efa91531686f75b89e94ec865
branch: master
author: Raymond Hettinger <rhettinger at users.noreply.github.com>
committer: rhettinger <rhettinger at users.noreply.github.com>
date: 2020-11-30T20:42:54-08:00
summary:

bpo-38200: Add itertools.pairwise() (GH-23549)

files:
A Misc/NEWS.d/next/Library/2020-11-28-22-52-57.bpo-38200.DuWGlW.rst
M Doc/library/itertools.rst
M Lib/test/test_itertools.py
M Modules/clinic/itertoolsmodule.c.h
M Modules/itertoolsmodule.c

diff --git a/Doc/library/itertools.rst b/Doc/library/itertools.rst
index 3de66c9349281..44728b42287bc 100644
--- a/Doc/library/itertools.rst
+++ b/Doc/library/itertools.rst
@@ -55,6 +55,7 @@ Iterator                        Arguments                       Results
 :func:`filterfalse`             pred, seq                       elements of seq where pred(elem) is false           ``filterfalse(lambda x: x%2, range(10)) --> 0 2 4 6 8``
 :func:`groupby`                 iterable[, key]                 sub-iterators grouped by value of key(v)
 :func:`islice`                  seq, [start,] stop [, step]     elements from seq[start:stop:step]                  ``islice('ABCDEFG', 2, None) --> C D E F G``
+:func:`pairwise`                iterable                        (p[0], p[1]), (p[1], p[2])                          ``pairwise('ABCDEFG') --> AB BC CD DE EF FG``
 :func:`starmap`                 func, seq                       func(\*seq[0]), func(\*seq[1]), ...                 ``starmap(pow, [(2,5), (3,2), (10,3)]) --> 32 9 1000``
 :func:`takewhile`               pred, seq                       seq[0], seq[1], until pred fails                    ``takewhile(lambda x: x<5, [1,4,6,4,1]) --> 1 4``
 :func:`tee`                     it, n                           it1, it2, ... itn  splits one iterator into n
@@ -475,6 +476,22 @@ loops that truncate the stream.
    If *start* is ``None``, then iteration starts at zero. If *step* is ``None``,
    then the step defaults to one.
 
+.. function:: pairwise(iterable)
+
+   Return successive overlapping pairs taken from the input *iterable*.
+
+   The number of 2-tuples in the output iterator will be one fewer than the
+   number of inputs.  It will be empty if the input iterable has fewer than
+   two values.
+
+   Roughly equivalent to::
+
+        def pairwise(iterable):
+            # pairwise('ABCDEFG') --> AB BC CD DE EF FG
+            a, b = tee(iterable)
+            next(b, None)
+            return zip(a, b)
+
 
 .. function:: permutations(iterable, r=None)
 
@@ -782,12 +799,6 @@ which incur interpreter overhead.
            return starmap(func, repeat(args))
        return starmap(func, repeat(args, times))
 
-   def pairwise(iterable):
-       "s -> (s0,s1), (s1,s2), (s2, s3), ..."
-       a, b = tee(iterable)
-       next(b, None)
-       return zip(a, b)
-
    def grouper(iterable, n, fillvalue=None):
        "Collect data into fixed-length chunks or blocks"
        # grouper('ABCDEFG', 3, 'x') --> ABC DEF Gxx"
diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
index 702cf0820316b..df2997e87d494 100644
--- a/Lib/test/test_itertools.py
+++ b/Lib/test/test_itertools.py
@@ -1024,6 +1024,25 @@ def run(r1, r2):
         self.assertEqual(next(it), (1, 2))
         self.assertRaises(RuntimeError, next, it)
 
+    def test_pairwise(self):
+        self.assertEqual(list(pairwise('')), [])
+        self.assertEqual(list(pairwise('a')), [])
+        self.assertEqual(list(pairwise('ab')),
+                              [('a', 'b')]),
+        self.assertEqual(list(pairwise('abcde')),
+                              [('a', 'b'), ('b', 'c'), ('c', 'd'), ('d', 'e')])
+        self.assertEqual(list(pairwise(range(10_000))),
+                         list(zip(range(10_000), range(1, 10_000))))
+
+        with self.assertRaises(TypeError):
+            pairwise()                                      # too few arguments
+        with self.assertRaises(TypeError):
+            pairwise('abc', 10)                             # too many arguments
+        with self.assertRaises(TypeError):
+            pairwise(iterable='abc')                        # keyword arguments
+        with self.assertRaises(TypeError):
+            pairwise(None)                                  # non-iterable argument
+
     def test_product(self):
         for args, result in [
             ([], [()]),                     # zero iterables
@@ -1787,6 +1806,10 @@ def test_islice(self):
         a = []
         self.makecycle(islice([a]*2, None), a)
 
+    def test_pairwise(self):
+        a = []
+        self.makecycle(pairwise([a]*5), a)
+
     def test_permutations(self):
         a = []
         self.makecycle(permutations([1,2,a,3], 3), a)
@@ -1995,6 +2018,17 @@ def test_islice(self):
             self.assertRaises(TypeError, islice, N(s), 10)
             self.assertRaises(ZeroDivisionError, list, islice(E(s), 10))
 
+    def test_pairwise(self):
+        for s in ("123", "", range(1000), ('do', 1.2), range(2000,2200,5)):
+            for g in (G, I, Ig, S, L, R):
+                seq = list(g(s))
+                expected = list(zip(seq, seq[1:]))
+                actual = list(pairwise(g(s)))
+                self.assertEqual(actual, expected)
+            self.assertRaises(TypeError, pairwise, X(s))
+            self.assertRaises(TypeError, pairwise, N(s))
+            self.assertRaises(ZeroDivisionError, list, pairwise(E(s)))
+
     def test_starmap(self):
         for s in (range(10), range(0), range(100), (7,11), range(20,50,5)):
             for g in (G, I, Ig, S, L, R):
@@ -2312,15 +2346,6 @@ def test_permutations_sizeof(self):
 ...     else:
 ...         return starmap(func, repeat(args, times))
 
->>> def pairwise(iterable):
-...     "s -> (s0,s1), (s1,s2), (s2, s3), ..."
-...     a, b = tee(iterable)
-...     try:
-...         next(b)
-...     except StopIteration:
-...         pass
-...     return zip(a, b)
-
 >>> def grouper(n, iterable, fillvalue=None):
 ...     "grouper(3, 'ABCDEFG', 'x') --> ABC DEF Gxx"
 ...     args = [iter(iterable)] * n
@@ -2451,15 +2476,6 @@ def test_permutations_sizeof(self):
 >>> take(5, map(int, repeatfunc(random.random)))
 [0, 0, 0, 0, 0]
 
->>> list(pairwise('abcd'))
-[('a', 'b'), ('b', 'c'), ('c', 'd')]
-
->>> list(pairwise([]))
-[]
-
->>> list(pairwise('a'))
-[]
-
 >>> list(islice(pad_none('abc'), 0, 6))
 ['a', 'b', 'c', None, None, None]
 
diff --git a/Misc/NEWS.d/next/Library/2020-11-28-22-52-57.bpo-38200.DuWGlW.rst b/Misc/NEWS.d/next/Library/2020-11-28-22-52-57.bpo-38200.DuWGlW.rst
new file mode 100644
index 0000000000000..b4bc5551b2532
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-11-28-22-52-57.bpo-38200.DuWGlW.rst
@@ -0,0 +1 @@
+Added itertools.pairwise()
diff --git a/Modules/clinic/itertoolsmodule.c.h b/Modules/clinic/itertoolsmodule.c.h
index c1192bbcb0d79..82729eeb56bce 100644
--- a/Modules/clinic/itertoolsmodule.c.h
+++ b/Modules/clinic/itertoolsmodule.c.h
@@ -2,6 +2,37 @@
 preserve
 [clinic start generated code]*/
 
+PyDoc_STRVAR(pairwise_new__doc__,
+"pairwise(iterable, /)\n"
+"--\n"
+"\n"
+"Return an iterator of overlapping pairs taken from the input iterator.\n"
+"\n"
+"    s -> (s0,s1), (s1,s2), (s2, s3), ...");
+
+static PyObject *
+pairwise_new_impl(PyTypeObject *type, PyObject *iterable);
+
+static PyObject *
+pairwise_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
+{
+    PyObject *return_value = NULL;
+    PyObject *iterable;
+
+    if ((type == &pairwise_type) &&
+        !_PyArg_NoKeywords("pairwise", kwargs)) {
+        goto exit;
+    }
+    if (!_PyArg_CheckPositional("pairwise", PyTuple_GET_SIZE(args), 1, 1)) {
+        goto exit;
+    }
+    iterable = PyTuple_GET_ITEM(args, 0);
+    return_value = pairwise_new_impl(type, iterable);
+
+exit:
+    return return_value;
+}
+
 PyDoc_STRVAR(itertools_groupby__doc__,
 "groupby(iterable, key=None)\n"
 "--\n"
@@ -627,4 +658,4 @@ itertools_count(PyTypeObject *type, PyObject *args, PyObject *kwargs)
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=d7f58dc477814b45 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=889c4afc3b13574f input=a9049054013a1b77]*/
diff --git a/Modules/itertoolsmodule.c b/Modules/itertoolsmodule.c
index ce8b4347ef220..7144856c352f8 100644
--- a/Modules/itertoolsmodule.c
+++ b/Modules/itertoolsmodule.c
@@ -1,4 +1,5 @@
 
+
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
 #include "pycore_long.h"          // _PyLong_GetZero()
@@ -27,8 +28,9 @@ class itertools.accumulate "accumulateobject *" "&accumulate_type"
 class itertools.compress "compressobject *" "&compress_type"
 class itertools.filterfalse "filterfalseobject *" "&filterfalse_type"
 class itertools.count "countobject *" "&count_type"
+class itertools.pairwise "pairwiseobject *" "&pairwise_type"
 [clinic start generated code]*/
-/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ea05c93c6d94726a]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=6498ed21fbe1bf94]*/
 
 static PyTypeObject groupby_type;
 static PyTypeObject _grouper_type;
@@ -45,9 +47,140 @@ static PyTypeObject accumulate_type;
 static PyTypeObject compress_type;
 static PyTypeObject filterfalse_type;
 static PyTypeObject count_type;
+static PyTypeObject pairwise_type;
 
 #include "clinic/itertoolsmodule.c.h"
 
+/* pairwise object ***********************************************************/
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *it;
+    PyObject *old;
+} pairwiseobject;
+
+/*[clinic input]
+ at classmethod
+itertools.pairwise.__new__ as pairwise_new
+    iterable: object
+    /
+Return an iterator of overlapping pairs taken from the input iterator.
+
+    s -> (s0,s1), (s1,s2), (s2, s3), ...
+
+[clinic start generated code]*/
+
+static PyObject *
+pairwise_new_impl(PyTypeObject *type, PyObject *iterable)
+/*[clinic end generated code: output=9f0267062d384456 input=6e7c3cddb431a8d6]*/
+{
+    PyObject *it;
+    pairwiseobject *po;
+
+    it = PyObject_GetIter(iterable);
+    if (it == NULL) {
+        return NULL;
+    }
+    po = (pairwiseobject *)type->tp_alloc(type, 0);
+    if (po == NULL) {
+        Py_DECREF(it);
+        return NULL;
+    }
+    po->it = it;
+    po->old = NULL;
+    return (PyObject *)po;
+}
+
+static void
+pairwise_dealloc(pairwiseobject *po)
+{
+    PyObject_GC_UnTrack(po);
+    Py_XDECREF(po->it);
+    Py_XDECREF(po->old);
+    Py_TYPE(po)->tp_free(po);
+}
+
+static int
+pairwise_traverse(pairwiseobject *po, visitproc visit, void *arg)
+{
+    Py_VISIT(po->it);
+    Py_VISIT(po->old);
+    return 0;
+}
+
+static PyObject *
+pairwise_next(pairwiseobject *po)
+{
+    PyObject *it = po->it;
+    PyObject *old = po->old;
+    PyObject *new, *result;
+
+    if (it == NULL) {
+        return NULL;
+    }
+    if (old == NULL) {
+        po->old = old = (*Py_TYPE(it)->tp_iternext)(it);
+        if (old == NULL) {
+            Py_CLEAR(po->it);
+            return NULL;
+        }
+    }
+    new = (*Py_TYPE(it)->tp_iternext)(it);
+    if (new == NULL) {
+        Py_CLEAR(po->it);
+        Py_CLEAR(po->old);
+        return NULL;
+    }
+    /* Future optimization: Reuse the result tuple as we do in enumerate() */
+    result = PyTuple_Pack(2, old, new);
+    Py_SETREF(po->old, new);
+    return result;
+}
+
+static PyTypeObject pairwise_type = {
+    PyVarObject_HEAD_INIT(&PyType_Type, 0)
+    "itertools.pairwise",           /* tp_name */
+    sizeof(pairwiseobject),         /* tp_basicsize */
+    0,                              /* tp_itemsize */
+    /* methods */
+    (destructor)pairwise_dealloc,   /* tp_dealloc */
+    0,                              /* tp_vectorcall_offset */
+    0,                              /* tp_getattr */
+    0,                              /* tp_setattr */
+    0,                              /* tp_as_async */
+    0,                              /* tp_repr */
+    0,                              /* tp_as_number */
+    0,                              /* tp_as_sequence */
+    0,                              /* tp_as_mapping */
+    0,                              /* tp_hash */
+    0,                              /* tp_call */
+    0,                              /* tp_str */
+    PyObject_GenericGetAttr,        /* tp_getattro */
+    0,                              /* tp_setattro */
+    0,                              /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+        Py_TPFLAGS_BASETYPE,        /* tp_flags */
+    pairwise_new__doc__,            /* tp_doc */
+    (traverseproc)pairwise_traverse,    /* tp_traverse */
+    0,                              /* tp_clear */
+    0,                              /* tp_richcompare */
+    0,                              /* tp_weaklistoffset */
+    PyObject_SelfIter,              /* tp_iter */
+    (iternextfunc)pairwise_next,    /* tp_iternext */
+    0,                              /* tp_methods */
+    0,                              /* tp_members */
+    0,                              /* tp_getset */
+    0,                              /* tp_base */
+    0,                              /* tp_dict */
+    0,                              /* tp_descr_get */
+    0,                              /* tp_descr_set */
+    0,                              /* tp_dictoffset */
+    0,                              /* tp_init */
+    PyType_GenericAlloc,            /* tp_alloc */
+    pairwise_new,                   /* tp_new */
+    PyObject_GC_Del,                /* tp_free */
+};
+
 
 /* groupby object ************************************************************/
 
@@ -4666,6 +4799,7 @@ groupby(iterable[, keyfunc]) --> sub-iterators grouped by value of keyfunc(v)\n\
 filterfalse(pred, seq) --> elements of seq where pred(elem) is False\n\
 islice(seq, [start,] stop [, step]) --> elements from\n\
        seq[start:stop:step]\n\
+pairwise(s) --> (s[0],s[1]), (s[1],s[2]), (s[2], s[3]), ...\n\
 starmap(fun, seq) --> fun(*seq[0]), fun(*seq[1]), ...\n\
 tee(it, n=2) --> (it1, it2 , ... itn) splits one iterator into n\n\
 takewhile(pred, seq) --> seq[0], seq[1], until pred fails\n\
@@ -4695,6 +4829,7 @@ itertoolsmodule_exec(PyObject *m)
         &filterfalse_type,
         &count_type,
         &ziplongest_type,
+        &pairwise_type,
         &permutations_type,
         &product_type,
         &repeat_type,



More information about the Python-checkins mailing list