[Python-Dev] cpython: Issue #16148: implemented PEP 424
Georg Brandl
g.brandl at gmx.net
Sat Oct 6 14:35:24 CEST 2012
Am 06.10.2012 14:12, schrieb armin.ronacher:
> http://hg.python.org/cpython/rev/a7ec0a1b0f7c
> changeset: 79511:a7ec0a1b0f7c
> parent: 79507:3c1df1ede882
> user: Armin Ronacher <armin.ronacher at active-4.com>
> date: Sat Oct 06 14:03:24 2012 +0200
> summary:
> Issue #16148: implemented PEP 424
>
> files:
> Doc/c-api/object.rst | 7 ++
> Doc/library/operator.rst | 6 +
> Include/abstract.h | 5 +-
> Lib/test/test_enumerate.py | 9 +-
> Lib/test/test_iterlen.py | 62 +++++++++----------
> Lib/test/test_itertools.py | 5 +-
> Lib/test/test_operator.py | 25 ++++++++
> Lib/test/test_set.py | 2 -
> Modules/operator.c | 27 ++++++++
> Objects/abstract.c | 80 +++++++++++++++----------
> Objects/bytearrayobject.c | 2 +-
> Objects/bytesobject.c | 2 +-
> Objects/iterobject.c | 11 ++-
> Objects/listobject.c | 2 +-
> 14 files changed, 162 insertions(+), 83 deletions(-)
>
>
> diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst
> --- a/Doc/c-api/object.rst
> +++ b/Doc/c-api/object.rst
> @@ -342,6 +342,13 @@
> returned. This is the equivalent to the Python expression ``len(o)``.
>
>
> +.. c:function:: Py_ssize_t PyObject_LengthHint(PyObject *o, Py_ssize_t default)
> +
> + Return an estimated length for the object *o*. First trying to return its
> + actual length, then an estimate using ``__length_hint__``, and finally
> + returning the default value. On error ``-1`` is returned. This is the
> + equivalent to the Python expression ``operator.length_hint(o, default)``.
This needs a ``versionadded`` directive.
Also, since __length_hint__ is now official, it needs an entry in
Doc/reference/datamodel.rst (which you can then link to from here).
> .. c:function:: PyObject* PyObject_GetItem(PyObject *o, PyObject *key)
>
> Return element of *o* corresponding to the object *key* or *NULL* on failure.
> diff --git a/Doc/library/operator.rst b/Doc/library/operator.rst
> --- a/Doc/library/operator.rst
> +++ b/Doc/library/operator.rst
> @@ -235,6 +235,12 @@
>
> .. XXX: find a better, readable, example
>
> +.. function:: length_hint(obj, default=0)
> +
> + Return an estimated length for the object *o*. First trying to return its
> + actual length, then an estimate using ``__length_hint__``, and finally
> + returning the default value.
This one also needs a ``versionadded`` directive and a link to the __length_hint__ documentation.
> The :mod:`operator` module also defines tools for generalized attribute and item
> lookups. These are useful for making fast field extractors as arguments for
> :func:`map`, :func:`sorted`, :meth:`itertools.groupby`, or other functions that
> diff --git a/Include/abstract.h b/Include/abstract.h
> --- a/Include/abstract.h
> +++ b/Include/abstract.h
> @@ -403,9 +403,8 @@
> PyAPI_FUNC(Py_ssize_t) PyObject_Length(PyObject *o);
> #define PyObject_Length PyObject_Size
>
> -#ifndef Py_LIMITED_API
> - PyAPI_FUNC(Py_ssize_t) _PyObject_LengthHint(PyObject *o, Py_ssize_t);
> -#endif
> +PyAPI_FUNC(int) _PyObject_HasLen(PyObject *o);
> +PyAPI_FUNC(Py_ssize_t) PyObject_LengthHint(PyObject *o, Py_ssize_t);
Not sure if new functions should be included in the limited API. I seem to
recall some discussion about giving Py_LIMITED_API a numeric value for the
required API version? PEP 384 is silent about it.
> /*
> Guess the size of object o using len(o) or o.__length_hint__().
> diff --git a/Lib/test/test_enumerate.py b/Lib/test/test_enumerate.py
> --- a/Lib/test/test_enumerate.py
> +++ b/Lib/test/test_enumerate.py
> @@ -1,4 +1,5 @@
> import unittest
> +import operator
> import sys
> import pickle
>
> @@ -168,15 +169,13 @@
> x = range(1)
> self.assertEqual(type(reversed(x)), type(iter(x)))
>
> - @support.cpython_only
> def test_len(self):
> # This is an implementation detail, not an interface requirement
If the test is no longer cpython_only, shouldn't this comment be removed as well?
> - from test.test_iterlen import len
> for s in ('hello', tuple('hello'), list('hello'), range(5)):
> - self.assertEqual(len(reversed(s)), len(s))
> + self.assertEqual(operator.length_hint(reversed(s)), len(s))
> r = reversed(s)
> list(r)
> - self.assertEqual(len(r), 0)
> + self.assertEqual(operator.length_hint(r), 0)
> class SeqWithWeirdLen:
> called = False
> def __len__(self):
> @@ -187,7 +186,7 @@
> def __getitem__(self, index):
> return index
> r = reversed(SeqWithWeirdLen())
> - self.assertRaises(ZeroDivisionError, len, r)
> + self.assertRaises(ZeroDivisionError, operator.length_hint, r)
>
>
> def test_gc(self):
> diff --git a/Lib/test/test_iterlen.py b/Lib/test/test_iterlen.py
> --- a/Lib/test/test_iterlen.py
> +++ b/Lib/test/test_iterlen.py
> @@ -45,31 +45,21 @@
> from test import support
> from itertools import repeat
> from collections import deque
> -from builtins import len as _len
> +from operator import length_hint
> ## ------- Concrete Type Tests -------
>
> @@ -92,10 +82,6 @@
> def setUp(self):
> self.it = repeat(None, n)
>
> - def test_no_len_for_infinite_repeat(self):
> - # The repeat() object can also be infinite
> - self.assertRaises(TypeError, len, repeat(None))
Why is this removed? I can see it was duplicated in test_itertools (below),
but you removed both instances.
> diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
> --- a/Lib/test/test_itertools.py
> +++ b/Lib/test/test_itertools.py
> @@ -1723,9 +1723,8 @@
> class LengthTransparency(unittest.TestCase):
>
> def test_repeat(self):
> - from test.test_iterlen import len
> - self.assertEqual(len(repeat(None, 50)), 50)
> - self.assertRaises(TypeError, len, repeat(None))
> + self.assertEqual(operator.length_hint(repeat(None, 50)), 50)
> + self.assertEqual(operator.length_hint(repeat(None), 12), 12)
>
> class RegressionTests(unittest.TestCase):
>
> diff --git a/Lib/test/test_operator.py b/Lib/test/test_operator.py
> --- a/Lib/test/test_operator.py
> +++ b/Lib/test/test_operator.py
> @@ -410,6 +410,31 @@
> self.assertEqual(operator.__ixor__ (c, 5), "ixor")
> self.assertEqual(operator.__iconcat__ (c, c), "iadd")
>
> + def test_length_hint(self):
> + class X(object):
> + def __init__(self, value):
> + self.value = value
> +
> + def __length_hint__(self):
> + if type(self.value) is type:
> + raise self.value
> + else:
> + return self.value
> +
> + self.assertEqual(operator.length_hint([], 2), 0)
> + self.assertEqual(operator.length_hint(iter([1, 2, 3])), 3)
> +
> + self.assertEqual(operator.length_hint(X(2)), 2)
> + self.assertEqual(operator.length_hint(X(NotImplemented), 4), 4)
> + self.assertEqual(operator.length_hint(X(TypeError), 12), 12)
> + with self.assertRaises(TypeError):
> + operator.length_hint(X("abc"))
> + with self.assertRaises(ValueError):
> + operator.length_hint(X(-2))
> + with self.assertRaises(LookupError):
> + operator.length_hint(X(LookupError))
> +
> +
> def test_main(verbose=None):
> import sys
> test_classes = (
> diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py
> --- a/Lib/test/test_set.py
> +++ b/Lib/test/test_set.py
> @@ -848,8 +848,6 @@
> for v in self.set:
> self.assertIn(v, self.values)
> setiter = iter(self.set)
> - # note: __length_hint__ is an internal undocumented API,
> - # don't rely on it in your own programs
> self.assertEqual(setiter.__length_hint__(), len(self.set))
>
> def test_pickling(self):
> diff --git a/Modules/operator.c b/Modules/operator.c
> --- a/Modules/operator.c
> +++ b/Modules/operator.c
> @@ -208,6 +208,31 @@
> return (result == 0);
> }
>
> +PyDoc_STRVAR(length_hint__doc__,
> +"length_hint(obj, default=0) -> int\n"
> +"Return an estimate of the number of items in obj.\n"
> +"This is useful for presizing containers when building from an\n"
> +"iterable.\n"
> +"\n"
> +"If the object supports len(), the result will be\n"
> +"exact. Otherwise, it may over- or under-estimate by an\n"
> +"arbitrary amount. The result will be an integer >= 0.");
> +
> +static PyObject *length_hint(PyObject *self, PyObject *args)
> +{
> + PyObject *obj;
> + Py_ssize_t defaultvalue = 0, res;
> + if (!PyArg_ParseTuple(args, "O|n:length_hint", &obj, &defaultvalue)) {
> + return NULL;
> + }
> + res = PyObject_LengthHint(obj, defaultvalue);
> + if (res == -1 && PyErr_Occurred()) {
> + return NULL;
> + }
> + return PyLong_FromSsize_t(res);
> +}
> +
> +
> PyDoc_STRVAR(compare_digest__doc__,
> "compare_digest(a, b) -> bool\n"
> "\n"
> @@ -366,6 +391,8 @@
>
> {"_compare_digest", (PyCFunction)compare_digest, METH_VARARGS,
> compare_digest__doc__},
> + {"length_hint", (PyCFunction)length_hint, METH_VARARGS,
> + length_hint__doc__},
> {NULL, NULL} /* sentinel */
>
> };
> diff --git a/Objects/abstract.c b/Objects/abstract.c
> --- a/Objects/abstract.c
> +++ b/Objects/abstract.c
> @@ -64,49 +64,67 @@
> }
> #define PyObject_Length PyObject_Size
>
> +int
> +_PyObject_HasLen(PyObject *o) {
> + return (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_length) ||
> + (Py_TYPE(o)->tp_as_mapping && Py_TYPE(o)->tp_as_mapping->mp_length);
> +}
>
> /* The length hint function returns a non-negative value from o.__len__()
> - or o.__length_hint__(). If those methods aren't found or return a negative
> - value, then the defaultvalue is returned. If one of the calls fails,
> - this function returns -1.
> + or o.__length_hint__(). If those methods aren't found. If one of the calls
^^^^^
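The sentence "If those methods aren't found." is incomplete: it no longer says what is returned in that case (the default value, as the old comment stated).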
> + fails this function returns -1.
> */
>
> Py_ssize_t
> -_PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue)
> +PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue)
Should we put "#define _PyObject_LengthHint PyObject_LengthHint" in some header
to make porting easier?
> {
> _Py_IDENTIFIER(__length_hint__);
> - PyObject *ro, *hintmeth;
> - Py_ssize_t rv;
> -
> - /* try o.__len__() */
> - rv = PyObject_Size(o);
> - if (rv >= 0)
> - return rv;
> - if (PyErr_Occurred()) {
> - if (!PyErr_ExceptionMatches(PyExc_TypeError))
> + Py_ssize_t res = PyObject_Length(o);
> + if (res < 0 && PyErr_Occurred()) {
> + if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
> return -1;
> + }
> PyErr_Clear();
> }
> -
> - /* try o.__length_hint__() */
> - hintmeth = _PyObject_LookupSpecial(o, &PyId___length_hint__);
> - if (hintmeth == NULL) {
> - if (PyErr_Occurred())
> + else {
> + return res;
> + }
> + PyObject *hint = _PyObject_LookupSpecial(o, &PyId___length_hint__);
> + if (hint == NULL) {
> + if (PyErr_Occurred()) {
> return -1;
> - else
> - return defaultvalue;
> - }
> - ro = PyObject_CallFunctionObjArgs(hintmeth, NULL);
> - Py_DECREF(hintmeth);
> - if (ro == NULL) {
> - if (!PyErr_ExceptionMatches(PyExc_TypeError))
> - return -1;
> - PyErr_Clear();
> + }
> return defaultvalue;
> }
> - rv = PyLong_Check(ro) ? PyLong_AsSsize_t(ro) : defaultvalue;
> - Py_DECREF(ro);
> - return rv;
> + PyObject *result = PyObject_CallFunctionObjArgs(hint, NULL);
> + Py_DECREF(hint);
> + if (result == NULL) {
> + if (PyErr_ExceptionMatches(PyExc_TypeError)) {
> + PyErr_Clear();
> + return defaultvalue;
> + }
> + return -1;
> + }
> + else if (result == Py_NotImplemented) {
> + Py_DECREF(result);
> + return defaultvalue;
> + }
> + if (!PyLong_Check(result)) {
> + PyErr_Format(PyExc_TypeError, "Length hint must be an integer, not %s",
We usually limit the string size here, e.g. "%.100s".
> + Py_TYPE(result)->tp_name);
> + Py_DECREF(result);
> + return -1;
> + }
> + defaultvalue = PyLong_AsSsize_t(result);
I'm not sure that the micro-optimization of reusing defaultvalue to hold the result is worth the confusion it causes.
> + Py_DECREF(result);
> + if (defaultvalue < 0 && PyErr_Occurred()) {
> + return -1;
> + }
> + if (defaultvalue < 0) {
> + PyErr_Format(PyExc_ValueError, "__length_hint__() should return >= 0");
Exception message is inconsistent with above: "Length hint" vs "__length_hint__()".
> + return -1;
> + }
> + return defaultvalue;
> }
> diff --git a/Objects/iterobject.c b/Objects/iterobject.c
> --- a/Objects/iterobject.c
> +++ b/Objects/iterobject.c
> @@ -76,9 +76,14 @@
> Py_ssize_t seqsize, len;
>
> if (it->it_seq) {
> - seqsize = PySequence_Size(it->it_seq);
> - if (seqsize == -1)
> - return NULL;
> + if (_PyObject_HasLen(it->it_seq)) {
> + seqsize = PySequence_Size(it->it_seq);
> + if (seqsize == -1)
> + return NULL;
> + }
> + else {
> + return Py_NotImplemented;
A Py_INCREF(Py_NotImplemented) is missing here before the return, as discussed on IRC.
> + }
> len = seqsize - it->it_index;
> if (len >= 0)
> return PyLong_FromSsize_t(len);
More information about the Python-Dev mailing list