[Python-Dev] cpython: Issue #16148: implemented PEP 424

Georg Brandl g.brandl at gmx.net
Sat Oct 6 14:35:24 CEST 2012


Am 06.10.2012 14:12, schrieb armin.ronacher:
> http://hg.python.org/cpython/rev/a7ec0a1b0f7c
> changeset:   79511:a7ec0a1b0f7c
> parent:      79507:3c1df1ede882
> user:        Armin Ronacher <armin.ronacher at active-4.com>
> date:        Sat Oct 06 14:03:24 2012 +0200
> summary:
>   Issue #16148: implemented PEP 424
> 
> files:
>   Doc/c-api/object.rst       |   7 ++
>   Doc/library/operator.rst   |   6 +
>   Include/abstract.h         |   5 +-
>   Lib/test/test_enumerate.py |   9 +-
>   Lib/test/test_iterlen.py   |  62 +++++++++----------
>   Lib/test/test_itertools.py |   5 +-
>   Lib/test/test_operator.py  |  25 ++++++++
>   Lib/test/test_set.py       |   2 -
>   Modules/operator.c         |  27 ++++++++
>   Objects/abstract.c         |  80 +++++++++++++++----------
>   Objects/bytearrayobject.c  |   2 +-
>   Objects/bytesobject.c      |   2 +-
>   Objects/iterobject.c       |  11 ++-
>   Objects/listobject.c       |   2 +-
>   14 files changed, 162 insertions(+), 83 deletions(-)
> 
> 
> diff --git a/Doc/c-api/object.rst b/Doc/c-api/object.rst
> --- a/Doc/c-api/object.rst
> +++ b/Doc/c-api/object.rst
> @@ -342,6 +342,13 @@
>     returned.  This is the equivalent to the Python expression ``len(o)``.
>  
>  
> +.. c:function:: Py_ssize_t PyObject_LengthHint(PyObject *o, Py_ssize_t default)
> +
> +   Return an estimated length for the object *o*. First trying to return its
> +   actual length, then an estimate using ``__length_hint__``, and finally
> +   returning the default value. On error ``-1`` is returned. This is the
> +   equivalent to the Python expression ``operator.length_hint(o, default)``.

Needs a versionadded.

Since __length_hint__ is now official, it needs an entry in
Doc/reference/datamodel.rst (which you can link to here).

>  .. c:function:: PyObject* PyObject_GetItem(PyObject *o, PyObject *key)
>  
>     Return element of *o* corresponding to the object *key* or *NULL* on failure.
> diff --git a/Doc/library/operator.rst b/Doc/library/operator.rst
> --- a/Doc/library/operator.rst
> +++ b/Doc/library/operator.rst
> @@ -235,6 +235,12 @@
>  
>  .. XXX: find a better, readable, example
>  
> +.. function:: length_hint(obj, default=0)
> +
> +   Return an estimated length for the object *o*. First trying to return its
> +   actual length, then an estimate using ``__length_hint__``, and finally
> +   returning the default value.

This one also needs a versionadded and a link to __length_hint__.

>  The :mod:`operator` module also defines tools for generalized attribute and item
>  lookups.  These are useful for making fast field extractors as arguments for
>  :func:`map`, :func:`sorted`, :meth:`itertools.groupby`, or other functions that
> diff --git a/Include/abstract.h b/Include/abstract.h
> --- a/Include/abstract.h
> +++ b/Include/abstract.h
> @@ -403,9 +403,8 @@
>       PyAPI_FUNC(Py_ssize_t) PyObject_Length(PyObject *o);
>  #define PyObject_Length PyObject_Size
>  
> -#ifndef Py_LIMITED_API
> -     PyAPI_FUNC(Py_ssize_t) _PyObject_LengthHint(PyObject *o, Py_ssize_t);
> -#endif
> +PyAPI_FUNC(int) _PyObject_HasLen(PyObject *o);
> +PyAPI_FUNC(Py_ssize_t) PyObject_LengthHint(PyObject *o, Py_ssize_t);

Not sure if new functions should be included in the limited API.  I seem to
recall some discussion about giving Py_LIMITED_API a numeric value for the
required API version?  PEP 384 is silent about it.

>         /*
>       Guess the size of object o using len(o) or o.__length_hint__().
> diff --git a/Lib/test/test_enumerate.py b/Lib/test/test_enumerate.py
> --- a/Lib/test/test_enumerate.py
> +++ b/Lib/test/test_enumerate.py
> @@ -1,4 +1,5 @@
>  import unittest
> +import operator
>  import sys
>  import pickle
>  
> @@ -168,15 +169,13 @@
>          x = range(1)
>          self.assertEqual(type(reversed(x)), type(iter(x)))
>  
> -    @support.cpython_only
>      def test_len(self):
>          # This is an implementation detail, not an interface requirement

If it's not cpython_only anymore, shouldn't this comment also vanish?

> -        from test.test_iterlen import len
>          for s in ('hello', tuple('hello'), list('hello'), range(5)):
> -            self.assertEqual(len(reversed(s)), len(s))
> +            self.assertEqual(operator.length_hint(reversed(s)), len(s))
>              r = reversed(s)
>              list(r)
> -            self.assertEqual(len(r), 0)
> +            self.assertEqual(operator.length_hint(r), 0)
>          class SeqWithWeirdLen:
>              called = False
>              def __len__(self):
> @@ -187,7 +186,7 @@
>              def __getitem__(self, index):
>                  return index
>          r = reversed(SeqWithWeirdLen())
> -        self.assertRaises(ZeroDivisionError, len, r)
> +        self.assertRaises(ZeroDivisionError, operator.length_hint, r)
>  
>  
>      def test_gc(self):
> diff --git a/Lib/test/test_iterlen.py b/Lib/test/test_iterlen.py
> --- a/Lib/test/test_iterlen.py
> +++ b/Lib/test/test_iterlen.py
> @@ -45,31 +45,21 @@
>  from test import support
>  from itertools import repeat
>  from collections import deque
> -from builtins import len as _len
> +from operator import length_hint

>  ## ------- Concrete Type Tests -------
>  
> @@ -92,10 +82,6 @@
>      def setUp(self):
>          self.it = repeat(None, n)
>  
> -    def test_no_len_for_infinite_repeat(self):
> -        # The repeat() object can also be infinite
> -        self.assertRaises(TypeError, len, repeat(None))

Why is this removed?  I can see it was duplicated in test_itertools (below),
but you removed both instances.


> diff --git a/Lib/test/test_itertools.py b/Lib/test/test_itertools.py
> --- a/Lib/test/test_itertools.py
> +++ b/Lib/test/test_itertools.py
> @@ -1723,9 +1723,8 @@
>  class LengthTransparency(unittest.TestCase):
>  
>      def test_repeat(self):
> -        from test.test_iterlen import len
> -        self.assertEqual(len(repeat(None, 50)), 50)
> -        self.assertRaises(TypeError, len, repeat(None))
> +        self.assertEqual(operator.length_hint(repeat(None, 50)), 50)
> +        self.assertEqual(operator.length_hint(repeat(None), 12), 12)
>  
>  class RegressionTests(unittest.TestCase):
>  
> diff --git a/Lib/test/test_operator.py b/Lib/test/test_operator.py
> --- a/Lib/test/test_operator.py
> +++ b/Lib/test/test_operator.py
> @@ -410,6 +410,31 @@
>          self.assertEqual(operator.__ixor__     (c, 5), "ixor")
>          self.assertEqual(operator.__iconcat__  (c, c), "iadd")
>  
> +    def test_length_hint(self):
> +        class X(object):
> +            def __init__(self, value):
> +                self.value = value
> +
> +            def __length_hint__(self):
> +                if type(self.value) is type:
> +                    raise self.value
> +                else:
> +                    return self.value
> +
> +        self.assertEqual(operator.length_hint([], 2), 0)
> +        self.assertEqual(operator.length_hint(iter([1, 2, 3])), 3)
> +
> +        self.assertEqual(operator.length_hint(X(2)), 2)
> +        self.assertEqual(operator.length_hint(X(NotImplemented), 4), 4)
> +        self.assertEqual(operator.length_hint(X(TypeError), 12), 12)
> +        with self.assertRaises(TypeError):
> +            operator.length_hint(X("abc"))
> +        with self.assertRaises(ValueError):
> +            operator.length_hint(X(-2))
> +        with self.assertRaises(LookupError):
> +            operator.length_hint(X(LookupError))
> +
> +
>  def test_main(verbose=None):
>      import sys
>      test_classes = (
> diff --git a/Lib/test/test_set.py b/Lib/test/test_set.py
> --- a/Lib/test/test_set.py
> +++ b/Lib/test/test_set.py
> @@ -848,8 +848,6 @@
>          for v in self.set:
>              self.assertIn(v, self.values)
>          setiter = iter(self.set)
> -        # note: __length_hint__ is an internal undocumented API,
> -        # don't rely on it in your own programs
>          self.assertEqual(setiter.__length_hint__(), len(self.set))
>  
>      def test_pickling(self):
> diff --git a/Modules/operator.c b/Modules/operator.c
> --- a/Modules/operator.c
> +++ b/Modules/operator.c
> @@ -208,6 +208,31 @@
>      return (result == 0);
>  }
>  
> +PyDoc_STRVAR(length_hint__doc__,
> +"length_hint(obj, default=0) -> int\n"
> +"Return an estimate of the number of items in obj.\n"
> +"This is useful for presizing containers when building from an\n"
> +"iterable.\n"
> +"\n"
> +"If the object supports len(), the result will be\n"
> +"exact. Otherwise, it may over- or under-estimate by an\n"
> +"arbitrary amount. The result will be an integer >= 0.");
> +
> +static PyObject *length_hint(PyObject *self, PyObject *args)
> +{
> +    PyObject *obj;
> +    Py_ssize_t defaultvalue = 0, res;
> +    if (!PyArg_ParseTuple(args, "O|n:length_hint", &obj, &defaultvalue)) {
> +        return NULL;
> +    }
> +    res = PyObject_LengthHint(obj, defaultvalue);
> +    if (res == -1 && PyErr_Occurred()) {
> +        return NULL;
> +    }
> +    return PyLong_FromSsize_t(res);
> +}
> +
> +
>  PyDoc_STRVAR(compare_digest__doc__,
>  "compare_digest(a, b) -> bool\n"
>  "\n"
> @@ -366,6 +391,8 @@
>  
>      {"_compare_digest", (PyCFunction)compare_digest, METH_VARARGS,
>       compare_digest__doc__},
> +     {"length_hint", (PyCFunction)length_hint, METH_VARARGS,
> +     length_hint__doc__},
>      {NULL,              NULL}           /* sentinel */
>  
>  };
> diff --git a/Objects/abstract.c b/Objects/abstract.c
> --- a/Objects/abstract.c
> +++ b/Objects/abstract.c
> @@ -64,49 +64,67 @@
>  }
>  #define PyObject_Length PyObject_Size
>  
> +int
> +_PyObject_HasLen(PyObject *o) {
> +    return (Py_TYPE(o)->tp_as_sequence && Py_TYPE(o)->tp_as_sequence->sq_length) ||
> +        (Py_TYPE(o)->tp_as_mapping && Py_TYPE(o)->tp_as_mapping->mp_length);
> +}
>  
>  /* The length hint function returns a non-negative value from o.__len__()
> -   or o.__length_hint__().  If those methods aren't found or return a negative
> -   value, then the defaultvalue is returned.  If one of the calls fails,
> -   this function returns -1.
> +   or o.__length_hint__().  If those methods aren't found.  If one of the calls
                                                          ^^^^^
Sentence incomplete: "If those methods aren't found." never says what happens.

> +   fails this function returns -1.
>  */
>  
>  Py_ssize_t
> -_PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue)
> +PyObject_LengthHint(PyObject *o, Py_ssize_t defaultvalue)

Should we put "#define _PyObject_LengthHint PyObject_LengthHint" in some header
to make porting easier?

>  {
>      _Py_IDENTIFIER(__length_hint__);
> -    PyObject *ro, *hintmeth;
> -    Py_ssize_t rv;
> -
> -    /* try o.__len__() */
> -    rv = PyObject_Size(o);
> -    if (rv >= 0)
> -        return rv;
> -    if (PyErr_Occurred()) {
> -        if (!PyErr_ExceptionMatches(PyExc_TypeError))
> +    Py_ssize_t res = PyObject_Length(o);
> +    if (res < 0 && PyErr_Occurred()) {
> +        if (!PyErr_ExceptionMatches(PyExc_TypeError)) {
>              return -1;
> +        }
>          PyErr_Clear();
>      }
> -
> -    /* try o.__length_hint__() */
> -    hintmeth = _PyObject_LookupSpecial(o, &PyId___length_hint__);
> -    if (hintmeth == NULL) {
> -        if (PyErr_Occurred())
> +    else {
> +        return res;
> +    }
> +    PyObject *hint = _PyObject_LookupSpecial(o, &PyId___length_hint__);
> +    if (hint == NULL) {
> +        if (PyErr_Occurred()) {
>              return -1;
> -        else
> -            return defaultvalue;
> -    }
> -    ro = PyObject_CallFunctionObjArgs(hintmeth, NULL);
> -    Py_DECREF(hintmeth);
> -    if (ro == NULL) {
> -        if (!PyErr_ExceptionMatches(PyExc_TypeError))
> -            return -1;
> -        PyErr_Clear();
> +        }
>          return defaultvalue;
>      }
> -    rv = PyLong_Check(ro) ? PyLong_AsSsize_t(ro) : defaultvalue;
> -    Py_DECREF(ro);
> -    return rv;
> +    PyObject *result = PyObject_CallFunctionObjArgs(hint, NULL);
> +    Py_DECREF(hint);
> +    if (result == NULL) {
> +        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
> +            PyErr_Clear();
> +            return defaultvalue;
> +        }
> +        return -1;
> +    }
> +    else if (result == Py_NotImplemented) {
> +        Py_DECREF(result);
> +        return defaultvalue;
> +    }
> +    if (!PyLong_Check(result)) {
> +        PyErr_Format(PyExc_TypeError, "Length hint must be an integer, not %s",

We usually limit the string size here, e.g. "%.100s".

> +            Py_TYPE(result)->tp_name);
> +        Py_DECREF(result);
> +        return -1;
> +    }
> +    defaultvalue = PyLong_AsSsize_t(result);

Not sure the micro-optimization is worth the confusion of reassigning defaultvalue.

> +    Py_DECREF(result);
> +    if (defaultvalue < 0 && PyErr_Occurred()) {
> +        return -1;
> +    }
> +    if (defaultvalue < 0) {
> +        PyErr_Format(PyExc_ValueError, "__length_hint__() should return >= 0");

Exception message is inconsistent with above: "Length hint" vs "__length_hint__()".

> +        return -1;
> +    }
> +    return defaultvalue;
>  }

> diff --git a/Objects/iterobject.c b/Objects/iterobject.c
> --- a/Objects/iterobject.c
> +++ b/Objects/iterobject.c
> @@ -76,9 +76,14 @@
>      Py_ssize_t seqsize, len;
>  
>      if (it->it_seq) {
> -        seqsize = PySequence_Size(it->it_seq);
> -        if (seqsize == -1)
> -            return NULL;
> +        if (_PyObject_HasLen(it->it_seq)) {
> +            seqsize = PySequence_Size(it->it_seq);
> +            if (seqsize == -1)
> +                return NULL;
> +        }
> +        else {
> +            return Py_NotImplemented;

An INCREF is missing here, as discussed on IRC.

> +        }
>          len = seqsize - it->it_index;
>          if (len >= 0)
>              return PyLong_FromSsize_t(len);




More information about the Python-Dev mailing list