[Cython] Cython 0.16 RC 1

mark florisson markflorisson88 at gmail.com
Sat Apr 14 23:22:50 CEST 2012


On 14 April 2012 22:21, mark florisson <markflorisson88 at gmail.com> wrote:
> On 14 April 2012 22:13, Wes McKinney <wesmckinn at gmail.com> wrote:
>> On Sat, Apr 14, 2012 at 11:32 AM, mark florisson
>> <markflorisson88 at gmail.com> wrote:
>>> On 14 April 2012 14:57, Dag Sverre Seljebotn <d.s.seljebotn at astro.uio.no> wrote:
>>>> On 04/14/2012 12:46 PM, mark florisson wrote:
>>>>>
>>>>> On 12 April 2012 22:00, Wes McKinney<wesmckinn at gmail.com>  wrote:
>>>>>>
>>>>>> On Thu, Apr 12, 2012 at 10:38 AM, mark florisson
>>>>>> <markflorisson88 at gmail.com>  wrote:
>>>>>>>
>>>>>>> Yet another release candidate, this will hopefully be the last before
>>>>>>> the 0.16 release. You can grab it from here:
>>>>>>> http://wiki.cython.org/ReleaseNotes-0.16
>>>>>>>
>>>>>>> There were several fixes for the numpy attribute rewrite, memoryviews
>>>>>>> and fused types. Accessing the 'base' attribute of a typed ndarray now
>>>>>>> goes through the object layer, which means direct assignment is no
>>>>>>> longer supported.
>>>>>>>
>>>>>>> If there are any problems, please let us know.
>>>>>>> _______________________________________________
>>>>>>> cython-devel mailing list
>>>>>>> cython-devel at python.org
>>>>>>> http://mail.python.org/mailman/listinfo/cython-devel
>>>>>>
>>>>>>
>>>>>> I'm unable to build pandas using git master Cython. I just released
>>>>>> pandas 0.7.3 today which has no issues at all with 0.15.1:
>>>>>>
>>>>>> http://pypi.python.org/pypi/pandas
>>>>>>
>>>>>> For example:
>>>>>>
>>>>>> 16:57 ~/code/pandas  (master)$ python setup.py build_ext --inplace
>>>>>> running build_ext
>>>>>> cythoning pandas/src/tseries.pyx to pandas/src/tseries.c
>>>>>>
>>>>>> Error compiling Cython file:
>>>>>> ------------------------------------------------------------
>>>>>> ...
>>>>>>        self.store = {}
>>>>>>
>>>>>>        ptr = <int32_t**> malloc(self.depth * sizeof(int32_t*))
>>>>>>
>>>>>>        for i in range(self.depth):
>>>>>>            ptr[i] = <int32_t*> (<ndarray> label_arrays[i]).data
>>>>>>                                                          ^
>>>>>> ------------------------------------------------------------
>>>>>>
>>>>>> pandas/src/tseries.pyx:107:59: Compiler crash in
>>>>>> AnalyseExpressionsTransform
>>>>>>
>>>>>> ModuleNode.body = StatListNode(tseries.pyx:1:0)
>>>>>> StatListNode.stats[23] = StatListNode(tseries.pyx:86:5)
>>>>>> StatListNode.stats[0] = CClassDefNode(tseries.pyx:86:5,
>>>>>>    as_name = u'MultiMap',
>>>>>>    class_name = u'MultiMap',
>>>>>>    doc = u'\n    Need to come up with a better data structure for
>>>>>> multi-level indexing\n    ',
>>>>>>    module_name = u'',
>>>>>>    visibility = u'private')
>>>>>> CClassDefNode.body = StatListNode(tseries.pyx:91:4)
>>>>>> StatListNode.stats[1] = StatListNode(tseries.pyx:95:4)
>>>>>> StatListNode.stats[0] = DefNode(tseries.pyx:95:4,
>>>>>>    modifiers = [...]/0,
>>>>>>    name = u'__init__',
>>>>>>    num_required_args = 2,
>>>>>>    py_wrapper_required = True,
>>>>>>    reqd_kw_flags_cname = '0',
>>>>>>    used = True)
>>>>>> File 'Nodes.py', line 342, in analyse_expressions:
>>>>>> StatListNode(tseries.pyx:96:8)
>>>>>> File 'Nodes.py', line 342, in analyse_expressions:
>>>>>> StatListNode(tseries.pyx:106:8)
>>>>>> File 'Nodes.py', line 5903, in analyse_expressions:
>>>>>> ForInStatNode(tseries.pyx:106:8)
>>>>>> File 'Nodes.py', line 342, in analyse_expressions:
>>>>>> StatListNode(tseries.pyx:107:21)
>>>>>> File 'Nodes.py', line 4767, in analyse_expressions:
>>>>>> SingleAssignmentNode(tseries.pyx:107:21)
>>>>>> File 'Nodes.py', line 4872, in analyse_types:
>>>>>> SingleAssignmentNode(tseries.pyx:107:21)
>>>>>> File 'ExprNodes.py', line 7082, in analyse_types:
>>>>>> TypecastNode(tseries.pyx:107:21,
>>>>>>    result_is_used = True,
>>>>>>    use_managed_ref = True)
>>>>>> File 'ExprNodes.py', line 4274, in analyse_types:
>>>>>> AttributeNode(tseries.pyx:107:59,
>>>>>>    attribute = u'data',
>>>>>>    initialized_check = True,
>>>>>>    is_attribute = 1,
>>>>>>    member = u'data',
>>>>>>    needs_none_check = True,
>>>>>>    op = '->',
>>>>>>    result_is_used = True,
>>>>>>    use_managed_ref = True)
>>>>>> File 'ExprNodes.py', line 4360, in analyse_as_ordinary_attribute:
>>>>>> AttributeNode(tseries.pyx:107:59,
>>>>>>    attribute = u'data',
>>>>>>    initialized_check = True,
>>>>>>    is_attribute = 1,
>>>>>>    member = u'data',
>>>>>>    needs_none_check = True,
>>>>>>    op = '->',
>>>>>>    result_is_used = True,
>>>>>>    use_managed_ref = True)
>>>>>> File 'ExprNodes.py', line 4436, in analyse_attribute:
>>>>>> AttributeNode(tseries.pyx:107:59,
>>>>>>    attribute = u'data',
>>>>>>    initialized_check = True,
>>>>>>    is_attribute = 1,
>>>>>>    member = u'data',
>>>>>>    needs_none_check = True,
>>>>>>    op = '->',
>>>>>>    result_is_used = True,
>>>>>>    use_managed_ref = True)
>>>>>>
>>>>>> Compiler crash traceback from this point on:
>>>>>>  File "/home/wesm/code/repos/cython/Cython/Compiler/ExprNodes.py",
>>>>>> line 4436, in analyse_attribute
>>>>>>    replacement_node = numpy_transform_attribute_node(self)
>>>>>>  File "/home/wesm/code/repos/cython/Cython/Compiler/NumpySupport.py",
>>>>>> line 18, in numpy_transform_attribute_node
>>>>>>    numpy_pxd_scope = node.obj.entry.type.scope.parent_scope
>>>>>> AttributeError: 'TypecastNode' object has no attribute 'entry'
>>>>>> building 'pandas._tseries' extension
>>>>>> creating build
>>>>>> creating build/temp.linux-x86_64-2.7
>>>>>> creating build/temp.linux-x86_64-2.7/pandas
>>>>>> creating build/temp.linux-x86_64-2.7/pandas/src
>>>>>> gcc -pthread -fno-strict-aliasing -g -O2 -DNDEBUG -O2 -fPIC
>>>>>> -I/home/wesm/epd/lib/python2.7/site-packages/numpy/core/include
>>>>>> -I/home/wesm/epd/include/python2.7 -c pandas/src/tseries.c -o
>>>>>> build/temp.linux-x86_64-2.7/pandas/src/tseries.o
>>>>>> pandas/src/tseries.c:1:2: error: #error Do not use this file, it is
>>>>>> the result of a failed Cython compilation.
>>>>>> error: command 'gcc' failed with exit status 1
>>>>>>
>>>>>>
>>>>>> -----
>>>>>>
>>>>>> I kludged this particular line in the pandas/timeseries branch so it
>>>>>> will build on git master Cython, but I was treated to dozens of
>>>>>> failures, errors, and finally a segfault in the middle of the test
>>>>>> suite. Suffice to say I'm not sure I would advise you to release the
>>>>>> library in its current state until all of this is resolved. Happy to
>>>>>> help however I can but I'm back to 0.15.1 for now.
>>>>>>
>>>>>> - Wes
>>>>>> _______________________________________________
>>>>>> cython-devel mailing list
>>>>>> cython-devel at python.org
>>>>>> http://mail.python.org/mailman/listinfo/cython-devel
>>>>>
>>>>>
>>>>> It seems that the numpy stopgap solution broke something in pandas.
>>>>> I'm not sure what or how, but it leads to segfaults where code is
>>>>> trying to retrieve objects that are NULL from a numpy array. I tried
>>>>> disabling the numpy rewrites, which unbreaks this with the Cython
>>>>> release branch, so I think we should do another RC either with the
>>>>> attribute rewrite disabled or fixed.
>>>>>
>>>>> Dag, do you know what could have been broken by this fix that could
>>>>> lead to these results?
>>>>
>>>>
>>>> I can't imagine what would cause a change like the one you describe... One
>>>> thing that could cause a segfault is that, technically, we should now call
>>>> import_array in every module using numpy.pxd, which we currently don't do.
>>>> If a NumPy version is used where PyArray_DATA or similar is not a macro, you
>>>> would segfault... that should be fixed.
>>>>
>>>> Dag
>>>>
>>>> _______________________________________________
>>>> cython-devel mailing list
>>>> cython-devel at python.org
>>>> http://mail.python.org/mailman/listinfo/cython-devel
>>>
>>> Yeah that makes sense, but the thing is that pandas is already calling
>>> import_array everywhere, and the function calls themselves work, it's
>>> the result that's NULL. Now this could be a bug in pandas, but seeing
>>> that pandas works fine without the stopgap solution (that is, it
>>> doesn't pass all the tests but at least it doesn't segfault), I think
>>> it's something funky on our side.
>>>
>>> So I suppose I'll disable the fix for 0.16, and we can try to fix it
>>> for the next release.
>>> _______________________________________________
>>> cython-devel mailing list
>>> cython-devel at python.org
>>> http://mail.python.org/mailman/listinfo/cython-devel
>>
>> Where is the bug in pandas / bad memory access? Maybe something I can
>> work around?
>> _______________________________________________
>> cython-devel mailing list
>> cython-devel at python.org
>> http://mail.python.org/mailman/listinfo/cython-devel
>
> It may have something to do with the Sliders, I'm not sure, but
> without looking carefully at them they look somewhat dangerous.
> Anyway, here is a traceback from the Cython debugger:
>
> #7  0x00000000080dd760 in <module>() at /home/mark/apps/bin/nosetests:8
>         8       load_entry_point('nose==1.1.2', 'console_scripts',
> 'nosetests')()
> #18 0x00000000080dd760 in __init__() at
> /home/mark/apps/lib/python2.7/site-packages/nose/core.py:118
>       118                **extra_args)
> #25 0x00000000080dd760 in __init__() at
> /home/mark/apps/lib/python2.7/unittest/main.py:95
>        95            self.runTests()
> #28 0x00000000080dd760 in runTests() at
> /home/mark/apps/lib/python2.7/site-packages/nose/core.py:197
>       197            result = self.testRunner.run(self.test)
> #31 0x00000000080dd760 in run() at
> /home/mark/apps/lib/python2.7/site-packages/nose/core.py:61
>        61            test(result)
> #41 0x00000000080dd760 in __call__() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:176
>       176            return self.run(*arg, **kw)
> #46 0x00000000080dd760 in run() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:223
>       223                    test(orig)
> #56 0x00000000080dd760 in __call__() at
> /home/mark/apps/lib/python2.7/unittest/suite.py:65
>        65            return self.run(*args, **kwds)
> #61 0x00000000080dd760 in run() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:74
>        74                test(result)
> #71 0x00000000080dd760 in __call__() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:176
>       176            return self.run(*arg, **kw)
> #76 0x00000000080dd760 in run() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:223
>       223                    test(orig)
> #86 0x00000000080dd760 in __call__() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:176
>       176            return self.run(*arg, **kw)
> #91 0x00000000080dd760 in run() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:223
>       223                    test(orig)
> #101 0x00000000080dd760 in __call__() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:176
>       176            return self.run(*arg, **kw)
> #106 0x00000000080dd760 in run() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:223
>       223                    test(orig)
> #116 0x00000000080dd760 in __call__() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:176
>       176            return self.run(*arg, **kw)
> #121 0x00000000080dd760 in run() at
> /home/mark/apps/lib/python2.7/site-packages/nose/suite.py:223
>       223                    test(orig)
> #131 0x00000000080dd760 in __call__() at
> /home/mark/apps/lib/python2.7/site-packages/nose/case.py:45
>        45            return self.run(*arg, **kwarg)
> #136 0x00000000080dd760 in run() at
> /home/mark/apps/lib/python2.7/site-packages/nose/case.py:133
>       133                    self.runTest(result)
> #139 0x00000000080dd760 in runTest() at
> /home/mark/apps/lib/python2.7/site-packages/nose/case.py:151
>       151            test(result)
> #149 0x00000000080dd760 in __call__() at
> /home/mark/apps/lib/python2.7/unittest/case.py:376
>       376            return self.run(*args, **kwds)
> #154 0x00000000080dd760 in run() at
> /home/mark/apps/lib/python2.7/unittest/case.py:318
>       318                        testMethod()
> #157 0x00000000080dd760 in test_as_index_series_return_frame() at
> /home/mark/code/pandas/pandas/tests/test_groupby.py:710
>       710            expected = grouped.agg(np.sum).ix[:, ['A', 'C']]
> #161 0x00000000080dd760 in agg() at
> /home/mark/code/pandas/pandas/core/groupby.py:282
>       282            return self.aggregate(func, *args, **kwargs)
> #166 0x00000000080dd760 in aggregate() at
> /home/mark/code/pandas/pandas/core/groupby.py:1050
>      1050                    result = self._aggregate_generic(arg,
> *args, **kwargs)
> #171 0x00000000080dd760 in _aggregate_generic() at
> /home/mark/code/pandas/pandas/core/groupby.py:1103
>      1103                    return
> self._aggregate_item_by_item(func, *args, **kwargs)
> #176 0x00000000080dd760 in _aggregate_item_by_item() at
> /home/mark/code/pandas/pandas/core/groupby.py:1137
>      1137                    result[item] = colg.agg(func, *args, **kwargs)
> #181 0x00000000080dd760 in agg() at
> /home/mark/code/pandas/pandas/core/groupby.py:282
>       282            return self.aggregate(func, *args, **kwargs)
> #186 0x00000000080dd760 in aggregate() at
> /home/mark/code/pandas/pandas/core/groupby.py:795
>       795                    return
> self._python_agg_general(func_or_funcs, *args, **kwargs)
> #191 0x00000000080dd760 in _python_agg_general() at
> /home/mark/code/pandas/pandas/core/groupby.py:370
>       370
> comp_ids, max_group)
> #194 0x00000000080dd760 in _aggregate_series() at
> /home/mark/code/pandas/pandas/core/groupby.py:421
>       421                return self._aggregate_series_fast(obj,
> func, group_index, ngroups)
> #197 0x00000000080dd760 in _aggregate_series_fast() at
> /home/mark/code/pandas/pandas/core/groupby.py:437
>       437            result, counts = grouper.get_result()
> #199 0x000000000091880e in get_result() at
> /home/mark/code/pandas/pandas/src/tseries.pyx:127
>       127                else:
> #204 0x00000000080dd760 in <lambda>() at
> /home/mark/code/pandas/pandas/core/groupby.py:361
>       361            agg_func = lambda x: func(x, *args, **kwargs)
> #209 0x00000000080dd760 in sum() at
> /home/mark/apps/lib/python2.7/site-packages/numpy/core/fromnumeric.py:1455
>      1455        return sum(axis, dtype, out)
> #213 0x00000000080dd760 in sum() at
> /home/mark/code/pandas/pandas/core/series.py:862
>       862            return nanops.nansum(self.values, skipna=skipna)
> #217 0x00000000080dd760 in f() at
> /home/mark/code/pandas/pandas/core/nanops.py:28
>        28                    result = alt(values, axis=axis,
> skipna=skipna, **kwargs)
> #222 0x00000000080dd760 in _nansum() at
> /home/mark/code/pandas/pandas/core/nanops.py:48
>        48        mask = isnull(values)
> #225 0x00000000080dd760 in isnull() at
> /home/mark/code/pandas/pandas/core/common.py:60
>        60                vec = lib.isnullobj(obj.ravel())
> #227 0x000000000088efe0 in isnullobj() at
> /home/mark/code/pandas/pandas/src/tseries.pyx:224
>       224    cpdef checknull(object val):
>
> Actually, the source line shown for that last frame is wrong, as the
> debugger is confused by Cython's 'include' statement (that has to be
> fixed as well at some point :). The error occurs on line 240 in
> isnullobj on the statement 'val = arr[i]', because arr[i] is a NULL
> PyObject *, so the incref fails.
>
> If you have any idea why the stopgap solution results in different
> behaviour, please let us know.

(The get_result() is actually from reduce.pyx, not from tseries.pyx,
but again the debugger is confused by the include of reduce.pyx).


More information about the cython-devel mailing list