From numpy-svn at scipy.org Thu Jan 1 02:56:54 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 1 Jan 2009 01:56:54 -0600 (CST) Subject: [Numpy-svn] r6286 - trunk/numpy/distutils Message-ID: <20090101075654.1D45BC8410D@scipy.org> Author: jarrod.millman Date: 2009-01-01 01:56:53 -0600 (Thu, 01 Jan 2009) New Revision: 6286 Modified: trunk/numpy/distutils/system_info.py Log: add default include dir for Fedora/Red Hat (see SciPy ticket 817) Modified: trunk/numpy/distutils/system_info.py =================================================================== --- trunk/numpy/distutils/system_info.py 2008-12-31 23:46:34 UTC (rev 6285) +++ trunk/numpy/distutils/system_info.py 2009-01-01 07:56:53 UTC (rev 6286) @@ -141,7 +141,8 @@ '/opt/local/lib', '/sw/lib'] default_include_dirs = ['/usr/local/include', '/opt/include', '/usr/include', - '/opt/local/include', '/sw/include'] + '/opt/local/include', '/sw/include', + '/usr/include/suitesparse'] default_src_dirs = ['.','/usr/local/src', '/opt/src','/sw/src'] try: From numpy-svn at scipy.org Thu Jan 1 03:13:27 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 1 Jan 2009 02:13:27 -0600 (CST) Subject: [Numpy-svn] r6287 - branches/1.2.x/numpy/distutils Message-ID: <20090101081327.4270DC8410D@scipy.org> Author: jarrod.millman Date: 2009-01-01 02:13:26 -0600 (Thu, 01 Jan 2009) New Revision: 6287 Modified: branches/1.2.x/numpy/distutils/system_info.py Log: Backported default include dir for Fedora/Red Hat (see scipy ticket 817) (r6286) Modified: branches/1.2.x/numpy/distutils/system_info.py =================================================================== --- branches/1.2.x/numpy/distutils/system_info.py 2009-01-01 07:56:53 UTC (rev 6286) +++ branches/1.2.x/numpy/distutils/system_info.py 2009-01-01 08:13:26 UTC (rev 6287) @@ -141,7 +141,8 @@ '/opt/local/lib', '/sw/lib'] default_include_dirs = ['/usr/local/include', '/opt/include', '/usr/include', - '/opt/local/include', '/sw/include'] + 
'/opt/local/include', '/sw/include', + '/usr/include/suitesparse'] default_src_dirs = ['.','/usr/local/src', '/opt/src','/sw/src'] try: From numpy-svn at scipy.org Thu Jan 1 03:15:30 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 1 Jan 2009 02:15:30 -0600 (CST) Subject: [Numpy-svn] r6288 - branches/1.1.x/numpy/distutils Message-ID: <20090101081530.F3114C8410D@scipy.org> Author: jarrod.millman Date: 2009-01-01 02:15:29 -0600 (Thu, 01 Jan 2009) New Revision: 6288 Modified: branches/1.1.x/numpy/distutils/system_info.py Log: Backported default include dir for Fedora/Red Hat (see scipy ticket 817) (r6286) Modified: branches/1.1.x/numpy/distutils/system_info.py =================================================================== --- branches/1.1.x/numpy/distutils/system_info.py 2009-01-01 08:13:26 UTC (rev 6287) +++ branches/1.1.x/numpy/distutils/system_info.py 2009-01-01 08:15:29 UTC (rev 6288) @@ -141,7 +141,8 @@ '/opt/local/lib', '/sw/lib'] default_include_dirs = ['/usr/local/include', '/opt/include', '/usr/include', - '/opt/local/include', '/sw/include'] + '/opt/local/include', '/sw/include', + '/usr/include/suitesparse'] default_src_dirs = ['.','/usr/local/src', '/opt/src','/sw/src'] try: From numpy-svn at scipy.org Thu Jan 1 04:28:03 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 1 Jan 2009 03:28:03 -0600 (CST) Subject: [Numpy-svn] r6289 - branches/fix_float_format/numpy/core/src Message-ID: <20090101092803.63AAFC8410D@scipy.org> Author: cdavid Date: 2009-01-01 03:26:43 -0600 (Thu, 01 Jan 2009) New Revision: 6289 Modified: branches/fix_float_format/numpy/core/src/npy_format.c Log: Remove some tabs. 
Modified: branches/fix_float_format/numpy/core/src/npy_format.c =================================================================== --- branches/fix_float_format/numpy/core/src/npy_format.c 2009-01-01 08:15:29 UTC (rev 6288) +++ branches/fix_float_format/numpy/core/src/npy_format.c 2009-01-01 09:26:43 UTC (rev 6289) @@ -51,7 +51,7 @@ if we can delete some of the leading zeros */ if (significant_digit_cnt < MIN_EXPONENT_DIGITS) significant_digit_cnt = MIN_EXPONENT_DIGITS; - + extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt; /* Delete extra_zeros_cnt worth of characters from the @@ -225,14 +225,14 @@ /* * NumPyOS_ascii_format*: - * - buffer: A buffer to place the resulting string in - * - buf_size: The length of the buffer. - * - format: The printf()-style format to use for the code to use for - * converting. - * - value: The value to convert - * - decimal: if != 0, always has a decimal, and at leasat one digit after - * the decimal. This has the same effect as passing 'Z' in the origianl - * PyOS_ascii_formatd + * - buffer: A buffer to place the resulting string in + * - buf_size: The length of the buffer. + * - format: The printf()-style format to use for the code to use for + * converting. + * - value: The value to convert + * - decimal: if != 0, always has a decimal, and at leasat one digit after + * the decimal. This has the same effect as passing 'Z' in the origianl + * PyOS_ascii_formatd * * This is similar to PyOS_ascii_formatd in python > 2.6, except that it does * not handle 'n', and handles nan / inf. @@ -354,8 +354,8 @@ /* * NumPyOS_ascii_ftolf: - * * fp: FILE pointer - * * value: Place to store the value read + * * fp: FILE pointer + * * value: Place to store the value read * * Similar to PyOS_ascii_strtod, except that it reads input from a file. 
* From numpy-svn at scipy.org Thu Jan 1 04:29:50 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 1 Jan 2009 03:29:50 -0600 (CST) Subject: [Numpy-svn] r6290 - branches/fix_float_format/numpy/core/src Message-ID: <20090101092950.3855CC8410D@scipy.org> Author: cdavid Date: 2009-01-01 03:28:16 -0600 (Thu, 01 Jan 2009) New Revision: 6290 Modified: branches/fix_float_format/numpy/core/src/npy_format.c Log: Do not use strncpy but strcpy: we know the string sizes, and strncpy sucks anyway. Modified: branches/fix_float_format/numpy/core/src/npy_format.c =================================================================== --- branches/fix_float_format/numpy/core/src/npy_format.c 2009-01-01 09:26:43 UTC (rev 6289) +++ branches/fix_float_format/numpy/core/src/npy_format.c 2009-01-01 09:28:16 UTC (rev 6290) @@ -260,20 +260,20 @@ if (buf_size < 4) { \ return NULL; \ } \ - strncpy(buffer, "nan", 4); \ + strcpy(buffer, "nan"); \ } \ else { \ if (signbit(val)) { \ if (buf_size < 5) { \ return NULL; \ } \ - strncpy(buffer, "-inf", 5); \ + strcpy(buffer, "-inf"); \ } \ else { \ if (buf_size < 4) { \ return NULL; \ } \ - strncpy(buffer, "inf", 4); \ + strcpy(buffer, "inf"); \ } \ } \ return buffer; \ From numpy-svn at scipy.org Sun Jan 4 05:57:45 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Sun, 4 Jan 2009 04:57:45 -0600 (CST) Subject: [Numpy-svn] r6291 - trunk/numpy/core/code_generators Message-ID: <20090104105745.A32B3C7C029@scipy.org> Author: cdavid Date: 2009-01-04 04:57:39 -0600 (Sun, 04 Jan 2009) New Revision: 6291 Modified: trunk/numpy/core/code_generators/genapi.py Log: Do not import md5 on python >= 2.6; use hashlib instead. Modified: trunk/numpy/core/code_generators/genapi.py =================================================================== --- trunk/numpy/core/code_generators/genapi.py 2009-01-01 09:28:16 UTC (rev 6290) +++ trunk/numpy/core/code_generators/genapi.py 2009-01-04 10:57:39 UTC (rev 6291) @@ -6,9 +6,15 @@ specified. 
""" import sys, os, re -import md5 import textwrap +_PY_MAJ, _PY_MIN = sys.version_info[:2] +# md5 is deprecated from python 2.6 +if _PY_MAJ == 2 and _PY_MIN < 6: + import md5 +else: + from hashlib import md5 + __docformat__ = 'restructuredtext' # The files under src/ that are scanned for API functions From numpy-svn at scipy.org Sun Jan 4 06:08:21 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Sun, 4 Jan 2009 05:08:21 -0600 (CST) Subject: [Numpy-svn] r6292 - trunk/numpy/distutils Message-ID: <20090104110821.AB6FEC7C029@scipy.org> Author: cdavid Date: 2009-01-04 05:08:16 -0600 (Sun, 04 Jan 2009) New Revision: 6292 Modified: trunk/numpy/distutils/lib2def.py trunk/numpy/distutils/mingw32ccompiler.py trunk/numpy/distutils/misc_util.py Log: Do not use popen* but subprocess.Popen instead. Modified: trunk/numpy/distutils/lib2def.py =================================================================== --- trunk/numpy/distutils/lib2def.py 2009-01-04 10:57:39 UTC (rev 6291) +++ trunk/numpy/distutils/lib2def.py 2009-01-04 11:08:16 UTC (rev 6292) @@ -1,6 +1,7 @@ import re import sys import os +import subprocess __doc__ = """This module generates a DEF file from the symbols in an MSVC-compiled DLL import library. It correctly discriminates between @@ -59,13 +60,13 @@ deffile = None return libfile, deffile -def getnm(nm_cmd = 'nm -Cs python%s.lib' % py_ver): +def getnm(nm_cmd = ['nm', '-Cs', 'python%s.lib' % py_ver]): """Returns the output of nm_cmd via a pipe. 
nm_output = getnam(nm_cmd = 'nm -Cs py_lib')""" - f = os.popen(nm_cmd) - nm_output = f.read() - f.close() + f = subprocess.Popen(nm_cmd, shell=True, stdout=subprocess.PIPE) + nm_output = f.stdout.read() + f.stdout.close() return nm_output def parse_nm(nm_output): @@ -107,7 +108,7 @@ deffile = sys.stdout else: deffile = open(deffile, 'w') - nm_cmd = '%s %s' % (DEFAULT_NM, libfile) + nm_cmd = [str(DEFAULT_NM), str(libfile)] nm_output = getnm(nm_cmd) dlist, flist = parse_nm(nm_output) output_def(dlist, flist, DEF_HEADER, deffile) Modified: trunk/numpy/distutils/mingw32ccompiler.py =================================================================== --- trunk/numpy/distutils/mingw32ccompiler.py 2009-01-04 10:57:39 UTC (rev 6291) +++ trunk/numpy/distutils/mingw32ccompiler.py 2009-01-04 11:08:16 UTC (rev 6292) @@ -9,6 +9,7 @@ """ import os +import subprocess import sys import log @@ -50,9 +51,10 @@ # get_versions methods regex if self.gcc_version is None: import re - out = os.popen('gcc -dumpversion','r') - out_string = out.read() - out.close() + p = subprocess.Popen(['gcc', '-dumpversion'], shell=True, + stdout=subprocess.PIPE) + out_string = p.stdout.read() + p.stdout.close() result = re.search('(\d+\.\d+)',out_string) if result: self.gcc_version = StrictVersion(result.group(1)) Modified: trunk/numpy/distutils/misc_util.py =================================================================== --- trunk/numpy/distutils/misc_util.py 2009-01-04 10:57:39 UTC (rev 6291) +++ trunk/numpy/distutils/misc_util.py 2009-01-04 11:08:16 UTC (rev 6292) @@ -6,6 +6,7 @@ import glob import atexit import tempfile +import subprocess try: set @@ -1340,7 +1341,10 @@ revision = None m = None try: - sin, sout = os.popen4('svnversion') + p = subprocess.Popen(['svnversion'], shell=True, + stdout=subprocess.PIPE, stderr=STDOUT, + close_fds=True) + sout = p.stdout m = re.match(r'(?P\d+)', sout.read()) except: pass From numpy-svn at scipy.org Sun Jan 4 07:03:32 2009 From: numpy-svn at scipy.org 
(numpy-svn at scipy.org) Date: Sun, 4 Jan 2009 06:03:32 -0600 (CST) Subject: [Numpy-svn] r6293 - trunk/numpy/core/code_generators Message-ID: <20090104120332.E1DDCC7C029@scipy.org> Author: cdavid Date: 2009-01-04 06:03:29 -0600 (Sun, 04 Jan 2009) New Revision: 6293 Modified: trunk/numpy/core/code_generators/genapi.py Log: Revert md5 change: hashlib.md5 is not a drop-in replacement for md5. Modified: trunk/numpy/core/code_generators/genapi.py =================================================================== --- trunk/numpy/core/code_generators/genapi.py 2009-01-04 11:08:16 UTC (rev 6292) +++ trunk/numpy/core/code_generators/genapi.py 2009-01-04 12:03:29 UTC (rev 6293) @@ -6,15 +6,9 @@ specified. """ import sys, os, re +import md5 import textwrap -_PY_MAJ, _PY_MIN = sys.version_info[:2] -# md5 is deprecated from python 2.6 -if _PY_MAJ == 2 and _PY_MIN < 6: - import md5 -else: - from hashlib import md5 - __docformat__ = 'restructuredtext' # The files under src/ that are scanned for API functions From numpy-svn at scipy.org Sun Jan 4 15:16:03 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Sun, 4 Jan 2009 14:16:03 -0600 (CST) Subject: [Numpy-svn] r6294 - in trunk/numpy/ma: . 
tests Message-ID: <20090104201603.54C7DC84112@scipy.org> Author: pierregm Date: 2009-01-04 14:16:00 -0600 (Sun, 04 Jan 2009) New Revision: 6294 Modified: trunk/numpy/ma/core.py trunk/numpy/ma/tests/test_core.py Log: * adapted default_fill_value for flexible datatype * fixed max/minimum_fill_value for flexible datatype Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-04 12:03:29 UTC (rev 6293) +++ trunk/numpy/ma/core.py 2009-01-04 20:16:00 UTC (rev 6294) @@ -152,7 +152,7 @@ """ if hasattr(obj,'dtype'): - defval = default_filler[obj.dtype.kind] + defval = _check_fill_value(None, obj.dtype) elif isinstance(obj, np.dtype): if obj.subdtype: defval = default_filler[obj.subdtype[0].kind] @@ -170,6 +170,18 @@ defval = default_filler['O'] return defval + +def _recursive_extremum_fill_value(ndtype, extremum): + names = ndtype.names + if names: + deflist = [] + for name in names: + fval = _recursive_extremum_fill_value(ndtype[name], extremum) + deflist.append(fval) + return tuple(deflist) + return extremum[ndtype] + + def minimum_fill_value(obj): """ Calculate the default fill value suitable for taking the minimum of ``obj``. @@ -177,11 +189,7 @@ """ errmsg = "Unsuitable type for calculating minimum." if hasattr(obj, 'dtype'): - objtype = obj.dtype - filler = min_filler[objtype] - if filler is None: - raise TypeError(errmsg) - return filler + return _recursive_extremum_fill_value(obj.dtype, min_filler) elif isinstance(obj, float): return min_filler[ntypes.typeDict['float_']] elif isinstance(obj, int): @@ -193,6 +201,7 @@ else: raise TypeError(errmsg) + def maximum_fill_value(obj): """ Calculate the default fill value suitable for taking the maximum of ``obj``. @@ -200,11 +209,7 @@ """ errmsg = "Unsuitable type for calculating maximum." 
if hasattr(obj, 'dtype'): - objtype = obj.dtype - filler = max_filler[objtype] - if filler is None: - raise TypeError(errmsg) - return filler + return _recursive_extremum_fill_value(obj.dtype, max_filler) elif isinstance(obj, float): return max_filler[ntypes.typeDict['float_']] elif isinstance(obj, int): @@ -257,7 +262,7 @@ if fields: descr = ndtype.descr fill_value = np.array(_recursive_set_default_fill_value(descr), - dtype=ndtype) + dtype=ndtype,) else: fill_value = default_fill_value(ndtype) elif fields: Modified: trunk/numpy/ma/tests/test_core.py =================================================================== --- trunk/numpy/ma/tests/test_core.py 2009-01-04 12:03:29 UTC (rev 6293) +++ trunk/numpy/ma/tests/test_core.py 2009-01-04 20:16:00 UTC (rev 6294) @@ -1074,6 +1074,29 @@ control = np.array((0,0,0), dtype="int, float, float").astype(ndtype) assert_equal(_check_fill_value(0, ndtype), control) + + def test_extremum_fill_value(self): + "Tests extremum fill values for flexible type." 
+ a = array([(1, (2, 3)), (4, (5, 6))], + dtype=[('A', int), ('B', [('BA', int), ('BB', int)])]) + test = a.fill_value + assert_equal(test['A'], default_fill_value(a['A'])) + assert_equal(test['B']['BA'], default_fill_value(a['B']['BA'])) + assert_equal(test['B']['BB'], default_fill_value(a['B']['BB'])) + # + test = minimum_fill_value(a) + assert_equal(test[0], minimum_fill_value(a['A'])) + assert_equal(test[1][0], minimum_fill_value(a['B']['BA'])) + assert_equal(test[1][1], minimum_fill_value(a['B']['BB'])) + assert_equal(test[1], minimum_fill_value(a['B'])) + # + test = maximum_fill_value(a) + assert_equal(test[0], maximum_fill_value(a['A'])) + assert_equal(test[1][0], maximum_fill_value(a['B']['BA'])) + assert_equal(test[1][1], maximum_fill_value(a['B']['BB'])) + assert_equal(test[1], maximum_fill_value(a['B'])) + + #------------------------------------------------------------------------------ class TestUfuncs(TestCase): @@ -1820,6 +1843,28 @@ assert_equal(am, an) + def test_sort_flexible(self): + "Test sort on flexible dtype." 
+ a = array([(3, 3), (3, 2), (2, 2), (2, 1), (1, 0), (1, 1), (1, 2)], + mask=[(0, 0), (0, 1), (0, 0), (0, 0), (1, 0), (0, 0), (0, 0)], + dtype=[('A', int), ('B', int)]) + # + test = sort(a) + b = array([(1, 1), (1, 2), (2, 1), (2, 2), (3, 3), (3, 2), (1, 0)], + mask=[(0, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 1), (1, 0)], + dtype=[('A', int), ('B', int)]) + assert_equal(test, b) + assert_equal(test.mask, b.mask) + # + test = sort(a, endwith=False) + b = array([(1, 0), (1, 1), (1, 2), (2, 1), (2, 2), (3, 2), (3, 3),], + mask=[(1, 0), (0, 0), (0, 0), (0, 0), (0, 0), (0, 1), (0, 0),], + dtype=[('A', int), ('B', int)]) + assert_equal(test, b) + assert_equal(test.mask, b.mask) + # + + def test_squeeze(self): "Check squeeze" data = masked_array([[1,2,3]]) From numpy-svn at scipy.org Mon Jan 5 16:51:28 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 5 Jan 2009 15:51:28 -0600 (CST) Subject: [Numpy-svn] r6295 - trunk Message-ID: <20090105215128.E274EC7C010@scipy.org> Author: stefan Date: 2009-01-05 15:51:18 -0600 (Mon, 05 Jan 2009) New Revision: 6295 Modified: trunk/THANKS.txt Log: Credit more developers. Modified: trunk/THANKS.txt =================================================================== --- trunk/THANKS.txt 2009-01-04 20:16:00 UTC (rev 6294) +++ trunk/THANKS.txt 2009-01-05 21:51:18 UTC (rev 6295) @@ -45,9 +45,18 @@ Pierre Gerard-Marchant for rewriting masked array functionality. Roberto de Almeida for the buffered array iterator. Alan McIntyre for updating the NumPy test framework to use nose, improve - the test coverage, and enhancing the test system documentation + the test coverage, and enhancing the test system documentation. +Joe Harrington for administering the 2008 Documentation Sprint. NumPy is based on the Numeric (Jim Hugunin, Paul Dubois, Konrad Hinsen, and David Ascher) and NumArray (Perry Greenfield, J Todd Miller, Rick White and Paul Barrett) projects. We thank them for paving the way ahead. 
+ +Institutions +------------ + +Enthought for providing resources and finances for development of NumPy. +UC Berkeley for providing travel money and hosting numerous sprints. +The University of Central Florida for funding the 2008 Documentation Marathon. +The University of Stellenbosch for hosting the buildbot. From numpy-svn at scipy.org Mon Jan 5 17:52:23 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 5 Jan 2009 16:52:23 -0600 (CST) Subject: [Numpy-svn] r6296 - trunk/numpy/ma Message-ID: <20090105225223.08571C7C015@scipy.org> Author: pierregm Date: 2009-01-05 16:52:21 -0600 (Mon, 05 Jan 2009) New Revision: 6296 Modified: trunk/numpy/ma/core.py Log: *moved the printing templates out of MaskedArray.__repr__ Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-05 21:51:18 UTC (rev 6295) +++ trunk/numpy/ma/core.py 2009-01-05 22:52:21 UTC (rev 6296) @@ -1340,6 +1340,32 @@ np.putmask(curdata, curmask, printopt) return +_print_templates = dict(long = """\ +masked_%(name)s(data = + %(data)s, + %(nlen)s mask = + %(mask)s, + %(nlen)s fill_value = %(fill)s) +""", + short = """\ +masked_%(name)s(data = %(data)s, + %(nlen)s mask = %(mask)s, +%(nlen)s fill_value = %(fill)s) +""", + long_flx = """\ +masked_%(name)s(data = + %(data)s, + %(nlen)s mask = + %(mask)s, +%(nlen)s fill_value = %(fill)s, + %(nlen)s dtype = %(dtype)s) +""", + short_flx = """\ +masked_%(name)s(data = %(data)s, +%(nlen)s mask = %(mask)s, +%(nlen)s fill_value = %(fill)s, +%(nlen)s dtype = %(dtype)s) +""") #####-------------------------------------------------------------------------- #---- --- MaskedArray class --- @@ -2245,43 +2271,18 @@ """Literal string representation. 
""" - with_mask = """\ -masked_%(name)s(data = - %(data)s, - mask = - %(mask)s, - fill_value=%(fill)s) -""" - with_mask1 = """\ -masked_%(name)s(data = %(data)s, - mask = %(mask)s, - fill_value=%(fill)s) -""" - with_mask_flx = """\ -masked_%(name)s(data = - %(data)s, - mask = - %(mask)s, - fill_value=%(fill)s, - dtype=%(dtype)s) -""" - with_mask1_flx = """\ -masked_%(name)s(data = %(data)s, - mask = %(mask)s, - fill_value=%(fill)s - dtype=%(dtype)s) -""" n = len(self.shape) name = repr(self._data).split('(')[0] - parameters = dict(name=name, data=str(self), mask=str(self._mask), + parameters = dict(name=name, nlen=" "*len(name), + data=str(self), mask=str(self._mask), fill=str(self.fill_value), dtype=str(self.dtype)) if self.dtype.names: if n <= 1: - return with_mask1_flx % parameters - return with_mask_flx % parameters + return _print_templates['short_flx'] % parameters + return _print_templates['long_flx'] % parameters elif n <= 1: - return with_mask1 % parameters - return with_mask % parameters + return _print_templates['short'] % parameters + return _print_templates['long'] % parameters #............................................ def __add__(self, other): "Add other to self, and return a new masked array." From numpy-svn at scipy.org Tue Jan 6 05:09:40 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Tue, 6 Jan 2009 04:09:40 -0600 (CST) Subject: [Numpy-svn] r6297 - in trunk/numpy: core/tests lib/tests linalg/tests Message-ID: <20090106100940.6EB55C7C02F@scipy.org> Author: stefan Date: 2009-01-06 04:09:00 -0600 (Tue, 06 Jan 2009) New Revision: 6297 Modified: trunk/numpy/core/tests/test_numerictypes.py trunk/numpy/core/tests/test_unicode.py trunk/numpy/lib/tests/test_io.py trunk/numpy/linalg/tests/test_linalg.py Log: Use new-style classes with multiple-inheritance to address bug in IronPython. 
Modified: trunk/numpy/core/tests/test_numerictypes.py =================================================================== --- trunk/numpy/core/tests/test_numerictypes.py 2009-01-05 22:52:21 UTC (rev 6296) +++ trunk/numpy/core/tests/test_numerictypes.py 2009-01-06 10:09:00 UTC (rev 6297) @@ -97,7 +97,7 @@ # Creation tests ############################################################ -class create_zeros: +class create_zeros(object): """Check the creation of heterogeneous arrays zero-valued""" def test_zeros0D(self): @@ -140,7 +140,7 @@ _descr = Ndescr -class create_values: +class create_values(object): """Check the creation of heterogeneous arrays with values""" def test_tuple(self): @@ -200,7 +200,7 @@ # Reading tests ############################################################ -class read_values_plain: +class read_values_plain(object): """Check the reading of values in heterogeneous arrays (plain)""" def test_access_fields(self): @@ -232,7 +232,7 @@ multiple_rows = 1 _buffer = PbufferT -class read_values_nested: +class read_values_nested(object): """Check the reading of values in heterogeneous arrays (nested)""" Modified: trunk/numpy/core/tests/test_unicode.py =================================================================== --- trunk/numpy/core/tests/test_unicode.py 2009-01-05 22:52:21 UTC (rev 6296) +++ trunk/numpy/core/tests/test_unicode.py 2009-01-06 10:09:00 UTC (rev 6297) @@ -17,7 +17,7 @@ # Creation tests ############################################################ -class create_zeros: +class create_zeros(object): """Check the creation of zero-valued arrays""" def content_check(self, ua, ua_scalar, nbytes): @@ -69,7 +69,7 @@ ulen = 1009 -class create_values: +class create_values(object): """Check the creation of unicode arrays with values""" def content_check(self, ua, ua_scalar, nbytes): @@ -154,7 +154,7 @@ # Assignment tests ############################################################ -class assign_values: +class assign_values(object): """Check the 
assignment of unicode arrays with values""" def content_check(self, ua, ua_scalar, nbytes): Modified: trunk/numpy/lib/tests/test_io.py =================================================================== --- trunk/numpy/lib/tests/test_io.py 2009-01-05 22:52:21 UTC (rev 6296) +++ trunk/numpy/lib/tests/test_io.py 2009-01-06 10:09:00 UTC (rev 6297) @@ -8,7 +8,7 @@ MAJVER, MINVER = sys.version_info[:2] -class RoundtripTest: +class RoundtripTest(object): def roundtrip(self, save_func, *args, **kwargs): """ save_func : callable Modified: trunk/numpy/linalg/tests/test_linalg.py =================================================================== --- trunk/numpy/linalg/tests/test_linalg.py 2009-01-05 22:52:21 UTC (rev 6296) +++ trunk/numpy/linalg/tests/test_linalg.py 2009-01-06 10:09:00 UTC (rev 6297) @@ -202,7 +202,7 @@ assert_equal(matrix_power(A,2),A) -class HermitianTestCase: +class HermitianTestCase(object): def test_single(self): a = array([[1.,2.], [2.,1.]], dtype=single) self.do(a) From numpy-svn at scipy.org Tue Jan 6 15:35:40 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Tue, 6 Jan 2009 14:35:40 -0600 (CST) Subject: [Numpy-svn] r6298 - trunk/numpy/ma Message-ID: <20090106203540.2D47AC7C00B@scipy.org> Author: pierregm Date: 2009-01-06 14:35:37 -0600 (Tue, 06 Jan 2009) New Revision: 6298 Modified: trunk/numpy/ma/core.py Log: * Bugfix #961 Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-06 10:09:00 UTC (rev 6297) +++ trunk/numpy/ma/core.py 2009-01-06 20:35:37 UTC (rev 6298) @@ -1,5 +1,6 @@ # pylint: disable-msg=E1002 -"""MA: a facility for dealing with missing observations +""" +MA: a facility for dealing with missing observations MA is generally used as a numpy.array look-alike. by Paul F. Dubois. @@ -8,9 +9,9 @@ Adapted for numpy_core 2005 by Travis Oliphant and (mainly) Paul Dubois. -Subclassing of the base ndarray 2006 by Pierre Gerard-Marchant. 
-pgmdevlist_AT_gmail_DOT_com -Improvements suggested by Reggie Dugard (reggie_AT_merfinllc_DOT_com) +* Subclassing of the base ndarray 2006 by Pierre Gerard-Marchant + (pgmdevlist_AT_gmail_DOT_com) +* Improvements suggested by Reggie Dugard (reggie_AT_merfinllc_DOT_com) :author: Pierre Gerard-Marchant @@ -392,7 +393,7 @@ return rcls #####-------------------------------------------------------------------------- -def get_data(a, subok=True): +def getdata(a, subok=True): """ Return the `_data` part of `a` if `a` is a MaskedArray, or `a` itself. @@ -409,8 +410,8 @@ if not subok: return data.view(ndarray) return data +get_data = getdata -getdata = get_data def fix_invalid(a, mask=nomask, copy=True, fill_value=None): """ @@ -886,7 +887,7 @@ ndtype = np.dtype(ndtype) return np.dtype(_recursive_make_descr(ndtype, np.bool)) -def get_mask(a): +def getmask(a): """Return the mask of a, if any, or nomask. To get a full array of booleans of the same shape as a, use @@ -894,7 +895,7 @@ """ return getattr(a, '_mask', nomask) -getmask = get_mask +get_mask = getmask def getmaskarray(arr): """Return the mask of arr, if any, or a boolean array of the shape @@ -1470,32 +1471,32 @@ ---------- data : {var} Input data. - mask : {nomask, sequence} + mask : {nomask, sequence}, optional Mask. Must be convertible to an array of booleans with the same shape as data: True indicates a masked (eg., invalid) data. - dtype : dtype - Data type of the output. If None, the type of the data - argument is used. If dtype is not None and different from - data.dtype, a copy is performed. - copy : bool - Whether to copy the input data (True), or to use a - reference instead. Note: data are NOT copied by default. - subok : {True, boolean} + dtype : {dtype}, optional + Data type of the output. + If dtype is None, the type of the data argument (`data.dtype`) is used. + If dtype is not None and different from `data.dtype`, a copy is performed. 
+ copy : {False, True}, optional + Whether to copy the input data (True), or to use a reference instead. + Note: data are NOT copied by default. + subok : {True, False}, optional Whether to return a subclass of MaskedArray (if possible) or a plain MaskedArray. - ndmin : {0, int} + ndmin : {0, int}, optional Minimum number of dimensions - fill_value : {var} - Value used to fill in the masked values when necessary. If - None, a default based on the datatype is used. - keep_mask : {True, boolean} + fill_value : {var}, optional + Value used to fill in the masked values when necessary. + If None, a default based on the datatype is used. + keep_mask : {True, boolean}, optional Whether to combine mask with the mask of the input data, if any (True), or to use only mask for the output (False). - hard_mask : {False, boolean} - Whether to use a hard mask or not. With a hard mask, - masked values cannot be unmasked. - shrink : {True, boolean} + hard_mask : {False, boolean}, optional + Whether to use a hard mask or not. + With a hard mask, masked values cannot be unmasked. + shrink : {True, boolean}, optional Whether to force compression of an empty mask. """ @@ -1509,10 +1510,12 @@ subok=True, ndmin=0, fill_value=None, keep_mask=True, hard_mask=None, flag=None, shrink=True, **options): - """Create a new masked array from scratch. + """ + Create a new masked array from scratch. - Note: you can also create an array with the .view(MaskedArray) - method. + Notes + ----- + A masked array can also be created by taking a .view(MaskedArray). """ if flag is not None: From numpy-svn at scipy.org Wed Jan 7 13:14:20 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Wed, 7 Jan 2009 12:14:20 -0600 (CST) Subject: [Numpy-svn] r6299 - in trunk/numpy/ma: . 
tests Message-ID: <20090107181420.0FF1DC7C072@scipy.org> Author: pierregm Date: 2009-01-07 12:14:12 -0600 (Wed, 07 Jan 2009) New Revision: 6299 Modified: trunk/numpy/ma/core.py trunk/numpy/ma/tests/test_core.py Log: * Fixed iadd/isub/imul when the base array has no mask but the other array does Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-06 20:35:37 UTC (rev 6298) +++ trunk/numpy/ma/core.py 2009-01-07 18:14:12 UTC (rev 6299) @@ -2331,7 +2331,9 @@ "Add other to self in-place." m = getmask(other) if self._mask is nomask: - self._mask = m + if m is not nomask and m.any(): + self._mask = make_mask_none(self.shape, self.dtype) + self._mask += m else: if m is not nomask: self._mask += m @@ -2342,7 +2344,9 @@ "Subtract other from self in-place." m = getmask(other) if self._mask is nomask: - self._mask = m + if m is not nomask and m.any(): + self._mask = make_mask_none(self.shape, self.dtype) + self._mask += m elif m is not nomask: self._mask += m ndarray.__isub__(self._data, np.where(self._mask, 0, getdata(other))) @@ -2352,7 +2356,9 @@ "Multiply self by other in-place." 
m = getmask(other) if self._mask is nomask: - self._mask = m + if m is not nomask and m.any(): + self._mask = make_mask_none(self.shape, self.dtype) + self._mask += m elif m is not nomask: self._mask += m ndarray.__imul__(self._data, np.where(self._mask, 1, getdata(other))) @@ -3701,7 +3707,7 @@ """ _data = ndarray.__new__(baseclass, baseshape, basetype) - _mask = ndarray.__new__(ndarray, baseshape, 'b1') + _mask = ndarray.__new__(ndarray, baseshape, make_mask_descr(basetype)) return subtype.__new__(subtype, _data, mask=_mask, dtype=basetype,) Modified: trunk/numpy/ma/tests/test_core.py =================================================================== --- trunk/numpy/ma/tests/test_core.py 2009-01-06 20:35:37 UTC (rev 6298) +++ trunk/numpy/ma/tests/test_core.py 2009-01-07 18:14:12 UTC (rev 6299) @@ -1400,6 +1400,51 @@ assert_equal(x.data, [1., 2.**2.5, 3]) assert_equal(x.mask, [0, 0, 1]) + + def test_datafriendly_add_arrays(self): + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 0]) + a += b + assert_equal(a, [[2, 2], [4, 4]]) + if a.mask is not nomask: + assert_equal(a.mask, [[0, 0], [0, 0]]) + # + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 1]) + a += b + assert_equal(a, [[2, 2], [4, 4]]) + assert_equal(a.mask, [[0, 1], [0, 1]]) + + + def test_datafriendly_sub_arrays(self): + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 0]) + a -= b + assert_equal(a, [[0, 0], [2, 2]]) + if a.mask is not nomask: + assert_equal(a.mask, [[0, 0], [0, 0]]) + # + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 1]) + a -= b + assert_equal(a, [[0, 0], [2, 2]]) + assert_equal(a.mask, [[0, 1], [0, 1]]) + + + def test_datafriendly_mul_arrays(self): + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 0]) + a *= b + assert_equal(a, [[1, 1], [3, 3]]) + if a.mask is not nomask: + assert_equal(a.mask, [[0, 0], [0, 0]]) + # + a = array([[1, 1], [3, 3]]) + b = array([1, 1], mask=[0, 1]) + a *= b + assert_equal(a, [[1, 1], [3, 3]]) + 
assert_equal(a.mask, [[0, 1], [0, 1]]) + #------------------------------------------------------------------------------ class TestMaskedArrayMethods(TestCase): From numpy-svn at scipy.org Wed Jan 7 17:34:56 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Wed, 7 Jan 2009 16:34:56 -0600 (CST) Subject: [Numpy-svn] r6300 - in trunk/numpy/ma: . tests Message-ID: <20090107223456.E932CC7C055@scipy.org> Author: pierregm Date: 2009-01-07 16:34:51 -0600 (Wed, 07 Jan 2009) New Revision: 6300 Modified: trunk/numpy/ma/core.py trunk/numpy/ma/tests/test_core.py Log: * Renamed `torecords` to `toflex`, keeping `torecords` as an alias * Introduced `fromflex`, to reconstruct a masked_array from the output of `toflex` (can't use `fromrecords` as it would clash with `numpy.ma.mrecords.fromrecords`) * Fixed a bug in MaskedBinaryOperation (#979) (wrong array broadcasting) Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-07 18:14:12 UTC (rev 6299) +++ trunk/numpy/ma/core.py 2009-01-07 22:34:51 UTC (rev 6300) @@ -34,7 +34,7 @@ 'default_fill_value', 'diag', 'diagonal', 'divide', 'dump', 'dumps', 'empty', 'empty_like', 'equal', 'exp', 'expand_dims', 'fabs', 'flatten_mask', 'fmod', 'filled', 'floor', 'floor_divide', - 'fix_invalid', 'frombuffer', 'fromfunction', + 'fix_invalid', 'frombuffer', 'fromflex', 'fromfunction', 'getdata','getmask', 'getmaskarray', 'greater', 'greater_equal', 'harden_mask', 'hypot', 'identity', 'ids', 'indices', 'inner', 'innerproduct', @@ -623,8 +623,8 @@ # Transforms to a (subclass of) MaskedArray if we don't have a scalar if result.shape: result = result.view(get_masked_subclass(a, b)) - result._mask = make_mask_none(result.shape) - result._mask.flat = m + if m.any(): + result._mask = mask_or(getmaskarray(a), getmaskarray(b)) if isinstance(a, MaskedArray): result._update_from(a) if isinstance(b, MaskedArray): @@ -3603,7 +3603,7 @@ def tofile(self, 
fid, sep="", format="%s"): raise NotImplementedError("Not implemented yet, sorry...") - def torecords(self): + def toflex(self): """ Transforms a MaskedArray into a flexible-type array with two fields: * the ``_data`` field stores the ``_data`` part of the array; @@ -3648,6 +3648,7 @@ record['_data'] = self._data record['_mask'] = self._mask return record + torecords = toflex #-------------------------------------------- # Pickling def __getstate__(self): @@ -4613,6 +4614,15 @@ raise NotImplementedError("Not yet implemented. Sorry") +def fromflex(fxarray): + """ + Rebuilds a masked_array from a flexible-type array output by the '.torecord' + array + """ + return masked_array(fxarray['_data'], mask=fxarray['_mask']) + + + class _convert2ma: """Convert functions from numpy to numpy.ma. Modified: trunk/numpy/ma/tests/test_core.py =================================================================== --- trunk/numpy/ma/tests/test_core.py 2009-01-07 18:14:12 UTC (rev 6299) +++ trunk/numpy/ma/tests/test_core.py 2009-01-07 22:34:51 UTC (rev 6300) @@ -549,6 +549,7 @@ assert_equal(np.multiply(x,y), multiply(xm, ym)) assert_equal(np.divide(x,y), divide(xm, ym)) + def test_divide_on_different_shapes(self): x = arange(6, dtype=float) x.shape = (2,3) @@ -567,6 +568,7 @@ assert_equal(z, [[-1.,-1.,-1.], [3.,4.,5.]]) assert_equal(z.mask, [[1,1,1],[0,0,0]]) + def test_mixed_arithmetic(self): "Tests mixed arithmetics." na = np.array([1]) @@ -581,6 +583,7 @@ assert_equal(getmaskarray(a/2), [0,0,0]) assert_equal(getmaskarray(2/a), [1,0,1]) + def test_masked_singleton_arithmetic(self): "Tests some scalar arithmetics on MaskedArrays." 
# Masked singleton should remain masked no matter what @@ -591,6 +594,7 @@ self.failUnless(maximum(xm, xm).mask) self.failUnless(minimum(xm, xm).mask) + def test_arithmetic_with_masked_singleton(self): "Checks that there's no collapsing to masked" x = masked_array([1,2]) @@ -603,6 +607,7 @@ assert_equal(y.shape, x.shape) assert_equal(y._mask, [True, True]) + def test_arithmetic_with_masked_singleton_on_1d_singleton(self): "Check that we're not losing the shape of a singleton" x = masked_array([1, ]) @@ -610,6 +615,7 @@ assert_equal(y.shape, x.shape) assert_equal(y.mask, [True, ]) + def test_scalar_arithmetic(self): x = array(0, mask=0) assert_equal(x.filled().ctypes.data, x.ctypes.data) @@ -618,6 +624,7 @@ assert_equal(xm.shape,(2,)) assert_equal(xm.mask,[1,1]) + def test_basic_ufuncs (self): "Test various functions such as sin, cos." (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d @@ -659,6 +666,7 @@ assert getmask(count(ott,0)) is nomask assert_equal([1,2],count(ott,0)) + def test_minmax_func (self): "Tests minimum and maximum." (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d @@ -682,6 +690,7 @@ x[-1,-1] = masked assert_equal(maximum(x), 2) + def test_minimummaximum_func(self): a = np.ones((2,2)) aminimum = minimum(a,a) @@ -700,6 +709,7 @@ self.failUnless(isinstance(amaximum, MaskedArray)) assert_equal(amaximum, np.maximum.outer(a,a)) + def test_minmax_funcs_with_output(self): "Tests the min/max functions with explicit outputs" mask = np.random.rand(12).round() @@ -745,7 +755,8 @@ self.failUnless(x.min() is masked) self.failUnless(x.max() is masked) self.failUnless(x.ptp() is masked) - #........................ + + def test_addsumprod (self): "Tests add, sum, product." 
(x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d @@ -767,6 +778,44 @@ assert_equal(np.sum(x,1), sum(x,1)) assert_equal(np.product(x,1), product(x,1)) + + def test_binops_d2D(self): + "Test binary operations on 2D data" + a = array([[1.], [2.], [3.]], mask=[[False], [True], [True]]) + b = array([[2., 3.], [4., 5.], [6., 7.]]) + # + test = a * b + control = array([[2., 3.], [2., 2.], [3., 3.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + test = b * a + control = array([[2., 3.], [4., 5.], [6., 7.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + a = array([[1.], [2.], [3.]]) + b = array([[2., 3.], [4., 5.], [6., 7.]], + mask=[[0, 0], [0, 0], [0, 1]]) + test = a * b + control = array([[2, 3], [8, 10], [18, 3]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + test = b * a + control = array([[2, 3], [8, 10], [18, 7]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + def test_mod(self): "Tests mod" (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d @@ -779,7 +828,6 @@ assert_equal(test.mask, mask_or(mask_or(xm.mask, ym.mask), (ym == 0))) - def test_TakeTransposeInnerOuter(self): "Test of take, transpose, inner, outer products" x = arange(24) @@ -1983,15 +2031,15 @@ assert_equal(x.tolist(), [(1,1.1,'one'),(2,2.2,'two'),(None,None,None)]) - def test_torecords(self): + def test_toflex(self): "Test the conversion to records" data = arange(10) - record = data.torecords() + record = data.toflex() assert_equal(record['_data'], data._data) assert_equal(record['_mask'], data._mask) # data[[0,1,2,-1]] = masked - record = data.torecords() + record = data.toflex() 
assert_equal(record['_data'], data._data) assert_equal(record['_mask'], data._mask) # @@ -2001,7 +2049,7 @@ np.random.rand(10))], dtype=ndtype) data[[0,1,2,-1]] = masked - record = data.torecords() + record = data.toflex() assert_equal(record['_data'], data._data) assert_equal(record['_mask'], data._mask) # @@ -2011,10 +2059,29 @@ np.random.rand(10))], dtype=ndtype) data[[0,1,2,-1]] = masked - record = data.torecords() + record = data.toflex() assert_equal_records(record['_data'], data._data) assert_equal_records(record['_mask'], data._mask) + + def test_fromflex(self): + "Test the reconstruction of a masked_array from a record" + a = array([1, 2, 3]) + test = fromflex(a.toflex()) + assert_equal(test, a) + assert_equal(test.mask, a.mask) + # + a = array([1, 2, 3], mask=[0, 0, 1]) + test = fromflex(a.toflex()) + assert_equal(test, a) + assert_equal(test.mask, a.mask) + # + a = array([(1, 1.), (2, 2.), (3, 3.)], mask=[(1, 0), (0, 0), (0, 1)], + dtype=[('A', int), ('B', float)]) + test = fromflex(a.toflex()) + assert_equal(test, a) + assert_equal(test.data, a.data) + #------------------------------------------------------------------------------ From numpy-svn at scipy.org Thu Jan 8 04:19:09 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 8 Jan 2009 03:19:09 -0600 (CST) Subject: [Numpy-svn] r6301 - trunk/numpy/f2py Message-ID: <20090108091909.3B997C7C020@scipy.org> Author: cdavid Date: 2009-01-08 03:19:00 -0600 (Thu, 08 Jan 2009) New Revision: 6301 Modified: trunk/numpy/f2py/cfuncs.py Log: Avoid putting things into stderr when errors occurs in f2py wrappers; put all the info in the python error string instead. 
Modified: trunk/numpy/f2py/cfuncs.py =================================================================== --- trunk/numpy/f2py/cfuncs.py 2009-01-07 22:34:51 UTC (rev 6300) +++ trunk/numpy/f2py/cfuncs.py 2009-01-08 09:19:00 UTC (rev 6301) @@ -472,15 +472,17 @@ cppmacros['CHECKSTRING']="""\ #define CHECKSTRING(check,tcheck,name,show,var)\\ \tif (!(check)) {\\ -\t\tPyErr_SetString(#modulename#_error,\"(\"tcheck\") failed for \"name);\\ -\t\tfprintf(stderr,show\"\\n\",slen(var),var);\\ +\t\tchar errstring[256];\\ +\t\tsprintf(errstring, \"%s: \"show, \"(\"tcheck\") failed for \"name, slen(var), var);\\ +\t\tPyErr_SetString(#modulename#_error, errstring);\\ \t\t/*goto capi_fail;*/\\ \t} else """ cppmacros['CHECKSCALAR']="""\ #define CHECKSCALAR(check,tcheck,name,show,var)\\ \tif (!(check)) {\\ -\t\tPyErr_SetString(#modulename#_error,\"(\"tcheck\") failed for \"name);\\ -\t\tfprintf(stderr,show\"\\n\",var);\\ +\t\tchar errstring[256];\\ +\t\tsprintf(errstring, \"%s: \"show, \"(\"tcheck\") failed for \"name, var);\\ +\t\tPyErr_SetString(#modulename#_error,errstring);\\ \t\t/*goto capi_fail;*/\\ \t} else """ ## cppmacros['CHECKDIMS']="""\ From numpy-svn at scipy.org Thu Jan 8 10:11:35 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 8 Jan 2009 09:11:35 -0600 (CST) Subject: [Numpy-svn] r6302 - trunk/numpy/core/tests Message-ID: <20090108151135.6C9A5C7C00C@scipy.org> Author: cdavid Date: 2009-01-08 09:11:32 -0600 (Thu, 08 Jan 2009) New Revision: 6302 Modified: trunk/numpy/core/tests/test_print.py Log: Fix python 2.4 issue. 
Modified: trunk/numpy/core/tests/test_print.py =================================================================== --- trunk/numpy/core/tests/test_print.py 2009-01-08 09:19:00 UTC (rev 6301) +++ trunk/numpy/core/tests/test_print.py 2009-01-08 15:11:32 UTC (rev 6302) @@ -128,14 +128,15 @@ def has_french_locale(): curloc = locale.getlocale(locale.LC_NUMERIC) try: - if not sys.platform == 'win32': - locale.setlocale(locale.LC_NUMERIC, 'fr_FR') - else: - locale.setlocale(locale.LC_NUMERIC, 'FRENCH') + try: + if not sys.platform == 'win32': + locale.setlocale(locale.LC_NUMERIC, 'fr_FR') + else: + locale.setlocale(locale.LC_NUMERIC, 'FRENCH') - st = True - except: - st = False + st = True + except: + st = False finally: locale.setlocale(locale.LC_NUMERIC, locale=curloc) From numpy-svn at scipy.org Thu Jan 8 11:30:05 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 8 Jan 2009 10:30:05 -0600 (CST) Subject: [Numpy-svn] r6303 - trunk/numpy/core/tests Message-ID: <20090108163005.4957DC7C00C@scipy.org> Author: chanley Date: 2009-01-08 10:30:01 -0600 (Thu, 08 Jan 2009) New Revision: 6303 Modified: trunk/numpy/core/tests/test_print.py Log: Fix test_print.py function _test_locale_independance() since str(1.2) does not use the LC_NUMERIC locale to convert numbers. Fix from Mark Sienkiewicz. 
Modified: trunk/numpy/core/tests/test_print.py =================================================================== --- trunk/numpy/core/tests/test_print.py 2009-01-08 15:11:32 UTC (rev 6302) +++ trunk/numpy/core/tests/test_print.py 2009-01-08 16:30:01 UTC (rev 6303) @@ -154,7 +154,7 @@ else: locale.setlocale(locale.LC_NUMERIC, 'FRENCH') - assert_equal(str(tp(1.2)), str(float(1.2)), + assert_equal(locale.format("%f",tp(1.2)), locale.format("%f",float(1.2)), err_msg='Failed locale test for type %s' % tp) finally: locale.setlocale(locale.LC_NUMERIC, locale=curloc) From numpy-svn at scipy.org Thu Jan 8 14:22:41 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 8 Jan 2009 13:22:41 -0600 (CST) Subject: [Numpy-svn] r6304 - trunk/numpy/core/tests Message-ID: <20090108192241.3C546C7C009@scipy.org> Author: cdavid Date: 2009-01-08 13:22:21 -0600 (Thu, 08 Jan 2009) New Revision: 6304 Modified: trunk/numpy/core/tests/test_print.py Log: Revert buggy test fix for locale independecce. Modified: trunk/numpy/core/tests/test_print.py =================================================================== --- trunk/numpy/core/tests/test_print.py 2009-01-08 16:30:01 UTC (rev 6303) +++ trunk/numpy/core/tests/test_print.py 2009-01-08 19:22:21 UTC (rev 6304) @@ -154,7 +154,7 @@ else: locale.setlocale(locale.LC_NUMERIC, 'FRENCH') - assert_equal(locale.format("%f",tp(1.2)), locale.format("%f",float(1.2)), + assert_equal(str(tp(1.2)), str(float(1.2)), err_msg='Failed locale test for type %s' % tp) finally: locale.setlocale(locale.LC_NUMERIC, locale=curloc) From numpy-svn at scipy.org Thu Jan 8 15:02:31 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 8 Jan 2009 14:02:31 -0600 (CST) Subject: [Numpy-svn] r6305 - in trunk/numpy/ma: . 
tests Message-ID: <20090108200231.61B09C7C009@scipy.org> Author: pierregm Date: 2009-01-08 14:02:29 -0600 (Thu, 08 Jan 2009) New Revision: 6305 Modified: trunk/numpy/ma/core.py trunk/numpy/ma/tests/test_core.py Log: * Add __eq__ and __ne__ for support of flexible arrays. * Fixed .filled for nested structures Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-08 19:22:21 UTC (rev 6304) +++ trunk/numpy/ma/core.py 2009-01-08 20:02:29 UTC (rev 6305) @@ -857,6 +857,7 @@ #####-------------------------------------------------------------------------- #---- --- Mask creation functions --- #####-------------------------------------------------------------------------- + def _recursive_make_descr(datatype, newtype=bool_): "Private function allowing recursion in make_descr." # Do we have some name fields ? @@ -1134,6 +1135,7 @@ result._mask = cond return result + def masked_greater(x, value, copy=True): """ Return the array `x` masked where (x > value). @@ -1142,22 +1144,27 @@ """ return masked_where(greater(x, value), x, copy=copy) + def masked_greater_equal(x, value, copy=True): "Shortcut to masked_where, with condition = (x >= value)." return masked_where(greater_equal(x, value), x, copy=copy) + def masked_less(x, value, copy=True): "Shortcut to masked_where, with condition = (x < value)." return masked_where(less(x, value), x, copy=copy) + def masked_less_equal(x, value, copy=True): "Shortcut to masked_where, with condition = (x <= value)." return masked_where(less_equal(x, value), x, copy=copy) + def masked_not_equal(x, value, copy=True): "Shortcut to masked_where, with condition = (x != value)." return masked_where(not_equal(x, value), x, copy=copy) + def masked_equal(x, value, copy=True): """ Shortcut to masked_where, with condition = (x == value). 
For @@ -1171,6 +1178,7 @@ # return array(d, mask=m, copy=copy) return masked_where(equal(x, value), x, copy=copy) + def masked_inside(x, v1, v2, copy=True): """ Shortcut to masked_where, where ``condition`` is True for x inside @@ -1188,6 +1196,7 @@ condition = (xf >= v1) & (xf <= v2) return masked_where(condition, x, copy=copy) + def masked_outside(x, v1, v2, copy=True): """ Shortcut to ``masked_where``, where ``condition`` is True for x outside @@ -1205,7 +1214,7 @@ condition = (xf < v1) | (xf > v2) return masked_where(condition, x, copy=copy) -# + def masked_object(x, value, copy=True, shrink=True): """ Mask the array `x` where the data are exactly equal to value. @@ -1234,6 +1243,7 @@ mask = mask_or(mask, make_mask(condition, shrink=shrink)) return masked_array(x, mask=mask, copy=copy, fill_value=value) + def masked_values(x, value, rtol=1.e-5, atol=1.e-8, copy=True, shrink=True): """ Mask the array x where the data are approximately equal in @@ -1271,6 +1281,7 @@ mask = mask_or(mask, make_mask(condition, shrink=shrink)) return masked_array(xnew, mask=mask, copy=copy, fill_value=value) + def masked_invalid(a, copy=True): """ Mask the array for invalid values (NaNs or infs). @@ -1292,6 +1303,7 @@ #####-------------------------------------------------------------------------- #---- --- Printing options --- #####-------------------------------------------------------------------------- + class _MaskedPrintOption: """ Handle the string used to represent missing data in a masked array. @@ -1372,6 +1384,20 @@ #---- --- MaskedArray class --- #####-------------------------------------------------------------------------- +def _recursive_filled(a, mask, fill_value): + """ + Recursively fill `a` with `fill_value`. 
+ Private function + """ + names = a.dtype.names + for name in names: + current = a[name] + print "Name: %s : %s" % (name, current) + if current.dtype.names: + _recursive_filled(current, mask[name], fill_value[name]) + else: + np.putmask(current, mask[name], fill_value[name]) + #............................................................................... class _arraymethod(object): """ @@ -2013,6 +2039,7 @@ try: return _mask.view((bool_, len(self.dtype))).all(axis) except ValueError: + # In case we have nested fields... return np.all([[f[n].all() for n in _mask.dtype.names] for f in _mask], axis=axis) @@ -2106,6 +2133,7 @@ fill_value = property(fget=get_fill_value, fset=set_fill_value, doc="Filling value.") + def filled(self, fill_value=None): """Return a copy of self._data, where masked values are filled with fill_value. @@ -2140,9 +2168,10 @@ # if m.dtype.names: result = self._data.copy() - for n in result.dtype.names: - field = result[n] - np.putmask(field, self._mask[n], fill_value[n]) + _recursive_filled(result, self._mask, fill_value) +# for n in result.dtype.names: +# field = result[n] +# np.putmask(field, self._mask[n], fill_value[n]) elif not m.any(): return self._data else: @@ -2287,6 +2316,58 @@ return _print_templates['short'] % parameters return _print_templates['long'] % parameters #............................................ 
+ def __eq__(self, other): + "Check whether other equals self elementwise" + omask = getattr(other, '_mask', nomask) + if omask is nomask: + check = ndarray.__eq__(self.filled(0), other).view(type(self)) + check._mask = self._mask + else: + odata = filled(other, 0) + check = ndarray.__eq__(self.filled(0), odata).view(type(self)) + if self._mask is nomask: + check._mask = omask + else: + mask = mask_or(self._mask, omask) + if mask.dtype.names: + if mask.size > 1: + axis = 1 + else: + axis = None + try: + mask = mask.view((bool_, len(self.dtype))).all(axis) + except ValueError: + mask = np.all([[f[n].all() for n in mask.dtype.names] + for f in mask], axis=axis) + check._mask = mask + return check + # + def __ne__(self, other): + "Check whether other doesn't equal self elementwise" + omask = getattr(other, '_mask', nomask) + if omask is nomask: + check = ndarray.__ne__(self.filled(0), other).view(type(self)) + check._mask = self._mask + else: + odata = filled(other, 0) + check = ndarray.__ne__(self.filled(0), odata).view(type(self)) + if self._mask is nomask: + check._mask = omask + else: + mask = mask_or(self._mask, omask) + if mask.dtype.names: + if mask.size > 1: + axis = 1 + else: + axis = None + try: + mask = mask.view((bool_, len(self.dtype))).all(axis) + except ValueError: + mask = np.all([[f[n].all() for n in mask.dtype.names] + for f in mask], axis=axis) + check._mask = mask + return check + # def __add__(self, other): "Add other to self, and return a new masked array." 
return add(self, other) Modified: trunk/numpy/ma/tests/test_core.py =================================================================== --- trunk/numpy/ma/tests/test_core.py 2009-01-08 19:22:21 UTC (rev 6304) +++ trunk/numpy/ma/tests/test_core.py 2009-01-08 20:02:29 UTC (rev 6305) @@ -474,6 +474,16 @@ np.array([(1, '1', 1.)], dtype=flexi.dtype)) + def test_filled_w_nested_dtype(self): + "Test filled w/ nested dtype" + ndtype = [('A', int), ('B', [('BA', int), ('BB', int)])] + a = array([(1, (1, 1)), (2, (2, 2))], + mask=[(0, (1, 0)), (0, (0, 1))], dtype=ndtype) + test = a.filled(0) + control = np.array([(1, (0, 1)), (2, (2, 0))], dtype=ndtype) + assert_equal(test, control) + + def test_optinfo_propagation(self): "Checks that _optinfo dictionary isn't back-propagated" x = array([1,2,3,], dtype=float) @@ -884,6 +894,40 @@ self.failUnless(output[0] is masked) + def test_eq_on_structured(self): + "Test the equality of structured arrays" + ndtype = [('A', int), ('B', int)] + a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) + test = (a == a) + assert_equal(test, [True, True]) + assert_equal(test.mask, [False, False]) + b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) + test = (a == b) + assert_equal(test, [False, True]) + assert_equal(test.mask, [True, False]) + b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + test = (a == b) + assert_equal(test, [True, False]) + assert_equal(test.mask, [False, False]) + + + def test_ne_on_structured(self): + "Test the equality of structured arrays" + ndtype = [('A', int), ('B', int)] + a = array([(1, 1), (2, 2)], mask=[(0, 1), (0, 0)], dtype=ndtype) + test = (a != a) + assert_equal(test, [False, False]) + assert_equal(test.mask, [False, False]) + b = array([(1, 1), (2, 2)], mask=[(1, 0), (0, 0)], dtype=ndtype) + test = (a != b) + assert_equal(test, [True, False]) + assert_equal(test.mask, [True, False]) + b = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + test = (a != b) 
+ assert_equal(test, [False, True]) + assert_equal(test.mask, [False, False]) + + #------------------------------------------------------------------------------ class TestMaskedArrayAttributes(TestCase): From numpy-svn at scipy.org Thu Jan 8 16:51:06 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 8 Jan 2009 15:51:06 -0600 (CST) Subject: [Numpy-svn] r6306 - trunk/numpy/ma Message-ID: <20090108215106.81794C7C009@scipy.org> Author: pierregm Date: 2009-01-08 15:51:04 -0600 (Thu, 08 Jan 2009) New Revision: 6306 Modified: trunk/numpy/ma/core.py Log: * Remove a debugging print statement. Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-08 20:02:29 UTC (rev 6305) +++ trunk/numpy/ma/core.py 2009-01-08 21:51:04 UTC (rev 6306) @@ -1392,7 +1392,6 @@ names = a.dtype.names for name in names: current = a[name] - print "Name: %s : %s" % (name, current) if current.dtype.names: _recursive_filled(current, mask[name], fill_value[name]) else: @@ -2169,9 +2168,6 @@ if m.dtype.names: result = self._data.copy() _recursive_filled(result, self._mask, fill_value) -# for n in result.dtype.names: -# field = result[n] -# np.putmask(field, self._mask[n], fill_value[n]) elif not m.any(): return self._data else: From numpy-svn at scipy.org Thu Jan 8 21:14:36 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 8 Jan 2009 20:14:36 -0600 (CST) Subject: [Numpy-svn] r6307 - trunk Message-ID: <20090109021436.2DC83C7C009@scipy.org> Author: jarrod.millman Date: 2009-01-08 20:14:35 -0600 (Thu, 08 Jan 2009) New Revision: 6307 Modified: trunk/LICENSE.txt Log: Updated license file Modified: trunk/LICENSE.txt =================================================================== --- trunk/LICENSE.txt 2009-01-08 21:51:04 UTC (rev 6306) +++ trunk/LICENSE.txt 2009-01-09 02:14:35 UTC (rev 6307) @@ -1,4 +1,4 @@ -Copyright (c) 2005, NumPy Developers +Copyright (c) 2005-2009, NumPy Developers. 
All rights reserved. Redistribution and use in source and binary forms, with or without From numpy-svn at scipy.org Fri Jan 9 00:27:10 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 8 Jan 2009 23:27:10 -0600 (CST) Subject: [Numpy-svn] r6308 - trunk/numpy/core/tests Message-ID: <20090109052710.EF28DC7C009@scipy.org> Author: cdavid Date: 2009-01-08 23:26:58 -0600 (Thu, 08 Jan 2009) New Revision: 6308 Modified: trunk/numpy/core/tests/test_print.py Log: Tag formatting unit tests as known failures. Modified: trunk/numpy/core/tests/test_print.py =================================================================== --- trunk/numpy/core/tests/test_print.py 2009-01-09 02:14:35 UTC (rev 6307) +++ trunk/numpy/core/tests/test_print.py 2009-01-09 05:26:58 UTC (rev 6308) @@ -25,6 +25,7 @@ assert_equal(str(tp(1e10)), ref, err_msg='Failed str formatting for type %s' % tp) + at dec.knownfailureif(True, "formatting tests are known to fail") def test_float_types(): """ Check formatting. @@ -41,6 +42,7 @@ assert_equal(str(tp(x)), _REF[x], err_msg='Failed str formatting for type %s' % tp) + at dec.knownfailureif(True, "formatting tests are known to fail") def test_nan_inf_float(): """ Check formatting. @@ -73,6 +75,7 @@ assert_equal(str(tp(1e10)), ref, err_msg='Failed str formatting for type %s' % tp) + at dec.knownfailureif(True, "formatting tests are known to fail") def test_complex_types(): """Check formatting. 
@@ -114,11 +117,13 @@ complex(np.nan, 1), complex(-np.inf, 1)] : _test_redirected_print(complex(x), tp) + at dec.knownfailureif(True, "formatting tests are known to fail") def test_float_type_print(): """Check formatting when using print """ for t in [np.float32, np.double, np.longdouble] : yield check_float_type_print, t + at dec.knownfailureif(True, "formatting tests are known to fail") def test_complex_type_print(): """Check formatting when using print """ for t in [np.complex64, np.cdouble, np.clongdouble] : @@ -159,16 +164,19 @@ finally: locale.setlocale(locale.LC_NUMERIC, locale=curloc) + at dec.knownfailureif(True, "formatting tests are known to fail") @np.testing.dec.skipif(not has_french_locale(), "Skipping locale test, French locale not found") def test_locale_single(): return _test_locale_independance(np.float32) + at dec.knownfailureif(True, "formatting tests are known to fail") @np.testing.dec.skipif(not has_french_locale(), "Skipping locale test, French locale not found") def test_locale_double(): return _test_locale_independance(np.double) + at dec.knownfailureif(True, "formatting tests are known to fail") @np.testing.dec.skipif(not has_french_locale(), "Skipping locale test, French locale not found") def test_locale_longdouble(): From numpy-svn at scipy.org Fri Jan 9 03:59:29 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Fri, 9 Jan 2009 02:59:29 -0600 (CST) Subject: [Numpy-svn] r6309 - trunk/numpy/distutils Message-ID: <20090109085929.E8F52C7C009@scipy.org> Author: jarrod.millman Date: 2009-01-09 02:59:29 -0600 (Fri, 09 Jan 2009) New Revision: 6309 Modified: trunk/numpy/distutils/system_info.py Log: should be more reliable way to determine what bit platform Modified: trunk/numpy/distutils/system_info.py =================================================================== --- trunk/numpy/distutils/system_info.py 2009-01-09 05:26:58 UTC (rev 6308) +++ trunk/numpy/distutils/system_info.py 2009-01-09 08:59:29 UTC (rev 6309) @@ -128,6 
+128,11 @@ from numpy.distutils.misc_util import is_sequence, is_string from numpy.distutils.command.config import config as cmd_config +# Determine number of bits +import platform +_bits = {'32bit':32,'64bit':64} +platform_bits = _bits[platform.architecture()[0]] + if sys.platform == 'win32': default_lib_dirs = ['C:\\', os.path.join(distutils.sysconfig.EXEC_PREFIX, @@ -145,13 +150,7 @@ '/usr/include/suitesparse'] default_src_dirs = ['.','/usr/local/src', '/opt/src','/sw/src'] - try: - platform = os.uname() - bit64 = platform[-1].endswith('64') - except: - bit64 = False - - if bit64: + if platform_bits == 64: default_x11_lib_dirs = ['/usr/lib64'] else: default_x11_lib_dirs = ['/usr/X11R6/lib','/usr/X11/lib','/usr/lib'] From numpy-svn at scipy.org Fri Jan 9 04:14:19 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Fri, 9 Jan 2009 03:14:19 -0600 (CST) Subject: [Numpy-svn] r6310 - trunk/numpy/distutils Message-ID: <20090109091419.1D12FC7C009@scipy.org> Author: jarrod.millman Date: 2009-01-09 03:14:17 -0600 (Fri, 09 Jan 2009) New Revision: 6310 Modified: trunk/numpy/distutils/system_info.py Log: better default library paths for 64bit arch Modified: trunk/numpy/distutils/system_info.py =================================================================== --- trunk/numpy/distutils/system_info.py 2009-01-09 08:59:29 UTC (rev 6309) +++ trunk/numpy/distutils/system_info.py 2009-01-09 09:14:17 UTC (rev 6310) @@ -133,6 +133,46 @@ _bits = {'32bit':32,'64bit':64} platform_bits = _bits[platform.architecture()[0]] +from itertools import cycle + +def libpaths(paths,bits): + """Return a list of library paths valid on 32 or 64 bit systems. + + Inputs: + paths : sequence + A sequence of strings (typically paths) + bits : int + An integer, the only valid values are 32 or 64. A ValueError exception + is raised otherwise. 
+ + Examples: + + Consider a list of directories + >>> paths = ['/usr/X11R6/lib','/usr/X11/lib','/usr/lib'] + + For a 32-bit platform, this is already valid: + >>> libpaths(paths,32) + ['/usr/X11R6/lib', '/usr/X11/lib', '/usr/lib'] + + On 64 bits, we append the '64' postfix + >>> libpaths(paths,64) + ['/usr/X11R6/lib', '/usr/X11R6/lib64', '/usr/X11/lib', '/usr/X11/lib64', + '/usr/lib', '/usr/lib64'] + """ + if bits not in (32, 64): + raise ValueError("Invalid bit size in libpaths: 32 or 64 only") + + # Handle 32bit case + if bits==32: + return paths + + # Handle 64bit case + out = [None]*(2*len(paths)) + out[::2] = paths + out[1::2] = (p+'64' for p in paths) + return out + + if sys.platform == 'win32': default_lib_dirs = ['C:\\', os.path.join(distutils.sysconfig.EXEC_PREFIX, @@ -142,19 +182,16 @@ default_x11_lib_dirs = [] default_x11_include_dirs = [] else: - default_lib_dirs = ['/usr/local/lib', '/opt/lib', '/usr/lib', - '/opt/local/lib', '/sw/lib'] + default_lib_dirs = libpaths(['/usr/local/lib','/opt/lib','/usr/lib', + '/opt/local/lib','/sw/lib'], platform_bits) default_include_dirs = ['/usr/local/include', '/opt/include', '/usr/include', '/opt/local/include', '/sw/include', '/usr/include/suitesparse'] default_src_dirs = ['.','/usr/local/src', '/opt/src','/sw/src'] - if platform_bits == 64: - default_x11_lib_dirs = ['/usr/lib64'] - else: - default_x11_lib_dirs = ['/usr/X11R6/lib','/usr/X11/lib','/usr/lib'] - + default_x11_lib_dirs = libpaths(['/usr/X11R6/lib','/usr/X11/lib', + '/usr/lib'], platform_bits) default_x11_include_dirs = ['/usr/X11R6/include','/usr/X11/include', '/usr/include'] From numpy-svn at scipy.org Fri Jan 9 04:57:16 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Fri, 9 Jan 2009 03:57:16 -0600 (CST) Subject: [Numpy-svn] r6311 - trunk/numpy/distutils Message-ID: <20090109095716.338BDC7C009@scipy.org> Author: jarrod.millman Date: 2009-01-09 03:57:15 -0600 (Fri, 09 Jan 2009) New Revision: 6311 Modified: 
trunk/numpy/distutils/system_info.py Log: simplification suggested by stefan Modified: trunk/numpy/distutils/system_info.py =================================================================== --- trunk/numpy/distutils/system_info.py 2009-01-09 09:14:17 UTC (rev 6310) +++ trunk/numpy/distutils/system_info.py 2009-01-09 09:57:15 UTC (rev 6311) @@ -167,9 +167,10 @@ return paths # Handle 64bit case - out = [None]*(2*len(paths)) - out[::2] = paths - out[1::2] = (p+'64' for p in paths) + out = [] + for p in paths: + out.extend([p, p+bits]) + return out From numpy-svn at scipy.org Fri Jan 9 05:02:10 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Fri, 9 Jan 2009 04:02:10 -0600 (CST) Subject: [Numpy-svn] r6312 - trunk/numpy/distutils Message-ID: <20090109100210.5019CC7C009@scipy.org> Author: jarrod.millman Date: 2009-01-09 04:02:09 -0600 (Fri, 09 Jan 2009) New Revision: 6312 Modified: trunk/numpy/distutils/system_info.py Log: switch the order [lib,lib64] --> [lib64,lib] Modified: trunk/numpy/distutils/system_info.py =================================================================== --- trunk/numpy/distutils/system_info.py 2009-01-09 09:57:15 UTC (rev 6311) +++ trunk/numpy/distutils/system_info.py 2009-01-09 10:02:09 UTC (rev 6312) @@ -154,10 +154,10 @@ >>> libpaths(paths,32) ['/usr/X11R6/lib', '/usr/X11/lib', '/usr/lib'] - On 64 bits, we append the '64' postfix + On 64 bits, we prepend the '64' postfix >>> libpaths(paths,64) - ['/usr/X11R6/lib', '/usr/X11R6/lib64', '/usr/X11/lib', '/usr/X11/lib64', - '/usr/lib', '/usr/lib64'] + ['/usr/X11R6/lib64', '/usr/X11R6/lib', '/usr/X11/lib64', '/usr/X11/lib', + '/usr/lib64', '/usr/lib'] """ if bits not in (32, 64): raise ValueError("Invalid bit size in libpaths: 32 or 64 only") @@ -169,7 +169,7 @@ # Handle 64bit case out = [] for p in paths: - out.extend([p, p+bits]) + out.extend([p+bits, p]) return out From numpy-svn at scipy.org Fri Jan 9 05:18:30 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: 
Fri, 9 Jan 2009 04:18:30 -0600 (CST) Subject: [Numpy-svn] r6313 - trunk/numpy/distutils Message-ID: <20090109101830.65DFDC7C009@scipy.org> Author: jarrod.millman Date: 2009-01-09 04:18:29 -0600 (Fri, 09 Jan 2009) New Revision: 6313 Modified: trunk/numpy/distutils/system_info.py Log: removed unneeded import Modified: trunk/numpy/distutils/system_info.py =================================================================== --- trunk/numpy/distutils/system_info.py 2009-01-09 10:02:09 UTC (rev 6312) +++ trunk/numpy/distutils/system_info.py 2009-01-09 10:18:29 UTC (rev 6313) @@ -133,8 +133,6 @@ _bits = {'32bit':32,'64bit':64} platform_bits = _bits[platform.architecture()[0]] -from itertools import cycle - def libpaths(paths,bits): """Return a list of library paths valid on 32 or 64 bit systems. From numpy-svn at scipy.org Fri Jan 9 14:37:18 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Fri, 9 Jan 2009 13:37:18 -0600 (CST) Subject: [Numpy-svn] r6314 - trunk/numpy/distutils Message-ID: <20090109193718.9C99EC7C009@scipy.org> Author: jarrod.millman Date: 2009-01-09 13:37:16 -0600 (Fri, 09 Jan 2009) New Revision: 6314 Modified: trunk/numpy/distutils/system_info.py Log: can't use append an int to a string Modified: trunk/numpy/distutils/system_info.py =================================================================== --- trunk/numpy/distutils/system_info.py 2009-01-09 10:18:29 UTC (rev 6313) +++ trunk/numpy/distutils/system_info.py 2009-01-09 19:37:16 UTC (rev 6314) @@ -167,7 +167,7 @@ # Handle 64bit case out = [] for p in paths: - out.extend([p+bits, p]) + out.extend([p+'64', p]) return out From numpy-svn at scipy.org Fri Jan 9 15:18:17 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Fri, 9 Jan 2009 14:18:17 -0600 (CST) Subject: [Numpy-svn] r6315 - in trunk/numpy/ma: . 
tests Message-ID: <20090109201817.C6B50C7C046@scipy.org> Author: pierregm Date: 2009-01-09 14:18:12 -0600 (Fri, 09 Jan 2009) New Revision: 6315 Modified: trunk/numpy/ma/core.py trunk/numpy/ma/tests/test_core.py Log: * Added flatten_structured_arrays * Fixed _get_recordarray for nested structures Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-09 19:37:16 UTC (rev 6314) +++ trunk/numpy/ma/core.py 2009-01-09 20:18:12 UTC (rev 6315) @@ -1483,6 +1483,56 @@ return d +def flatten_structured_array(a): + """ + Flatten a strutured array. + + The datatype of the output is the largest datatype of the (nested) fields. + + Returns + ------- + output : var + Flatten MaskedArray if the input is a MaskedArray, + standard ndarray otherwise. + + Examples + -------- + >>> ndtype = [('a', int), ('b', float)] + >>> a = np.array([(1, 1), (2, 2)], dtype=ndtype) + >>> flatten_structured_array(a) + array([[1., 1.], + [2., 2.]]) + + """ + # + def flatten_sequence(iterable): + """Flattens a compound of nested iterables.""" + for elm in iter(iterable): + if hasattr(elm,'__iter__'): + for f in flatten_sequence(elm): + yield f + else: + yield elm + # + a = np.asanyarray(a) + inishape = a.shape + a = a.ravel() + if isinstance(a, MaskedArray): + out = np.array([tuple(flatten_sequence(d.item())) for d in a._data]) + out = out.view(MaskedArray) + out._mask = np.array([tuple(flatten_sequence(d.item())) + for d in getmaskarray(a)]) + else: + out = np.array([tuple(flatten_sequence(d.item())) for d in a]) + if len(inishape) > 1: + newshape = list(out.shape) + newshape[0] = inishape + out.shape = tuple(flatten_sequence(newshape)) + return out + + + + class MaskedArray(ndarray): """ Arrays with possibly masked values. 
Masked values of True @@ -2021,34 +2071,28 @@ # return self._mask.reshape(self.shape) return self._mask mask = property(fget=_get_mask, fset=__setmask__, doc="Mask") - # - def _getrecordmask(self): - """Return the mask of the records. + + + def _get_recordmask(self): + """ + Return the mask of the records. A record is masked when all the fields are masked. """ _mask = ndarray.__getattribute__(self, '_mask').view(ndarray) if _mask.dtype.names is None: return _mask - if _mask.size > 1: - axis = 1 - else: - axis = None - # - try: - return _mask.view((bool_, len(self.dtype))).all(axis) - except ValueError: - # In case we have nested fields... - return np.all([[f[n].all() for n in _mask.dtype.names] - for f in _mask], axis=axis) + return np.all(flatten_structured_array(_mask), axis=-1) - def _setrecordmask(self): + + def _set_recordmask(self): """Return the mask of the records. A record is masked when all the fields are masked. """ raise NotImplementedError("Coming soon: setting the mask per records!") - recordmask = property(fget=_getrecordmask) + recordmask = property(fget=_get_recordmask) + #............................................ def harden_mask(self): """Force the mask to hard. 
Modified: trunk/numpy/ma/tests/test_core.py =================================================================== --- trunk/numpy/ma/tests/test_core.py 2009-01-09 19:37:16 UTC (rev 6314) +++ trunk/numpy/ma/tests/test_core.py 2009-01-09 20:18:12 UTC (rev 6315) @@ -482,8 +482,12 @@ test = a.filled(0) control = np.array([(1, (0, 1)), (2, (2, 0))], dtype=ndtype) assert_equal(test, control) - + # + test = a['B'].filled(0) + control = np.array([(0, 1), (2, 0)], dtype=a['B'].dtype) + assert_equal(test, control) + def test_optinfo_propagation(self): "Checks that _optinfo dictionary isn't back-propagated" x = array([1,2,3,], dtype=float) @@ -503,6 +507,45 @@ control = "[(--, (2, --)) (4, (--, 6.0))]" assert_equal(str(test), control) + + def test_flatten_structured_array(self): + "Test flatten_structured_array on arrays" + # On ndarray + ndtype = [('a', int), ('b', float)] + a = np.array([(1, 1), (2, 2)], dtype=ndtype) + test = flatten_structured_array(a) + control = np.array([[1., 1.], [2., 2.]], dtype=np.float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + # On masked_array + a = ma.array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + test = flatten_structured_array(a) + control = ma.array([[1., 1.], [2., 2.]], + mask=[[0, 1], [1, 0]], dtype=np.float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + assert_equal(test.mask, control.mask) + # On masked array with nested structure + ndtype = [('a', int), ('b', [('ba', int), ('bb', float)])] + a = ma.array([(1, (1, 1.1)), (2, (2, 2.2))], + mask=[(0, (1, 0)), (1, (0, 1))], dtype=ndtype) + test = flatten_structured_array(a) + control = ma.array([[1., 1., 1.1], [2., 2., 2.2]], + mask=[[0, 1, 0], [1, 0, 1]], dtype=np.float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + assert_equal(test.mask, control.mask) + # Keeping the initial shape + ndtype = [('a', int), ('b', float)] + a = np.array([[(1, 1),], [(2, 2),]], dtype=ndtype) + test = 
flatten_structured_array(a) + control = np.array([[[1., 1.],], [[2., 2.],]], dtype=np.float) + assert_equal(test, control) + assert_equal(test.dtype, control.dtype) + + + + #------------------------------------------------------------------------------ class TestMaskedArrayArithmetic(TestCase): From numpy-svn at scipy.org Fri Jan 9 20:53:10 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Fri, 9 Jan 2009 19:53:10 -0600 (CST) Subject: [Numpy-svn] r6316 - in trunk/numpy/ma: . tests Message-ID: <20090110015310.1C6B7C7C009@scipy.org> Author: pierregm Date: 2009-01-09 19:53:05 -0600 (Fri, 09 Jan 2009) New Revision: 6316 Modified: trunk/numpy/ma/core.py trunk/numpy/ma/tests/test_core.py Log: * Add flatten_structured_array to the namespace Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-09 20:18:12 UTC (rev 6315) +++ trunk/numpy/ma/core.py 2009-01-10 01:53:05 UTC (rev 6316) @@ -34,7 +34,8 @@ 'default_fill_value', 'diag', 'diagonal', 'divide', 'dump', 'dumps', 'empty', 'empty_like', 'equal', 'exp', 'expand_dims', 'fabs', 'flatten_mask', 'fmod', 'filled', 'floor', 'floor_divide', - 'fix_invalid', 'frombuffer', 'fromflex', 'fromfunction', + 'fix_invalid', 'flatten_structured_array', 'frombuffer', 'fromflex', + 'fromfunction', 'getdata','getmask', 'getmaskarray', 'greater', 'greater_equal', 'harden_mask', 'hypot', 'identity', 'ids', 'indices', 'inner', 'innerproduct', Modified: trunk/numpy/ma/tests/test_core.py =================================================================== --- trunk/numpy/ma/tests/test_core.py 2009-01-09 20:18:12 UTC (rev 6315) +++ trunk/numpy/ma/tests/test_core.py 2009-01-10 01:53:05 UTC (rev 6316) @@ -518,20 +518,20 @@ assert_equal(test, control) assert_equal(test.dtype, control.dtype) # On masked_array - a = ma.array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) + a = array([(1, 1), (2, 2)], mask=[(0, 1), (1, 0)], dtype=ndtype) test = 
flatten_structured_array(a) - control = ma.array([[1., 1.], [2., 2.]], - mask=[[0, 1], [1, 0]], dtype=np.float) + control = array([[1., 1.], [2., 2.]], + mask=[[0, 1], [1, 0]], dtype=np.float) assert_equal(test, control) assert_equal(test.dtype, control.dtype) assert_equal(test.mask, control.mask) # On masked array with nested structure ndtype = [('a', int), ('b', [('ba', int), ('bb', float)])] - a = ma.array([(1, (1, 1.1)), (2, (2, 2.2))], - mask=[(0, (1, 0)), (1, (0, 1))], dtype=ndtype) + a = array([(1, (1, 1.1)), (2, (2, 2.2))], + mask=[(0, (1, 0)), (1, (0, 1))], dtype=ndtype) test = flatten_structured_array(a) - control = ma.array([[1., 1., 1.1], [2., 2., 2.2]], - mask=[[0, 1, 0], [1, 0, 1]], dtype=np.float) + control = array([[1., 1., 1.1], [2., 2., 2.2]], + mask=[[0, 1, 0], [1, 0, 1]], dtype=np.float) assert_equal(test, control) assert_equal(test.dtype, control.dtype) assert_equal(test.mask, control.mask) From numpy-svn at scipy.org Mon Jan 12 16:22:37 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 12 Jan 2009 15:22:37 -0600 (CST) Subject: [Numpy-svn] r6317 - branches/fix_float_format/numpy/core/tests Message-ID: <20090112212237.B4322C7C028@scipy.org> Author: ptvirtan Date: 2009-01-12 15:22:24 -0600 (Mon, 12 Jan 2009) New Revision: 6317 Modified: branches/fix_float_format/numpy/core/tests/test_multiarray.py branches/fix_float_format/numpy/core/tests/test_print.py Log: More tests for fromstring, fromfile, tostring, tofile. 
Modified: branches/fix_float_format/numpy/core/tests/test_multiarray.py =================================================================== --- branches/fix_float_format/numpy/core/tests/test_multiarray.py 2009-01-10 01:53:05 UTC (rev 6316) +++ branches/fix_float_format/numpy/core/tests/test_multiarray.py 2009-01-12 21:22:24 UTC (rev 6317) @@ -781,11 +781,13 @@ class TestIO(object): + """Test tofile, fromfile, tostring, and fromstring""" + def setUp(self): - shape = (4,7) + shape = (2,4,3) rand = np.random.random self.x = rand(shape) + rand(shape).astype(np.complex)*1j - self.x[:,0] = [nan, inf, -inf, nan] + self.x[0,:,1] = [nan, inf, -inf, nan] self.dtype = self.x.dtype self.filename = tempfile.mktemp() @@ -808,6 +810,30 @@ y = np.fromfile(self.filename, dtype=self.dtype) assert_array_equal(y, self.x.flat) + def test_roundtrip_binary_str(self): + s = self.x.tostring() + y = np.fromstring(s, dtype=self.dtype) + assert_array_equal(y, self.x.flat) + + s = self.x.tostring('F') + y = np.fromstring(s, dtype=self.dtype) + assert_array_equal(y, self.x.flatten('F')) + + def test_roundtrip_str(self): + x = self.x.real.ravel() + s = "@".join(map(str, x)) + y = np.fromstring(s, sep="@") + # NB. 
str imbues less precision + nan_mask = ~np.isfinite(x) + assert_array_equal(x[nan_mask], y[nan_mask]) + assert_array_almost_equal(x[~nan_mask], y[~nan_mask], decimal=5) + + def test_roundtrip_repr(self): + x = self.x.real.ravel() + s = "@".join(map(repr, x)) + y = np.fromstring(s, sep="@") + assert_array_equal(x, y) + def _check_from(self, s, value, **kw): y = np.fromstring(s, **kw) assert_array_equal(y, value) @@ -842,6 +868,7 @@ def test_counted_string(self): self._check_from('1,2,3,4', [1., 2., 3., 4.], count=4, sep=',') self._check_from('1,2,3,4', [1., 2., 3.], count=3, sep=',') + self._check_from('1,2,3,4', [1., 2., 3., 4.], count=-1, sep=',') def test_string_with_ws(self): self._check_from('1 2 3 4 ', [1, 2, 3, 4], dtype=int, sep=' ') @@ -857,6 +884,33 @@ def test_malformed(self): self._check_from('1.234 1,234', [1.234, 1.], sep=' ') + def test_long_sep(self): + self._check_from('1_x_3_x_4_x_5', [1,3,4,5], sep='_x_') + + def test_dtype(self): + v = np.array([1,2,3,4], dtype=np.int_) + self._check_from('1,2,3,4', v, sep=',', dtype=np.int_) + + def test_tofile_sep(self): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + f = open(self.filename, 'w') + x.tofile(f, sep=',') + f.close() + f = open(self.filename, 'r') + s = f.read() + f.close() + assert_equal(s, '1.51,2.0,3.51,4.0') + + def test_tofile_format(self): + x = np.array([1.51, 2, 3.51, 4], dtype=float) + f = open(self.filename, 'w') + x.tofile(f, sep=',', format='%.2f') + f.close() + f = open(self.filename, 'r') + s = f.read() + f.close() + assert_equal(s, '1.51,2.00,3.51,4.00') + @in_foreign_locale def _run_in_foreign_locale(self, func, fail=False): np.testing.dec.knownfailureif(fail)(func)(self) @@ -868,6 +922,8 @@ yield self._run_in_foreign_locale, TestIO.test_counted_string yield self._run_in_foreign_locale, TestIO.test_ascii yield self._run_in_foreign_locale, TestIO.test_malformed + yield self._run_in_foreign_locale, TestIO.test_tofile_sep + yield self._run_in_foreign_locale, 
TestIO.test_tofile_format class TestFromBuffer(TestCase): Modified: branches/fix_float_format/numpy/core/tests/test_print.py =================================================================== --- branches/fix_float_format/numpy/core/tests/test_print.py 2009-01-10 01:53:05 UTC (rev 6316) +++ branches/fix_float_format/numpy/core/tests/test_print.py 2009-01-12 21:22:24 UTC (rev 6317) @@ -43,7 +43,7 @@ err_msg='Failed str formatting for type %s' % tp) def test_nan_inf_float(): - """ Check formatting. + """ Check formatting of nan & inf. This is only for the str function, and only for simple types. The precision of np.float and np.longdouble aren't the same as the @@ -75,7 +75,7 @@ err_msg='Failed str formatting for type %s' % tp) def test_complex_types(): - """Check formatting. + """Check formatting of complex types. This is only for the str function, and only for simple types. The precision of np.float and np.longdouble aren't the same as the From numpy-svn at scipy.org Mon Jan 12 16:23:18 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 12 Jan 2009 15:23:18 -0600 (CST) Subject: [Numpy-svn] r6318 - in branches/fix_float_format/numpy/core: . src Message-ID: <20090112212318.E1C3AC7C028@scipy.org> Author: ptvirtan Date: 2009-01-12 15:22:51 -0600 (Mon, 12 Jan 2009) New Revision: 6318 Added: branches/fix_float_format/numpy/core/src/numpyos.c Removed: branches/fix_float_format/numpy/core/src/npy_format.c Modified: branches/fix_float_format/numpy/core/setup.py branches/fix_float_format/numpy/core/src/arraytypes.inc.src branches/fix_float_format/numpy/core/src/multiarraymodule.c branches/fix_float_format/numpy/core/src/scalartypes.inc.src Log: Recognize POSIX nan/inf representations in NumPyOS_strtod. Collect some NaN and Inf constants to a single place. 
Modified: branches/fix_float_format/numpy/core/setup.py =================================================================== --- branches/fix_float_format/numpy/core/setup.py 2009-01-12 21:22:24 UTC (rev 6317) +++ branches/fix_float_format/numpy/core/setup.py 2009-01-12 21:22:51 UTC (rev 6318) @@ -338,7 +338,7 @@ deps = [join('src','arrayobject.c'), join('src','arraymethods.c'), join('src','scalartypes.inc.src'), - join('src','npy_format.c'), + join('src','numpyos.c'), join('src','arraytypes.inc.src'), join('src','_signbit.c'), join('src','ucsnarrow.c'), Modified: branches/fix_float_format/numpy/core/src/arraytypes.inc.src =================================================================== --- branches/fix_float_format/numpy/core/src/arraytypes.inc.src 2009-01-12 21:22:24 UTC (rev 6317) +++ branches/fix_float_format/numpy/core/src/arraytypes.inc.src 2009-01-12 21:22:51 UTC (rev 6318) @@ -2,41 +2,17 @@ #include "config.h" static double -_getNAN(void) { -#ifdef NAN - return NAN; -#else - static double nan=0; - - if (nan == 0) { - double mul = 1e100; - double tmp = 0.0; - double pinf=0; - pinf = mul; - for (;;) { - pinf *= mul; - if (pinf == tmp) break; - tmp = pinf; - } - nan = pinf / pinf; - } - return nan; -#endif -} - - -static double MyPyFloat_AsDouble(PyObject *obj) { double ret = 0; PyObject *num; if (obj == Py_None) { - return _getNAN(); + return NumPyOS_NAN; } num = PyNumber_Float(obj); if (num == NULL) { - return _getNAN(); + return NumPyOS_NAN; } ret = PyFloat_AsDouble(num); Py_DECREF(num); @@ -192,7 +168,7 @@ op2 = op; Py_INCREF(op); } if (op2 == Py_None) { - oop.real = oop.imag = _getNAN(); + oop.real = oop.imag = NumPyOS_NAN; } else { oop = PyComplex_AsCComplex (op2); Modified: branches/fix_float_format/numpy/core/src/multiarraymodule.c =================================================================== --- branches/fix_float_format/numpy/core/src/multiarraymodule.c 2009-01-12 21:22:24 UTC (rev 6317) +++ 
branches/fix_float_format/numpy/core/src/multiarraymodule.c 2009-01-12 21:22:51 UTC (rev 6318) @@ -7705,6 +7705,9 @@ PyObject *m, *d, *s; PyObject *c_api; + /* Initialize constants etc. */ + NumPyOS_init(); + /* Create the module and add the functions */ m = Py_InitModule("multiarray", array_module_methods); if (!m) goto err; Deleted: branches/fix_float_format/numpy/core/src/npy_format.c =================================================================== --- branches/fix_float_format/numpy/core/src/npy_format.c 2009-01-12 21:22:24 UTC (rev 6317) +++ branches/fix_float_format/numpy/core/src/npy_format.c 2009-01-12 21:22:51 UTC (rev 6318) @@ -1,505 +0,0 @@ -#include -#include - -/* From the C99 standard, section 7.19.6: The exponent always contains at least - two digits, and only as many more digits as necessary to represent the - exponent. -*/ -/* We force 3 digits on windows for python < 2.6 for compatibility reason */ -#if defined(MS_WIN32) && (PY_VERSION_HEX < 0x02060000) -#define MIN_EXPONENT_DIGITS 3 -#else -#define MIN_EXPONENT_DIGITS 2 -#endif - -/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS - in length. */ -static void -_ensure_minimum_exponent_length(char* buffer, size_t buf_size) -{ - char *p = strpbrk(buffer, "eE"); - if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { - char *start = p + 2; - int exponent_digit_cnt = 0; - int leading_zero_cnt = 0; - int in_leading_zeros = 1; - int significant_digit_cnt; - - /* Skip over the exponent and the sign. */ - p += 2; - - /* Find the end of the exponent, keeping track of leading - zeros. 
*/ - while (*p && isdigit(Py_CHARMASK(*p))) { - if (in_leading_zeros && *p == '0') - ++leading_zero_cnt; - if (*p != '0') - in_leading_zeros = 0; - ++p; - ++exponent_digit_cnt; - } - - significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; - if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { - /* If there are 2 exactly digits, we're done, - regardless of what they contain */ - } - else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { - int extra_zeros_cnt; - - /* There are more than 2 digits in the exponent. See - if we can delete some of the leading zeros */ - if (significant_digit_cnt < MIN_EXPONENT_DIGITS) - significant_digit_cnt = MIN_EXPONENT_DIGITS; - - extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt; - - /* Delete extra_zeros_cnt worth of characters from the - front of the exponent */ - assert(extra_zeros_cnt >= 0); - - /* Add one to significant_digit_cnt to copy the - trailing 0 byte, thus setting the length */ - memmove(start, start + extra_zeros_cnt, significant_digit_cnt + 1); - } - else { - /* If there are fewer than 2 digits, add zeros - until there are 2, if there's enough room */ - int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; - if (start + zeros + exponent_digit_cnt + 1 < buffer + buf_size) { - memmove(start + zeros, start, exponent_digit_cnt + 1); - memset(start, '0', zeros); - } - } - } -} - -/* Ensure that buffer has a decimal point in it. The decimal point - will not be in the current locale, it will always be '.' */ -static void -_ensure_decimal_point(char* buffer, size_t buf_size) -{ - int insert_count = 0; - char* chars_to_insert; - - /* search for the first non-digit character */ - char *p = buffer; - if (*p == '-' || *p == '+') - /* Skip leading sign, if present. I think this could only - ever be '-', but it can't hurt to check for both. 
*/ - ++p; - while (*p && isdigit(Py_CHARMASK(*p))) - ++p; - - if (*p == '.') { - if (isdigit(Py_CHARMASK(*(p+1)))) { - /* Nothing to do, we already have a decimal - point and a digit after it */ - } - else { - /* We have a decimal point, but no following - digit. Insert a zero after the decimal. */ - ++p; - chars_to_insert = "0"; - insert_count = 1; - } - } - else { - chars_to_insert = ".0"; - insert_count = 2; - } - if (insert_count) { - size_t buf_len = strlen(buffer); - if (buf_len + insert_count + 1 >= buf_size) { - /* If there is not enough room in the buffer - for the additional text, just skip it. It's - not worth generating an error over. */ - } - else { - memmove(p + insert_count, p, - buffer + strlen(buffer) - p + 1); - memcpy(p, chars_to_insert, insert_count); - } - } -} - -/* see FORMATBUFLEN in unicodeobject.c */ -#define FLOAT_FORMATBUFLEN 120 - -/* Given a string that may have a decimal point in the current - locale, change it back to a dot. Since the string cannot get - longer, no need for a maximum buffer size parameter. */ -static void -_change_decimal_from_locale_to_dot(char* buffer) -{ - struct lconv *locale_data = localeconv(); - const char *decimal_point = locale_data->decimal_point; - - if (decimal_point[0] != '.' 
|| decimal_point[1] != 0) { - size_t decimal_point_len = strlen(decimal_point); - - if (*buffer == '+' || *buffer == '-') - buffer++; - while (isdigit(Py_CHARMASK(*buffer))) - buffer++; - if (strncmp(buffer, decimal_point, decimal_point_len) == 0) { - *buffer = '.'; - buffer++; - if (decimal_point_len > 1) { - /* buffer needs to get smaller */ - size_t rest_len = strlen(buffer + - (decimal_point_len - 1)); - memmove(buffer, - buffer + (decimal_point_len - 1), - rest_len); - buffer[rest_len] = 0; - } - } - } -} - -/* - * Check that the format string is a valid one for NumPyOS_ascii_format* - */ -static int -_check_ascii_format(const char *format) -{ - char format_char; - size_t format_len = strlen(format); - - /* The last character in the format string must be the format char */ - format_char = format[format_len - 1]; - - if (format[0] != '%') { - return -1; - } - - /* I'm not sure why this test is here. It's ensuring that the format - string after the first character doesn't have a single quote, a - lowercase l, or a percent. This is the reverse of the commented-out - test about 10 lines ago. */ - if (strpbrk(format + 1, "'l%")) { - return -1; - } - - /* Also curious about this function is that it accepts format strings - like "%xg", which are invalid for floats. In general, the - interface to this function is not very good, but changing it is - difficult because it's a public API. */ - - if (!(format_char == 'e' || format_char == 'E' || - format_char == 'f' || format_char == 'F' || - format_char == 'g' || format_char == 'G')) { - return -1; - } - - return 0; -} - -/* - * Fix the generated string: make sure the decimal is ., that exponent has a - * minimal number of digits, and that it has a decimal + one digit after that - * decimal if decimal argument != 0 (Same effect that 'Z' format in - * PyOS_ascii_formatd - */ -static char* -_fix_ascii_format(char* buf, size_t buflen, int decimal) -{ - /* Get the current locale, and find the decimal point string. 
- Convert that string back to a dot. */ - _change_decimal_from_locale_to_dot(buf); - - /* If an exponent exists, ensure that the exponent is at least - MIN_EXPONENT_DIGITS digits, providing the buffer is large enough - for the extra zeros. Also, if there are more than - MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get - back to MIN_EXPONENT_DIGITS */ - _ensure_minimum_exponent_length(buf, buflen); - - if (decimal != 0) { - _ensure_decimal_point(buf, buflen); - } - - return buf; -} - -/* - * NumPyOS_ascii_format*: - * - buffer: A buffer to place the resulting string in - * - buf_size: The length of the buffer. - * - format: The printf()-style format to use for the code to use for - * converting. - * - value: The value to convert - * - decimal: if != 0, always has a decimal, and at leasat one digit after - * the decimal. This has the same effect as passing 'Z' in the origianl - * PyOS_ascii_formatd - * - * This is similar to PyOS_ascii_formatd in python > 2.6, except that it does - * not handle 'n', and handles nan / inf. - * - * Converts a #gdouble to a string, using the '.' as decimal point. To format - * the number you pass in a printf()-style format string. Allowed conversion - * specifiers are 'e', 'E', 'f', 'F', 'g', 'G'. - * - * Return value: The pointer to the buffer with the converted string. 
- */ -#define _ASCII_FORMAT(type, suffix, print_type) \ - static char* \ - NumPyOS_ascii_format ## suffix(char *buffer, size_t buf_size, \ - const char *format, \ - type val, int decimal) \ - { \ - if (isfinite(val)) { \ - if(_check_ascii_format(format)) { \ - return NULL; \ - } \ - PyOS_snprintf(buffer, buf_size, format, (print_type)val); \ - return _fix_ascii_format(buffer, buf_size, decimal); \ - } \ - else if (isnan(val)){ \ - if (buf_size < 4) { \ - return NULL; \ - } \ - strcpy(buffer, "nan"); \ - } \ - else { \ - if (signbit(val)) { \ - if (buf_size < 5) { \ - return NULL; \ - } \ - strcpy(buffer, "-inf"); \ - } \ - else { \ - if (buf_size < 4) { \ - return NULL; \ - } \ - strcpy(buffer, "inf"); \ - } \ - } \ - return buffer; \ - } - -_ASCII_FORMAT(float, f, float) -_ASCII_FORMAT(double, d, double) -#ifndef FORCE_NO_LONG_DOUBLE_FORMATTING -_ASCII_FORMAT(long double, l, long double) -#else -_ASCII_FORMAT(long double, l, double) -#endif - - -/* NumPyOS_ascii_isspace: - * - * Same as isspace under C locale - */ -static int -NumPyOS_ascii_isspace(char c) -{ - return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || - c == '\v'; -} - - -/* NumPyOS_ascii_strtod: - * - * Work around bugs in PyOS_ascii_strtod - */ -static double -NumPyOS_ascii_strtod(const char *s, char** endptr) -{ - struct lconv *locale_data = localeconv(); - const char *decimal_point = locale_data->decimal_point; - size_t decimal_point_len = strlen(decimal_point); - - char buffer[FLOAT_FORMATBUFLEN+1]; - char *p; - size_t n; - double result; - - while (NumPyOS_ascii_isspace(*s)) { - ++s; - } - - /* ## 1 - * - * At least Python versions <= 2.5.2 and <= 2.6.1 - * - * Fails to do best-efforts parsing of strings of the form "1234" - * where is the decimal point under the foreign locale. - */ - if (decimal_point[0] != '.' 
|| decimal_point[1] != 0) { - p = (char *)s; - if (*p == '+' || *p == '-') - ++p; - while (*p >= '0' && *p <= '9') - ++p; - if (strncmp(p, decimal_point, decimal_point_len) == 0) { - n = (size_t)(p - s); - if (n > FLOAT_FORMATBUFLEN) - n = FLOAT_FORMATBUFLEN; - memcpy(buffer, s, n); - buffer[n] = '\0'; - result = PyOS_ascii_strtod(buffer, &p); - if (endptr != NULL) { - *endptr = s + (p - buffer); - } - return result; - } - } - /* End of ##1 */ - - return PyOS_ascii_strtod(s, endptr); -} - - -/* - * NumPyOS_ascii_ftolf: - * * fp: FILE pointer - * * value: Place to store the value read - * - * Similar to PyOS_ascii_strtod, except that it reads input from a file. - * - * Similarly to fscanf, this function always consumes leading whitespace, - * and any text that could be the leading part in valid input. - * - * Return value: similar to fscanf. - * * 0 if no number read, - * * 1 if a number read, - * * EOF if end-of-file met before reading anything. - */ -static int -NumPyOS_ascii_ftolf(FILE *fp, double *value) -{ - char buffer[FLOAT_FORMATBUFLEN+1]; - char *endp; - char *p; - int c; - int ok; - - /* - * Pass on to PyOS_ascii_strtod the leftmost matching part in regexp - * - * \s*[+-]? ( [0-9]*\.[0-9]+([eE][+-]?[0-9]+) - * | nan ( \([:alphanum:_]*\) )? - * | inf(inity)? - * ) - * - * case-insensitively. - * - * The "do { ... } while (0)" wrapping in macros ensures that they behave - * properly eg. in "if ... else" structures. 
- */ - -#define END_MATCH() \ - goto buffer_filled - -#define NEXT_CHAR() \ - do { \ - if (c == EOF || endp >= buffer + FLOAT_FORMATBUFLEN) \ - END_MATCH(); \ - *endp++ = (char)c; \ - c = getc(fp); \ - } while (0) - -#define MATCH_ALPHA_STRING_NOCASE(string) \ - do { \ - for (p=(string); *p!='\0' && (c==*p || c+('a'-'A')==*p); ++p) \ - NEXT_CHAR(); \ - if (*p != '\0') END_MATCH(); \ - } while (0) - -#define MATCH_ONE_OR_NONE(condition) \ - do { if (condition) NEXT_CHAR(); } while (0) - -#define MATCH_ONE_OR_MORE(condition) \ - do { \ - ok = 0; \ - while (condition) { NEXT_CHAR(); ok = 1; } \ - if (!ok) END_MATCH(); \ - } while (0) - -#define MATCH_ZERO_OR_MORE(condition) \ - while (condition) { NEXT_CHAR(); } - -#define IS_NUMBER (c >= '0' && c <= '9') - -#define IS_ALPHA ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) - -#define IS_ALPHANUM (IS_NUMBER || IS_ALPHA) - - /* 1. emulate fscanf EOF handling */ - c = getc(fp); - if (c == EOF) - return EOF; - - /* 2. consume leading whitespace unconditionally */ - while (NumPyOS_ascii_isspace(c)) { - c = getc(fp); - } - - /* 3. 
start reading matching input to buffer */ - endp = buffer; - - /* 4.1 sign (optional) */ - MATCH_ONE_OR_NONE(c == '+' || c == '-'); - - /* 4.2 nan, inf, infinity; [case-insensitive] */ - if (c == 'n' || c == 'N') { - NEXT_CHAR(); - MATCH_ALPHA_STRING_NOCASE("an"); - - /* accept nan([:alphanum:_]*), similarly to strtod */ - if (c == '(') { - NEXT_CHAR(); - MATCH_ZERO_OR_MORE(IS_ALPHANUM || c == '_'); - if (c == ')') NEXT_CHAR(); - } - END_MATCH(); - } - else if (c == 'i' || c == 'I') { - NEXT_CHAR(); - MATCH_ALPHA_STRING_NOCASE("nfinity"); - END_MATCH(); - } - - /* 4.3 mantissa */ - MATCH_ZERO_OR_MORE(IS_NUMBER); - - if (c == '.') { - NEXT_CHAR(); - MATCH_ONE_OR_MORE(IS_NUMBER); - } - - /* 4.4 exponent */ - if (c == 'e' || c == 'E') { - NEXT_CHAR(); - MATCH_ONE_OR_NONE(c == '+' || c == '-'); - MATCH_ONE_OR_MORE(IS_NUMBER); - } - - END_MATCH(); - -buffer_filled: - - ungetc(c, fp); - *endp = '\0'; - - /* 5. try to convert buffer. */ - - /* No need for NumPyOS here, the bugs in PyOS_ascii_strtod discussed - above can't manifest here, since the above parsing only copies - "good" strings. */ - *value = PyOS_ascii_strtod(buffer, &p); - - return (buffer == p) ? 
0 : 1; /* if something was read */ -} - -#undef END_MATCH -#undef NEXT_CHAR -#undef MATCH_ALPHA_STRING_NOCASE -#undef MATCH_ONE_OR_NONE -#undef MATCH_ONE_OR_MORE -#undef MATCH_ZERO_OR_MORE -#undef IS_NUMBER -#undef IS_ALPHA -#undef IS_ALPHANUM Copied: branches/fix_float_format/numpy/core/src/numpyos.c (from rev 6317, branches/fix_float_format/numpy/core/src/npy_format.c) =================================================================== --- branches/fix_float_format/numpy/core/src/npy_format.c 2009-01-12 21:22:24 UTC (rev 6317) +++ branches/fix_float_format/numpy/core/src/numpyos.c 2009-01-12 21:22:51 UTC (rev 6318) @@ -0,0 +1,627 @@ +#include +#include + +/* From the C99 standard, section 7.19.6: The exponent always contains at least + two digits, and only as many more digits as necessary to represent the + exponent. +*/ +/* We force 3 digits on windows for python < 2.6 for compatibility reason */ +#if defined(MS_WIN32) && (PY_VERSION_HEX < 0x02060000) +#define MIN_EXPONENT_DIGITS 3 +#else +#define MIN_EXPONENT_DIGITS 2 +#endif + +/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS + in length. */ +static void +_ensure_minimum_exponent_length(char* buffer, size_t buf_size) +{ + char *p = strpbrk(buffer, "eE"); + if (p && (*(p + 1) == '-' || *(p + 1) == '+')) { + char *start = p + 2; + int exponent_digit_cnt = 0; + int leading_zero_cnt = 0; + int in_leading_zeros = 1; + int significant_digit_cnt; + + /* Skip over the exponent and the sign. */ + p += 2; + + /* Find the end of the exponent, keeping track of leading + zeros. 
*/ + while (*p && isdigit(Py_CHARMASK(*p))) { + if (in_leading_zeros && *p == '0') + ++leading_zero_cnt; + if (*p != '0') + in_leading_zeros = 0; + ++p; + ++exponent_digit_cnt; + } + + significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt; + if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) { + /* If there are exactly 2 digits, we're done, + regardless of what they contain */ + } + else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) { + int extra_zeros_cnt; + + /* There are more than 2 digits in the exponent. See + if we can delete some of the leading zeros */ + if (significant_digit_cnt < MIN_EXPONENT_DIGITS) + significant_digit_cnt = MIN_EXPONENT_DIGITS; + + extra_zeros_cnt = exponent_digit_cnt - significant_digit_cnt; + + /* Delete extra_zeros_cnt worth of characters from the + front of the exponent */ + assert(extra_zeros_cnt >= 0); + + /* Add one to significant_digit_cnt to copy the + trailing 0 byte, thus setting the length */ + memmove(start, start + extra_zeros_cnt, significant_digit_cnt + 1); + } + else { + /* If there are fewer than 2 digits, add zeros + until there are 2, if there's enough room */ + int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt; + if (start + zeros + exponent_digit_cnt + 1 < buffer + buf_size) { + memmove(start + zeros, start, exponent_digit_cnt + 1); + memset(start, '0', zeros); + } + } + } +} + +/* Ensure that buffer has a decimal point in it. The decimal point + will not be in the current locale, it will always be '.' */ +static void +_ensure_decimal_point(char* buffer, size_t buf_size) +{ + int insert_count = 0; + char* chars_to_insert; + + /* search for the first non-digit character */ + char *p = buffer; + if (*p == '-' || *p == '+') + /* Skip leading sign, if present. I think this could only + ever be '-', but it can't hurt to check for both. 
*/ + ++p; + while (*p && isdigit(Py_CHARMASK(*p))) + ++p; + + if (*p == '.') { + if (isdigit(Py_CHARMASK(*(p+1)))) { + /* Nothing to do, we already have a decimal + point and a digit after it */ + } + else { + /* We have a decimal point, but no following + digit. Insert a zero after the decimal. */ + ++p; + chars_to_insert = "0"; + insert_count = 1; + } + } + else { + chars_to_insert = ".0"; + insert_count = 2; + } + if (insert_count) { + size_t buf_len = strlen(buffer); + if (buf_len + insert_count + 1 >= buf_size) { + /* If there is not enough room in the buffer + for the additional text, just skip it. It's + not worth generating an error over. */ + } + else { + memmove(p + insert_count, p, + buffer + strlen(buffer) - p + 1); + memcpy(p, chars_to_insert, insert_count); + } + } +} + +/* see FORMATBUFLEN in unicodeobject.c */ +#define FLOAT_FORMATBUFLEN 120 + +/* Given a string that may have a decimal point in the current + locale, change it back to a dot. Since the string cannot get + longer, no need for a maximum buffer size parameter. */ +static void +_change_decimal_from_locale_to_dot(char* buffer) +{ + struct lconv *locale_data = localeconv(); + const char *decimal_point = locale_data->decimal_point; + + if (decimal_point[0] != '.' 
|| decimal_point[1] != 0) { + size_t decimal_point_len = strlen(decimal_point); + + if (*buffer == '+' || *buffer == '-') + buffer++; + while (isdigit(Py_CHARMASK(*buffer))) + buffer++; + if (strncmp(buffer, decimal_point, decimal_point_len) == 0) { + *buffer = '.'; + buffer++; + if (decimal_point_len > 1) { + /* buffer needs to get smaller */ + size_t rest_len = strlen(buffer + + (decimal_point_len - 1)); + memmove(buffer, + buffer + (decimal_point_len - 1), + rest_len); + buffer[rest_len] = 0; + } + } + } +} + +/* + * Check that the format string is a valid one for NumPyOS_ascii_format* + */ +static int +_check_ascii_format(const char *format) +{ + char format_char; + size_t format_len = strlen(format); + + /* The last character in the format string must be the format char */ + format_char = format[format_len - 1]; + + if (format[0] != '%') { + return -1; + } + + /* I'm not sure why this test is here. It's ensuring that the format + string after the first character doesn't have a single quote, a + lowercase l, or a percent. This is the reverse of the commented-out + test about 10 lines ago. */ + if (strpbrk(format + 1, "'l%")) { + return -1; + } + + /* Also curious about this function is that it accepts format strings + like "%xg", which are invalid for floats. In general, the + interface to this function is not very good, but changing it is + difficult because it's a public API. */ + + if (!(format_char == 'e' || format_char == 'E' || + format_char == 'f' || format_char == 'F' || + format_char == 'g' || format_char == 'G')) { + return -1; + } + + return 0; +} + +/* + * Fix the generated string: make sure the decimal is ., that exponent has a + * minimal number of digits, and that it has a decimal + one digit after that + * decimal if decimal argument != 0 (Same effect that 'Z' format in + * PyOS_ascii_formatd + */ +static char* +_fix_ascii_format(char* buf, size_t buflen, int decimal) +{ + /* Get the current locale, and find the decimal point string. 
+ Convert that string back to a dot. */ + _change_decimal_from_locale_to_dot(buf); + + /* If an exponent exists, ensure that the exponent is at least + MIN_EXPONENT_DIGITS digits, providing the buffer is large enough + for the extra zeros. Also, if there are more than + MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get + back to MIN_EXPONENT_DIGITS */ + _ensure_minimum_exponent_length(buf, buflen); + + if (decimal != 0) { + _ensure_decimal_point(buf, buflen); + } + + return buf; +} + +/* + * NumPyOS_ascii_format*: + * - buffer: A buffer to place the resulting string in + * - buf_size: The length of the buffer. + * - format: The printf()-style format to use for + * converting. + * - value: The value to convert + * - decimal: if != 0, always has a decimal, and at least one digit after + * the decimal. This has the same effect as passing 'Z' in the original + * PyOS_ascii_formatd + * + * This is similar to PyOS_ascii_formatd in python > 2.6, except that it does + * not handle 'n', and handles nan / inf. + * + * Converts a #gdouble to a string, using the '.' as decimal point. To format + * the number you pass in a printf()-style format string. Allowed conversion + * specifiers are 'e', 'E', 'f', 'F', 'g', 'G'. + * + * Return value: The pointer to the buffer with the converted string. 
+ */ +#define _ASCII_FORMAT(type, suffix, print_type) \ + static char* \ + NumPyOS_ascii_format ## suffix(char *buffer, size_t buf_size, \ + const char *format, \ + type val, int decimal) \ + { \ + if (isfinite(val)) { \ + if(_check_ascii_format(format)) { \ + return NULL; \ + } \ + PyOS_snprintf(buffer, buf_size, format, (print_type)val); \ + return _fix_ascii_format(buffer, buf_size, decimal); \ + } \ + else if (isnan(val)){ \ + if (buf_size < 4) { \ + return NULL; \ + } \ + strcpy(buffer, "nan"); \ + } \ + else { \ + if (signbit(val)) { \ + if (buf_size < 5) { \ + return NULL; \ + } \ + strcpy(buffer, "-inf"); \ + } \ + else { \ + if (buf_size < 4) { \ + return NULL; \ + } \ + strcpy(buffer, "inf"); \ + } \ + } \ + return buffer; \ + } + +_ASCII_FORMAT(float, f, float) +_ASCII_FORMAT(double, d, double) +#ifndef FORCE_NO_LONG_DOUBLE_FORMATTING +_ASCII_FORMAT(long double, l, long double) +#else +_ASCII_FORMAT(long double, l, double) +#endif + + +static double NumPyOS_PINF; /* Positive infinity */ +static double NumPyOS_PZERO; /* +0 */ +static double NumPyOS_NAN; /* NaN */ + +/* NumPyOS_init: + * + * initialize floating-point constants + */ +static void +NumPyOS_init(void) { + double mul = 1e100; + double div = 1e10; + double tmp, c; + + c = mul; + for (;;) { + c *= mul; + if (c == tmp) break; + tmp = c; + } + NumPyOS_PINF = c; + + c = div; + for (;;) { + c /= div; + if (c == tmp) break; + tmp = c; + } + NumPyOS_PZERO = c; + + NumPyOS_NAN = NumPyOS_PINF / NumPyOS_PINF; +} + + +/* NumPyOS_ascii_isspace: + * + * Same as isspace under C locale + */ +static int +NumPyOS_ascii_isspace(char c) +{ + return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || + c == '\v'; +} + + +/* NumPyOS_ascii_isalpha: + * + * Same as isalpha under C locale + */ +static int +NumPyOS_ascii_isalpha(char c) +{ + return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); +} + + +/* NumPyOS_ascii_isdigit: + * + * Same as isdigit under C locale + */ +static int 
+NumPyOS_ascii_isdigit(char c) +{ + return (c >= '0' && c <= '9'); +} + + +/* NumPyOS_ascii_isalnum: + * + * Same as isalnum under C locale + */ +static int +NumPyOS_ascii_isalnum(char c) +{ + return NumPyOS_ascii_isdigit(c) || NumPyOS_ascii_isalpha(c); +} + + +/* NumPyOS_ascii_tolower: + * + * Same as tolower under C locale + */ +static char +NumPyOS_ascii_tolower(char c) +{ + if (c >= 'A' && c <= 'Z') + return c + ('a'-'A'); + return c; +} + + +/* NumPyOS_ascii_strncasecmp: + * + * Same as strncasecmp under C locale + */ +static int +NumPyOS_ascii_strncasecmp(const char* s1, const char* s2, size_t len) +{ + int diff; + while (len > 0 && *s1 != '\0' && *s2 != '\0') { + diff = ((int)NumPyOS_ascii_tolower(*s1)) - + ((int)NumPyOS_ascii_tolower(*s2)); + if (diff != 0) return diff; + ++s1; + ++s2; + --len; + } + if (len > 0) + return ((int)*s1) - ((int)*s2); + return 0; +} + + +/* NumPyOS_ascii_strtod: + * + * Work around bugs in PyOS_ascii_strtod + */ +static double +NumPyOS_ascii_strtod(const char *s, char** endptr) +{ + struct lconv *locale_data = localeconv(); + const char *decimal_point = locale_data->decimal_point; + size_t decimal_point_len = strlen(decimal_point); + + char buffer[FLOAT_FORMATBUFLEN+1]; + char *p; + size_t n; + double result; + + while (NumPyOS_ascii_isspace(*s)) { + ++s; + } + + /* ##1 + * + * Recognize POSIX inf/nan representations on all platforms. 
+ */ + p = s; + result = 1.0; + if (*p == '-') { + result = -1.0; + ++p; + } + else if (*p == '+') { + ++p; + } + if (NumPyOS_ascii_strncasecmp(p, "nan", 3) == 0) { + p += 3; + if (*p == '(') { + ++p; + while (NumPyOS_ascii_isalnum(*p) || *p == '_') ++p; + if (*p == ')') ++p; + } + if (endptr != NULL) *endptr = p; + return NumPyOS_NAN; + } + else if (NumPyOS_ascii_strncasecmp(p, "inf", 3) == 0) { + p += 3; + if (NumPyOS_ascii_strncasecmp(p, "inity", 5) == 0) + p += 5; + if (endptr != NULL) *endptr = p; + return result*NumPyOS_PINF; + } + /* End of ##1 */ + + /* ## 2 + * + * At least Python versions <= 2.5.2 and <= 2.6.1 + * + * Fails to do best-efforts parsing of strings of the form "1<DP>234" + * where <DP> is the decimal point under the foreign locale. + */ + if (decimal_point[0] != '.' || decimal_point[1] != 0) { + p = (char *)s; + if (*p == '+' || *p == '-') + ++p; + while (*p >= '0' && *p <= '9') + ++p; + if (strncmp(p, decimal_point, decimal_point_len) == 0) { + n = (size_t)(p - s); + if (n > FLOAT_FORMATBUFLEN) + n = FLOAT_FORMATBUFLEN; + memcpy(buffer, s, n); + buffer[n] = '\0'; + result = PyOS_ascii_strtod(buffer, &p); + if (endptr != NULL) { + *endptr = s + (p - buffer); + } + return result; + } + } + /* End of ##2 */ + + return PyOS_ascii_strtod(s, endptr); +} + + +/* + * NumPyOS_ascii_ftolf: + * * fp: FILE pointer + * * value: Place to store the value read + * + * Similar to PyOS_ascii_strtod, except that it reads input from a file. + * + * Similarly to fscanf, this function always consumes leading whitespace, + * and any text that could be the leading part in valid input. + * + * Return value: similar to fscanf. + * * 0 if no number read, + * * 1 if a number read, + * * EOF if end-of-file met before reading anything. + */ +static int +NumPyOS_ascii_ftolf(FILE *fp, double *value) +{ + char buffer[FLOAT_FORMATBUFLEN+1]; + char *endp; + char *p; + int c; + int ok; + + /* + * Pass on to PyOS_ascii_strtod the leftmost matching part in regexp + * + * \s*[+-]? 
( [0-9]*\.[0-9]+([eE][+-]?[0-9]+) + * | nan ( \([:alphanum:_]*\) )? + * | inf(inity)? + * ) + * + * case-insensitively. + * + * The "do { ... } while (0)" wrapping in macros ensures that they behave + * properly eg. in "if ... else" structures. + */ + +#define END_MATCH() \ + goto buffer_filled + +#define NEXT_CHAR() \ + do { \ + if (c == EOF || endp >= buffer + FLOAT_FORMATBUFLEN) \ + END_MATCH(); \ + *endp++ = (char)c; \ + c = getc(fp); \ + } while (0) + +#define MATCH_ALPHA_STRING_NOCASE(string) \ + do { \ + for (p=(string); *p!='\0' && (c==*p || c+('a'-'A')==*p); ++p) \ + NEXT_CHAR(); \ + if (*p != '\0') END_MATCH(); \ + } while (0) + +#define MATCH_ONE_OR_NONE(condition) \ + do { if (condition) NEXT_CHAR(); } while (0) + +#define MATCH_ONE_OR_MORE(condition) \ + do { \ + ok = 0; \ + while (condition) { NEXT_CHAR(); ok = 1; } \ + if (!ok) END_MATCH(); \ + } while (0) + +#define MATCH_ZERO_OR_MORE(condition) \ + while (condition) { NEXT_CHAR(); } + + /* 1. emulate fscanf EOF handling */ + c = getc(fp); + if (c == EOF) + return EOF; + + /* 2. consume leading whitespace unconditionally */ + while (NumPyOS_ascii_isspace(c)) { + c = getc(fp); + } + + /* 3. 
start reading matching input to buffer */ + endp = buffer; + + /* 4.1 sign (optional) */ + MATCH_ONE_OR_NONE(c == '+' || c == '-'); + + /* 4.2 nan, inf, infinity; [case-insensitive] */ + if (c == 'n' || c == 'N') { + NEXT_CHAR(); + MATCH_ALPHA_STRING_NOCASE("an"); + + /* accept nan([:alphanum:_]*), similarly to strtod */ + if (c == '(') { + NEXT_CHAR(); + MATCH_ZERO_OR_MORE(NumPyOS_ascii_isalnum(c) || c == '_'); + if (c == ')') NEXT_CHAR(); + } + END_MATCH(); + } + else if (c == 'i' || c == 'I') { + NEXT_CHAR(); + MATCH_ALPHA_STRING_NOCASE("nfinity"); + END_MATCH(); + } + + /* 4.3 mantissa */ + MATCH_ZERO_OR_MORE(NumPyOS_ascii_isdigit(c)); + + if (c == '.') { + NEXT_CHAR(); + MATCH_ONE_OR_MORE(NumPyOS_ascii_isdigit(c)); + } + + /* 4.4 exponent */ + if (c == 'e' || c == 'E') { + NEXT_CHAR(); + MATCH_ONE_OR_NONE(c == '+' || c == '-'); + MATCH_ONE_OR_MORE(NumPyOS_ascii_isdigit(c)); + } + + END_MATCH(); + +buffer_filled: + + ungetc(c, fp); + *endp = '\0'; + + /* 5. try to convert buffer. */ + + *value = NumPyOS_ascii_strtod(buffer, &p); + + return (buffer == p) ? 
0 : 1; /* if something was read */ +} + +#undef END_MATCH +#undef NEXT_CHAR +#undef MATCH_ALPHA_STRING_NOCASE +#undef MATCH_ONE_OR_NONE +#undef MATCH_ONE_OR_MORE +#undef MATCH_ZERO_OR_MORE Modified: branches/fix_float_format/numpy/core/src/scalartypes.inc.src =================================================================== --- branches/fix_float_format/numpy/core/src/scalartypes.inc.src 2009-01-12 21:22:24 UTC (rev 6317) +++ branches/fix_float_format/numpy/core/src/scalartypes.inc.src 2009-01-12 21:22:51 UTC (rev 6318) @@ -6,7 +6,7 @@ #include "numpy/arrayscalars.h" #include "config.h" -#include "npy_format.c" +#include "numpyos.c" static PyBoolScalarObject _PyArrayScalar_BoolValues[2] = { {PyObject_HEAD_INIT(&PyBoolArrType_Type) 0}, From numpy-svn at scipy.org Mon Jan 12 16:26:24 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 12 Jan 2009 15:26:24 -0600 (CST) Subject: [Numpy-svn] r6319 - branches/fix_float_format/numpy/core/src Message-ID: <20090112212624.DB806C7C028@scipy.org> Author: ptvirtan Date: 2009-01-12 15:25:53 -0600 (Mon, 12 Jan 2009) New Revision: 6319 Modified: branches/fix_float_format/numpy/core/src/numpyos.c Log: Initialize a variable properly Modified: branches/fix_float_format/numpy/core/src/numpyos.c =================================================================== --- branches/fix_float_format/numpy/core/src/numpyos.c 2009-01-12 21:22:51 UTC (rev 6318) +++ branches/fix_float_format/numpy/core/src/numpyos.c 2009-01-12 21:25:53 UTC (rev 6319) @@ -302,6 +302,7 @@ double div = 1e10; double tmp, c; + tmp = 0; c = mul; for (;;) { c *= mul; @@ -310,6 +311,7 @@ } NumPyOS_PINF = c; + tmp = 0; c = div; for (;;) { c /= div; From numpy-svn at scipy.org Tue Jan 13 16:02:03 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Tue, 13 Jan 2009 15:02:03 -0600 (CST) Subject: [Numpy-svn] r6320 - in trunk: doc/source/reference numpy/ma Message-ID: <20090113210203.D82B8C7C078@scipy.org> Author: pierregm Date: 2009-01-13 
15:01:58 -0600 (Tue, 13 Jan 2009) New Revision: 6320 Added: trunk/doc/source/reference/maskedarray.baseclass.rst trunk/doc/source/reference/maskedarray.generic.rst trunk/doc/source/reference/maskedarray.rst Modified: trunk/doc/source/reference/arrays.classes.rst trunk/doc/source/reference/arrays.ndarray.rst trunk/numpy/ma/core.py trunk/numpy/ma/extras.py Log: numpy.ma.core: * introduced baseclass, sharedmask and hardmask as readonly properties of MaskedArray * docstrings update numpy.ma.extras: * docstring updates docs/reference * introduced maskedarray, maskedarray.baseclass, maskedarray.generic Modified: trunk/doc/source/reference/arrays.classes.rst =================================================================== --- trunk/doc/source/reference/arrays.classes.rst 2009-01-12 21:25:53 UTC (rev 6319) +++ trunk/doc/source/reference/arrays.classes.rst 2009-01-13 21:01:58 UTC (rev 6320) @@ -261,8 +261,13 @@ Masked arrays (:mod:`numpy.ma`) =============================== -.. seealso:: :ref:`routines.ma` +.. seealso:: :ref:`maskedarray` +Masked arrays are arrays that may have missing or invalid entries. +The :mod:`numpy.ma` module provides a nearly work-alike replacement for numpy +that supports data arrays with masks. + + .. XXX: masked array documentation should be improved .. currentmodule:: numpy @@ -270,14 +275,12 @@ .. index:: single: masked arrays -.. autosummary:: - :toctree: generated/ +.. toctree:: + :maxdepth: 2 - ma.masked_array + maskedarray -.. automodule:: numpy.ma - Standard container class ======================== Modified: trunk/doc/source/reference/arrays.ndarray.rst =================================================================== --- trunk/doc/source/reference/arrays.ndarray.rst 2009-01-12 21:25:53 UTC (rev 6319) +++ trunk/doc/source/reference/arrays.ndarray.rst 2009-01-13 21:01:58 UTC (rev 6320) @@ -135,6 +135,8 @@ is automatically made. +.. _arrays.ndarray.attributes: + Array attributes ================ @@ -217,6 +219,9 @@ .. 
note:: XXX: update and check these docstrings. + +.. _array.ndarray.methods: + Array methods ============= Added: trunk/doc/source/reference/maskedarray.baseclass.rst =================================================================== --- trunk/doc/source/reference/maskedarray.baseclass.rst 2009-01-12 21:25:53 UTC (rev 6319) +++ trunk/doc/source/reference/maskedarray.baseclass.rst 2009-01-13 21:01:58 UTC (rev 6320) @@ -0,0 +1,395 @@ + +.. currentmodule:: numpy.ma + + +.. _numpy.ma.constants: + +Constants of the :mod:`numpy.ma` module +======================================= + +In addition to the :class:`MaskedArray` class, the :mod:`numpy.ma` module +defines several constants. + +.. data:: masked + + The :attr:`masked` constant is a special case of :class:`MaskedArray`, + with a float datatype and a null shape. It is used to test whether a + specific entry of a masked array is masked, or to mask one or several + entries of a masked array:: + + >>> x = ma.array([1, 2, 3], mask=[0, 1, 0]) + >>> x[1] is ma.masked + True + >>> x[-1] = ma.masked + >>> x + masked_array(data = [1 -- --], + mask = [False True True], + fill_value = 999999) + + +.. data:: nomask + + Value indicating that a masked array has no invalid entry. + :attr:`nomask` is used internally to speed up computations when the mask + is not needed. + + +.. data:: masked_print_options + + String used in lieu of missing data when a masked array is printed. + By default, this string is ``'--'``. + + + + +.. _maskedarray.baseclass: + +The :class:`MaskedArray` class +============================== + + An instance of :class:`MaskedArray` can be thought as the combination of several elements: + +* The :attr:`data`, as a regular :class:`numpy.ndarray` of any shape or datatype (the data). +* A boolean :attr:`mask` with the same shape as the data, where a ``True`` value indicates that the corresponding element of the data is invalid. 
+ The special value :attr:`nomask` is also acceptable for arrays without named fields, and indicates that no data is invalid. +* A :attr:`fill_value`, a value that may be used to replace the invalid entries in order to return a standard :class:`numpy.ndarray`. + + + +Attributes and properties of masked arrays +------------------------------------------ + +.. seealso:: :ref:`Array Attributes ` + + +.. attribute:: MaskedArray.data + + Returns the underlying data, as a view of the masked array. + If the underlying data is a subclass of :class:`numpy.ndarray`, it is + returned as such. + + >>> x = ma.array(np.matrix([[1, 2], [3, 4]]), mask=[[0, 1], [1, 0]]) + >>> x.data + matrix([[1, 2], + [3, 4]]) + + The type of the data can be accessed through the :attr:`baseclass` + attribute. + +.. attribute:: MaskedArray.mask + + Returns the underlying mask, as an array with the same shape and structure + as the data, but where all fields are booleans. + A value of ``True`` indicates an invalid entry. + + +.. attribute:: MaskedArray.recordmask + + Returns the mask of the array if it has no named fields. For structured + arrays, returns a ndarray of booleans where entries are ``True`` if **all** + the fields are masked, ``False`` otherwise:: + + >>> x = ma.array([(1, 1), (2, 2), (3, 3), (4, 4), (5, 5)], + ... mask=[(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)], + ... dtype=[('a', int), ('b', int)]) + >>> x.recordmask + array([False, False, True, False, False], dtype=bool) + + +.. attribute:: MaskedArray.fill_value + + Returns the value used to fill the invalid entries of a masked array. + The value is either a scalar (if the masked array has no named fields), + or a 0d-ndarray with the same datatype as the masked array if it has + named fields. + + The default filling value depends on the datatype of the array: + + ======== ======== + datatype default + ======== ======== + bool True + int 999999 + float 1.e20 + complex 1.e20+0j + object '?' + string 'N/A' + ======== ======== + + + +.. 
attribute:: MaskedArray.baseclass + + Returns the class of the underlying data.:: + + >>> x = ma.array(np.matrix([[1, 2], [3, 4]]), mask=[[0, 0], [1, 0]]) + >>> x.baseclass + + + +.. attribute:: MaskedArray.sharedmask + + Returns whether the mask of the array is shared between several arrays. + If this is the case, any modification to the mask of one array will be + propagated to the other masked arrays. + + +.. attribute:: MaskedArray.hardmask + + Returns whether the mask is hard (``True``) or soft (``False``). + When the mask is hard, masked entries cannot be unmasked. + + +As :class:`MaskedArray` is a subclass of :class:`~numpy.ndarray`, a masked array also inherits all the attributes and properties of a :class:`~numpy.ndarray` instance. + +.. autosummary:: + :toctree: generated/ + + MaskedArray.flags + MaskedArray.shape + MaskedArray.strides + MaskedArray.ndim + MaskedArray.size + MaskedArray.itemsize + MaskedArray.nbytes + MaskedArray.base + MaskedArray.dtype + MaskedArray.T + MaskedArray.real + MaskedArray.imag + MaskedArray.flat + MaskedArray.ctypes + MaskedArray.__array_priority__ + + + +:class:`MaskedArray` methods +============================ + +.. seealso:: :ref:`Array methods ` + + +Conversion +---------- + +.. autosummary:: + :toctree: generated/ + + MaskedArray.view + MaskedArray.astype + MaskedArray.filled + MaskedArray.tofile + MaskedArray.toflex + MaskedArray.tolist + MaskedArray.torecords + MaskedArray.tostring + + +Shape manipulation +------------------ + +For reshape, resize, and transpose, the single tuple argument may be +replaced with ``n`` integers which will be interpreted as an n-tuple. + +.. autosummary:: + :toctree: generated/ + + MaskedArray.flatten + MaskedArray.ravel + MaskedArray.reshape + MaskedArray.resize + MaskedArray.squeeze + MaskedArray.swapaxes + MaskedArray.transpose + + +Item selection and manipulation +------------------------------- + +For array methods that take an *axis* keyword, it defaults to +:const:`None`. 
If axis is *None*, then the array is treated as a 1-D +array. Any other value for *axis* represents the dimension along which +the operation should proceed. + +.. autosummary:: + :toctree: generated/ + + MaskedArray.argsort + MaskedArray.choose + MaskedArray.compress + MaskedArray.diagonal + MaskedArray.nonzero + MaskedArray.put + MaskedArray.repeat + MaskedArray.searchsorted + MaskedArray.sort + MaskedArray.take + + +Calculations +------------ + +.. autosummary:: + :toctree: generated/ + + MaskedArray.all + MaskedArray.anom + MaskedArray.any + MaskedArray.argmax + MaskedArray.argmin + MaskedArray.clip + MaskedArray.conj + MaskedArray.cumprod + MaskedArray.cumsum + MaskedArray.mean + MaskedArray.min + MaskedArray.prod + MaskedArray.ptp + MaskedArray.round + MaskedArray.std + MaskedArray.sum + MaskedArray.trace + MaskedArray.var + + +Arithmetic and comparison operations +------------------------------------ + +.. index:: comparison, arithmetic, operation, operator + +Comparison operators: +~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + MaskedArray.__lt__ + MaskedArray.__le__ + MaskedArray.__gt__ + MaskedArray.__ge__ + MaskedArray.__eq__ + MaskedArray.__ne__ + +Truth value of an array (:func:`bool()`): +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + MaskedArray.__nonzero__ + + +Arithmetic: +~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + MaskedArray.__add__ + MaskedArray.__sub__ + MaskedArray.__mul__ + MaskedArray.__div__ + MaskedArray.__truediv__ + MaskedArray.__floordiv__ + MaskedArray.__mod__ + MaskedArray.__divmod__ + MaskedArray.__pow__ + MaskedArray.__lshift__ + MaskedArray.__rshift__ + MaskedArray.__and__ + MaskedArray.__or__ + MaskedArray.__xor__ + + +Arithmetic, in-place: +~~~~~~~~~~~~~~~~~~~~~ + +.. 
autosummary:: + :toctree: generated/ + + MaskedArray.__iadd__ + MaskedArray.__isub__ + MaskedArray.__imul__ + MaskedArray.__idiv__ + MaskedArray.__itruediv__ + MaskedArray.__ifloordiv__ + MaskedArray.__imod__ + MaskedArray.__ipow__ + MaskedArray.__ilshift__ + MaskedArray.__irshift__ + MaskedArray.__iand__ + MaskedArray.__ior__ + MaskedArray.__ixor__ + + + +Special methods +--------------- + +For standard library functions: + +.. autosummary:: + :toctree: generated/ + + MaskedArray.__copy__ + MaskedArray.__deepcopy__ + MaskedArray.__reduce__ + MaskedArray.__setstate__ + +Basic customization: + +.. autosummary:: + :toctree: generated/ + + MaskedArray.__new__ + MaskedArray.__array__ + MaskedArray.__array_wrap__ + +Container customization: (see :ref:`Indexing `) + +.. autosummary:: + :toctree: generated/ + + MaskedArray.__len__ + MaskedArray.__getitem__ + MaskedArray.__setitem__ + MaskedArray.__getslice__ + MaskedArray.__setslice__ + MaskedArray.__contains__ + + + +Specific methods +---------------- + +Handling the mask +~~~~~~~~~~~~~~~~~ + +The following methods can be used to access information about the mask or to +manipulate the mask. + +.. autosummary:: + :toctree: generated/ + + MaskedArray.harden_mask + MaskedArray.soften_mask + MaskedArray.unshare_mask + MaskedArray.shrink_mask + + +Handling the `fill_value` +~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autosummary:: + :toctree: generated/ + + MaskedArray.get_fill_value + MaskedArray.set_fill_value + + +.. autosummary:: + :toctree: generated/ + + MaskedArray.compressed + MaskedArray.count + Added: trunk/doc/source/reference/maskedarray.generic.rst =================================================================== --- trunk/doc/source/reference/maskedarray.generic.rst 2009-01-12 21:25:53 UTC (rev 6319) +++ trunk/doc/source/reference/maskedarray.generic.rst 2009-01-13 21:01:58 UTC (rev 6320) @@ -0,0 +1,427 @@ +.. currentmodule:: numpy.ma + +.. 
_maskedarray.generic: + + + +The :mod:`numpy.ma` module +========================== + +Rationale +--------- + +Masked arrays are arrays that may have missing or invalid entries. +The :mod:`numpy.ma` module provides a nearly work-alike replacement for numpy +that supports data arrays with masks. + + + +What is a masked array? +----------------------- + +In many circumstances, datasets can be incomplete or tainted by the presence of invalid data. For example, a sensor may have failed to record a data, or +recorded an invalid value. +The :mod:`numpy.ma` module provides a convenient way to address this issue, by introducing masked arrays. + +A masked array is the combination of a standard :class:`numpy.ndarray` and a mask. A mask is either :attr:`nomask`, indicating that no value of the associated array is invalid, or an array of booleans that determines for each element of the associated array whether the value is valid or not. When an element of the mask is ``False``, the corresponding element of the associated array is valid and is said to be unmasked. When an element of the mask is ``True``, the corresponding element of the associated array is said to be masked (invalid). + +The package ensures that masked entries are not used in computations. + +As an illustration, let's consider the following dataset:: + + >>> import numpy as np + >>> x = np.array([1, 2, 3, -1, 5]) + +We wish to mark the fourth entry as invalid. The easiest is to create a masked +array:: + + >>> mx = ma.masked_array(x, mask=[0, 0, 0, 1, 0]) + +We can now compute the mean of the dataset, without taking the invalid data into account:: + + >>> mx.mean() + 2.75 + + +The :mod:`numpy.ma` module +-------------------------- + + +The main feature of the :mod:`numpy.ma` module is the :class:`~numpy.ma.MaskedArray` class, which is a subclass of :class:`numpy.ndarray`. +The class, its attributes and methods are described in more details in the +:ref:`MaskedArray class ` section. 
+ +The :mod:`numpy.ma` module can be used as an addition to :mod:`numpy`: :: + + >>> import numpy as np + >>> import numpy.ma as ma + +To create an array with the second element invalid, we would do:: + + >>> y = ma.array([1, 2, 3], mask = [0, 1, 0]) + +To create a masked array where all values close to 1.e20 are invalid, we would +do:: + + >>> z = masked_values([1.0, 1.e20, 3.0, 4.0], 1.e20) + +For a complete discussion of creation methods for masked arrays please see +section :ref:`Constructing masked arrays `. + + + + +Using numpy.ma +============== + +.. _maskedarray.generic.constructing: + +Constructing masked arrays +-------------------------- + +There are several ways to construct a masked array. + +* A first possibility is to directly invoke the :class:`MaskedArray` class. + +* A second possibility is to use the two masked array constructors, + :func:`array` and :func:`masked_array`. + + .. autosummary:: + :toctree: generated/ + + array + masked_array + + +* A third option is to take the view of an existing array. In that case, the + mask of the view is set to :attr:`nomask` if the array has no named fields, + or an array of boolean with the same structure as the array otherwise.:: + + >>> x = np.array([1, 2, 3]) + >>> x.view(ma.MaskedArray) + masked_array(data = [1 2 3], + mask = False, + fill_value = 999999) + +* Yet another possibility is to use any of the following functions: + + .. autosummary:: + :toctree: generated/ + + asarray + asanyarray + fix_invalid + masked_equal + masked_greater + masked_greater_equal + masked_inside + masked_invalid + masked_less + masked_less_equal + masked_not_equal + masked_object + masked_outside + masked_values + masked_where + + + +Accessing the data +------------------ + +The underlying data of a masked array can be accessed through several ways: + +* through the :attr:`data` attribute. 
The output is a view of the array as + a :class:`numpy.ndarray` or one of its subclasses, depending on the type + of the underlying data at the masked array creation. + +* through the :meth:`~MaskedArray.__array__` method. The output is then a :class:`numpy.ndarray`. + +* by directly taking a view of the masked array as a :class:`numpy.ndarray` or one of its subclass (which is actually what using the :attr:`data` attribute does). + +* by using the :func:`getdata` function. + + +None of these methods is completely satisfactory if some entries have been marked as invalid. As a general rule, invalid data should not be relied on. +If a representation of the array is needed without any masked entries, it is recommended to fill the array with the :meth:`filled` method. + + + +Accessing the mask +------------------ + +The mask of a masked array is accessible through its :attr:`mask` attribute. +We must keep in mind that a ``True`` entry in the mask indicates an *invalid* data. + +Another possibility is to use the :func:`getmask` and :func:`getmaskarray` functions. :func:`getmask(x)` outputs the mask of ``x`` if ``x`` is a masked array, and the special value :data:`nomask` otherwise. +:func:`getmaskarray(x)` outputs the mask of ``x`` if ``x`` is a masked array. +If ``x`` has no invalid entry or is not a masked array, the function outputs a boolean array of ``False`` with as many elements as ``x``. + + + + +Accessing only the valid entries +--------------------------------- + +To retrieve only the valid entries, we can use the inverse of the mask as an index. 
The inverse of the mask can be calculated with the :func:`numpy.logical_not` function or simply with the ``~`` operator:: + + >>> x = ma.array([[1, 2], [3, 4]], mask=[[0, 1], [1, 0]]) + >>> x[~x.mask] + masked_array(data = [1 4], + mask = [False False], + fill_value = 999999) + +Another way to retrieve the valid data is to use the :meth:`compressed` method, which returns a one-dimensional :class:`~numpy.ndarray` (or one of its subclasses, depending on the value of the :attr:`baseclass` attribute):: + + >>> x.compressed + array([1, 4]) + + + +Modifying the mask +------------------ + +Masking an entry +~~~~~~~~~~~~~~~~ + +The recommended way to mark one or several specific entries of a masked array as invalid is to assign the special value :attr:`masked` to them:: + + >>> x = ma.array([1, 2, 3]) + >>> x[0] = ma.masked + >>> x + masked_array(data = [-- 2 3], + mask = [ True False False], + fill_value = 999999) + >>> y = ma.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) + >>> y[(0, 1, 2), (1, 2, 0)] = ma.masked + >>> y + masked_array(data = + [[1 -- 3] + [4 5 --] + [-- 8 9]], + mask = + [[False True False] + [False False True] + [ True False False]], + fill_value = 999999) + >>> z = ma.array([1, 2, 3, 4]) + >>> z[:-2] = ma.masked + >>> z + masked_array(data = [-- -- 3 4], + mask = [ True True False False], + fill_value = 999999) + + +A second possibility is to modify the mask directly, but this usage is discouraged. + +.. note:: + When creating a new masked array with a simple, non-structured datatype, the mask is initially set to the special value :attr:`nomask`, that corresponds roughly to the boolean ``False``. Trying to set an element of :attr:`nomask` will fail with a :exc:`TypeError` exception, as a boolean does not support item assignment. 
+ + +All the entries of an array can be masked at once by assigning ``True`` to the mask:: + + >>> x = ma.array([1, 2, 3], mask=[0, 0, 1]) + >>> x.mask = True + >>> x + masked_array(data = [-- -- --], + mask = [ True True True], + fill_value = 999999) + +Finally, specific entries can be masked and/or unmasked by assigning to the mask a sequence of booleans:: + + >>> x = ma.array([1, 2, 3]) + >>> x.mask = [0, 1, 0] + >>> x + masked_array(data = [1 -- 3], + mask = [False True False], + fill_value = 999999) + +Unmasking an entry +~~~~~~~~~~~~~~~~~~ + +To unmask one or several specific entries, we can just assign one or several new valid values to them:: + + >>> x = ma.array([1, 2, 3], mask=[0, 0, 1]) + >>> x + masked_array(data = [1 2 --], + mask = [False False True], + fill_value = 999999) + >>> x[-1] = 5 + >>> x + masked_array(data = [1 2 5], + mask = [False False False], + fill_value = 999999) + +.. note:: + Unmasking an entry by direct assignment will not work if the masked array + has a *hard* mask, as shown by the :attr:`hardmask`. + This feature was introduced to prevent the overwriting of the mask. 
+ To force the unmasking of an entry in such circumstance, the mask has first + to be softened with the :meth:`soften_mask` method before the allocation, + and then re-hardened with :meth:`harden_mask`:: + + >>> x = ma.array([1, 2, 3], mask=[0, 0, 1]) + >>> x + masked_array(data = [1 2 --], + mask = [False False True], + fill_value = 999999) + >>> x[-1] = 5 + >>> x + masked_array(data = [1 2 --], + mask = [False False True], + fill_value = 999999) + >>> x.soften_mask() + >>> x[-1] = 5 + >>> x + masked_array(data = [1 2 --], + mask = [False False True], + fill_value = 999999) + >>> x.soften_mask() + + +To unmask all masked entries of a masked array, the simplest solution is to assign the constant :attr:`nomask` to the mask:: + + >>> x = ma.array([1, 2, 3], mask=[0, 0, 1]) + >>> x + masked_array(data = [1 2 --], + mask = [False False True], + fill_value = 999999) + >>> x.mask = nomask + >>> x + masked_array(data = [1 2 3], + mask = [False False False], + fill_value = 999999) + + + +Indexing and slicing +-------------------- + +As a :class:`MaskedArray` is a subclass of :class:`numpy.ndarray`, it inherits its mechanisms for indexing and slicing. + +When accessing a single entry of a masked array with no named fields, the output is either a scalar (if the corresponding entry of the mask is ``False``) or the special value :attr:`masked` (if the corresponding entry of the mask is ``True``):: + + >>> x = ma.array([1, 2, 3], mask=[0, 0, 1]) + >>> x[0] + 1 + >>> x[-1] + masked_array(data = --, + mask = True, + fill_value = 1e+20) + >>> x[-1] is ma.masked + True + +If the masked array has named fields, accessing a single entry returns a +:class:`numpy.void` object if none of the fields are masked, or a 0d masked array with the same dtype as the initial array if at least one of the fields is masked. + + >>> y = ma.masked_array([(1,2), (3, 4)], + ... mask=[(0, 0), (0, 1)], + ... 
dtype=[('a', int), ('b', int)]) + >>> y[0] + (1, 2) + >>> y[-1] + masked_array(data = (3, --), + mask = (False, True), + fill_value = (999999, 999999), + dtype = [('a', '>> x = ma.array([1, 2, 3, 4, 5], mask=[0, 1, 0, 0, 1]) + >>> mx = x[:3] + >>> mx + masked_array(data = [1 -- 3], + mask = [False True False], + fill_value = 999999) + >>> mx[1] = -1 + >>> mx + masked_array(data = [1 -1 3], + mask = [False True False], + fill_value = 999999) + >>> x.mask + array([False, True, False, False, True], dtype=bool) + >>> x.data + array([ 1, -1, 3, 4, 5]) + + +Accessing a field of a masked array with structured datatype returns a :class:`MaskedArray`. + + + +Operations on masked arrays +--------------------------- + +Arithmetic and comparison operations are supported by masked arrays. +As much as possible, invalid entries of a masked array are not processed, meaning that the corresponding :attr:`data` entries *should* be the same before and after the operation. +We need to stress that this behavior may not be systematic, that invalid data may actually be affected by the operation in some cases and once again that invalid data should not be relied on. + +The :mod:`numpy.ma` module comes with a specific implementation of most +ufuncs. Unary and binary functions that have a validity domain (such as :func:`~numpy.log` or :func:`~numpy.divide`) return the :data:`masked` constant whenever the input is masked or falls outside the validity domain:: + + >>> ma.log([-1, 0, 1, 2]) + masked_array(data = [-- -- 0.0 0.69314718056], + mask = [ True True False False], + fill_value = 1e+20) + +Masked arrays also support standard numpy ufuncs. The output is then a masked array. The result of a unary ufunc is masked wherever the input is masked. The result of a binary ufunc is masked wherever any of the input is masked. 
If the ufunc also returns the optional context output (a 3-element tuple containing the name of the ufunc, its arguments and its domain), the context is processed and entries of the output masked array are masked wherever the corresponding input fall outside the validity domain:: + + >>> x = ma.array([-1, 1, 0, 2, 3], mask=[0, 0, 0, 0, 1]) + >>> np.log(x) + masked_array(data = [-- -- 0.0 0.69314718056 --], + mask = [ True True False False True], + fill_value = 1e+20) + + + +Examples +======== + +Data with a given value representing missing data +------------------------------------------------- + +Let's consider a list of elements, ``x``, where values of -9999. represent missing data. +We wish to compute the average value of the data and the vector of anomalies (deviations from the average):: + + >>> import numpy.ma as ma + >>> x = [0.,1.,-9999.,3.,4.] + >>> mx = ma.masked_values (x, -9999.) + >>> print mx.mean() + 2.0 + >>> print mx - mx.mean() + [-2.0 -1.0 -- 1.0 2.0] + >>> print mx.anom() + [-2.0 -1.0 -- 1.0 2.0] + + +Filling in the missing data +--------------------------- + +Suppose now that we wish to print that same data, but with the missing values +replaced by the average value. + + >>> print mx.filled(mx.mean()) + [ 0. 1. 2. 3. 4.] + + +Numerical operations +-------------------- + +Numerical operations can be easily performed without worrying about missing values, dividing by zero, square roots of negative numbers, etc.:: + + >>> import numpy as np, numpy.ma as ma + >>> x = ma.array([1., -1., 3., 4., 5., 6.], mask=[0,0,0,0,1,0]) + >>> y = ma.array([1., 2., 0., 4., 5., 6.], mask=[0,0,0,0,0,1]) + >>> print np.sqrt(x/y) + [1.0 -- -- 1.0 -- --] + +Four values of the output are invalid: the first one comes from taking the square root of a negative number, the second from the division by zero, and the last two where the inputs were masked. + + +Ignoring extreme values +----------------------- + +Let's consider an array ``d`` of random floats between 0 and 1. 
+We wish to compute the average of the values of ``d`` while ignoring any data outside the range [0.1, 0.9]:: + + >>> print ma.masked_outside(d, 0.1, 0.9).mean() Added: trunk/doc/source/reference/maskedarray.rst =================================================================== --- trunk/doc/source/reference/maskedarray.rst 2009-01-12 21:25:53 UTC (rev 6319) +++ trunk/doc/source/reference/maskedarray.rst 2009-01-13 21:01:58 UTC (rev 6320) @@ -0,0 +1,14 @@ +.. _maskedarray: + + +Masked arrays are arrays that may have missing or invalid entries. +The :mod:`numpy.ma` module provides a nearly work-alike replacement for numpy +that supports data arrays with masks. + + +.. toctree:: + :maxdepth: 2 + + maskedarray.generic + maskedarray.baseclass + routines.ma \ No newline at end of file Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-12 21:25:53 UTC (rev 6319) +++ trunk/numpy/ma/core.py 2009-01-13 21:01:58 UTC (rev 6320) @@ -1,19 +1,22 @@ # pylint: disable-msg=E1002 """ -MA: a facility for dealing with missing observations -MA is generally used as a numpy.array look-alike. -by Paul F. Dubois. +numpy.ma : a package to handle missing or invalid values. +This package was initially written for numarray by Paul F. Dubois +at Lawrence Livermore National Laboratory. +In 2006, the package was completely rewritten by Pierre Gerard-Marchant +(University of Georgia) to make the MaskedArray class a subclass of ndarray, +and to improve support of structured arrays. + + Copyright 1999, 2000, 2001 Regents of the University of California. Released for unlimited redistribution. -Adapted for numpy_core 2005 by Travis Oliphant and -(mainly) Paul Dubois. - +* Adapted for numpy_core 2005 by Travis Oliphant and (mainly) Paul Dubois. 
* Subclassing of the base ndarray 2006 by Pierre Gerard-Marchant (pgmdevlist_AT_gmail_DOT_com) * Improvements suggested by Reggie Dugard (reggie_AT_merfinllc_DOT_com) -:author: Pierre Gerard-Marchant +.. moduleauthor:: Pierre Gerard-Marchant """ @@ -1106,7 +1109,7 @@ def masked_where(condition, a, copy=True): """ - Return ``a`` as an array masked where ``condition`` is True. + Return ``a`` as an array masked where ``condition`` is ``True``. Masked values of ``a`` or ``condition`` are kept. Parameters @@ -1139,7 +1142,7 @@ def masked_greater(x, value, copy=True): """ - Return the array `x` masked where (x > value). + Return the array `x` masked where ``(x > value)``. Any value of mask already masked is kept masked. """ @@ -1147,30 +1150,34 @@ def masked_greater_equal(x, value, copy=True): - "Shortcut to masked_where, with condition = (x >= value)." + "Shortcut to masked_where, with condition ``(x >= value)``." return masked_where(greater_equal(x, value), x, copy=copy) def masked_less(x, value, copy=True): - "Shortcut to masked_where, with condition = (x < value)." + "Shortcut to masked_where, with condition ``(x < value)``." return masked_where(less(x, value), x, copy=copy) def masked_less_equal(x, value, copy=True): - "Shortcut to masked_where, with condition = (x <= value)." + "Shortcut to masked_where, with condition ``(x <= value)``." return masked_where(less_equal(x, value), x, copy=copy) def masked_not_equal(x, value, copy=True): - "Shortcut to masked_where, with condition = (x != value)." + "Shortcut to masked_where, with condition ``(x != value)``." return masked_where(not_equal(x, value), x, copy=copy) def masked_equal(x, value, copy=True): """ - Shortcut to masked_where, with condition = (x == value). For - floating point, consider ``masked_values(x, value)`` instead. + Shortcut to masked_where, with condition ``(x == value)``. + See Also + -------- + masked_where : base function + masked_values : equivalent function for floats. 
+ """ # An alternative implementation relies on filling first: probably not needed. # d = filled(x, 0) @@ -1755,7 +1762,8 @@ return #.................................. def __array_wrap__(self, obj, context=None): - """Special hook for ufuncs. + """ + Special hook for ufuncs. Wraps the numpy array and sets the mask according to context. """ result = obj.view(type(self)) @@ -1988,7 +1996,8 @@ ndarray.__setitem__(_data, indx, dindx) _mask[indx] = mindx return - #............................................ + + def __getslice__(self, i, j): """x.__getslice__(i, j) <==> x[i:j] @@ -1997,7 +2006,8 @@ """ return self.__getitem__(slice(i, j)) - #........................ + + def __setslice__(self, i, j, value): """x.__setslice__(i, j, value) <==> x[i:j]=value @@ -2006,7 +2016,8 @@ """ self.__setitem__(slice(i, j), value) - #............................................ + + def __setmask__(self, mask, copy=False): """Set the mask. @@ -2107,6 +2118,10 @@ """ self._hardmask = False + hardmask = property(fget=lambda self: self._hardmask, + doc="Hardness of the mask") + + def unshare_mask(self): """Copy the mask and set the sharedmask flag to False. @@ -2115,6 +2130,9 @@ self._mask = self._mask.copy() self._sharedmask = False + sharedmask = property(fget=lambda self: self._sharedmask, + doc="Share status of the mask (read-only).") + def shrink_mask(self): """Reduce a mask to nomask when possible. @@ -2124,6 +2142,10 @@ self._mask = nomask #............................................ + + baseclass = property(fget= lambda self:self._baseclass, + doc="Class of the underlying data (read-only).") + def _get_data(self): """Return the current data, as a view of the original underlying data. @@ -2179,23 +2201,23 @@ def filled(self, fill_value=None): - """Return a copy of self._data, where masked values are filled - with fill_value. + """ + Return a copy of self, where masked values are filled with `fill_value`. - If fill_value is None, self.fill_value is used instead. 
+ If `fill_value` is None, `self.fill_value` is used instead. - Notes - ----- - + Subclassing is preserved - + The result is NOT a MaskedArray ! + Notes + ----- + + Subclassing is preserved + + The result is NOT a MaskedArray ! - Examples - -------- - >>> x = np.ma.array([1,2,3,4,5], mask=[0,0,1,0,1], fill_value=-999) - >>> x.filled() - array([1,2,-999,4,-999]) - >>> type(x.filled()) - + Examples + -------- + >>> x = np.ma.array([1,2,3,4,5], mask=[0,0,1,0,1], fill_value=-999) + >>> x.filled() + array([1,2,-999,4,-999]) + >>> type(x.filled()) + """ m = self._mask @@ -2502,7 +2524,7 @@ return self #... def __ipow__(self, other): - "Raise self to the power other, in place" + "Raise self to the power other, in place." other_data = getdata(other) other_mask = getmask(other) ndarray.__ipow__(self._data, np.where(self._mask, 1, other_data)) @@ -3728,6 +3750,7 @@ def toflex(self): """ Transforms a MaskedArray into a flexible-type array with two fields: + * the ``_data`` field stores the ``_data`` part of the array; * the ``_mask`` field stores the ``_mask`` part of the array; @@ -4563,7 +4586,8 @@ outerproduct = outer def allequal (a, b, fill_value=True): - """Return True if all entries of a and b are equal, using + """ + Return True if all entries of a and b are equal, using fill_value as a truth value where either or both are masked. """ @@ -4654,9 +4678,9 @@ return np.all(d) #.............................................................................. -def asarray(a, dtype=None): +def asarray(a, dtype=None, order=None): """ - Convert the input to a masked array. + Convert the input `a` to a masked array of the given datatype. Parameters ---------- @@ -4674,24 +4698,35 @@ ------- out : ndarray MaskedArray interpretation of `a`. No copy is performed if the input - is already an ndarray. If `a` is a subclass of ndarray, a base - class ndarray is returned. - Return a as a MaskedArray object of the given dtype. - If dtype is not given or None, is is set to the dtype of a. 
- No copy is performed if a is already an array. - Subclasses are converted to the base class MaskedArray. + is already an ndarray. If `a` is a subclass of MaskedArray, a base + class MaskedArray is returned. """ return masked_array(a, dtype=dtype, copy=False, keep_mask=True, subok=False) def asanyarray(a, dtype=None): - """asanyarray(data, dtype) = array(data, dtype, copy=0, subok=1) + """ + Convert the input `a` to a masked array of the given datatype. + If `a` is a subclass of MaskedArray, its class is conserved. - Return a as an masked array. - If dtype is not given or None, is is set to the dtype of a. - No copy is performed if a is already an array. - Subclasses are conserved. + Parameters + ---------- + a : array_like + Input data, in any form that can be converted to an array. This + includes lists, lists of tuples, tuples, tuples of tuples, tuples + of lists and ndarrays. + dtype : data-type, optional + By default, the data-type is inferred from the input data. + order : {'C', 'F'}, optional + Whether to use row-major ('C') or column-major ('FORTRAN') memory + representation. Defaults to 'C'. + Returns + ------- + out : ndarray + MaskedArray interpretation of `a`. No copy is performed if the input + is already an ndarray. + """ return masked_array(a, dtype=dtype, copy=False, keep_mask=True, subok=True) Modified: trunk/numpy/ma/extras.py =================================================================== --- trunk/numpy/ma/extras.py 2009-01-12 21:25:53 UTC (rev 6319) +++ trunk/numpy/ma/extras.py 2009-01-13 21:01:58 UTC (rev 6320) @@ -45,22 +45,19 @@ #............................................................................... 
def issequence(seq): """Is seq a sequence (ndarray, list or tuple)?""" - if isinstance(seq, ndarray): + if isinstance(seq, (ndarray, tuple, list)): return True - elif isinstance(seq, tuple): - return True - elif isinstance(seq, list): - return True return False def count_masked(arr, axis=None): - """Count the number of masked elements along the given axis. + """ + Count the number of masked elements along the given axis. Parameters ---------- - axis : int, optional - Axis along which to count. - If None (default), a flattened version of the array is used. + axis : int, optional + Axis along which to count. + If None (default), a flattened version of the array is used. """ m = getmaskarray(arr) @@ -252,7 +249,8 @@ def average(a, axis=None, weights=None, returned=False): - """Average the array over the given axis. + """ + Average the array over the given axis. Parameters ---------- @@ -440,10 +438,10 @@ #.............................................................................. def compress_rowcols(x, axis=None): """ - Suppress the rows and/or columns of a 2D array that contains + Suppress the rows and/or columns of a 2D array that contain masked values. - The suppression behavior is selected with the `axis`parameter. + The suppression behavior is selected with the `axis` parameter. - If axis is None, rows and columns are suppressed. - If axis is 0, only rows are suppressed. @@ -482,13 +480,15 @@ return x._data[idxr][:,idxc] def compress_rows(a): - """Suppress whole rows of a 2D array that contain masked values. + """ + Suppress whole rows of a 2D array that contain masked values. """ return compress_rowcols(a, 0) def compress_cols(a): - """Suppress whole columnss of a 2D array that contain masked values. + """ + Suppress whole columns of a 2D array that contain masked values. """ return compress_rowcols(a, 1) @@ -530,30 +530,35 @@ return a def mask_rows(a, axis=None): - """Mask whole rows of a 2D array that contain masked values. 
+ """ + Mask whole rows of a 2D array that contain masked values. Parameters ---------- - axis : int, optional - Axis along which to perform the operation. - If None, applies to a flattened version of the array. + axis : int, optional + Axis along which to perform the operation. + If None, applies to a flattened version of the array. + """ return mask_rowcols(a, 0) def mask_cols(a, axis=None): - """Mask whole columns of a 2D array that contain masked values. + """ + Mask whole columns of a 2D array that contain masked values. Parameters ---------- - axis : int, optional - Axis along which to perform the operation. - If None, applies to a flattened version of the array. + axis : int, optional + Axis along which to perform the operation. + If None, applies to a flattened version of the array. + """ return mask_rowcols(a, 1) def dot(a,b, strict=False): - """Return the dot product of two 2D masked arrays a and b. + """ + Return the dot product of two 2D masked arrays a and b. Like the generic numpy equivalent, the product sum is over the last dimension of a and the second-to-last dimension of b. If strict is True, @@ -584,24 +589,25 @@ #............................................................................... def ediff1d(array, to_end=None, to_begin=None): - """Return the differences between consecutive elements of an + """ + Return the differences between consecutive elements of an array, possibly with prefixed and/or appended values. Parameters ---------- - array : {array} - Input array, will be flattened before the difference is taken. - to_end : {number}, optional - If provided, this number will be tacked onto the end of the returned - differences. - to_begin : {number}, optional - If provided, this number will be taked onto the beginning of the - returned differences. + array : {array} + Input array, will be flattened before the difference is taken. 
+ to_end : {number}, optional + If provided, this number will be tacked onto the end of the returned + differences. + to_begin : {number}, optional + If provided, this number will be taked onto the beginning of the + returned differences. Returns ------- - ed : {array} - The differences. Loosely, this will be (ary[1:] - ary[:-1]). + ed : {array} + The differences. Loosely, this will be (ary[1:] - ary[:-1]). """ a = masked_array(array, copy=True) @@ -747,7 +753,8 @@ def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True): - """The correlation coefficients formed from the array x, where the + """ + The correlation coefficients formed from the array x, where the rows are the observations, and the columns are variables. corrcoef(x,y) where x and y are 1d arrays is the same as @@ -818,7 +825,8 @@ #####-------------------------------------------------------------------------- class MAxisConcatenator(AxisConcatenator): - """Translate slice objects to concatenation along an axis. + """ + Translate slice objects to concatenation along an axis. """ @@ -877,11 +885,13 @@ return self._retval(res) class mr_class(MAxisConcatenator): - """Translate slice objects to concatenation along the first axis. + """ + Translate slice objects to concatenation along the first axis. - For example: - >>> np.ma.mr_[np.ma.array([1,2,3]), 0, 0, np.ma.array([4,5,6])] - array([1, 2, 3, 0, 0, 4, 5, 6]) + Examples + -------- + >>> np.ma.mr_[np.ma.array([1,2,3]), 0, 0, np.ma.array([4,5,6])] + array([1, 2, 3, 0, 0, 4, 5, 6]) """ def __init__(self): @@ -894,7 +904,8 @@ #####-------------------------------------------------------------------------- def flatnotmasked_edges(a): - """Find the indices of the first and last not masked values in a + """ + Find the indices of the first and last not masked values in a 1D masked array. If all values are masked, returns None. 
""" @@ -907,8 +918,10 @@ else: return None + def notmasked_edges(a, axis=None): - """Find the indices of the first and last not masked values along + """ + Find the indices of the first and last not masked values along the given axis in a masked array. If all values are masked, return None. Otherwise, return a list @@ -917,9 +930,10 @@ Parameters ---------- - axis : int, optional - Axis along which to perform the operation. - If None, applies to a flattened version of the array. + axis : int, optional + Axis along which to perform the operation. + If None, applies to a flattened version of the array. + """ a = asarray(a) if axis is None or a.ndim == 1: @@ -929,8 +943,10 @@ return [tuple([idx[i].min(axis).compressed() for i in range(a.ndim)]), tuple([idx[i].max(axis).compressed() for i in range(a.ndim)]),] + def flatnotmasked_contiguous(a): - """Find contiguous unmasked data in a flattened masked array. + """ + Find contiguous unmasked data in a flattened masked array. Return a sorted sequence of slices (start index, end index). @@ -950,22 +966,22 @@ return result def notmasked_contiguous(a, axis=None): - """Find contiguous unmasked data in a masked array along the given - axis. + """ + Find contiguous unmasked data in a masked array along the given axis. Parameters ---------- - axis : int, optional - Axis along which to perform the operation. - If None, applies to a flattened version of the array. + axis : int, optional + Axis along which to perform the operation. + If None, applies to a flattened version of the array. Returns ------- - A sorted sequence of slices (start index, end index). + A sorted sequence of slices (start index, end index). Notes ----- - Only accepts 2D arrays at most. + Only accepts 2D arrays at most. 
""" a = asarray(a) From numpy-svn at scipy.org Wed Jan 14 02:14:43 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Wed, 14 Jan 2009 01:14:43 -0600 (CST) Subject: [Numpy-svn] r6321 - trunk/numpy/lib Message-ID: <20090114071443.71B0CC7C05E@scipy.org> Author: stefan Date: 2009-01-14 01:14:27 -0600 (Wed, 14 Jan 2009) New Revision: 6321 Modified: trunk/numpy/lib/info.py Log: Docstring: remove old floating point arithmetic, parallel execution and postponed import references. Modified: trunk/numpy/lib/info.py =================================================================== --- trunk/numpy/lib/info.py 2009-01-13 21:01:58 UTC (rev 6320) +++ trunk/numpy/lib/info.py 2009-01-14 07:14:27 UTC (rev 6321) @@ -98,24 +98,11 @@ polydiv -- Divide polynomials polyval -- Evaluate polynomial at given argument -Import tricks -============= -ppimport -- Postpone module import until trying to use it -ppimport_attr -- Postpone module import until trying to use its - attribute -ppresolve -- Import postponed module and return it. +Machine arithmetic +================== +finfo -- Parameters of system floating point arithmetic +iinfo -- Parameters of system integer arithmetic -Machine arithmetics -=================== -machar_single -- MachAr instance storing the parameters of system - single precision floating point arithmetics -machar_double -- MachAr instance storing the parameters of system - double precision floating point arithmetics - -Threading tricks -================ -ParallelExec -- Execute commands in parallel thread. - 1D array set operations ======================= Set operations for 1D numeric arrays based on sort() function. 
From numpy-svn at scipy.org Wed Jan 14 02:55:31 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Wed, 14 Jan 2009 01:55:31 -0600 (CST) Subject: [Numpy-svn] r6322 - trunk/numpy/lib Message-ID: <20090114075531.BACD5C7C040@scipy.org> Author: stefan Date: 2009-01-14 01:55:16 -0600 (Wed, 14 Jan 2009) New Revision: 6322 Modified: trunk/numpy/lib/getlimits.py Log: Fix printing of limits. Modified: trunk/numpy/lib/getlimits.py =================================================================== --- trunk/numpy/lib/getlimits.py 2009-01-14 07:14:27 UTC (rev 6321) +++ trunk/numpy/lib/getlimits.py 2009-01-14 07:55:16 UTC (rev 6322) @@ -115,7 +115,7 @@ return obj def _init(self, dtype): - self.dtype = dtype + self.dtype = np.dtype(dtype) if dtype is ntypes.double: itype = ntypes.int64 fmt = '%24.16e' @@ -149,23 +149,23 @@ self.nexp = machar.iexp self.nmant = machar.it self.machar = machar - self._str_tiny = machar._str_xmin - self._str_max = machar._str_xmax - self._str_epsneg = machar._str_epsneg - self._str_eps = machar._str_eps - self._str_resolution = machar._str_resolution + self._str_tiny = machar._str_xmin.strip() + self._str_max = machar._str_xmax.strip() + self._str_epsneg = machar._str_epsneg.strip() + self._str_eps = machar._str_eps.strip() + self._str_resolution = machar._str_resolution.strip() return self def __str__(self): return '''\ Machine parameters for %(dtype)s --------------------------------------------------------------------- -precision=%(precision)3s resolution=%(_str_resolution)s -machep=%(machep)6s eps= %(_str_eps)s -negep =%(negep)6s epsneg= %(_str_epsneg)s -minexp=%(minexp)6s tiny= %(_str_tiny)s -maxexp=%(maxexp)6s max= %(_str_max)s -nexp =%(nexp)6s min= -max +precision=%(precision)3s resolution= %(_str_resolution)s +machep=%(machep)6s eps= %(_str_eps)s +negep =%(negep)6s epsneg= %(_str_epsneg)s +minexp=%(minexp)6s tiny= %(_str_tiny)s +maxexp=%(maxexp)6s max= %(_str_max)s +nexp =%(nexp)6s min= -max 
--------------------------------------------------------------------- ''' % self.__dict__ @@ -256,6 +256,17 @@ max = property(max) + def __str__(self): + """String representation.""" + return ''' +Machine parameters for %(dtype)s +--------------------------------------------------------------------- +min = %(min)s +max = %(max)s +--------------------------------------------------------------------- +''' % {'dtype': self.dtype, 'min': self.min, 'max': self.max} + + if __name__ == '__main__': f = finfo(ntypes.single) print 'single epsilon:',f.eps From numpy-svn at scipy.org Wed Jan 14 02:56:31 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Wed, 14 Jan 2009 01:56:31 -0600 (CST) Subject: [Numpy-svn] r6323 - in trunk/numpy/lib: . tests Message-ID: <20090114075631.409B1C7C024@scipy.org> Author: stefan Date: 2009-01-14 01:56:10 -0600 (Wed, 14 Jan 2009) New Revision: 6323 Modified: trunk/numpy/lib/getlimits.py trunk/numpy/lib/tests/test_getlimits.py Log: Fix finfo to work on all instances, not just NumPy scalars. 
Modified: trunk/numpy/lib/getlimits.py =================================================================== --- trunk/numpy/lib/getlimits.py 2009-01-14 07:55:16 UTC (rev 6322) +++ trunk/numpy/lib/getlimits.py 2009-01-14 07:56:10 UTC (rev 6323) @@ -88,6 +88,12 @@ _finfo_cache = {} def __new__(cls, dtype): + try: + dtype = np.dtype(dtype) + except TypeError: + # In case a float instance was given + dtype = np.dtype(type(dtype)) + obj = cls._finfo_cache.get(dtype,None) if obj is not None: return obj @@ -220,8 +226,11 @@ _min_vals = {} _max_vals = {} - def __init__(self, type): - self.dtype = np.dtype(type) + def __init__(self, int_type): + try: + self.dtype = np.dtype(int_type) + except TypeError: + self.dtype = np.dtype(type(int_type)) self.kind = self.dtype.kind self.bits = self.dtype.itemsize * 8 self.key = "%s%d" % (self.kind, self.bits) @@ -258,7 +267,7 @@ def __str__(self): """String representation.""" - return ''' + return '''\ Machine parameters for %(dtype)s --------------------------------------------------------------------- min = %(min)s Modified: trunk/numpy/lib/tests/test_getlimits.py =================================================================== --- trunk/numpy/lib/tests/test_getlimits.py 2009-01-14 07:55:16 UTC (rev 6322) +++ trunk/numpy/lib/tests/test_getlimits.py 2009-01-14 07:56:10 UTC (rev 6323) @@ -51,5 +51,9 @@ assert_equal(iinfo(T).max, T(-1)) +def test_instances(): + iinfo(10) + finfo(3.0) + if __name__ == "__main__": run_module_suite() From numpy-svn at scipy.org Fri Jan 16 19:15:18 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Fri, 16 Jan 2009 18:15:18 -0600 (CST) Subject: [Numpy-svn] r6324 - trunk/numpy/ma Message-ID: <20090117001518.67C86C7C0CA@scipy.org> Author: pierregm Date: 2009-01-16 18:15:15 -0600 (Fri, 16 Jan 2009) New Revision: 6324 Modified: trunk/numpy/ma/core.py Log: * fixed _arraymethod.__call__ for structured arrays Modified: trunk/numpy/ma/core.py 
=================================================================== --- trunk/numpy/ma/core.py 2009-01-14 07:56:10 UTC (rev 6323) +++ trunk/numpy/ma/core.py 2009-01-17 00:15:15 UTC (rev 6324) @@ -1459,7 +1459,7 @@ elif mask is not nomask: result.__setmask__(getattr(mask, methodname)(*args, **params)) else: - if mask.ndim and mask.all(): + if mask.ndim and (not mask.dtype.names and mask.all()): return masked return result #.......................................................... From numpy-svn at scipy.org Sat Jan 17 16:24:27 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Sat, 17 Jan 2009 15:24:27 -0600 (CST) Subject: [Numpy-svn] r6325 - in trunk/numpy/lib: . tests Message-ID: <20090117212427.EFC69C7C015@scipy.org> Author: ptvirtan Date: 2009-01-17 15:24:13 -0600 (Sat, 17 Jan 2009) New Revision: 6325 Modified: trunk/numpy/lib/function_base.py trunk/numpy/lib/tests/test_function_base.py Log: Make `trapz` accept 1-D `x` parameter for n-d `y`, even if axis != -1. Additional tests included. Modified: trunk/numpy/lib/function_base.py =================================================================== --- trunk/numpy/lib/function_base.py 2009-01-17 00:15:15 UTC (rev 6324) +++ trunk/numpy/lib/function_base.py 2009-01-17 21:24:13 UTC (rev 6325) @@ -2818,9 +2818,9 @@ y : array_like Input array to integrate. x : array_like, optional - If `x` is None, then spacing between all `y` elements is 1. + If `x` is None, then spacing between all `y` elements is `dx`. dx : scalar, optional - If `x` is None, spacing given by `dx` is assumed. + If `x` is None, spacing given by `dx` is assumed. Default is 1. axis : int, optional Specify the axis. 
@@ -2836,7 +2836,15 @@ if x is None: d = dx else: - d = diff(x,axis=axis) + x = asarray(x) + if x.ndim == 1: + d = diff(x) + # reshape to correct shape + shape = [1]*y.ndim + shape[axis] = d.shape[0] + d = d.reshape(shape) + else: + d = diff(x, axis=axis) nd = len(y.shape) slice1 = [slice(None)]*nd slice2 = [slice(None)]*nd Modified: trunk/numpy/lib/tests/test_function_base.py =================================================================== --- trunk/numpy/lib/tests/test_function_base.py 2009-01-17 00:15:15 UTC (rev 6324) +++ trunk/numpy/lib/tests/test_function_base.py 2009-01-17 21:24:13 UTC (rev 6325) @@ -430,6 +430,44 @@ #check integral of normal equals 1 assert_almost_equal(sum(r,axis=0),1,7) + def test_ndim(self): + x = linspace(0, 1, 3) + y = linspace(0, 2, 8) + z = linspace(0, 3, 13) + + wx = ones_like(x) * (x[1]-x[0]) + wx[0] /= 2 + wx[-1] /= 2 + wy = ones_like(y) * (y[1]-y[0]) + wy[0] /= 2 + wy[-1] /= 2 + wz = ones_like(z) * (z[1]-z[0]) + wz[0] /= 2 + wz[-1] /= 2 + + q = x[:,None,None] + y[None,:,None] + z[None,None,:] + + qx = (q*wx[:,None,None]).sum(axis=0) + qy = (q*wy[None,:,None]).sum(axis=1) + qz = (q*wz[None,None,:]).sum(axis=2) + + # n-d `x` + r = trapz(q, x=x[:,None,None], axis=0) + assert_almost_equal(r, qx) + r = trapz(q, x=y[None,:,None], axis=1) + assert_almost_equal(r, qy) + r = trapz(q, x=z[None,None,:], axis=2) + assert_almost_equal(r, qz) + + # 1-d `x` + r = trapz(q, x=x, axis=0) + assert_almost_equal(r, qx) + r = trapz(q, x=y, axis=1) + assert_almost_equal(r, qy) + r = trapz(q, x=z, axis=2) + assert_almost_equal(r, qz) + + class TestSinc(TestCase): def test_simple(self): assert(sinc(0)==1) From numpy-svn at scipy.org Mon Jan 19 03:53:55 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 19 Jan 2009 02:53:55 -0600 (CST) Subject: [Numpy-svn] r6326 - trunk/numpy/ma Message-ID: <20090119085355.2532DC7C011@scipy.org> Author: pierregm Date: 2009-01-19 02:53:53 -0600 (Mon, 19 Jan 2009) New Revision: 6326 Modified: 
trunk/numpy/ma/core.py Log: * renamed FlatIter to MaskedIterator * added __getitem__ to MaskedIterator Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-17 21:24:13 UTC (rev 6325) +++ trunk/numpy/ma/core.py 2009-01-19 08:53:53 UTC (rev 6326) @@ -1464,10 +1464,11 @@ return result #.......................................................... -class FlatIter(object): +class MaskedIterator(object): "Define an interator." def __init__(self, ma): self.ma = ma + self.ma1d = ma.ravel() self.ma_iter = np.asarray(ma).flat if ma._mask is nomask: @@ -1478,10 +1479,12 @@ def __iter__(self): return self + def __getitem__(self, indx): + return self.ma1d.__getitem__(indx) + ### This won't work is ravel makes a copy def __setitem__(self, index, value): - a = self.ma.ravel() - a[index] = value + self.ma1d[index] = value def next(self): "Returns the next element of the iterator." @@ -2168,7 +2171,7 @@ """Return a flat iterator. """ - return FlatIter(self) + return MaskedIterator(self) # def _set_flat (self, value): """Set a flattened version of self to value. From numpy-svn at scipy.org Mon Jan 19 04:01:43 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 19 Jan 2009 03:01:43 -0600 (CST) Subject: [Numpy-svn] r6327 - trunk/numpy/lib Message-ID: <20090119090143.2FC86C7C011@scipy.org> Author: pierregm Date: 2009-01-19 03:01:24 -0600 (Mon, 19 Jan 2009) New Revision: 6327 Modified: trunk/numpy/lib/arraysetops.py Log: * replace np.asarray by np.asanyarray in unique1d Modified: trunk/numpy/lib/arraysetops.py =================================================================== --- trunk/numpy/lib/arraysetops.py 2009-01-19 08:53:53 UTC (rev 6326) +++ trunk/numpy/lib/arraysetops.py 2009-01-19 09:01:24 UTC (rev 6327) @@ -52,13 +52,19 @@ If provided, this number will be taked onto the beginning of the returned differences. 
+ Notes + ----- + When applied to masked arrays, this function drops the mask information + if the `to_begin` and/or `to_end` parameters are used + + Returns ------- ed : array The differences. Loosely, this will be (ary[1:] - ary[:-1]). """ - ary = np.asarray(ary).flat + ary = np.asanyarray(ary).flat ed = ary[1:] - ary[:-1] arrays = [ed] if to_begin is not None: @@ -132,7 +138,7 @@ "the output was (indices, unique_arr), but " "has now been reversed to be more consistent.") - ar = np.asarray(ar1).flatten() + ar = np.asanyarray(ar1).flatten() if ar.size == 0: if return_inverse and return_index: return ar, np.empty(0, np.bool), np.empty(0, np.bool) From numpy-svn at scipy.org Mon Jan 19 04:04:48 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 19 Jan 2009 03:04:48 -0600 (CST) Subject: [Numpy-svn] r6328 - in trunk/numpy/ma: . tests Message-ID: <20090119090448.1FED8C7C011@scipy.org> Author: pierregm Date: 2009-01-19 03:04:20 -0600 (Mon, 19 Jan 2009) New Revision: 6328 Modified: trunk/numpy/ma/extras.py trunk/numpy/ma/tests/test_extras.py Log: * add intersect1d, intersect1d_nu, setdiff1d, setmember1d, setxor1d, unique1d, union1d * use np.atleast1d instead of ma.atleast1d Modified: trunk/numpy/ma/extras.py =================================================================== --- trunk/numpy/ma/extras.py 2009-01-19 09:01:24 UTC (rev 6327) +++ trunk/numpy/ma/extras.py 2009-01-19 09:04:20 UTC (rev 6328) @@ -19,11 +19,14 @@ 'ediff1d', 'flatnotmasked_contiguous', 'flatnotmasked_edges', 'hsplit', 'hstack', + 'intersect1d', 'intersect1d_nu', 'mask_cols', 'mask_rowcols', 'mask_rows', 'masked_all', 'masked_all_like', 'median', 'mr_', 'notmasked_contiguous', 'notmasked_edges', 'polyfit', 'row_stack', + 'setdiff1d', 'setmember1d', 'setxor1d', + 'unique1d', 'union1d', 'vander', 'vstack', ] @@ -133,9 +136,12 @@ res.append(masked_array(_d, mask=_m)) return res -atleast_1d = _fromnxfunction('atleast_1d') -atleast_2d = _fromnxfunction('atleast_2d') -atleast_3d = 
_fromnxfunction('atleast_3d') +#atleast_1d = _fromnxfunction('atleast_1d') +#atleast_2d = _fromnxfunction('atleast_2d') +#atleast_3d = _fromnxfunction('atleast_3d') +atleast_1d = np.atleast_1d +atleast_2d = np.atleast_2d +atleast_3d = np.atleast_3d vstack = row_stack = _fromnxfunction('vstack') hstack = _fromnxfunction('hstack') @@ -587,75 +593,213 @@ m = ~np.dot(am, bm) return masked_array(d, mask=m) -#............................................................................... -def ediff1d(array, to_end=None, to_begin=None): +#####-------------------------------------------------------------------------- +#---- --- arraysetops --- +#####-------------------------------------------------------------------------- + +def ediff1d(arr, to_end=None, to_begin=None): """ - Return the differences between consecutive elements of an - array, possibly with prefixed and/or appended values. + Computes the differences between consecutive elements of an array. - Parameters - ---------- - array : {array} - Input array, will be flattened before the difference is taken. - to_end : {number}, optional - If provided, this number will be tacked onto the end of the returned - differences. - to_begin : {number}, optional - If provided, this number will be taked onto the beginning of the - returned differences. + This function is the equivalent of `numpy.ediff1d` that takes masked + values into account. + See Also + -------- + numpy.eddif1d : equivalent function for ndarrays. + Returns ------- - ed : {array} - The differences. Loosely, this will be (ary[1:] - ary[:-1]). 
- + output : MaskedArray + """ - a = masked_array(array, copy=True) - if a.ndim > 1: - a.reshape((a.size,)) - (d, m, n) = (a._data, a._mask, a.size-1) - dd = d[1:]-d[:-1] - if m is nomask: - dm = nomask - else: - dm = m[1:]-m[:-1] + arr = ma.asanyarray(arr).flat + ed = arr[1:] - arr[:-1] + arrays = [ed] # + if to_begin is not None: + arrays.insert(0, to_begin) if to_end is not None: - to_end = asarray(to_end) - nend = to_end.size - if to_begin is not None: - to_begin = asarray(to_begin) - nbegin = to_begin.size - r_data = np.empty((n+nend+nbegin,), dtype=a.dtype) - r_mask = np.zeros((n+nend+nbegin,), dtype=bool) - r_data[:nbegin] = to_begin._data - r_mask[:nbegin] = to_begin._mask - r_data[nbegin:-nend] = dd - r_mask[nbegin:-nend] = dm - else: - r_data = np.empty((n+nend,), dtype=a.dtype) - r_mask = np.zeros((n+nend,), dtype=bool) - r_data[:-nend] = dd - r_mask[:-nend] = dm - r_data[-nend:] = to_end._data - r_mask[-nend:] = to_end._mask + arrays.append(to_end) # - elif to_begin is not None: - to_begin = asarray(to_begin) - nbegin = to_begin.size - r_data = np.empty((n+nbegin,), dtype=a.dtype) - r_mask = np.zeros((n+nbegin,), dtype=bool) - r_data[:nbegin] = to_begin._data - r_mask[:nbegin] = to_begin._mask - r_data[nbegin:] = dd - r_mask[nbegin:] = dm + if len(arrays) != 1: + # We'll save ourselves a copy of a potentially large array in the common + # case where neither to_begin or to_end was given. + ed = hstack(arrays) # + return ed + + +def unique1d(ar1, return_index=False, return_inverse=False): + """ + Finds the unique elements of an array. + + Masked values are considered the same element (masked). + + The output array is always a MaskedArray. + + See Also + -------- + np.unique1d : equivalent function for ndarrays. 
+ """ + output = np.unique1d(ar1, + return_index=return_index, + return_inverse=return_inverse) + if isinstance(output, tuple): + output = list(output) + output[0] = output[0].view(MaskedArray) + output = tuple(output) else: - r_data = dd - r_mask = dm - return masked_array(r_data, mask=r_mask) + output = output.view(MaskedArray) + return output +def intersect1d(ar1, ar2): + """ + Returns the repeated or unique elements belonging to the two arrays. + + Masked values are assumed equals one to the other. + The output is always a masked array + + See Also + -------- + numpy.intersect1d : equivalent function for ndarrays. + + Examples + -------- + >>> x = array([1, 3, 3, 3], mask=[0, 0, 0, 1]) + >>> y = array([3, 1, 1, 1], mask=[0, 0, 0, 1]) + >>> intersect1d(x, y) + masked_array(data = [1 1 3 3 --], + mask = [False False False False True], + fill_value = 999999) + """ + aux = ma.concatenate((ar1,ar2)) + aux.sort() + return aux[aux[1:] == aux[:-1]] + + + +def intersect1d_nu(ar1, ar2): + """ + Returns the unique elements common to both arrays. + + Masked values are considered equal one to the other. + The output is always a masked array. + + See Also + -------- + intersect1d : Returns repeated or unique common elements. + numpy.intersect1d_nu : equivalent function for ndarrays. + + Examples + -------- + >>> x = array([1, 3, 3, 3], mask=[0, 0, 0, 1]) + >>> y = array([3, 1, 1, 1], mask=[0, 0, 0, 1]) + >>> intersect1d_nu(x, y) + masked_array(data = [1 3 --], + mask = [False False True], + fill_value = 999999) + + """ + # Might be faster than unique1d( intersect1d( ar1, ar2 ) )? + aux = ma.concatenate((unique1d(ar1), unique1d(ar2))) + aux.sort() + return aux[aux[1:] == aux[:-1]] + + + +def setxor1d(ar1, ar2): + """ + Set exclusive-or of 1D arrays with unique elements. 
+ + See Also + -------- + numpy.setxor1d : equivalent function for ndarrays + + """ + aux = ma.concatenate((ar1, ar2)) + if aux.size == 0: + return aux + aux.sort() + auxf = aux.filled() +# flag = ediff1d( aux, to_end = 1, to_begin = 1 ) == 0 + flag = ma.concatenate(([True], (auxf[1:] != auxf[:-1]), [True])) +# flag2 = ediff1d( flag ) == 0 + flag2 = (flag[1:] == flag[:-1]) + return aux[flag2] + + +def setmember1d(ar1, ar2): + """ + Return a boolean array set True where first element is in second array. + + See Also + -------- + numpy.setmember1d : equivalent function for ndarrays. + + """ + ar1 = ma.asanyarray(ar1) + ar2 = ma.asanyarray( ar2 ) + ar = ma.concatenate((ar1, ar2 )) + b1 = ma.zeros(ar1.shape, dtype = np.int8) + b2 = ma.ones(ar2.shape, dtype = np.int8) + tt = ma.concatenate((b1, b2)) + + # We need this to be a stable sort, so always use 'mergesort' here. The + # values from the first array should always come before the values from the + # second array. + perm = ar.argsort(kind='mergesort') + aux = ar[perm] + aux2 = tt[perm] +# flag = ediff1d( aux, 1 ) == 0 + flag = ma.concatenate((aux[1:] == aux[:-1], [False])) + ii = ma.where( flag * aux2 )[0] + aux = perm[ii+1] + perm[ii+1] = perm[ii] + perm[ii] = aux + # + indx = perm.argsort(kind='mergesort')[:len( ar1 )] + # + return flag[indx] + + +def union1d(ar1, ar2): + """ + Union of 1D arrays with unique elements. + + See also + -------- + numpy.union1d : equivalent function for ndarrays. + + """ + return unique1d(ma.concatenate((ar1, ar2))) + + +def setdiff1d(ar1, ar2): + """ + Set difference of 1D arrays with unique elements. 
+ + See Also + -------- + numpy.setdiff1d : equivalent function for ndarrays + + """ + aux = setmember1d(ar1,ar2) + if aux.size == 0: + return aux + else: + return ma.asarray(ar1)[aux == 0] + + + +#####-------------------------------------------------------------------------- +#---- --- Covariance --- +#####-------------------------------------------------------------------------- + + + + def _covhelper(x, y=None, rowvar=True, allow_masked=True): """ Private function for the computation of covariance and correlation Modified: trunk/numpy/ma/tests/test_extras.py =================================================================== --- trunk/numpy/ma/tests/test_extras.py 2009-01-19 09:01:24 UTC (rev 6327) +++ trunk/numpy/ma/tests/test_extras.py 2009-01-19 09:04:20 UTC (rev 6328) @@ -338,40 +338,8 @@ c = dot(b,a,False) assert_equal(c, np.dot(b.filled(0),a.filled(0))) - def test_ediff1d(self): - "Tests mediff1d" - x = masked_array(np.arange(5), mask=[1,0,0,0,1]) - difx_d = (x._data[1:]-x._data[:-1]) - difx_m = (x._mask[1:]-x._mask[:-1]) - dx = ediff1d(x) - assert_equal(dx._data, difx_d) - assert_equal(dx._mask, difx_m) - # - dx = ediff1d(x, to_begin=masked) - assert_equal(dx._data, np.r_[0,difx_d]) - assert_equal(dx._mask, np.r_[1,difx_m]) - dx = ediff1d(x, to_begin=[1,2,3]) - assert_equal(dx._data, np.r_[[1,2,3],difx_d]) - assert_equal(dx._mask, np.r_[[0,0,0],difx_m]) - # - dx = ediff1d(x, to_end=masked) - assert_equal(dx._data, np.r_[difx_d,0]) - assert_equal(dx._mask, np.r_[difx_m,1]) - dx = ediff1d(x, to_end=[1,2,3]) - assert_equal(dx._data, np.r_[difx_d,[1,2,3]]) - assert_equal(dx._mask, np.r_[difx_m,[0,0,0]]) - # - dx = ediff1d(x, to_end=masked, to_begin=masked) - assert_equal(dx._data, np.r_[0,difx_d,0]) - assert_equal(dx._mask, np.r_[1,difx_m,1]) - dx = ediff1d(x, to_end=[1,2,3], to_begin=masked) - assert_equal(dx._data, np.r_[0,difx_d,[1,2,3]]) - assert_equal(dx._mask, np.r_[1,difx_m,[0,0,0]]) - # - dx = ediff1d(x._data, to_end=masked, to_begin=masked) - 
assert_equal(dx._data, np.r_[0,difx_d,0]) - assert_equal(dx._mask, np.r_[1,0,0,0,0,1]) + class TestApplyAlongAxis(TestCase): # "Tests 2D functions" @@ -383,6 +351,7 @@ assert_equal(xa,[[1,4],[7,10]]) + class TestMedian(TestCase): # def test_2d(self): @@ -422,11 +391,12 @@ assert_equal(median(x,0), [[12,10],[8,9],[16,17]]) + class TestCov(TestCase): - # + def setUp(self): self.data = array(np.random.rand(12)) - # + def test_1d_wo_missing(self): "Test cov on 1D variable w/o missing values" x = self.data @@ -434,7 +404,7 @@ assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False)) assert_almost_equal(np.cov(x, rowvar=False, bias=True), cov(x, rowvar=False, bias=True)) - # + def test_2d_wo_missing(self): "Test cov on 1 2D variable w/o missing values" x = self.data.reshape(3,4) @@ -442,7 +412,7 @@ assert_almost_equal(np.cov(x, rowvar=False), cov(x, rowvar=False)) assert_almost_equal(np.cov(x, rowvar=False, bias=True), cov(x, rowvar=False, bias=True)) - # + def test_1d_w_missing(self): "Test cov 1 1D variable w/missing values" x = self.data @@ -466,7 +436,7 @@ cov(x, x[::-1], rowvar=False)) assert_almost_equal(np.cov(nx, nx[::-1], rowvar=False, bias=True), cov(x, x[::-1], rowvar=False, bias=True)) - # + def test_2d_w_missing(self): "Test cov on 2D variable w/ missing value" x = self.data @@ -486,11 +456,12 @@ np.cov(xf, rowvar=False, bias=True) * x.shape[0]/frac) + class TestCorrcoef(TestCase): - # + def setUp(self): self.data = array(np.random.rand(12)) - # + def test_1d_wo_missing(self): "Test cov on 1D variable w/o missing values" x = self.data @@ -499,7 +470,7 @@ corrcoef(x, rowvar=False)) assert_almost_equal(np.corrcoef(x, rowvar=False, bias=True), corrcoef(x, rowvar=False, bias=True)) - # + def test_2d_wo_missing(self): "Test corrcoef on 1 2D variable w/o missing values" x = self.data.reshape(3,4) @@ -508,7 +479,7 @@ corrcoef(x, rowvar=False)) assert_almost_equal(np.corrcoef(x, rowvar=False, bias=True), corrcoef(x, rowvar=False, bias=True)) - # + def 
test_1d_w_missing(self): "Test corrcoef 1 1D variable w/missing values" x = self.data @@ -532,7 +503,7 @@ corrcoef(x, x[::-1], rowvar=False)) assert_almost_equal(np.corrcoef(nx, nx[::-1], rowvar=False, bias=True), corrcoef(x, x[::-1], rowvar=False, bias=True)) - # + def test_2d_w_missing(self): "Test corrcoef on 2D variable w/ missing value" x = self.data @@ -575,6 +546,213 @@ assert_almost_equal(a, a_) + +class TestArraySetOps(TestCase): + # + def test_unique1d_onlist(self): + "Test unique1d on list" + data = [1, 1, 1, 2, 2, 3] + test = unique1d(data, return_index=True, return_inverse=True) + self.failUnless(isinstance(test[0], MaskedArray)) + assert_equal(test[0], masked_array([1, 2, 3], mask=[0, 0, 0])) + assert_equal(test[1], [0, 3, 5]) + assert_equal(test[2], [0, 0, 0, 1, 1, 2]) + + def test_unique1d_onmaskedarray(self): + "Test unique1d on masked data w/use_mask=True" + data = masked_array([1, 1, 1, 2, 2, 3], mask=[0, 0, 1, 0, 1, 0]) + test = unique1d(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1])) + assert_equal(test[1], [0, 3, 5, 2]) + assert_equal(test[2], [0, 0, 3, 1, 3, 2]) + # + data.fill_value = 3 + data = masked_array([1, 1, 1, 2, 2, 3], + mask=[0, 0, 1, 0, 1, 0], fill_value=3) + test = unique1d(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array([1, 2, 3, -1], mask=[0, 0, 0, 1])) + assert_equal(test[1], [0, 3, 5, 2]) + assert_equal(test[2], [0, 0, 3, 1, 3, 2]) + + def test_unique1d_allmasked(self): + "Test all masked" + data = masked_array([1, 1, 1], mask=True) + test = unique1d(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array([1,], mask=[True])) + assert_equal(test[1], [0]) + assert_equal(test[2], [0, 0, 0]) + # + "Test masked" + data = masked + test = unique1d(data, return_index=True, return_inverse=True) + assert_equal(test[0], masked_array(masked)) + assert_equal(test[1], [0]) + assert_equal(test[2], [0]) + + def 
test_ediff1d(self): + "Tests mediff1d" + x = masked_array(np.arange(5), mask=[1,0,0,0,1]) + control = array([1, 1, 1, 4], mask=[1, 0, 0, 1]) + test = ediff1d(x) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + def test_ediff1d_tobegin(self): + "Test ediff1d w/ to_begin" + x = masked_array(np.arange(5), mask=[1,0,0,0,1]) + test = ediff1d(x, to_begin=masked) + control = array([0, 1, 1, 1, 4], mask=[1, 1, 0, 0, 1]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_begin=[1,2,3]) + control = array([1, 2, 3, 1, 1, 1, 4], mask=[0, 0, 0, 1, 0, 0, 1]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + def test_ediff1d_toend(self): + "Test ediff1d w/ to_end" + x = masked_array(np.arange(5), mask=[1,0,0,0,1]) + test = ediff1d(x, to_end=masked) + control = array([1, 1, 1, 4, 0], mask=[1, 0, 0, 1, 1]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_end=[1,2,3]) + control = array([1, 1, 1, 4, 1, 2, 3], mask=[1, 0, 0, 1, 0, 0, 0]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + def test_ediff1d_tobegin_toend(self): + "Test ediff1d w/ to_begin and to_end" + x = masked_array(np.arange(5), mask=[1,0,0,0,1]) + test = ediff1d(x, to_end=masked, to_begin=masked) + control = array([0, 1, 1, 1, 4, 0], mask=[1, 1, 0, 0, 1, 1]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_end=[1,2,3], to_begin=masked) + control = array([0, 1, 1, 1, 4, 1, 2, 3], mask=[1, 1, 0, 0, 1, 0, 0, 0]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + def test_ediff1d_ndarray(self): + "Test 
ediff1d w/ a ndarray" + x = np.arange(5) + test = ediff1d(x) + control = array([1, 1, 1, 1], mask=[0, 0, 0, 0]) + assert_equal(test, control) + self.failUnless(isinstance(test, MaskedArray)) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + test = ediff1d(x, to_end=masked, to_begin=masked) + control = array([0, 1, 1, 1, 1, 0], mask=[1, 0, 0, 0, 0, 1]) + self.failUnless(isinstance(test, MaskedArray)) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + + def test_intersect1d(self): + "Test intersect1d" + x = array([1, 3, 3, 3], mask=[0, 0, 0, 1]) + y = array([3, 1, 1, 1], mask=[0, 0, 0, 1]) + test = intersect1d(x, y) + control = array([1, 1, 3, 3, -1], mask=[0, 0, 0, 0, 1]) + assert_equal(test, control) + + + def test_intersect1d_nu(self): + "Test intersect1d_nu" + x = array([1, 3, 3, 3], mask=[0, 0, 0, 1]) + y = array([3, 1, 1, 1], mask=[0, 0, 0, 1]) + test = intersect1d_nu(x, y) + control = array([1, 3, -1], mask=[0, 0, 1]) + assert_equal(test, control) + + + def test_setxor1d(self): + "Test setxor1d" + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, -1]) + test = setxor1d(a, b) + assert_equal(test, array([3, 4, 7])) + # + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = [1, 2, 3, 4, 5] + test = setxor1d(a, b) + assert_equal(test, array([3, 4, 7, -1], mask=[0, 0, 0, 1])) + # + a = array( [1, 2, 3] ) + b = array( [6, 5, 4] ) + test = setxor1d(a, b) + assert(isinstance(test, MaskedArray)) + assert_equal(test, [1, 2, 3, 4, 5, 6]) + # + a = array([1, 8, 2, 3], mask=[0, 1, 0, 0]) + b = array([6, 5, 4, 8], mask=[0, 0, 0, 1]) + test = setxor1d(a, b) + assert(isinstance(test, MaskedArray)) + assert_equal(test, [1, 2, 3, 4, 5, 6]) + # + assert_array_equal([], setxor1d([],[])) + + + def test_setmember1d( self ): + "Test setmember1d" + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, -1]) + 
test = setmember1d(a, b) + assert_equal(test, [True, True, True, False, True]) + # + assert_array_equal([], setmember1d([],[])) + + + def test_union1d( self ): + "Test union1d" + a = array([1, 2, 5, 7, -1], mask=[0, 0, 0, 0, 1]) + b = array([1, 2, 3, 4, 5, -1], mask=[0, 0, 0, 0, 0, -1]) + test = union1d(a, b) + control = array([1, 2, 3, 4, 5, 7, -1], mask=[0, 0, 0, 0, 0, 0, 1]) + assert_equal(test, control) + # + assert_array_equal([], setmember1d([],[])) + + + def test_setdiff1d( self ): + "Test setdiff1d" + a = array([6, 5, 4, 7, 1, 2, 1], mask=[0, 0, 0, 0, 0, 0, 1]) + b = array([2, 4, 3, 3, 2, 1, 5]) + test = setdiff1d(a, b) + assert_equal(test, array([6, 7, -1], mask=[0, 0, 1])) + # + a = arange(10) + b = arange(8) + assert_equal(setdiff1d(a, b), array([8, 9])) + + + def test_setdiff1d_char_array(self): + "Test setdiff1d_charray" + a = np.array(['a','b','c']) + b = np.array(['a','b','s']) + assert_array_equal(setdiff1d(a,b), np.array(['c'])) + + + + +class TestShapeBase(TestCase): + # + def test_atleast1d(self): + pass + + ############################################################################### #------------------------------------------------------------------------------ if __name__ == "__main__": From numpy-svn at scipy.org Mon Jan 19 16:22:59 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 19 Jan 2009 15:22:59 -0600 (CST) Subject: [Numpy-svn] r6329 - in trunk/numpy/lib: . tests Message-ID: <20090119212259.8AA39C7C011@scipy.org> Author: pierregm Date: 2009-01-19 15:22:52 -0600 (Mon, 19 Jan 2009) New Revision: 6329 Added: trunk/numpy/lib/_iotools.py trunk/numpy/lib/tests/test__iotools.py Modified: trunk/numpy/lib/io.py trunk/numpy/lib/tests/test_io.py Log: * lib : introduced _iotools * lib.io : introduced genfromtxt, ndfromtxt, mafromtxt, recfromtxt, recfromcsv. 
Added: trunk/numpy/lib/_iotools.py =================================================================== --- trunk/numpy/lib/_iotools.py 2009-01-19 09:04:20 UTC (rev 6328) +++ trunk/numpy/lib/_iotools.py 2009-01-19 21:22:52 UTC (rev 6329) @@ -0,0 +1,469 @@ +""" +A collection of functions designed to help I/O with ascii file. + +""" +__docformat__ = "restructuredtext en" + +import numpy as np +import numpy.core.numeric as nx +from __builtin__ import bool, int, long, float, complex, object, unicode, str + + +def _is_string_like(obj): + """ + Check whether obj behaves like a string. + """ + try: + obj + '' + except (TypeError, ValueError): + return False + return True + + +def _to_filehandle(fname, flag='r', return_opened=False): + """ + Returns the filehandle corresponding to a string or a file. + If the string ends in '.gz', the file is automatically unzipped. + + Parameters + ---------- + fname : string, filehandle + Name of the file whose filehandle must be returned. + flag : string, optional + Flag indicating the status of the file ('r' for read, 'w' for write). + return_opened : boolean, optional + Whether to return the opening status of the file. + """ + if _is_string_like(fname): + if fname.endswith('.gz'): + import gzip + fhd = gzip.open(fname, flag) + elif fname.endswith('.bz2'): + import bz2 + fhd = bz2.BZ2File(fname) + else: + fhd = file(fname, flag) + opened = True + elif hasattr(fname, 'seek'): + fhd = fname + opened = False + else: + raise ValueError('fname must be a string or file handle') + if return_opened: + return fhd, opened + return fhd + + +def flatten_dtype(ndtype): + """ + Unpack a structured data-type. + + """ + names = ndtype.names + if names is None: + return [ndtype] + else: + types = [] + for field in names: + (typ, _) = ndtype.fields[field] + flat_dt = flatten_dtype(typ) + types.extend(flat_dt) + return types + + + +class LineSplitter: + """ + Defines a function to split a string at a given delimiter or at given places. 
+ + Parameters + ---------- + comment : {'#', string} + Character used to mark the beginning of a comment. + delimiter : var, optional + If a string, character used to delimit consecutive fields. + If an integer or a sequence of integers, width(s) of each field. + autostrip : boolean, optional + Whether to strip each individual fields + """ + + def autostrip(self, method): + "Wrapper to strip each member of the output of `method`." + return lambda input: [_.strip() for _ in method(input)] + # + def __init__(self, delimiter=None, comments='#', autostrip=True): + self.comments = comments + # Delimiter is a character + if (delimiter is None) or _is_string_like(delimiter): + delimiter = delimiter or None + _handyman = self._delimited_splitter + # Delimiter is a list of field widths + elif hasattr(delimiter, '__iter__'): + _handyman = self._variablewidth_splitter + idx = np.cumsum([0]+list(delimiter)) + delimiter = [slice(i,j) for (i,j) in zip(idx[:-1], idx[1:])] + # Delimiter is a single integer + elif int(delimiter): + (_handyman, delimiter) = (self._fixedwidth_splitter, int(delimiter)) + else: + (_handyman, delimiter) = (self._delimited_splitter, None) + self.delimiter = delimiter + if autostrip: + self._handyman = self.autostrip(_handyman) + else: + self._handyman = _handyman + # + def _delimited_splitter(self, line): + line = line.split(self.comments)[0].strip() + if not line: + return [] + return line.split(self.delimiter) + # + def _fixedwidth_splitter(self, line): + line = line.split(self.comments)[0] + if not line: + return [] + fixed = self.delimiter + slices = [slice(i, i+fixed) for i in range(len(line))[::fixed]] + return [line[s] for s in slices] + # + def _variablewidth_splitter(self, line): + line = line.split(self.comments)[0] + if not line: + return [] + slices = self.delimiter + return [line[s] for s in slices] + # + def __call__(self, line): + return self._handyman(line) + + + +class NameValidator: + """ + Validates a list of strings to use as field 
names. + The strings are stripped of any non alphanumeric character, and spaces + are replaced by `_`. If the optional input parameter `case_sensitive` + is False, the strings are set to upper case. + + During instantiation, the user can define a list of names to exclude, as + well as a list of invalid characters. Names in the exclusion list + are appended a '_' character. + + Once an instance has been created, it can be called with a list of names + and a list of valid names will be created. + The `__call__` method accepts an optional keyword, `default`, that sets + the default name in case of ambiguity. By default, `default = 'f'`, so + that names will default to `f0`, `f1` + + Parameters + ---------- + excludelist : sequence, optional + A list of names to exclude. This list is appended to the default list + ['return','file','print']. Excluded names are appended an underscore: + for example, `file` would become `file_`. + deletechars : string, optional + A string combining invalid characters that must be deleted from the names. + casesensitive : {True, False, 'upper', 'lower'}, optional + If True, field names are case_sensitive. + If False or 'upper', field names are converted to upper case. + If 'lower', field names are converted to lower case. 
+ """ + # + defaultexcludelist = ['return','file','print'] + defaultdeletechars = set("""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""") + # + def __init__(self, excludelist=None, deletechars=None, case_sensitive=None): + # + if excludelist is None: + excludelist = [] + excludelist.extend(self.defaultexcludelist) + self.excludelist = excludelist + # + if deletechars is None: + delete = self.defaultdeletechars + else: + delete = set(deletechars) + delete.add('"') + self.deletechars = delete + + if (case_sensitive is None) or (case_sensitive is True): + self.case_converter = lambda x: x + elif (case_sensitive is False) or ('u' in case_sensitive): + self.case_converter = lambda x: x.upper() + elif 'l' in case_sensitive: + self.case_converter = lambda x: x.lower() + else: + self.case_converter = lambda x: x + # + def validate(self, names, default='f'): + # + if names is None: + return + # + validatednames = [] + seen = dict() + # + deletechars = self.deletechars + excludelist = self.excludelist + # + case_converter = self.case_converter + # + for i, item in enumerate(names): + item = case_converter(item) + item = item.strip().replace(' ', '_') + item = ''.join([c for c in item if c not in deletechars]) + if not len(item): + item = '%s%d' % (default, i) + elif item in excludelist: + item += '_' + cnt = seen.get(item, 0) + if cnt > 0: + validatednames.append(item + '_%d' % cnt) + else: + validatednames.append(item) + seen[item] = cnt+1 + return validatednames + # + def __call__(self, names, default='f'): + return self.validate(names, default) + + + +def str2bool(value): + """ + Tries to transform a string supposed to represent a boolean to a boolean. 
+ + Raises + ------ + ValueError + If the string is not 'True' or 'False' (case independent) + """ + value = value.upper() + if value == 'TRUE': + return True + elif value == 'FALSE': + return False + else: + raise ValueError("Invalid boolean") + + + +class StringConverter: + """ + Factory class for function transforming a string into another object (int, + float). + + After initialization, an instance can be called to transform a string + into another object. If the string is recognized as representing a missing + value, a default value is returned. + + Parameters + ---------- + dtype_or_func : {None, dtype, function}, optional + Input data type, used to define a basic function and a default value + for missing data. For example, when `dtype` is float, the :attr:`func` + attribute is set to ``float`` and the default value to `np.nan`. + Alternatively, function used to convert a string to another object. + In that later case, it is recommended to give an associated default + value as input. + default : {None, var}, optional + Value to return by default, that is, when the string to be converted + is flagged as missing. + missing_values : {sequence}, optional + Sequence of strings indicating a missing value. + locked : {boolean}, optional + Whether the StringConverter should be locked to prevent automatic + upgrade or not. + + Attributes + ---------- + func : function + Function used for the conversion + default : var + Default value to return when the input corresponds to a missing value. + type : type + Type of the output. + _status : integer + Integer representing the order of the conversion. + _mapper : sequence of tuples + Sequence of tuples (dtype, function, default value) to evaluate in order. + _locked : boolean + Whether the StringConverter is locked, thereby preventing automatic any + upgrade or not. 
+ + """ + # + _mapper = [(nx.bool_, str2bool, None), + (nx.integer, int, -1), + (nx.floating, float, nx.nan), + (complex, complex, nx.nan+0j), + (nx.string_, str, '???')] + (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper) + # + @classmethod + def _getsubdtype(cls, val): + """Returns the type of the dtype of the input variable.""" + return np.array(val).dtype.type + # + @classmethod + def upgrade_mapper(cls, func, default=None): + """ + Upgrade the mapper of a StringConverter by adding a new function and its + corresponding default. + + The input function (or sequence of functions) and its associated default + value (if any) is inserted in penultimate position of the mapper. + The corresponding type is estimated from the dtype of the default value. + + Parameters + ---------- + func : var + Function, or sequence of functions + + Examples + -------- + >>> import dateutil.parser + >>> import datetime + >>> dateparser = datetutil.parser.parse + >>> defaultdate = datetime.date(2000, 1, 1) + >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate) + """ + # Func is a single functions + if hasattr(func, '__call__'): + cls._mapper.insert(-1, (cls._getsubdtype(default), func, default)) + return + elif hasattr(func, '__iter__'): + if isinstance(func[0], (tuple, list)): + for _ in func: + cls._mapper.insert(-1, _) + return + if default is None: + default = [None] * len(func) + else: + default = list(default) + default.append([None] * (len(func)-len(default))) + for (fct, dft) in zip(func, default): + cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft)) + # + def __init__(self, dtype_or_func=None, default=None, missing_values=None, + locked=False): + # Defines a lock for upgrade + self._locked = bool(locked) + # No input dtype: minimal initialization + if dtype_or_func is None: + self.func = str2bool + self._status = 0 + self.default = default + ttype = np.bool + else: + # Is the input a np.dtype ? 
+ try: + self.func = None + ttype = np.dtype(dtype_or_func).type + except TypeError: + # dtype_or_func must be a function, then + if not hasattr(dtype_or_func, '__call__'): + errmsg = "The input argument `dtype` is neither a function"\ + " or a dtype (got '%s' instead)" + raise TypeError(errmsg % type(dtype_or_func)) + # Set the function + self.func = dtype_or_func + # If we don't have a default, try to guess it or set it to None + if default is None: + try: + default = self.func('0') + except ValueError: + default = None + ttype = self._getsubdtype(default) + # Set the status according to the dtype + for (i, (deftype, func, default_def)) in enumerate(self._mapper): + if np.issubdtype(ttype, deftype): + self._status = i + self.default = default or default_def + break + # If the input was a dtype, set the function to the last we saw + if self.func is None: + self.func = func + # If the status is 1 (int), change the function to smthg more robust + if self.func == self._mapper[1][1]: + self.func = lambda x : int(float(x)) + # Store the list of strings corresponding to missing values. + if missing_values is None: + self.missing_values = set(['']) + else: + self.missing_values = set(list(missing_values) + ['']) + # + self._callingfunction = self._strict_call + self.type = ttype + # + def _loose_call(self, value): + try: + return self.func(value) + except ValueError: + return self.default + # + def _strict_call(self, value): + try: + return self.func(value) + except ValueError: + if value.strip() in self.missing_values: + return self.default + raise ValueError("Cannot convert string '%s'" % value) + # + def __call__(self, value): + return self._callingfunction(value) + # + def upgrade(self, value): + """ + Tries to find the best converter for `value`, by testing different + converters in order. + The order in which the converters are tested is read from the + :attr:`_status` attribute of the instance. 
+ """ + try: + self._strict_call(value) + except ValueError: + # Raise an exception if we locked the converter... + if self._locked: + raise ValueError("Converter is locked and cannot be upgraded") + _statusmax = len(self._mapper) + # Complains if we try to upgrade by the maximum + if self._status == _statusmax: + raise ValueError("Could not find a valid conversion function") + elif self._status < _statusmax - 1: + self._status += 1 + (self.type, self.func, self.default) = self._mapper[self._status] + self.upgrade(value) + # + def update(self, func, default=None, missing_values='', locked=False): + """ + Sets the :attr:`func` and :attr:`default` attributes directly. + + Parameters + ---------- + func : function + Conversion function. + default : {var}, optional + Default value to return when a missing value is encountered. + missing_values : {var}, optional + Sequence of strings representing missing values. + locked : {False, True}, optional + Whether the status should be locked to prevent automatic upgrade. 
+ """ + self.func = func + self._locked = locked + # Don't reset the default to None if we can avoid it + if default is not None: + self.default = default + # Add the missing values to the existing set + if missing_values is not None: + if _is_string_like(missing_values): + self.missing_values.add(missing_values) + elif hasattr(missing_values, '__iter__'): + for val in missing_values: + self.missing_values.add(val) + else: + self.missing_values = [] # Update the type + self.type = self._getsubdtype(func('0')) + Property changes on: trunk/numpy/lib/_iotools.py ___________________________________________________________________ Name: svn:mime-type + text/plain Modified: trunk/numpy/lib/io.py =================================================================== --- trunk/numpy/lib/io.py 2009-01-19 09:04:20 UTC (rev 6328) +++ trunk/numpy/lib/io.py 2009-01-19 21:22:52 UTC (rev 6329) @@ -1,4 +1,5 @@ __all__ = ['savetxt', 'loadtxt', + 'genfromtxt', 'ndfromtxt', 'mafromtxt', 'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez', 'packbits', 'unpackbits', @@ -15,7 +16,11 @@ from _datasource import DataSource from _compiled_base import packbits, unpackbits +from _iotools import LineSplitter, NameValidator, StringConverter, \ + _is_string_like, flatten_dtype + _file = file +_string_like = _is_string_like class BagObj(object): """A simple class that converts attribute lookups to @@ -264,10 +269,6 @@ return str -def _string_like(obj): - try: obj + '' - except (TypeError, ValueError): return 0 - return 1 def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False): @@ -342,7 +343,7 @@ if usecols is not None: usecols = list(usecols) - if _string_like(fname): + if _is_string_like(fname): if fname.endswith('.gz'): import gzip fh = gzip.open(fname) @@ -520,7 +521,7 @@ """ - if _string_like(fname): + if _is_string_like(fname): if fname.endswith('.gz'): import gzip fh = gzip.open(fname,'wb') @@ -608,3 +609,466 @@ seq = 
def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
               converters=None, missing='', missing_values=None, usecols=None,
               names=None, excludelist=None, deletechars=None,
               case_sensitive=True, unpack=None, usemask=False, loose=True):
    """
    Load data from a text file.

    Each line past the first `skiprows` ones is split at the `delimiter`
    character, and characters following the `comments` character are
    discarded.

    Parameters
    ----------
    fname : file or string
        File or filename to read. If the filename extension is `.gz` or
        `.bz2`, the file is first decompressed.
    dtype : data-type
        Data type of the resulting array. If this is a flexible data-type,
        the resulting array will be 1-dimensional, and each row will be
        interpreted as an element of the array. In this case, the number of
        columns used must match the number of fields in the data-type, and
        the names of each field will be set by the corresponding name of
        the dtype. If None, the dtypes will be determined by the contents
        of each column, individually.
    comments : {string}, optional
        The character used to indicate the start of a comment. All the
        characters occurring on a line after a comment are discarded.
    delimiter : {string}, optional
        The string used to separate values. By default, any consecutive
        whitespace acts as delimiter.
    skiprows : {int}, optional
        Number of lines to skip at the beginning of the file.
    converters : {None, dictionary}, optional
        A dictionary mapping column number to a function that will convert
        values in the column to a number. Converters can also be used to
        provide a default value for missing data:
        ``converters = {3: lambda s: float(s or 0)}``.
    missing : {string}, optional
        A string representing a missing value, irrespective of the column
        where it appears (e.g., `'missing'` or `'unused'`).
    missing_values : {None, dictionary}, optional
        A dictionary mapping a column number to a string indicating whether
        the corresponding field should be masked.
    usecols : {None, sequence}, optional
        Which columns to read, with 0 being the first. For example,
        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
    names : {None, True, string, sequence}, optional
        If True, field names are read from the first valid line after the
        first `skiprows` lines. If a comma-separated string or a sequence,
        these names define the field names of a flexible dtype. If None,
        the names of the dtype fields are used, if any.
    excludelist : {sequence}, optional
        A list of names to exclude (appended to the default list
        ['return','file','print']); excluded names get a trailing '_'.
    deletechars : {string}, optional
        A string combining invalid characters that must be deleted from the
        names.
    case_sensitive : {True, False, 'upper', 'lower'}, optional
        If True, field names are case-sensitive; if False or 'upper'
        (resp. 'lower'), they are converted to upper (resp. lower) case.
    unpack : {bool}, optional
        If True, the returned array is transposed, so that arguments may be
        unpacked using ``x, y, z = loadtxt(...)``.
    usemask : {bool}, optional
        If True, returns a masked array; if False, a regular array.
    loose : {bool}, optional
        If True, unconvertible values silently become the converter's
        default instead of raising.

    Returns
    -------
    out : ndarray or MaskedArray
        Data read from the text file.

    Notes
    -----
    * When spaces are used as delimiters, or when no delimiter has been
      given as input, there should not be any missing data between two
      fields.
    * When the variables are named (either by a flexible dtype or with
      `names`), there must not be any header in the file (else a
      :exc:`ValueError` exception is raised).

    See Also
    --------
    numpy.loadtxt : equivalent function when no data is missing.
    """
    # Import lazily: only masked output needs numpy.ma.
    if usemask:
        from numpy.ma import MaskedArray, make_mask_descr
    # Check the input dictionary of converters
    user_converters = converters or {}
    if not isinstance(user_converters, dict):
        errmsg = "The input argument 'converter' should be a valid dictionary "\
                 "(got '%s' instead)"
        raise TypeError(errmsg % type(user_converters))
    # Check the input dictionary of missing values
    user_missing_values = missing_values or {}
    if not isinstance(user_missing_values, dict):
        errmsg = "The input argument 'missing_values' should be a valid "\
                 "dictionary (got '%s' instead)"
        raise TypeError(errmsg % type(missing_values))
    # Global missing markers, valid for every column ('' is always one).
    defmissing = [_.strip() for _ in missing.split(',')] + ['']

    # Initialize the filehandle, the LineSplitter and the NameValidator
#    fhd = _to_filehandle(fname)
    if isinstance(fname, basestring):
        fhd = np.lib._datasource.open(fname)
    elif not hasattr(fname, 'read'):
        raise TypeError("The input should be a string or a filehandle. "\
                        "(got %s instead)" % type(fname))
    else:
        fhd = fname
    split_line = LineSplitter(delimiter=delimiter, comments=comments,
                              autostrip=False)._handyman
    validate_names = NameValidator(excludelist=excludelist,
                                   deletechars=deletechars,
                                   case_sensitive=case_sensitive)

    # Get the first valid lines after the first skiprows ones
    for i in xrange(skiprows):
        fhd.readline()
    first_values = None
    while not first_values:
        first_line = fhd.readline()
        if first_line == '':
            raise IOError('End-of-file reached before encountering data.')
        first_values = split_line(first_line)

    # Check the columns to use
    if usecols is not None:
        usecols = list(usecols)
    nbcols = len(usecols or first_values)

    # Check the names and overwrite the dtype.names if needed
    if dtype is not None:
        dtype = np.dtype(dtype)
    dtypenames = getattr(dtype, 'names', None)
    if names is True:
        # First line was a header: consume it (emptied so the parsing loop
        # below skips it).
        names = validate_names([_.strip() for _ in first_values])
        first_line = ''
    elif _is_string_like(names):
        names = validate_names([_.strip() for _ in names.split(',')])
    elif names:
        names = validate_names(names)
    elif dtypenames:
        dtype.names = validate_names(dtypenames)
    if names and dtypenames:
        dtype.names = names

    # If usecols is a list of names, convert to a list of indices
    if usecols:
        for (i, current) in enumerate(usecols):
            if _is_string_like(current):
                usecols[i] = names.index(current)

    # If user_missing_values has names as keys, transform them to indices
    missing_values = {}
    for (key, val) in user_missing_values.iteritems():
        # If val is a list, flatten it. In any case, add missing &'' to the list
        if isinstance(val, (list, tuple)):
            val = [str(_) for _ in val]
        else:
            val = [str(val),]
        val.extend(defmissing)
        if _is_string_like(key):
            try:
                missing_values[names.index(key)] = val
            except ValueError:
                # Unknown column name: silently ignored.
                pass
        else:
            missing_values[key] = val

    # Initialize the default converters
    if dtype is None:
        # Note: we can't use a [...]*nbcols, as we would have 3 times the same
        # ... converter, instead of 3 different converters.
        converters = [StringConverter(None,
                              missing_values=missing_values.get(_, defmissing))
                      for _ in range(nbcols)]
    else:
        flatdtypes = flatten_dtype(dtype)
        # Initialize the converters
        if len(flatdtypes) > 1:
            # Flexible type : get a converter from each dtype
            converters = [StringConverter(dt,
                              missing_values=missing_values.get(i, defmissing),
                              locked=True)
                          for (i, dt) in enumerate(flatdtypes)]
        else:
            # Set to a default converter (but w/ different missing values)
            converters = [StringConverter(dtype,
                              missing_values=missing_values.get(_, defmissing),
                              locked=True)
                          for _ in range(nbcols)]
    # From here on, `missing_values` is a per-column list of marker sets.
    missing_values = [_.missing_values for _ in converters]

    # Update the converters to use the user-defined ones
    for (i, conv) in user_converters.iteritems():
        # If the converter is specified by column names, use the index instead
        if _is_string_like(i):
            i = names.index(i)
        if usecols:
            try:
                i = usecols.index(i)
            except ValueError:
                # Unused converter specified
                continue
        converters[i].update(conv, default=None,
                             missing_values=missing_values[i],
                             locked=True)

    # Reset the names to match the usecols
    if (not first_line) and usecols:
        names = [names[_] for _ in usecols]

    rows = []
    append_to_rows = rows.append
    if usemask:
        masks = []
        append_to_masks = masks.append
    # Parse each line
    for line in itertools.chain([first_line,], fhd):
        values = split_line(line)
        # Skip an empty line
        if len(values) == 0:
            continue
        # Select only the columns we need
        if usecols:
            values = [values[_] for _ in usecols]
        # Check whether we need to update the converter
        if dtype is None:
            for (converter, item) in zip(converters, values):
                converter.upgrade(item)
        # Store the values
        append_to_rows(tuple(values))
        if usemask:
            append_to_masks(tuple([val.strip() in mss
                                   for (val, mss) in zip(values,
                                                         missing_values)]))

    # Convert each value according to the converter:
    # We want to modify the list in place to avoid creating a new one...
    if loose:
        conversionfuncs = [conv._loose_call for conv in converters]
    else:
        conversionfuncs = [conv._strict_call for conv in converters]
    for (i, vals) in enumerate(rows):
        rows[i] = tuple([convert(val)
                         for (convert, val) in zip(conversionfuncs, vals)])

    # Reset the dtype
    data = rows
    if dtype is None:
        # Get the dtypes from the first row
        coldtypes = [np.array(val).dtype for val in data[0]]
        # Find the columns with strings, and take the largest number of chars.
        strcolidx = [i for (i, v) in enumerate(coldtypes) if v.char == 'S']
        for i in strcolidx:
            coldtypes[i] = "|S%i" % max(len(row[i]) for row in data)
        #
        if names is None:
            # If the dtype is uniform, don't define names, else use ''
            base = coldtypes[0]
            if np.all([(dt == base) for dt in coldtypes]):
                (ddtype, mdtype) = (base, np.bool)
            else:
                ddtype = [('', dt) for dt in coldtypes]
                mdtype = [('', np.bool) for dt in coldtypes]
        else:
            ddtype = zip(names, coldtypes)
            mdtype = zip(names, [np.bool] * len(coldtypes))
        output = np.array(data, dtype=ddtype)
        if usemask:
            outputmask = np.array(masks, dtype=mdtype)
    else:
        # Overwrite the initial dtype names if needed
        if names and dtype.names:
            dtype.names = names
        flatdtypes = flatten_dtype(dtype)
        # Case 1. We have a structured type
        if len(flatdtypes) > 1:
            # Nested dtype, eg [('a', int), ('b', [('b0', int), ('b1', 'f4')])]
            # First, create the array using a flattened dtype:
            # [('a', int), ('b1', int), ('b2', float)]
            # Then, view the array using the specified dtype.
            rows = np.array(data, dtype=[('', t) for t in flatdtypes])
            output = rows.view(dtype)
            # Now, process the rowmasks the same way
            if usemask:
                rowmasks = np.array(masks,
                                    dtype=np.dtype([('', np.bool)
                                                    for t in flatdtypes]))
                # Construct the new dtype
                mdtype = make_mask_descr(dtype)
                outputmask = rowmasks.view(mdtype)
        # Case #2. We have a basic dtype
        else:
            # We used some user-defined converters
            if user_converters:
                ishomogeneous = True
                descr = []
                for (i, ttype) in enumerate([conv.type for conv in converters]):
                    # Keep the dtype of the current converter
                    if i in user_converters:
                        ishomogeneous &= (ttype == dtype.type)
                        if ttype == np.string_:
                            ttype = "|S%i" % max(len(row[i]) for row in data)
                        descr.append(('', ttype))
                    else:
                        descr.append(('', dtype))
                # If one converter disagrees with the requested dtype,
                # switch to a structured dtype built from the converters.
                if not ishomogeneous:
                    dtype = np.dtype(descr)
            #
            output = np.array(data, dtype)
            if usemask:
                if dtype.names:
                    mdtype = [(_, np.bool) for _ in dtype.names]
                else:
                    mdtype = np.bool
                outputmask = np.array(masks, dtype=mdtype)
    # Try to take care of the missing data we missed
    if usemask and output.dtype.names:
        for (name, conv) in zip(names or (), converters):
            missing_values = [conv(_) for _ in conv.missing_values if _ != '']
            for mval in missing_values:
                outputmask[name] |= (output[name] == mval)
    # Construct the final array
    if usemask:
        output = output.view(MaskedArray)
        output._mask = outputmask
    if unpack:
        return output.squeeze().T
    return output.squeeze()
+ """ + Load ASCII data stored in fname and returns a ndarray. + + Complete description of all the optional input parameters is available in + the docstring of the `genfromtxt` function. + + See Also + -------- + numpy.genfromtxt : generic function. + + """ + kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter, + skiprows=skiprows, converters=converters, + missing=missing, missing_values=missing_values, + usecols=usecols, unpack=unpack, names=names, + excludelist=excludelist, deletechars=deletechars, + case_sensitive=case_sensitive, usemask=False) + return genfromtxt(fname, **kwargs) + +def mafromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0, + converters=None, missing='', missing_values=None, + usecols=None, unpack=None, names=None, + excludelist=None, deletechars=None, case_sensitive=True,): + """ + Load ASCII data stored in fname and returns a MaskedArray. + + Complete description of all the optional input parameters is available in + the docstring of the `genfromtxt` function. + + See Also + -------- + numpy.genfromtxt : generic function. + """ + kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter, + skiprows=skiprows, converters=converters, + missing=missing, missing_values=missing_values, + usecols=usecols, unpack=unpack, names=names, + excludelist=excludelist, deletechars=deletechars, + case_sensitive=case_sensitive, + usemask=True) + return genfromtxt(fname, **kwargs) + + +def recfromtxt(fname, dtype=None, comments='#', delimiter=None, skiprows=0, + converters=None, missing='', missing_values=None, + usecols=None, unpack=None, names=None, + excludelist=None, deletechars=None, case_sensitive=True, + usemask=False): + """ + Load ASCII data stored in fname and returns a standard recarray (if + `usemask=False`) or a MaskedRecords (if `usemask=True`). + + Complete description of all the optional input parameters is available in + the docstring of the `genfromtxt` function. 
def recfromtxt(fname, dtype=None, comments='#', delimiter=None, skiprows=0,
               converters=None, missing='', missing_values=None,
               usecols=None, unpack=None, names=None,
               excludelist=None, deletechars=None, case_sensitive=True,
               usemask=False):
    """
    Load ASCII data stored in fname and returns a standard recarray (if
    `usemask=False`) or a MaskedRecords (if `usemask=True`).

    Complete description of all the optional input parameters is available
    in the docstring of the `genfromtxt` function.

    See Also
    --------
    numpy.genfromtxt : generic function

    Warnings
    --------
    * by default, `dtype=None`, which means that the dtype of the output
      array will be determined from the data.
    """
    output = genfromtxt(fname, dtype=dtype, comments=comments,
                        delimiter=delimiter, skiprows=skiprows,
                        converters=converters, missing=missing,
                        missing_values=missing_values, usecols=usecols,
                        unpack=unpack, names=names, excludelist=excludelist,
                        deletechars=deletechars,
                        case_sensitive=case_sensitive, usemask=usemask)
    # Re-view the result as a record-style array of the requested flavor.
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        return output.view(MaskedRecords)
    return output.view(np.recarray)


def recfromcsv(fname, dtype=None, comments='#', skiprows=0,
               converters=None, missing='', missing_values=None,
               usecols=None, unpack=None, names=True,
               excludelist=None, deletechars=None, case_sensitive='lower',
               usemask=False):
    """
    Load ASCII data stored in comma-separated file and returns a recarray
    (if `usemask=False`) or a MaskedRecords (if `usemask=True`).

    Complete description of all the optional input parameters is available
    in the docstring of the `genfromtxt` function.

    See Also
    --------
    numpy.genfromtxt : generic function
    """
    # CSV flavor: the delimiter is fixed to ',' and, by default, field
    # names are read from the first line and lower-cased.
    output = genfromtxt(fname, dtype=dtype, comments=comments,
                        delimiter=",", skiprows=skiprows,
                        converters=converters, missing=missing,
                        missing_values=missing_values, usecols=usecols,
                        unpack=unpack, names=names, excludelist=excludelist,
                        deletechars=deletechars,
                        case_sensitive=case_sensitive, usemask=usemask)
    if usemask:
        from numpy.ma.mrecords import MaskedRecords
        return output.view(MaskedRecords)
    return output.view(np.recarray)
class TestLineSplitter(TestCase):
    "Tests the LineSplitter class."
    # NOTE(review): the fixture strings below originally contained runs of
    # multiple spaces that appear to have been collapsed when this patch was
    # transmitted by mail; verify the exact whitespace against upstream
    # (numpy r6329) before relying on these expected values.

    def test_no_delimiter(self):
        "Test LineSplitter w/o delimiter"
        strg = " 1 2 3 4 5 # test"
        test = LineSplitter()(strg)
        assert_equal(test, ['1', '2', '3', '4', '5'])
        test = LineSplitter('')(strg)
        assert_equal(test, ['1', '2', '3', '4', '5'])

    def test_space_delimiter(self):
        "Test space delimiter"
        strg = " 1 2 3 4 5 # test"
        test = LineSplitter(' ')(strg)
        assert_equal(test, ['1', '2', '3', '4', '', '5'])
        # NOTE(review): this second delimiter was presumably two spaces
        # ('  ') before whitespace collapsing -- confirm.
        test = LineSplitter(' ')(strg)
        assert_equal(test, ['1 2 3 4', '5'])

    def test_tab_delimiter(self):
        "Test tab delimiter"
        strg = " 1\t 2\t 3\t 4\t 5 6"
        test = LineSplitter('\t')(strg)
        assert_equal(test, ['1', '2', '3', '4', '5 6'])
        strg = " 1 2\t 3 4\t 5 6"
        test = LineSplitter('\t')(strg)
        assert_equal(test, ['1 2', '3 4', '5 6'])

    def test_other_delimiter(self):
        "Test LineSplitter on delimiter"
        strg = "1,2,3,4,,5"
        test = LineSplitter(',')(strg)
        assert_equal(test, ['1', '2', '3', '4', '', '5'])
        # Leading blank and trailing comment are stripped as well.
        strg = " 1,2,3,4,,5 # test"
        test = LineSplitter(',')(strg)
        assert_equal(test, ['1', '2', '3', '4', '', '5'])

    def test_constant_fixed_width(self):
        "Test LineSplitter w/ fixed-width fields"
        strg = " 1 2 3 4 5 # test"
        test = LineSplitter(3)(strg)
        assert_equal(test, ['1', '2', '3', '4', '', '5', ''])
        # A width wider than the data yields a single field.
        strg = " 1 3 4 5 6# test"
        test = LineSplitter(20)(strg)
        assert_equal(test, ['1 3 4 5 6'])
        #
        strg = " 1 3 4 5 6# test"
        test = LineSplitter(30)(strg)
        assert_equal(test, ['1 3 4 5 6'])

    def test_variable_fixed_width(self):
        # Width given as a tuple: one explicit width per field.
        strg = " 1 3 4 5 6# test"
        test = LineSplitter((3, 6, 6, 3))(strg)
        assert_equal(test, ['1', '3', '4 5', '6'])
        #
        strg = " 1 3 4 5 6# test"
        test = LineSplitter((6, 6, 9))(strg)
        assert_equal(test, ['1', '3 4', '5 6'])


#-------------------------------------------------------------------------------

class TestNameValidator(TestCase):
    # Tests the NameValidator class.

    def test_case_sensitivity(self):
        "Test case sensitivity"
        names = ['A', 'a', 'b', 'c']
        test = NameValidator().validate(names)
        assert_equal(test, ['A', 'a', 'b', 'c'])
        test = NameValidator(case_sensitive=False).validate(names)
        assert_equal(test, ['A', 'A_1', 'B', 'C'])
        test = NameValidator(case_sensitive='upper').validate(names)
        assert_equal(test, ['A', 'A_1', 'B', 'C'])
        test = NameValidator(case_sensitive='lower').validate(names)
        assert_equal(test, ['a', 'a_1', 'b', 'c'])

    def test_excludelist(self):
        "Test excludelist"
        names = ['dates', 'data', 'Other Data', 'mask']
        validator = NameValidator(excludelist = ['dates', 'data', 'mask'])
        test = validator.validate(names)
        assert_equal(test, ['dates_', 'data_', 'Other_Data', 'mask_'])


#-------------------------------------------------------------------------------

class TestStringConverter(TestCase):
    "Test StringConverter"

    def test_creation(self):
        "Test creation of a StringConverter"
        converter = StringConverter(int, -99999)
        assert_equal(converter._status, 1)
        assert_equal(converter.default, -99999)

    def test_upgrade(self):
        "Tests the upgrade method."
        converter = StringConverter()
        assert_equal(converter._status, 0)
        # Each failed conversion bumps _status one step along _mapper:
        # bool -> int -> float -> complex -> string.
        converter.upgrade('0')
        assert_equal(converter._status, 1)
        converter.upgrade('0.')
        assert_equal(converter._status, 2)
        converter.upgrade('0j')
        assert_equal(converter._status, 3)
        converter.upgrade('a')
        assert_equal(converter._status, len(converter._mapper)-1)

    def test_missing(self):
        "Tests the use of missing values."
        converter = StringConverter(missing_values=('missing','missed'))
        converter.upgrade('0')
        assert_equal(converter('0'), 0)
        assert_equal(converter(''), converter.default)
        assert_equal(converter('missing'), converter.default)
        assert_equal(converter('missed'), converter.default)
        # NOTE(review): if converter('miss') does NOT raise, this block
        # passes silently -- an explicit failure after the call would be
        # stricter.
        try:
            converter('miss')
        except ValueError:
            pass

    def test_upgrademapper(self):
        "Tests updatemapper"
        import dateutil.parser
        import datetime
        dateparser = dateutil.parser.parse
        StringConverter.upgrade_mapper(dateparser, datetime.date(2000,1,1))
        convert = StringConverter(dateparser, datetime.date(2000, 1, 1))
        test = convert('2001-01-01')
        # NOTE(review): the leading-zero integer literals below are
        # Python 2 syntax (invalid in Python 3).
        assert_equal(test, datetime.datetime(2001, 01, 01, 00, 00, 00))
+ test = np.ndfromtxt(data, dtype=descriptor) + assert_equal(test, control) + + def test_array(self): + "Test outputing a standard ndarray" + data = StringIO.StringIO('1 2\n3 4') + control = np.array([[1,2],[3,4]], dtype=int) + test = np.ndfromtxt(data, dtype=int) + assert_array_equal(test, control) + # + data.seek(0) + control = np.array([[1,2],[3,4]], dtype=float) + test = np.loadtxt(data, dtype=float) + assert_array_equal(test, control) + + def test_1D(self): + "Test squeezing to 1D" + control = np.array([1, 2, 3, 4], int) + # + data = StringIO.StringIO('1\n2\n3\n4\n') + test = np.ndfromtxt(data, dtype=int) + assert_array_equal(test, control) + # + data = StringIO.StringIO('1,2,3,4\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',') + assert_array_equal(test, control) + + def test_comments(self): + "Test the stripping of comments" + control = np.array([1, 2, 3, 5], int) + # Comment on its own line + data = StringIO.StringIO('# comment\n1,2,3,5\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#') + assert_equal(test, control) + # Comment at the end of a line + data = StringIO.StringIO('1,2,3,5# comment\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',', comments='#') + assert_equal(test, control) + + def test_skiprows(self): + "Test row skipping" + control = np.array([1, 2, 3, 5], int) + # + data = StringIO.StringIO('comment\n1,2,3,5\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',', skiprows=1) + assert_equal(test, control) + # + data = StringIO.StringIO('# comment\n1,2,3,5\n') + test = np.loadtxt(data, dtype=int, delimiter=',', skiprows=1) + assert_equal(test, control) + + def test_header(self): + "Test retrieving a header" + data = StringIO.StringIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0') + test = np.ndfromtxt(data, dtype=None, names=True) + control = {'gender': np.array(['M', 'F']), + 'age': np.array([64.0, 25.0]), + 'weight': np.array([75.0, 60.0])} + assert_equal(test['gender'], control['gender']) + 
assert_equal(test['age'], control['age']) + assert_equal(test['weight'], control['weight']) + + def test_auto_dtype(self): + "Test the automatic definition of the output dtype" + data = StringIO.StringIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False') + test = np.ndfromtxt(data, dtype=None) + control = [np.array(['A', 'BCD']), + np.array([64, 25]), + np.array([75.0, 60.0]), + np.array([3+4j, 5+6j]), + np.array([True, False]),] + assert_equal(test.dtype.names, ['f0','f1','f2','f3','f4']) + for (i, ctrl) in enumerate(control): + assert_equal(test['f%i' % i], ctrl) + + + def test_auto_dtype_uniform(self): + "Tests whether the output dtype can be uniformized" + data = StringIO.StringIO('1 2 3 4\n5 6 7 8\n') + test = np.ndfromtxt(data, dtype=None) + control = np.array([[1,2,3,4],[5,6,7,8]]) + assert_equal(test, control) + + + def test_fancy_dtype(self): + "Check that a nested dtype isn't MIA" + data = StringIO.StringIO('1,2,3.0\n4,5,6.0\n') + fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + test = np.ndfromtxt(data, dtype=fancydtype, delimiter=',') + control = np.array([(1,(2,3.0)),(4,(5,6.0))], dtype=fancydtype) + assert_equal(test, control) + + + def test_names_overwrite(self): + "Test overwriting the names of the dtype" + descriptor = {'names': ('g','a','w'), + 'formats': ('S1', 'i4', 'f4')} + data = StringIO.StringIO('M 64.0 75.0\nF 25.0 60.0') + names = ('gender','age','weight') + test = np.ndfromtxt(data, dtype=descriptor, names=names) + descriptor['names'] = names + control = np.array([('M', 64.0, 75.0), + ('F', 25.0, 60.0)], dtype=descriptor) + assert_equal(test, control) + + + def test_autonames_and_usecols(self): + "Tests names and usecols" + data = StringIO.StringIO('A B C D\n aaaa 121 45 9.1') + test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), + names=True, dtype=None) + control = np.array(('aaaa', 45, 9.1), + dtype=[('A', '|S4'), ('C', int), ('D', float)]) + assert_equal(test, control) + + + def test_converters_with_usecols(self): + 
"Test the combination user-defined converters and usecol" + data = StringIO.StringIO('1,2,3,,5\n6,7,8,9,10\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',', + converters={3:lambda s: int(s or -999)}, + usecols=(1, 3, )) + control = np.array([[2, -999], [7, 9]], int) + assert_equal(test, control) + + def test_converters_with_usecols_and_names(self): + "Tests names and usecols" + data = StringIO.StringIO('A B C D\n aaaa 121 45 9.1') + test = np.ndfromtxt(data, usecols=('A', 'C', 'D'), names=True, + dtype=None, converters={'C':lambda s: 2 * int(s)}) + control = np.array(('aaaa', 90, 9.1), + dtype=[('A', '|S4'), ('C', int), ('D', float)]) + assert_equal(test, control) + + + def test_unused_converter(self): + "Test whether unused converters are forgotten" + data = StringIO.StringIO("1 21\n 3 42\n") + test = np.ndfromtxt(data, usecols=(1,), + converters={0: lambda s: int(s, 16)}) + assert_equal(test, [21, 42]) + # + data.seek(0) + test = np.ndfromtxt(data, usecols=(1,), + converters={1: lambda s: int(s, 16)}) + assert_equal(test, [33, 66]) + + + def test_dtype_with_converters(self): + dstr = "2009; 23; 46" + test = np.ndfromtxt(StringIO.StringIO(dstr,), + delimiter=";", dtype=float, converters={0:str}) + control = np.array([('2009', 23., 46)], + dtype=[('f0','|S4'), ('f1', float), ('f2', float)]) + assert_equal(test, control) + test = np.ndfromtxt(StringIO.StringIO(dstr,), + delimiter=";", dtype=float, converters={0:float}) + control = np.array([2009., 23., 46],) + assert_equal(test, control) + + + def test_spacedelimiter(self): + "Test space delimiter" + data = StringIO.StringIO("1 2 3 4 5\n6 7 8 9 10") + test = np.ndfromtxt(data) + control = np.array([[ 1., 2., 3., 4., 5.], + [ 6., 7., 8., 9.,10.]]) + assert_equal(test, control) + + + def test_missing(self): + data = StringIO.StringIO('1,2,3,,5\n') + test = np.ndfromtxt(data, dtype=int, delimiter=',', \ + converters={3:lambda s: int(s or -999)}) + control = np.array([1, 2, 3, -999, 5], int) + assert_equal(test, 
control) + + + def test_usecols(self): + "Test the selection of columns" + # Select 1 column + control = np.array( [[1, 2], [3, 4]], float) + data = StringIO.StringIO() + np.savetxt(data, control) + data.seek(0) + test = np.ndfromtxt(data, dtype=float, usecols=(1,)) + assert_equal(test, control[:, 1]) + # + control = np.array( [[1, 2, 3], [3, 4, 5]], float) + data = StringIO.StringIO() + np.savetxt(data, control) + data.seek(0) + test = np.ndfromtxt(data, dtype=float, usecols=(1, 2)) + assert_equal(test, control[:, 1:]) + # Testing with arrays instead of tuples. + data.seek(0) + test = np.ndfromtxt(data, dtype=float, usecols=np.array([1, 2])) + assert_equal(test, control[:, 1:]) + # Checking with dtypes defined converters. + data = StringIO.StringIO("""JOE 70.1 25.3\nBOB 60.5 27.9""") + names = ['stid', 'temp'] + dtypes = ['S4', 'f8'] + test = np.ndfromtxt(data, usecols=(0, 2), dtype=zip(names, dtypes)) + assert_equal(test['stid'], ["JOE", "BOB"]) + assert_equal(test['temp'], [25.3, 27.9]) + + + def test_empty_file(self): + "Test that an empty file raises the proper exception" + data = StringIO.StringIO() + assert_raises(IOError, np.ndfromtxt, data) + + + def test_fancy_dtype_alt(self): + "Check that a nested dtype isn't MIA" + data = StringIO.StringIO('1,2,3.0\n4,5,6.0\n') + fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])]) + test = np.mafromtxt(data, dtype=fancydtype, delimiter=',') + control = ma.array([(1,(2,3.0)),(4,(5,6.0))], dtype=fancydtype) + assert_equal(test, control) + + + def test_withmissing(self): + data = StringIO.StringIO('A,B\n0,1\n2,N/A') + test = np.mafromtxt(data, dtype=None, delimiter=',', missing='N/A', + names=True) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', np.int), ('B', np.int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + # + data.seek(0) + test = np.mafromtxt(data, delimiter=',', missing='N/A', names=True) + control = ma.array([(0, 1), 
(2, -1)], + mask=[[False, False], [False, True]],) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + + def test_user_missing_values(self): + datastr ="A, B, C\n0, 0., 0j\n1, N/A, 1j\n-9, 2.2, N/A\n3, -99, 3j" + data = StringIO.StringIO(datastr) + basekwargs = dict(dtype=None, delimiter=',', names=True, missing='N/A') + mdtype = [('A', int), ('B', float), ('C', complex)] + # + test = np.mafromtxt(data, **basekwargs) + control = ma.array([( 0, 0.0, 0j), (1, -999, 1j), + ( -9, 2.2, -999j), (3, -99, 3j)], + mask=[(0, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)], + dtype=mdtype) + assert_equal(test, control) + # + data.seek(0) + test = np.mafromtxt(data, + missing_values={0:-9, 1:-99, 2:-999j}, **basekwargs) + control = ma.array([( 0, 0.0, 0j), (1, -999, 1j), + ( -9, 2.2, -999j), (3, -99, 3j)], + mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)], + dtype=mdtype) + assert_equal(test, control) + # + data.seek(0) + test = np.mafromtxt(data, + missing_values={0:-9, 'B':-99, 'C':-999j}, + **basekwargs) + control = ma.array([( 0, 0.0, 0j), (1, -999, 1j), + ( -9, 2.2, -999j), (3, -99, 3j)], + mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)], + dtype=mdtype) + assert_equal(test, control) + + + def test_withmissing_float(self): + data = StringIO.StringIO('A,B\n0,1.5\n2,-999.00') + test = np.mafromtxt(data, dtype=None, delimiter=',', missing='-999.0', + names=True,) + control = ma.array([(0, 1.5), (2, -1.)], + mask=[(False, False), (False, True)], + dtype=[('A', np.int), ('B', np.float)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + + + def test_recfromtxt(self): + # + data = StringIO.StringIO('A,B\n0,1\n2,3') + test = np.recfromtxt(data, delimiter=',', missing='N/A', names=True) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', np.int), ('B', np.int)]) + self.failUnless(isinstance(test, np.recarray)) + assert_equal(test, control) + # + data = StringIO.StringIO('A,B\n0,1\n2,N/A') + test = np.recfromtxt(data, dtype=None, 
delimiter=',', missing='N/A', + names=True, usemask=True) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', np.int), ('B', np.int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(test.A, [0, 2]) + + + def test_recfromcsv(self): + # + data = StringIO.StringIO('A,B\n0,1\n2,3') + test = np.recfromcsv(data, missing='N/A', + names=True, case_sensitive=True) + control = np.array([(0, 1), (2, 3)], + dtype=[('A', np.int), ('B', np.int)]) + self.failUnless(isinstance(test, np.recarray)) + assert_equal(test, control) + # + data = StringIO.StringIO('A,B\n0,1\n2,N/A') + test = np.recfromcsv(data, dtype=None, missing='N/A', + names=True, case_sensitive=True, usemask=True) + control = ma.array([(0, 1), (2, -1)], + mask=[(False, False), (False, True)], + dtype=[('A', np.int), ('B', np.int)]) + assert_equal(test, control) + assert_equal(test.mask, control.mask) + assert_equal(test.A, [0, 2]) + # + data = StringIO.StringIO('A,B\n0,1\n2,3') + test = np.recfromcsv(data, missing='N/A',) + control = np.array([(0, 1), (2, 3)], + dtype=[('a', np.int), ('b', np.int)]) + self.failUnless(isinstance(test, np.recarray)) + assert_equal(test, control) + + + + if __name__ == "__main__": run_module_suite() From numpy-svn at scipy.org Thu Jan 22 00:37:38 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Wed, 21 Jan 2009 23:37:38 -0600 (CST) Subject: [Numpy-svn] r6330 - in trunk/numpy/lib: . tests Message-ID: <20090122053738.11038C7C02D@scipy.org> Author: pierregm Date: 2009-01-21 23:37:36 -0600 (Wed, 21 Jan 2009) New Revision: 6330 Modified: trunk/numpy/lib/io.py trunk/numpy/lib/tests/test_io.py Log: * genfromtxt : if names is True, accept a line starting with a comment character as header. 
Modified: trunk/numpy/lib/io.py =================================================================== --- trunk/numpy/lib/io.py 2009-01-19 21:22:52 UTC (rev 6329) +++ trunk/numpy/lib/io.py 2009-01-22 05:37:36 UTC (rev 6330) @@ -748,7 +748,14 @@ first_line = fhd.readline() if first_line == '': raise IOError('End-of-file reached before encountering data.') - first_values = split_line(first_line) + if names is True: + first_values = first_line.strip().split(delimiter) + else: + first_values = split_line(first_line) + if names is True: + fval = first_values[0].strip() + if fval in comments: + del first_values[0] # Check the columns to use if usecols is not None: Modified: trunk/numpy/lib/tests/test_io.py =================================================================== --- trunk/numpy/lib/tests/test_io.py 2009-01-19 21:22:52 UTC (rev 6329) +++ trunk/numpy/lib/tests/test_io.py 2009-01-22 05:37:36 UTC (rev 6330) @@ -483,6 +483,30 @@ assert_equal(test, control) + def test_commented_header(self): + "Check that names can be retrieved even if the line is commented out." + data = StringIO.StringIO(""" +#gender age weight +M 21 72.100000 +F 35 58.330000 +M 33 21.99 + """) + # The # is part of the first name and should be deleted automatically. 
+ test = np.genfromtxt(data, names=True, dtype=None) + ctrl = np.array([('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)], + dtype=[('gender','|S1'), ('age', int), ('weight', float)]) + assert_equal(test, ctrl) + # Ditto, but we should get rid of the first element + data = StringIO.StringIO(""" +# gender age weight +M 21 72.100000 +F 35 58.330000 +M 33 21.99 + """) + test = np.genfromtxt(data, names=True, dtype=None) + assert_equal(test, ctrl) + + def test_autonames_and_usecols(self): "Tests names and usecols" data = StringIO.StringIO('A B C D\n aaaa 121 45 9.1') @@ -707,9 +731,9 @@ dtype=[('a', np.int), ('b', np.int)]) self.failUnless(isinstance(test, np.recarray)) assert_equal(test, control) - + if __name__ == "__main__": run_module_suite() From numpy-svn at scipy.org Thu Jan 22 00:40:28 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Wed, 21 Jan 2009 23:40:28 -0600 (CST) Subject: [Numpy-svn] r6331 - in trunk/numpy/lib: . tests Message-ID: <20090122054028.0F0C0C7C035@scipy.org> Author: pierregm Date: 2009-01-21 23:40:25 -0600 (Wed, 21 Jan 2009) New Revision: 6331 Added: trunk/numpy/lib/recfunctions.py trunk/numpy/lib/tests/test_recfunctions.py Log: * added recfunctions, a collection of utilities to manipulate structured arrays. Added: trunk/numpy/lib/recfunctions.py =================================================================== --- trunk/numpy/lib/recfunctions.py 2009-01-22 05:37:36 UTC (rev 6330) +++ trunk/numpy/lib/recfunctions.py 2009-01-22 05:40:25 UTC (rev 6331) @@ -0,0 +1,931 @@ +""" +Collection of utilities to manipulate structured arrays. + +Most of these functions were initially implemented by John Hunter for matplotlib. +They have been rewritten and extended for convenience. 
+ + +""" + + +import itertools +from itertools import chain as iterchain, repeat as iterrepeat, izip as iterizip +import numpy as np +from numpy import ndarray, recarray +import numpy.ma as ma +from numpy.ma import MaskedArray +from numpy.ma.mrecords import MaskedRecords + +from numpy.lib._iotools import _is_string_like + +_check_fill_value = np.ma.core._check_fill_value + +__all__ = ['append_fields', + 'drop_fields', + 'find_duplicates', + 'get_fieldstructure', + 'join_by', + 'merge_arrays', + 'rec_append_fields', 'rec_drop_fields', 'rec_join', + 'recursive_fill_fields', 'rename_fields', + 'stack_arrays', + ] + + +def recursive_fill_fields(input, output): + """ + Fills fields from output with fields from input, + with support for nested structures. + + Parameters + ---------- + input : ndarray + Input array. + output : ndarray + Output array. + + Notes + ----- + * `output` should be at least the same size as `input` + + Examples + -------- + >>> a = np.array([(1, 10.), (2, 20.)], dtype=[('A', int), ('B', float)]) + >>> b = np.zeros((3,), dtype=a.dtype) + >>> recursive_fill_fields(a, b) + np.array([(1, 10.), (2, 20.), (0, 0.)], dtype=[('A', int), ('B', float)]) + + """ + newdtype = output.dtype + for field in newdtype.names: + try: + current = input[field] + except ValueError: + continue + if current.dtype.names: + recursive_fill_fields(current, output[field]) + else: + output[field][:len(current)] = current + return output + + + +def get_names(adtype): + """ + Returns the field names of the input datatype as a tuple. 
+ + Parameters + ---------- + adtype : dtype + Input datatype + + Examples + -------- + >>> get_names(np.empty((1,), dtype=int)) is None + True + >>> get_names(np.empty((1,), dtype=[('A',int), ('B', float)])) + ('A', 'B') + >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) + >>> get_names(adtype) + ('a', ('b', ('ba', 'bb'))) + """ + listnames = [] + names = adtype.names + for name in names: + current = adtype[name] + if current.names: + listnames.append((name, tuple(get_names(current)))) + else: + listnames.append(name) + return tuple(listnames) or None + + +def get_names_flat(adtype): + """ + Returns the field names of the input datatype as a tuple. Nested structure + are flattend beforehand. + + Parameters + ---------- + adtype : dtype + Input datatype + + Examples + -------- + >>> get_names_flat(np.empty((1,), dtype=int)) is None + True + >>> get_names_flat(np.empty((1,), dtype=[('A',int), ('B', float)])) + ('A', 'B') + >>> adtype = np.dtype([('a', int), ('b', [('ba', int), ('bb', int)])]) + >>> get_names_flat(adtype) + ('a', 'b', 'ba', 'bb') + """ + listnames = [] + names = adtype.names + for name in names: + listnames.append(name) + current = adtype[name] + if current.names: + listnames.extend(get_names_flat(current)) + return tuple(listnames) or None + + +def flatten_descr(ndtype): + """ + Flatten a structured data-type description. + + Examples + -------- + >>> ndtype = np.dtype([('a', '>> flatten_descr(ndtype) + (('a', dtype('int32')), ('ba', dtype('float64')), ('bb', dtype('int32'))) + + """ + names = ndtype.names + if names is None: + return ndtype.descr + else: + descr = [] + for field in names: + (typ, _) = ndtype.fields[field] + if typ.names: + descr.extend(flatten_descr(typ)) + else: + descr.append((field, typ)) + return tuple(descr) + + +def zip_descr(seqarrays, flatten=False): + """ + Combine the dtype description of a series of arrays. 
+ + Parameters + ---------- + seqarrays : sequence of arrays + Sequence of arrays + flatten : {boolean}, optional + Whether to collapse nested descriptions. + """ + newdtype = [] + if flatten: + for a in seqarrays: + newdtype.extend(flatten_descr(a.dtype)) + else: + for a in seqarrays: + current = a.dtype + names = current.names or () + if len(names) > 1: + newdtype.append(('', current.descr)) + else: + newdtype.extend(current.descr) + return np.dtype(newdtype).descr + + +def get_fieldstructure(adtype, lastname=None, parents=None,): + """ + Returns a dictionary with fields as keys and a list of parent fields as values. + + This function is used to simplify access to fields nested in other fields. + + Parameters + ---------- + adtype : np.dtype + Input datatype + lastname : optional + Last processed field name (used internally during recursion). + parents : dictionary + Dictionary of parent fields (used interbally during recursion). + + Examples + -------- + >>> ndtype = np.dtype([('A', int), + ... ('B', [('BA', int), + ... ('BB', [('BBA', int), ('BBB', int)])])]) + >>> get_fieldstructure(ndtype) + {'A': [], 'B': [], 'BA': ['B'], 'BB': ['B'], + 'BBA': ['B', 'BB'], 'BBB': ['B', 'BB']} + + """ + if parents is None: + parents = {} + names = adtype.names + for name in names: + current = adtype[name] + if current.names: + if lastname: + parents[name] = [lastname,] + else: + parents[name] = [] + parents.update(get_fieldstructure(current, name, parents)) + else: + lastparent = [_ for _ in (parents.get(lastname, []) or [])] + if lastparent: +# if (lastparent[-1] != lastname): + lastparent.append(lastname) + elif lastname: + lastparent = [lastname,] + parents[name] = lastparent or [] + return parents or None + + +def _izip_fields_flat(iterable): + """ + Returns an iterator of concatenated fields from a sequence of arrays, + collapsing any nested structure. 
+ """ + for element in iterable: + if isinstance(element, np.void): + for f in _izip_fields_flat(tuple(element)): + yield f + else: + yield element + + +def _izip_fields(iterable): + """ + Returns an iterator of concatenated fields from a sequence of arrays. + """ + for element in iterable: + if hasattr(element, '__iter__') and not isinstance(element, basestring): + for f in _izip_fields(element): + yield f + elif isinstance(element, np.void) and len(tuple(element)) == 1: + for f in _izip_fields(element): + yield f + else: + yield element + + +def izip_records(seqarrays, fill_value=None, flatten=True): + """ + Returns an iterator of concatenated items from a sequence of arrays. + + Parameters + ---------- + seqarray : sequence of arrays + Sequence of arrays. + fill_value : {None, integer} + Value used to pad shorter iterables. + flatten : {True, False}, + Whether to + """ + # OK, that's a complete ripoff from Python2.6 itertools.izip_longest + def sentinel(counter = ([fill_value]*(len(seqarrays)-1)).pop): + "Yields the fill_value or raises IndexError" + yield counter() + # + fillers = iterrepeat(fill_value) + iters = [iterchain(it, sentinel(), fillers) for it in seqarrays] + # Should we flatten the items, or just use a nested approach + if flatten: + zipfunc = _izip_fields_flat + else: + zipfunc = _izip_fields + # + try: + for tup in iterizip(*iters): + yield tuple(zipfunc(tup)) + except IndexError: + pass + + +def _fix_output(output, usemask=True, asrecarray=False): + """ + Private function: return a recarray, a ndarray, a MaskedArray + or a MaskedRecords depending on the input parameters + """ + if not isinstance(output, MaskedArray): + usemask = False + if usemask: + if asrecarray: + output = output.view(MaskedRecords) + else: + output = ma.filled(output) + if asrecarray: + output = output.view(recarray) + return output + + +def _fix_defaults(output, defaults=None): + """ + Update the fill_value and masked data of `output` + from the default given in a 
dictionary defaults. + """ + names = output.dtype.names + (data, mask, fill_value) = (output.data, output.mask, output.fill_value) + for (k, v) in (defaults or {}).iteritems(): + if k in names: + fill_value[k] = v + data[k][mask[k]] = v + return output + + +def merge_arrays(seqarrays, + fill_value=-1, flatten=False, usemask=True, asrecarray=False): + """ + Merge arrays field by field. + + Parameters + ---------- + seqarrays : sequence of ndarrays + Sequence of arrays + fill_value : {float}, optional + Filling value used to pad missing data on the shorter arrays. + flatten : {False, True}, optional + Whether to collapse nested fields. + usemask : {False, True}, optional + Whether to return a masked array or not. + asrecarray : {False, True}, optional + Whether to return a recarray (MaskedRecords) or not. + + Examples + -------- + >>> merge_arrays((np.array([1, 2]), np.array([10., 20., 30.]))) + masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)], + mask = [(False, False) (False, False) (True, False)], + fill_value=(999999, 1e+20) + dtype=[('f0', '>> merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])), + ... 
usemask=False) + array(data = [(1, 10.0) (2, 20.0) (-1, 30.0)], + dtype=[('f0', '>> merge_arrays((np.array([1, 2]).view([('a', int)]), + np.array([10., 20., 30.])), + usemask=False, asrecarray=True) + rec.array(data = [(1, 10.0) (2, 20.0) (-1, 30.0)], + dtype=[('a', int), ('f1', '>> a = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + >>> drop_fields(a, 'a') + array([((2.0, 3),), ((5.0, 6),)], + dtype=[('b', [('ba', '>> drop_fields(a, 'ba') + array([(1, (3,)), (4, (6,))], + dtype=[('a', '>> drop_fields(a, ['ba', 'bb']) + array([(1,), (4,)], + dtype=[('a', '>> a = np.array([(1, (2, [3.0, 30.])), (4, (5, [6.0, 60.]))], + dtype=[('a', int), + ('b', [('ba', float), ('bb', (float, 2))])]) + >>> rename_fields(a, {'a':'A', 'bb':'BB'}) + array([(1, (2.0, 3)), (4, (5.0, 6))], + dtype=[('A', ' 1: + data = merge_arrays(data, flatten=True, usemask=usemask, + fill_value=fill_value) + else: + data = data.pop() + # + output = ma.masked_all(max(len(base), len(data)), + dtype=base.dtype.descr + data.dtype.descr) + output = recursive_fill_fields(base, output) + output = recursive_fill_fields(data, output) + # + return _fix_output(output, usemask=usemask, asrecarray=asrecarray) + + + +def rec_append_fields(base, names, data, dtypes=None): + """ + Add new fields to an existing array. + + The names of the fields are given with the `names` arguments, + the corresponding values with the `data` arguments. + If a single field is appended, `names`, `data` and `dtypes` do not have + to be lists but just values. + + Parameters + ---------- + base : array + Input array to extend. + names : string, sequence + String or sequence of strings corresponding to the names + of the new fields. + data : array or sequence of arrays + Array or sequence of arrays storing the fields to add to the base. + dtypes : sequence of datatypes, optional + Datatype or sequence of datatypes. + If None, the datatypes are estimated from the `data`. 
+ + See Also + -------- + append_fields + + Returns + ------- + appended_array : np.recarray + """ + return append_fields(base, names, data=data, dtypes=dtypes, + asrecarray=True, usemask=False) + + + +def stack_arrays(arrays, defaults=None, usemask=True, asrecarray=False): + """ + Superposes arrays fields by fields + + Parameters + ---------- + seqarrays : array or sequence + Sequence of input arrays. + defaults : dictionary, optional + Dictionary mapping field names to the corresponding default values. + usemask : {True, False}, optional + Whether to return a MaskedArray (or MaskedRecords is `asrecarray==True`) + or a ndarray. + asrecarray : {False, True}, optional + Whether to return a recarray (or MaskedRecords if `usemask==True`) or + just a flexible-type ndarray. + + Examples + -------- + >>> x = np.array([1, 2,]) + >>> stack_arrays(x) is x + True + >>> z = np.array([('A', 1), ('B', 2)], dtype=[('A', '|S3'), ('B', float)]) + >>> zz = np.array([('a', 10., 100.), ('b', 20., 200.), ('c', 30., 300.)], + dtype=[('A', '|S3'), ('B', float), ('C', float)]) + >>> test = stack_arrays((z,zz)) + >>> masked_array(data = [('A', 1.0, --) ('B', 2.0, --) ('a', 10.0, 100.0) + ... ('b', 20.0, 200.0) ('c', 30.0, 300.0)], + ... mask = [(False, False, True) (False, False, True) (False, False, False) + ... (False, False, False) (False, False, False)], + ... fill_value=('N/A', 1e+20, 1e+20) + ... 
dtype=[('A', '|S3'), ('B', ' '%s'" %\ + (dict(newdescr)[name], descr[1])) + # Only one field: use concatenate + if len(newdescr) == 1: + output = ma.concatenate(seqarrays) + else: + # + output = ma.masked_all((np.sum(nrecords),), newdescr) + offset = np.cumsum(np.r_[0, nrecords]) + seen = [] + for (a, n, i, j) in zip(seqarrays, fldnames, offset[:-1], offset[1:]): + names = a.dtype.names + if names is None: + output['f%i' % len(seen)][i:j] = a + else: + for name in n: + output[name][i:j] = a[name] + if name not in seen: + seen.append(name) + # + return _fix_output(_fix_defaults(output, defaults), + usemask=usemask, asrecarray=asrecarray) + + + +def find_duplicates(a, key=None, ignoremask=True, return_index=False): + """ + Find the duplicates in a structured array along a given key + + Parameters + ---------- + a : array-like + Input array + key : {string, None}, optional + Name of the fields along which to check the duplicates. + If None, the search is performed by records + ignoremask : {True, False}, optional + Whether masked data should be discarded or considered as duplicates. + return_index : {False, True}, optional + Whether to return the indices of the duplicated values. + + Examples + -------- + >>> ndtype = [('a', int)] + >>> a = ma.array([1, 1, 1, 2, 2, 3, 3], + ... 
mask=[0, 0, 1, 0, 0, 0, 1]).view(ndtype) + >>> find_duplicates(a, ignoremask=True, return_index=True) + """ + a = np.asanyarray(a).ravel() + # Get a dictionary of fields + fields = get_fieldstructure(a.dtype) + # Get the sorting data (by selecting the corresponding field) + base = a + if key: + for f in fields[key]: + base = base[f] + base = base[key] + # Get the sorting indices and the sorted data + sortidx = base.argsort() + sortedbase = base[sortidx] + sorteddata = sortedbase.filled() + # Compare the sorting data + flag = (sorteddata[:-1] == sorteddata[1:]) + # If masked data must be ignored, set the flag to false where needed + if ignoremask: + sortedmask = sortedbase.recordmask + flag[sortedmask[1:]] = False + flag = np.concatenate(([False], flag)) + # We need to take the point on the left as well (else we're missing it) + flag[:-1] = flag[:-1] + flag[1:] + duplicates = a[sortidx][flag] + if return_index: + return (duplicates, sortidx[flag]) + else: + return duplicates + + + +def join_by(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', + defaults=None, usemask=True, asrecarray=False): + """ + Join arrays `r1` and `r2` on key `key`. + + The key should be either a string or a sequence of string corresponding + to the fields used to join the array. + An exception is raised if the `key` field cannot be found in the two input + arrays. + Neither `r1` nor `r2` should have any duplicates along `key`: the presence + of duplicates will make the output quite unreliable. Note that duplicates + are not looked for by the algorithm. + + Parameters + ---------- + key : {string, sequence} + A string or a sequence of strings corresponding to the fields used + for comparison. + r1, r2 : arrays + Structured arrays. + jointype : {'inner', 'outer', 'leftouter'}, optional + If 'inner', returns the elements common to both r1 and r2. + If 'outer', returns the common elements as well as the elements of r1 + not in r2 and the elements of not in r2. 
+ If 'leftouter', returns the common elements and the elements of r1 not + in r2. + r1postfix : string, optional + String appended to the names of the fields of r1 that are present in r2 + but absent of the key. + r2postfix : string, optional + String appended to the names of the fields of r2 that are present in r1 + but absent of the key. + defaults : {dictionary}, optional + Dictionary mapping field names to the corresponding default values. + usemask : {True, False}, optional + Whether to return a MaskedArray (or MaskedRecords is `asrecarray==True`) + or a ndarray. + asrecarray : {False, True}, optional + Whether to return a recarray (or MaskedRecords if `usemask==True`) or + just a flexible-type ndarray. + + Notes + ----- + * The output is sorted along the key. + * A temporary array is formed by dropping the fields not in the key for the + two arrays and concatenating the result. This array is then sorted, and + the common entries selected. The output is constructed by filling the fields + with the selected entries. Matching is not preserved if there are some + duplicates... 
+ + """ + # Check jointype + if jointype not in ('inner', 'outer', 'leftouter'): + raise ValueError("The 'jointype' argument should be in 'inner', "\ + "'outer' or 'leftouter' (got '%s' instead)" % jointype) + # If we have a single key, put it in a tuple + if isinstance(key, basestring): + key = (key, ) + + # Check the keys + for name in key: + if name not in r1.dtype.names: + raise ValueError('r1 does not have key field %s'%name) + if name not in r2.dtype.names: + raise ValueError('r2 does not have key field %s'%name) + + # Make sure we work with ravelled arrays + r1 = r1.ravel() + r2 = r2.ravel() + (nb1, nb2) = (len(r1), len(r2)) + (r1names, r2names) = (r1.dtype.names, r2.dtype.names) + + # Make temporary arrays of just the keys + r1k = drop_fields(r1, [n for n in r1names if n not in key]) + r2k = drop_fields(r2, [n for n in r2names if n not in key]) + + # Concatenate the two arrays for comparison + aux = ma.concatenate((r1k, r2k)) + idx_sort = aux.argsort(order=key) + aux = aux[idx_sort] + # + # Get the common keys + flag_in = ma.concatenate(([False], aux[1:] == aux[:-1])) + flag_in[:-1] = flag_in[1:] + flag_in[:-1] + idx_in = idx_sort[flag_in] + idx_1 = idx_in[(idx_in < nb1)] + idx_2 = idx_in[(idx_in >= nb1)] - nb1 + (r1cmn, r2cmn) = (len(idx_1), len(idx_2)) + if jointype == 'inner': + (r1spc, r2spc) = (0, 0) + elif jointype == 'outer': + idx_out = idx_sort[~flag_in] + idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)])) + idx_2 = np.concatenate((idx_2, idx_out[(idx_out >= nb1)] - nb1)) + (r1spc, r2spc) = (len(idx_1) - r1cmn, len(idx_2) - r2cmn) + elif jointype == 'leftouter': + idx_out = idx_sort[~flag_in] + idx_1 = np.concatenate((idx_1, idx_out[(idx_out < nb1)])) + (r1spc, r2spc) = (len(idx_1) - r1cmn, 0) + # Select the entries from each input + (s1, s2) = (r1[idx_1], r2[idx_2]) + # + # Build the new description of the output array ....... 
+ # Start with the key fields + ndtype = [list(_) for _ in r1k.dtype.descr] + # Add the other fields + ndtype.extend(list(_) for _ in r1.dtype.descr if _[0] not in key) + # Find the new list of names (it may be different from r1names) + names = list(_[0] for _ in ndtype) + for desc in r2.dtype.descr: + desc = list(desc) + name = desc[0] + # Have we seen the current name already ? + if name in names: + nameidx = names.index(name) + current = ndtype[nameidx] + # The current field is part of the key: take the largest dtype + if name in key: + current[-1] = max(desc[1], current[-1]) + # The current field is not part of the key: add the suffixes + else: + current[0] += r1postfix + desc[0] += r2postfix + ndtype.insert(nameidx+1, desc) + #... we haven't: just add the description to the current list + else: + names.extend(desc[0]) + ndtype.append(desc) + # Revert the elements to tuples + ndtype = [tuple(_) for _ in ndtype] + # Find the largest nb of common fields : r1cmn and r2cmn should be equal, but... + cmn = max(r1cmn, r2cmn) + # Construct an empty array + output = ma.masked_all((cmn + r1spc + r2spc,), dtype=ndtype) + names = output.dtype.names + for f in r1names: + selected = s1[f] + if f not in names: + f += r1postfix + current = output[f] + current[:r1cmn] = selected[:r1cmn] + if jointype in ('outer', 'leftouter'): + current[cmn:cmn+r1spc] = selected[r1cmn:] + for f in r2names: + selected = s2[f] + if f not in names: + f += r2postfix + current = output[f] + current[:r2cmn] = selected[:r2cmn] + if (jointype == 'outer') and r2spc: + current[-r2spc:] = selected[r2cmn:] + # Sort and finalize the output + output.sort(order=key) + kwargs = dict(usemask=usemask, asrecarray=asrecarray) + return _fix_output(_fix_defaults(output, defaults), **kwargs) + + +def rec_join(key, r1, r2, jointype='inner', r1postfix='1', r2postfix='2', + defaults=None): + """ + Join arrays `r1` and `r2` on keys. + Alternative to join_by, that always returns a np.recarray. 
+ + See Also + -------- + join_by : equivalent function + """ + kwargs = dict(jointype=jointype, r1postfix=r1postfix, r2postfix=r2postfix, + defaults=defaults, usemask=False, asrecarray=True) + return join_by(key, r1, r2, **kwargs) Property changes on: trunk/numpy/lib/recfunctions.py ___________________________________________________________________ Name: svn:mime-type + text/plain Added: trunk/numpy/lib/tests/test_recfunctions.py =================================================================== --- trunk/numpy/lib/tests/test_recfunctions.py 2009-01-22 05:37:36 UTC (rev 6330) +++ trunk/numpy/lib/tests/test_recfunctions.py 2009-01-22 05:40:25 UTC (rev 6331) @@ -0,0 +1,570 @@ + +import numpy as np +import numpy.ma as ma +from numpy.ma.testutils import * + +from numpy.ma.mrecords import MaskedRecords + +from numpy.lib.recfunctions import * +get_names = np.lib.recfunctions.get_names +get_names_flat = np.lib.recfunctions.get_names_flat +zip_descr = np.lib.recfunctions.zip_descr + +class TestRecFunctions(TestCase): + """ + Misc tests + """ + # + def setUp(self): + x = np.array([1, 2,]) + y = np.array([10, 20, 30]) + z = np.array([('A', 1.), ('B', 2.)], + dtype=[('A', '|S3'), ('B', float)]) + w = np.array([(1, (2, 3.0)), (4, (5, 6.0))], + dtype=[('a', int), ('b', [('ba', float), ('bb', int)])]) + self.data = (w, x, y, z) + + + def test_zip_descr(self): + "Test zip_descr" + (w, x, y, z) = self.data + # Std array + test = zip_descr((x, x), flatten=True) + assert_equal(test, + np.dtype([('', ' Author: pierregm Date: 2009-01-22 12:21:32 -0600 (Thu, 22 Jan 2009) New Revision: 6332 Modified: trunk/numpy/lib/tests/test_recfunctions.py Log: * fixed a machine-dependent issue on default int (' Author: cdavid Date: 2009-01-24 02:02:14 -0600 (Sat, 24 Jan 2009) New Revision: 6333 Modified: trunk/numpy/core/src/scalartypes.inc.src Log: Fix compilation error on 2.4. 
Modified: trunk/numpy/core/src/scalartypes.inc.src =================================================================== --- trunk/numpy/core/src/scalartypes.inc.src 2009-01-22 18:21:32 UTC (rev 6332) +++ trunk/numpy/core/src/scalartypes.inc.src 2009-01-24 08:02:14 UTC (rev 6333) @@ -2254,7 +2254,9 @@ 0, /* nb_inplace_floor_divide */ 0, /* nb_inplace_true_divide */ /* Added in release 2.5 */ +#if PY_VERSION_HEX >= 0x02050000 0, /* nb_index */ +#endif }; static PyObject * From numpy-svn at scipy.org Mon Jan 26 16:04:33 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 26 Jan 2009 15:04:33 -0600 (CST) Subject: [Numpy-svn] r6334 - in trunk/numpy/lib: . tests Message-ID: <20090126210433.75980C7C011@scipy.org> Author: pierregm Date: 2009-01-26 15:04:26 -0600 (Mon, 26 Jan 2009) New Revision: 6334 Modified: trunk/numpy/lib/_iotools.py trunk/numpy/lib/io.py trunk/numpy/lib/tests/test_io.py Log: * _iotools.StringConverter : - add a _checked attribute to indicate whether the converter has been upgraded or not. - switched the default value for bool to False * io.genfromtxt: - fixed for the case where a whole column is masked: switch to bool or the common dtype (if needed) Modified: trunk/numpy/lib/_iotools.py =================================================================== --- trunk/numpy/lib/_iotools.py 2009-01-24 08:02:14 UTC (rev 6333) +++ trunk/numpy/lib/_iotools.py 2009-01-26 21:04:26 UTC (rev 6334) @@ -294,7 +294,7 @@ """ # - _mapper = [(nx.bool_, str2bool, None), + _mapper = [(nx.bool_, str2bool, False), (nx.integer, int, -1), (nx.floating, float, nx.nan), (complex, complex, nx.nan+0j), @@ -354,7 +354,7 @@ if dtype_or_func is None: self.func = str2bool self._status = 0 - self.default = default + self.default = default or False ttype = np.bool else: # Is the input a np.dtype ? 
@@ -396,6 +396,7 @@ # self._callingfunction = self._strict_call self.type = ttype + self._checked = False # def _loose_call(self, value): try: @@ -408,6 +409,8 @@ return self.func(value) except ValueError: if value.strip() in self.missing_values: + if not self._status: + self._checked = False return self.default raise ValueError("Cannot convert string '%s'" % value) # @@ -421,6 +424,7 @@ The order in which the converters are tested is read from the :attr:`_status` attribute of the instance. """ + self._checked = True try: self._strict_call(value) except ValueError: Modified: trunk/numpy/lib/io.py =================================================================== --- trunk/numpy/lib/io.py 2009-01-24 08:02:14 UTC (rev 6333) +++ trunk/numpy/lib/io.py 2009-01-26 21:04:26 UTC (rev 6334) @@ -883,18 +883,21 @@ # Reset the dtype data = rows if dtype is None: - # Get the dtypes from the first row - coldtypes = [np.array(val).dtype for val in data[0]] - # Find the columns with strings, and take the largest number of chars. - strcolidx = [i for (i, v) in enumerate(coldtypes) if v.char == 'S'] + # Get the dtypes from the types of the converters + coldtypes = [conv.type for conv in converters] + # Find the columns with strings... + strcolidx = [i for (i, v) in enumerate(coldtypes) + if v in (type('S'), np.string_)] + # ... and take the largest number of chars. 
for i in strcolidx: coldtypes[i] = "|S%i" % max(len(row[i]) for row in data) # if names is None: # If the dtype is uniform, don't define names, else use '' - base = coldtypes[0] - if np.all([(dt == base) for dt in coldtypes]): - (ddtype, mdtype) = (base, np.bool) + base = set([c.type for c in converters if c._checked]) + + if len(base) == 1: + (ddtype, mdtype) = (list(base)[0], np.bool) else: ddtype = [('', dt) for dt in coldtypes] mdtype = [('', np.bool) for dt in coldtypes] Modified: trunk/numpy/lib/tests/test_io.py =================================================================== --- trunk/numpy/lib/tests/test_io.py 2009-01-24 08:02:14 UTC (rev 6333) +++ trunk/numpy/lib/tests/test_io.py 2009-01-26 21:04:26 UTC (rev 6334) @@ -685,6 +685,23 @@ assert_equal(test.mask, control.mask) + def test_with_masked_column_uniform(self): + "Test masked column" + data = StringIO.StringIO('1 2 3\n4 5 6\n') + test = np.genfromtxt(data, missing='2,5', dtype=None, usemask=True) + control = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[0, 1, 0],[0, 1, 0]]) + assert_equal(test, control) + + def test_with_masked_column_various(self): + "Test masked column" + data = StringIO.StringIO('True 2 3\nFalse 5 6\n') + test = np.genfromtxt(data, missing='2,5', dtype=None, usemask=True) + control = ma.array([(1, 2, 3), (0, 5, 6)], + mask=[(0, 1, 0),(0, 1, 0)], + dtype=[('f0', bool), ('f1', bool), ('f2', int)]) + assert_equal(test, control) + + def test_recfromtxt(self): # data = StringIO.StringIO('A,B\n0,1\n2,3') From numpy-svn at scipy.org Mon Jan 26 21:46:31 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Mon, 26 Jan 2009 20:46:31 -0600 (CST) Subject: [Numpy-svn] r6335 - in trunk/numpy/ma: . 
tests Message-ID: <20090127024631.80176C7C011@scipy.org> Author: pierregm Date: 2009-01-26 20:46:26 -0600 (Mon, 26 Jan 2009) New Revision: 6335 Modified: trunk/numpy/ma/core.py trunk/numpy/ma/tests/test_core.py Log: * prevent MaskedBinaryOperation and DomainedBinaryOperation from shrinking the mask of the output when at least one of the inputs has a mask full of False Modified: trunk/numpy/ma/core.py =================================================================== --- trunk/numpy/ma/core.py 2009-01-26 21:04:26 UTC (rev 6334) +++ trunk/numpy/ma/core.py 2009-01-27 02:46:26 UTC (rev 6335) @@ -616,7 +616,7 @@ def __call__ (self, a, b, *args, **kwargs): "Execute the call behavior." - m = mask_or(getmask(a), getmask(b)) + m = mask_or(getmask(a), getmask(b), shrink=False) (da, db) = (getdata(a), getdata(b)) # Easy case: there's no mask... if m is nomask: @@ -627,8 +627,12 @@ # Transforms to a (subclass of) MaskedArray if we don't have a scalar if result.shape: result = result.view(get_masked_subclass(a, b)) + # If we have a mask, make sure it's broadcasted properly if m.any(): result._mask = mask_or(getmaskarray(a), getmaskarray(b)) + # If some initial masks were not shrunk, don't shrink the result + elif m.shape: + result._mask = make_mask_none(result.shape, result.dtype) if isinstance(a, MaskedArray): result._update_from(a) if isinstance(b, MaskedArray): @@ -754,18 +758,19 @@ def __call__(self, a, b, *args, **kwargs): "Execute the call behavior." ma = getmask(a) - mb = getmask(b) + mb = getmaskarray(b) da = getdata(a) db = getdata(b) t = narray(self.domain(da, db), copy=False) if t.any(None): - mb = mask_or(mb, t) + mb = mask_or(mb, t, shrink=False) # The following line controls the domain filling if t.size == db.size: db = np.where(t, self.filly, db) else: db = np.where(np.resize(t, db.shape), self.filly, db) - m = mask_or(ma, mb) + # Shrink m if a.mask was nomask, otherwise don't. 
+ m = mask_or(ma, mb, shrink=(getattr(a, '_mask', nomask) is nomask)) if (not m.ndim) and m: return masked elif (m is nomask): @@ -774,7 +779,12 @@ result = np.where(m, da, self.f(da, db, *args, **kwargs)) if result.shape: result = result.view(get_masked_subclass(a, b)) - result._mask = m + # If we have a mask, make sure it's broadcasted properly + if m.any(): + result._mask = mask_or(getmaskarray(a), mb) + # If some initial masks were not shrunk, don't shrink the result + elif m.shape: + result._mask = make_mask_none(result.shape, result.dtype) if isinstance(a, MaskedArray): result._update_from(a) if isinstance(b, MaskedArray): Modified: trunk/numpy/ma/tests/test_core.py =================================================================== --- trunk/numpy/ma/tests/test_core.py 2009-01-26 21:04:26 UTC (rev 6334) +++ trunk/numpy/ma/tests/test_core.py 2009-01-27 02:46:26 UTC (rev 6335) @@ -869,6 +869,60 @@ assert_equal(test.mask, control.mask) + + def test_domained_binops_d2D(self): + "Test domained binary operations on 2D data" + a = array([[1.], [2.], [3.]], mask=[[False], [True], [True]]) + b = array([[2., 3.], [4., 5.], [6., 7.]]) + # + test = a / b + control = array([[1./2., 1./3.], [2., 2.], [3., 3.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + test = b / a + control = array([[2./1., 3./1.], [4., 5.], [6., 7.]], + mask=[[0, 0], [1, 1], [1, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + a = array([[1.], [2.], [3.]]) + b = array([[2., 3.], [4., 5.], [6., 7.]], + mask=[[0, 0], [0, 0], [0, 1]]) + test = a / b + control = array([[1./2, 1./3], [2./4, 2./5], [3./6, 3]], + mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + # + test = b / a + control = array([[2/1., 3/1.], [4/2., 5/2.], [6/3., 7]], + 
mask=[[0, 0], [0, 0], [0, 1]]) + assert_equal(test, control) + assert_equal(test.data, control.data) + assert_equal(test.mask, control.mask) + + + def test_noshrinking(self): + "Check that we don't shrink a mask when not wanted" + # Binary operations + a = masked_array([1,2,3], mask=[False,False,False], shrink=False) + b = a + 1 + assert_equal(b.mask, [0, 0, 0]) + # In place binary operation + a += 1 + assert_equal(a.mask, [0, 0, 0]) + # Domained binary operation + b = a / 1. + assert_equal(b.mask, [0, 0, 0]) + # In place binary operation + a /= 1. + assert_equal(a.mask, [0, 0, 0]) + + def test_mod(self): "Tests mod" (x, y, a10, m1, m2, xm, ym, z, zm, xf) = self.d From numpy-svn at scipy.org Thu Jan 29 19:26:46 2009 From: numpy-svn at scipy.org (numpy-svn at scipy.org) Date: Thu, 29 Jan 2009 18:26:46 -0600 (CST) Subject: [Numpy-svn] r6336 - trunk/numpy Message-ID: <20090130002646.381C3C7C02A@scipy.org> Author: matthew.brett at gmail.com Date: 2009-01-29 18:26:44 -0600 (Thu, 29 Jan 2009) New Revision: 6336 Modified: trunk/numpy/add_newdocs.py Log: New docstrings for byteorder and newbyteorder() Modified: trunk/numpy/add_newdocs.py =================================================================== --- trunk/numpy/add_newdocs.py 2009-01-27 02:46:26 UTC (rev 6335) +++ trunk/numpy/add_newdocs.py 2009-01-30 00:26:44 UTC (rev 6336) @@ -1997,6 +1997,32 @@ Equivalent to a.view(a.dtype.newbytorder(byteorder)) + Return array with dtype changed to interpret array data as + specified byte order. + + Changes are also made in all fields and sub-arrays of the array + data type. + + Parameters + ---------- + new_order : string, optional + Byte order to force; a value from the byte order + specifications below. The default value ('S') results in + swapping the current byte order. 
+ `new_order` codes can be any of: + * {'<', 'L'} - little endian + * {'>', 'B'} - big endian + * {'=', 'N'} - native order + * 'S' - swap dtype from current to opposite endian + * {'|', 'I'} - ignore (no change to byte order) + The code does a case-insensitive check on the first letter of + `new_order` for these alternatives. For example, any of '>' + or 'B' or 'b' or 'brian' are valid to specify big-endian. + + Returns + ------- + new_arr : array + array with the given change to the dtype byte order. """)) @@ -2815,3 +2841,100 @@ [12, 15, 18]]) """)) + +add_newdoc('numpy.core', 'dtype', ('newbyteorder', + ''' + newbyteorder(new_order='S') + + Return a new dtype with a different byte order. + + Changes are also made in all fields and sub-arrays of the data type. + + Parameters + ---------- + new_order : string, optional + Byte order to force; a value from the byte order + specifications below. The default value ('S') results in + swapping the current byte order. + `new_order` codes can be any of: + * {'<', 'L'} - little endian + * {'>', 'B'} - big endian + * {'=', 'N'} - native order + * 'S' - swap dtype from current to opposite endian + * {'|', 'I'} - ignore (no change to byte order) + The code does a case-insensitive check on the first letter of + `new_order` for these alternatives. For example, any of '>' + or 'B' or 'b' or 'brian' are valid to specify big-endian. + + Returns + ------- + new_dtype : dtype + New dtype object with the given change to the byte order. 
+ + Examples + -------- + >>> import sys + >>> sys_is_le = sys.byteorder == 'little' + >>> native_code = sys_is_le and '<' or '>' + >>> swapped_code = sys_is_le and '>' or '<' + >>> native_dt = np.dtype(native_code+'i2') + >>> swapped_dt = np.dtype(swapped_code+'i2') + >>> native_dt.newbyteorder('S') == swapped_dt + True + >>> native_dt.newbyteorder() == swapped_dt + True + >>> native_dt == swapped_dt.newbyteorder('S') + True + >>> native_dt == swapped_dt.newbyteorder('=') + True + >>> native_dt == swapped_dt.newbyteorder('N') + True + >>> native_dt == native_dt.newbyteorder('|') + True + >>> np.dtype('>> np.dtype('>> np.dtype('>i2') == native_dt.newbyteorder('>') + True + >>> np.dtype('>i2') == native_dt.newbyteorder('B') + True + ''')) + +add_newdoc('numpy.core', 'dtype', ('byteorder', + ''' + dt.byteorder + + String giving byteorder of dtype + + One of: + * '=' - native byteorder + * '<' - little endian + * '>' - big endian + * '|' - endian not relevant + + Examples + -------- + >>> dt = np.dtype('i2') + >>> dt.byteorder + '=' + >>> # endian is not relevant for 8 bit numbers + >>> np.dtype('i1').byteorder + '|' + >>> # or ASCII strings + >>> np.dtype('S2').byteorder + '|' + >>> # Even if specific code is given, and it is native + >>> # '=' is the byteorder + >>> import sys + >>> sys_is_le = sys.byteorder == 'little' + >>> native_code = sys_is_le and '<' or '>' + >>> swapped_code = sys_is_le and '>' or '<' + >>> dt = np.dtype(native_code + 'i2') + >>> dt.byteorder + '=' + >>> # Swapped code shows up as itself + >>> dt = np.dtype(swapped_code + 'i2') + >>> dt.byteorder == swapped_code + True + '''))