"""Simple illustration of nested record arrays.

Note: possible numpy.loadtxt bug?"""

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

import numpy as np
from numpy import array, dtype, loadtxt, recarray

# Consider the task of loading data that is stored in plain text in a file such
# as the string below, where the last block of numbers is meant to be
# interpreted as a single 2x3 int array, whose field name in the resulting
# structured array will be 'block'.
#
# Every data row must carry exactly nine whitespace-separated columns (name, x,
# y and six block ints): the dtype below consumes six ints per row and the
# second pass further down reads columns 3-8. A row with a stray extra value
# is at best silently truncated, and a loadtxt that insists on a constant
# column count will refuse the whole file.
txtdata = StringIO("""
# name  x       y       block - 2x3 ints
aaaa	1.0	8.0	1 2 3 4 5 6
aaaa	2.0	7.4	2 11 22 3 4 5
bbbb	3.5	8.5	3 0 22 44 5 6
aaaa	6.4	4.0	4 1 3 33 54 65
aaaa	8.8	4.1	5 5 3 4 44 77
bbbb	5.5	9.1	6 3 4 5 0 55
bbbb	7.7	8.5	7 2 3 4 5 66
""")

# We make the dtype for it: a 4-byte string, two floats and a nested 2x3 block
# of ints.
dt = dtype(dict(names=['name', 'x', 'y', 'block'],
                formats=['S4', float, float, (int, (2, 3))]))

# And we load it with loadtxt and make a recarray version for convenience, so
# that fields can be reached as attributes (rdata.block, rdata.x, ...).
data = loadtxt(txtdata, dt)
rdata = data.view(recarray)

# Unfortunately, when this demo was written, the block data came back with the
# first number found repeated over the whole block. This seems to be a loadtxt
# bug:
# In [176]: rdata.block[0,1]
# Out[176]: array([1, 1, 1])  # we'd expect array([4, 5, 6])
# NOTE(review): newer NumPy releases presumably fill nested fields correctly,
# in which case this check stays silent -- confirm against the installed
# version.
if np.any(rdata.block[0, 1] != array([4, 5, 6])):
    print('WARNING: loadtxt bug??')

# A workaround can be used by doing a second pass on the file, loading the
# columns corresponding to the block as plain ints and doing a reassignment of
# that data into the original data.

# Rewind the data and reload only the 'block' of ints (columns 3 through 8):
txtdata.seek(0)
block_data = loadtxt(txtdata, int, usecols=range(3, 9))

# Let's work with a copy of the original so we can compare interactively...
rdata2 = rdata.copy()
# We assign to the block field in our real array the block_data one,
# appropriately reshaped from (nrows, 6) to the field's (nrows, 2, 3) shape.
rdata2.block[:] = block_data.reshape(rdata.block.shape)

# Same check as before, with the new one
if np.any(rdata2.block[0, 1] != array([4, 5, 6])):
    print('WARNING: loadtxt bug??')
else:
    print('Second pass - data loaded OK.')
