[Numpy-discussion] Adding `offset` argument to np.lib.format.open_memmap and np.load
Jon Olav Vik
jonovik at gmail.com
Thu Feb 24 11:56:36 EST 2011
Jon Olav Vik <jonovik <at> gmail.com> writes:
> https://github.com/jonovik/numpy/compare/master...offset_memmap
> I've used this extensively on Numpy 1.4.0, but based my Git checkout on the
> current Numpy trunk. There have been some rearrangements in np.load since then
> (it used to be in np.lib.io and is now in np.lib.npyio), but as far as I can
> see, my modifications carry over fine. I haven't had a chance to test with
> Numpy trunk, though. (What is the best way to set up a test version without
> affecting my working 1.4.0 setup?)
I tried to push my modifications for 1.4.0, but couldn't figure out how my
Github account could hold forks of both Numpy trunk and maintenance/1.4.x.
Anyhow, here is a patch for 1.4:
From c3ff71637c6c00d6cac1ee22a2cad34de2449431 Mon Sep 17 00:00:00 2001
From: Jon Olav Vik <jonovik at gmail.com>
Date: Thu, 24 Feb 2011 17:38:03 +0100
Subject: [PATCH 54/54] Added `offset` parameter as in np.memmap to np.load and
np.lib.format.open_memmap.
Modified numpy/lib/format.py
Modified numpy/lib/io.py
Doctests:
>>> filename = "temp.npy"
>>> np.save(filename, np.arange(10))
>>> load(filename)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> mmap = load(filename, mmap_mode="r+")
>>> mmap
memmap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> mmap[3:7] = 42
>>> del mmap
>>> np.load(filename)
array([ 0, 1, 2, 42, 42, 42, 42, 7, 8, 9])
>>> mmap = load(filename, mmap_mode="r+", offset=2, shape=6)
>>> mmap[-1] = 123
>>> del mmap
>>> np.load(filename)
array([ 0, 1, 2, 42, 42, 42, 42, 123, 8, 9])
>>> import os
>>> os.remove(filename)
---
numpy/lib/format.py | 17 +++++++++++++----
numpy/lib/io.py | 7 +++++--
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 3c5fe32..7c28b09 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -460,7 +460,7 @@ def read_array(fp):
def open_memmap(filename, mode='r+', dtype=None, shape=None,
- fortran_order=False, version=(1,0)):
+ fortran_order=False, version=(1,0), offset=0):
"""
Open a .npy file as a memory-mapped array.
@@ -479,13 +479,15 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
mode.
shape : tuple of int, optional
The shape of the array if we are creating a new file in "write"
- mode.
+ mode. Shape of (contiguous) slice if opening an existing file.
fortran_order : bool, optional
Whether the array should be Fortran-contiguous (True) or
C-contiguous (False) if we are creating a new file in "write" mode.
version : tuple of int (major, minor)
If the mode is a "write" mode, then this is the version of the file
format used to create the file.
+ offset : int, optional
+ Number of elements to skip along the first dimension.
Returns
-------
@@ -509,6 +511,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
" existing file handles.")
if 'w' in mode:
+ assert offset == 0, "Cannot specify offset when creating memmap"
# We are creating the file, not reading it.
# Check if we ought to create the file.
if version != (1, 0):
@@ -541,11 +544,17 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
if version != (1, 0):
msg = "only support version (1,0) of file format, not %r"
raise ValueError(msg % (version,))
- shape, fortran_order, dtype = read_array_header_1_0(fp)
+ fullshape, fortran_order, dtype = read_array_header_1_0(fp)
+ if shape is None:
+ shape = fullshape
+ if offset:
+ shape = list(fullshape)
+ shape[0] = shape[0] - offset
+ shape = tuple(shape)
if dtype.hasobject:
msg = "Array can't be memory-mapped: Python objects in dtype."
raise ValueError(msg)
- offset = fp.tell()
+ offset = fp.tell() + offset * dtype.itemsize
finally:
fp.close()
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index 262d20d..694bae2 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -212,7 +212,7 @@ class NpzFile(object):
return self.files.__contains__(key)
-def load(file, mmap_mode=None):
+def load(file, mmap_mode=None, offset=0, shape=None):
"""
Load a pickled, ``.npy``, or ``.npz`` binary file.
@@ -272,6 +272,9 @@ def load(file, mmap_mode=None):
memmap([4, 5, 6])
"""
+ if (not mmap_mode) and (offset or shape):
+ raise ValueError("Offset and shape should be used only with mmap_mode")
+
import gzip
if isinstance(file, basestring):
@@ -290,7 +293,7 @@ def load(file, mmap_mode=None):
return NpzFile(fid)
elif magic == format.MAGIC_PREFIX: # .npy file
if mmap_mode:
- return format.open_memmap(file, mode=mmap_mode)
+ return open_memmap(file, mode=mmap_mode, shape=shape, offset=offset)
else:
return format.read_array(fid)
else: # Try a pickle
--
1.7.4.msysgit.0
More information about the NumPy-Discussion
mailing list