[Numpy-discussion] Adding `offset` argument to np.lib.format.open_memmap and np.load

Jon Olav Vik jonovik at gmail.com
Thu Feb 24 11:56:36 EST 2011


Jon Olav Vik <jonovik <at> gmail.com> writes:

> https://github.com/jonovik/numpy/compare/master...offset_memmap

> I've used this extensively on Numpy 1.4.0, but based my Git checkout on the
> current Numpy trunk. There have been some rearrangements in np.load since
> then (it used to be in np.lib.io and is now in np.lib.npyio), but as far as
> I can see, my modifications carry over fine. I haven't had a chance to test
> with Numpy trunk, though. (What is the best way to set up a test version
> without affecting my working 1.4.0 setup?)

I tried to push my modifications for 1.4.0, but couldn't figure out how my 
Github account could hold forks of both Numpy trunk and maintenance/1.4.x. 
Anyhow, here is a patch for 1.4:

From c3ff71637c6c00d6cac1ee22a2cad34de2449431 Mon Sep 17 00:00:00 2001
From: Jon Olav Vik <jonovik at gmail.com>
Date: Thu, 24 Feb 2011 17:38:03 +0100
Subject: [PATCH 54/54] Added `offset` parameter as in np.memmap to np.load and 
np.lib.format.open_memmap.
 Modified   numpy/lib/format.py
 Modified   numpy/lib/io.py

Doctests:
>>> filename = "temp.npy"
>>> np.save(filename, np.arange(10))
>>> load(filename)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> mmap = load(filename, mmap_mode="r+")
>>> mmap
memmap([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
>>> mmap[3:7] = 42
>>> del mmap
>>> np.load(filename)
array([ 0,  1,  2, 42, 42, 42, 42,  7,  8,  9])
>>> mmap = load(filename, mmap_mode="r+", offset=2, shape=6)
>>> mmap[-1] = 123
>>> del mmap
>>> np.load(filename)
array([  0,   1,   2,  42,  42,  42,  42, 123,   8,   9])
>>> import os
>>> os.remove(filename)
---
 numpy/lib/format.py |   17 +++++++++++++----
 numpy/lib/io.py     |    7 +++++--
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 3c5fe32..7c28b09 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -460,7 +460,7 @@ def read_array(fp):
 
 
 def open_memmap(filename, mode='r+', dtype=None, shape=None,
-                fortran_order=False, version=(1,0)):
+                fortran_order=False, version=(1,0), offset=0):
     """
     Open a .npy file as a memory-mapped array.
 
@@ -479,13 +479,15 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
         mode.
     shape : tuple of int, optional
         The shape of the array if we are creating a new file in "write"
-        mode.
+        mode. Shape of (contiguous) slice if opening an existing file.
     fortran_order : bool, optional
         Whether the array should be Fortran-contiguous (True) or
         C-contiguous (False) if we are creating a new file in "write" mode.
     version : tuple of int (major, minor)
         If the mode is a "write" mode, then this is the version of the file
         format used to create the file.
+    offset : int, optional
+        Number of elements to skip along the first dimension.
 
     Returns
     -------
@@ -509,6 +511,7 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
                          " existing file handles.")
 
     if 'w' in mode:
+        assert offset == 0, "Cannot specify offset when creating memmap"
         # We are creating the file, not reading it.
         # Check if we ought to create the file.
         if version != (1, 0):
@@ -541,11 +544,17 @@ def open_memmap(filename, mode='r+', dtype=None, shape=None,
             if version != (1, 0):
                 msg = "only support version (1,0) of file format, not %r"
                 raise ValueError(msg % (version,))
-            shape, fortran_order, dtype = read_array_header_1_0(fp)
+            fullshape, fortran_order, dtype = read_array_header_1_0(fp)
+            if shape is None:
+                shape = fullshape
+                if offset:
+                    shape = list(fullshape)
+                    shape[0] = shape[0] - offset
+                    shape = tuple(shape)
             if dtype.hasobject:
                 msg = "Array can't be memory-mapped: Python objects in dtype."
                 raise ValueError(msg)
-            offset = fp.tell()
+            offset = fp.tell() + offset * dtype.itemsize
         finally:
             fp.close()
 
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index 262d20d..694bae2 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -212,7 +212,7 @@ class NpzFile(object):
         return self.files.__contains__(key)
 
 
-def load(file, mmap_mode=None):
+def load(file, mmap_mode=None, offset=0, shape=None):
     """
     Load a pickled, ``.npy``, or ``.npz`` binary file.
 
@@ -272,6 +272,9 @@ def load(file, mmap_mode=None):
     memmap([4, 5, 6])
 
     """
+    if (not mmap_mode) and (offset or shape):
+        raise ValueError("Offset and shape should be used only with mmap_mode")
+
     import gzip
 
     if isinstance(file, basestring):
@@ -290,7 +293,7 @@ def load(file, mmap_mode=None):
         return NpzFile(fid)
     elif magic == format.MAGIC_PREFIX: # .npy file
         if mmap_mode:
-            return format.open_memmap(file, mode=mmap_mode)
+            return open_memmap(file, mode=mmap_mode, shape=shape, offset=offset)
         else:
             return format.read_array(fid)
     else:  # Try a pickle
-- 
1.7.4.msysgit.0





More information about the NumPy-Discussion mailing list