[Scipy-svn] r3754 - branches/io_new

Mon Dec 31 21:26:26 EST 2007

Author: oliphant
Date: 2007-12-31 20:26:21 -0600 (Mon, 31 Dec 2007)
New Revision: 3754

Removed:
   branches/io_new/datasource.py
Modified:
   branches/io_new/__init__.py
   branches/io_new/array_import.py
   branches/io_new/data_store.py
   branches/io_new/npfile.py
   branches/io_new/pickler.py
Log:
Add deprecation doc strings for scipy.io

Modified: branches/io_new/__init__.py
===================================================================

--- branches/io_new/__init__.py	2008-01-01 01:42:14 UTC (rev 3753)
+++ branches/io_new/__init__.py	2008-01-01 02:26:21 UTC (rev 3754)
@@ -6,36 +6,35 @@
 
 from numpy import deprecate_with_doc
 
+# These are all deprecated (until the end deprecated tag)
+from npfile import npfile
+from data_store import save, load, create_module, create_shelf
+from array_import import read_array, write_array
+from pickler import objload, objsave
+
 from numpyio import packbits, unpackbits, bswap, fread, fwrite, \
      convert_objectarray
-fread = deprecate_with_doc(\
-"""
 
-scipy.io.fread is easily replaced with raw reading capabilities of NumPy 
+fread = deprecate_with_doc("""
+scipy.io.fread is can be replaced with raw reading capabilities of NumPy 
 including fromfile as well as memory-mapping capabilities.  
 """)(fread)
 
-fwrite = deprecate_with_doc(\
-"""
+fwrite = deprecate_with_doc("""
+scipy.io.fwrite can be replaced with raw writing capabilities of
+NumPy.  Also, remember that files can be directly memory-mapped into NumPy
+arrays which is often a better way of reading especially large files. 
 
-scipy.io.fwrite is easily replaced with raw writing capabilities of
-NumPy.  Also, remmber that files can be directly memory-mapped into NumPy
-arrays which is often a better way of "reading" especially large files. 
-
 Look at the tofile methods as well as save and savez for writing arrays into
 easily transported files of data.  
 """)(fwrite)
 
-bswap = deprecate_with_doc(\
-"""
-
+bswap = deprecate_with_doc("""
 scipy.io.bswap is easily replaced with the byteswap method on an array.
 out = scipy.io.bswap(arr) --> out = arr.byteswap(True)
 """)(bswap)
 
-packbits = deprecate_with_doc(\
-"""
-
+packbits = deprecate_with_doc("""
 The functionality of scipy.io.packbits is now available as numpy.packbits
 The calling convention is a bit different as the 2-d case is not specialized.
 
@@ -52,9 +51,7 @@
     return np.packbits(a, axis=-1).ravel()
 """)(packbits)
 
-unpackbits = deprecate_with_doc(\
-"""
-
+unpackbits = deprecate_with_doc("""
 The functionality of scipy.io.unpackbits is now available in numpy.unpackbits
 The calling convention is different however as the 2-d case is no longer
 specialized. 
@@ -68,24 +65,25 @@
     res = np.unpackbits(inp, axis=-1)[:,:els_per_slice]
     return res.ravel()
 """)(unpackbits)
-convert_objectarray = deprecate_with_doc(convert_objectarray)
 
+convert_objectarray = deprecate_with_doc("""
+The same functionality can be obtained using NumPy string arrays and the
+.astype method (except for the optional missing value feature).
+""")(convert_objectarray)
+
+# end deprecated
+
 # matfile read and write
 from matlab.mio import loadmat, savemat
 
 # netCDF file support
 from netcdf import netcdf_file, netcdf_variable
 
-from npfile import npfile
-
 from recaster import sctype_attributes, Recaster
+from data_store import save_as_module
+from mmio import mminfo, mmread, mmwrite
 
-from array_import import read_array, write_array
-from data_store import save, save_as_module
-from data_store import load, create_module, create_shelf
-from pickler import objload, objsave
 
-from mmio import mminfo, mmread, mmwrite
 
 __all__ = filter(lambda s:not s.startswith('_'),dir())
 from numpy.testing import NumpyTest

Modified: branches/io_new/array_import.py
===================================================================
--- branches/io_new/array_import.py	2008-01-01 01:42:14 UTC (rev 3753)
+++ branches/io_new/array_import.py	2008-01-01 02:26:21 UTC (rev 3754)
@@ -18,10 +18,6 @@
 # Numpy imports.
 import numpy
 
-# snip on----- DELETE after numpy.deprecate_with_doc is available
-numpy.deprecate_with_doc = lambda doc: (lambda func: func)
-# snip off---- DELETE after numpy.deprecate_with_doc is available
-
 from numpy import array, take, concatenate, asarray, real, imag, \
   deprecate_with_doc
 # Sadly, this module is still written with typecodes in mind.
@@ -316,7 +312,10 @@
     return cols, atype
 
 
-@deprecate_with_doc('')
+@deprecate_with_doc("""
+The functionality of read_array is in numpy.loadtxt which allows the same
+functionality using different syntax.
+""")
 def read_array(fileobject, separator=default, columns=default, comment="#",
                lines=default, atype=Float, linesep='\n',
                rowsize=10000, missing=0):
@@ -444,7 +443,11 @@
     return row_sep.join(thestr)
 
 
-@deprecate_with_doc('')
+@deprecate_with_doc("""
+
+This function is replaced by numpy.savetxt which allows the same functionality
+through a different syntax.
+""")
 def write_array(fileobject, arr, separator=" ", linesep='\n',
                 precision=5, suppress_small=0, keep_open=0):
     """Write a rank-2 or less array to file represented by fileobject.

Modified: branches/io_new/data_store.py
===================================================================
--- branches/io_new/data_store.py	2008-01-01 01:42:14 UTC (rev 3753)
+++ branches/io_new/data_store.py	2008-01-01 02:26:21 UTC (rev 3754)
@@ -5,30 +5,27 @@
     you to store data to a file and then load it back into the workspace.
     When the data is stored, a python module is also created as the
     "namespace for the data"
-    >>> import data_store
+    >>> import scipy.io
     >>> import os
     >>> a = 1
-    >>> data_store.save('c:/temp/junker',{'a':a})
+    >>> scipy.io.save_as_module('c:/temp/junker',{'a':a})
     >>> os.chdir('c:/temp')
     >>> import junker
     >>> junker.a
     1
 """
 
-__all__ = ['load', 'save', 'create_module', 'create_shelf']
+__all__ = ['save_as_module', 
+           # The rest of these are all deprecated
+           'save', 'create_module',
+           'create_shelf', 'load']
+
 import dumb_shelve
 import os
 
-# snip on----- DELETE after numpy.deprecate_with_doc is available
-import numpy
-numpy.deprecate_with_doc = lambda doc: (lambda func: func)
-# snip off---- DELETE after numpy.deprecate_with_doc is available
+from numpy import deprecate_with_doc, deprecate
 
-from numpy import deprecate_with_doc
-
-
-@deprecate_with_doc('')
-def load(module):
+def _load(module):
     """ Load data into module from a shelf with
         the same name as the module.
     """
@@ -43,18 +40,15 @@
 #       print i, 'loaded...'
 #   print 'done'
 
-def save_as_module(file_name=None,data=None):
-    """ Save the dictionary "data" into
-        a module and shelf named save
-    """
-    import dumb_shelve
-    create_module(file_name)
-    create_shelf(file_name,data)
+load = deprecate_with_doc("""
+This is an internal function used with scipy.io.save_as_module
 
-save = deprecate_with_doc('')(save_as_module)
+If you are saving arrays into a module, you should think about using
+HDF5 or .npz files instead.
+""")(_load)
 
-@deprecate_with_doc('')
-def create_module(file_name):
+
+def _create_module(file_name):
     """ Create the module file.
     """
     if not os.path.exists(file_name+'.py'): # don't clobber existing files
@@ -62,11 +56,17 @@
         f = open(file_name+'.py','w')
         f.write('import scipy.io.data_store as data_store\n')
         f.write('import %s\n' % module_name)
-        f.write('data_store.load(%s)' % module_name)
+        f.write('data_store._load(%s)' % module_name)
         f.close()
 
-@deprecate_with_doc('')
-def create_shelf(file_name,data):
+create_module = deprecate_with_doc("""
+This is an internal function used with scipy.io.save_as_module
+
+If you are saving arrays into a module, you should think about
+using HDF5 or .npz files instead.
+""")(_create_module)
+
+def _create_shelf(file_name,data):
     """Use this to write the data to a new file
     """
     shelf_name = file_name.split('.')[0]
@@ -76,3 +76,20 @@
         f[i] = data[i]
 #   print 'done'
     f.close()
+
+create_shelf = deprecate_with_doc("""
+This is an internal function used with scipy.io.save_as_module
+
+If you are saving arrays into a module, you should think about using
+HDF5 or .npz files instead.
+""")(_create_shelf)
+
+
+def save_as_module(file_name=None,data=None):
+    """ Save the dictionary "data" into
+        a module and shelf named save
+    """
+    _create_module(file_name)
+    _create_shelf(file_name,data)
+
+save = deprecate(save_as_module, 'save', 'save_as_module')

Deleted: branches/io_new/datasource.py
===================================================================
--- branches/io_new/datasource.py	2008-01-01 01:42:14 UTC (rev 3753)
+++ branches/io_new/datasource.py	2008-01-01 02:26:21 UTC (rev 3754)
@@ -1,457 +0,0 @@
-"""A file interface for handling local and remote data files.
-The goal of datasource is to abstract some of the file system operations when
-dealing with data files so the researcher doesn't have to know all the
-low-level details.  Through datasource, a researcher can obtain and use a
-file with one function call, regardless of location of the file.
-
-DataSource is meant to augment standard python libraries, not replace them.
-It should work seemlessly with standard file IO operations and the os module.
-
-DataSource files can originate locally or remotely:
-
-- local files : '/home/guido/src/local/data.txt'
-- URLs (http, ftp, ...) : 'http://www.scipy.org/not/real/data.txt'
-
-DataSource files can also be compressed or uncompressed.  Currently only gzip
-and bz2 are supported.
-
-Example:
-
-    >>> # Create a DataSource, use os.curdir (default) for local storage.
-    >>> ds = datasource.DataSource()
-    >>>
-    >>> # Open a remote file.
-    >>> # DataSource downloads the file, stores it locally in:
-    >>> #     './www.google.com/index.html'
-    >>> # opens the file and returns a file object.
-    >>> fp = ds.open('http://www.google.com/index.html')
-    >>>
-    >>> # Use the file as you normally would
-    >>> fp.read()
-    >>> fp.close()
-
-"""
-
-__docformat__ = "restructuredtext en"
-
-import bz2
-import gzip
-import os
-import tempfile
-from shutil import rmtree
-from urllib2 import urlopen, URLError
-from urlparse import urlparse
-
-import warnings
-
-# datasource has been used for a while in the NIPY project for analyzing
-# large fmri imaging files hosted over a network.  Data would be fetched
-# via URLs, cached locally and analyzed. Under these conditions the code
-# worked well, however it needs to be documented, tested and reviewed
-# before being fully exposed to SciPy.  We hope to do this before the
-# 0.7 release.
-_api_warning = "The datasource API will be changing frequently before \
-the 0.7 release as the code is ported from the NIPY project to SciPy. \
-Some of the current public interface may become private during the port! \
-Use this module minimally, if at all, until it is stabilized."
-
-warnings.warn(_api_warning)
-
-# TODO: .zip support, .tar support?
-_file_openers = {".gz":gzip.open, ".bz2":bz2.BZ2File, None:file}
-
-
-def open(path, mode='r', destpath=os.curdir):
-    """Open ``path`` with ``mode`` and return the file object.
-
-    If ``path`` is an URL, it will be downloaded, stored in the DataSource
-    directory and opened from there.
-
-    *Parameters*:
-
-        path : {string}
-
-        mode : {string}, optional
-
-        destpath : {string}, optional
-            Destination directory where URLs will be downloaded and stored.
-
-    *Returns*:
-
-        file object
-
-    """
-
-    ds = DataSource(destpath)
-    return ds.open(path, mode)
-
-
-class DataSource (object):
-    """A generic data source file (file, http, ftp, ...).
-
-    DataSources could be local files or remote files/URLs.  The files may
-    also be compressed or uncompressed.  DataSource hides some of the low-level
-    details of downloading the file, allowing you to simply pass in a valid
-    file path (or URL) and obtain a file object.
-
-    *Methods*:
-
-        - exists : test if the file exists locally or remotely
-        - abspath : get absolute path of the file in the DataSource directory
-        - open : open the file
-
-    *Example URL DataSource*::
-
-        # Initialize DataSource with a local directory, default is os.curdir.
-        ds = DataSource('/home/guido')
-
-        # Open remote file.
-        # File will be downloaded and opened from here:
-        #     /home/guido/site/xyz.txt
-        ds.open('http://fake.xyz.web/site/xyz.txt')
-
-    *Example using DataSource for temporary files*::
-
-        # Initialize DataSource with 'None' for the local directory.
-        ds = DataSource(None)
-
-        # Open local file.
-        # Opened file exists in a temporary directory like:
-        #     /tmp/tmpUnhcvM/foobar.txt
-        # Temporary directories are deleted when the DataSource is deleted.
-        ds.open('/home/guido/foobar.txt')
-
-    *Notes*:
-        BUG : URLs require a scheme string ('http://') to be used.
-              www.google.com will fail.
-
-              >>> repos.exists('www.google.com/index.html')
-              False
-
-              >>> repos.exists('http://www.google.com/index.html')
-              True
-
-    """
-
-    def __init__(self, destpath=os.curdir):
-        """Create a DataSource with a local path at destpath."""
-        if destpath:
-            self._destpath = os.path.abspath(destpath)
-            self._istmpdest = False
-        else:
-            self._destpath = tempfile.mkdtemp()
-            self._istmpdest = True
-
-    def __del__(self):
-        # Remove temp directories
-        if self._istmpdest:
-            rmtree(self._destpath)
-
-    def _iszip(self, filename):
-        """Test if the filename is a zip file by looking at the file extension.
-        """
-        fname, ext = os.path.splitext(filename)
-        return ext in _file_openers.keys()
-
-    def _iswritemode(self, mode):
-        """Test if the given mode will open a file for writing."""
-
-        # Currently only used to test the bz2 files.
-        _writemodes = ("w", "+")
-        for c in mode:
-            if c in _writemodes:
-                return True
-        return False
-
-    def _splitzipext(self, filename):
-        """Split zip extension from filename and return filename.
-
-        *Returns*:
-            base, zip_ext : {tuple}
-
-        """
-
-        if self._iszip(filename):
-            return os.path.splitext(filename)
-        else:
-            return filename, None
-
-    def _possible_names(self, filename):
-        """Return a tuple containing compressed filename variations."""
-        names = [filename]
-        if not self._iszip(filename):
-            for zipext in _file_openers.keys():
-                if zipext:
-                    names.append(filename+zipext)
-        return names
-
-    def _isurl(self, path):
-        """Test if path is a net location.  Tests the scheme and netloc."""
-
-        # BUG : URLs require a scheme string ('http://') to be used.
-        #       www.google.com will fail.
-        #       Should we prepend the scheme for those that don't have it and
-        #       test that also?  Similar to the way we append .gz and test for
-        #       for compressed versions of files.
-
-        scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
-        return bool(scheme and netloc)
-
-    def _cache(self, path):
-        """Cache the file specified by path.
-
-        Creates a copy of the file in the datasource cache.
-
-        """
-
-        upath = self.abspath(path)
-
-        # ensure directory exists
-        if not os.path.exists(os.path.dirname(upath)):
-            os.makedirs(os.path.dirname(upath))
-
-        # TODO: Doesn't handle compressed files!
-        if self._isurl(path):
-            try:
-                openedurl = urlopen(path)
-                file(upath, 'w').write(openedurl.read())
-            except URLError:
-                raise URLError("URL not found: ", path)
-        else:
-            try:
-                # TODO: Why not just copy the file with shutils.copyfile?
-                fp = file(path, 'r')
-                file(upath, 'w').write(fp.read())
-            except IOError:
-                raise IOError("File not found: ", path)
-        return upath
-
-    def _findfile(self, path):
-        """Searches for ``path`` and returns full path if found.
-
-        If path is an URL, _findfile will cache a local copy and return
-        the path to the cached file.
-        If path is a local file, _findfile will return a path to that local
-        file.
-
-        The search will include possible compressed versions of the file and
-        return the first occurence found.
-
-        """
-
-        # Build list of possible local file paths
-        if not self._isurl(path):
-            # Valid local paths
-            filelist = self._possible_names(path)
-            # Paths in self._destpath
-            filelist += self._possible_names(self.abspath(path))
-        else:
-            # Cached URLs in self._destpath
-            filelist = self._possible_names(self.abspath(path))
-            # Remote URLs
-            filelist = filelist + self._possible_names(path)
-
-        for name in filelist:
-            if self.exists(name):
-                if self._isurl(name):
-                    name = self._cache(name)
-                return name
-        return None
-
-    def abspath(self, path):
-        """Return absolute path of ``path`` in the DataSource directory.
-
-        If ``path`` is an URL, the ``abspath`` will be either the location
-        the file exists locally or the location it would exist when opened
-        using the ``open`` method.
-
-        The functionality is idential to os.path.abspath.
-
-        *Parameters*:
-
-            path : {string}
-                Can be a local file or a remote URL.
-
-        *Returns*:
-
-            Complete path, rooted in the DataSource destination directory.
-
-        *See Also*:
-
-            `open` : Method that downloads and opens files.
-
-        """
-
-        # TODO:  This should be more robust.  Handles case where path includes
-        #        the destpath, but not other sub-paths. Failing case:
-        #        path = /home/guido/datafile.txt
-        #        destpath = /home/alex/
-        #        upath = self.abspath(path)
-        #        upath == '/home/alex/home/guido/datafile.txt'
-
-        # handle case where path includes self._destpath
-        splitpath = path.split(self._destpath, 2)
-        if len(splitpath) > 1:
-            path = splitpath[1]
-        scheme, netloc, upath, uparams, uquery, ufrag = urlparse(path)
-        return os.path.join(self._destpath, netloc, upath.strip(os.sep))
-
-    def exists(self, path):
-        """Test if ``path`` exists.
-
-        Test if ``path`` exists as (and in this order):
-
-        - a local file.
-        - a remote URL that have been downloaded and stored locally in the
-          DataSource directory.
-        - a remote URL that has not been downloaded, but is valid and
-          accessible.
-
-        *Parameters*:
-
-            path : {string}
-                Can be a local file or a remote URL.
-
-        *Returns*:
-
-            boolean
-
-        *See Also*:
-
-            `abspath`
-
-        *Notes*
-
-            When ``path`` is an URL, ``exist`` will return True if it's either
-            stored locally in the DataSource directory, or is a valid remote
-            URL.  DataSource does not discriminate between to two, the file
-            is accessible if it exists in either location.
-
-        """
-
-        # Test local path
-        if os.path.exists(path):
-            return True
-
-        # Test cached url
-        upath = self.abspath(path)
-        if os.path.exists(upath):
-            return True
-
-        # Test remote url
-        if self._isurl(path):
-            try:
-                netfile = urlopen(path)
-                del(netfile)
-                return True
-            except URLError:
-                return False
-        return False
-
-    def open(self, path, mode='r'):
-        """Open ``path`` with ``mode`` and return the file object.
-
-        If ``path`` is an URL, it will be downloaded, stored in the DataSource
-        directory and opened from there.
-
-        *Parameters*:
-
-            path : {string}
-
-            mode : {string}, optional
-
-
-        *Returns*:
-
-            file object
-
-        """
-
-        # TODO: There is no support for opening a file for writing which
-        #       doesn't exist yet (creating a file).  Should there be?
-
-        # TODO: Add a ``subdir`` parameter for specifying the subdirectory
-        #       used to store URLs in self._destpath.
-
-        if self._isurl(path) and self._iswritemode(mode):
-            raise ValueError("URLs are not writeable")
-
-        # NOTE: _findfile will fail on a new file opened for writing.
-        found = self._findfile(path)
-        if found:
-            _fname, ext = self._splitzipext(found)
-            if ext == 'bz2':
-                mode.replace("+", "")
-            return _file_openers[ext](found, mode=mode)
-        else:
-            raise IOError("%s not found." % path)
-
-
-class Repository (DataSource):
-    """A data Repository where multiple DataSource's share a base URL/directory.
-
-    Repository extends DataSource by prepending a base URL (or directory) to
-    all the files it handles. Use a Repository when you will be working with
-    multiple files from one base URL.  Initialize the Respository with the
-    base URL, then refer to each file by it's filename only.
-
-    *Methods*:
-
-        - exists : test if the file exists locally or remotely
-        - abspath : get absolute path of the file in the DataSource directory
-        - open : open the file
-
-    *Toy example*::
-
-        # Analyze all files in the repository.
-        repos = Repository('/home/user/data/dir/')
-        for filename in filelist:
-            fp = repos.open(filename)
-            fp.analyze()
-            fp.close()
-
-        # Similarly you could use a URL for a repository.
-        repos = Repository('http://www.xyz.edu/data')
-
-    """
-
-    def __init__(self, baseurl, destpath=os.curdir):
-        """Create a Repository with a shared url or directory of baseurl."""
-        DataSource.__init__(self, destpath=destpath)
-        self._baseurl = baseurl
-
-    def __del__(self):
-        DataSource.__del__(self)
-
-    def _fullpath(self, path):
-        """Return complete path for path.  Prepends baseurl if necessary."""
-        splitpath = path.split(self._baseurl, 2)
-        if len(splitpath) == 1:
-            result = os.path.join(self._baseurl, path)
-        else:
-            result = path    # path contains baseurl already
-        return result
-
-    def _findfile(self, path):
-        """Extend DataSource method to prepend baseurl to ``path``."""
-        return DataSource._findfile(self, self._fullpath(path))
-
-    def abspath(self, path):
-        """Extend DataSource method to prepend baseurl to ``path``."""
-        return DataSource.abspath(self, self._fullpath(path))
-
-    def exists(self, path):
-        """Extend DataSource method to prepend baseurl to ``path``."""
-        return DataSource.exists(self, self._fullpath(path))
-
-    def open(self, path, mode='r'):
-        """Extend DataSource method to prepend baseurl to ``path``."""
-        return DataSource.open(self, self._fullpath(path), mode)
-
-    def listdir(self):
-        '''List files in the source Repository.'''
-        if self._isurl(self._baseurl):
-            raise NotImplementedError, \
-                  "Directory listing of URLs, not supported yet."
-        else:
-            return os.listdir(self._baseurl)

Modified: branches/io_new/npfile.py
===================================================================
--- branches/io_new/npfile.py	2008-01-01 01:42:14 UTC (rev 3753)
+++ branches/io_new/npfile.py	2008-01-01 02:26:21 UTC (rev 3754)
@@ -222,3 +222,11 @@
         if (not endian == 'dtype') and (dt_endian != endian):
             return arr.byteswap()
         return arr.copy()
+
+npfile = N.deprecate_with_doc("""
+You can achieve the same effect as using npfile, using ndarray.tofile
+and numpy.fromfile. 
+
+Even better you can use memory-mapped arrays and data-types to map out a 
+file format for direct manipulation in NumPy.
+""")(npfile)

Modified: branches/io_new/pickler.py
===================================================================
--- branches/io_new/pickler.py	2008-01-01 01:42:14 UTC (rev 3753)
+++ branches/io_new/pickler.py	2008-01-01 02:26:21 UTC (rev 3754)
@@ -1,13 +1,10 @@
 import cPickle
 
-# snip on----- DELETE after numpy.deprecate_with_doc is available
-import numpy
-numpy.deprecate_with_doc = lambda doc: (lambda func: func)
-# snip off---- DELETE after numpy.deprecate_with_doc is available
-
 from numpy import deprecate_with_doc
 
-@deprecate_with_doc('')
+@deprecate_with_doc("""
+Just use cPickle.dump directly or numpy.savez
+""")
 def objsave(file, allglobals, *args):
     """Pickle the part of a dictionary containing the argument list
     into file string.
@@ -27,6 +24,9 @@
     cPickle.dump(savedict,fid,1)
     fid.close()
 
+@deprecate_with_doc("""
+Just use cPickle.load or numpy.load. 
+""")
 def objload(file, allglobals):
     """Load a previously pickled dictionary and insert into given dictionary.