Issue #1305: Performance Issues with large BoxLib datasets (yt_analysis/yt)

New issue 1305: Performance Issues with large BoxLib datasets
https://bitbucket.org/yt_analysis/yt/issues/1305/performance-issues-with-lar...

Chris Byrohl:

When loading a large BoxLib dataset (~270 GB), yt is stuck for hours without making any progress. Interrupting the run shows that it is stuck in _reconstruct_parent_child(self):

```
#!python
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-14-fa86699093be> in <module>()
----> 1 box['density']

/home/uni09/cosmo/cbyrohl/anaconda3/envs/py35/lib/python3.5/site-packages/yt/data_objects/data_containers.py in __getitem__(self, key)
    263 Returns a single field. Will add if necessary.
    264 """
--> 265 f = self._determine_fields([key])[0]
    266 if f not in self.field_data and key not in self.field_data:
    267 if f in self._container_fields:

/home/uni09/cosmo/cbyrohl/anaconda3/envs/py35/lib/python3.5/site-packages/yt/data_objects/data_containers.py in _determine_fields(self, fields)
    993 else:
    994 fname = field
--> 995 finfo = self.ds._get_field_info("unknown", fname)
    996 if finfo.particle_type:
    997 ftype = self._current_particle_type

/home/uni09/cosmo/cbyrohl/anaconda3/envs/py35/lib/python3.5/site-packages/yt/data_objects/static_output.py in _get_field_info(self, ftype, fname)
    622 _last_finfo = None
    623 def _get_field_info(self, ftype, fname = None):
--> 624 self.index
    625 if fname is None:
    626 if isinstance(ftype, DerivedField):

/home/uni09/cosmo/cbyrohl/anaconda3/envs/py35/lib/python3.5/site-packages/yt/data_objects/static_output.py in index(self)
    417 raise RuntimeError("You should not instantiate Dataset.")
    418 self._instantiated_index = self._index_class(
--> 419 self, dataset_type=self.dataset_type)
    420 # Now we do things that we need an instantiated index for
    421 # ...first off, we create our field_info now.

/home/uni09/cosmo/cbyrohl/anaconda3/envs/py35/lib/python3.5/site-packages/yt/frontends/boxlib/data_structures.py in __init__(self, ds, dataset_type)
    144 self.directory = ds.output_dir
    145
--> 146 GridIndex.__init__(self, ds, dataset_type)
    147 self._cache_endianness(self.grids[-1])
    148

/home/uni09/cosmo/cbyrohl/anaconda3/envs/py35/lib/python3.5/site-packages/yt/geometry/geometry_handler.py in __init__(self, ds, dataset_type)
     48
     49 mylog.debug("Setting up domain geometry.")
---> 50 self._setup_geometry()
     51
     52 mylog.debug("Initializing data grid data IO")

/home/uni09/cosmo/cbyrohl/anaconda3/envs/py35/lib/python3.5/site-packages/yt/geometry/grid_geometry_handler.py in _setup_geometry(self)
     52
     53 mylog.debug("Constructing grid objects.")
---> 54 self._populate_grid_objects()
     55
     56 mylog.debug("Re-examining index")

/home/uni09/cosmo/cbyrohl/anaconda3/envs/py35/lib/python3.5/site-packages/yt/frontends/boxlib/data_structures.py in _populate_grid_objects(self)
    295 mylog.debug("Creating grid objects")
    296 self.grids = np.array(self.grids, dtype='object')
--> 297 self._reconstruct_parent_child()
    298 for i, grid in enumerate(self.grids):
    299 if (i % 1e4) == 0: mylog.debug("Prepared % 7i / % 7i grids", i,

/home/uni09/cosmo/cbyrohl/anaconda3/envs/py35/lib/python3.5/site-packages/yt/frontends/boxlib/data_structures.py in _reconstruct_parent_child(self)
    311 self.grid_levels[i] + 1,
    312 self.grid_left_edge, self.grid_right_edge,
--> 313 self.grid_levels, mask)
    314 ids = np.where(mask.astype("bool")) # where is a tuple
    315 grid._children_ids = ids[0] + grid._id_offset
```

The result of

```
#!python
np.savez('data.npz', left_edge=self.grid_left_edge, right_edge=self.grid_right_edge, levels=self.grid_levels)
```

when run at the beginning of that routine can be found here: http://use.yt/upload/87b007b1
participants (1)
-
Chris Byrohl