How to use the h5py._hl.dataset.Dataset class in h5py

To help you get started, we’ve selected a few examples showing popular ways h5py._hl.dataset.Dataset is used in public projects.

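Before the project snippets below, here is a minimal, self-contained sketch of where a h5py._hl.dataset.Dataset instance normally comes from (the file name example.h5 is arbitrary); note that h5py.Dataset is the public alias for the same class:

import h5py
import numpy as np

with h5py.File("example.h5", "w") as f:
    f.create_dataset("values", data=np.arange(10))

with h5py.File("example.h5", "r") as f:
    dset = f["values"]
    assert isinstance(dset, h5py._hl.dataset.Dataset)
    assert isinstance(dset, h5py.Dataset)   # public alias for the same class
    data = dset[()]                         # read the whole dataset into a NumPy array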

github dbcollection / dbcollection / tests / core / test_loader.py
def test_to_memory_to_disk(self):
        field_loader, _ = db_generator.get_test_data_FieldLoader('train')

        field_loader.to_memory = True
        field_loader.to_memory = False

        assert isinstance(field_loader.data, h5py._hl.dataset.Dataset)
github glotzerlab / signac / signac / core / h5store.py
_h5set(store, subgrp, k, v, path)

    # Regular built-in types:
    elif value is None:
        grp.create_dataset(key, data=None, shape=None, dtype='f')
    elif isinstance(value, (int, float, str, bool, array.array)):
        grp[key] = value
    elif isinstance(value, bytes):
        grp[key] = numpy.bytes_(value)

    # NumPy types
    elif type(value).__module__ == numpy.__name__:
        grp[key] = value

    # h5py native types
    elif isinstance(value, h5py._hl.dataset.Dataset):
        grp[key] = value  # Creates hard-link!

    # Other types
    else:
        _load_pandas()   # might be a pandas type
        if _is_pandas_type(value):
            _requires_tables()
            store.close()
            with _pandas.HDFStore(store._filename, mode='a') as store_:
                store_[path] = value
            store.open()
        else:
            grp[key] = value
            warnings.warn(
                "Storage for object of type '{}' appears to have succeeded, but this "
                "type is not officially supported!".format(type(value)))
github bio-phys / cadishi / cadishi / h5pickle.py
def load(h5_grp):
    """Load a HDF5 group recursively into a Python dictionary,
    and return the dictionary.
    """
    data = {}
    for key in list(h5_grp.keys()):
        h5py_class = h5_grp.get(key, getclass=True)
        if h5py_class is h5py._hl.group.Group:
            # print h5py_class, "Group"
            subgrp = h5_grp[key]
            val = load(subgrp)
        elif h5py_class is h5py._hl.dataset.Dataset:
            # print h5py_class, "Data"
            val = (h5_grp[key])[()]
        else:
            # shouldn't be reached at all
            raise ValueError
        data[key] = val
    for key in h5_grp.attrs:
        data[key] = h5_grp.attrs[key]
    return data
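A minimal usage sketch for the recursive loader above, assuming it is importable from the module shown in the header (cadishi.h5pickle); file layout and names are illustrative:

import h5py
import numpy as np
from cadishi.h5pickle import load   # assumed import path, based on the file shown above

with h5py.File("nested.h5", "w") as f:
    grp = f.create_group("outer")
    grp.create_dataset("x", data=np.arange(3))
    grp.attrs["label"] = "example"

with h5py.File("nested.h5", "r") as f:
    data = load(f)   # {'outer': {'x': array([0, 1, 2]), 'label': 'example'}}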
github h5py / h5py / h5py / _hl / group.py
The value to use where there is no data.

            """
            from .vds import VDSmap
            # Encode filenames and dataset names appropriately.
            sources = [VDSmap(vspace, filename_encode(file_name),
                              self._e(dset_name), src_space)
                       for (vspace, file_name, dset_name, src_space)
                       in layout.sources]

            with phil:
                dsid = dataset.make_new_virtual_dset(self, layout.shape,
                         sources=sources, dtype=layout.dtype,
                         maxshape=layout.maxshape, fillvalue=fillvalue)

                dset = dataset.Dataset(dsid)
                if name is not None:
                    self[name] = dset

            return dset
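The fragment above is h5py's internal implementation of virtual dataset creation; from user code the same machinery is reached through the public virtual dataset API (available in h5py 2.9 and later). A minimal sketch, with illustrative file and dataset names:

import h5py
import numpy as np

# Two source files, each holding a 1-D dataset of length 100.
for i in range(2):
    with h5py.File("src{}.h5".format(i), "w") as f:
        f.create_dataset("data", data=np.full(100, i, dtype="f4"))

# Stack them into a single (2, 100) virtual dataset.
layout = h5py.VirtualLayout(shape=(2, 100), dtype="f4")
for i in range(2):
    layout[i] = h5py.VirtualSource("src{}.h5".format(i), "data", shape=(100,))

with h5py.File("vds.h5", "w") as f:
    vds = f.create_virtual_dataset("stacked", layout, fillvalue=-1)
    assert isinstance(vds, h5py.Dataset)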
github h5py / h5py / h5py / _hl / dims.py
def items(self):
        """ Get a list of (name, Dataset) pairs with all scales on this
        dimension.
        """
        with phil:
            scales = []

            # H5DSiterate raises an error if there are no dimension scales,
            # rather than iterating 0 times.  See #483.
            if len(self) > 0:
                h5ds.iterate(self._id, self._dimension, scales.append, 0)

            return [
                (self._d(h5ds.get_scale_name(x)), Dataset(x))
                for x in scales
                ]
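From user code, Dimension.items() is reached through a dataset's .dims property. A minimal sketch, assuming h5py 2.10 or later for make_scale; names are illustrative:

import h5py
import numpy as np

with h5py.File("scales.h5", "w") as f:
    temps = f.create_dataset("temperature", data=np.random.rand(5))
    times = f.create_dataset("time", data=np.arange(5.0))

    times.make_scale("time")            # mark "time" as a dimension scale
    temps.dims[0].attach_scale(times)   # attach it to axis 0 of "temperature"

    for name, scale in temps.dims[0].items():   # (name, Dataset) pairs, as in the method above
        assert isinstance(scale, h5py.Dataset)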
github cctbx / cctbx_project / xfel / euxfel / write_composite_image.py
def recursive_copy(self,src, dest, mode='max', n_frames=None):
    print src, type(src)
    self.copy_attributes(src, dest)

    assert n_frames is not None, 'Need to provide n_frames'
    assert type(src) in [h5py._hl.group.Group, h5py._hl.files.File]
    for key in src:
      if type(src[key]) in [h5py._hl.group.Group, h5py._hl.files.File]:
        dest_child = dest.create_group(key)
        self.recursive_copy(src[key], dest_child,mode=mode,n_frames=n_frames)
      elif type(src[key]) is h5py._hl.dataset.Dataset:
        dataset = src[key]
        print key, dataset.shape
        if dataset.shape == (n_frames, 8192, 128):
          if dataset.name != "/entry_1/data_1/data":
            print "Skipping data block", dataset.name
            continue
          print '====================================='
          if mode == 'max':
            dmax = np.zeros((8192, 128))
            dmax[:] = -np.inf
            for i in range(n_frames):
              frame = dataset[i]
              dmax = np.maximum(dmax, frame)
            result = dmax.reshape(1 ,8192, 128)
          elif mode == 'mean':
            dsum = np.zeros((8192, 128))
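The excerpt above is Python 2 code and is truncated, but the core pattern is a streaming per-frame reduction: each frame is read from the Dataset individually instead of loading the whole array. A minimal sketch of that pattern (file name and dataset path are illustrative):

import h5py
import numpy as np

with h5py.File("frames.h5", "r") as f:
    dataset = f["/entry_1/data_1/data"]          # e.g. shape (n_frames, 8192, 128)
    running_max = np.full(dataset.shape[1:], -np.inf)
    for i in range(dataset.shape[0]):
        running_max = np.maximum(running_max, dataset[i])   # read one frame at a time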
github h5py / h5py / h5py / _hl / group.py
global default h5.get_config().track_order.
        external
            (Iterable of tuples) Sets the external storage property, thus
            designating that the dataset will be stored in one or more
            non-HDF5 files external to the HDF5 file.  Adds each tuple
            of (name, offset, size) to the dataset's list of external files.
            Each name must be a str, bytes, or os.PathLike; each offset and
            size, an integer.  If only a name is given instead of an iterable
            of tuples, it is equivalent to [(name, 0, h5py.h5f.UNLIMITED)].
        """
        if 'track_order' not in kwds:
            kwds['track_order'] = h5.get_config().track_order

        with phil:
            dsid = dataset.make_new_dset(self, shape, dtype, data, **kwds)
            dset = dataset.Dataset(dsid)
            if name is not None:
                self[name] = dset
            return dset
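The docstring excerpt above describes the external keyword of create_dataset; a minimal sketch of using it from user code, assuming a reasonably recent h5py (file names are illustrative):

import h5py
import numpy as np

with h5py.File("container.h5", "w") as f:
    # The raw values live in "payload.bin" next to the HDF5 file, not inside it.
    dset = f.create_dataset(
        "external_data",
        shape=(1000,),
        dtype="f8",
        external=[("payload.bin", 0, h5py.h5f.UNLIMITED)],
    )
    dset[:] = np.arange(1000, dtype="f8")
    assert isinstance(dset, h5py.Dataset)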
github NREL / OpenWARP / source / openwarpgui / nemoh / utility.py
def check_dataset_type(val, name='The hdf5 dataset', allow_none=False, print_value=True, location=''):
    """
    Check if the given value is an hdf5 dataset. And also check if the val is None.
    :param val: the given value to check
    :param name: name of val
    :param print_value: whether or not to print the value name in case of error
    :param location: The location of the potential hdf5 value to check
    :param allow_none: whether the val is allowed to be None
    :raise TypeError: if val is not of expected type
    :raise ValueError: if val is None while not allow None
    """
    none_msg = name + ' was not found in the hdf5 file at its location ' + location
    return check_type_value(val, name, h5py._hl.dataset.Dataset,
                     allow_none=allow_none, print_value=print_value, none_msg=none_msg)
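check_type_value is project-specific and not shown here; at its core the check is an isinstance test against h5py._hl.dataset.Dataset. A hypothetical, simplified equivalent for illustration only:

import h5py

def ensure_dataset(val, name="The hdf5 dataset", allow_none=False, location=""):
    # Hypothetical simplified equivalent of check_dataset_type above.
    if val is None:
        if allow_none:
            return None
        raise ValueError(name + " was not found in the hdf5 file at its location " + location)
    if not isinstance(val, h5py._hl.dataset.Dataset):
        raise TypeError(name + " is not an hdf5 dataset")
    return val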
github MaayanLab / biojupies-plugins / library / core_scripts / load / load.py
def upload(uid, filter_metadata=False, collapse_duplicates=True):

	# Load HDF5 File
	h5 = '/download/{uid}.h5'.format(**locals())
	with open(h5, 'wb') as openfile:
		openfile.write(urllib.request.urlopen('https://storage.googleapis.com/jupyter-notebook-generator-user-data/{uid}/{uid}.h5'.format(**locals())).read())
	f = h5py.File(h5, 'r')
		
	# Get data
	rawcount_dataframe = pd.DataFrame(data=f['data']['expression'].value, index=[x for x in f['meta']['gene']['symbol'].value], columns=[x for x in f['meta']['sample']['Sample'].value])
	sample_metadata_dataframe = pd.DataFrame({key: [x for x in value.value] if type(value) == h5py._hl.dataset.Dataset else [x for x in [y for y in value.items()][0][1].value] for key, value in f['meta']['sample'].items()}).set_index('Sample')#, drop=False).rename(columns={'Sample': 'Sample Title'})

	# Filter
	if filter_metadata:
		for column in sample_metadata_dataframe.columns:
			unique_vals = list(set(sample_metadata_dataframe[column]))
			if len(unique_vals) == 1 or any([len(x) > 20 for x in unique_vals]):
				sample_metadata_dataframe.drop(column, axis=1, inplace=True)

	# Collapse duplicates
	if collapse_duplicates and any(rawcount_dataframe.index.duplicated()):
		try:
			rawcount_dataframe = rawcount_dataframe.fillna(0).reset_index().groupby('index').sum()
		except:
			pass

	data = {'rawdata': rawcount_dataframe, 'sample_metadata': sample_metadata_dataframe, 'dataset_metadata': {'source': 'upload', 'datatype': 'rnaseq'}}
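Note that the .value attribute used above was deprecated and removed in h5py 3.x; the equivalent read today is dset[()]. A minimal sketch of building the raw-count DataFrame with the current API, assuming the same file layout as in the snippet:

import h5py
import pandas as pd

with h5py.File("expression.h5", "r") as f:
    expression = f["data"]["expression"]                    # an h5py Dataset
    genes = [g.decode() if isinstance(g, bytes) else g
             for g in f["meta"]["gene"]["symbol"][()]]
    samples = [s.decode() if isinstance(s, bytes) else s
               for s in f["meta"]["sample"]["Sample"][()]]
    rawcount_dataframe = pd.DataFrame(expression[()], index=genes, columns=samples)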
github mantidproject / mantid / scripts / AbinsModules / IOmodule.py
def _recursively_load_dict_contents_from_group(cls, hdf_file=None, path=None):
        """
        Loads structure dataset which has form of Python dictionary.
        :param hdf_file:  hdf file object from which dataset is loaded
        :param path: path to dataset in hdf file
        :returns: dictionary which was loaded from hdf file

        """
        ans = {}
        for key, item in hdf_file[path].items():
            # noinspection PyUnresolvedReferences,PyProtectedMember,PyProtectedMember
            if isinstance(item, h5py._hl.dataset.Dataset):
                ans[key] = item.value
            elif isinstance(item, h5py._hl.group.Group):
                ans[key] = cls._recursively_load_dict_contents_from_group(hdf_file, path + key + '/')
        return ans
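As in the previous example, item.value relies on the pre-3.0 h5py API. A minimal standalone equivalent of the recursive walk using item[()] instead (the function name is illustrative):

import h5py

def load_dict_from_group(hdf_file, path="/"):
    """Recursively mirror an HDF5 group as a nested Python dictionary."""
    result = {}
    for key, item in hdf_file[path].items():
        if isinstance(item, h5py._hl.dataset.Dataset):
            result[key] = item[()]                  # replaces the removed .value attribute
        elif isinstance(item, h5py._hl.group.Group):
            result[key] = load_dict_from_group(hdf_file, path + key + "/")
    return result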