How to use the h5py.new_vlen function in h5py

To help you get started, we’ve selected a few h5py examples based on popular ways it is used in public projects. Note that h5py.new_vlen dates from h5py 1.x: it was deprecated in h5py 2.0 in favor of h5py.special_dtype(vlen=...) and is no longer available in h5py 3, so the snippets below come from projects written against the older API.
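
Before the project snippets, here is a minimal, self-contained sketch of the pattern they all share: build a variable-length string dtype and use it as the dtype of a dataset. h5py.new_vlen only exists on old h5py releases; the replacements shown in the comments are the supported spellings on current versions.

import h5py

# h5py 1.x:                str_type = h5py.new_vlen(str)
# h5py 2.x:                str_type = h5py.special_dtype(vlen=str)
# h5py >= 2.10, incl. 3.x:
str_type = h5py.string_dtype()

with h5py.File("example.h5", "w") as f:
    # A one-element dataset holding a variable-length string.
    ds = f.create_dataset("greeting", (1,), dtype=str_type)
    ds[0] = "hello, variable-length world"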


github open-machine-learning / mldata / utils / h5conv / base.py View on Github external
class H5Converter(object):
    """Base converter class.

    @cvar str_type: string type to be used for variable length strings in h5py
    @type str_type: numpy.dtype

    @ivar fname_in: filename to read data from
    @type fname_in: string
    @ivar fname_out: filename to write converted data to
    @type fname_out: string
    @ivar labels_idx: indices for labels for each row
    @type labels_idx: list of integers
    @ivar seperator: separator used to separate variables in examples
    @type seperator: string
    """
    str_type = h5py.new_vlen(numpy.str)


    def __init__(self, fname_in, fname_out, seperator=None, remove_out=True):
        """
        @param seperator: separator used to separate examples
        @type seperator: string
        @param remove_out: whether the output file should be removed before running.
        @type remove_out: boolean
        """
        self.fname_in = fname_in
        self.fname_out = fname_out
        self.labels_idx = None
        self.set_seperator(seperator)

        # sometimes it seems files are not properly overwritten when opened by
        # 'w' during run().
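
A note on the class attribute above: numpy.str was never a separate NumPy type, only an alias for the built-in str; it was deprecated in NumPy 1.20 and removed in NumPy 1.24, so this line fails on a current stack. A minimal sketch of the same class-attribute idiom against current h5py and NumPy (the class name mirrors the snippet; the body is illustrative):

import h5py

class H5Converter(object):
    # Modern equivalent of h5py.new_vlen(numpy.str) on h5py >= 2.10.
    str_type = h5py.string_dtype()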
github siconos / siconos / io / swig / io / mechanics_hdf5.py View on Github external
    def add_mesh_from_string(self, name, shape_data, scale=None,
                             insideMargin=None, outsideMargin=None):
        """
        Add a mesh shape from a string.
        Accepted format: mesh encoded in the VTK .vtp format.
        """

        if name not in self._ref:

            shape = self._ref.create_dataset(name, (1,),
                                             dtype=h5py.new_vlen(str))
            shape[:] = shape_data
            shape.attrs['id'] = self._number_of_shapes
            shape.attrs['type'] = 'vtp'
            if scale is not None:
                shape.attrs['scale'] = scale
            if insideMargin is not None:
                shape.attrs['insideMargin'] = insideMargin
            if outsideMargin is not None:
                shape.attrs['outsideMargin'] = outsideMargin
            self._number_of_shapes += 1
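
The pattern above, a (1,)-shaped variable-length string dataset carrying its metadata as HDF5 attributes, can be reproduced standalone like this (file, dataset name, and payload are invented for illustration; h5py.string_dtype() stands in for the removed h5py.new_vlen(str)):

import h5py

with h5py.File("shapes.h5", "w") as f:
    # One-element dataset holding the whole mesh string.
    shape = f.create_dataset("my_mesh", (1,), dtype=h5py.string_dtype())
    shape[:] = '<VTKFile type="PolyData">...</VTKFile>'  # placeholder payload
    # Scalar metadata travels as attributes on the dataset.
    shape.attrs["id"] = 0
    shape.attrs["type"] = "vtp"
    shape.attrs["scale"] = 1.0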
github FXIhub / hummingbird / examples / advanced / save_camera / cxiwriter.py View on Github external
                group_prefix_new = group_prefix + k + "/"
                log_debug(logger, "Writing group %s" % group_prefix_new)
                if k not in self._f[group_prefix]:
                    self._f.create_group(group_prefix_new)
                self._write_without_iterate(D[k], group_prefix_new)
            else:
                name = group_prefix + k
                logger.debug( "Writing dataset %s" % name)
                data = D[k]
                if k not in self._f[group_prefix]:
                    if numpy.isscalar(data):
                        maxshape = (None,)
                        shape = (self._chunksize,)
                        dtype = numpy.dtype(type(data))
                        if dtype == "S":
                            dtype = h5py.new_vlen(str)
                        axes = "experiment_identifier:value"
                    else:
                        data = numpy.asarray(data)
                        try:
                            h5py.h5t.py_create(data.dtype, logical=1)
                        except TypeError:
                            logger.warning("Could not save dataset %s. Conversion to numpy array failed" % name)
                            continue
                        maxshape = tuple([None]+list(data.shape))
                        shape = tuple([self._chunksize]+list(data.shape))
                        dtype = data.dtype
                        ndim = data.ndim
                        axes = "experiment_identifier"
                        if ndim == 1: axes = axes + ":x"
                        elif ndim == 2: axes = axes + ":y:x"
                        elif ndim == 3: axes = axes + ":z:y:x"
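
The writer above allocates chunk-sized, unlimited-growth datasets so rows can be appended per event, swapping in a variable-length string dtype for scalar strings. A reduced sketch of that allocation logic under a modern h5py, with an invented helper name and fixed chunk size (this is not Hummingbird's API):

import numpy
import h5py

CHUNKSIZE = 16

def create_growable(f, name, sample):
    """Create a dataset sized for CHUNKSIZE entries that can grow without bound."""
    if numpy.isscalar(sample):
        shape, maxshape = (CHUNKSIZE,), (None,)
        dtype = numpy.dtype(type(sample))
        if dtype.kind in ("S", "U"):
            # Strings become variable-length, as with h5py.new_vlen(str).
            dtype = h5py.string_dtype()
    else:
        sample = numpy.asarray(sample)
        shape = (CHUNKSIZE,) + sample.shape
        maxshape = (None,) + sample.shape
        dtype = sample.dtype
    return f.create_dataset(name, shape=shape, maxshape=maxshape, dtype=dtype)

with h5py.File("events.h5", "w") as f:
    ds = create_growable(f, "entry_1/message", "a scalar string sample")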
github open-machine-learning / mldata-utils / ml2h5 / converter / basehandler.py View on Github external
class BaseHandler(object):
    """Base handler class.

    It is the base for classes to handle different data formats.
    It implicitly handles HDF5.

    @cvar str_type: string type to be used for variable length strings in h5py
    @type str_type: numpy.dtype

    @ivar fname: name of file to handle
    @type fname: string
    @ivar seperator: separator used to separate variables in examples
    @type seperator: string
    """
    str_type = h5py.new_vlen(numpy.str)


    def __init__(self, fname, seperator=None, compression=None, merge=False):
        """
        @param fname: name of in-file
        @type fname: string
        @param seperator: separator used to separate examples
        @type seperator: string
        """
        self.fname = fname
        self.compression = compression
        self.set_seperator(seperator)
        self.merge = merge


    def set_seperator(self, seperator):
github siconos / siconos / io / swig / io / mechanics_io.py View on Github external
    def addInteraction(self, name, body1_name, contactor1_name,
                       body2_name, contactor2_name,
                       distance_calculator='cadmbtb',
                       offset=0.0001):
        """
        Add a permanent interaction between two objects' contactors.
        """
        if name not in self.permanent_interactions():
            pinter = self.permanent_interactions().\
                      create_dataset(name, (1,),
                                     dtype=h5py.new_vlen(str))
            pinter.attrs['id'] = self._number_of_permanent_interactions
            pinter.attrs['type'] = 'permanent_interaction'
            pinter.attrs['body1_name'] = body1_name
            pinter.attrs['body2_name'] = body2_name
            pinter.attrs['contactor1_name'] = contactor1_name
            pinter.attrs['contactor2_name'] = contactor2_name
            pinter.attrs['distance_calculator'] = distance_calculator
            pinter.attrs['offset'] = offset

            self._pinterid[name] = pinter.attrs['id']
            self._number_of_permanent_interactions += 1
github janelia-flyem / gala / gala / segmentation_pipeline.py View on Github external
        md5hex = hashlib.md5(' '.join(sys.argv)).hexdigest()
        file_base = os.path.abspath(session_location)+"/seg_data/seg-"+str(threshold) + "-" + md5hex + "-"
        transforms = imio.compute_sp_to_body_map(supervoxels, segmentation)
        seg_loc = file_base +"v1.h5"
        if not os.path.exists(session_location+"/seg_data"):
            os.makedirs(session_location+"/seg_data")
        imio.write_mapped_segmentation(supervoxels, transforms, seg_loc)    

        if options.synapse_file is not None:
            h5temp = h5py.File(seg_loc, 'a')
            syn_data = json.load(open((options.synapse_file)))
            meta = syn_data['metadata']
            meta['username'] = "auto"
            syn_data_str = json.dumps(syn_data, indent=4)
            str_type = h5py.new_vlen(str)
            ds = h5temp.create_dataset("synapse-annotations", data=syn_data_str, shape=(1,), dtype=str_type)

        graph_loc = file_base+"graphv1.json"
       
        json_data = {}
        json_data['graph'] = graph_loc
        json_data['border'] = options.border_size  
        subvolume = {}
        subvolume['segmentation-file'] = seg_loc
        subvolume['prediction-file'] = os.path.abspath(session_location) + "/STACKED_prediction.h5"
        
        gray_file_whole = os.path.abspath(glob.glob(options.image_stack)[0])
        gray_path = os.path.dirname(gray_file_whole)
       
        gray_file = os.path.basename(gray_file_whole)
        field_width = len(re.findall(r'\d',gray_file))
github ilastik / lazyflow / lazyflow / utility / io_util / blockwiseFileset.py View on Github external
    def _createDatasetInFile(self, hdf5File, datasetName, roi):
        shape = tuple( roi[1] - roi[0] )
        chunks = self._description.chunks
        if chunks is not None:
            # chunks must not be bigger than the data in any dim
            chunks = numpy.minimum( chunks, shape )
            chunks = tuple(chunks)
        compression = self._description.compression
        compression_opts = self._description.compression_opts
        
        dtype=self._description.dtype
        if dtype == object:
            dtype = h5py.new_vlen(str)
        dataset = hdf5File.create_dataset( datasetName,
                                 shape=shape,
                                 dtype=dtype,
                                 chunks=chunks,
                                 compression=compression,
                                 compression_opts=compression_opts )

        # Set data attributes
        if self._description.drange is not None:
            dataset.attrs['drange'] = self._description.drange
        if _use_vigra:
            dataset.attrs['axistags'] = vigra.defaultAxistags( str(self._description.axes) ).toJSON()
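
The dtype mapping near the top of this snippet is the key trick: HDF5 has no object dtype, so an object dtype in the dataset description has to be declared as variable-length strings instead. A standalone sketch of that substitution (dataset name and shape are invented):

import numpy
import h5py

dtype = numpy.dtype(object)
if dtype == object:
    # HDF5 cannot store numpy's object dtype; use variable-length strings.
    dtype = h5py.string_dtype()

with h5py.File("blocks.h5", "w") as f:
    f.create_dataset("labels", shape=(4,), dtype=dtype)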
github eucall-software / simex_platform / python / src / SimEx / Utilities / prepHDF5.py View on Github external
    #print file_in['data'].keys()
    #print file_in['data'].items()

    # Create external link to parent's data
    #file_out['history/parent/detail/data'] = h5py.ExternalLink(src,'/data')
    parent_module = os.path.basename( src ) [ : os.path.basename( src ) .find( '_out' ) ]
    file_out['history/parent/detail/data'] = h5py.ExternalLink( '../' + parent_module + '/' + os.path.basename( src ) , '/data' )
    
    # Create your own groups
    grp_data = file_out.create_group( "data" )
    grp_param = file_out.create_group( "params" )
    grp_param = file_out.create_group( "misc" )
    grp_param = file_out.create_group( "info" )

    str_type = h5py.new_vlen(str)
    # Interface version
    dataset = file_out.create_dataset("version", (1,), dtype='f')
    dataset[...] = 0.1
    # Populate /info
    dataset = file_out.create_dataset("info/package_version",(1,), dtype=str_type)
    data = ("SingFEL v0.1.0")
    dataset[...] = data
    dataset = file_out.create_dataset("info/contact",(2,), dtype=str_type)
    data = ("Name: Chunhong Yoon", "Email: chun.hong.yoon@desy.de")
    dataset[...] = data
    dataset = file_out.create_dataset("info/data_description",(1,), dtype=str_type)
    data = ("This dataset contains a diffraction pattern generated using SingFEL.")
    dataset[...] = data
    dataset = file_out.create_dataset("info/method_description",(1,), dtype=str_type)
    data = ("Form factors of the radiation damaged molecules are calculated in time slices. At each time slice, the coherent scattering is calculated and incoherently added to the final diffraction pattern. Finally, Poissonian noise is added to the diffraction pattern.")
    dataset[...] = data
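
The same variable-length dtype also covers datasets holding more than one string, such as the (2,)-shaped info/contact entry above. A reduced sketch, reusing the snippet's field names and substituting h5py.string_dtype() for new_vlen(str):

import h5py

with h5py.File("out.h5", "w") as f:
    str_type = h5py.string_dtype()
    contact = f.create_dataset("info/contact", (2,), dtype=str_type)
    contact[...] = ["Name: Chunhong Yoon", "Email: chun.hong.yoon@desy.de"]
    version = f.create_dataset("version", (1,), dtype="f")
    version[...] = 0.1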
github FXIhub / hummingbird / src / analysis / cxiwriter.py View on Github external
    def _create_dataset(self, data, name):
        if numpy.isscalar(data):
            maxshape = (None,)
            shape = (self._chunksize,)
            dtype = numpy.dtype(type(data))
            if dtype == "S":
                dtype = h5py.new_vlen(str)
            axes = "experiment_identifier:value"
        else:
            data = numpy.asarray(data)
            try:
                h5py.h5t.py_create(data.dtype, logical=1)
            except TypeError:
                log_warning(logger, "(%i) Could not save dataset %s. Conversion to numpy array failed" % (self._rank, name))
                return 1
            maxshape = tuple([None]+list(data.shape))
            shape = tuple([self._chunksize]+list(data.shape))
            dtype = data.dtype
            ndim = data.ndim
            axes = "experiment_identifier"
            if ndim == 1: axes = axes + ":x"
            elif ndim == 2: axes = axes + ":y:x"
            elif ndim == 3: axes = axes + ":z:y:x"
github CellCognition / cecog / pysrc / cecog / analyzer / timeholder.py View on Github external
                                           maxshape=max_shape(raw_image_cpy.shape),
                                           compression=self._hdf5_compression)
            self._hdf5_file[raw_image_str].attrs['valid'] = raw_image_valid

            if self._hdf5_file[raw_image_str].shape[0] != len(self._regions_to_idx):
                self._hdf5_file[raw_image_str].resize(len(self._regions_to_idx), axis=0)
                
        if feature_dict is not None:
            if object_dict is not None:            
                for (key_desc, key_data), (value_desc, value_data) in feature_dict.items():
                    self._hdf5_file.create_dataset(key_desc, data=value_desc, compression=self._hdf5_compression)
                    if not value_data.dtype == numpy.dtype('O'):
                        d = self._hdf5_file.create_dataset(key_data, data=value_data, compression=self._hdf5_compression)
                        d.attrs["reused"] = True
                    else:
                        d = self._hdf5_file.create_dataset(key_data, data=value_data, compression=self._hdf5_compression, dtype=h5py.new_vlen(str))
                        d.attrs["reused"] = True
                    
                for (key_desc, key_data), (value_desc, value_data) in object_dict.items():
                    d = self._hdf5_file.create_dataset(key_desc, data=value_desc, compression=self._hdf5_compression)
                    d.attrs["reused"] = True
                    d = self._hdf5_file.create_dataset(key_data, data=value_data, compression=self._hdf5_compression)
                    d.attrs["reused"] = True
                
                
                
                
        self.cellh5_file = CH5File(self._hdf5_file)
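
The cecog writer falls back to a variable-length string dtype whenever the feature array has object dtype, since create_dataset rejects numpy object arrays otherwise. In isolation, that fallback looks roughly like this (array contents and dataset name are invented):

import numpy
import h5py

value_data = numpy.array(["track_1", "track_22"], dtype=object)

with h5py.File("features.h5", "w") as f:
    if value_data.dtype == numpy.dtype("O"):
        d = f.create_dataset("tracks", data=value_data,
                             dtype=h5py.string_dtype(), compression="gzip")
    else:
        d = f.create_dataset("tracks", data=value_data, compression="gzip")
    d.attrs["reused"] = True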