How to use the vaex.dataset module in vaex

To help you get started, we’ve selected a few vaex.dataset examples based on popular ways the module is used in public projects.
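
As a quick orientation, here is a minimal sketch of building an in-memory dataset with the lower-level vaex.dataset API that the snippets below rely on; the dataset, column, and variable names are illustrative, and the calls mirror the test code further down this page:

import numpy as np
import vaex.dataset

ds = vaex.dataset.DatasetArrays("example")   # in-memory dataset backed by numpy arrays
x = np.arange(10)
ds.add_column("x", x)                        # plain column
ds.add_column("y", x ** 2)
ds.set_variable("t", 1.)                     # scalar variable usable inside expressions
ds.add_virtual_column("z", "x + t * y")      # virtual column, evaluated on the fly

Newer vaex releases also provide a DataFrame-style entry point, for example vaex.from_arrays(x=x, y=x ** 2), but the snippets on this page use the lower-level vaex.dataset classes directly.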


github vaexio / vaex / test / dataset.py View on GitHub
def setUp(self):
		self.dataset = dataset.DatasetArrays("dataset")

		self.x = x = np.arange(10)
		self.y = y = x ** 2
		self.dataset.add_column("x", x)
		self.dataset.add_column("y", y)
		self.dataset.set_variable("t", 1.)
		self.dataset.add_virtual_column("z", "x+t*y")
		self.dataset.units["x"] = astropy.units.Unit("km")
		self.dataset.units["y"] = astropy.units.Unit("km/s")
		self.dataset.units["t"] = astropy.units.Unit("s")
		self.dataset.add_column("f", np.arange(len(self.dataset), dtype=np.float64))
		self.dataset.ucds["x"] = "some;ucd"

		#self.jobsManager = dataset.JobsManager()

		x = np.array([0., 1])
github vaexio / vaex / test / dataset.py View on GitHub
def test_selection(self):
		total = self.dataset("x").sum()
		self.dataset.select("x > 5")
		total_subset = self.dataset("x").selected().sum()
		self.assertLess(total_subset, total)
		for mode in vaex.dataset._select_functions.keys():
			self.dataset.select("x > 5")
			self.dataset.select("x > 5", mode)
			self.dataset.select(None)
			self.dataset.select("x > 5", mode)

		pass # TODO
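
Based on the calls in this test, selection usage can be sketched as follows (ds stands for a dataset built as in the setUp example above; this is a fragment, not the full selection API):

ds.select("x > 5")                                  # define the current selection
subset_total = ds("x").selected().sum()             # aggregate over the selected rows only
for mode in vaex.dataset._select_functions.keys():  # combine modes exposed by vaex
    ds.select("x > 5", mode)
ds.select(None)                                     # clear the selection
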
github vaexio / vaex / test / dataset.py View on GitHub
def concat(*types):
			arrays = [np.arange(3, dtype=dtype) for dtype in types]
			N = len(arrays)
			datasets = [vx.dataset.DatasetArrays("dataset-%i" % i)  for i in range(N)]
			for dataset, array in zip(datasets, arrays):
				dataset.add_column("x", array)
			dataset_concat = vx.dataset.DatasetConcatenated(datasets, name="dataset_concat")
			return dataset_concat
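
A possible way to call this helper (the dtypes are just an example); the row count assumes len() counts rows across the concatenated parts, as it does for DatasetArrays in the setUp snippet above:

ds_concat = concat(np.float32, np.float64)  # two 3-row datasets joined into one
assert len(ds_concat) == 6                  # 3 rows from each input array
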
github vaexio / vaex / tests / common.py View on GitHub
def create_filtered():
    ds = create_base_ds()
    ds.select('(x >= 0) & (x < 10)', name=vaex.dataset.FILTER_SELECTION_NAME)
    return ds
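
create_base_ds() is defined elsewhere in the test suite and is not shown here; the same filtering pattern works on any dataset that has an x column, roughly like this (dataset name and column contents are illustrative):

import numpy as np
import vaex.dataset

ds = vaex.dataset.DatasetArrays("base")
ds.add_column("x", np.arange(-5, 15))
# registering a selection under FILTER_SELECTION_NAME makes it act as the dataset's row filter
ds.select('(x >= 0) & (x < 10)', name=vaex.dataset.FILTER_SELECTION_NAME)
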
github vaexio / vaex / packages / vaex-ui / vaex / ui / variables.py View on GitHub
        # if property == "Units":
        # if value:
        # try:
        # unit = astropy.units.Unit(value)
        # logger.debug("setting unit to: %s (%s)" % (value, unit))
        # self.dataset.units[column_name] = unit
        # # TODO: move to dataset class
        # self.dataset.signal_column_changed.emit(self.dataset, column_name, "change")
        # except Exception, e:
        # dialogs.dialog_error(None, "Cannot parse unit", "Cannot parse unit:\n %s" % e)
        # else:
        # if column_name in self.dataset.units:
        # del self.dataset.units[column_name]
        if property == "Expression":
            try:
                test = eval(value, vaex.dataset.expression_namespace, self.dataset.variables)
                self.dataset.add_variable(variable_name, value)
            except Exception as e:
                dialogs.dialog_error(None, "Invalid expression", "Invalid expression: %s" % e)
            # although it may not be a valid expression, still set it so the user can edit it
            # self.dataset.virtual_columns[column_name] = value

        self.dataset.write_meta()
        return True
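
The pattern used here, validating an expression with eval() against vaex's expression namespace before storing it, can be sketched as follows (ds, the expression, and the variable name t2 are illustrative; ds is assumed to already have a variable t, as in the setUp example near the top of this page):

expr = "t * 2"
try:
    eval(expr, vaex.dataset.expression_namespace, ds.variables)  # raises if the expression is invalid
    ds.add_variable("t2", expr)
except Exception as e:
    print("invalid expression:", e)
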
github vaexio / vaex / packages / vaex-core / vaex / misc / pandawrap.py View on GitHub
	z = pd.Series(dataset.columns["z"], index=index)
	f = pd.DataFrame({"x": x, "y":y, "z":z})
	print((f.x.mean()))
	print((f.y.mean()))
	print((f.z.mean()))
	meminfo()

	#y = pd.Series(dataset.columns["x"])




if __name__ == "__main__":
	input = sys.argv[1]
	#output = sys.argv[2]
	dataset_in = vaex.dataset.load_file(input)
	test_pandas(dataset_in)
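
The idea is that dataset.columns exposes the underlying numpy arrays, so they can be wrapped in pandas objects, typically without copying. A rough standalone sketch, with an illustrative file name and assuming the file has x and y columns:

import numpy as np
import pandas as pd
import vaex.dataset

ds = vaex.dataset.load_file("somefile.hdf5")  # illustrative path
index = np.arange(len(ds))
x = pd.Series(ds.columns["x"], index=index)   # wraps the column's numpy array
y = pd.Series(ds.columns["y"], index=index)
df = pd.DataFrame({"x": x, "y": y})
print(df.x.mean())
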
github vaexio / vaex / packages / vaex-core / vaex / ext / ipyvolume.py View on GitHub
def _update_image(self):
        with self.output:
            grid = self.get_grid()
            if self.smooth_pre:
                for i in range(grid.shape[0]):
                    grid[i] = vaex.grids.gf(grid[i], self.smooth_pre)
            f = vaex.dataset._parse_f(self.f)
            fgrid = f(grid)
            if self.smooth_post:
                for i in range(grid.shape[0]):
                    fgrid[i] = vaex.grids.gf(fgrid[i], self.smooth_post)
            ngrid, fmin, fmax = self.normalise(fgrid)
            print(ngrid.shape)
            if len(ngrid.shape) == 4:
                #if ngrid.shape[0] == 1:
                ngrid = ngrid[-1]
            p3.volshow(ngrid.T, controls=self._first_time)

            vx, vy, vz = self.vgrids[:3]
            vcount = self.vcount
            if vx is not None and vy is not None and vz is not None and vcount is not None:
                vcount = vcount[-1] # no multivolume render, just take the last selection
                vx = vx[-1]
github vaexio / vaex / packages / vaex-core / vaex / file / colfits.py View on GitHub
			chunks = int(math.ceil(len(dataset)/float(chunk_size)))
			for i in range(chunks):
				i1 = i * chunk_size
				i2 = min(len(dataset), (i+1) * chunk_size)
				data_big_endian = column[i1:i2].astype(">" + numpy_type_name)
				f.write(data_big_endian)
			print((f.tell(), f.tell() / 1024**2, "mb", len(dataset)))
			assert i2 == len(dataset)
		finish_data()



if __name__ == "__main__":
	input = sys.argv[1]
	output = sys.argv[2]
	dataset_in = vaex.dataset.load_file(input)
	write_colfits(dataset_in, output)
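
The chunking arithmetic above is not specific to FITS; a minimal standalone sketch of the same pattern (array size, chunk size, and file name are arbitrary) looks like this:

import numpy as np

column = np.arange(10000, dtype=np.float64)
chunk_size = 4096
chunks = int(np.ceil(len(column) / float(chunk_size)))
with open("column.bin", "wb") as f:
    for i in range(chunks):
        i1 = i * chunk_size
        i2 = min(len(column), (i + 1) * chunk_size)
        f.write(column[i1:i2].astype(">f8").tobytes())  # big-endian float64, as column FITS expects
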
github vaexio / vaex / packages / vaex-distributed / vaex / distributed / __init__.py View on GitHub
    def correlation(self, means=None, vars=None):
        return self.dataset.server._call_subspace("correlation", self, means=means, vars=vars)

    def var(self, means=None):
        return self.dataset.server._call_subspace("var", self, means=means)

    def sum(self):
        return self.dataset.server._call_subspace("sum", self)

    def limits_sigma(self, sigmas=3, square=False):
        return self.dataset.server._call_subspace("limits_sigma", self, sigmas=sigmas, square=square)

    def mutual_information(self, limits=None, size=256):
        return self.dataset.server._call_subspace("mutual_information", self, limits=limits, size=size)


class DatasetDistributed(vaex.dataset.Dataset):
    def __init__(self, datasets):
        super(DatasetDistributed, self).__init__(datasets[0].name, datasets[0].column_names)
        self.datasets = datasets
        self.executor = ServerExecutor()
        # self.name = self.datasets[0].name
        #   self.column_names = self.datasets[0].column_names
        self.dtypes = self.datasets[0].dtypes
        self.units = self.datasets[0].units
        self.virtual_columns.update(self.datasets[0].virtual_columns)
        self.ucds = self.datasets[0].ucds
        self.descriptions = self.datasets[0].descriptions
        self.description = self.datasets[0].description
        self._length_original = self.datasets[0].length_original()
        self._length_unfiltered = self.datasets[0].length_unfiltered()
        self.path = self.datasets[0].path  # maybe we should use some cluster name or so
        parts = np.linspace(0, self._length_original, len(self.datasets)+1, dtype=int)
github vaexio / vaex / packages / vaex-core / vaex / distributed.py View on GitHub
    def correlation(self, means=None, vars=None):
        return self.dataset.server._call_subspace("correlation", self, means=means, vars=vars)

    def var(self, means=None):
        return self.dataset.server._call_subspace("var", self, means=means)

    def sum(self):
        return self.dataset.server._call_subspace("sum", self)

    def limits_sigma(self, sigmas=3, square=False):
        return self.dataset.server._call_subspace("limits_sigma", self, sigmas=sigmas, square=square)

    def mutual_information(self, limits=None, size=256):
        return self.dataset.server._call_subspace("mutual_information", self, limits=limits, size=size)

class DatasetDistributed(vaex.dataset.Dataset):
    def __init__(self, datasets):
        super(DatasetDistributed, self).__init__(datasets[0].name, datasets[0].column_names)
        self.datasets = datasets
        self.executor = ServerExecutor()
        #self.name = self.datasets[0].name
        #self.column_names = self.datasets[0].column_names
        self.dtypes = self.datasets[0].dtypes
        self.units = self.datasets[0].units
        self.virtual_columns.update(self.datasets[0].virtual_columns)
        self.ucds = self.datasets[0].ucds
        self.descriptions = self.datasets[0].descriptions
        self.description = self.datasets[0].description
        self._full_length = self.datasets[0].full_length()
        self._length = self._full_length
        self.path = self.datasets[0].path  # maybe we should use some cluster name or so
        parts = np.linspace(0, self._length, len(self.datasets)+1, dtype=int)
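
The parts array computed at the end splits the total row range into one contiguous slice per remote dataset. For example:

import numpy as np

length = 100
n_datasets = 4
parts = np.linspace(0, length, n_datasets + 1, dtype=int)
# parts is [0, 25, 50, 75, 100]; dataset k is responsible for rows parts[k]:parts[k+1]
print(list(zip(parts[:-1], parts[1:])))  # consecutive pairs give each dataset's row range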