How to use vaex - 10 common examples

To help you get started, we’ve selected a few vaex examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vaexio / vaex / test / ui.py View on Github external
def test_select_by_expression(self):
		"""Click the layer's selection-by-expression button after setting axis labels.

		NOTE(review): the bare ``return`` right after the click ends the test
		early, so the wait, the ``no_exceptions`` check and the PNG comparison
		below it are never executed.
		"""
		self.window.xlabel = "x"
		self.window.ylabel = "y"
		##self.window._wait() # TODO: is this a bug? if we don't wait and directly do the selection, the ThreadPoolIndex
		## is entered twice, not sure this can happen from the gui
		# presumably pre-loads "x < 0" as the answer to the dialog opened by the click -- confirm against vaex.ui.qt.set_choose
		vaex.ui.qt.set_choose("x < 0", True)
		logger.debug("click mouse")
		QtTest.QTest.mouseClick(self.layer.button_selection_expression, QtCore.Qt.LeftButton)
		logger.debug("clicked mouse")
		# NOTE(review): everything after this return is unreachable
		return
		self.window._wait()
		self.assertTrue(self.no_exceptions)

		# render the plot to a PNG and compare it with the stored reference image
		filename = self.window.plot_to_png()
		self.compare(filename, get_comparison_image("example_xy_selection_on_x"))
github vaexio / vaex / test / dataset.py View on Github external
def setUp(self):
		"""Build the in-memory dataset fixture stored on ``self.dataset``.

		The fixture gets columns ``x`` (``np.arange(10)``), ``y`` (``x ** 2``)
		and ``f`` (float64 row index), a variable ``t``, a virtual column ``z``
		defined as ``"x+t*y"``, units for ``x``, ``y`` and ``t``, and a UCD for
		``x``. The arrays are also kept on ``self.x`` and ``self.y``.
		"""
		self.dataset = dataset.DatasetArrays("dataset")

		self.x = x = np.arange(10)
		self.y = y = x ** 2
		self.dataset.add_column("x", x)
		self.dataset.add_column("y", y)
		self.dataset.set_variable("t", 1.)
		self.dataset.add_virtual_column("z", "x+t*y")
		self.dataset.units["x"] = astropy.units.Unit("km")
		self.dataset.units["y"] = astropy.units.Unit("km/s")
		self.dataset.units["t"] = astropy.units.Unit("s")
		# "f" is sized from the dataset's length at this point, i.e. after "x" and "y" were added
		self.dataset.add_column("f", np.arange(len(self.dataset), dtype=np.float64))
		self.dataset.ucds["x"] = "some;ucd"

		#self.jobsManager = dataset.JobsManager()

		# NOTE(review): x is rebound here but not used again in the visible part
		# of this method -- the excerpt appears to be cut off at this point
		x = np.array([0., 1])
github vaexio / vaex / test / ui.py View on Github external
def test_invalid_expression(self):
		"""Assign malformed or unknown expressions to the layer inside ``dialogs.assertError`` blocks.

		In every block the number passed to ``assertError`` equals the number
		of assignments made inside it; presumably that is the number of error
		dialogs expected -- confirm against ``dialogs.assertError``.
		Afterwards ``x``, ``y`` and ``weight`` are set to plain column names.
		"""
		self.window._wait()

		# two assignments with a dangling operator
		with dialogs.assertError(2):
			self.layer.x = "vx*"
			self.layer.y = "vy&"
		# three assignments: two calls of names used as functions, one plain unknown name
		with dialogs.assertError(3):
			self.layer.x = "hoeba(vx)"
			self.layer.x = "x(vx)"
			self.layer.y = "doesnotexist"
		# same kind of bad expressions on the vx/vy attributes
		with dialogs.assertError(2):
			self.layer.vx = "hoeba(vx)"
			self.layer.vy = "x(vx)"
		# and on the weight attribute
		with dialogs.assertError(1):
			self.layer.weight = "hoeba(vx)"
		# these assignments are made outside any assertError block
		self.layer.x = "x"
		self.layer.y = "y"
		self.layer.weight = "z"
		#self.window._wait()
		# since this will be triggered, overrule it
		# NOTE(review): the excerpt ends here; the statement this comment refers to is not visible
github vaexio / vaex / test / dataset.py View on Github external
#print dataset.full_length()
									#print len(dataset)
									if export == dataset.export_hdf5:
										path = path_hdf5
										export(path, column_names=column_names, byteorder=byteorder, shuffle=shuffle, selection=selection, progress=False)
									else:
										path = path_fits
										export(path, column_names=column_names, shuffle=shuffle, selection=selection, progress=False)
										with astropy.io.fits.open(path) as fitsfile:
											# make sure astropy can read the data
											bla = fitsfile[1].data
											try:
												fitsfile.writeto(path_fits_astropy)
											finally:
												os.remove(path_fits_astropy)
									compare = vx.open(path)
									column_names = column_names or ["x", "y", "f", "z"]
									# TODO: does the order matter?
									self.assertEqual(sorted(compare.get_column_names()), sorted(column_names + (["random_index"] if shuffle else [])))
									for column_name in column_names:
										values = dataset.evaluate(column_name)
										if selection:
											self.assertEqual(sorted(compare.columns[column_name]), sorted(values[dataset.mask]))
										else:
											if shuffle:
												indices = compare.columns["random_index"]
												self.assertEqual(sorted(compare.columns[column_name]), sorted(values[indices]))
											else:
												self.assertEqual(sorted(compare.columns[column_name]), sorted(values[:length]))
									compare.close_files()

				# self.dataset_concat_dup references self.dataset, so set its active_fraction to 1 again
github vaexio / vaex / test / ui.py View on Github external
for shuffle in [False, True]:
							for selection in [False, True]:
								for export in [dataset.export_fits, dataset.export_hdf5] if byteorder == ">" else [dataset.export_hdf5]:
									type = "hdf5" if export == dataset.export_hdf5 else "fits"
									if shuffle and selection:
										continue # TODO: export should fail on this combination
									#print column_names, byteorder, shuffle, selection, type
									if export == dataset.export_hdf5:
										path = path_hdf5
										path_ui = path_hdf5_ui
										export(path, column_names=column_names, byteorder=byteorder, shuffle=shuffle, selection=selection)
									else:
										path = path_fits
										path_ui = path_fits_ui
										export(path, column_names=column_names, shuffle=shuffle, selection=selection)
									compare_direct = vx.open(path)

									dialogs.set_choose(1 if selection else 0).then("=<>".index(byteorder))
									# select columns
									dialogs.set_select_many(True, [name in column_names for name in dataset.get_column_names()])
									counter_confirm = CallCounter(return_value=shuffle)
									counter_info = CallCounter()
									dialogs.dialog_confirm = counter_confirm
									dialogs.dialog_info = counter_info
									dialogs.get_path_save = lambda *args: path_ui
									dialogs.ProgressExecution = dialogs.FakeProgressExecution
									import sys
									sys.stdout.flush()

									self.app.export(type=type)
									compare_ui = vx.open(path_ui)
github vaexio / vaex / tests / internal / superagg_tests.py View on Github external
def test_count_1d_strings():
    """Count string values on a one-dimensional grid binned by a float column.

    Six floats are handed to a ``BinnerScalar_float64`` built with the
    arguments ``('x', 0, 5, 5)``; their string forms (with the third entry
    replaced by ``None``) are handed to an ``AggCount_string``. The expected
    result has eight entries for five requested bins -- presumably the extra
    slots hold missing/out-of-range counts; confirm against ``vaex.superagg``.
    """
    values = np.array([-1, -2, 0.5, 1.5, 4.5, 5], dtype='f8')
    labels = values.astype(str).astype('O')
    labels[2] = None  # one missing string among the inputs
    labels = vaex.column._to_string_sequence(labels)
    bin_count = 5
    scalar_binner = vaex.superagg.BinnerScalar_float64('x', 0, 5, bin_count)
    scalar_binner.set_data(values)

    grid = vaex.superagg.Grid([scalar_binner])
    counter = vaex.superagg.AggCount_string(grid)
    counter.set_data(labels, 0)
    # The array is taken before binning and checked after it; presumably it is
    # a view on the aggregator's buffer -- confirm.
    counts = np.asarray(counter)
    grid.bin([counter])
    assert counts.tolist() == [0, 2, 0, 1, 0, 0, 1, 1]
github vaexio / vaex / test / dataset.py View on Github external
def test_selection(self):
		"""Check that summing ``x`` over the selection ``x > 5`` gives less than the full sum.

		Then, for every key of ``vaex.dataset._select_functions``, the same
		expression is applied with that mode twice: once on top of an existing
		``x > 5`` selection and once after the selection was reset with ``None``.
		"""
		expression = "x > 5"
		sum_all = self.dataset("x").sum()
		self.dataset.select(expression)
		sum_selected = self.dataset("x").selected().sum()
		self.assertLess(sum_selected, sum_all)
		for mode in vaex.dataset._select_functions.keys():
			# first with a prior selection in place, then starting from no selection
			for prior in (expression, None):
				self.dataset.select(prior)
				self.dataset.select(expression, mode)

		# TODO
github vaexio / vaex / test / dataset.py View on Github external
def concat(*types):
			"""Return a ``DatasetConcatenated`` named "dataset_concat" with one part per dtype.

			Each part is a ``DatasetArrays`` called ``dataset-<index>`` holding a
			single column ``x`` equal to ``np.arange(3)`` in the matching dtype.
			"""
			columns = [np.arange(3, dtype=kind) for kind in types]
			parts = [vx.dataset.DatasetArrays("dataset-%i" % index) for index, _ in enumerate(columns)]
			for part, column in zip(parts, columns):
				part.add_column("x", column)
			return vx.dataset.DatasetConcatenated(parts, name="dataset_concat")
github vaexio / vaex / tests / common.py View on Github external
def create_filtered():
    """Return the base dataset with ``(x >= 0) & (x < 10)`` selected under the filter selection name."""
    filtered = create_base_ds()
    filtered.select(
        '(x >= 0) & (x < 10)',
        name=vaex.dataset.FILTER_SELECTION_NAME,
    )
    return filtered
github vaexio / vaex / test / ui.py View on Github external
export(path, column_names=column_names, byteorder=byteorder, shuffle=shuffle, selection=selection)
									else:
										path = path_fits
										path_ui = path_fits_ui
										export(path, column_names=column_names, shuffle=shuffle, selection=selection)
									compare_direct = vx.open(path)

									dialogs.set_choose(1 if selection else 0).then("=<>".index(byteorder))
									# select columns
									dialogs.set_select_many(True, [name in column_names for name in dataset.get_column_names()])
									counter_confirm = CallCounter(return_value=shuffle)
									counter_info = CallCounter()
									dialogs.dialog_confirm = counter_confirm
									dialogs.dialog_info = counter_info
									dialogs.get_path_save = lambda *args: path_ui
									dialogs.ProgressExecution = dialogs.FakeProgressExecution
									import sys
									sys.stdout.flush()

									self.app.export(type=type)
									compare_ui = vx.open(path_ui)

									column_names = column_names or ["x", "y", "z"]
									self.assertEqual(compare_direct.get_column_names(), compare_ui.get_column_names())
									for column_name in column_names:
										values_ui = compare_ui.evaluate(column_name)
										values = compare_direct.evaluate(column_name)
										self.assertEqual(sorted(values), sorted(values_ui))