How to use the vaex.open function in vaex

To help you get started, we’ve selected a few vaex examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vaexio / vaex / test / dataset.py View on Github external
#print dataset.full_length()
									#print len(dataset)
									if export == dataset.export_hdf5:
										path = path_hdf5
										export(path, column_names=column_names, byteorder=byteorder, shuffle=shuffle, selection=selection, progress=False)
									else:
										path = path_fits
										export(path, column_names=column_names, shuffle=shuffle, selection=selection, progress=False)
										with astropy.io.fits.open(path) as fitsfile:
											# make sure astropy can read the data
											bla = fitsfile[1].data
											try:
												fitsfile.writeto(path_fits_astropy)
											finally:
												os.remove(path_fits_astropy)
									compare = vx.open(path)
									column_names = column_names or ["x", "y", "f", "z"]
									# TODO: does the order matter?
									self.assertEqual(sorted(compare.get_column_names()), sorted(column_names + (["random_index"] if shuffle else [])))
									for column_name in column_names:
										values = dataset.evaluate(column_name)
										if selection:
											self.assertEqual(sorted(compare.columns[column_name]), sorted(values[dataset.mask]))
										else:
											if shuffle:
												indices = compare.columns["random_index"]
												self.assertEqual(sorted(compare.columns[column_name]), sorted(values[indices]))
											else:
												self.assertEqual(sorted(compare.columns[column_name]), sorted(values[:length]))
									compare.close_files()

				# self.dataset_concat_dup references self.dataset, so set its active_fraction to 1 again
github vaexio / vaex / test / ui.py View on Github external
for shuffle in [False, True]:
							for selection in [False, True]:
								for export in [dataset.export_fits, dataset.export_hdf5] if byteorder == ">" else [dataset.export_hdf5]:
									type = "hdf5" if export == dataset.export_hdf5 else "fits"
									if shuffle and selection:
										continue # TODO: export should fail on this combination
									#print column_names, byteorder, shuffle, selection, type
									if export == dataset.export_hdf5:
										path = path_hdf5
										path_ui = path_hdf5_ui
										export(path, column_names=column_names, byteorder=byteorder, shuffle=shuffle, selection=selection)
									else:
										path = path_fits
										path_ui = path_fits_ui
										export(path, column_names=column_names, shuffle=shuffle, selection=selection)
									compare_direct = vx.open(path)

									dialogs.set_choose(1 if selection else 0).then("=<>".index(byteorder))
									# select columns
									dialogs.set_select_many(True, [name in column_names for name in dataset.get_column_names()])
									counter_confirm = CallCounter(return_value=shuffle)
									counter_info = CallCounter()
									dialogs.dialog_confirm = counter_confirm
									dialogs.dialog_info = counter_info
									dialogs.get_path_save = lambda *args: path_ui
									dialogs.ProgressExecution = dialogs.FakeProgressExecution
									import sys
									sys.stdout.flush()

									self.app.export(type=type)
									compare_ui = vx.open(path_ui)
github vaexio / vaex / packages / vaex-core / vaex / misc_cmdline.py View on Github external
def stat_main(argv):
    parser = make_stat_parser(argv[0])
    args = parser.parse_args(argv[1:])
    import vaex
    dataset = vaex.open(args.dataset)
    if dataset is None:
        print("Cannot open input: %s" % args.dataset)
        sys.exit(1)
    print("dataset:")
    print("  length: %s" % len(dataset))
    print("  full_length: %s" % dataset.full_length())
    print("  name: %s" % dataset.name)
    print("  path: %s" % dataset.path)
    print("  columns: ")
    desc = dataset.description
    if desc:
        print("    description: %s" % desc)
    for name in dataset.get_column_names():
        print("   - %s: " % name)
        desc = dataset.descriptions.get(name)
        if desc:
github vaexio / vaex / packages / vaex-ui / vaex / ui / variables.py View on Github external
def main(argv=sys.argv):
    """Show a ``VariablesTable`` for the dataset named by ``argv[1]``.

    The dataset is opened first, then a Qt application is created from
    ``argv``. The table is bound to the dataset, shown and raised, and the
    process exits with the value returned by the Qt event loop.
    """
    ds = vaex.open(argv[1])
    qt_app = QtGui.QApplication(argv)
    view = VariablesTable(None)
    view.set_dataset(ds)
    view.show()
    view.raise_()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
github vaexio / vaex / packages / vaex-ui / vaex / ui / columns.py View on Github external
def main(argv=sys.argv):
    """Show a ``ColumnsTable`` for the dataset named by ``argv[1]``.

    The dataset is opened first, then a Qt application is created from
    ``argv``. The table is bound to the dataset, shown and raised, and the
    process exits with the value returned by the Qt event loop.
    """
    ds = vaex.open(argv[1])
    qt_app = QtGui.QApplication(argv)
    view = ColumnsTable(None)
    view.set_dataset(ds)
    view.show()
    view.raise_()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
github vaexio / vaex / packages / vaex-core / vaex / datasets.py View on Github external
def open(self):
        """Open the file(s) listed in ``self.filenames_vaex``.

        Exactly one filename is opened with ``vx.open``; any other count
        (including zero) is handed as a whole list to ``vx.open_many``.
        """
        paths = self.filenames_vaex
        if len(paths) == 1:
            return vx.open(paths[0])
        return vx.open_many(paths)
github vaexio / vaex / packages / vaex-ui / vaex / ui / main.py View on Github external
def open(self, path):
        """Open the dataset at ``path`` and register it with the UI.

        Paths starting with ``http`` or ``ws`` are opened with
        ``thread_mover=self.call_in_main_thread``; every other path is opened
        without extra arguments. The path is then recorded as recently opened,
        the dataset is added to the dataset selector, and the dataset is
        returned.
        """
        logger.debug("open dataset: %r", path)
        open_kwargs = {}
        # str.startswith accepts a tuple of prefixes, so one call covers both schemes.
        if path.startswith(("http", "ws")):
            open_kwargs["thread_mover"] = self.call_in_main_thread
        dataset = vaex.open(path, **open_kwargs)
        self.add_recently_opened(path)
        self.dataset_selector.add(dataset)
        return dataset
github vaexio / vaex / packages / vaex-core / vaex / export.py View on Github external
else:
            return 1
    if args.task == "tap":
        dataset = vaex.dataset.DatasetTap(args.tap_url, args.table_name)
        if not args.quiet:
            print("exporting from {tap_url} table name {table_name} to {output}".format(tap_url=args.tap_url, table_name=args.table_name, output=args.output))
    if args.task == "csv":
        # dataset = vaex.dataset.DatasetTap(args.tap_url, args.table_name)
        if not args.quiet:
            print("exporting from {input} to {output}".format(input=args.input, output=args.output))
    if args.task == "file":
        if args.input[0] == "@":
            inputs = open(args.input[1:]).readlines()
            dataset = vaex.open_many(inputs)
        else:
            dataset = vaex.open(args.input)
        if not args.quiet:
            print("exporting from {input} to {output}".format(input=args.input, output=args.output))

    if dataset is None and args.task not in ["csv"]:
        if not args.quiet:
            print("Cannot open input")
        return 1
    if dataset:
        dataset.set_active_fraction(args.fraction)
    if args.list:
        if not args.quiet:
            print("columns names: " + " ".join(dataset.get_column_names()))
    else:
        if args.task == "csv":
            row_count = -1  # the header does not count
            with file(args.input) as lines:
github vaexio / vaex / benchmark / examples.py View on Github external
import numpy as np
import vaex as vx
import numexpr as ne
import vaex.multithreading as mt
import timeit
import math
import vaex.execution
import threading
lock = threading.Lock()
import sys

# Module-level benchmark setup. NOTE: this script uses Python 2 print statements.

# Thread pool object exposed by vaex.multithreading; its nthreads sizes the buffer below.
pool = mt.pool
# Open the hard-coded default file unless a path is given as the first command-line argument.
ds = vx.open("data/Aq-A-2-999-shuffled-10percent.hdf5") if len(sys.argv) == 1 else vx.open(sys.argv[1])
# Calling the dataset with "x" presumably yields an expression/subspace object for column x -- TODO confirm.
x = ds("x")
# presumably the (min, max) of x -- confirm against the vaex version in use
xlim = x.minmax()
# Column "x" looked up directly in ds.columns.
data = ds.columns["x"]
# Print the column length, a quarter of it, its remainder modulo 4, and the rounded-up per-thread share.
print len(data), len(data)/4, len(data)%4, math.ceil(float(len(data))/pool.nthreads)
# NOTE(review): splits and buf_size are not used within this excerpt; their consumers are not visible here.
splits = 10
buf_size = int(1e7)
# One float64 row per pool thread, each len(data)/pool.nthreads + 10 entries long.
# The "/" result is used as an array dimension, so this relies on it being an integer (Python 2 division).
buf = np.zeros((pool.nthreads, len(data)/pool.nthreads+10), dtype=np.float64)
print buf.shape
import concurrent.futures
import theano.tensor as T
from theano import function
# x is rebound here to a Theano symbolic vector named 'x', replacing the vaex object assigned above.
x = T.dvector('x')
# eval of a constant string against the current x; equivalent to writing x**2 directly.
z = eval("x**2")
# Build a Theano function taking x as input and producing z.
func = function([x], z)
def case_a():
	#executor = concurrent.futures.ThreadPoolExecutor(max_workers=max_workers)