How to use the vaex.server function in vaex

To help you get started, we’ve selected a few vaex examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vaexio / vaex / packages / vaex-ui / vaex / ui / main.py View on Github external
while index < len(args):
            filename = args[index]
            filename = args[index]
            print("filename", filename)
            dataset = None
            if filename.startswith("cluster://"):
                dataset = vaex.open(filename)  # , thread_mover=self.call_in_main_thread)
            elif filename.startswith("http://") or filename.startswith("ws://"):  # TODO: thinkg about https wss
                # o = urlparse(filename)
                # assert o.scheme == "http"
                # base_path, should_be_datasets, dataset_name = o.path.rsplit("/", 2)
                # if should_be_datasets != "datasets":
                # error("expected an url in the form http://host:port/optional/part/datasets/dataset_name")
                # server = vaex.server(hostname=o.hostname, port = o.port or 80, thread_mover=self.call_in_main_thread, base_path=base_path)
                if 0:
                    server = vaex.server(filename, thread_mover=self.call_in_main_thread)
                    datasets = server.datasets()
                    names = [dataset.name for dataset in datasets]
                    index += 1
                    if index >= len(args):
                        error("expected dataset to follow url, e.g. vaex http://servername:9000 somedataset, possible dataset names: %s" % " ".join(names))
                    name = args[index]
                    if name not in names:
                        error("no such dataset '%s' at server, possible dataset names: %s" % (name, " ".join(names)))

                    found = [dataset for dataset in datasets if dataset.name == name]
                    if found:
                        dataset = found[0]
                dataset = vaex.open(filename, thread_mover=self.call_in_main_thread)
                self.add_recently_opened(filename)
                # dataset = self.open(filename)
            elif filename[0] == ":":  # not a filename, but a classname
github vaexio / vaex / packages / vaex-core / vaex / __init__.py View on Github external
"""
    import vaex
    try:
        if path in aliases:
            path = aliases[path]
        if path.startswith("http://") or path.startswith("ws://"):  # TODO: think about https and wss
            server, name = path.rsplit("/", 1)
            url = urlparse(path)
            if '?' in name:
                name = name[:name.index('?')]
            extra_args = {key: values[0] for key, values in parse_qs(url.query).items()}
            if 'token' in extra_args:
                kwargs['token'] = extra_args['token']
            if 'token_trusted' in extra_args:
                kwargs['token_trusted'] = extra_args['token_trusted']
            server = vaex.server(server, **kwargs)
            dataframe_map = server.datasets(as_dict=True)
            if name not in dataframe_map:
                raise KeyError("no such DataFrame '%s' at server, possible names: %s" % (name, " ".join(dataframe_map.keys())))
            return dataframe_map[name]
        if path.startswith("cluster"):
            import vaex.distributed
            return vaex.distributed.open(path, *args, **kwargs)
        else:
            import vaex.file
            import glob
            if isinstance(path, six.string_types):
                paths = [path]
            else:
                paths = path
            filenames = []
            for path in paths:
github vaexio / vaex / packages / vaex-core / vaex / remote.py View on Github external
self.signal_end.emit()
                # if new tasks were added as a result of this, execute them immediately
                # TODO: we may want to include infinite recursion protection
                self._is_executing = False
                if len(self.task_queue) > 0:
                    logger.debug("task queue not empty.. start over!")
                    self.execute()
        finally:
            self._is_executing = False


if __name__ == "__main__":
    # Manual smoke test, run as a script: argv[1] is passed to vaex.server as
    # the server address and argv[2] (converted to int) as the port.
    import vaex
    import sys
    vaex.set_log_level_debug()
    server = vaex.server(sys.argv[1], port=int(sys.argv[2]))
    datasets = server.datasets()
    print(datasets)
    dataset = datasets[0]
    # NOTE(review): the dataset taken from the server above is immediately
    # replaced by the result of vaex.example(), so every statement below
    # operates on that object instead — confirm whether this override is a
    # debugging leftover.
    dataset = vaex.example()
    print(dataset("x").minmax())
    # The select() call must precede the selected_length()/selected() calls
    # below; they report on the selection made here.
    dataset.select("x < 0")
    print(dataset.selected_length(), len(dataset))
    print(dataset("x").selected().is_masked)
    print(dataset("x").selected().minmax())
github vaexio / vaex / examples / vaexclient.py View on Github external
__author__ = 'breddels'
import vaex as vx
import numpy as np
import pylab


# Example client: connect to a vaex server on localhost, open the first
# dataset it lists, and print a few statistics of the ("x", "y") subspace.
server = vx.server("localhost")


# Named dataset_names (not `list`) so the builtin `list` is not shadowed.
dataset_names = server.list_datasets()
# print(...) with a single argument produces the same output on Python 2 and 3.
print(dataset_names)
ds = server.open(dataset_names[0])
# %-formatting keeps the original "length N" output on both Python 2 and 3.
print("length %d" % len(ds))
subspace = ds("x", "y")
limits = subspace.limits_sigma(sigmas=3, square=True)

ds.select("z>50")
selected = subspace.selected()


print(subspace.mean())
print(subspace.var())
print(subspace.limits_sigma())
github vaexio / vaex / packages / vaex-core / vaex / distributed.py View on Github external
verbosity = ["ERROR", "WARNING", "INFO", "DEBUG"]
    logging.getLogger("vaex").setLevel(verbosity[min(3, args.verbose)])
    quiet = args.quiet
    if args.task == "check":
        name = args.name
        clusterlist = vaex.settings.cluster.get("clusters." + name, None)
        if clusterlist is None:
            if not quiet:
                print("cluster does not exist: %s" % name)
        else:
            common = None
            for hostname in clusterlist:
                print(hostname)
                try:
                    server = vx.server(hostname)
                    datasets = server.datasets()
                except socket.error as e:
                    print("\t" + str(e))
                    if args.clean:
                        clusterlist.remove(hostname)
                else:
                    for dataset in datasets:
                        print("\t" +dataset.name)
                        #if common is None:
                    names = set([k.name for k in datasets])
                    common = names if common is None else common.union(names)
            print("Cluster: " + name + " has %d hosts connected, to connect to a dataset, use the following urls:" % (len(clusterlist))  )
            for dsname in common:
                print("\tcluster://%s/%s" % (name, dsname))
            if args.clean:
                vaex.settings.cluster.store("clusters." + name, clusterlist)
github vaexio / vaex / packages / vaex-core / vaex / distributed.py View on Github external
def open(url, thread_mover=None):
    """Open one dataset from every reachable host of a configured cluster.

    :param url: URL whose scheme must be ``cluster``; the host part is the
        cluster name looked up under ``"clusters.<name>"`` in
        ``vaex.settings.cluster`` and the path (minus one leading ``/``) is
        the key used to pick the dataset on each host.
    :param thread_mover: forwarded unchanged to ``vx.server`` for every host.
    :return: a ``DatasetDistributed`` built from the datasets of all hosts
        that could be reached, or ``None`` when the cluster is not configured
        or its host list is empty.
    :raises AssertionError: if the URL scheme is not ``cluster``.
    :raises KeyError: if a reachable host does not serve the requested key.
    """
    parsed = urlparse(url)
    assert parsed.scheme in ["cluster"]
    port = parsed.port  # not used further in this function
    dataset_key = parsed.path
    if dataset_key.startswith("/"):
        # drop exactly one leading slash
        dataset_key = dataset_key[1:]
    hostnames = vaex.settings.cluster.get("clusters." + parsed.hostname, None)
    if not hostnames:
        return None

    members = []
    for hostname in hostnames:
        try:
            remote = vx.server(hostname, thread_mover=thread_mover)
            available = remote.datasets(as_dict=True)
        except socket.error:
            # hosts that cannot be reached are left out of the result
            logger.info("could not connect to %s, skipping", hostname)
            continue
        members.append(available[dataset_key])
    return DatasetDistributed(datasets=members)
github vaexio / vaex / packages / vaex-ui / vaex / ui / main.py View on Github external
def server_connect(*ignore):
            """Let the user pick a server and one of its datasets, then add that dataset.

            All positional arguments are ignored. On success the chosen dataset is
            added to ``self.dataset_selector``, its path is passed to
            ``self.add_recently_opened``, and the server is moved to the front of
            the stored "servers" setting. The function returns ``None`` in every
            case; it stops early when a dialog returns ``None`` (presumably the
            user cancelled) or when connecting fails, in which case an error
            dialog is shown.
            """
            servers = vaex.settings.main.get("servers", ["ws://localhost:9000/"])
            choice = dialogs.choose(self, "Connect to server", "Connect to server", servers, editable=True)
            # Test for None *before* converting to str: str(None) is the string
            # "None", which previously made this early return unreachable and led
            # to a connection attempt to a server called "None".
            if choice is None:
                return
            server = str(choice)
            try:
                vaex_server = vaex.server(server, thread_mover=self.call_in_main_thread)
                datasets = vaex_server.datasets()
            except Exception as e:
                # UI boundary: report any connection failure to the user instead of raising
                dialogs.dialog_error(self, "Error connecting", "Error connecting: %r" % e)
                return
            dataset_descriptions = ["{} ({:,} rows)".format(dataset.name, len(dataset)) for dataset in datasets]
            dataset_index = dialogs.choose(self, "Choose datasets", "Choose dataset", dataset_descriptions)
            if dataset_index is None:
                return
            dataset = datasets[dataset_index]
            self.dataset_selector.add(dataset)
            self.add_recently_opened(dataset.path)

            # Move the server just used to the front of the stored list.
            if server in servers:
                servers.remove(server)
            servers.insert(0, server)
            vaex.settings.main.store("servers", servers)