How to use the vaex.utils module in vaex

To help you get started, we’ve selected a few vaex examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vaexio / vaex / packages / vaex-core / vaex / datasets.py View on Github external
__author__ = 'maartenbreddels'
import sys
import vaex.utils
import vaex as vx
import os
# data_dir = "/tmp/vaex/data"
# Base directory that _url_to_filename() joins file names onto; it is obtained
# from vaex.utils.get_private_dir("data") when this module is imported.
data_dir = vaex.utils.get_private_dir("data")

# urlretrieve is imported from a different module on Python 2 and Python 3:
# try the Python 2 location first and fall back to the Python 3 one.
try:
    from urllib import urlretrieve  # py2
except ImportError:
    from urllib.request import urlretrieve  # py3


def _url_to_filename(url, replace_ext=None, subdir=None):
    """Map a download URL to a local path below ``data_dir``.

    The local file name is the last ``/``-separated component of ``url``.

    :param url: URL whose last path component becomes the file name
    :param replace_ext: if truthy, the replacement extension (including the
        leading dot, e.g. ``".hdf5"``) that is substituted for the file name's
        last extension; when the file name has no extension it is appended
    :param subdir: if truthy, a directory name inserted between ``data_dir``
        and the file name
    :return: the resulting path as a string
    """
    basename = url.split("/")[-1]
    if replace_ext:
        # Split the extension off the basename only.  Searching the full
        # joined path for the last "." would hit dots in directory names
        # (e.g. a ".vaex" directory) or, with no dot at all, drop the last
        # character of the name.
        stem, _ = os.path.splitext(basename)
        basename = stem + replace_ext
    if subdir:
        return os.path.join(data_dir, subdir, basename)
    return os.path.join(data_dir, basename)
github vaexio / vaex / vaex / execution.py View on Github external
if not cancelled[0]:
							block_scope = block_scopes[thread_index]
							block_scope.move(i1 + dataset._index_start, i2 + dataset._index_start)
							#with ne_lock:
							block_dict = {expression:block_scope.evaluate(expression) for expression in expressions}
							for task in task_queue:
								blocks = [block_dict[expression] for expression in task.expressions_all]
								if not cancelled[0]:
									task._results.append(task.map(thread_index, i1, i2, *blocks))
								# don't call directly, since ui's don't like being updated from a different thread
								#self.thread_mover(task.signal_progress, float(i2)/length)
#								time.sleep(0.1)

					length = len(dataset)
					#print self.thread_pool.map()
					for element in self.thread_pool.map(process, vaex.utils.subdivide(length, max_length=self.buffer_size),\
														progress=lambda p: all(self.signal_progress.emit(p)) and\
																all([all(task.signal_progress.emit(p)) for task in task_queue]),
														cancel=cancel):
						pass # just eat all element
					self._is_executing = False
			except:
				# on any error we flush the task queue
				self.signal_cancel.emit()
				logger.exception("error in task, flush task queue")
				raise
			logger.debug("executing took %r seconds" % (time.time() - t0))
			# while processing the self.task_queue, new elements will be added to it, so copy it
			logger.error("cancelled: %r", cancelled)
			if cancelled[0]:
				logger.debug("execution aborted")
				task_queue = task_queue_all
github vaexio / vaex / packages / vaex-core / vaex / kld.py View on Github external
def mutual_information(data):
    """Return sum(P * log(P / Q)) for the normalised ``data`` grid.

    ``P`` is ``data`` divided by its total; ``Q`` is the result of
    ``vaex.utils.disjoined(data)`` divided by its own total.  Only cells where
    both ``P`` and ``Q`` are strictly positive contribute to the sum.

    NOTE(review): ``disjoined`` presumably builds the product of the marginal
    distributions of ``data`` -- confirm against vaex.utils.

    :param data: numpy array of (unnormalised) counts or weights
    :return: the summed information as a numpy scalar
    """
    reference = vaex.utils.disjoined(data)

    # Normalise both grids so that each sums to one.
    joint = data / data.sum()
    reference = reference / reference.sum()

    # Restrict to cells where the logarithm and the ratio are well defined.
    support = (joint > 0) & (reference > 0)
    joint_nz = joint[support]
    reference_nz = reference[support]
    return np.sum(joint_nz * np.log(joint_nz / reference_nz))
github vaexio / vaex / packages / vaex-core / vaex / file / cache.py View on Github external
def __init__(self, path, length, dtype=np.uint8):
        """Memory-map the file at ``path`` and expose it as a numpy array.

        When nothing exists at ``path`` yet, a new file of ``length`` bytes is
        created by seeking to the last byte and writing a single zero byte.
        The file is then opened read/write, mapped for ``length`` bytes, and
        wrapped in ``self.data``.

        :param path: location of the backing file
        :param length: number of bytes to map; also passed as the element
            count of ``self.data``
        :param dtype: numpy dtype of ``self.data`` (default ``np.uint8``)

        NOTE(review): ``length`` is used both as the byte size of the mapping
        and as the element count, which only agrees for one-byte dtypes --
        confirm callers never pass a wider dtype.
        """
        self.path = path
        self.length = length
        if not os.path.exists(path):
            with open(self.path, 'wb') as fp:
                # Writing one byte at offset length-1 sizes the file to
                # exactly `length` bytes.
                fp.seek(self.length-1)
                fp.write(b'\00')
                fp.flush()

        self.fp = open(self.path, 'rb+')
        # A platform-specific dict of access/prot options used to be built
        # here but was never passed to mmap.mmap, so the mapping has always
        # been created with mmap's default access mode.  The dead dict has
        # been removed; the call itself is unchanged.
        self.mmap = mmap.mmap(self.fp.fileno(), self.length)
        self.data = np.frombuffer(self.mmap, dtype=dtype, count=self.length)
github vaexio / vaex / packages / vaex-ui / vaex / ui / qt.py View on Github external
def email(text):
    """Build a ``mailto:`` error-report URL for ``text`` and open it.

    The subject is ``'Error report for: '`` followed by ``vaex.__full_name__``
    and the body is ``text``; both are passed through ``urlquote``.  The URL
    is printed and then handed to ``vaex.utils.os_open``.

    :param text: the report body
    """
    if platform.system().lower() == "linux":
        # On Linux a '#' has to be escaped a second time, otherwise it is
        # interpreted as a comment symbol.
        text = text.replace("#", "%23")

    quoted_body = urlquote(text)
    quoted_subject = urlquote('Error report for: ' + vaex.__full_name__)

    mailto = "mailto:maartenbreddels@gmail.com?subject={subject}&body={body}".format(
        subject=quoted_subject,
        body=quoted_body,
    )
    print("open:", mailto)
    vaex.utils.os_open(mailto)
github vaexio / vaex / packages / vaex-distributed / vaex / distributed / __init__.py View on Github external
def nearest(self, point, metric=None):
        """Issue a "nearest" subspace call on the dataset's server.

        ``point`` is first passed through ``vaex.utils.make_list``; the server
        reply is wrapped with ``self._task`` before being returned.

        :param point: the query point
        :param metric: forwarded unchanged to the server call
        :return: whatever ``self._task`` returns for the server reply
        """
        coordinates = vaex.utils.make_list(point)
        server = self.dataset.server
        reply = server._call_subspace("nearest", self, point=coordinates, metric=metric)
        return self._task(reply)
github vaexio / vaex / packages / vaex-core / vaex / meta.py View on Github external
else:
            output_data = dict(description=ds.description,
                               descriptions=ds.descriptions,
                               ucds=ds.ucds,
                               units={name: str(unit) for name, unit in ds.units.items()},
                               )
        if args.output == "-":
            yaml.safe_dump(output_data, sys.stdout, default_flow_style=False)  # , encoding='utf-8',  allow_unicode=True)
        else:
            vaex.utils.write_json_or_yaml(args.output, output_data)
            print("wrote %s" % args.output)
    if args.task == "import":
        if args.input == "-":
            data = yaml.load(sys.stdin)
        else:
            data = vaex.utils.read_json_or_yaml(args.input)

        ds = vaex.open(args.output)

        units = data["units"]
        ucds = data["ucds"]
        descriptions = data["descriptions"]
        if args.description:
            ds.description = args.description
        else:
            if ds.description is None or args.overwrite:
                ds.description = data["description"]
        for column_name in ds.get_column_names(strings=True):
            if column_name not in descriptions:
                print(column_name, 'missing description')
            else:
                print('>>>', column_name, descriptions[column_name])
github vaexio / vaex / packages / vaex-ml / vaex / ml / pipeline.py View on Github external
def load(self, f):
        """Replace the contents of this container with objects read from ``f``.

        ``f`` is read with ``vaex.utils.read_json_or_yaml``; every entry of the
        result is turned into an object with ``from_dict``.  The existing
        items are removed only after all objects have been built.

        :param f: passed as-is to ``vaex.utils.read_json_or_yaml``
        """
        restored = []
        for state in vaex.utils.read_json_or_yaml(f):
            restored.append(from_dict(state))
        # Slice deletion empties the container in place.
        # TODOPY2: replace by .clear
        del self[:]
        self.extend(restored)
github vaexio / vaex / packages / vaex-core / vaex / meta.py View on Github external
if args.all:
            output_data = dict(description=ds.description,
                               descriptions={name: ds.descriptions.get(name, "") for name in column_names},
                               ucds={name: ds.ucds.get(name, "") for name in column_names},
                               units={name: str(ds.units.get(name, "")) for name in column_names},  # {name:str(unit) for name, unit in ds.units.items()},
                               )
        else:
            output_data = dict(description=ds.description,
                               descriptions=ds.descriptions,
                               ucds=ds.ucds,
                               units={name: str(unit) for name, unit in ds.units.items()},
                               )
        if args.output == "-":
            yaml.safe_dump(output_data, sys.stdout, default_flow_style=False)  # , encoding='utf-8',  allow_unicode=True)
        else:
            vaex.utils.write_json_or_yaml(args.output, output_data)
            print("wrote %s" % args.output)
    if args.task == "import":
        if args.input == "-":
            data = yaml.load(sys.stdin)
        else:
            data = vaex.utils.read_json_or_yaml(args.input)

        ds = vaex.open(args.output)

        units = data["units"]
        ucds = data["ucds"]
        descriptions = data["descriptions"]
        if args.description:
            ds.description = args.description
        else:
            if ds.description is None or args.overwrite: