__author__ = 'maartenbreddels'
import sys
import vaex.utils
import vaex as vx
import os
# data_dir = "/tmp/vaex/data"
data_dir = vaex.utils.get_private_dir("data")
try:
    from urllib import urlretrieve  # py2
except ImportError:
    from urllib.request import urlretrieve  # py3
def _url_to_filename(url, replace_ext=None, subdir=None):
    """Map a download URL to a local path inside the private data directory."""
    if subdir:
        filename = os.path.join(data_dir, subdir, url.split("/")[-1])
    else:
        filename = os.path.join(data_dir, url.split("/")[-1])
    if replace_ext:
        dot_index = filename.rfind(".")
        filename = filename[:dot_index] + replace_ext
    return filename
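
# Usage sketch: cache a remote file under the private data directory and download it
# only on first use. The URL and subdir below are made up, purely for illustration.
def _example_fetch():
    url = "https://example.com/data/example.hdf5"
    filename = _url_to_filename(url, subdir="examples")
    dirname = os.path.dirname(filename)
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    if not os.path.exists(filename):
        urlretrieve(url, filename)
    return filename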
try:  # the enclosing try block starts earlier in the full method; shown here so the except below has context
    def process(thread_index, i1, i2):
        if not cancelled[0]:
            block_scope = block_scopes[thread_index]
            block_scope.move(i1 + dataset._index_start, i2 + dataset._index_start)
            # with ne_lock:
            block_dict = {expression: block_scope.evaluate(expression) for expression in expressions}
            for task in task_queue:
                blocks = [block_dict[expression] for expression in task.expressions_all]
                if not cancelled[0]:
                    task._results.append(task.map(thread_index, i1, i2, *blocks))
                # don't call directly, since ui's don't like being updated from a different thread
                # self.thread_mover(task.signal_progress, float(i2)/length)
                # time.sleep(0.1)

    length = len(dataset)
    # print self.thread_pool.map()
    for element in self.thread_pool.map(process, vaex.utils.subdivide(length, max_length=self.buffer_size),
                                        progress=lambda p: all(self.signal_progress.emit(p)) and
                                                 all([all(task.signal_progress.emit(p)) for task in task_queue]),
                                        cancel=cancel):
        pass  # just eat all elements
    self._is_executing = False
except:
    # on any error we flush the task queue
    self.signal_cancel.emit()
    logger.exception("error in task, flush task queue")
    raise
logger.debug("executing took %r seconds" % (time.time() - t0))
# while processing the self.task_queue, new elements will be added to it, so copy it
logger.error("cancelled: %r", cancelled)
if cancelled[0]:
    logger.debug("execution aborted")
    task_queue = task_queue_all
import numpy as np

def mutual_information(data):
    """Mutual information of a grid of counts: the KL divergence between the joint
    distribution and the distribution assuming independence (product of marginals)."""
    Q = vaex.utils.disjoined(data)  # counts assuming the columns are independent
    P = data
    P = P / P.sum()
    Q = Q / Q.sum()
    mask = (P > 0) & (Q > 0)
    information = np.sum(P[mask] * np.log(P[mask] / Q[mask]))  # * np.sum(dx)
    return information
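
# Illustrative sketch (assumes vaex.utils.disjoined also accepts a plain 2D numpy array
# of counts): correlated columns should give a clearly larger value than independent ones.
x = np.random.normal(size=10000)
counts_dep, _, _ = np.histogram2d(x, x + np.random.normal(scale=0.5, size=10000), bins=32)
counts_indep, _, _ = np.histogram2d(x, np.random.normal(size=10000), bins=32)
print(mutual_information(counts_dep))    # noticeably above zero for correlated data
print(mutual_information(counts_indep))  # close to zero for independent data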
def __init__(self, path, length, dtype=np.uint8):
    self.path = path
    self.length = length
    if not os.path.exists(path):
        # create the file with the requested size by writing a single byte at the end
        with open(self.path, 'wb') as fp:
            fp.seek(self.length - 1)
            fp.write(b'\00')
            fp.flush()
    self.fp = open(self.path, 'rb+')
    kwargs = {}
    if vaex.utils.osname == "windows":
        kwargs["access"] = mmap.ACCESS_WRITE
    else:
        # the mapping must also be readable, since np.frombuffer reads from it
        kwargs["prot"] = mmap.PROT_READ | mmap.PROT_WRITE
    self.mmap = mmap.mmap(self.fp.fileno(), self.length, **kwargs)
    self.data = np.frombuffer(self.mmap, dtype=dtype, count=self.length)
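
# Standalone sketch of the same technique with made-up names (not part of vaex):
# a numpy array backed by a file on disk via mmap, so writes persist to the file.
import mmap
import os
import numpy as np

def _open_file_backed_array(path, length, dtype=np.uint8):
    if not os.path.exists(path):
        with open(path, 'wb') as fp:
            fp.seek(length - 1)
            fp.write(b'\x00')  # force the file to the requested size
    fp = open(path, 'rb+')
    buffer = mmap.mmap(fp.fileno(), length)  # default mapping is readable and writable
    count = length // np.dtype(dtype).itemsize
    return np.frombuffer(buffer, dtype=dtype, count=count)

# scratch = _open_file_backed_array("/tmp/scratch.bin", 1024)
# scratch[:] = 42  # changes are written through to the mapped file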
import platform
try:
    from urllib import quote as urlquote  # py2
except ImportError:
    from urllib.parse import quote as urlquote  # py3

def email(text):
    """Open the user's mail client with a pre-filled error report via a mailto: URL."""
    osname = platform.system().lower()
    if osname == "linux":
        # for some reason, # needs to be double-encoded on linux, otherwise it is interpreted as a comment symbol
        text = text.replace("#", "%23")
    body = urlquote(text)
    subject = urlquote('Error report for: ' + vaex.__full_name__)
    mailto = "mailto:maartenbreddels@gmail.com?subject={subject}&body={body}".format(**locals())
    print("open:", mailto)
    vaex.utils.os_open(mailto)
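
# Usage sketch: typically called from an exception handler so the user can mail a report
# (calling it will actually open the default mail client).
import traceback
try:
    raise RuntimeError("demo failure")
except RuntimeError:
    email("Something went wrong:\n" + traceback.format_exc())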
def nearest(self, point, metric=None):
    """Delegate the `nearest` computation for this subspace to the remote vaex server."""
    point = vaex.utils.make_list(point)
    result = self.dataset.server._call_subspace("nearest", self, point=point, metric=metric)
    return self._task(result)
if args.all:
    output_data = dict(description=ds.description,
                       descriptions={name: ds.descriptions.get(name, "") for name in column_names},
                       ucds={name: ds.ucds.get(name, "") for name in column_names},
                       units={name: str(ds.units.get(name, "")) for name in column_names},
                       )
else:
    output_data = dict(description=ds.description,
                       descriptions=ds.descriptions,
                       ucds=ds.ucds,
                       units={name: str(unit) for name, unit in ds.units.items()},
                       )
if args.output == "-":
    yaml.safe_dump(output_data, sys.stdout, default_flow_style=False)  # , encoding='utf-8', allow_unicode=True)
else:
    vaex.utils.write_json_or_yaml(args.output, output_data)
    print("wrote %s" % args.output)
if args.task == "import":
    if args.input == "-":
        data = yaml.safe_load(sys.stdin)  # safe_load: never deserialize arbitrary python objects from stdin
    else:
        data = vaex.utils.read_json_or_yaml(args.input)
    ds = vaex.open(args.output)
    units = data["units"]
    ucds = data["ucds"]
    descriptions = data["descriptions"]
    if args.description:
        ds.description = args.description
    else:
        if ds.description is None or args.overwrite:
            ds.description = data["description"]
    for column_name in ds.get_column_names(strings=True):
        if column_name not in descriptions:
            print(column_name, 'missing description')
        else:
            print('>>>', column_name, descriptions[column_name])
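
# Illustrative sketch of the metadata document that this export/import round-trips;
# the column names, units and UCDs below are made up for illustration.
import sys
import yaml
example_metadata = dict(
    description="example catalogue",
    descriptions={"x": "x position", "vx": "velocity in x"},
    ucds={"x": "pos.cartesian.x", "vx": "phys.veloc"},
    units={"x": "kpc", "vx": "km / s"},
)
yaml.safe_dump(example_metadata, sys.stdout, default_flow_style=False)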
def load(self, f):
    """Restore the list contents from a JSON/YAML state file, replacing the current items."""
    states = vaex.utils.read_json_or_yaml(f)
    objects = [from_dict(k) for k in states]
    del self[:]  # TODOPY2: replace by .clear
    self.extend(objects)