# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def setUp(self):
    """Build the small in-memory dataset shared by the tests in this class."""
    ds = dataset.DatasetArrays("dataset")
    self.dataset = ds
    x = np.arange(10)
    y = x ** 2
    self.x = x
    self.y = y
    ds.add_column("x", x)
    ds.add_column("y", y)
    # "z" is a virtual column: evaluated on demand from x, y and the variable t
    ds.set_variable("t", 1.)
    ds.add_virtual_column("z", "x+t*y")
    # attach astropy units to the physical columns/variables
    for name, unit_string in (("x", "km"), ("y", "km/s"), ("t", "s")):
        ds.units[name] = astropy.units.Unit(unit_string)
    ds.add_column("f", np.arange(len(ds), dtype=np.float64))
    ds.ucds["x"] = "some;ucd"
    # self.jobsManager = dataset.JobsManager()
    x = np.array([0., 1])
def test_selection(self):
    """A selection must shrink the sum, and every combine mode must be accepted."""
    full_total = self.dataset("x").sum()
    self.dataset.select("x > 5")
    selected_total = self.dataset("x").selected().sum()
    self.assertLess(selected_total, full_total)
    # exercise every boolean combine mode, both on top of an existing
    # selection and starting from a cleared (None) selection
    for combine_mode in vaex.dataset._select_functions.keys():
        self.dataset.select("x > 5")
        self.dataset.select("x > 5", combine_mode)
        self.dataset.select(None)
        self.dataset.select("x > 5", combine_mode)
    # TODO: assert on the resulting selections per mode
def concat(*types):
    """Build a DatasetConcatenated with one 3-element "x" column per dtype in *types*."""
    parts = []
    for i, dtype in enumerate(types):
        part = vx.dataset.DatasetArrays("dataset-%i" % i)
        part.add_column("x", np.arange(3, dtype=dtype))
        parts.append(part)
    return vx.dataset.DatasetConcatenated(parts, name="dataset_concat")
def create_filtered():
    """Return the base test dataset with the filter selection restricting x to [0, 10)."""
    filtered = create_base_ds()
    filtered.select('(x >= 0) & (x < 10)', name=vaex.dataset.FILTER_SELECTION_NAME)
    return filtered
# if property == "Units":
# if value:
# try:
# unit = astropy.units.Unit(value)
# logger.debug("setting unit to: %s (%s)" % (value, unit))
# self.dataset.units[column_name] = unit
# # TODO: move to dataset class
# self.dataset.signal_column_changed.emit(self.dataset, column_name, "change")
# except Exception, e:
# dialogs.dialog_error(None, "Cannot parse unit", "Cannot parse unit:\n %s" % e)
# else:
# if column_name in self.dataset.units:
# del self.dataset.units[column_name]
# Fragment of a property-editor callback (the enclosing method is not visible here).
# SECURITY NOTE(review): eval() on a user-typed expression string — acceptable for
# a local GUI, but this path must never be exposed to untrusted input.
if property == "Expression":
try:
# validate the expression by evaluating it once before storing it
test = eval(value, vaex.dataset.expression_namespace, self.dataset.variables)
self.dataset.add_variable(variable_name, value)
except Exception as e:
dialogs.dialog_error(None, "Invalid expression", "Invalid expression: %s" % e)
# although it may not be a valid expression, still set it to the user can edit it
# self.dataset.virtual_columns[column_name] = value
# persist the dataset metadata after the edit
self.dataset.write_meta()
return True
# Fragment of a pandas interop benchmark (the enclosing function is not visible here;
# `x`, `y`, `index` and `dataset` are presumably set up above this chunk — TODO confirm).
z = pd.Series(dataset.columns["z"], index=index)
f = pd.DataFrame({"x": x, "y":y, "z":z})
print((f.x.mean()))
print((f.y.mean()))
print((f.z.mean()))
# report memory usage after building the DataFrame
meminfo()
#y = pd.Series(dataset.columns["x"])
if __name__ == "__main__":
    # CLI entry point: load the dataset given on the command line and run the
    # pandas interop test on it.
    # Renamed from `input` to avoid shadowing the Python builtin of that name.
    input_path = sys.argv[1]
    # output = sys.argv[2]
    dataset_in = vaex.dataset.load_file(input_path)
    test_pandas(dataset_in)
def _update_image(self):
# Recompute and display the volume rendering from the current grid.
# NOTE(review): this fragment appears truncated below — vy/vz handling is cut off.
with self.output:
grid = self.get_grid()
# optional gaussian smoothing of the raw grid before the transfer function
if self.smooth_pre:
for i in range(grid.shape[0]):
grid[i] = vaex.grids.gf(grid[i], self.smooth_pre)
# f is the transfer function parsed from self.f (semantics defined in vaex.dataset._parse_f)
f = vaex.dataset._parse_f(self.f)
fgrid = f(grid)
# optional gaussian smoothing after the transfer function
if self.smooth_post:
for i in range(grid.shape[0]):
fgrid[i] = vaex.grids.gf(fgrid[i], self.smooth_post)
ngrid, fmin, fmax = self.normalise(fgrid)
# NOTE(review): debug print left in — consider removing or routing through logging
print(ngrid.shape)
if len(ngrid.shape) == 4:
#if ngrid.shape[0] == 1:
# 4d grid: take the last sub-grid only (presumably one per selection — TODO confirm)
ngrid = ngrid[-1]
p3.volshow(ngrid.T, controls=self._first_time)
# vector-field overlay grids, if computed
vx, vy, vz = self.vgrids[:3]
vcount = self.vcount
if vx is not None and vy is not None and vz is not None and vcount is not None:
vcount = vcount[-1] # no multivolume render, just take the last selection
vx = vx[-1]
# Fragment of a column-export loop (the enclosing function is not visible here).
# Writes one column to file `f` in chunks of chunk_size rows, converted to
# big-endian (as required by the on-disk format — see astype(">") below).
chunks = int(math.ceil(len(dataset)/float(chunk_size)))
for i in range(chunks):
i1 = i * chunk_size
i2 = min(len(dataset), (i+1) * chunk_size)
# convert each chunk to big-endian byte order before writing
data_big_endian = column[i1:i2].astype(">" + numpy_type_name)
f.write(data_big_endian)
print((f.tell(), f.tell() / 1024**2, "mb", len(dataset)))
# the final chunk must end exactly at the dataset length
assert i2 == len(dataset)
finish_data()
if __name__ == "__main__":
    # CLI entry point: convert the input dataset into a column-oriented FITS file.
    # usage: <script> <input> <output>
    # Renamed from `input` to avoid shadowing the Python builtin of that name.
    input_path = sys.argv[1]
    output_path = sys.argv[2]
    dataset_in = vaex.dataset.load_file(input_path)
    write_colfits(dataset_in, output_path)
# NOTE(review): orphan `return` — the enclosing method's def lies above this chunk.
return self.dataset.server._call_subspace("correlation", self, means=means, vars=vars)
def var(self, means=None):
# Forward to the server-side "var" implementation.
return self.dataset.server._call_subspace("var", self, means=means)
def sum(self):
# Forward to the server-side "sum" implementation.
return self.dataset.server._call_subspace("sum", self)
def limits_sigma(self, sigmas=3, square=False):
# Forward to the server-side "limits_sigma" implementation.
return self.dataset.server._call_subspace("limits_sigma", self, sigmas=sigmas, square=square)
def mutual_information(self, limits=None, size=256):
# Forward to the server-side "mutual_information" implementation.
return self.dataset.server._call_subspace("mutual_information", self, limits=limits, size=size)
class DatasetDistributed(vaex.dataset.Dataset):
    """A dataset whose rows are partitioned over several underlying datasets.

    All metadata (dtypes, units, virtual columns, ucds, descriptions) is
    taken from the first part; the parts are assumed to share one schema.
    """
    def __init__(self, datasets):
        super(DatasetDistributed, self).__init__(datasets[0].name, datasets[0].column_names)
        self.datasets = datasets
        self.executor = ServerExecutor()
        # self.name = self.datasets[0].name
        # self.column_names = self.datasets[0].column_names
        self.dtypes = self.datasets[0].dtypes
        self.units = self.datasets[0].units
        # BUGFIX: copy the virtual columns of the first part; this previously
        # updated virtual_columns with the *units* mapping by mistake.
        self.virtual_columns.update(self.datasets[0].virtual_columns)
        self.ucds = self.datasets[0].ucds
        self.descriptions = self.datasets[0].descriptions
        self.description = self.datasets[0].description
        self._length_original = self.datasets[0].length_original()
        self._length_unfiltered = self.datasets[0].length_unfiltered()
        self.path = self.datasets[0].path  # maybe we should use some cluster name instead?
        # row boundaries of each part within the distributed whole
        parts = np.linspace(0, self._length_original, len(self.datasets)+1, dtype=int)
def correlation(self, means=None, vars=None):
    """Forward the correlation computation to the remote server."""
    remote = self.dataset.server
    return remote._call_subspace("correlation", self, means=means, vars=vars)
def var(self, means=None):
    """Forward the variance computation to the remote server."""
    remote = self.dataset.server
    return remote._call_subspace("var", self, means=means)
def sum(self):
    """Forward the per-column sum to the remote server."""
    remote = self.dataset.server
    return remote._call_subspace("sum", self)
def limits_sigma(self, sigmas=3, square=False):
    """Forward sigma-based limit computation to the remote server."""
    remote = self.dataset.server
    return remote._call_subspace("limits_sigma", self, sigmas=sigmas, square=square)
def mutual_information(self, limits=None, size=256):
    """Forward mutual-information estimation to the remote server."""
    remote = self.dataset.server
    return remote._call_subspace("mutual_information", self, limits=limits, size=size)
class DatasetDistributed(vaex.dataset.Dataset):
    """A dataset whose rows are partitioned over several underlying datasets.

    Variant using the older full_length()/_length API. All metadata is taken
    from the first part; the parts are assumed to share one schema.
    """
    def __init__(self, datasets):
        super(DatasetDistributed, self).__init__(datasets[0].name, datasets[0].column_names)
        self.datasets = datasets
        self.executor = ServerExecutor()
        # self.name = self.datasets[0].name
        # self.column_names = self.datasets[0].column_names
        self.dtypes = self.datasets[0].dtypes
        self.units = self.datasets[0].units
        # BUGFIX: copy the virtual columns of the first part; this previously
        # updated virtual_columns with the *units* mapping by mistake.
        self.virtual_columns.update(self.datasets[0].virtual_columns)
        self.ucds = self.datasets[0].ucds
        self.descriptions = self.datasets[0].descriptions
        self.description = self.datasets[0].description
        self._full_length = self.datasets[0].full_length()
        self._length = self._full_length
        self.path = self.datasets[0].path  # maybe we should use some cluster name instead?
        # row boundaries of each part within the distributed whole
        parts = np.linspace(0, self._length, len(self.datasets)+1, dtype=int)