Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_ref_count():
    """Check that Grid and aggregator objects take and release references.

    Creating a ``Grid`` from a binner must add exactly one reference to the
    binner, and creating an aggregator on a grid must add exactly one
    reference to the grid. Deleting the owner must bring the count back to
    the baseline recorded before it was created.
    """
    bins = 5
    binner = vaex.superagg.BinnerScalar_float64('x', 0, 5, bins)
    start_count_binner = sys.getrefcount(binner)

    # The grid holds on to its binner.
    grid = vaex.superagg.Grid([binner])
    assert sys.getrefcount(binner) == start_count_binner + 1

    # The aggregator holds on to its grid. Compare the grid's count against
    # the grid's own baseline; the binner's count must not change here.
    start_count_grid = sys.getrefcount(grid)
    agg = vaex.superagg.AggCount_float64(grid)
    assert sys.getrefcount(grid) == start_count_grid + 1

    # Dropping the aggregator releases the grid, but not the binner.
    del agg
    assert sys.getrefcount(grid) == start_count_grid
    assert sys.getrefcount(binner) == start_count_binner + 1

    # Dropping the grid releases the binner.
    del grid
    assert sys.getrefcount(binner) == start_count_binner
def test_count_1d_ordinal_superagg():
    """Count rows per ordinal bin using the low-level superagg objects.

    This test used to be named ``test_count_1d_ordinal``, the same as a later
    test in this module. The later definition replaced this one at import
    time, so this test was never collected. The distinct name lets both run.

    Judging by the expected values: with ``ordinal_count`` 5 the result has
    8 slots, slot 1 collects the two values below the minimum (0), slots 2-6
    hold ordinals 0-4, and the last slot collects the two values beyond the
    ordinal range. Slot 0 stays empty here (presumably reserved for missing
    values; confirm against the binner implementation).
    """
    x = np.array([-1, -2, 0, 1, 4, 6, 10], dtype='i8')
    ordinal_count = 5
    binner = vaex.superagg.BinnerOrdinal_int64('x', ordinal_count, 0)
    binner.set_data(x)
    grid = vaex.superagg.Grid([binner])
    agg = vaex.superagg.AggCount_int64(grid)
    # The array is obtained before binning and checked afterwards, so it is
    # expected to reflect the aggregator's own buffer.
    agg_data = np.asarray(agg)
    grid.bin([agg])
    assert agg_data.tolist() == [0, 2, 1, 1, 0, 0, 1, 2]
def test_count_1d():
    """Count rows per scalar bin over [0, 5] through the dataframe API."""
    values = np.array([-1, -2, 0.5, 1.5, 4.5, 5], dtype='f8')
    df = vaex.from_arrays(x=values)
    n_bins = 5
    scalar_binner = df._binner_scalar('x', [0, 5], n_bins)
    binning_grid = vaex.superagg.Grid([scalar_binner])
    counter = vaex.agg.count()
    counts = df._agg(counter, binning_grid)
    expected = [0, 2, 1, 1, 0, 0, 1, 1]
    assert counts.tolist() == expected
def test_min_max_1d_ordinal():
    """Check per-bin max and min of ``y`` when binning ordinally on ``x``.

    Each aggregator's buffer is shifted by a sentinel (-100 for max, +100 for
    min) before binning, so bins that receive no rows remain recognisable in
    the expected output.
    """
    keys = np.array([-1, -1, 0, 0, 4, 6, 10], dtype='i8')
    values = np.array([-1, 2, 4, 1, 9, 6, 10], dtype='i8')
    ordinal_count = 5
    binner = vaex.superagg.BinnerOrdinal_int64('x', ordinal_count, 0)
    binner.set_data(keys)

    # (aggregator class, sentinel added to the buffer, expected result);
    # the max pass runs first, then the min pass, each on a fresh grid that
    # shares the same binner.
    cases = (
        (vaex.superagg.AggMax_int64, -100,
         [-100, 2, 4, -100, -100, -100, 9, 10]),
        (vaex.superagg.AggMin_int64, 100,
         [100, -1, 1, 100, 100, 100, 9, 6]),
    )
    for agg_class, sentinel, expected in cases:
        grid = vaex.superagg.Grid([binner])
        agg = agg_class(grid)
        buffer = np.asarray(agg)
        buffer += sentinel
        agg.set_data(values, 0)
        grid.bin([agg])
        assert buffer.tolist() == expected
def test_count_1d_ordinal():
    """Count rows per ordinal bin through the dataframe API."""
    values = np.array([-1, -2, 0, 1, 4, 5], dtype='i8')
    df = vaex.from_arrays(x=values)
    ordinal_count = 5
    ordinal_binner = df._binner_ordinal('x', ordinal_count)
    binning_grid = vaex.superagg.Grid([ordinal_binner])
    counter = vaex.agg.count()
    counts = df._agg(counter, binning_grid)
    expected = [0, 2, 1, 1, 0, 0, 1, 1]
    assert counts.tolist() == expected
def test_count_1d_object():
    """Count object-dtype values per ordinal bin.

    The expected counts show that the ``None`` and ``np.nan`` entries of the
    object array are not counted, while the string ``'1'`` is.
    """
    keys = np.array([-1, -1, 0, 0, 2, 6, 10], dtype='i8')
    objects = np.array([ 1, 1, 1, None, 1, '1', np.nan], dtype='O')
    ordinal_count = 5
    binner = vaex.superagg.BinnerOrdinal_int64('x', ordinal_count, 0)
    binner.set_data(keys)
    grid = vaex.superagg.Grid([binner])
    counter = vaex.superagg.AggCount_object(grid)
    counts = np.asarray(counter)
    counter.set_data(objects, 0)
    grid.bin([counter])
    expected = [0, 2, 1, 0, 1, 0, 0, 1]
    assert counts.tolist() == expected
def __init__(self, df, grid):
# Set up an aggregation task for dataframe `df` binned according to `grid`.
#
# df:   dataframe the task runs on; its `filtered` flag is forwarded as
#       `pre_filter` and its executor's thread pool determines `nthreads`.
# grid: grid whose `binners` supply the expressions this task depends on.
#
# NOTE(review): the enclosing class and the rest of this method are not
# visible in this excerpt; the method may continue below.

# The task depends on one expression per binner of the grid.
expressions = [binner.expression for binner in grid.binners]
Task.__init__(self, df, expressions, name="statisticNd", pre_filter=df.filtered)
self.df = df
# Keep the grid that was passed in; the per-thread grids below are built from it.
self.parent_grid = grid
self.nthreads = self.df.executor.thread_pool.nthreads
# for each thread, we have 1 grid and a set of binners
# (each binner is copied, presumably so threads do not share binner state - confirm)
self.grids = [vaex.superagg.Grid([binner.copy() for binner in grid.binners]) for i in range(self.nthreads)]
# Starts out empty; nothing in the visible code fills it.
self.aggregations = []
# self.grids = []