def __init__(self, env=None, ip=None, channels=None, conda_pars=None,
             packages=None, autodetect=False, **kwargs):
    # Signature reconstructed from the attributes set below; parameter
    # order and defaults are assumptions, not the original ones.
    ip = ip or socket.gethostbyname(socket.gethostname())
    self.env = env
    self.application_master_container = None
    self.app_id = None
    self.channels = channels or []
    self.conda_pars = conda_pars
    try:
        self.local_cluster = LocalCluster(n_workers=0, ip=ip)
    except OSError:  # IOError is an alias of OSError on Python 3
        # Default scheduler port is taken; let the scheduler pick a free one.
        self.local_cluster = LocalCluster(n_workers=0, scheduler_port=0,
                                          ip=ip)
    # Dedupe package specs by name (first occurrence wins), then sort;
    # sorted() already returns a list.
    self.packages = sorted(
        unique((packages or []) + global_packages, key=first_word))
    self.knit = Knit(autodetect=autodetect, **kwargs)
    # Tear down the YARN application at interpreter exit.
    atexit.register(self.stop)
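# Why unique(..., key=first_word) is used above: the first spec per package
# name wins, so user-supplied pins shadow the global defaults appended after
# them. first_word here is a hypothetical stand-in for the helper the class
# imports; it takes the name portion of a conda spec like 'numpy>=1.14'.
import re
from toolz import unique

def first_word(spec):
    return re.split(r'[^A-Za-z0-9_.-]', spec, maxsplit=1)[0]

user_packages = ['numpy>=1.14', 'pandas']
global_packages = ['numpy', 'distributed']
assert sorted(unique(user_packages + global_packages, key=first_word)) == \
    ['distributed', 'numpy>=1.14', 'pandas']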
def get_colors(palette, names):
unique_names = list(sorted(unique(names)))
n_names = len(unique_names)
palette_lookup = brewer[palette]
keys = list(palette_lookup.keys())
low, high = min(keys), max(keys)
if n_names > high:
colors = cycle(palette_lookup[high])
elif n_names < low:
colors = palette_lookup[low]
else:
colors = palette_lookup[n_names]
color_lookup = dict(zip(unique_names, colors))
return [color_lookup[n] for n in names]
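# Usage sketch for get_colors, with the imports it relies on: bokeh's brewer
# maps palette name -> {size: [hex colors]}; repeated names share a color, and
# name counts beyond the largest palette size cycle through it.
from itertools import cycle
from bokeh.palettes import brewer
from toolz import unique

colors = get_colors('Spectral', ['a', 'b', 'a', 'c'])
assert colors[0] == colors[2]  # both 'a' entries get the same color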
def __repr__(self) -> str:
    return '{}({})'.format(
        self.name,
        ', '.join(
            '{}={!r}'.format(slot, getattr(self, slot))
            for slot in toolz.unique(self.__slots__ + ('nullable',))
        ),
    )
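# Why toolz.unique appears in __repr__ above: appending 'nullable' would
# duplicate it for types that already carry that slot, and unique drops the
# repeat while preserving slot order.
import toolz
assert list(toolz.unique(('value', 'nullable') + ('nullable',))) == ['value', 'nullable']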
def root_tables(self):
result = list(
toolz.unique(
toolz.concatv(
self.expr._root_tables(),
distinct_roots(
*toolz.concatv(
self.window._order_by, self.window._group_by
)
),
)
)
)
return result
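# The concatv/unique pair above in one line: concatv lazily chains iterables
# and unique keeps the first occurrence of each element, so roots shared by
# the expression and its window specs appear only once.
import toolz
assert list(toolz.unique(toolz.concatv([1, 2], [2, 3], [3, 4]))) == [1, 2, 3, 4]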
result: list
A list of lists of keys that show which data exist on which
divisions
"""
    _is_broadcastable = partial(is_broadcastable, dfs)
    dfs1 = [df for df in dfs
            if isinstance(df, _Frame) and not _is_broadcastable(df)]
    if len(dfs1) == 0:  # check the filtered frames, not the raw inputs
        raise ValueError("dfs contains no DataFrame and Series")
    if not all(df.known_divisions for df in dfs1):
        raise ValueError(
            "Not all divisions are known, can't align "
            "partitions. Please use `set_index` "
            "to set the index."
        )
    divisions = list(unique(merge_sorted(*[df.divisions for df in dfs1])))
    if len(divisions) == 1:  # single value for index
        divisions = (divisions[0], divisions[0])
    dfs2 = [
        df.repartition(divisions, force=True) if isinstance(df, _Frame) else df
        for df in dfs
    ]
    result = list()
    inds = [0 for df in dfs]
    for d in divisions[:-1]:
        L = list()
        for i, df in enumerate(dfs2):
            if isinstance(df, _Frame):
                j = inds[i]
                divs = df.divisions
                if j < len(divs) - 1 and divs[j] == d:
                    # reconstructed continuation: record this frame's
                    # partition key at the current division boundary
                    L.append((df._name, inds[i]))
                    inds[i] += 1
                else:
                    L.append(None)
            else:  # scalars have no divisions
                L.append(None)
        result.append(L)
    return dfs2, tuple(divisions), result
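# How the aligned divisions above are computed: toolz.merge_sorted interleaves
# the already-sorted division tuples and unique collapses the duplicates at
# partition boundaries shared by several frames.
from toolz import merge_sorted, unique

assert list(unique(merge_sorted([0, 5, 10], [0, 3, 10]))) == [0, 3, 5, 10]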
def _leaves(self):
return list(unique(concat(i._leaves() for i in self.children)))
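# The _leaves pattern above with a hypothetical two-level tree: concat flattens
# each child's leaf list and unique drops leaves reachable through more than
# one child (identity-hashed objects, so first-seen order is kept).
from toolz import concat, unique

class Leaf:
    def _leaves(self):
        return [self]

class Branch:
    def __init__(self, *children):
        self.children = children

    def _leaves(self):
        return list(unique(concat(c._leaves() for c in self.children)))

shared = Leaf()
assert Branch(shared, shared, Leaf())._leaves()[0] is shared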
     ('c', 0): (<function concat at ...>, [('b', 0), ('b', 1)]),
     ('c', 1): ('b', 2),
     ('c', 2): ('b', 3)}
    """
if not isinstance(b, (list, tuple)):
raise ValueError('New division must be list or tuple')
b = list(b)
    if len(b) < 2:
        # minimum division is 2 elements, like [0, 0]
        raise ValueError('New division must have at least 2 elements')
if b != sorted(b):
raise ValueError('New division must be sorted')
if len(b[:-1]) != len(list(unique(b[:-1]))):
msg = 'New division must be unique, except for the last element'
raise ValueError(msg)
if force:
        if a[0] < b[0]:
            msg = ('left side of the new division must be equal to or '
                   'smaller than the old division')
            raise ValueError(msg)
        if a[-1] > b[-1]:
            msg = ('right side of the new division must be equal to or '
                   'larger than the old division')
            raise ValueError(msg)
else:
if a[0] != b[0]:
msg = 'left side of old and new divisions are different'
raise ValueError(msg)
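# The uniqueness check above deliberately exempts the final element, so a
# trailing duplicate (an empty last partition) is legal:
from toolz import unique

b = [0, 1, 2, 2]
assert len(b[:-1]) == len(list(unique(b[:-1])))  # accepted
b = [0, 1, 1, 2]
assert len(b[:-1]) != len(list(unique(b[:-1])))  # rejected with ValueError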
def _from_projects(self, key, unique=False):
values = (getattr(p, key) for p in self.projects)
values = reduce(operator.add, values)
if unique:
values = toolz.unique(values)
return list(values)
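# Sketch of the _from_projects aggregation with a hypothetical Project type:
# the per-project lists are concatenated with operator.add, then optionally
# deduplicated in first-seen order.
import operator
from collections import namedtuple
from functools import reduce
import toolz

Project = namedtuple('Project', ['tags'])
projects = [Project(['db', 'web']), Project(['web', 'cli'])]
values = reduce(operator.add, (p.tags for p in projects))
assert list(toolz.unique(values)) == ['db', 'web', 'cli']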
Combine a list of base tuples into a single base tuple. This forms the
reducing step in a reduction tree.
``finalize(aggs)``
Given a tuple of base numpy arrays, returns the finalized ``DataArray``
or ``Dataset``.
"""
reds = list(traverse_aggregation(agg))
# List of base reductions (actually computed)
bases = list(unique(concat(r._build_bases(cuda) for r in reds)))
dshapes = [b.out_dshape(schema) for b in bases]
# List of tuples of (append, base, input columns, temps)
calls = [_get_call_tuples(b, d, schema, cuda) for (b, d) in zip(bases, dshapes)]
# List of unique column names needed
cols = list(unique(concat(pluck(2, calls))))
# List of temps needed
temps = list(pluck(3, calls))
create = make_create(bases, dshapes, cuda)
info = make_info(cols)
append = make_append(bases, cols, calls, glyph)
combine = make_combine(bases, dshapes, temps)
finalize = make_finalize(bases, agg, schema, cuda)
return create, info, append, combine, finalize
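# How the unique input columns are gathered above: slot 2 of each call tuple
# holds that reduction's column names, which pluck extracts before the
# concat/unique pass (the tuples below are illustrative, not real calls).
from toolz import concat, pluck, unique

calls = [(None, None, ('x',), ()), (None, None, ('x', 'y'), ())]
assert list(unique(concat(pluck(2, calls)))) == ['x', 'y']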
def active_columns(self):
return sorted(unique(x._name for x in self.traverse()
if isinstance(x, ScalarSymbol)))