def compute_up(expr, c, **kwargs):
    (chunkleaf, chunkexpr), (aggleaf, aggexpr) = split(expr._child, expr)

    # Put each chunk's result into a list, then concatenate
    intermediate = list(concat(into([], compute(chunkexpr, {chunkleaf: chunk}))
                               for chunk in c))

    return compute(aggexpr, {aggleaf: intermediate})
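The flattening idiom here is just `toolz.concat` over per-chunk results; a minimal, self-contained illustration with made-up chunk outputs:

from toolz import concat

# Hypothetical per-chunk results (e.g. partial lists computed per chunk)
chunk_results = [[1, 2], [3], [4, 5]]

intermediate = list(concat(chunk_results))  # lazily chains, then materializes
assert intermediate == [1, 2, 3, 4, 5]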
def selection(table, predicate):
    subexpr = common_subexpression(table, predicate)

    if not builtins.all(isinstance(node, (ElemWise, Symbol))
                        or node.isidentical(subexpr)
                        for node in concat([path(predicate, subexpr),
                                            path(table, subexpr)])):
        raise ValueError("Selection not properly matched with table:\n"
                         "child: %s\n"
                         "apply: %s\n"
                         "predicate: %s" % (subexpr, table, predicate))

    if not isboolean(predicate.dshape):
        raise TypeError("Must select over a boolean predicate. Got:\n"
                        "%s[%s]" % (table, predicate))

    return table._subs({subexpr: Selection(subexpr, predicate)})
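A hedged usage sketch, assuming the Blaze `symbol` API this appears to belong to: when the predicate is built from the same table, it shares the table's subexpression and both checks above pass.

from blaze import symbol

# Hypothetical table expression
t = symbol('t', 'var * {name: string, amount: int64}')

# Predicate and table share the subexpression `t`, and the predicate's
# dshape is boolean, so a Selection node is substituted for `t`.
expr = t[t.amount > 100]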
    ``append(i, x, y, *aggs_and_cols)``
        Appends the ``i``th row of the table to the ``(x, y)`` bin, given the
        base arrays and columns in ``aggs_and_cols``. This does the bulk of
        the work.

    ``combine(base_tuples)``
        Combine a list of base tuples into a single base tuple. This forms
        the reducing step in a reduction tree.

    ``finalize(aggs)``
        Given a tuple of base numpy arrays, returns the finalized
        ``DataArray`` or ``Dataset``.
    """
    reds = list(traverse_aggregation(agg))

    # List of base reductions (actually computed)
    bases = list(unique(concat(r._build_bases(cuda) for r in reds)))
    dshapes = [b.out_dshape(schema) for b in bases]

    # List of tuples of (append, base, input columns, temps)
    calls = [_get_call_tuples(b, d, schema, cuda) for (b, d) in zip(bases, dshapes)]

    # List of unique column names needed
    cols = list(unique(concat(pluck(2, calls))))

    # List of temps needed
    temps = list(pluck(3, calls))

    create = make_create(bases, dshapes, cuda)
    info = make_info(cols)
    append = make_append(bases, cols, calls, glyph)
    combine = make_combine(bases, dshapes, temps)
    finalize = make_finalize(bases, agg, schema, cuda)

    return create, info, append, combine, finalize
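A hedged sketch of how the five returned callables might be driven as a reduction tree; the `aggregate` driver and the (df, xs, ys) partition format below are hypothetical, and `create`/`info` are assumed to take the aggregate shape and a dataframe of the needed columns, respectively:

def aggregate(partitions, shape):
    # Hypothetical reduction-tree driver
    base_tuples = []
    for df, xs, ys in partitions:
        aggs = create(shape)                  # initialized base arrays
        cols = info(df)                       # preprocessed input columns
        for i, (x, y) in enumerate(zip(xs, ys)):
            append(i, x, y, *aggs, *cols)     # bin one row (bulk of the work)
        base_tuples.append(aggs)
    combined = combine(base_tuples)           # reducing step of the tree
    return finalize(combined)                 # DataArray or Dataset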
def _flatten_subclass_tree(cls):
    """Return the set of all child classes of `cls`.

    Parameters
    ----------
    cls : Type

    Returns
    -------
    frozenset[Type]
    """
    subclasses = frozenset(cls.__subclasses__())
    children = frozenset(toolz.concat(map(_flatten_subclass_tree, subclasses)))
    return frozenset({cls}) | subclasses | children
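A quick usage sketch with a toy class hierarchy:

class A: pass
class B(A): pass
class C(B): pass

assert _flatten_subclass_tree(A) == frozenset({A, B, C})
assert _flatten_subclass_tree(C) == frozenset({C})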
def inputs(self):
    return tuple(unique(concat(v.inputs for v in self.values)))
def _check_dsk(dsk):
    """Check that the graph is well named and non-overlapping."""
    if not isinstance(dsk, HighLevelGraph):
        return

    assert all(isinstance(k, (tuple, str)) for k in dsk.layers)

    # Count how often each task key appears across all layers; any key
    # appearing more than once indicates overlapping layers.
    freqs = frequencies(concat(dsk.dicts.values()))
    non_one = {k: v for k, v in freqs.items() if v != 1}
    assert not non_one, non_one
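The duplicate-detection idiom, shown standalone on two hypothetical graph layers (iterating a dict yields its keys):

from toolz import concat, frequencies

layers = [{'a': 1, 'b': 2}, {'a': 3}]     # 'a' is defined twice

freqs = frequencies(concat(layers))
assert freqs == {'a': 2, 'b': 1}
assert {k: v for k, v in freqs.items() if v != 1} == {'a': 2}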
def learn_word_vocab(self, sentences):
    # type: (Encoder, Iterable[str]) -> Dict[str, int]
    """Build a vocab from the self.word_vocab_size most common tokens in the provided sentences."""
    word_counts = Counter(toolz.concat(map(self.word_tokenizer, sentences)))

    # Force required tokens into the vocab by giving them effectively infinite counts
    for token in set(self.required_tokens or []):
        word_counts[token] = int(2**63)

    sorted_word_counts = sorted(word_counts.items(), key=lambda p: -p[1])
    return {word: idx
            for idx, (word, count) in enumerate(sorted_word_counts[:self.word_vocab_size])}
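The counting idiom is easy to try standalone; a minimal sketch with a stand-in whitespace tokenizer (`most_common` replaces the explicit sort, which is equivalent here):

from collections import Counter
from toolz import concat

sentences = ["the cat sat", "the cat ran"]
tokenize = str.split                      # stand-in word tokenizer

word_counts = Counter(concat(map(tokenize, sentences)))
assert word_counts["the"] == 2 and word_counts["sat"] == 1

# Most frequent words first, then assign contiguous integer ids
vocab = {word: idx for idx, (word, _) in enumerate(word_counts.most_common(3))}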
def split_at_breaks(array, breaks, axis=0):
    """Split an array into a list of arrays (using slices) at the given breaks.

    >>> split_at_breaks(np.arange(6), [3, 5])
    [array([0, 1, 2]), array([3, 4]), array([5])]
    """
    padded_breaks = concat([[None], breaks, [None]])
    slices = [slice(i, j) for i, j in sliding_window(2, padded_breaks)]
    preslice = (slice(None),) * axis
    split_array = [array[preslice + (s,)] for s in slices]
    return split_array
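Because of `preslice`, the same function splits along other axes as well; a small sketch:

import numpy as np

x = np.arange(12).reshape(3, 4)

left, right = split_at_breaks(x, [2], axis=1)  # split columns at index 2
assert left.shape == (3, 2) and right.shape == (3, 2)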
columns = (i.contig, i.start - 1, i.end, i.id, i.strand)

# BED files are tab-delimited
delimiter = '\t'

# 1. fetch interval tuples from the database (producer)
# 2. stringify each item in each subsequence (interval tuple)
# 3. join fields on the tab character
# 4. prepend the header
bed_lines = pipe(
    fetch_records(chanjo_db, columns),
    map(map(str)),                          # convert fields to strings
    map(juxt(compose(list, take(4)),        # keep the first 4 fields
             lambda _: [str(bed_score)],    # insert the BED score
             compose(list, last))),         # keep the last field (strand)
    map(concat),                            # flatten each item
    map(delimiter.join)                     # join fields on '\t'
)

for bed_line in bed_lines:
    yield bed_line
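To make the `juxt`/`compose` step concrete, here is a hedged, self-contained walk-through of one already-stringified record (the interval tuple and the score '17' are made up):

from toolz import compose, concat, juxt, last
from toolz.curried import take

fields = ['1', '99', '200', 'interval-1', '+']

transform = juxt(compose(list, take(4)),   # keep the first four fields
                 lambda _: ['17'],         # stand-in BED score
                 compose(list, last))      # keep the strand (last field)

line = '\t'.join(concat(transform(fields)))
assert line == '1\t99\t200\tinterval-1\t17\t+'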
    multiple years can go in the same file.

    Parameters
    ----------
    fname : str
        File name for the HDFStore. It will be opened in append mode and
        closed at the end of this function.
    models : list of str
        Models from which to gather injected tables for saving.
    year : int or None
        If an integer, used as a prefix along with table names for
        labeling DataFrames in the HDFStore.
    """
    models = (get_model(m) for m in toolz.unique(models))
    table_names = toolz.unique(toolz.concat(m._tables_used() for m in models))
    tables = (get_table(t) for t in table_names)

    key_template = '{}/{{}}'.format(year) if year is not None else '{}'

    with pd.HDFStore(fname, mode='a') as store:
        for t in tables:
            store[key_template.format(t.name)] = t.to_frame()
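A quick, self-contained check of the key naming scheme (the table name is hypothetical):

key_template = '{}/{{}}'.format(2020)          # year given
assert key_template.format('households') == '2020/households'

key_template = '{}'                            # no year
assert key_template.format('households') == 'households'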