How to use the toolz.concat function in toolz

To help you get started, we’ve selected a few toolz.concat examples based on popular ways the function is used in public projects.

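Before the project snippets, here is a minimal sketch of what concat itself does: it lazily chains an iterable of iterables into a single flat iterator, flattening exactly one level.

from toolz import concat

# concat accepts any iterable of iterables and yields their items in order
flat = list(concat([[1, 2], (3, 4), range(5, 7)]))
print(flat)  # [1, 2, 3, 4, 5, 6]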

github blaze / blaze / blaze / compute / chunks.py
def compute_up(expr, c, **kwargs):
    (chunkleaf, chunkexpr), (aggleaf, aggexpr) = split(expr._child, expr)

    # Put each chunk into a list, then concatenate
    intermediate = list(concat(into([], compute(chunkexpr, {chunkleaf: chunk}))
                               for chunk in c))

    return compute(aggexpr, {aggleaf: intermediate})
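The pattern above, computing each chunk independently and then flattening the per-chunk results, works the same outside of blaze; a sketch with a hypothetical stand-in for compute():

from toolz import concat

def per_chunk(chunk):              # stand-in for compute(chunkexpr, ...)
    return [x * 2 for x in chunk]

chunks = [[1, 2], [3, 4]]
intermediate = list(concat(per_chunk(c) for c in chunks))
print(intermediate)  # [2, 4, 6, 8]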
github blaze / blaze / blaze / expr / expressions.py
def selection(table, predicate):
    subexpr = common_subexpression(table, predicate)

    if not builtins.all(isinstance(node, (ElemWise, Symbol))
                        or node.isidentical(subexpr)
                        for node in concat([path(predicate, subexpr),
                                            path(table, subexpr)])):

        raise ValueError("Selection not properly matched with table:\n"
                   "child: %s\n"
                   "apply: %s\n"
                   "predicate: %s" % (subexpr, table, predicate))

    if not isboolean(predicate.dshape):
        raise TypeError("Must select over a boolean predicate.  Got:\n"
                        "%s[%s]" % (table, predicate))

    return table._subs({subexpr: Selection(subexpr, predicate)})
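Here concat simply lets a single all(...) check range over both expression paths; the same trick in isolation:

from toolz import concat

path_a, path_b = [1, 2, 3], [4, 5]
# one lazy pass over both sequences, no combined list built first
assert all(x > 0 for x in concat([path_a, path_b]))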
github holoviz / datashader / datashader / compiler.py
        Appends the ``i``th row of the table to the ``(x, y)`` bin, given the
        base arrays and columns in ``aggs_and_cols``. This does the bulk of the
        work.

    ``combine(base_tuples)``
        Combine a list of base tuples into a single base tuple. This forms the
        reducing step in a reduction tree.

    ``finalize(aggs)``
        Given a tuple of base numpy arrays, returns the finalized ``DataArray``
        or ``Dataset``.
    """
    reds = list(traverse_aggregation(agg))

    # List of base reductions (actually computed)
    bases = list(unique(concat(r._build_bases(cuda) for r in reds)))
    dshapes = [b.out_dshape(schema) for b in bases]
    # List of tuples of (append, base, input columns, temps)
    calls = [_get_call_tuples(b, d, schema, cuda) for (b, d) in zip(bases, dshapes)]
    # List of unique column names needed
    cols = list(unique(concat(pluck(2, calls))))
    # List of temps needed
    temps = list(pluck(3, calls))

    create = make_create(bases, dshapes, cuda)
    info = make_info(cols)
    append = make_append(bases, cols, calls, glyph)
    combine = make_combine(bases, dshapes, temps)
    finalize = make_finalize(bases, agg, schema, cuda)

    return create, info, append, combine, finalize
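The unique(concat(pluck(2, calls))) line deduplicates the input column names across all call tuples; a sketch with made-up call tuples:

from toolz import concat, pluck, unique

calls = [('append_a', 'base_a', ('x', 'y'), ()),
         ('append_b', 'base_b', ('y', 'z'), ())]
# pluck(2, ...) picks the column tuple out of every call tuple
cols = list(unique(concat(pluck(2, calls))))
print(cols)  # ['x', 'y', 'z']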
github ibis-project / ibis / ibis / pandas / api.py
def _flatten_subclass_tree(cls):
    """Return the set of all child classes of `cls`.

    Parameters
    ----------
    cls : Type

    Returns
    -------
    frozenset[Type]
    """
    subclasses = frozenset(cls.__subclasses__())
    children = frozenset(toolz.concat(map(_flatten_subclass_tree, subclasses)))
    return frozenset({cls}) | subclasses | children
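A usage sketch, assuming `import toolz` and the function above are in scope, with a small hypothetical hierarchy; note the result includes the root class itself:

class A: pass
class B(A): pass
class C(B): pass

assert _flatten_subclass_tree(A) == frozenset({A, B, C})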
github holoviz / datashader / datashader / reductions.py
def inputs(self):
    return tuple(unique(concat(v.inputs for v in self.values)))
github dask / dask / dask / array / utils.py
def _check_dsk(dsk):
    """ Check that graph is well named and non-overlapping """
    if not isinstance(dsk, HighLevelGraph):
        return

    assert all(isinstance(k, (tuple, str)) for k in dsk.layers)
    freqs = frequencies(concat(dsk.dicts.values()))
    non_one = {k: v for k, v in freqs.items() if v != 1}
    assert not non_one, non_one
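frequencies(concat(...)) counts every key across all layer mappings, so any count above one flags an overlap between layers; in isolation:

from toolz import concat, frequencies

layer1 = {('x', 0): 1, ('x', 1): 2}
layer2 = {('x', 1): 3, ('y', 0): 4}
# iterating a dict yields its keys, so concat chains the key sets
freqs = frequencies(concat([layer1, layer2]))
non_one = {k: v for k, v in freqs.items() if v != 1}
print(non_one)  # {('x', 1): 2}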
github soaxelbrooke / python-bpe / bpe / encoder.py
def learn_word_vocab(self, sentences):
    # type: (Encoder, Iterable[str]) -> Dict[str, int]
    """ Build vocab from self.word_vocab_size most common tokens in provided sentences """
    word_counts = Counter(word for word in toolz.concat(map(self.word_tokenizer, sentences)))
    for token in set(self.required_tokens or []):
        word_counts[token] = int(2**63)
    sorted_word_counts = sorted(word_counts.items(), key=lambda p: -p[1])
    return {word: idx for idx, (word, count) in enumerate(sorted_word_counts[:self.word_vocab_size])}
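The counting core, isolated with str.split standing in for the encoder's word_tokenizer:

from collections import Counter
import toolz

sentences = ['the cat sat', 'the dog sat']
# tokenize every sentence, then count the flattened token stream
word_counts = Counter(toolz.concat(map(str.split, sentences)))
print(word_counts.most_common(2))  # [('the', 2), ('sat', 2)]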
github dask / dask / dask / array / routines.py
def split_at_breaks(array, breaks, axis=0):
    """ Split an array into a list of arrays (using slices) at the given breaks

    >>> split_at_breaks(np.arange(6), [3, 5])
    [array([0, 1, 2]), array([3, 4]), array([5])]
    """
    padded_breaks = concat([[None], breaks, [None]])
    slices = [slice(i, j) for i, j in sliding_window(2, padded_breaks)]
    preslice = (slice(None),) * axis
    split_array = [array[preslice + (s,)] for s in slices]
    return split_array
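Padding the break list with None on both ends means sliding_window(2, ...) yields exactly the (start, stop) bounds for each slice:

from toolz import concat, sliding_window

breaks = [3, 5]
padded = concat([[None], breaks, [None]])
print(list(sliding_window(2, padded)))
# [(None, 3), (3, 5), (5, None)] -> slice(None, 3), slice(3, 5), slice(5, None)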
github Clinical-Genomics / chanjo / chanjo / exporter / core.py
  columns = (i.contig, i.start - 1, i.end, i.id, i.strand)

  # BED files are tab-delimited
  delimiter = '\t'

  # 1. fetch interval tuples from the database (producer)
  # 2. stringify each item in each subsequence (interval tuple)
  # 3. join lines on tab-character
  # 4. prepend the header
  bed_lines = pipe(
    fetch_records(chanjo_db, columns),
    map(map(str)),                        # convert fields to strings
    map(juxt(compose(list, take(4)),      # keep first 4 fields
             lambda _: [str(bed_score)],  # insert BED score
             compose(list, last))),       # keep last field
    map(concat),                          # flatten each item
    map(delimiter.join)                   # join on \t
  )

  for bed_line in bed_lines:
    yield bed_line
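This pipeline relies on the curried variants from toolz.curried (map(f) and take(n) return partials), so map(concat) is itself a function that flattens every item in a stream. A self-contained sketch of the same shape, with hypothetical record data:

from toolz import concat, juxt, pipe
from toolz.curried import map  # curried: map(f) returns a partial

records = [('chr1', '10', '20', 'gene1', '+')]
bed_lines = pipe(
    records,
    map(juxt(lambda r: r[:4],        # keep first 4 fields
             lambda r: ['100'],      # insert a hypothetical BED score
             lambda r: [r[-1]])),    # keep last field (strand)
    map(concat),                     # flatten each (a, b, c) tuple of fields
    map('\t'.join),                  # join on tab
)
print(next(iter(bed_lines)))  # chr1\t10\t20\tgene1\t100\t+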
github UDST / urbansim / urbansim / sim / simulation.py
    multiple years can go in the same file.

    Parameters
    ----------
    fname : str
        File name for HDFStore. Will be opened in append mode and closed
        at the end of this function.
    models : list of str
        Models from which to gather injected tables for saving.
    year : int or None
        If an integer, used as a prefix along with table names for
        labeling DataFrames in the HDFStore.

    """
    models = (get_model(m) for m in toolz.unique(models))
    table_names = toolz.unique(toolz.concat(m._tables_used() for m in models))
    tables = (get_table(t) for t in table_names)

    key_template = '{}/{{}}'.format(year) if year is not None else '{}'

    with pd.get_store(fname, mode='a') as store:
        for t in tables:
            store[key_template.format(t.name)] = t.to_frame()
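The toolz.unique(toolz.concat(...)) step collects the table names used by all models, deduplicated while preserving first-seen order; in isolation:

import toolz

tables_used = [['households', 'buildings'], ['buildings', 'jobs']]
names = list(toolz.unique(toolz.concat(tables_used)))
print(names)  # ['households', 'buildings', 'jobs']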