How to use the toolz.groupby function

To help you get started, we've selected a few examples of toolz.groupby in use, drawn from popular public projects.

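toolz.groupby(key, seq) applies key to every element of seq and returns a plain dict mapping each key result to the list of elements that produced it. A minimal sketch (the sample data here is made up):

from toolz import groupby

# Group names by length; the result is an ordinary dict of lists.
names = ["Alice", "Bob", "Charlie", "Dan", "Edith"]
groupby(len, names)
# {5: ['Alice', 'Edith'], 3: ['Bob', 'Dan'], 7: ['Charlie']}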

github pytoolz / toolz / bench / test_groupby.py
def test_groupby():
    # `identity` is toolz.identity; `data` is a fixture defined elsewhere
    # in this benchmark module.
    groupby(identity, data)

github mesosphere / dcos-commons / tools / diagnostics / service_bundle.py
def download_log_files(self):
        all_tasks = self.scheduler_tasks + self.tasks()

        tasks_by_agent_id = dict(groupby("slave_id", all_tasks))

        agent_id_by_task_id = dict(map(lambda task: (task["id"], task["slave_id"]), all_tasks))

        agent_executor_paths = {}
        for agent_id in tasks_by_agent_id.keys():
            agent_executor_paths[agent_id] = agent.debug_agent_files(agent_id)

        task_executor_sandbox_paths = {}
        for agent_id, tasks in tasks_by_agent_id.items():
            for task in tasks:
                task_executor_sandbox_path = sdk_diag._find_matching_executor_path(
                    agent_executor_paths[agent_id], sdk_diag._TaskEntry(task)
                )
                if task_executor_sandbox_path:
                    task_executor_sandbox_paths[task["id"]] = task_executor_sandbox_path
                else:
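
Note the key here is a string rather than a function: given a non-callable key, toolz.groupby groups mapping-like elements by that field, so groupby("slave_id", all_tasks) is equivalent to groupby(lambda task: task["slave_id"], all_tasks). A sketch with made-up task records:

from toolz import groupby

# Hypothetical task records shaped like the Mesos tasks above.
tasks = [
    {"id": "t1", "slave_id": "agent-a"},
    {"id": "t2", "slave_id": "agent-b"},
    {"id": "t3", "slave_id": "agent-a"},
]
groupby("slave_id", tasks)
# {'agent-a': [{'id': 't1', ...}, {'id': 't3', ...}],
#  'agent-b': [{'id': 't2', ...}]}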

github dask / dask / dask / dataframe / io / parquet / core.py
def apply_conjunction(parts, statistics, conjunction):
        for column, operator, value in conjunction:
            out_parts = []
            out_statistics = []
            for part, stats in zip(parts, statistics):
                if "filter" in stats and stats["filter"]:
                    continue  # Filtered by engine
                try:
                    c = toolz.groupby("name", stats["columns"])[column][0]
                    min = c["min"]
                    max = c["max"]
                except KeyError:
                    out_parts.append(part)
                    out_statistics.append(stats)
                else:
                    if (
                        operator == "=="
                        and min <= value <= max
                        or operator == "<"
                        and min < value
                        or operator == "<="
                        and min <= value
                        or operator == ">"
                        and max > value
                        or operator == ">="

github logpy / logpy / kanren / core.py
def earlyorder(*goals):
    """ Reorder goals to avoid EarlyGoalErrors

    All goals are evaluated.  Those that raise EarlyGoalErrors are placed at
    the end in a lall

    See also:
        EarlyGoalError
    """
    if not goals:
        return ()
    groups = groupby(earlysafe, goals)
    good = groups.get(True, [])
    bad = groups.get(False, [])

    if not good:
        raise EarlyGoalError()
    elif not bad:
        return tuple(good)
    else:
        return tuple(good) + ((lall, ) + tuple(bad), )
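
Grouping on a predicate, as with earlysafe above, yields at most two groups keyed True and False, which groups.get(True, []) and groups.get(False, []) then read off safely even when one side is empty. For example:

from toolz import groupby

def is_even(n):
    return n % 2 == 0

groups = groupby(is_even, [1, 2, 3, 4, 5])
good = groups.get(True, [])   # [2, 4]
bad = groups.get(False, [])   # [1, 3, 5]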

github timothyrenner / bigfoot-dash-app / app.py
def bigfoot_map(sightings):
    classifications = groupby('classification', sightings)
    return {
        "data": [
                {
                    "type": "scattermapbox",
                    "lat": listpluck("latitude", class_sightings),
                    "lon": listpluck("longitude", class_sightings),
                    "text": listpluck("title", class_sightings),
                    "mode": "markers",
                    "name": classification,
                    "marker": {
                        "size": 3,
                        "opacity": 1.0
                    }
                }
                for classification, class_sightings in classifications.items()
            ],
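
This snippet pairs groupby with field extraction: each classification becomes one scattermapbox trace. listpluck is app-local, presumably list plus toolz.pluck; the underlying pattern looks like this (sighting records are made up):

from toolz import groupby, pluck

sightings = [
    {"classification": "Class A", "latitude": 45.2, "longitude": -122.7},
    {"classification": "Class B", "latitude": 38.9, "longitude": -120.1},
    {"classification": "Class A", "latitude": 47.0, "longitude": -121.9},
]
for classification, group in groupby("classification", sightings).items():
    lats = list(pluck("latitude", group))  # e.g. [45.2, 47.0] for Class A
    print(classification, lats)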

github dask / dask / dask / bag / core.py
def partition(grouper, sequence, npartitions, p, nelements=2 ** 20):
    """ Partition a bag along a grouper, store partitions on disk. """
    for block in partition_all(nelements, sequence):
        d = groupby(grouper, block)
        d2 = defaultdict(list)
        for k, v in d.items():
            d2[abs(hash(k)) % npartitions].extend(v)
        p.append(d2, fsync=True)
    return p
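
The shape here is: group within a bounded block, then fold the groups into a fixed number of buckets by hashing the key. The same idea without the on-disk partd store (the function name and sizes below are ours):

from collections import defaultdict
from toolz import groupby, partition_all

def partition_in_memory(grouper, sequence, npartitions, nelements=4):
    # Process the sequence in bounded-size blocks, as partition() does above.
    buckets = defaultdict(list)
    for block in partition_all(nelements, sequence):
        for k, v in groupby(grouper, block).items():
            buckets[abs(hash(k)) % npartitions].extend(v)
    return dict(buckets)

partition_in_memory(lambda n: n % 3, range(10), npartitions=2)
# {0: [...], 1: [...]}  -- bucket membership depends on hash(k)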

github apache / arrow / dev / tasks / crossbow.py
# jira category => website category mapping
        categories = {
            'New Feature': 'feature',
            'Improvement': 'feature',
            'Wish': 'feature',
            'Task': 'feature',
            'Test': 'bug',
            'Bug': 'bug',
            'Sub-task': 'feature'
        }
        titles = {
            'feature': 'New Features and Improvements',
            'bug': 'Bug Fixes'
        }

        issues_by_category = toolz.groupby(
            lambda issue: categories[issue.fields.issuetype.name],
            self.issues
        )

        out = StringIO()

        for category in ('feature', 'bug'):
            title = titles[category]
            issues = issues_by_category[category]
            issues.sort(key=lambda x: x.key)

            out.write(md('## {}\n\n', title))
            for issue in issues:
                link = md('[{0}]({1}/browse/{0})', issue.key, self.server)
                out.write(md('* {} - {}\n', link, issue.fields.summary))
            out.write('\n')
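
Here the key function routes each issue through a lookup table before grouping. The same lookup-then-group shape in miniature (toy data):

from toolz import groupby

categories = {"Bug": "bug", "New Feature": "feature", "Improvement": "feature"}
issue_types = ["Bug", "New Feature", "Improvement", "Bug"]
groupby(lambda t: categories[t], issue_types)
# {'bug': ['Bug', 'Bug'], 'feature': ['New Feature', 'Improvement']}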

github dask / dask / dask / base.py
def collections_to_dsk(collections, optimize_graph=True, **kwargs):
    """
    Convert many collections into a single dask graph, after optimization
    """
    optimizations = kwargs.pop("optimizations", None) or config.get("optimizations", [])

    if optimize_graph:
        groups = groupby(optimization_function, collections)
        groups = {opt: _extract_graph_and_keys(val) for opt, val in groups.items()}

        for opt in optimizations:
            groups = {k: (opt(dsk, keys), keys) for k, (dsk, keys) in groups.items()}

        dsk = merge(
            *map(
                ensure_dict,
                [opt(dsk, keys, **kwargs) for opt, (dsk, keys) in groups.items()],
            )
        )
    else:
        dsk, _ = _extract_graph_and_keys(collections)

    return dsk
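
groupby only requires that key results be hashable, so grouping collections by their optimization function works because Python functions hash by identity. A toy version grouping values by type:

from toolz import groupby

groupby(type, [1, "a", 2.0, "b", 3])
# {<class 'int'>: [1, 3], <class 'str'>: ['a', 'b'], <class 'float'>: [2.0]}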

github enigmampc / catalyst / catalyst / pipeline / engine.py
        Returns
        -------
        results : dict
            Dictionary mapping requested results to outputs.
        """
        self._validate_compute_chunk_params(dates, assets, initial_workspace)
        get_loader = self.get_loader

        # Copy the supplied initial workspace so we don't mutate it in place.
        workspace = initial_workspace.copy()

        # If loadable terms share the same loader and extra_rows, load them all
        # together.
        loader_group_key = juxt(get_loader, getitem(graph.extra_rows))
        loader_groups = groupby(loader_group_key, graph.loadable_terms)

        refcounts = graph.initial_refcounts(workspace)

        for term in graph.execution_order(refcounts):
            # `term` may have been supplied in `initial_workspace`, and in the
            # future we may pre-compute loadable terms coming from the same
            # dataset.  In either case, we will already have an entry for this
            # term, which we shouldn't re-compute.
            if term in workspace:
                continue

            # Asset labels are always the same, but date labels vary by how
            # many extra rows are needed.
            mask, mask_dates = graph.mask_and_dates_for_term(
                term,
                self._root_mask_term,
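
toolz.juxt(f, g) builds a function that applies f and g to the same argument and returns the results as a tuple, so loader_group_key above groups each term by the pair (loader, extra_rows). Tuples are hashable, which is exactly what a composite groupby key needs (toy data below):

from toolz import groupby, juxt

words = ["apple", "ant", "bee", "bear", "cat"]

# Composite key: (first letter, length).
key = juxt(lambda w: w[0], len)
groupby(key, words)
# {('a', 5): ['apple'], ('a', 3): ['ant'], ('b', 3): ['bee'],
#  ('b', 4): ['bear'], ('c', 3): ['cat']}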