How to use the toolz.curried.filter function in toolz

To help you get started, we’ve selected a few toolz.curried.filter examples, based on popular ways it is used in public projects.

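All of the snippets below rely on the same property: toolz.curried.filter is a curried version of the builtin filter, so calling it with only a predicate returns a function that still expects an iterable. That partially applied filter can then be dropped into pipe, addCallback, or any other spot that wants a one-argument callable. A minimal sketch of the pattern:

from toolz import pipe
from toolz.curried import filter

# Supplying only the predicate returns a reusable pipeline stage.
keep_even = filter(lambda n: n % 2 == 0)

evens = pipe(range(10), keep_even, list)
print(evens)  # [0, 2, 4, 6, 8]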

github rackerlabs / otter / scripts / trigger_convergence.py
    :return: Deferred fired with list of {"tenantId": .., "groupId": ..} dict
    """
    log = mock_log()
    if parsed.group:
        groups = [g.split(":") for g in parsed.group]
        return succeed(
            [{"tenantId": tid, "groupId": gid} for tid, gid in groups])
    elif parsed.all:
        d = store.get_all_valid_groups()
    elif parsed.tenant_id:
        d = get_groups_of_tenants(log, store, parsed.tenant_id)
    elif parsed.disabled_tenants:
        non_conv_tenants = conf["non-convergence-tenants"]
        d = store.get_all_valid_groups()
        d.addCallback(
            filter(lambda g: g["tenantId"] not in set(non_conv_tenants)))
        d.addCallback(list)
    elif parsed.conf_conv_tenants:
        d = get_groups_of_tenants(log, store, conf["convergence-tenants"])
    else:
        raise SystemExit("Unexpected group selection")
    return d

github jondot / mongomon / mongomon / __init__.py
        self.started_cmds.pop(event.request_id)

        duration = event.duration_micros
        if self.is_below_lwm(duration):
            return

        [cmd, q, meta] = take(3, command.items())
        self.render_cmd(cmd, duration, q)

        ents = pipe(
            traceback.extract_stack(),
            self.config.stack_preprocess,
            map(lambda rec: StackEntry(self.config.file_capture, *rec)),
            filter(lambda ent: ent.file_capture()),
            filter(
                lambda ent: len(
                    list(
                        filter(
                            lambda p: re.match(p, ent.file, re.M), self.config.ignores
                        )
                    )
                )
                == 0
            ),
            groupby(lambda ent: ent.file),
        )
        self.render_stack(ents)

github flosell / trailscraper / trailscraper / policy_generator.py
def generate_policy(selected_records):
    """Generates a policy from a set of records"""
    statements = pipe(selected_records,
                      mapz(Record.to_statement),
                      filterz(lambda statement: statement is not None),
                      _combine_statements_by(lambda statement: statement.Resource),
                      _combine_statements_by(lambda statement: statement.Action),
                      sortedz())

    return PolicyDocument(
        Version="2012-10-17",
        Statement=statements,
    )
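
The mapz, filterz and sortedz names above are trailscraper's own aliases; presumably they are just the curried toolz functions imported under different names, along the lines of:

from toolz import pipe
# Assumed aliasing for illustration; the project's actual import may differ.
from toolz.curried import filter as filterz, map as mapz, sorted as sortedz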

github blaze / blaze / blaze / compute / sparksql.py
def compute_down(expr, data, **kwargs):
    """ Compile a blaze expression to a sparksql expression"""
    leaves = expr._leaves()

    # make sure we only have a single leaf node
    if len(leaves) != 1:
        raise ValueError('Must compile from exactly one root database')

    leaf, = leaves

    # field expressions on the database are Field instances with a record
    # measure whose immediate child is the database leaf
    tables = pipe(expr._subterms(), filter(istable(leaf)), list)

    # raise if we don't have tables in our database
    if not tables:
        raise ValueError('Expressions not referencing a table cannot be '
                         'compiled')

    # make new symbols for each table
    new_leaves = [symbol(t._name, t.dshape) for t in tables]

    # sub them in the expression
    expr = expr._subs(dict(zip(tables, new_leaves)))

    # compute using sqlalchemy
    scope = dict(zip(new_leaves, map(make_sqlalchemy_table, tables)))
    query = compute(expr, scope, return_type='native')

github rackerlabs / otter / otter / metrics.py
def get_all_metrics(dispatcher, tenanted_groups, log, _print=False,
                    get_all_metrics_effects=get_all_metrics_effects):
    """
    Gather server data and produce metrics for all groups across all tenants
    in a region.

    :param dispatcher: An Effect dispatcher.
    :param dict tenanted_groups: Scaling Groups grouped on tenantid
    :param bool _print: Should the function print while processing?

    :return: ``list`` of `GroupMetrics` as `Deferred`
    """
    effs = get_all_metrics_effects(tenanted_groups, log, _print=_print)
    d = _perform_limited_effects(dispatcher, effs, 10)
    d.addCallback(filter(lambda x: x is not None))
    return d.addCallback(lambda x: reduce(operator.add, x, []))

github blaze / blaze / blaze / compute / pandas.py
    summary(x_sum=sum(t.x), y_count=count(t.y))

    A mapping of those names to new leaves to use in another compuation
    >>> two  # doctest: +SKIP
    {'x_sum': x_sum, 'y_count': y_count}

    A mapping of computations to do for each column
    >>> three   # doctest: +SKIP
    {'a': x_sum, 'b': (x_sum + y_count) - 1}

    In this way, ``compute_by`` is able to do simple pandas reductions using
    groups.agg(...) and then do columnwise arithmetic afterwards.
    """
    seen_names.clear()
    name_dict.clear()
    exprs = pipe(expr.values, map(Expr._traverse), concat, filter(lambda x:
        isinstance(x, Reduction)), set)
    one = summary(**dict((_name(expr), expr) for expr in exprs))

    two = dict((_name(expr), symbol(_name(expr), datashape.var * expr.dshape))
                for expr in exprs)

    d = dict((expr, two[_name(expr)]) for expr in exprs)
    three = dict((name, value._subs(d)) for name, value in zip(expr.names,
        expr.values))

    return one, two, three

github aanari / pg-materialize / pg_materialize / pg_materialize.py
valmap(lambda val: list(val), valfilter(lambda val: val, dag))
        )

    create_views = pipe(sorted_views,
        map(lambda view: view_content[view]),
        unique,
        list
    )

    create_script = generate_script(create_views, transaction)

    refresh_prefix = 'REFRESH MATERIALIZED VIEW CONCURRENTLY '
    if transaction:
        refresh_prefix = '  ' + refresh_prefix
    refresh_views = pipe(sorted_views,
        filter(lambda view: re.search(pattern, view) and not (ignore_refresh and re.search(ignore_refresh , view))),
        map(lambda view: refresh_prefix + view + ';'),
        list
    )

    if verbose:
        print('Selecting %d Materialized Views for Refresh' % len(refresh_views))

    refresh_script = generate_script(refresh_views, transaction, "\n\n")

    if dry_run:
        print('Dry Run Option Enabled - Skipping Script Generation')
        return

    timestr = time.strftime("%Y%m%d-%H%M%S")

    serialize_script('create', timestr, create_script, output_dir, verbose)

github nubank / fklearn / src / fklearn / validation / splitters.py
def _get_lc_folds(date_range: Union[pd.DatetimeIndex, pd.PeriodIndex],
                  date_fold_filter_fn: Callable[[DateType], pd.DataFrame],
                  test_time: pd.Series,
                  time_column: str,
                  min_samples: int) -> List[Tuple[pd.Series, pd.Series]]:
    return pipe(date_range,
                map(date_fold_filter_fn),  # iteratively filter the dates
                map(lambda df: df[time_column]),  # keep only time column
                filter(lambda s: len(s.index) > min_samples),
                lambda train: zip(train, repeat(test_time)),
                list)
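
One detail worth noting across these examples: like the builtin, the curried filter yields its results lazily, which is why several of the pipelines above end with an explicit list or set step (or a follow-up addCallback(list) in the otter snippet). A small illustration, assuming nothing beyond toolz itself:

from toolz import pipe
from toolz.curried import filter

keep_upper = filter(str.isupper)

lazy = pipe(["a", "B", "c", "D"], keep_upper)          # a lazy filter object
result = pipe(["a", "B", "c", "D"], keep_upper, list)  # ['B', 'D']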