How to use the toolz.curried.curry function in toolz

To help you get started, we’ve selected a few toolz examples based on popular ways the library is used in public projects.

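Before diving into the project examples, here is a minimal sketch of what `curry` does: a curried function called with only some of its arguments returns a new function that waits for the rest. The `toolz.curried` namespace used throughout re-exports toolz (and builtins such as `map` and `filter`) with curry already applied.

from toolz import curry

@curry
def add(x, y):
    return x + y

add(1, 2)     # 3 -- all arguments supplied, called normally
inc = add(1)  # partial application: returns a function waiting for `y`
inc(2)        # 3
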

github berrytj / bookends / example.py View on Github
import json
from datetime import timedelta
from operator import itemgetter

from funcy import count_by, mapcat

# `today`, `logs` and `days_of_logs` are defined earlier in the example file.

def functional():
  return count_by(itemgetter('hour'),
                  map(json.loads,
                      filter(None,
                             mapcat(lambda output: output.strip().split('\n'),
                                    map(lambda date: logs[date.strftime('%Y/%m/%d')],
                                        map(lambda days_ago: today - timedelta(days=days_ago),
                                            range(1, days_of_logs + 1)))))))

print(functional())


from toolz.curried import map, filter, mapcat, curry
count_by = curry(count_by)

from bookends import _

def piped():
  return (_| range(1, days_of_logs + 1)
           | map(lambda days_ago: today - timedelta(days=days_ago))
           | map(lambda date: logs[date.strftime('%Y/%m/%d')])
           | mapcat(lambda output: output.strip().split('\n'))
           | filter(None)
           | map(json.loads)
           | count_by(itemgetter('hour'))
           |_)

print(piped())
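
The pivotal line above is `count_by = curry(count_by)`: once curried, `count_by(itemgetter('hour'))` is a partial application waiting for the sequence flowing through the pipe. The same mechanics in isolation:

from operator import itemgetter
from funcy import count_by
from toolz import curry

count_by = curry(count_by)
by_hour = count_by(itemgetter('hour'))            # waits for the sequence
by_hour([{'hour': 1}, {'hour': 1}, {'hour': 2}])  # {1: 2, 2: 1}
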
github nubank / fklearn / src / fklearn / validation / splitters.py View on Github
@curry
def stability_curve_time_in_space_splitter(train_data: pd.DataFrame,
                                           training_time_limit: DateType,
                                           space_column: str,
                                           time_column: str,
                                           freq: str = 'M',
                                           space_hold_percentage: float = 0.5,
                                           random_state: int = None,
                                           min_samples: int = 1000) -> SplitterReturnType:
    """
    Splits the data into temporal buckets given by the specified frequency.
    The training set is fixed to the period before the holdout, while the
    holdout is a rolling window; each fold moves the holdout further into
    the future.
    Useful to see how model performance degrades as the training data gets more
    outdated. Folds are made so that ALL IDs in the holdout also appear in
    the training set.
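
Because the splitter is decorated with `@curry`, it can be fully configured up front and handed to a validator as a one-argument function of the data. A minimal sketch, with hypothetical column names and date:

# hypothetical values: everything except `train_data` is bound here
splitter = stability_curve_time_in_space_splitter(
    training_time_limit='2020-01-01',
    space_column='customer_id',
    time_column='purchase_date',
)
folds = splitter(train_data)  # only the DataFrame was still missing
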
github iliatimofeev / gpdvega / gpdvega / geodata.py View on Github
@curry
def gpd_to_values(data):
    """Replace a GeoDataFrame by a data model with values.
    For ``geopandas.GeoDataFrame`` columns values are stored as Foreign Members
    of GeoJSON feature objects. For all other types uses function
    :py:func:`altair.to_values`."""
    if isinstance(data, gpd.GeoDataFrame):
        data = alt.utils.sanitize_dataframe(data)
        values = geopandas_to_dict(data)
        return {'values': json.dumps(values)}
    else:
        return alt.to_values(data)
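
With a single required argument, `@curry` here mainly means the function can be passed around unapplied and composed with others; called directly with a GeoDataFrame it behaves like a plain function. A sketch with a hypothetical input file:

import geopandas as gpd

gdf = gpd.read_file('boroughs.geojson')  # hypothetical input
spec = gpd_to_values(gdf)                # {'values': '...GeoJSON features...'}
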
github lmhale99 / atomman / atomman / tools / duplicates_allclose.py View on Github

from toolz.curried import curry, compose
import pandas as pd
import numpy as np

__all__ = ['duplicates_allclose']

pdapply = curry(pd.DataFrame.apply)  # pylint: disable=invalid-name
sort_values = curry(pd.DataFrame.sort_values)  # pylint: disable=invalid-name
pdall = curry(pd.DataFrame.all)  # pylint: disable=invalid-name
duplicated = curry(pd.DataFrame.duplicated)  # pylint: disable=invalid-name
diff = curry(pd.DataFrame.diff)  # pylint: disable=invalid-name
npappend = curry(np.append)  # pylint: disable=invalid-name


def sequence(*args):
    """Compose functions in order

    Args:
      args: the functions to compose

    Returns:
      composed functions

    >>> assert sequence(lambda x: x + 1, lambda x: x * 2)(3) == 8
    """
    return compose(*args[::-1])
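
Combined with the curried pandas helpers above, `sequence` turns method chains into plain function composition. A minimal sketch with a hypothetical column name:

df = pd.DataFrame(dict(a=[1.0, 1.0, 2.0]))

# each helper is bound to its keyword arguments and waits for the frame
find_dupes = sequence(
    sort_values(by='a'),
    duplicated(subset=['a'], keep=False),
)
find_dupes(df)  # same as df.sort_values(by='a').duplicated(subset=['a'], keep=False)
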

github nubank / fklearn / src / fklearn / validation / splitters.py View on Github
@curry
def time_and_space_learning_curve_splitter(train_data: pd.DataFrame,
                                           training_time_limit: str,
                                           space_column: str,
                                           time_column: str,
                                           freq: str = 'M',
                                           space_hold_percentage: float = 0.5,
                                           holdout_gap: timedelta = timedelta(days=0),
                                           random_state: int = None,
                                           min_samples: int = 1000) -> SplitterReturnType:
    """
    Splits the data into temporal buckets given by the specified frequency.
    Uses a fixed out-of-ID and out-of-time holdout set for every fold.
    The training size grows per fold, with more recent data added each time.
    Useful for learning-curve validation, that is, for seeing how holdout
    performance changes as the training set grows with more recent data.
github blaze / odo / odo / backends / url.py View on Github
def sample_url_line_delimited(data, lines=5, encoding='utf-8', timeout=None):
    """Get a size `length` sample from an URL CSV or URL line-delimited JSON.

    Parameters
    ----------
    data : URL(CSV)
        A hosted CSV
    lines : int, optional, default ``5``
        Number of lines to read into memory
    """

    with closing(urlopen(data.url, timeout=timeout)) as r:
        raw = pipe(r, take(lines), map(bytes.strip),
                   curry(codecs.iterdecode, encoding=encoding),
                   b'\n'.decode(encoding).join)
        with tmpfile(data.filename) as fn:
            with codecs.open(fn, 'wb', encoding=encoding) as f:
                f.write(raw)
            yield fn
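
Note the inline use of `curry` on a plain stdlib function: `curry(codecs.iterdecode, encoding=encoding)` fixes the encoding and leaves a one-argument decoder for `pipe`. The same trick in isolation:

import codecs
from toolz import curry

decode = curry(codecs.iterdecode, encoding='utf-8')
list(decode([b'foo', b'bar']))  # ['foo', 'bar']
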
github altair-viz / altair / altair / vega / data.py View on Github
@curry
def sample(data, n=None, frac=None):
    """Reduce the size of the data model by sampling without replacement."""
    _check_data_type(data)
    if isinstance(data, pd.DataFrame):
        return data.sample(n=n, frac=frac)
    elif isinstance(data, dict):
        if 'values' in data:
            values = data['values']
            n = n if n else int(frac*len(values))
            values = random.sample(values, n)
            return {'values': values}
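
As a curried transformer, `sample` can be configured first and applied to the data later. A sketch with a hypothetical DataFrame:

import pandas as pd

df = pd.DataFrame({'x': range(1000)})
take_100 = sample(n=100)  # partially applied: waits for the data
small = take_100(df)      # equivalent to sample(df, n=100)
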
github nubank / fklearn / src / fklearn / tuning / samplers.py View on Github
@curry
def remove_by_feature_shuffling(log: LogType,
                                predict_fn: PredictFnType,
                                eval_fn: EvalFnType,
                                eval_data: pd.DataFrame,
                                extractor: ExtractorFnType,
                                metric_name: str,
                                max_removed_by_step: int = 50,
                                threshold: float = 0.005,
                                speed_up_by_importance: bool = False,
                                parallel: bool = False,
                                nthread: int = 1,
                                seed: int = 7) -> List[str]:

    """
        Performs feature selection by comparing the model's evaluation on the
        test data against its evaluation on the same data with features
        randomly shuffled
github nubank / fklearn / src / fklearn / tuning / utils.py View on Github
@curry
def get_avg_metric_from_extractor(logs: LogType, extractor: ExtractorFnType, metric_name: str) -> float:
    metric_folds = extract(logs["validator_log"], extractor)
    return metric_folds[metric_name].mean()
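
The same pattern once more: binding the trailing keyword arguments leaves a one-argument function over the validation logs. A sketch with a hypothetical extractor and metric name:

# hypothetical: the extractor and metric name depend on the pipeline
get_avg_auc = get_avg_metric_from_extractor(
    extractor=roc_auc_extractor,
    metric_name='roc_auc',
)
avg_auc = get_avg_auc(logs)  # equivalent to passing all three at once
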
github lmhale99 / atomman / atomman / tools / duplicates_allclose.py View on Github
@curry
def fduplicates(dcols, fcols, dataframe):
    """Check for mixture of exact duplicates and closeness

    Args:
      dcols: the columns to check for exact duplicates
      fcols: the columns to check for closeness, a dict with column names as
       keys and tolerances as values
      dataframe: the dataframe

    Returns:
      a dataframe of the same shape but with bool values indicating
      neighboring close values in the same column

    >>> fduplicates(
    ...     dcols=['C', 'D', 'E'],
    ...     fcols=dict(A=0.02, B=0.02),