How to use the agate.aggregations.base.Aggregation class in agate

To help you get started, we’ve selected a few agate examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wireservice / agate / agate / aggregations / percentiles.py View on Github external
#!/usr/bin/env python

import math

from agate.aggregations.base import Aggregation
from agate.aggregations.has_nulls import HasNulls
from agate.data_types import Number
from agate.exceptions import DataTypeError
from agate.utils import Quantiles
from agate.warns import warn_null_calculation


class Percentiles(Aggregation):
    """
    Divide a column into 100 equal-size groups using the "CDF" method.

    See `this explanation `_
    of the various methods for computing percentiles.

    "Zeroth" (min value) and "Hundredth" (max value) percentiles are included
    for reference and intuitive indexing.

    A reference implementation was provided by
    `pycalcstats `_.

    This aggregation can not be applied to a :class:`.TableSet`.

    :param column_name:
        The name of a column containing :class:`.Number` data.
github wireservice / agate / agate / aggregations / has_nulls.py View on Github external
#!/usr/bin/env python

from agate.aggregations.base import Aggregation
from agate.data_types import Boolean


class HasNulls(Aggregation):
    """
    Report whether a column holds at least one null (``None``) value.

    :param column_name:
        The name of the column to check.
    """
    def __init__(self, column_name):
        # Only the name is stored; the column is looked up when run() is called.
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        Return the data type of this aggregation's result: a Boolean.
        """
        return Boolean()

    def run(self, table):
        """
        Return ``True`` if ``None`` appears among the column's values.
        """
        column = table.columns[self._column_name]
        column_values = column.values()

        return None in column_values
github wireservice / agate / agate / aggregations / max_precision.py View on Github external
#!/usr/bin/env python

from agate.aggregations.base import Aggregation
from agate.data_types import Number
from agate.exceptions import DataTypeError
from agate.utils import max_precision


class MaxPrecision(Aggregation):
    """
    Find the most decimal places present for any value in this column.

    :param column_name:
        The name of the column to be searched.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        return Number()

    def validate(self, table):
        column = table.columns[self._column_name]

        if not isinstance(column.data_type, Number):
github wireservice / agate / agate / aggregations / sum.py View on Github external
#!/usr/bin/env python

from agate.aggregations.base import Aggregation
from agate.data_types import Number
from agate.exceptions import DataTypeError


class Sum(Aggregation):
    """
    Calculate the sum of a column.

    :param column_name:
        The name of a column containing :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        return Number()

    def validate(self, table):
        column = table.columns[self._column_name]

        if not isinstance(column.data_type, Number):
github wireservice / agate / agate / aggregations / any.py View on Github external
#!/usr/bin/env python

from agate.aggregations.base import Aggregation
from agate.data_types import Boolean


class Any(Aggregation):
    """
    Determine whether at least one value in a column satisfies a test.

    When the column holds :class:`.Boolean` data the test may be left out.

    :param column_name:
        The name of the column to check.
    :param test:
        A callable that accepts a single value and returns `True` or `False`.
        Defaults to `None`.
    """
    def __init__(self, column_name, test=None):
        # Both arguments are stored as given; nothing is validated here.
        self._column_name, self._test = column_name, test

    def get_aggregate_data_type(self, table):
        """
        Return the data type of this aggregation's result: a Boolean.
        """
        return Boolean()
github wireservice / agate / agate / aggregations / max_length.py View on Github external
#!/usr/bin/env python

from decimal import Decimal

from agate.aggregations.base import Aggregation
from agate.data_types import Number, Text
from agate.exceptions import DataTypeError


class MaxLength(Aggregation):
    """
    Find the length of the longest string in a column.

    :param column_name:
        The name of a column containing :class:`.Text` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        return Number()

    def validate(self, table):
        column = table.columns[self._column_name]

        if not isinstance(column.data_type, Text):
github wireservice / agate / agate / aggregations / min.py View on Github external
#!/usr/bin/env python

from agate.aggregations.base import Aggregation
from agate.data_types import Date, DateTime, Number
from agate.exceptions import DataTypeError


class Min(Aggregation):
    """
    Find the minimum value in a column.

    This aggregation can be applied to columns containing :class:`.Date`,
    :class:`.DateTime`, or :class:`.Number` data.

    :param column_name:
        The name of the column to be searched.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        column = table.columns[self._column_name]

        if (isinstance(column.data_type, Number) or
github wireservice / agate / agate / aggregations / variance.py View on Github external
#!/usr/bin/env python

from agate.aggregations.base import Aggregation
from agate.aggregations.has_nulls import HasNulls
from agate.aggregations.mean import Mean
from agate.data_types import Number
from agate.exceptions import DataTypeError
from agate.warns import warn_null_calculation


class Variance(Aggregation):
    """
    Calculate the sample variance of a column.

    For the population variance see :class:`.PopulationVariance`.

    :param column_name:
        The name of a column containing :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name
        self._mean = Mean(column_name)

    def get_aggregate_data_type(self, table):
        return Number()

    def validate(self, table):
github wireservice / agate / agate / aggregations / all.py View on Github external
#!/usr/bin/env python

from agate.aggregations.base import Aggregation
from agate.data_types import Boolean


class All(Aggregation):
    """
    Check if all values in a column pass a test.

    :param column_name:
        The name of the column to check.
    :param test:
        Either a single value that all values in the column are compared against
        (for equality) or a function that takes a column value and returns
        `True` or `False`.
    """
    def __init__(self, column_name, test):
        self._column_name = column_name

        if callable(test):
            self._test = test
        else:
github wireservice / agate / agate / aggregations / deciles.py View on Github external
#!/usr/bin/env python

from agate.aggregations.base import Aggregation
from agate.aggregations.has_nulls import HasNulls
from agate.aggregations.percentiles import Percentiles
from agate.data_types import Number
from agate.exceptions import DataTypeError
from agate.utils import Quantiles
from agate.warns import warn_null_calculation


class Deciles(Aggregation):
    """
    Calculate the deciles of a column based on its percentiles.

    Deciles will be equivalent to the 10th, 20th ... 90th percentiles.

    "Zeroth" (min value) and "Tenth" (max value) deciles are included for
    reference and intuitive indexing.

    See :class:`Percentiles` for implementation details.

    This aggregation can not be applied to a :class:`.TableSet`.

    :param column_name:
        The name of a column containing :class:`.Number` data.
    """
    def __init__(self, column_name):