How to use the agate.aggregations.Aggregation class in agate

To help you get started, we’ve selected a few agate examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github wireservice / agate / agate / aggregations.py View on Github external
def run(self, table):
        """
        Report whether every value in the column passes the test.

        When the column's data type is :class:`.Boolean` the values themselves
        are checked for truthiness; otherwise each value is passed through
        ``self._test``.

        :returns:
            :class:`bool`
        """
        target = table.columns[self._column_name]
        values = target.values()

        if not isinstance(target.data_type, Boolean):
            return all(self._test(value) for value in values)

        return all(values)


class Count(Aggregation):
    """
    Count values. If no arguments are specified, this is simply a count of the
    number of rows in the table. If only :code:`column_name` is specified, this
    will count the number of non-null values in that column. If both
    :code:`column_name` and :code:`value` are specified, then occurrences of
    that specific value in the specified column will be counted.

    :param column_name:
        A column to count values in.
    :param value:
        Any value to be counted, including :code:`None`.
    """
    def __init__(self, column_name=None, value=default):
        # ``default`` (not ``None``) marks "no value given", so that ``None``
        # itself remains a value that can be counted.
        self._column_name = column_name
        self._value = value
github wireservice / agate / agate / aggregations.py View on Github external
has_nulls = HasNulls(self._column_name).run(table)

        if has_nulls:
            warn_null_calculation(self, column)

    def run(self, table):
        """
        Return the mean squared deviation of the column's non-null values.

        The mean is obtained from ``self._mean`` and the sum of squared
        deviations is divided by the number of non-null values.
        """
        values = table.columns[self._column_name].values_without_nulls()
        center = self._mean.run(table)

        squared_deviations = sum((value - center) ** 2 for value in values)

        return squared_deviations / len(values)


class StDev(Aggregation):
    """
    Calculate the sample standard deviation of a column containing
    :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name
        # Variance aggregation over the same column, built once up front.
        self._variance = Variance(column_name)

    def get_aggregate_data_type(self, table):
        # The result type is always Number; the table is not consulted.
        return Number()

    def validate(self, table):
        column = table.columns[self._column_name]

        # Reject columns whose data type is not Number.
        if not isinstance(column.data_type, Number):
            raise DataTypeError('StDev can only be applied to columns containing Number data.')
github wireservice / agate / agate / aggregations.py View on Github external
def validate(self, table):
        """
        Verify that this aggregation can run on ``table`` without errors.

        :meth:`.Table.aggregate` calls this before :meth:`run`. This
        implementation performs no checks.
        """
        return None

    def run(self, table):
        """
        Run this aggregation against a column of ``table`` and return the
        result.

        :raises NotImplementedError:
            Always, in this implementation.
        """
        raise NotImplementedError


class Summary(Aggregation):
    """
    An aggregation that can apply an arbitrary function to a column.

    :param column_name:
        The column being summarized.
    :param data_type:
        The return type of this aggregation.
    :param func:
        A function which will be passed the column for processing.
    """
    def __init__(self, column_name, data_type, func):
        # All three arguments are stored as given; nothing is validated or
        # invoked at construction time.
        self._column_name = column_name
        self._data_type = data_type
        self._func = func

    def get_aggregate_data_type(self, table):
github wireservice / agate / agate / aggregations.py View on Github external
self._value = value

    def get_aggregate_data_type(self, table):
        """
        Return a new :class:`.Number` instance; ``table`` is not consulted.
        """
        return Number()

    def run(self, table):
        """
        Count rows or column values, depending on how this aggregation was
        configured.

        :returns:
            The number of rows when no column name was given; the number of
            occurrences of the configured value when one was supplied;
            otherwise the number of non-null values in the column.
        """
        if self._column_name is None:
            return len(table.rows)

        column = table.columns[self._column_name]

        if self._value is default:
            return len(column.values_without_nulls())

        return column.values().count(self._value)


class Min(Aggregation):
    """
    Calculate the minimum value in a column. May be applied to columns containing
    :class:`.DateTime` or :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        column = table.columns[self._column_name]

        # The result has the same data type as the column itself.
        # NOTE(review): Date is accepted here although the class docstring
        # lists only DateTime and Number -- confirm which is intended.
        # For any other data type this method falls through and returns None.
        if (isinstance(column.data_type, Number) or
        isinstance(column.data_type, Date) or
        isinstance(column.data_type, DateTime)):
            return column.data_type

    def validate(self, table):
github wireservice / agate / agate / aggregations.py View on Github external
def validate(self, table):
        """
        Ensure the target column holds a supported data type.

        :raises DataTypeError:
            If the column's data type is not :class:`.Number`,
            :class:`.Date` or :class:`.DateTime`.
        """
        column = table.columns[self._column_name]

        # NOTE(review): the message names "Min", but the run() that follows
        # this method returns max(...) -- confirm which aggregation this
        # validate() belongs to and whether the name in the message is right.
        if not isinstance(column.data_type, (Number, Date, DateTime)):
            raise DataTypeError('Min can only be applied to columns containing DateTime or Number data.')

    def run(self, table):
        """
        Return the largest of the column's non-null values.
        """
        values = table.columns[self._column_name].values_without_nulls()

        return max(values)


class MaxPrecision(Aggregation):
    """
    Calculate the most decimal places present for any value in this column.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        # The result type is always Number; the table is not consulted.
        return Number()

    def validate(self, table):
        column = table.columns[self._column_name]

        # Reject columns whose data type is not Number.
        if not isinstance(column.data_type, Number):
            raise DataTypeError('MaxPrecision can only be applied to columns containing Number data.')

    def run(self, table):
github wireservice / agate / agate / aggregations.py View on Github external
# No remainder
            if low == high:
                value = data[low - 1]
            # Remainder
            else:
                value = (data[low - 1] + data[high - 1]) / 2

            quantiles.append(value)

        # Hundredth percentile is final datum
        quantiles.append(data[-1])

        return Quantiles(quantiles)


class Quartiles(Aggregation):
    """
    The quartiles of a :class:`.Number` column based on the 25th, 50th and
    75th percentiles.

    "Zeroth" (min value) and "Fourth" (max value) quartiles are included for
    reference and intuitive indexing.

    See :class:`Percentiles` for implementation details.

    This aggregation cannot be applied to a :class:`.TableSet`.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def validate(self, table):
        # Look up the column this aggregation targets.
        column = table.columns[self._column_name]
github wireservice / agate / agate / aggregations.py View on Github external
if has_nulls:
            warn_null_calculation(self, column)

    def run(self, table):
        """
        Return the value that occurs most often among the column's non-null
        values.
        """
        tally = defaultdict(int)

        for value in table.columns[self._column_name].values_without_nulls():
            tally[value] += 1

        # Every key looked up here already exists, so .get never yields None.
        return max(tally, key=tally.get)


class IQR(Aggregation):
    """
    Calculate the interquartile range of a column containing
    :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name
        # Percentiles aggregation over the same column, built once up front.
        self._percentiles = Percentiles(column_name)

    def get_aggregate_data_type(self, table):
        # The result type is always Number; the table is not consulted.
        return Number()

    def validate(self, table):
        column = table.columns[self._column_name]

        # Reject columns whose data type is not Number.
        if not isinstance(column.data_type, Number):
            raise DataTypeError('IQR can only be applied to columns containing Number data.')
github wireservice / agate / agate / aggregations.py View on Github external
def get_aggregate_data_type(self, table):
        """
        Return a new :class:`.Number` instance; ``table`` is not consulted.
        """
        return Number()

    def validate(self, table):
        """
        Reject columns that do not contain :class:`.Number` data.

        :raises DataTypeError:
            If the column's data type is not :class:`.Number`.
        """
        data_type = table.columns[self._column_name].data_type

        if isinstance(data_type, Number):
            return

        raise DataTypeError('MaxPrecision can only be applied to columns containing Number data.')

    def run(self, table):
        """
        Return the result of ``max_precision`` applied to the column's
        non-null values.
        """
        values = table.columns[self._column_name].values_without_nulls()

        return max_precision(values)


class Sum(Aggregation):
    """
    Calculate the sum of a column containing :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        # The result type is always Number; the table is not consulted.
        return Number()

    def validate(self, table):
        column = table.columns[self._column_name]

        # Reject columns whose data type is not Number.
        if not isinstance(column.data_type, Number):
            raise DataTypeError('Sum can only be applied to columns containing Number data.')

    def run(self, table):
github wireservice / agate / agate / aggregations.py View on Github external
class HasNulls(Aggregation):
    """
    Report whether a column holds at least one null (:code:`None`) value.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        The result of this aggregation is :class:`.Boolean`.
        """
        result_type = Boolean()

        return result_type

    def run(self, table):
        """
        Return :code:`True` if :code:`None` is among the column's values.
        """
        values = table.columns[self._column_name].values()

        return None in values


class Any(Aggregation):
    """
    Returns :code:`True` if any value in a column passes a truth test. The
    truth test may be omitted when testing :class:`.Boolean` data.

    :param column_name:
        The name of the column to test.
    :param test:
        A function that takes a value and returns `True` or `False`.
    """
    def __init__(self, column_name, test=None):
        self._column_name = column_name
        self._test = test

    def get_aggregate_data_type(self, table):
        # The result type is always Boolean; the table is not consulted.
        return Boolean()

    def validate(self, table):
        # Look up the column this aggregation targets.
        column = table.columns[self._column_name]
github wireservice / agate / agate / aggregations.py View on Github external
def validate(self, table):
        """
        Check that the column can be aggregated, warning if it has nulls.

        :raises DataTypeError:
            If the column's data type is not :class:`.Number`.
        """
        column = table.columns[self._column_name]

        if not isinstance(column.data_type, Number):
            raise DataTypeError('PopulationStDev can only be applied to columns containing Number data.')

        # Null values do not block the calculation; they only trigger a warning.
        if HasNulls(self._column_name).run(table):
            warn_null_calculation(self, column)

    def run(self, table):
        """
        Return the square root of the value computed by
        ``self._population_variance`` for ``table``.
        """
        variance = self._population_variance.run(table)

        return variance.sqrt()


class MAD(Aggregation):
    """
    Calculate the `median absolute deviation <https://en.wikipedia.org/wiki/Median_absolute_deviation>`_
    of a column containing :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name
        # Median aggregation over the same column, built once up front.
        self._median = Median(column_name)

    def get_aggregate_data_type(self, table):
        # The result type is always Number; the table is not consulted.
        return Number()

    def validate(self, table):
        column = table.columns[self._column_name]

        # Reject columns whose data type is not Number.
        if not isinstance(column.data_type, Number):
            raise DataTypeError('MAD can only be applied to columns containing Number data.')