Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def run(self, table):
    """
    Evaluate the truth test against every value in the column.

    :returns:
        :class:`bool`
    """
    target = table.columns[self._column_name]
    values = target.values()

    # Non-Boolean data is judged by the supplied test function.
    if not isinstance(target.data_type, Boolean):
        return all(self._test(item) for item in values)

    # Boolean data is judged on its own truthiness.
    return all(values)
class Count(Aggregation):
    """
    Count values.

    If no arguments are specified, this is simply a count of the number of
    rows in the table. If only :code:`column_name` is specified, this will
    count the number of non-null values in that column. If both
    :code:`column_name` and :code:`value` are specified, occurrences of that
    specific value in the specified column will be counted.

    :param column_name:
        A column to count values in.
    :param value:
        Any value to be counted, including :code:`None`.
    """
    def __init__(self, column_name=None, value=default):
        # The constructor only records its arguments. No table exists at
        # construction time, so nothing about the column can be checked here.
        self._column_name = column_name
        self._value = value
def run(self, table):
    """
    Average the squared distance of each non-null value from the value
    produced by ``self._mean``.
    """
    values = table.columns[self._column_name].values_without_nulls()
    center = self._mean.run(table)

    squared_deviations = [(value - center) ** 2 for value in values]

    return sum(squared_deviations) / len(values)
class StDev(Aggregation):
    """
    Calculate the sample standard deviation of a column containing
    :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._variance = Variance(column_name)
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        The result of this aggregation is :class:`.Number` data.
        """
        return Number()

    def validate(self, table):
        """
        Reject columns that do not hold :class:`.Number` data.
        """
        data_type = table.columns[self._column_name].data_type

        if isinstance(data_type, Number):
            return

        raise DataTypeError('StDev can only be applied to columns containing Number data.')
def validate(self, table):
    """
    Hook for any checks needed to verify that this aggregation can run on
    the provided table without errors. Called by :meth:`.Table.aggregate`
    before :meth:`run`. The default performs no checks.
    """
    return None
def run(self, table):
    """
    Execute this aggregation on a given column and return the result.

    This default always raises :class:`NotImplementedError`.
    """
    raise NotImplementedError
class Summary(Aggregation):
    """
    An aggregation that can apply an arbitrary function to a column.

    :param column_name:
        The column being summarized.
    :param data_type:
        The return type of this aggregation.
    :param func:
        A function which will be passed the column for processing.
    """
    def __init__(self, column_name, data_type, func):
        self._column_name = column_name
        self._data_type = data_type
        self._func = func

    def get_aggregate_data_type(self, table):
        """
        Return the data type that was supplied to the constructor.
        """
        # The caller declares the result type up front, because the output of
        # an arbitrary function cannot be inferred from the table.
        return self._data_type
def get_aggregate_data_type(self, table):
    """
    The result of this aggregation is :class:`.Number` data, regardless of
    the table passed in.
    """
    result_type = Number()

    return result_type
def run(self, table):
    """
    Count rows, non-null values, or occurrences of one specific value,
    depending on which of ``_column_name`` and ``_value`` were set.
    """
    # Without a column there is nothing to inspect: count the rows.
    if self._column_name is None:
        return len(table.rows)

    column = table.columns[self._column_name]

    # No explicit value requested: count whatever is not null.
    if self._value is default:
        return len(column.values_without_nulls())

    return column.values().count(self._value)
class Min(Aggregation):
    """
    Calculate the minimum value in a column. May be applied to columns
    containing :class:`.Date`, :class:`.DateTime` or :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        Return the column's own data type when it is one of the supported
        types. For any other type nothing is returned.
        """
        column = table.columns[self._column_name]

        if isinstance(column.data_type, (Number, Date, DateTime)):
            return column.data_type

    def validate(self, table):
        """
        Raise :class:`DataTypeError` unless the column holds Number, Date or
        DateTime data.
        """
        column = table.columns[self._column_name]

        if not isinstance(column.data_type, (Number, Date, DateTime)):
            raise DataTypeError('Min can only be applied to columns containing DateTime or Number data.')

    def run(self, table):
        """
        Return the smallest non-null value in the column.
        """
        column = table.columns[self._column_name]

        # Nulls are excluded before comparing.
        return min(column.values_without_nulls())
class MaxPrecision(Aggregation):
    """
    Calculate the most decimal places present for any value in this column.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        The result of this aggregation is :class:`.Number` data.
        """
        return Number()

    def validate(self, table):
        """
        Raise :class:`DataTypeError` unless the column holds Number data.
        """
        column = table.columns[self._column_name]

        if not isinstance(column.data_type, Number):
            raise DataTypeError('MaxPrecision can only be applied to columns containing Number data.')

    def run(self, table):
        """
        Pass the column's non-null values to ``max_precision`` and return
        its result.
        """
        column = table.columns[self._column_name]

        return max_precision(column.values_without_nulls())
# No remainder
if low == high:
value = data[low - 1]
# Remainder
else:
value = (data[low - 1] + data[high - 1]) / 2
quantiles.append(value)
# Hundredth percentile is final datum
quantiles.append(data[-1])
return Quantiles(quantiles)
class Quartiles(Aggregation):
    """
    The quartiles of a :class:`.Number` column based on the 25th, 50th and
    75th percentiles.

    "Zeroth" (min value) and "Fourth" (max value) quartiles are included for
    reference and intuitive indexing.

    See :class:`Percentiles` for implementation details.

    This aggregation can not be applied to a :class:`.TableSet`.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def validate(self, table):
        """
        Raise :class:`DataTypeError` unless the column holds Number data, and
        warn when the column contains nulls.
        """
        column = table.columns[self._column_name]

        if not isinstance(column.data_type, Number):
            raise DataTypeError('Quartiles can only be applied to columns containing Number data.')

        # The null check must be computed here; nothing else binds it.
        has_nulls = HasNulls(self._column_name).run(table)

        if has_nulls:
            warn_null_calculation(self, column)
def run(self, table):
    """
    Return the non-null value that occurs most often in the column.
    """
    values = table.columns[self._column_name].values_without_nulls()

    # Tally how many times each distinct value appears.
    tally = defaultdict(int)

    for value in values:
        tally[value] = tally[value] + 1

    # Keys are visited in first-seen order, so a tie goes to the earliest value.
    return max(tally, key=tally.get)
class IQR(Aggregation):
    """
    Calculate the interquartile range of a column containing
    :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._percentiles = Percentiles(column_name)
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        The result of this aggregation is :class:`.Number` data.
        """
        return Number()

    def validate(self, table):
        """
        Reject columns that do not hold :class:`.Number` data.
        """
        data_type = table.columns[self._column_name].data_type

        if isinstance(data_type, Number):
            return

        raise DataTypeError('IQR can only be applied to columns containing Number data.')
def get_aggregate_data_type(self, table):
    """
    The result of this aggregation is :class:`.Number` data, regardless of
    the table passed in.
    """
    result_type = Number()

    return result_type
def validate(self, table):
    """
    Reject columns that do not hold :class:`.Number` data.
    """
    data_type = table.columns[self._column_name].data_type

    if isinstance(data_type, Number):
        return

    raise DataTypeError('MaxPrecision can only be applied to columns containing Number data.')
def run(self, table):
    """
    Pass the column's non-null values to ``max_precision`` and return its
    result.
    """
    values = table.columns[self._column_name].values_without_nulls()

    return max_precision(values)
class Sum(Aggregation):
    """
    Calculate the sum of a column containing :class:`.Number` data.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        The result of this aggregation is :class:`.Number` data.
        """
        return Number()

    def validate(self, table):
        """
        Raise :class:`DataTypeError` unless the column holds Number data.
        """
        column = table.columns[self._column_name]

        if not isinstance(column.data_type, Number):
            raise DataTypeError('Sum can only be applied to columns containing Number data.')

    def run(self, table):
        """
        Add up the non-null values in the column.
        """
        column = table.columns[self._column_name]

        # Nulls cannot take part in addition, so they are left out.
        return sum(column.values_without_nulls())
class HasNulls(Aggregation):
    """
    Report whether a column holds at least one null value, returning
    :code:`True` if it does.
    """
    def __init__(self, column_name):
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        The result of this aggregation is :class:`.Boolean` data.
        """
        return Boolean()

    def run(self, table):
        """
        Test the column's values for the presence of :code:`None`.
        """
        values = table.columns[self._column_name].values()

        return None in values
class Any(Aggregation):
    """
    Returns :code:`True` if any value in a column passes a truth test. The
    truth test may be omitted when testing :class:`.Boolean` data.

    :param test:
        A function that takes a value and returns `True` or `False`.
    """
    def __init__(self, column_name, test=None):
        self._test = test
        self._column_name = column_name

    def get_aggregate_data_type(self, table):
        """
        The result of this aggregation is :class:`.Boolean` data.
        """
        return Boolean()

    def validate(self, table):
        # NOTE(review): only the column lookup is visible here; no further
        # checks are made on the column once it has been resolved.
        target_column = table.columns[self._column_name]
def validate(self, table):
    """
    Reject columns that do not hold :class:`.Number` data, then warn if the
    column contains nulls.
    """
    column = table.columns[self._column_name]

    if not isinstance(column.data_type, Number):
        raise DataTypeError('PopulationStDev can only be applied to columns containing Number data.')

    # The null check runs only after the type check has passed.
    if HasNulls(self._column_name).run(table):
        warn_null_calculation(self, column)
def run(self, table):
    """
    Return the square root of the value computed by
    ``self._population_variance``.
    """
    variance = self._population_variance.run(table)

    return variance.sqrt()
class MAD(Aggregation):
"""
Calculate the `median absolute deviation `_
of a column containing :class:`.Number` data.
"""
def __init__(self, column_name):
self._column_name = column_name
self._median = Median(column_name)
def get_aggregate_data_type(self, table):
return Number()
def validate(self, table):
column = table.columns[self._column_name]
if not isinstance(column.data_type, Number):
raise DataTypeError('MAD can only be applied to columns containing Number data.')