How to use the boltons.statsutils._StatsProperty function in boltons

To help you get started, we’ve selected a few boltons examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mahmoud / boltons / boltons / statsutils.py View on Github external
return self.variance ** 0.5
    std_dev = _StatsProperty('std_dev', _calc_std_dev)

    def _calc_median_abs_dev(self):
        """\
        Median Absolute Deviation (MAD): the median of every value's
        absolute distance from the median of the data. A robust measure
        of statistical dispersion:
        http://en.wikipedia.org/wiki/Median_absolute_deviation

        >>> median_abs_dev(range(97))
        24.0
        """
        global median  # module-level function, defined elsewhere in this file
        ordered = sorted(self.data)
        # float() so the distances below are floats even for integer data.
        center = float(median(ordered))
        distances = [abs(center - value) for value in ordered]
        return median(distances)
    median_abs_dev = _StatsProperty('median_abs_dev', _calc_median_abs_dev)
    mad = median_abs_dev  # convenience

    def _calc_rel_std_dev(self):
        """\
        Standard deviation divided by the absolute value of the average.

        http://en.wikipedia.org/wiki/Relative_standard_deviation

        >>> print('%1.3f' % rel_std_dev(range(97)))
        0.583
        """
        abs_mean = abs(self.mean)
        if abs_mean:
            return self.std_dev / abs_mean
        else:
            return self.default
github mahmoud / boltons / boltons / statsutils.py View on Github external
See the module docstring for more about statistical moments.

        >>> skewness(range(97))  # symmetrical around 48.0
        0.0
        >>> left_skewed = skewness(list(range(97)) + list(range(10)))
        >>> right_skewed = skewness(list(range(97)) + list(range(87, 97)))
        >>> round(left_skewed, 3), round(right_skewed, 3)
        (0.114, -0.114)
        """
        data, s_dev = self.data, self.std_dev
        if len(data) > 1 and s_dev > 0:
            return (sum(self._get_pow_diffs(3)) /
                    float((len(data) - 1) * (s_dev ** 3)))
        else:
            return self.default
    skewness = _StatsProperty('skewness', _calc_skewness)

    def _calc_kurtosis(self):
        """\
        Indicates how much data is in the tails of the distribution. The
        result is always positive, with the normal "bell-curve"
        distribution having a kurtosis of 3.

        http://en.wikipedia.org/wiki/Kurtosis

        See the module docstring for more about statistical moments.

        >>> kurtosis(range(9))
        1.99125

        With a kurtosis of 1.99125, [0, 1, 2, 3, 4, 5, 6, 7, 8] is more
        centrally distributed than the normal curve.
github mahmoud / boltons / boltons / statsutils.py View on Github external
"""
        return len(self.data)
    count = _StatsProperty('count', _calc_count)

    def _calc_mean(self):
        """
        The arithmetic mean, or "average". Sum of the values divided by
        the number of values.

        >>> mean(range(20))
        9.5
        >>> mean(list(range(19)) + [949])  # 949 is an arbitrary outlier
        56.0
        """
        return sum(self.data, 0.0) / len(self.data)
    mean = _StatsProperty('mean', _calc_mean)

    def _calc_max(self):
        """
        The maximum value present in the data.

        >>> Stats([2, 1, 3]).max
        3
        """
        if self._is_sorted:
            return self.data[-1]
        return max(self.data)
    max = _StatsProperty('max', _calc_max)

    def _calc_min(self):
        """
        The minimum value present in the data.
github mahmoud / boltons / boltons / statsutils.py View on Github external
if round(c1, precision) == 0:
            if round(beta2, precision) == 3:
                return 0  # Normal
            else:
                if beta2 < 3:
                    return 2  # Symmetric Beta
                elif beta2 > 3:
                    return 7
        elif round(c2, precision) == 0:
            return 3  # Gamma
        else:
            k = c1 ** 2 / (4 * c0 * c2)
            if k < 0:
                return 1  # Beta
        raise RuntimeError('missed a spot')
    pearson_type = _StatsProperty('pearson_type', _calc_pearson_type)

    @staticmethod
    def _get_quantile(sorted_data, q):
        data, n = sorted_data, len(sorted_data)
        idx = q / 1.0 * (n - 1)
        idx_f, idx_c = int(floor(idx)), int(ceil(idx))
        if idx_f == idx_c:
            return data[idx_f]
        return (data[idx_f] * (idx_c - idx)) + (data[idx_c] * (idx - idx_f))

    def get_quantile(self, q):
        """Get a quantile from the dataset. Quantiles are floating point
        values between ``0.0`` and ``1.0``, with ``0.0`` representing
        the minimum value in the dataset and ``1.0`` representing the
        maximum. ``0.5`` represents the median:
github mahmoud / boltons / boltons / statsutils.py View on Github external
if self._is_sorted:
            return self.data[-1]
        return max(self.data)
    max = _StatsProperty('max', _calc_max)

    def _calc_min(self):
        """
        The minimum value present in the data.

        >>> Stats([2, 1, 3]).min
        1
        """
        if self._is_sorted:
            return self.data[0]
        return min(self.data)
    min = _StatsProperty('min', _calc_min)

    def _calc_median(self):
        """
        The median is either the middle value or the average of the two
        middle values of a sample. Compared to the mean, it's generally
        more resilient to the presence of outliers in the sample.

        >>> median([2, 1, 3])
        2
        >>> median(range(97))
        48
        >>> median(list(range(96)) + [1066])  # 1066 is an arbitrary outlier
        48
        """
        return self._get_quantile(self._get_sorted_data(), 0.5)
    median = _StatsProperty('median', _calc_median)
github mahmoud / boltons / boltons / statsutils.py View on Github external
56.0
        """
        return sum(self.data, 0.0) / len(self.data)
    mean = _StatsProperty('mean', _calc_mean)

    def _calc_max(self):
        """
        The maximum value present in the data.

        >>> Stats([2, 1, 3]).max
        3
        """
        if self._is_sorted:
            return self.data[-1]
        return max(self.data)
    max = _StatsProperty('max', _calc_max)

    def _calc_min(self):
        """
        The minimum value present in the data.

        >>> Stats([2, 1, 3]).min
        1
        """
        if self._is_sorted:
            return self.data[0]
        return min(self.data)
    min = _StatsProperty('min', _calc_min)

    def _calc_median(self):
        """
        The median is either the middle value or the average of the two
github mahmoud / boltons / boltons / statsutils.py View on Github external
"""The trimean is a robust measure of central tendency, like the
        median, that takes the weighted average of the median and the
        upper and lower quartiles.

        >>> trimean([2, 1, 3])
        2.0
        >>> trimean(range(97))
        48.0
        >>> trimean(list(range(96)) + [1066])  # 1066 is an arbitrary outlier
        48.0

        """
        sorted_data = self._get_sorted_data()
        gq = lambda q: self._get_quantile(sorted_data, q)
        return (gq(0.25) + (2 * gq(0.5)) + gq(0.75)) / 4.0
    trimean = _StatsProperty('trimean', _calc_trimean)

    def _calc_variance(self):
        """\
        Variance: the mean of the squared differences between each
        value and the mean of the data.

        >>> variance(range(97))
        784.0
        """
        global mean  # module-level function, defined elsewhere in this file
        second_power_diffs = self._get_pow_diffs(2)
        return mean(second_power_diffs)
    variance = _StatsProperty('variance', _calc_variance)

    def _calc_std_dev(self):
        """\
        Standard deviation. Square root of the variance.
github mahmoud / boltons / boltons / statsutils.py View on Github external
See the module docstring for more about statistical moments.

        >>> kurtosis(range(9))
        1.99125

        With a kurtosis of 1.99125, [0, 1, 2, 3, 4, 5, 6, 7, 8] is more
        centrally distributed than the normal curve.
        """
        data, s_dev = self.data, self.std_dev
        if len(data) > 1 and s_dev > 0:
            return (sum(self._get_pow_diffs(4)) /
                    float((len(data) - 1) * (s_dev ** 4)))
        else:
            return 0.0
    kurtosis = _StatsProperty('kurtosis', _calc_kurtosis)

    def _calc_pearson_type(self):
        precision = self._pearson_precision
        skewness = self.skewness
        kurtosis = self.kurtosis
        beta1 = skewness ** 2.0
        beta2 = kurtosis * 1.0

        # TODO: range checks?

        c0 = (4 * beta2) - (3 * beta1)
        c1 = skewness * (beta2 + 3)
        c2 = (2 * beta2) - (3 * beta1) - 6

        if round(c1, precision) == 0:
            if round(beta2, precision) == 3:
github mahmoud / boltons / boltons / statsutils.py View on Github external
return self._get_quantile(self._get_sorted_data(), 0.5)
    median = _StatsProperty('median', _calc_median)

    def _calc_iqr(self):
        """Inter-quartile range (IQR) is the difference between the 75th
        percentile and 25th percentile. IQR is a robust measure of
        dispersion, like standard deviation, but safer to compare
        between datasets, as it is less influenced by outliers.

        >>> iqr([1, 2, 3, 4, 5])
        2
        >>> iqr(range(1001))
        500
        """
        return self.get_quantile(0.75) - self.get_quantile(0.25)
    iqr = _StatsProperty('iqr', _calc_iqr)

    def _calc_trimean(self):
        """The trimean is a robust measure of central tendency, like the
        median, that takes the weighted average of the median and the
        upper and lower quartiles.

        >>> trimean([2, 1, 3])
        2.0
        >>> trimean(range(97))
        48.0
        >>> trimean(list(range(96)) + [1066])  # 1066 is an arbitrary outlier
        48.0

        """
        sorted_data = self._get_sorted_data()
        gq = lambda q: self._get_quantile(sorted_data, q)