How to use the catalyst.lib.labelarray.LabelArray class in catalyst

To help you get started, we’ve selected a few catalyst examples, based on popular ways LabelArray is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github enigmampc / catalyst / tests / test_labelarray.py View on Github external
def manual_narrow_condense_back_to_valid_size_slow(self):
        """Check that a LabelArray never ends up with an 'int24' code size.

        This test is really slow, so we don't want it run by default.
        """
        # Build the category list, then repeat its first entry at the end;
        # that same entry also serves as the missing value.
        labels = self.create_categories(24, plus_one=False)
        sentinel = labels[0]
        labels.append(sentinel)

        label_array = LabelArray(labels, missing_value=sentinel)

        # An 'int24' would be meaningless, so the item size must be 4 bytes.
        assert_equal(label_array.itemsize, 4)
        self.check_roundtrip(label_array)
github enigmampc / catalyst / tests / pipeline / test_factor.py View on Github external
# Generated with:
        # data = arange(25).reshape(5, 5).transpose() % 4
        data = array([[0, 1, 2, 3, 0],
                      [1, 2, 3, 0, 1],
                      [2, 3, 0, 1, 2],
                      [3, 0, 1, 2, 3],
                      [0, 1, 2, 3, 0]], dtype=factor_dtype)

        # Generated with:
        # classifier_data = arange(25).reshape(5, 5).transpose() % 2
        classifier_data = array([[0, 1, 0, 1, 0],
                                 [1, 0, 1, 0, 1],
                                 [0, 1, 0, 1, 0],
                                 [1, 0, 1, 0, 1],
                                 [0, 1, 0, 1, 0]], dtype=int64_dtype)
        string_classifier_data = LabelArray(
            classifier_data.astype(str).astype(object),
            missing_value=None,
        )

        expected_ranks = {
            'ordinal': array(
                [[1., 1., 3., 2., 2.],
                 [1., 2., 3., 1., 2.],
                 [2., 2., 1., 1., 3.],
                 [2., 1., 1., 2., 3.],
                 [1., 1., 3., 2., 2.]]
            ),
            'average': array(
                [[1.5, 1., 3., 2., 1.5],
                 [1.5, 2., 3., 1., 1.5],
                 [2.5, 2., 1., 1., 2.5],
github enigmampc / catalyst / tests / test_labelarray.py View on Github external
check_arrays(
                self.strs == value,
                arr1d.as_int_array() == idx,
            )

        # It should be equivalent to pass the same set of categories manually.
        arr1d_explicit_categories = LabelArray(
            self.strs,
            missing_value='',
            categories=arr1d.categories,
        )
        check_arrays(arr1d, arr1d_explicit_categories)

        for shape in (9, 3), (3, 9), (3, 3, 3):
            strs2d = self.strs.reshape(shape)
            arr2d = LabelArray(strs2d, missing_value='')
            codes2d = arr2d.as_int_array()

            self.assertEqual(arr2d.shape, shape)
            check_arrays(arr2d.categories, categories)

            for idx, value in enumerate(arr2d.categories):
                check_arrays(strs2d == value, codes2d == idx)
github enigmampc / catalyst / tests / pipeline / test_adjusted_array.py View on Github external
                    lambda a: LabelArray(
                        a.astype(unicode).astype(object),
                        None,
                    )
github enigmampc / catalyst / tests / pipeline / test_classifier.py View on Github external
def test_string_eq(self, compval, labelarray_dtype):

        compval = labelarray_dtype.type(compval)

        class C(Classifier):
            dtype = categorical_dtype
            missing_value = ''
            inputs = ()
            window_length = 0

        c = C()

        # There's no significance to the values here other than that they
        # contain a mix of the comparison value and other values.
        data = LabelArray(
            np.asarray(
                [['',    'a',  'ab', 'ba'],
                 ['z',  'ab',   'a', 'ab'],
                 ['aa', 'ab',    '', 'ab'],
                 ['aa',  'a',  'ba', 'ba']],
                dtype=labelarray_dtype,
            ),
            missing_value='',
        )

        self.check_terms(
            terms={
                'eq': c.eq(compval),
            },
            expected={
                'eq': (data == compval),
github enigmampc / catalyst / catalyst / pipeline / classifiers / classifier.py View on Github external
op=vectorized_is_element,
                    opargs=(choices,),
                )
            else:
                raise TypeError(
                    "Found non-int in choices for {typename}.element_of.\n"
                    "Supplied choices were {choices}.".format(
                        typename=type(self).__name__,
                        choices=choices,
                    )
                )
        elif self.dtype == categorical_dtype:
            if only_contains((bytes, unicode), choices):
                return ArrayPredicate(
                    term=self,
                    op=LabelArray.element_of,
                    opargs=(choices,),
                )
            else:
                raise TypeError(
                    "Found non-string in choices for {typename}.element_of.\n"
                    "Supplied choices were {choices}.".format(
                        typename=type(self).__name__,
                        choices=choices,
                    )
                )
        assert False, "Unknown dtype in Classifier.element_of %s." % self.dtype
github enigmampc / catalyst / catalyst / lib / labelarray.py View on Github external
def view(self, dtype=_NotPassed, type=_NotPassed):
        """Return a view of this array, refusing any change of dtype.

        Parameters
        ----------
        dtype : optional
            Forwarded to the parent ``view`` only when explicitly supplied.
        type : optional
            Forwarded to the parent ``view`` only when explicitly supplied.

        Raises
        ------
        TypeError
            If ``type`` is not supplied and ``dtype`` is supplied with a
            value other than this array's own dtype.
        """
        if type is _NotPassed:
            if dtype not in (_NotPassed, self.dtype):
                raise TypeError("Can't view LabelArray as another dtype.")

        # ndarray.view's text signature suggests that both arguments default
        # to `None`, yet an explicit None behaves differently from omitting
        # the argument. Forward only the arguments the caller really gave so
        # that omitted ones stay omitted.
        forwarded = {
            name: value
            for name, value in (('dtype', dtype), ('type', type))
            if value is not _NotPassed
        }
        return super(LabelArray, self).view(**forwarded)
github enigmampc / catalyst / catalyst / lib / adjusted_array.py View on Github external
return data, {}

    data_dtype = data.dtype
    if data_dtype == bool_:
        return data.astype(uint8), {'dtype': dtype(bool_)}
    elif data_dtype in FLOAT_DTYPES:
        return data.astype(float64), {'dtype': dtype(float64)}
    elif data_dtype in INT_DTYPES:
        return data.astype(int64), {'dtype': dtype(int64)}
    elif is_categorical(data_dtype):
        if not isinstance(missing_value, LabelArray.SUPPORTED_SCALAR_TYPES):
            raise TypeError(
                "Invalid missing_value for categorical array.\n"
                "Expected None, bytes or unicode. Got %r." % missing_value,
            )
        return LabelArray(data, missing_value), {}
    elif data_dtype.kind == 'M':
        try:
            outarray = data.astype('datetime64[ns]').view('int64')
            return outarray, {'dtype': datetime64ns_dtype}
        except OverflowError:
            raise ValueError(
                "AdjustedArray received a datetime array "
                "not representable as datetime64[ns].\n"
                "Min Date: %s\n"
                "Max Date: %s\n"
                % (data.min(), data.max())
            )
    else:
        raise TypeError(
            "Don't know how to construct AdjustedArray "
            "on data of type %s." % data_dtype
github enigmampc / catalyst / catalyst / lib / adjusted_array.py View on Github external
Returns
    -------
    coerced, view_kwargs : (np.ndarray, np.dtype)
    """
    if isinstance(data, LabelArray):
        return data, {}

    data_dtype = data.dtype
    if data_dtype == bool_:
        return data.astype(uint8), {'dtype': dtype(bool_)}
    elif data_dtype in FLOAT_DTYPES:
        return data.astype(float64), {'dtype': dtype(float64)}
    elif data_dtype in INT_DTYPES:
        return data.astype(int64), {'dtype': dtype(int64)}
    elif is_categorical(data_dtype):
        if not isinstance(missing_value, LabelArray.SUPPORTED_SCALAR_TYPES):
            raise TypeError(
                "Invalid missing_value for categorical array.\n"
                "Expected None, bytes or unicode. Got %r." % missing_value,
            )
        return LabelArray(data, missing_value), {}
    elif data_dtype.kind == 'M':
        try:
            outarray = data.astype('datetime64[ns]').view('int64')
            return outarray, {'dtype': datetime64ns_dtype}
        except OverflowError:
            raise ValueError(
                "AdjustedArray received a datetime array "
                "not representable as datetime64[ns].\n"
                "Min Date: %s\n"
                "Max Date: %s\n"
                % (data.min(), data.max())