How to use the bigml.util.cast function in bigml

To help you get started, we’ve selected a few bigml examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github bigmlcom / python / bigml / logistic.py View on Github external
are not being used in the model

        """

        # Checks and cleans input_data leaving the fields used in the model
        unused_fields = []
        new_data = self.filter_input_data( \
            input_data,
            add_unused_fields=full)
        if full:
            input_data, unused_fields = new_data
        else:
            input_data = new_data

        # Strips affixes for numeric values and casts to the final field type
        cast(input_data, self.fields)

        # When operating_point is used, we need the probabilities
        # of all possible classes to decide, so we use
        # the `predict_probability` method
        if operating_point:
            return self.predict_operating( \
                input_data, operating_point=operating_point)
        if operating_kind:
            return self.predict_operating_kind( \
                input_data, operating_kind=operating_kind)

        # In case that missing_numerics is False, checks that all numeric
        # fields are present in input data.
        if not self.missing_numerics:
            check_no_missing_numerics(input_data, self.model_fields,
                                      self.weight_field)
github bigmlcom / python / bigml / ensemble.py View on Github external
- unused_fields: list of fields in the input data that
                                       are not being used in the model
        """

        # Checks and cleans input_data leaving the fields used in the model
        new_data = self.filter_input_data( \
            input_data,
            add_unused_fields=full)
        unused_fields = None
        if full:
            input_data, unused_fields = new_data
        else:
            input_data = new_data

        # Strips affixes for numeric values and casts to the final field type
        cast(input_data, self.fields)

        if median and method is None:
            # predictions with median are only available with old combiners
            method = PLURALITY_CODE

        if method is None and operating_point is None and \
            operating_kind is None and not median:
            # operating_point has precedence over operating_kind. If no
            # combiner is set, default operating kind is "probability"
            operating_kind = "probability"

        if operating_point:
            if self.regression:
                raise ValueError("The operating_point argument can only be"
                                 " used in classifications.")
            prediction = self.predict_operating( \
github bigmlcom / bigmler / bigmler / export / out_model / pythonlr.py View on Github external
from bigml.util import cast, NUMERIC
from bigml.predicate import TM_FULL_TERM, TM_ALL
from bigml.cluster import parse_terms, parse_items
from bigml.logistic import LogisticRegression, balance_input
from bigml.modelfields import get_unique_terms

# Source-text template for a block of constants. The current values of
# TM_FULL_TERM, TM_ALL and NUMERIC are interpolated through repr(), so the
# resulting string spells them out as Python literals.
# NOTE(review): presumably this text is written into the exported
# stand-alone module -- confirm against the code that consumes it.
CONSTANTS = """
EXPANSION_ATTRIBUTES = {"categorical": "categories", "text": "tag_cloud",
                        "items": "items"}
TM_FULL_TERM = %s
TM_ALL = %s
NUMERIC = %s

""" % (repr(TM_FULL_TERM), repr(TM_ALL), repr(NUMERIC))

# Function objects imported from the bigml package at the top of this module.
# NOTE(review): presumably their source code is copied into the exported
# module so it has no bigml dependency -- confirm with the consumer.
FUNCTIONS = [cast, parse_terms, parse_items, get_unique_terms,
             balance_input]

# Source text of a minimal class whose instance attributes are taken
# directly from the dictionary passed to its constructor.
CLASS_DEFINITION = """

class BasicLR(object):

    def __init__(self, lr_dict):
        self.__dict__ = lr_dict

"""

# Method names, given as strings.
# NOTE(review): presumably these are LogisticRegression methods to be
# attached to BasicLR in the exported code -- confirm with the consumer.
CLASS_METHODS = ["predict", "filter_input_data", "category_probability",
                 "get_unique_terms", "get_coefficients", "normalize"]


def summary_example(field):
github bigmlcom / python / bigml / fusion.py View on Github external
# Checks and cleans input_data leaving the fields used in the model
        unused_fields = []
        new_data = self.filter_input_data( \
            input_data,
            add_unused_fields=full)
        if full:
            input_data, unused_fields = new_data
        else:
            input_data = new_data

        if not self.missing_numerics:
            check_no_missing_numerics(input_data, self.model_fields)

        # Strips affixes for numeric values and casts to the final field type
        cast(input_data, self.fields)

        full_prediction = self._predict( \
            input_data, missing_strategy=missing_strategy,
            operating_point=operating_point,
            unused_fields=unused_fields)
        if full:
            return dict((key, value) for key, value in \
                full_prediction.iteritems() if value is not None)

        return full_prediction['prediction']
github bigmlcom / python / bigml / linear.py View on Github external
- prediction: the prediction value
                  - unused_fields: list of fields in the input data that
                                   are not being used in the model

        """

        # Checks and cleans input_data leaving the fields used in the model
        unused_fields = []
        new_data = self.filter_input_data( \
            input_data,
            add_unused_fields=full)
        if full:
            new_data, unused_fields = new_data

        # Strips affixes for numeric values and casts to the final field type
        cast(new_data, self.fields)

        # In case that the training data has no missings, input data shouldn't
        # have them either
        check_no_training_missings(new_data, self.model_fields,
                                   self.weight_field,
                                   self.objective_id)

        # Computes text and categorical field expansion
        unique_terms = self.get_unique_terms(new_data)

        # Creates an input vector with the values for all expanded fields.
        input_array = self.expand_input(new_data, unique_terms)
        compact_input_array = self.expand_input(new_data, unique_terms, True)

        prediction = dot([flatten(self.coefficients)], [input_array])[0][0]

        result = {
github bigmlcom / python / bigml / deepnet.py View on Github external
- probability: prediction's probability
                  - unused_fields: list of fields in the input data that
                                   are not being used in the model
        """

        # Checks and cleans input_data leaving the fields used in the model
        unused_fields = []
        new_data = self.filter_input_data( \
            input_data, add_unused_fields=full)
        if full:
            input_data, unused_fields = new_data
        else:
            input_data = new_data

        # Strips affixes for numeric values and casts to the final field type
        cast(input_data, self.fields)

        # When operating_point is used, we need the probabilities
        # of all possible classes to decide, so we use
        # the `predict_probability` method
        if operating_point:
            if self.regression:
                raise ValueError("The operating_point argument can only be"
                                 " used in classifications.")
            return self.predict_operating( \
                input_data, operating_point=operating_point)
        if operating_kind:
            if self.regression:
                raise ValueError("The operating_point argument can only be"
                                 " used in classifications.")
            return self.predict_operating_kind( \
                input_data, operating_kind=operating_kind)
github bigmlcom / python / bigml / model.py View on Github external
in the predicted node
                  - unused_fields: list of fields in the input data that
                                   are not being used in the model
        """

        # Checks and cleans input_data leaving the fields used in the model
        unused_fields = []
        new_data = self.filter_input_data( \
            input_data,
            add_unused_fields=full)
        if full:
            input_data, unused_fields = new_data
        else:
            input_data = new_data
        # Strips affixes for numeric values and casts to the final field type
        cast(input_data, self.fields)

        full_prediction = self._predict( \
            input_data, missing_strategy=missing_strategy,
            operating_point=operating_point, operating_kind=operating_kind,
            unused_fields=unused_fields)
        if full:
            return dict((key, value) for key, value in \
                full_prediction.iteritems() if value is not None)

        return full_prediction['prediction']
github bigmlcom / python / bigml / anomaly.py View on Github external
To produce an anomaly score, we evaluate each tree in the iforest
            for its depth result (see the depth method in the AnomalyTree
            object for details). We find the average of these depths
            to produce an `observed_mean_depth`. We calculate an
            `expected_mean_depth` using the `sample_size` and `mean_depth`
            parameters which come as part of the forest message.
            We combine those values as seen below, which should result in a
            value between 0 and 1.

        """

        # Checks and cleans input_data leaving the fields used in the model
        input_data = self.filter_input_data(input_data)

        # Strips affixes for numeric values and casts to the final field type
        cast(input_data, self.fields)

        depth_sum = 0
        if self.iforest is None:
            raise Exception("We could not find the iforest information to "
                            "compute the anomaly score. Please, rebuild your "
                            "Anomaly object from a complete anomaly detector "
                            "resource.")
        for tree in self.iforest:
            depth_sum += tree.depth(input_data)[0]
        observed_mean_depth = float(depth_sum) / len(self.iforest)
        return math.pow(2, - observed_mean_depth / self.expected_mean_depth)