How to use the h2o.utils.typechecks.is_type function in h2o

To help you get started, we’ve selected a few examples of the h2o.utils.typechecks.is_type function, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github h2oai / h2o-3 / h2o-py / h2o / automl / autoh2o.py View on Github external
validation_frame = H2OFrame._validate(validation_frame, 'validation_frame')
        self.input_spec['validation_frame'] = validation_frame.frame_id if validation_frame is not None else None

        leaderboard_frame = H2OFrame._validate(leaderboard_frame, 'leaderboard_frame')
        self.input_spec['leaderboard_frame'] = leaderboard_frame.frame_id if leaderboard_frame is not None else None

        blending_frame = H2OFrame._validate(blending_frame, 'blending_frame')
        self.input_spec['blending_frame'] = blending_frame.frame_id if blending_frame is not None else None

        if x is not None:
            assert_is_type(x, list)
            xset = set()
            if is_type(x, int, str): x = [x]
            for xi in x:
                if is_type(xi, int):
                    if not (-ncols <= xi < ncols):
                        raise H2OValueError("Column %d does not exist in the training frame" % xi)
                    xset.add(names[xi])
                else:
                    if xi not in names:
                        raise H2OValueError("Column %s not in the training frame" % xi)
                    xset.add(xi)
            ignored_columns = set(names) - xset
            for col in [y, fold_column, weights_column]:
                if col is not None and col in ignored_columns:
                    ignored_columns.remove(col)
            if ignored_columns is not None:
                self.input_spec['ignored_columns'] = list(ignored_columns)


        def clean_params(params):
github h2oai / h2o-3 / h2o-py / h2o / model / metrics_base.py View on Github external
:param str metric: A metric among :const:`maximizing_metrics`.
        :param thresholds: thresholds parameter must be a number or a list (i.e. [0.01, 0.5, 0.99]).
            If None, then the threshold maximizing the metric will be used.
            If 'all', then all stored thresholds are used and returned with the matching metric.
        :returns: The set of metrics for the list of thresholds.
            The returned list has a 'value' property holding only
            the metric value (if no threshold provided or if provided as a number),
            or all the metric values (if thresholds provided as a list)
        """
        assert_is_type(thresholds, None, 'all', numeric, [numeric])
        if metric not in H2OBinomialModelMetrics.maximizing_metrics:
            raise ValueError("The only allowable metrics are {}".format(', '.join(H2OBinomialModelMetrics.maximizing_metrics)))

        h2o_metric = (H2OBinomialModelMetrics.metrics_aliases[metric] if metric in H2OBinomialModelMetrics.metrics_aliases
                      else metric)
        value_is_scalar = is_type(metric, str) and (thresholds is None or is_type(thresholds, numeric))
        if thresholds is None:
            thresholds = [self.find_threshold_by_max_metric(h2o_metric)]
        elif thresholds == 'all':
            thresholds = None
        elif is_type(thresholds, numeric):
            thresholds = [thresholds]

        metrics = List()
        thresh2d = self._metric_json['thresholds_and_metric_scores']
        if thresholds is None:  # fast path to return all thresholds: skipping find_idx logic
            metrics.extend(list(t) for t in zip(thresh2d['threshold'], thresh2d[h2o_metric]))
        else:
            for t in thresholds:
                idx = self.find_idx_by_threshold(t)
                metrics.append([t, thresh2d[h2o_metric][idx]])
github h2oai / h2o-3 / h2o-py / h2o / grid / grid_search.py View on Github external
def __init__(self, model, hyper_params, grid_id=None, search_criteria=None):
        """
        Set up a grid search over ``hyper_params`` for the given model.

        :param model: ``None``, an ``H2OEstimator`` instance, or an ``H2OEstimator`` subclass. When a
            class is given it is instantiated with no arguments before being stored.
        :param dict hyper_params: hyper-parameter specification; a shallow copy is stored.
        :param str grid_id: optional identifier, stored as ``self._id``.
        :param dict search_criteria: optional search settings; a shallow copy is stored when provided,
            otherwise ``None``.
        """
        super(H2OGridSearch, self).__init__()

        # Validate every argument before any instance state is assigned.
        assert_is_type(model, None, H2OEstimator, lambda mdl: issubclass(mdl, H2OEstimator))
        assert_is_type(hyper_params, dict)
        assert_is_type(grid_id, None, str)
        assert_is_type(search_criteria, None, dict)

        # Anything that passed validation but is neither None nor an estimator instance
        # must be an estimator class, so build an instance of it.
        if model is not None and not is_type(model, H2OEstimator):
            model = model()

        self._id = grid_id
        self.model = model
        self.hyper_params = dict(hyper_params)
        if search_criteria is None:
            self.search_criteria = None
        else:
            self.search_criteria = dict(search_criteria)

        self._grid_json = None
        # List of H2O Estimator instances.
        self.models = None
        # Internal parameters, kept for object recycling.
        self._parms = {}
        # Externally visible parameters.
        self.parms = {}
        # Read by __repr__/show to query the job state.
        self._future = False
        # Used when _future is True.
        self._job = None
github h2oai / h2o-3 / h2o-py / h2o / group_by.py View on Github external
def _add_agg(self, op, col, na):
        if op == "nrow": col = 0
        if col is None:
            for i in range(self._fr.ncol):
                if i not in self._by: self._add_agg(op, i, na)
            return self
        elif is_type(col, str):
            cidx = self._fr.names.index(col)
        elif is_type(col, int):
            cidx = col
        elif is_type(col, list, tuple):
            for i in col:
                self._add_agg(op, i, na)
            return self
        else:
            raise ValueError("col must be a column name or index.")
        name = "{}_{}".format(op, self._fr.names[cidx])
        self._aggs[name] = [op, cidx, na]
        return self
github h2oai / h2o-3 / h2o-py / h2o / grid / grid_search.py View on Github external
tframe = algo_params["training_frame"]
        if tframe is None: raise ValueError("Missing training_frame")
        if y is not None:
            if is_type(y, list, tuple):
                if len(y) == 1:
                    parms["y"] = y[0]
                else:
                    raise ValueError('y must be a single column reference')
        if x is None:
            if(isinstance(y, int)):
                xset = set(range(training_frame.ncols)) - {y}
            else:
                xset = set(training_frame.names) - {y}
        else:
            xset = set()
            if is_type(x, int, str): x = [x]
            for xi in x:
                if is_type(xi, int):
                    if not (-training_frame.ncols <= xi < training_frame.ncols):
                        raise H2OValueError("Column %d does not exist in the training frame" % xi)
                    xset.add(training_frame.names[xi])
                else:
                    if xi not in training_frame.names:
                        raise H2OValueError("Column %s not in the training frame" % xi)
                    xset.add(xi)
        x = list(xset)
        parms["x"] = x
        self.build_model(parms)
github h2oai / h2o-3 / h2o-py / h2o / estimators / estimator_base.py View on Github external
is_unsupervised = is_auto_encoder or self.algo in {"aggregator", "pca", "svd", "kmeans", "glrm", "word2vec"}
        if is_auto_encoder and y is not None: raise ValueError("y should not be specified for autoencoder.")
        if not is_unsupervised and y is None: raise ValueError("Missing response")

        # Step 3
        if not training_frame_exists:
            parms["training_frame"] = training_frame
            offset = parms["offset_column"]
            folds = parms["fold_column"]
            weights = parms["weights_column"]

        if validation_frame is not None: parms["validation_frame"] = validation_frame
        if is_type(y, int): y = training_frame.names[y]
        if y is not None: parms["response_column"] = y
        if not isinstance(x, (list, tuple)): x = [x]
        if is_type(x[0], int):
            x = [training_frame.names[i] for i in x]
        if not training_frame_exists:
            ignored_columns = list(set(training_frame.names) - set(x + [y, offset, folds, weights]))
            parms["ignored_columns"] = None if ignored_columns == [] else [quoted(col) for col in ignored_columns]
        parms["interactions"] = (None if "interactions" not in parms or parms["interactions"] is None else
                                 [quoted(col) for col in parms["interactions"]])
        parms["interaction_pairs"] = (None if "interaction_pairs" not in parms or parms["interaction_pairs"] is None else
                                 [tuple(map(quoted, ip)) for ip in parms["interaction_pairs"]])

        parms = {k: H2OEstimator._keyify_if_h2oframe(parms[k]) for k in parms}
        rest_ver = parms.pop("_rest_version") if "_rest_version" in parms else 3

        model_builder_json = h2o.api("POST /%d/ModelBuilders/%s" % (rest_ver, self.algo), data=parms)
        model = H2OJob(model_builder_json, job_type=(self.algo + " Model Build"))

        if self._future:
github h2oai / h2o-3 / h2o-py / h2o / estimators / estimator_base.py View on Github external
if x is not None:
                    raise H2OValueError("Properties x and ignored_columns cannot be specified simultaneously")
                for ic in ignored_columns:
                    if is_type(ic, int):
                        if not (-ncols <= ic < ncols):
                            raise H2OValueError("Column %d does not exist in the training frame" % ic)
                        ignored_columns_set.add(names[ic])
                    else:
                        if ic not in names:
                            raise H2OValueError("Column %s not in the training frame" % ic)
                        ignored_columns_set.add(ic)
            if x is None:
                xset = set(names) - {y} - ignored_columns_set
            else:
                xset = set()
                if is_type(x, int, str): x = [x]
                for xi in x:
                    if is_type(xi, int):
                        if not (-ncols <= xi < ncols):
                            raise H2OValueError("Column %d does not exist in the training frame" % xi)
                        xset.add(names[xi])
                    else:
                        if xi not in names:
                            raise H2OValueError("Column %s not in the training frame" % xi)
                        xset.add(xi)
            x = list(xset)

            parms["offset_column"] = offset_column
            parms["fold_column"] = fold_column
            parms["weights_column"] = weights_column
            parms["max_runtime_secs"] = max_runtime_secs
github h2oai / h2o-3 / h2o-py / h2o / grid / grid_search.py View on Github external
if y is not None:
            if is_type(y, list, tuple):
                if len(y) == 1:
                    parms["y"] = y[0]
                else:
                    raise ValueError('y must be a single column reference')
        if x is None:
            if(isinstance(y, int)):
                xset = set(range(training_frame.ncols)) - {y}
            else:
                xset = set(training_frame.names) - {y}
        else:
            xset = set()
            if is_type(x, int, str): x = [x]
            for xi in x:
                if is_type(xi, int):
                    if not (-training_frame.ncols <= xi < training_frame.ncols):
                        raise H2OValueError("Column %d does not exist in the training frame" % xi)
                    xset.add(training_frame.names[xi])
                else:
                    if xi not in training_frame.names:
                        raise H2OValueError("Column %s not in the training frame" % xi)
                    xset.add(xi)
        x = list(xset)
        parms["x"] = x
        self.build_model(parms)
github h2oai / h2o-3 / h2o-py / h2o / estimators / estimator_base.py View on Github external
for ic in ignored_columns:
                    if is_type(ic, int):
                        if not (-ncols <= ic < ncols):
                            raise H2OValueError("Column %d does not exist in the training frame" % ic)
                        ignored_columns_set.add(names[ic])
                    else:
                        if ic not in names:
                            raise H2OValueError("Column %s not in the training frame" % ic)
                        ignored_columns_set.add(ic)
            if x is None:
                xset = set(names) - {y} - ignored_columns_set
            else:
                xset = set()
                if is_type(x, int, str): x = [x]
                for xi in x:
                    if is_type(xi, int):
                        if not (-ncols <= xi < ncols):
                            raise H2OValueError("Column %d does not exist in the training frame" % xi)
                        xset.add(names[xi])
                    else:
                        if xi not in names:
                            raise H2OValueError("Column %s not in the training frame" % xi)
                        xset.add(xi)
            x = list(xset)

            parms["offset_column"] = offset_column
            parms["fold_column"] = fold_column
            parms["weights_column"] = weights_column
            parms["max_runtime_secs"] = max_runtime_secs

        # Overwrites the model_id parameter only if model_id is passed
        if model_id is not None:
github h2oai / h2o-3 / h2o-py / h2o / group_by.py View on Github external
def _add_agg(self, op, col, na):
        if op == "nrow": col = 0
        if col is None:
            for i in range(self._fr.ncol):
                if i not in self._by: self._add_agg(op, i, na)
            return self
        elif is_type(col, str):
            cidx = self._fr.names.index(col)
        elif is_type(col, int):
            cidx = col
        elif is_type(col, list, tuple):
            for i in col:
                self._add_agg(op, i, na)
            return self
        else:
            raise ValueError("col must be a column name or index.")
        name = "{}_{}".format(op, self._fr.names[cidx])
        self._aggs[name] = [op, cidx, na]
        return self