How to use the datatable.Frame function in datatable

To help you get started, we’ve selected a few datatable examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github h2oai / datatable / tests / models / test_ftrl.py View on Github external
ft = Ftrl(alpha = 0.1, nepochs = 10000, model_type = "binomial")
    df_train_odd = dt.Frame([[1, 3, 7, 5, 9]])
    df_target_odd = dt.Frame([["odd", "odd", "odd", "odd", "odd"]])
    ft.fit(df_train_odd, df_target_odd)
    assert_equals(ft.labels, dt.Frame([["odd"], [0]], names = ["label", "id"]))

    df_train_wrong = dt.Frame([[2, 4, None, 6]])
    df_target_wrong = dt.Frame([["even", "even", "none", "even"]])
    with pytest.raises(ValueError) as e:
        ft.fit(df_train_wrong, df_target_wrong)
    assert ("Got two new labels in the target column, however, positive "
            "label is already set"
            == str(e.value))

    df_train_even_odd = dt.Frame([[2, 1, 8, 3]])
    df_target_even_odd = dt.Frame([["even", "odd", "even", "odd"]])
    ft.fit(df_train_even_odd, df_target_even_odd)
    assert_equals(ft.labels, dt.Frame([["even", "odd"], [1, 0]], names = ["label", "id"]))

    p = ft.predict(df_train_odd)
    p_dict = p.to_dict()
    delta_odd = [abs(i - j) for i, j in zip(p_dict["odd"], [1, 1, 1, 1, 1])]
    delta_even = [abs(i - j) for i, j in zip(p_dict["even"], [0, 0, 0, 0, 0])]
    assert ft.model_type_trained == "binomial"
    assert max(delta_odd) < epsilon
    assert max(delta_even) < epsilon

    p = ft.predict(df_train_even_odd)
    p_dict = p.to_dict()
    delta_even = [abs(i - j) for i, j in zip(p_dict["even"], [1, 0, 1, 0])]
    delta_odd = [abs(i - j) for i, j in zip(p_dict["odd"], [0, 1, 0, 1])]
    assert ft.model_type_trained == "binomial"
github h2oai / datatable / tests / test_dt.py View on Github external
def test_to_dict():
    """Check that `Frame.to_dict()` maps each column name to its values."""
    columns = {
        "A": ["purple", "yellow", "indigo", "crimson"],
        "B": [0, None, 123779, -299],
        "C": [1.23, 4.56, 7.89, 10.11],
    }
    frame = dt.Frame(**columns)
    # The round-trip must reproduce the keyword arguments the frame was
    # built from, including the missing value in column B.
    assert frame.to_dict() == columns
github h2oai / datatable / tests / test_dt.py View on Github external
def dt0():
    """Return a 4-row Frame with seven columns named "A" through "G".

    The columns are given as literal lists: integers, booleans, a constant
    column of ones, mixed int/float numbers, an all-None column, a constant
    column of zeros, and strings.
    """
    column_names = ["A", "B", "C", "D", "E", "F", "G"]
    column_data = [
        [2, 7, 0, 0],
        [True, False, False, True],
        [1, 1, 1, 1],
        [0.1, 2, -4, 4.4],
        [None, None, None, None],
        [0, 0, 0, 0],
        ["1", "2", "hello", "world"],
    ]
    return dt.Frame(column_data, names=column_names)
github h2oai / datatable / tests / extras / test_aggregate.py View on Github external
def test_aggregate_0d_continuous_integer_random():
    """Aggregate a 13-row integer column (with NAs) using `min_rows=500`.

    Verifies both the members frame returned by `aggregate()` and the
    contents of the input frame after the call.
    """
    source = dt.Frame([None, 9, 8, None, 2, 3, 3, 0, 5, 5, 8, 1, None])
    # `nrows < min_rows`, so we also test that the `n_bins` input is ignored
    members = aggregate(source, min_rows=500, n_bins=3,
                        progress_fn=report_progress)

    expected_members = [[0, 12, 10, 1, 5, 6, 7, 3, 8, 9, 11, 4, 2]]
    members.internal.check()
    assert members.shape == (13, 1)
    assert members.ltypes == (ltype.int,)
    assert members.to_list() == expected_members

    # After the call the input frame is expected to hold two integer columns:
    # the values (NAs first, then ascending) and a column of ones.
    expected_source = [
        [None, None, None, 0, 1, 2, 3, 3, 5, 5, 8, 8, 9],
        [1] * 13,
    ]
    source.internal.check()
    assert source.shape == (13, 2)
    assert source.ltypes == (ltype.int, ltype.int)
    assert source.to_list() == expected_source
github h2oai / datatable / tests / test-groups.py View on Github external
def test_groupby_on_view():
    """Group-by with `max()` applied to a view frame (see issue #1542)."""
    source = dt.Frame(A=[1, 2, 3, 1, 2, 3],
                      B=[3, 6, 2, 4, 3, 1],
                      C=['b', 'd', 'b', 'b', 'd', 'b'])

    # Filtering out the rows with A == 1 must yield a view, not a copy.
    view = source[f.A != 1, :]
    assert isview(view)
    expected_view = dt.Frame(A=[2, 3, 2, 3],
                             B=[6, 2, 3, 1],
                             C=['d', 'b', 'd', 'b'])
    assert_equals(view, expected_view)

    # Per-group maximum of B, grouped by C, computed on the view.
    grouped = view[:, max(f.B), by(f.C)]
    expected_grouped = dt.Frame(C=['b', 'd'], B=[2, 6])
    assert_equals(grouped, expected_grouped)
github h2oai / datatable / tests / munging / test_cbind.py View on Github external
def test_cbind_error_1():
    """cbind-ing a 0-row frame to a 2-row frame must raise ValueError."""
    two_row_frame = dt.Frame(A=[1, 5])
    with pytest.raises(ValueError) as exc_info:
        two_row_frame.cbind(dt.Frame(B=[]))
    message = str(exc_info.value)
    assert "Cannot cbind frame with 0 rows to a frame with 2 rows" in message
github WZBSocialScienceCenter / tmtoolkit / tmtoolkit / topicmod / model_io.py View on Github external
distribution as datatable Frame

    :param topic_word_distrib: topic-word distribution; shape KxM, where K is number of topics, M is vocabulary size
    :param vocab: vocabulary list/array of length M
    :param colname_rowindex: column name for the "row index", i.e. the column that identifies each row
    :param row_labels: format string for each row index where ``{i0}`` or ``{i1}`` are replaced by the respective
                       zero- or one-indexed topic numbers or an array with individual row labels
    :return: datatable Frame
    """
    if isinstance(row_labels, str):
        rownames = [row_labels.format(i0=i, i1=i + 1) for i in range(topic_word_distrib.shape[0])]
    else:
        rownames = row_labels

    return dt.cbind(dt.Frame({colname_rowindex: rownames}),
                    dt.Frame(topic_word_distrib, names=list(vocab)))
github h2oai / datatable / datatable / graph / __init__.py View on Github external
"combine with rows / groupby argument.")

        assert not delete_mode
        if update_mode:
            assert grbynode is None
            allrows = isinstance(rowsnode, AllRFNode)
            # Without `materialize`, when an update is applied to a view,
            # `rowsnode.execute()` will merge the rowindex implied by
            # `rowsnode` with its parent's rowindex. This will cause the
            # parent's data to be updated, which is wrong.
            dt.materialize()
            if isinstance(replacement, (int, float, str, type(None))):
                replacement = datatable.Frame([replacement])
                if allrows:
                    replacement = datatable.repeat(replacement, dt.nrows)
            elif isinstance(replacement, datatable.Frame):
                pass
            elif isinstance(replacement, BaseExpr):
                _col = replacement.evaluate_eager(ee)
                _colset = core.columns_from_columns([_col])
                replacement = _colset.to_frame(None)
            else:
                replacement = datatable.Frame(replacement)
            rowsnode.execute()
            colsnode.execute_update(dt, replacement)
            return

        rowsnode.execute()
        if grbynode:
            grbynode.execute(ee)

        colsnode.execute()
github h2oai / driverlessai-recipes / transformers / string / user_agent_transformer.py View on Github external
def transform(self, X: dt.Frame):
    """Expand a user-agent column into per-attribute columns.

    If the name of the first column of ``X`` is one of the recognised
    user-agent column names, every value of that column is passed to
    ``get_ua_info`` and the results are attached to ``X`` with ``cbind``
    under names of the form ``<column>_<attribute>``; ``X`` is then returned.
    Otherwise the first column of the pandas conversion of ``X`` is returned.
    """
    first_col = X.names[0]

    # Guard clause: not a user-agent column, hand back the first column as-is.
    if first_col not in ('ua', 'user-agent', 'user_agent', 'useragent'):
        return X.to_pandas().iloc[:, 0]

    attributes = ("browser", "os", "device", "is_mobile", "is_tablet")
    derived_names = [f"{first_col}_{s}" for s in attributes]
    raw_values = X[first_col].to_list()[0]
    parsed = dt.Frame([get_ua_info(value) for value in raw_values],
                      names=derived_names)
    X.cbind(parsed)
    return X
github h2oai / driverlessai-recipes / transformers / augmentation / usairportcode_origin_dest_geo_features.py View on Github external
b = 12742 * dt.math.arcsin(dt.math.sqrt(a))  # 2*R*asin...
            all_dt.cbind(all_dt[:, {"distance_km": b}])

            self._output_feature_names = self._output_feature_names + ["{}.{}".format(self.transformer_name, f) for f in
                                          ['elevation_diff', 'lat_diff', 'long_diff', 'distance_km']]
            self._feature_desc = self._feature_desc + [
                                  'Elevation difference between Origin and Destination',
                                  'Latitude difference between Origin and Destination',
                                  'Longitude difference between Origin and Destination',
                                  'Distance in km between Origin and Destination (Harvestine approx.)']
        elif (isOrigin and not isDest):
            all_dt = X_origin
        elif (isDest and not isOrigin):
            all_dt = X_dest
        else:
            all_dt = dt.Frame(np.zeros((X.shape[0], 1)), names = ["dummy"])
            self._output_feature_names = ["dummy"]
            self._feature_desc = ["dummy"]

        return all_dt