How to use the datatable.cbind function in datatable

To help you get started, we’ve selected a few datatable examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github h2oai / datatable / tests / test_dt.py View on Github external
def test_materialize():
    """Verify that ``materialize()`` removes the row index from every column.

    Three frames are column-bound with ``force=True`` (presumably needed
    because the third frame has a different number of rows -- confirm
    against the datatable docs). Before materializing, column 0 must report
    a "slice" row index, column 1 an "arr32" row index, and column 2 none.
    After materializing, no column may have a row index.
    """
    sliced = dt.Frame(A=range(12))[::2, :]
    repeated = dt.repeat(dt.Frame(B=["red", "green", "blue"]), 2)
    plain = dt.Frame(C=[4, 2, 9.1, 12, 0])
    combined = dt.cbind(sliced, repeated, plain, force=True)

    # State before materialization: the first two columns are still views.
    assert frame_column_rowindex(combined, 0).type == "slice"
    assert frame_column_rowindex(combined, 1).type == "arr32"
    assert frame_column_rowindex(combined, 2) is None

    combined.materialize()

    # State after materialization: no column carries a row index any more.
    for column in range(3):
        assert frame_column_rowindex(combined, column) is None
github h2oai / datatable / tests / munging / test_cbind.py View on Github external
def join(names1, names2):
    """Return the column names produced by cbind-ing two name-only frames.

    The whole operation runs under ``pytest.warns(DatatableWarning)``, so
    the helper fails unless a ``DatatableWarning`` is emitted.

    :param names1: column names for the first frame
    :param names2: column names for the second frame
    :return: the ``names`` attribute of the combined frame
    """
    with pytest.warns(DatatableWarning):
        left = dt.Frame(names=names1)
        right = dt.Frame(names=names2)
        return dt.cbind(left, right).names
github h2oai / datatable / tests / munging / test_cbind.py View on Github external
def test_cbind_api():
    """Check that every supported way of passing frames to cbind agrees.

    The plain varargs call is the reference; the list, tuple, mixed
    list/varargs and generator forms must all produce an equal frame.
    """
    first = dt.Frame(A=[1, 2, 3])
    second = dt.Frame(B=[-4, -5, None])
    third = dt.Frame(X=["makes", "gonna", "make"])

    reference = dt.cbind(first, second, third)
    alternatives = [
        dt.cbind([first, second, third]),
        dt.cbind((first, second, third)),  # tuple
        dt.cbind([first], [second, third]),
        dt.cbind(first, [second], third),
        dt.cbind((frame for frame in [first, second, third])),  # generator
    ]

    for alternative in alternatives:
        assert_equals(reference, alternative)
github h2oai / datatable / tests / test_dt_create.py View on Github external
def test_create_from_doublestar_expansion():
    """Check that ``dt.Frame(**a, **b)`` equals ``dt.cbind(a, b)``."""
    left = dt.Frame(A=range(3), B=["df", "qe;r", None])
    right = dt.Frame(D=[7.99, -12.5, 0.1], E=[None] * 3)

    # Build the frame from the keyword-expanded inputs first, then the
    # reference result via cbind.
    expanded = dt.Frame(**left, **right)
    expected = dt.cbind(left, right)

    assert_equals(expanded, expected)
github h2oai / datatable / tests / munging / test_cbind.py View on Github external
def test_cbind_expanded_frame():
    """Check that cbind-ing a star-expanded frame reproduces the frame."""
    original = dt.Frame(A=[1, 2], B=['a', "E"], C=[7, 1000], D=[-3.14, 159265])
    # Star-expansion passes whatever iterating the frame yields (presumably
    # one item per column) as separate cbind arguments.
    assert_equals(original, dt.cbind(*original))
github WZBSocialScienceCenter / tmtoolkit / tmtoolkit / topicmod / model_io.py View on Github external
:func:`~tmtoolkit.topicmod.model_io.ldamodel_full_doc_topics` to retrieve the full document-topic
                 distribution as datatable Frame

    :param topic_word_distrib: topic-word distribution; shape KxM, where K is number of topics, M is vocabulary size
    :param vocab: vocabulary list/array of length M
    :param colname_rowindex: column name for the "row index", i.e. the column that identifies each row
    :param row_labels: format string for each row index where ``{i0}`` or ``{i1}`` are replaced by the respective
                       zero- or one-indexed topic numbers or an array with individual row labels
    :return: datatable Frame
    """
    if isinstance(row_labels, str):
        rownames = [row_labels.format(i0=i, i1=i + 1) for i in range(topic_word_distrib.shape[0])]
    else:
        rownames = row_labels

    return dt.cbind(dt.Frame({colname_rowindex: rownames}),
                    dt.Frame(topic_word_distrib, names=list(vocab)))
github WZBSocialScienceCenter / tmtoolkit / tmtoolkit / topicmod / model_io.py View on Github external
distribution as datatable Frame

    :param doc_topic_distrib: document-topic distribution; shape NxK, where N is the number of documents, K is the
                              number of topics
    :param doc_labels: list/array of length N with a string label for each document
    :param colname_rowindex: column name for the "row index", i.e. the column that identifies each row
    :param topic_labels: format string for each topic column where ``{i0}`` or ``{i1}`` are replaced by the respective
                         zero- or one-indexed topic numbers or an array with individual column labels
    :return: datatable Frame
    """
    if isinstance(topic_labels, str):
        colnames = [topic_labels.format(i0=i, i1=i+1) for i in range(doc_topic_distrib.shape[1])]
    else:
        colnames = topic_labels

    return dt.cbind(dt.Frame({colname_rowindex: doc_labels}),
                    dt.Frame(doc_topic_distrib, names=list(colnames)))
github h2oai / driverlessai-recipes / transformers / mli / debiasing_lfr.py View on Github external
def transform(self, X: dt.Frame, y: np.array = None):
        from aif360.datasets import BinaryLabelDataset
        # Transformation should only occur during training when y is present
        if self.fitted and (self.label_names in X.names or y is not None):
            if self.label_names not in X.names:
                X = dt.cbind(X, dt.Frame(y))

            X_pd = X.to_pandas()
            X = dt.Frame(X_pd.fillna(X_pd.mean()))
            transformed_X: BinaryLabelDataset = self.lfr.transform(
                BinaryLabelDataset(
                    df=X.to_pandas(),
                    favorable_label=self.favorable_label,
                    unfavorable_label=self.unfavorable_label,
                    label_names=self.label_names,
                    protected_attribute_names=self.protected_attribute_names,
                )
            )

            return dt.Frame(
                transformed_X.features,
                names=[name+"_lfr" for name in transformed_X.feature_names],