How to use the featuretools.encode_features function in featuretools

To help you get started, we've selected a few examples of featuretools.encode_features, based on how it is used in public projects.

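Before the project examples, here is a minimal sketch of the typical workflow: build a feature matrix with ft.dfs, then pass the matrix and its feature definitions to ft.encode_features to one-hot encode the categorical columns. The sketch assumes featuretools 1.x and uses the mock-customer demo data bundled with the library; the target dataframe name and keyword arguments are illustrative, and older releases (used in some of the projects below) spell the DFS argument target_entity instead of target_dataframe_name.

import featuretools as ft

# Small demo EntitySet shipped with featuretools (customers, sessions,
# transactions, products).
es = ft.demo.load_mock_customer(return_entityset=True)

# Deep Feature Synthesis: build a feature matrix and feature definitions
# for the customers dataframe.
feature_matrix, feature_defs = ft.dfs(entityset=es,
                                      target_dataframe_name="customers",
                                      max_depth=2)

# One-hot encode categorical features; numeric features pass through
# unchanged. top_n limits how many categories per feature get a column,
# and include_unknown adds a catch-all column for the remaining values.
fm_encoded, features_encoded = ft.encode_features(feature_matrix,
                                                  feature_defs,
                                                  top_n=10,
                                                  include_unknown=True)

print(feature_matrix.shape, "->", fm_encoded.shape)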

Example from Featuretools/DL-DB (tests/test_atm.py):
def score_model_baseline(fm, labels, fl, hyperparams):
    baseline_fm = (fm.reset_index('customer_id', drop=False)
                     .drop_duplicates('customer_id', keep='last')
                     .set_index('customer_id'))
    # One-hot encode categorical features, then drop low-information columns
    baseline_fm, baseline_fl = ft.encode_features(baseline_fm, fl)
    baseline_fm, baseline_fl = remove_low_information_features(baseline_fm, baseline_fl)

    hyperparams = parse_hyperparams_baseline(hyperparams)
    print("HYPERPARAMS:", hyperparams)
    cv_score = []
    n_splits = 5
    splitter = StratifiedKFold(n_splits=n_splits, shuffle=True)
    for train_index, test_index in splitter.split(labels, labels):
        baseline_train_labels = labels.iloc[train_index]
        baseline_test_labels = labels.iloc[test_index]
        baseline_train_fm = baseline_fm.loc[baseline_train_labels.index, :]
        baseline_test_fm = baseline_fm.loc[baseline_test_labels.index, :]

        score = score_baseline_pipeline(baseline_train_fm, baseline_train_labels,
                                        baseline_test_fm, baseline_test_labels,
                                        **hyperparams)

Example from HDI-Project/MLBlocks (mlblocks/components/functions/multitable/dfs.py):
def produce(self, X, **kwargs):
    # Compute the feature matrix for the given cutoff times, then one-hot
    # encode its categorical features and fill missing values with 0.
    feature_matrix = ft.calculate_feature_matrix(self.__features, cutoff_time=X, **kwargs)
    fm_encoded, features_encoded = ft.encode_features(feature_matrix, self.__features)
    fm_encoded = fm_encoded.fillna(0)
    return fm_encoded

Example from HDI-Project/MLBlocks (mlblocks/primitives/custom/preprocessors/multitable/dfs.py):
def produce(self, X, **kwargs):
    # Same pattern as above: build the feature matrix at the given cutoff
    # times, encode categorical features, and fill missing values with 0.
    feature_matrix = ft.calculate_feature_matrix(
        self.features, cutoff_time=X, **kwargs)

    fm_encoded, features_encoded = ft.encode_features(
        feature_matrix, self.features)

    return fm_encoded.fillna(0)

Example from HDI-Project/Trane (trane/utils/featuretools_wrapper.py):
        cutoffs_ft = []

        for _id, row in cutoffs.iterrows():
            cutoffs_ft.append((row[self.entity_col], row['cutoff_st'] - timedelta(days=1)))

        cutoffs_ft = pd.DataFrame(cutoffs_ft, columns=['instance_id', 'time'])

        feature_matrix, features = ft.dfs(target_entity=self.entity_col,
                                          cutoff_time=cutoffs_ft,
                                          training_window="%dday" % feature_window,  # same as above
                                          entityset=self.es,
                                          cutoff_time_in_index=True,
                                          verbose=True)
        # encode categorical values
        fm_encoded, features_encoded = ft.encode_features(feature_matrix,
                                                          features)

        self.features = fm_encoded.fillna(0)

Example from FeatureLabs/nlp_primitives (nlp_primitives/utils.py):
max_depth=max_depth, features_only=True)

    applicable_features = []
    for feat in features:
        for x in feature_substrings:
            if x in feat.get_name():
                applicable_features.append(feat)
    if len(applicable_features) == 0:
        raise ValueError('No feature names with %s, verify the name attribute \
                          is defined and/or generate_name() is defined to \
                          return %s ' % (feature_substrings, feature_substrings))
    df = ft.calculate_feature_matrix(entityset=es,
                                     features=applicable_features,
                                     instance_ids=instance_ids)

    # The encoded output is not used here; this call just checks that the
    # selected features can be encoded without raising an error.
    ft.encode_features(df, applicable_features)

    # TODO: check the multi_output shape by checking
    # feature.number_output_features for each feature
    # and comparing it with the matrix shape
    if not multi_output:
        assert len(applicable_features) == df.shape[1]
    return
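
Across all of these projects the pattern is the same: compute a feature matrix with ft.dfs or ft.calculate_feature_matrix, then pass the matrix and its feature definitions to ft.encode_features. The call returns a fully numeric matrix plus encoded feature definitions, and the encoded definitions can later be fed back to ft.calculate_feature_matrix to reproduce the same one-hot columns on new data. As an illustration (parameter defaults can differ between featuretools releases), the size of the encoded matrix can be tuned with top_n and include_unknown, continuing from feature_matrix and feature_defs in the sketch near the top of this page:

# Keep at most 5 category columns per feature and skip the catch-all
# "unknown" column; verbose prints progress while encoding.
fm_compact, feats_compact = ft.encode_features(feature_matrix,
                                               feature_defs,
                                               top_n=5,
                                               include_unknown=False,
                                               verbose=True)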