How to use the featuretools.dfs function in featuretools

To help you get started, we’ve selected a few featuretools.dfs examples based on popular ways the function is used in public projects.

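Before the project snippets, here is a minimal, self-contained sketch of a typical ft.dfs call. It uses the demo dataset bundled with featuretools and assumes a pre-1.0 release, where the target table is selected with the target_entity argument (newer releases renamed it target_dataframe_name), matching the examples below.

import featuretools as ft

# Build the small mock-customer EntitySet that ships with featuretools.
es = ft.demo.load_mock_customer(return_entityset=True)

# Run Deep Feature Synthesis against the "customers" table.
feature_matrix, feature_defs = ft.dfs(entityset=es,
                                      target_entity="customers",
                                      agg_primitives=["count", "mean"],
                                      trans_primitives=["month"],
                                      max_depth=2)
print(feature_matrix.head())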

github FeatureLabs / featuretools-tsfresh-primitives / featuretools_tsfresh_primitives / test_primitives.py
def test_all_primitives(entityset, parameters):
    is_agg_primitive = lambda name: issubclass(primitives[name], ft.primitives.AggregationPrimitive)
    construct_primitive = lambda name: primitives[name](**parameters.get(name, {}))
    agg_primitives = [construct_primitive(name) for name in primitives if is_agg_primitive(name)]
    feature_matrix, features = ft.dfs(entityset=entityset, target_entity='sessions', agg_primitives=agg_primitives)
    assert not feature_matrix.empty
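The test above passes instantiated primitive objects to agg_primitives rather than name strings, which is how parameterized primitives are handed to dfs. A minimal sketch of the same idea with a built-in primitive, reusing the demo EntitySet es from the first example (an assumption, not part of the original test):

from featuretools.primitives import Mean

# agg_primitives accepts a mix of primitive name strings and instances.
feature_matrix, features = ft.dfs(entityset=es,
                                  target_entity="sessions",
                                  agg_primitives=["count", Mean()],
                                  trans_primitives=[])
assert not feature_matrix.empty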
github HDI-Project / MLBlocks / mlblocks / primitives / custom / preprocessors / multitable / dfs.py
def fit(self, X, **kwargs):
    self.features = ft.dfs(
        cutoff_time=X,
        features_only=True,
        max_depth=self.max_depth,
        **kwargs
    )
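The fit method above calls dfs with features_only=True, so only the feature definitions are returned and no matrix is computed; the matrix is produced later from those saved definitions. A hedged sketch of that two-step pattern on the demo EntitySet es, not the MLBlocks pipeline itself:

# Step 1: derive feature definitions only; nothing is computed yet.
features = ft.dfs(entityset=es,
                  target_entity="customers",
                  features_only=True,
                  max_depth=2)

# Step 2: compute the matrix from the saved definitions.
feature_matrix = ft.calculate_feature_matrix(features=features, entityset=es)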
github intel-analytics / analytics-zoo / pyzoo / zoo / automl / feature / time_sequence.py
        def is_awake(column):
            hour = column.dt.hour
            return (((hour >= 6) & (hour <= 23)) | (hour == 0)).astype(int)

        def is_busy_hours(column):
            hour = column.dt.hour
            return (((hour >= 7) & (hour <= 9)) | ((hour >= 16) & (hour <= 19))).astype(int)

        IsAwake = make_trans_primitive(function=is_awake,
                                       input_types=[DatetimeTimeIndex],
                                       return_type=Numeric)
        IsBusyHours = make_trans_primitive(function=is_busy_hours,
                                           input_types=[DatetimeTimeIndex],
                                           return_type=Numeric)

        feature_matrix, feature_defs = ft.dfs(entityset=es,
                                              target_entity="time_seq",
                                              agg_primitives=["count"],
                                              trans_primitives=["month", "weekday", "day", "hour",
                                                                "is_weekend", IsAwake, IsBusyHours])
        return feature_matrix, feature_defs
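The excerpt relies on names imported elsewhere in time_sequence.py. On a pre-1.0 featuretools release they would typically come from the following modules (an assumption based on that API, not the original file's import block):

import featuretools as ft
from featuretools.primitives import make_trans_primitive
from featuretools.variable_types import DatetimeTimeIndex, Numeric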
github Featuretools / DL-DB / dldb / tdfs.py
            instance_id_column = index
        else:
            instance_ids = cutoffs.iloc[:, 0]
            instance_id_column = cutoffs.columns[0]
        time_column = 'time'
        if time_column not in cutoffs:
            not_instance_id = [c for c in cutoffs.columns
                               if c != instance_id_column]
            time_column = not_instance_id[0]
        times = cutoffs[time_column]
        temporal_cutoffs = make_temporal_cutoffs(instance_ids,
                                                 times,
                                                 window_size,
                                                 num_windows,
                                                 start)
    result = ft.dfs(entityset=entityset,
                    features_only=features_only,
                    cutoff_time=temporal_cutoffs,
                    target_entity=target_entity,
                    cutoff_time_in_index=True,
                    **kwargs)
    if not features_only:
        fm, fl = result
        return fm.sort_index(level=[entityset[target_entity].index,
                                    'time']), fl
    return result
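The wrapper above expands one cutoff per instance into a window of earlier cutoffs with make_temporal_cutoffs and passes the result to dfs as cutoff_time. A small sketch of that helper with hypothetical ids and times:

import pandas as pd
import featuretools as ft

instance_ids = pd.Series([1, 2])
cutoff_times = pd.Series(pd.to_datetime(["2014-01-05", "2014-01-07"]))

# Three cutoffs per instance, spaced one day apart and ending at the
# original cutoff time; the result has instance_id and time columns.
temporal_cutoffs = ft.make_temporal_cutoffs(instance_ids,
                                            cutoff_times,
                                            window_size="1d",
                                            num_windows=3)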
github HDI-Project / Trane / trane / utils / featuretools_wrapper.py
def compute_features(self, df, cutoff_strategy, feature_window):
        assert cutoff_strategy.entity_col == self.entity_col

        cutoffs = cutoff_strategy.generate_cutoffs(df)

        cutoffs_ft = []

        for _id, row in cutoffs.iterrows():
            cutoffs_ft.append((row[self.entity_col], row['cutoff_st'] - timedelta(days=1)))

        cutoffs_ft = pd.DataFrame(cutoffs_ft, columns=['instance_id', 'time'])

        feature_matrix, features = ft.dfs(target_entity=self.entity_col,
                                          cutoff_time=cutoffs_ft,
                                          training_window="%dday" % feature_window,  # same as above
                                          entityset=self.es,
                                          cutoff_time_in_index=True,
                                          verbose=True)
        # encode categorical values
        fm_encoded, features_encoded = ft.encode_features(feature_matrix,
                                                          features)

        self.features = fm_encoded.fillna(0)
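After computing features against per-instance cutoff times, the wrapper one-hot encodes categorical features with ft.encode_features and fills missing values. A hedged sketch of that post-processing step on the demo EntitySet es:

feature_matrix, features = ft.dfs(entityset=es, target_entity="customers")

# One-hot encode categorical features, then fill gaps as the wrapper does.
fm_encoded, features_encoded = ft.encode_features(feature_matrix, features)
fm_encoded = fm_encoded.fillna(0)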
github pan5431333 / featuretools4s / featuretools4s / featuretools4s.py
dataframe=df,
                                         index=EntitySpark.recover_col_name(entity_id, entity.index),
                                         variable_types=entity.variable_types,
                                         time_index=EntitySpark.recover_col_name(entity_id, entity.time_index),
                                         secondary_time_index=EntitySpark.recover_col_name(entity_id,
                                                                                           entity.secondary_time_index))

            for relationship in relationships:
                parent_entity = relationship.parent_variable.entity_id
                parent_col = EntitySpark.recover_col_name(parent_entity, relationship.parent_variable.column_name)
                child_entity = relationship.child_variable.entity_id
                child_col = EntitySpark.recover_col_name(child_entity, relationship.child_variable.column_name)
                es.add_relationship(ft.Relationship(es[parent_entity][parent_col],
                                                    es[child_entity][child_col]))

            feature_matrix, feature_dfs = ft.dfs(entityset=es,
                                                 agg_primitives=agg_primitives,
                                                 trans_primitives=trans_primitives,
                                                 target_entity=target_entity,
                                                 cutoff_time=cutoff_time,
                                                 cutoff_time_in_index=False,
                                                 n_jobs=n_jobs,
                                                 max_depth=max_depth,
                                                 training_window=training_window,
                                                 approximate=approximate,
                                                 chunk_size=chunk_size)

            feature_matrix.reset_index(inplace=True)

            columns = sorted(feature_matrix.columns)
            res = []
            for i in range(feature_matrix.shape[0]):
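The featuretools4s wrapper above builds its EntitySet and relationships with the pre-1.0 indexing style es[entity][column] before calling dfs. A minimal sketch of that pattern on two made-up dataframes; every table and column name here is an assumption:

import pandas as pd
import featuretools as ft

customers = pd.DataFrame({"customer_id": [1, 2],
                          "join_date": pd.to_datetime(["2020-01-01", "2020-02-01"])})
orders = pd.DataFrame({"order_id": [10, 11, 12],
                       "customer_id": [1, 1, 2],
                       "order_date": pd.to_datetime(["2020-03-01", "2020-03-05", "2020-03-02"])})

es = ft.EntitySet(id="shop")
es = es.entity_from_dataframe(entity_id="customers", dataframe=customers,
                              index="customer_id", time_index="join_date")
es = es.entity_from_dataframe(entity_id="orders", dataframe=orders,
                              index="order_id", time_index="order_date")
es = es.add_relationship(ft.Relationship(es["customers"]["customer_id"],
                                         es["orders"]["customer_id"]))

feature_matrix, feature_defs = ft.dfs(entityset=es, target_entity="customers")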