How to use matminer - common examples

To help you get started, we’ve selected a few matminer examples, based on popular ways it is used in public projects.

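If you are brand new to matminer, here is a minimal sketch to start from (it assumes matminer is already installed, for example with pip install matminer); it loads one of the bundled datasets that several of the examples below reuse:

from matminer.datasets import load_dataset

# Load a bundled benchmark dataset into a pandas DataFrame.
df = load_dataset("elastic_tensor_2015")
print(df.columns.tolist())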

github hackingmaterials / automatminer / automatminer / automl / adaptors.py View on Github
not_in_model))
        else:
            X = df[self._features].values  # rectify feature order
            y_pred = self._backend.predict(X)
            df[target + " predicted"] = y_pred
            self._logger.debug("Prediction finished successfully.")
            return df


if __name__ == "__main__":
    from matminer.datasets.dataset_retrieval import load_dataset
    from automatminer.featurization import AutoFeaturizer
    from automatminer.preprocessing import DataCleaner, FeatureReducer

    # Load a dataset
    df = load_dataset("elastic_tensor_2015").rename(
        columns={"formula": "composition"})[["composition", "K_VRH"]]
    testdf = df.iloc[501:550]
    traindf = df.iloc[:100]
    target = "K_VRH"

    # Get top-level transformers
    autofeater = AutoFeaturizer()
    cleaner = DataCleaner()
    reducer = FeatureReducer()
    learner = TPOTAdaptor("regression", max_time_mins=5)

    # Fit transformers on training data
    traindf = autofeater.fit_transform(traindf, target)
    traindf = cleaner.fit_transform(traindf, target)
    traindf = reducer.fit_transform(traindf, target)
    learner.fit(traindf, target)
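
The excerpt stops after fitting the adaptor. A plausible continuation (a sketch reusing the objects defined above, not part of the original snippet) pushes the held-out testdf through the same fitted transformers and then calls the predict method shown at the top of the file:

    # Transform the held-out split with the already-fitted transformers,
    # then predict with the fitted TPOT adaptor.
    testdf = autofeater.transform(testdf, target)
    testdf = cleaner.transform(testdf, target)
    testdf = reducer.transform(testdf, target)
    testdf = learner.predict(testdf, target)
    print(testdf)
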
github DLHub-Argonne / dlhub_sdk / examples / matminer / make_model.py View on Github
print('Loaded {} rows with {} columns:'.format(len(data), len(data.columns)),
      data.columns.tolist())

# Get only the minimum energy structure at each composition
data['composition'] = data['structure'].apply(lambda x: x.composition)
data['integer_formula'] = data['composition'].apply(lambda x: x.get_integer_formula_and_factor()[0])

data.sort_values('e_above_hull', ascending=True, inplace=True)
data.drop_duplicates('integer_formula', keep='first', inplace=True)
print('Reduced dataset to {} unique compositions.'.format(len(data)))

data.reset_index(inplace=True, drop=True)

# Create the featurizer, which will take the composition as input
featurizer = MultipleFeaturizer([
      cf.Stoichiometry(),
      cf.ElementProperty.from_preset('magpie'),
      cf.ValenceOrbital(props=['frac']),
      cf.IonProperty(fast=True)
])

# Compute the features
featurizer.set_n_jobs(1)
X = featurizer.featurize_many(data['composition'])

# Make the model
model = Pipeline([
    ('imputer', Imputer()),
    ('model', RandomForestRegressor())
])
model.fit(X, data['formation_energy_per_atom'])
print('Trained a RandomForest model')

# Save the model, featurizer, and data using pickle
with open('model.pkl', 'wb') as fp:
    pickle.dump(model, fp)  # assumed continuation; the original snippet is truncated at the "with" line
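
As a quick sanity check of the trained pipeline, the sketch below (not part of the original example; Fe2O3 is an arbitrary illustrative composition, and on older pymatgen the import is from pymatgen import Composition) featurizes one new composition with the same featurizer and runs it through the fitted model:

from pymatgen.core import Composition

# Featurize a single new composition and predict with the fitted pipeline above.
x_new = featurizer.featurize(Composition("Fe2O3"))
print(model.predict([x_new]))
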
github hackingmaterials / automatminer / automatminer_dev / tasks.py View on Github
raise ValueError("{} not supported yet!"
                             "".format(learner_name))
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs)}
        logger = initialize_logger(AMM_LOGGER_BASENAME, log_dir=base_save_dir)
        pipe = MatPipe(**pipe_config, logger=logger)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file)

        pipe.fit(df, target)
        pipe.save(os.path.join(base_save_dir, "pipe.p"))
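
The comment above assumes the dataset was already prepared as JSON. A minimal sketch of that preparation step using matminer's I/O helpers (the file name my_dataset.json is illustrative):

from matminer.datasets import load_dataset
from matminer.utils.io import store_dataframe_as_json, load_dataframe_from_json

# Serialize a dataframe (including pymatgen objects) to JSON and read it back.
df = load_dataset("elastic_tensor_2015")[["structure", "K_VRH"]]
store_dataframe_as_json(df, "my_dataset.json")
df = load_dataframe_from_json("my_dataset.json")
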
github hackingmaterials / automatminer / automatminer_dev / tasks / single.py View on Github
else:
            raise ValueError("{} not supported yet!" "".format(learner_name))
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs),
        }
        pipe = MatPipe(**pipe_config)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file)

        pipe.fit(df, target)
        pipe.save(os.path.join(base_save_dir, "pipe.p"))
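
A pipeline saved this way can be reloaded later with MatPipe.load, the counterpart to pipe.save. A sketch reusing base_save_dir, df and target from the snippet (note that newer automatminer releases infer the target and take only the dataframe in predict):

import os
from automatminer import MatPipe

# Reload the saved pipeline and predict on a dataframe with the same input columns.
pipe = MatPipe.load(os.path.join(base_save_dir, "pipe.p"))
predicted = pipe.predict(df, target)  # newer releases: pipe.predict(df)
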
github hackingmaterials / automatminer / automatminer / featurization / sets.py View on Github
def need_fit(self):
        fs = [
            sf.PartialRadialDistributionFunction(),
            sf.BondFractions(),
            sf.BagofBonds(coulomb_matrix=sf.CoulombMatrix()),
            sf.BagofBonds(coulomb_matrix=sf.SineCoulombMatrix()),
        ]
        return self._get_featurizers(fs)
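
The featurizers returned by need_fit have to be fit on the data before they can featurize anything. A short sketch on a bundled dataset (trimmed to 50 rows to keep it fast):

from matminer.datasets import load_dataset
from matminer.featurizers.structure import BondFractions

df = load_dataset("elastic_tensor_2015")[["structure", "K_VRH"]].head(50)
bf = BondFractions()
bf.fit(df["structure"])                       # learn the bond types present in the data
df = bf.featurize_dataframe(df, "structure")  # featurization only works after fitting
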
github hackingmaterials / automatminer / automatminer_dev / graphnet / cgcnn.py View on Github
df = pd.DataFrame(pickle.load(f))[["structure", prop_col]].dropna()
    idx_list = list(range(len(df)))

    kf = KFold(n_splits=5, random_state=18012019, shuffle=True)
    for kf_idx, (remain_index, test_index) in enumerate(kf.split(idx_list)):
        if kf_idx in kf_indices:
            kf_tmp_output_path = os.path.join(
                tmp_output_path, "kfold_{}".format(kf_idx)
            )
            if not os.path.exists(kf_tmp_output_path):
                os.makedirs(kf_tmp_output_path, exist_ok=True)
            train_index, val_index = train_test_split(
                remain_index, test_size=0.25, random_state=18012019, shuffle=True
            )

            cgcnnfz = CGCNNFeaturizer(
                task=args.task,
                distributed=distributed,
                n_works=args.n_works,
                disable_cuda=disable_cuda,
                save_idx=kf_tmp_output_path,
                output_path=kf_tmp_output_path,
                atom_init_fea=atom_features,
                use_batch=False,
                test=args.test,
                dropout_percent=0.5,
                batch_size=args.batch_size,
                warm_start_file=args.warm_start,
                warm_start_latest=True,
                use_pretrained=False,
                save_model_to_dir=os.path.join(kf_tmp_output_path, "model"),
                save_checkpoint_to_dir=os.path.join(kf_tmp_output_path, "checkpoint"),
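
The CGCNNFeaturizer call above is cut off mid-argument list in the original snippet. The data-splitting pattern around it is plain scikit-learn, sketched in isolation below (the toy index list and the print are illustrative only):

from sklearn.model_selection import KFold, train_test_split

# Outer 5-fold split into remain/test indices, then a 75/25 train/validation
# split of the remaining indices, mirroring the loop above.
idx_list = list(range(100))  # toy indices
kf = KFold(n_splits=5, random_state=18012019, shuffle=True)
for kf_idx, (remain_index, test_index) in enumerate(kf.split(idx_list)):
    train_index, val_index = train_test_split(
        remain_index, test_size=0.25, random_state=18012019, shuffle=True
    )
    print(kf_idx, len(train_index), len(val_index), len(test_index))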