How to use the load_dataframe_from_json function in matminer

To help you get started, we’ve selected a few matminer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hackingmaterials / automatminer / automatminer_dev / View on Github external
raise ValueError("{} not supported yet!"
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs)}
        logger = initialize_logger(AMM_LOGGER_BASENAME, log_dir=base_save_dir)
        pipe = MatPipe(**pipe_config, logger=logger)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file), target), "pipe.p"))
github hackingmaterials / automatminer / automatminer_dev / tasks / View on Github external
            raise ValueError("{} not supported yet!" "".format(learner_name))
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs),
        pipe = MatPipe(**pipe_config)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file), target), "pipe.p"))
github hackingmaterials / automatminer / automatminer_dev / View on Github external
autofeaturizer_kwargs["cache_src"] = os.path.join(base_save_dir, "features.json")
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs)}

        logger = initialize_logger(AMM_LOGGER_BASENAME, log_dir=save_dir)
        pipe = MatPipe(**pipe_config, logger=logger)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file)

        # Check other parameters that would otherwise not be checked until after
        # benchmarking, hopefully saves some errors at the end during scoring.
        if problem_type not in [AMM_CLF_NAME, AMM_REG_NAME]:
            raise ValueError("Problem must be either classification or "
        elif problem_type == AMM_CLF_NAME:
            if not isinstance(clf_pos_label, (str, bool)):
                raise TypeError("The classification positive label should be a "
                                "string, or bool not {}."
            elif clf_pos_label not in df[target]:
                raise ValueError("The classification positive label should be"
                                 "present in the target column.")
            elif len(df[target].unique()) > 2:
                raise ValueError("Only binary classification scoring available"
github hackingmaterials / automatminer / automatminer_dev / tasks / View on Github external
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs),

        pipe = MatPipe(**pipe_config)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file)

        # Check other parameters that would otherwise not be checked until after
        # benchmarking, hopefully saves some errors at the end during scoring.
        if problem_type not in [AMM_CLF_NAME, AMM_REG_NAME]:
            raise ValueError("Problem must be either classification or " "regression.")
        elif problem_type == AMM_CLF_NAME:
            if not isinstance(clf_pos_label, (str, bool)):
                raise TypeError(
                    "The classification positive label should be a "
                    "string, or bool not {}."
            elif clf_pos_label not in df[target]:
                raise ValueError(
                    "The classification positive label should be"
                    "present in the target column."
github hackingmaterials / automatminer / automatminer / featurization / View on Github external
        Decorate a dataframe containing composition, structure, bandstructure,
        and/or DOS objects with descriptors.

        Args:
            df (pandas.DataFrame): The dataframe not containing features.
            target (str): The ML-target property contained in the df.

        Returns:
            df (pandas.DataFrame): Transformed dataframe containing features.
        if self.cache_src and os.path.exists(self.cache_src):
                self._log_prefix + "Reading cache_src {}".format(self.cache_src)
            cached_df = load_dataframe_from_json(self.cache_src)
            if not all([loc in cached_df.index for loc in df.index]):
                raise AutomatminerError(
                    "Feature cache does not contain all "
                    "entries (by DataFrame index) needed "
                    "to transform the input df."
                cached_subdf = cached_df.loc[df.index]
                if target in cached_subdf.columns:
                    if target not in df.columns:
                            + "Target not present in both cached df and input df."
                            " Cannot perform comparison to ensure index match."
github hackingmaterials / automatminer / mslearn / data / View on Github external
hole mass_z (target): Effective hole mass in z direction (BoltzTraP)
        epsilon_x opt (target): Static dielectric function in x direction
            calculated with OptB88vDW functional.
        epsilon_y opt (target): Static dielectric function in y direction
            calculated with OptB88vDW functional.
        epsilon_z opt (target): Static dielectric function in z direction
            calculated with OptB88vDW functional.
        epsilon_x tbmbj (target): Static dielectric function in x direction
            calculated with TBMBJ functional.
        epsilon_y tbmbj (target): Static dielectric function in y direction
            calculated with TBMBJ functional.
        epsilon_z tbmbj (target): Static dielectric function in z direction
            calculated with TBMBJ functional.

    df = load_dataframe_from_json(os.path.join(data_dir, 'jdft_3d.json'))

    colmap = {"el_mass_x": "e mass_x",
            "el_mass_y": "e mass_y",
            "el_mass_z": "e mass_z",
            "epsx": "epsilon_x opt",
            "epsy": "epsilon_y opt",
            "epsz": "epsilon_z opt",
            "exfoliation_en": "e_exfol",
            "form_enp": "e_form",
            "gv": "shear modulus",
            "hl_mass_x": "hole mass_x",
            "hl_mass_y": "hole mass_y",
            "hl_mass_z": "hole mass_z",
            "kv": "bulk modulus",
            "magmom": "mu_b",
            "mbj_gap": "gap tbmbj",
github hackingmaterials / automatminer / automatminer_dev / local / View on Github external
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from import regression_or_classification
from automatminer_dev.config import BENCHMARK_FULL_SET, GLASS, EXPT_IS_METAL, EXPT_GAP
from import load_dataframe_from_json

benchmark_dir = os.environ["AMM_DATASET_DIR"]


for p in bmarks:
    pname = p["name"]
    print("Loading {}".format(pname))
    df = load_dataframe_from_json(os.path.join(benchmark_dir, p["data_file"]))
    target = p["target"]
    ltype = p["problem_type"]
    if ltype == AMM_REG_NAME:
        kf = KFold(n_splits=5, random_state=18012019, shuffle=True)
        estimator = DummyRegressor(strategy="mean")
        scoring = "neg_mean_absolute_error"
        multiplier = -1
    elif ltype == AMM_CLF_NAME:
        kf = StratifiedKFold(n_splits=5, random_state=18012019, shuffle=True)
        estimator = DummyClassifier(strategy="stratified")
        multiplier = 1
        scoring = "roc_auc"
        raise ValueError("problem type {} is not known.".format(ltype))

    cvs = cross_val_score(
github hackingmaterials / robocrystallographer / robocrys / condense / View on Github external
def __init__(
        self,
        initial_ltol: float = 0.2,
        initial_stol: float = 0.3,
        initial_angle_tol: float = 5.0,
        use_fingerprint_matching: bool = True,
        fingerprint_distance_cutoff: float = 0.4,
):
        """Load the packaged mineral database and record the matching settings.

        The database is read from the ``mineral_db.json.gz`` resource of the
        ``robocrys.condense`` package and kept on ``self.mineral_db``.

        Args:
            initial_ltol: Stored unchanged as ``self.initial_ltol``
                (presumably a lattice-length tolerance -- TODO confirm).
            initial_stol: Stored unchanged as ``self.initial_stol``
                (presumably a site tolerance -- TODO confirm).
            initial_angle_tol: Stored unchanged as ``self.initial_angle_tol``
                (presumably an angle tolerance -- TODO confirm).
            use_fingerprint_matching: Stored unchanged as
                ``self.use_fingerprint_matching``.
            fingerprint_distance_cutoff: Stored unchanged as
                ``self.fingerprint_distance_cutoff``.
        """
        # Resolve and load the bundled database before touching any other
        # attribute, so a loading failure leaves no settings half-assigned.
        self.mineral_db = load_dataframe_from_json(
            resource_filename('robocrys.condense', 'mineral_db.json.gz')
        )

        # Matching settings, recorded exactly as supplied by the caller.
        self.initial_ltol = initial_ltol
        self.initial_stol = initial_stol
        self.initial_angle_tol = initial_angle_tol
        self.use_fingerprint_matching = use_fingerprint_matching
        self.fingerprint_distance_cutoff = fingerprint_distance_cutoff

        # Private slots start out empty.
        # NOTE(review): ``_mineral_db`` is reset to None even though
        # ``mineral_db`` was just loaded -- confirm which one consumers read.
        self._structure = self._mineral_db = None