How to use the matminer.utils.io.load_dataframe_from_json function in matminer

To help you get started, we’ve selected a few matminer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hackingmaterials / automatminer / automatminer_dev / tasks.py View on Github external
raise ValueError("{} not supported yet!"
                             "".format(learner_name))
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs)}
        logger = initialize_logger(AMM_LOGGER_BASENAME, log_dir=base_save_dir)
        pipe = MatPipe(**pipe_config, logger=logger)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file)

        pipe.fit(df, target)
        pipe.save(os.path.join(base_save_dir, "pipe.p"))
github hackingmaterials / automatminer / automatminer_dev / tasks / single.py View on Github external
else:
            raise ValueError("{} not supported yet!" "".format(learner_name))
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs),
        }
        pipe = MatPipe(**pipe_config)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file)

        pipe.fit(df, target)
        pipe.save(os.path.join(base_save_dir, "pipe.p"))
github hackingmaterials / automatminer / automatminer_dev / tasks.py View on Github external
autofeaturizer_kwargs["cache_src"] = os.path.join(base_save_dir, "features.json")
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs)}

        logger = initialize_logger(AMM_LOGGER_BASENAME, log_dir=save_dir)
        pipe = MatPipe(**pipe_config, logger=logger)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file)

        # Check other parameters that would otherwise not be checked until after
        # benchmarking, hopefully saves some errors at the end during scoring.
        if problem_type not in [AMM_CLF_NAME, AMM_REG_NAME]:
            raise ValueError("Problem must be either classification or "
                             "regression.")
        elif problem_type == AMM_CLF_NAME:
            if not isinstance(clf_pos_label, (str, bool)):
                raise TypeError("The classification positive label should be a "
                                "string, or bool not {}."
                                "".format(type(clf_pos_label)))
            elif clf_pos_label not in df[target]:
                raise ValueError("The classification positive label should be"
                                 "present in the target column.")
            elif len(df[target].unique()) > 2:
                raise ValueError("Only binary classification scoring available"
github hackingmaterials / automatminer / automatminer_dev / tasks / bench.py View on Github external
)
        pipe_config = {
            "learner": learner,
            "reducer": FeatureReducer(**reducer_kwargs),
            "cleaner": DataCleaner(**cleaner_kwargs),
            "autofeaturizer": AutoFeaturizer(**autofeaturizer_kwargs),
        }

        pipe = MatPipe(**pipe_config)

        # Set up dataset
        # Dataset should already be set up correctly as json beforehand.
        # this includes targets being converted to classification, removing
        # extra columns, having the names of featurization cols set to the
        # same as the matpipe config, etc.
        df = load_dataframe_from_json(data_file)

        # Check other parameters that would otherwise not be checked until after
        # benchmarking, hopefully saves some errors at the end during scoring.
        if problem_type not in [AMM_CLF_NAME, AMM_REG_NAME]:
            raise ValueError("Problem must be either classification or " "regression.")
        elif problem_type == AMM_CLF_NAME:
            if not isinstance(clf_pos_label, (str, bool)):
                raise TypeError(
                    "The classification positive label should be a "
                    "string, or bool not {}."
                    "".format(type(clf_pos_label))
                )
            elif clf_pos_label not in df[target]:
                raise ValueError(
                    "The classification positive label should be"
                    "present in the target column."
github hackingmaterials / automatminer / automatminer / featurization / core.py View on Github external
"""
        Decorate a dataframe containing composition, structure, bandstructure,
        and/or DOS objects with descriptors.

        Args:
            df (pandas.DataFrame): The dataframe not containing features.
            target (str): The ML-target property contained in the df.

        Returns:
            df (pandas.DataFrame): Transformed dataframe containing features.
        """
        if self.cache_src and os.path.exists(self.cache_src):
            logger.debug(
                self._log_prefix + "Reading cache_src {}".format(self.cache_src)
            )
            cached_df = load_dataframe_from_json(self.cache_src)
            if not all([loc in cached_df.index for loc in df.index]):
                raise AutomatminerError(
                    "Feature cache does not contain all "
                    "entries (by DataFrame index) needed "
                    "to transform the input df."
                )
            else:
                cached_subdf = cached_df.loc[df.index]
                if target in cached_subdf.columns:
                    if target not in df.columns:
                        logger.warn(
                            self._log_prefix
                            + "Target not present in both cached df and input df."
                            " Cannot perform comparison to ensure index match."
                        )
                    else:
github hackingmaterials / automatminer / mslearn / data / load.py View on Github external
hole mass_z (target): Effective hole mass in z direction (BoltzTraP)
        epsilon_x opt (target): Static dielectric function in x direction
            calculated with OptB88vDW functional.
        epsilon_y opt (target): Static dielectric function in y direction
            calculated with OptB88vDW functional.
        epsilon_z opt (target): Static dielectric function in z direction
            calculated with OptB88vDW functional.
        epsilon_x tbmbj (target): Static dielectric function in x direction
            calculated with TBMBJ functional.
        epsilon_y tbmbj (target): Static dielectric function in y direction
            calculated with TBMBJ functional.
        epsilon_z tbmbj (target): Static dielectric function in z direction
            calculated with TBMBJ functional.
    """

    df = load_dataframe_from_json(os.path.join(data_dir, 'jdft_3d.json'))

    colmap = {"el_mass_x": "e mass_x",
            "el_mass_y": "e mass_y",
            "el_mass_z": "e mass_z",
            "epsx": "epsilon_x opt",
            "epsy": "epsilon_y opt",
            "epsz": "epsilon_z opt",
            "exfoliation_en": "e_exfol",
            "form_enp": "e_form",
            "gv": "shear modulus",
            "hl_mass_x": "hole mass_x",
            "hl_mass_y": "hole mass_y",
            "hl_mass_z": "hole mass_z",
            "kv": "bulk modulus",
            "magmom": "mu_b",
            "mbj_gap": "gap tbmbj",
github hackingmaterials / automatminer / automatminer_dev / local / dummy.py View on Github external
from sklearn.model_selection import cross_val_score, KFold, StratifiedKFold
from automatminer.utils.ml import regression_or_classification
from automatminer.utils.ml import AMM_CLF_NAME, AMM_REG_NAME
from automatminer_dev.config import BENCHMARK_FULL_SET, GLASS, EXPT_IS_METAL, EXPT_GAP
from matminer.utils.io import load_dataframe_from_json


benchmark_dir = os.environ["AMM_DATASET_DIR"]

bmarks = BENCHMARK_FULL_SET
bmarks = [GLASS, EXPT_GAP, EXPT_IS_METAL]

for p in bmarks:
    pname = p["name"]
    print("Loading {}".format(pname))
    df = load_dataframe_from_json(os.path.join(benchmark_dir, p["data_file"]))
    target = p["target"]
    ltype = p["problem_type"]
    if ltype == AMM_REG_NAME:
        kf = KFold(n_splits=5, random_state=18012019, shuffle=True)
        estimator = DummyRegressor(strategy="mean")
        scoring = "neg_mean_absolute_error"
        multiplier = -1
    elif ltype == AMM_CLF_NAME:
        kf = StratifiedKFold(n_splits=5, random_state=18012019, shuffle=True)
        estimator = DummyClassifier(strategy="stratified")
        multiplier = 1
        scoring = "roc_auc"
    else:
        raise ValueError("problem type {} is not known.".format(ltype))

    cvs = cross_val_score(
github hackingmaterials / robocrystallographer / robocrys / condense / mineral.py View on Github external
def __init__(
        self,
        initial_ltol: float = 0.2,
        initial_stol: float = 0.3,
        initial_angle_tol: float = 5.,
        use_fingerprint_matching: bool = True,
        fingerprint_distance_cutoff: float = 0.4,
    ):
        """Load the packaged mineral database and record matching settings.

        The database is read from the ``mineral_db.json.gz`` resource of the
        ``robocrys.condense`` package via ``load_dataframe_from_json``.

        Args:
            initial_ltol: Stored unchanged as ``self.initial_ltol``.
                NOTE(review): presumably a lattice-length tolerance for
                structure matching -- confirm against the methods using it.
            initial_stol: Stored unchanged as ``self.initial_stol``.
                NOTE(review): presumably a site tolerance -- confirm.
            initial_angle_tol: Stored unchanged as ``self.initial_angle_tol``.
                NOTE(review): presumably an angle tolerance -- confirm.
            use_fingerprint_matching: Stored unchanged as
                ``self.use_fingerprint_matching``.
            fingerprint_distance_cutoff: Stored unchanged as
                ``self.fingerprint_distance_cutoff``.
        """
        # Resolve the resource path first and read it straight away, so a
        # missing or unreadable database fails before any setting is stored.
        self.mineral_db = load_dataframe_from_json(
            resource_filename('robocrys.condense', 'mineral_db.json.gz')
        )

        # Tolerances, kept exactly as supplied by the caller.
        self.initial_angle_tol = initial_angle_tol
        self.initial_stol = initial_stol
        self.initial_ltol = initial_ltol

        # Fingerprint-matching options.
        self.use_fingerprint_matching = use_fingerprint_matching
        self.fingerprint_distance_cutoff = fingerprint_distance_cutoff

        # Private slots start empty; nothing in this constructor fills them.
        self._mineral_db = None
        self._structure = None