How to use the matminer.data_retrieval.retrieve_MP.MPDataRetrieval class in matminer

To help you get started, we’ve selected a few matminer examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github hackingmaterials / automatminer / automatminer_dev / matbench / mp_elasticity.py View on Github external
- elasticity_G_VRH
    - elasticity_log10(G_VRH)

From matminer's dataset library.
"""

from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
import pandas as pd
import numpy as np

# Raise pandas' console display limits (500 rows, 500 columns, 1000
# characters wide) for the DataFrames handled by this script.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)

# Retrieval client used for the get_dataframe query below. No API key is
# passed explicitly here.
mpdr = MPDataRetrieval()

df = mpdr.get_dataframe(
    criteria={
        "e_above_hull": {"$lt": 0.150},
        "formation_energy_per_atom": {"$lt": 0.150},
        "elasticity": {"$exists": 1, "$ne": None},
    },
    # "elements": },
    properties=[
        "material_id",
        "structure",
        "elasticity.K_VRH",
        "elasticity.G_VRH",
        "elasticity.G_Voigt",
        "elasticity.K_Voigt",
        "elasticity.G_Reuss",
github hackingmaterials / automatminer / automatminer_dev / matbench / mp_gaps.py View on Github external
from pymatgen import MPRester
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
import pandas as pd
import numpy as np
from tqdm import tqdm


# Raise pandas' console display limits (500 rows, 500 columns, 1000
# characters wide) for the DataFrames handled by this script.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)

# NOTE(review): presumably the batch size handed to chunks() further down;
# its use is not visible in this excerpt -- confirm.
chunksize = 1000

# Two clients are created, both without an explicit API key: the matminer
# retriever (used for get_dataframe below) and a pymatgen MPRester.
mpdr = MPDataRetrieval()
mpr = MPRester()


def chunks(l, n):
    """Generate consecutive slices of ``l``, each holding at most ``n`` items.

    Args:
        l: A sliceable sequence that supports ``len()``.
        n: Step between slice starts, which is also the slice length.

    Yields:
        ``l[start:start + n]`` for ``start`` = 0, n, 2n, ... below ``len(l)``.
        The final slice is shorter when ``len(l)`` is not a multiple of ``n``.
    """
    total = len(l)
    starts = range(0, total, n)
    for begin in starts:
        end = begin + n
        yield l[begin:end]


df = mpdr.get_dataframe(
    criteria={
        "e_above_hull": {"$lt": 0.150},
        "formation_energy_per_atom": {"$lt": 0.150},
        "band_gap": {"$exists": 1, "$ne": None},
    },
    properties=["material_id", "warnings"],
github hackingmaterials / automatminer / automatminer_dev / matbench / dielectric.py View on Github external
Regenerating from the newest Materials Project calculations
"""

from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
from pymatgen import Element

import pandas as pd
import numpy as np

# pandas console limits: 1000 characters wide, up to 500 columns and rows.
# (The old 'display.height' option stays disabled.)
pd.set_option("display.width", 1000)
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

mpdr = MPDataRetrieval()

# Alternative data source, currently disabled:
#     df = load_dataset("dielectric_constant")
# Instead, query every entry that has "diel" data and pull the columns
# needed below.
df = mpdr.get_dataframe(
    criteria={"has": "diel"},
    properties=["material_id", "diel.n", "formation_energy_per_atom", "e_above_hull", "structure"],
    index_mpid=False,
)
# Keep only the rows where both e_above_hull and formation_energy_per_atom
# are below 0.150.
df = df.loc[df["e_above_hull"].lt(0.150) & df["formation_energy_per_atom"].lt(0.150)]
github hackingmaterials / rocketsled / old / perovskites / mongo_and_mp_data.py View on Github external
import os

import numpy as np
import pandas as pd
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
from matminer.data_retrieval.retrieve_MongoDB import MongoDataRetrieval
# from matminer.descriptors.composition_features import get_pymatgen_descriptor
from pymongo import MongoClient
from references import Evaluator

# Widen pandas' console output: 2000 characters per line, 500 characters per
# cell, and no cap on the number of rows or columns printed.
pd.set_option('display.width', 2000)
pd.set_option('display.max_colwidth', 500)
# Use the fully-qualified option name. The bare 'max_columns' pattern is
# ambiguous on pandas versions that also define 'styler.render.max_columns',
# and set_option rejects ambiguous patterns with an OptionError.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# Credentials must not live in source control: read the Materials Project API
# key from the environment (PMG_MAPI_KEY is the variable pymatgen itself
# honours). When it is unset, api_key is None and the retriever is left to
# resolve the key from the pymatgen configuration.
# NOTE(review): the key that used to be committed on this line should be
# revoked and regenerated.
api_key = os.environ.get('PMG_MAPI_KEY')
mp_retriever = MPDataRetrieval(api_key)


def pretty_formula(i, data):
    """Concatenate the ``A``, ``B`` and ``anion`` entries found at position ``i``.

    Args:
        i: Index (or label) used to look up each of the three fields.
        data: Object exposing indexable ``A``, ``B`` and ``anion`` attributes.

    Returns:
        ``data.A[i] + data.B[i] + data.anion[i]``.
    """
    site_a, site_b, site_x = data.A[i], data.B[i], data.anion[i]
    return site_a + site_b + site_x

def score_n_store():

    fit_eval = Evaluator()

    client = MongoClient('localhost', 27017)
    unc = client.unc.data_raw

    mdb_retriever = MongoDataRetrieval(unc)
github hackingmaterials / automatminer / automatminer_dev / matbench / mp_eform.py View on Github external
from pymatgen import MPRester
from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval
import pandas as pd
import numpy as np
from tqdm import tqdm


# Raise pandas' console display limits (500 rows, 500 columns, 1000
# characters wide) for the DataFrames handled by this script.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)

# NOTE(review): presumably the batch size handed to chunks() further down;
# its use is not visible in this excerpt -- confirm.
chunksize = 1000

# Two clients are created, both without an explicit API key: the matminer
# retriever (used for get_dataframe below) and a pymatgen MPRester.
mpdr = MPDataRetrieval()
mpr = MPRester()


def chunks(l, n):
    """Lazily split ``l`` into back-to-back slices of at most ``n`` items.

    Args:
        l: A sliceable sequence that supports ``len()``.
        n: Slice length, which is also the stride between slice starts.

    Yields:
        Successive slices ``l[k:k + n]`` for ``k`` in ``range(0, len(l), n)``.
        The last slice may hold fewer than ``n`` items.
    """
    yield from (l[offset : offset + n] for offset in range(0, len(l), n))


# Query the "material_id" and "warnings" properties for every entry whose
# formation_energy_per_atom is below 2.5.
# NOTE(review): index_mpid=False presumably keeps material_id as an ordinary
# column rather than the index -- confirm against the matminer docs.
df = mpdr.get_dataframe(
    criteria={"formation_energy_per_atom": {"$lt": 2.5}},
    properties=["material_id", "warnings"],
    index_mpid=False,
)

# Cast the warnings entries to strings, then print how many times each
# distinct string occurs.
print(df["warnings"].astype(str).value_counts())
github hackingmaterials / automatminer / automatminer_dev / matbench / phonons.py View on Github external
From matminer's dataset library.
"""

from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval


import pandas as pd

# pandas console limits: 1000 characters wide, up to 500 columns and rows.
# (The old 'display.height' option stays disabled.)
pd.set_option("display.width", 1000)
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

mpdr = MPDataRetrieval()

df = load_dataset("phonon_dielectric_mp")
print(df)

# Query e_above_hull and formation_energy_per_atom for exactly the ids in the
# dataset's "mpid" column, and expose the id under that same column name so
# the two frames share a key.
mpids = df["mpid"].tolist()
dfe = mpdr.get_dataframe(
    criteria={"material_id": {"$in": mpids}},
    properties=["e_above_hull", "formation_energy_per_atom", "material_id"],
    index_mpid=False,
).rename(columns={"material_id": "mpid"})

# Inner merge on the shared columns: only rows present in both frames survive.
df = df.merge(dfe, how="inner")
github hackingmaterials / automatminer / mslearn / data / generate.py View on Github external
Args:
        max_nsites (int): The maximum number of sites to include in the query.
        initial_structures (bool): If true, include the structures before
            relaxation.
        properties ([str]): list of properties supported by MPDataRetrieval
        write_to_csv (bool): whether to write resulting dataframe to csv
        limit (int): maximum length of the returned data; no limit if None

    Returns (pandas.DataFrame):
        retrieved/generated data
    """
    properties = properties or [
        'pretty_formula', 'e_above_hull', 'band_gap', 'total_magnetization',
        'elasticity.elastic_anisotropy', 'elasticity.K_VRH', 'elasticity.G_VRH',
        'structure', 'energy', 'energy_per_atom', 'formation_energy_per_atom']
    mpdr = MPDataRetrieval()
    mpdf = None
    for nsites in list(range(1, 101)) + [{'$gt': 100}]:
        if nsites==max_nsites:
            break
        print("Processing nsites = {}".format(nsites))
        df = mpdr.get_dataframe(criteria={'nsites': nsites},
                                properties=properties,
                                index_mpid=True)
        if initial_structures:
            # prevent data limit API error using this conditional
            isdf = mpdr.get_dataframe(criteria={'nsites': nsites},
                                      properties=['initial_structure'],
                                      index_mpid=True),
            df = df.join(isdf, how='inner')
        if mpdf is None:
            mpdf = df
github hackingmaterials / automatminer / automatminer_dev / matbench / castelli.py View on Github external
From matminer's dataset library.
"""

from matminer.datasets.dataset_retrieval import load_dataset
from matminer.data_retrieval.retrieve_MP import MPDataRetrieval


import pandas as pd

# pandas console limits: 1000 characters wide, up to 500 columns and rows.
# (The old 'display.height' option stays disabled.)
pd.set_option("display.width", 1000)
pd.set_option("display.max_columns", 500)
pd.set_option("display.max_rows", 500)

mpdr = MPDataRetrieval()

# Load the dataset, keep only the "structure" and "e_form" columns, and
# replace the index with a fresh 0..n-1 range (the old index is discarded).
df = load_dataset("castelli_perovskites")[["structure", "e_form"]].reset_index(drop=True)

# Show the result and write it to a gzip-suffixed pickle file.
print(df)
df.to_pickle("castelli.pickle.gz")