How to use maggma - 10 common examples

To help you get started, we’ve selected a few maggma examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github materialsintelligence / propnet / propnet / ext / aflow.py View on Github external
# Split up current query matchbook to recover filters
        matchbook_splitter = re.compile(r"(?!\'),(?= 1:
            if len(props) / chunks < 2:
                chunks = len(props) + 1
            query_error = False
            for chunk in grouper(props, (len(props) // chunks) + 1):
                logger.debug('Requesting property chunk {} with {} records'.format(chunks, k))
                props_to_request = list(set(c for c in chunk if c is not None))

                # Exclude orderby keyword if it is not requested in this chunk.
                # If it is included, remove from requested properties to avoid duplication in URI
                orderby_prop = None
                orderby_str = None
                for prop in props_to_request:
                    if orderby_kw.startswith(prop):
                        if orderby_kw.startswith('$'):
                            orderby_str = orderby_kw[1:]
                        else:
                            orderby_str = orderby_kw
                        orderby_prop = prop
                        break
github materialsintelligence / propnet / propnet / dbtools / aflow_ingester.py View on Github external
'all' if not config_['select']
                                                 else config_['select']))

            if config_['select']:
                kws_to_chunk = config_['select']
            else:
                kws_to_chunk = self.keywords

            k = config_['k']
            filter_vals = config_['filter']

            chunk_idx = 0
            chunk_size = 5
            total_chunks = len(kws_to_chunk) // chunk_size + 1

            for chunk in grouper(kws_to_chunk, chunk_size):
                chunk_idx += 1
                logger.debug("Property chunk {} of {}".format(chunk_idx, total_chunks))
                props = [getattr(AFLOW_KWS, c) for c in chunk if c is not None]
                if len(props) == 0:
                    continue
                data_query = self._get_query_obj(config_['catalog'], k,
                                                 config_['exclude'], filter_vals)
                data_query.select(*props)
                success = False
                while not success:
                    try:
                        for entry in data_query:
                            yield entry, config_['targets']
                        success = True
                    except ValueError:
                        if data_query.N == 0:   # Empty query
github materialsintelligence / propnet / propnet / dbtools / correlation_with_mp.py View on Github external
from maggma.builders import Builder
import numpy as np

from propnet.core.graph import Graph
from propnet import ureg
# noinspection PyUnresolvedReferences
import propnet.models
from propnet.core.registry import Registry

warnings.warn("The correlation_with_mp module is deprecated. Use the correlation module instead.",
              DeprecationWarning)
logger = logging.getLogger(__name__)


class CorrelationBuilder(Builder):
    """
    A class to calculate the correlation between properties derived by or used in propnet
    using a suite of regression tools. Uses the Builder architecture for optional parallel
    processing of data.

    Note: serialization of builder does not work with custom correlation functions, although
    interactive use does support them.

    """
    # TODO: Add these symbols to propnet so we don't have to bring them in explicitly?
    MP_QUERY_PROPS = ["piezo.eij_max", "elasticity.universal_anisotropy",
                      "diel.poly_electronic", "total_magnetization", "efermi",
                      "magnetism.total_magnetization_normalized_vol"]
    PROPNET_PROPS = [v.name for v in Registry("symbols").values()
                     if (v.category == 'property' and v.shape == 1)]
github materialsintelligence / propnet / propnet / core / mic_builder.py View on Github external
from maggma.builders import Builder
from itertools import combinations_with_replacement
import numpy as np
import json
from minepy import MINE
from collections import defaultdict
from propnet.symbols import DEFAULT_SYMBOLS
from propnet import ureg

import random

class MicBuilder(Builder):

    def __init__(self, propnet_store, mp_store, correlation_store, out_file, **kwargs):
        """
        Set up the builder with its source and target stores.

        Args:
            propnet_store: kept on the instance and registered as a source
            mp_store: kept on the instance and registered as a source
            correlation_store: kept on the instance and registered as the
                only target
            out_file: kept on the instance as ``out_file``; not otherwise
                used by this constructor
            **kwargs: forwarded unchanged to the Builder parent class
        """
        self.out_file = out_file
        self.correlation_store = correlation_store
        self.mp_store = mp_store
        self.propnet_store = propnet_store

        # Hand the stores to the parent Builder as its sources and targets.
        source_stores = [propnet_store, mp_store]
        target_stores = [correlation_store]
        super().__init__(sources=source_stores, targets=target_stores, **kwargs)

    def get_items(self):
        data = defaultdict(dict)
        propnet_props = [v.name for v in DEFAULT_SYMBOLS.values()
                         if (v.category == 'property' and v.shape == 1)]
github materialsintelligence / propnet / propnet / dbtools / aflow_ingester.py View on Github external
from propnet.dbtools.aflow_ingester_defaults import default_query_configs, default_files_to_ingest
from aflow.keywords import load as kw_load, reset as kw_reset
from aflow import K as AFLOW_KWS
from maggma.builders import Builder
from maggma.utils import grouper
from monty.json import jsanitize
from pymongo import UpdateOne
import logging
import time
import datetime
from urllib.error import HTTPError

logger = logging.getLogger(__name__)


class AflowIngester(Builder):
    """
    Builds MongoDB collections from AFLOW data using the AFLOW and AFLUX web APIs.
    """
    _available_kws = dict()
    """Contains supported keywords in the AFLUX schema
    """
    kw_load(_available_kws)
    
    def __init__(self, data_target, auid_target=None,
                 keywords=None, query_configs=None,
                 files_to_ingest=None, filter_null_properties=False,
                 **kwargs):
        """
        Initialize the database builder.

        Args:
github materialsintelligence / propnet / propnet / builders / correlation.py View on Github external
from maggma.builders import Builder
from itertools import combinations_with_replacement
import numpy as np
import json
from collections import defaultdict
from propnet.symbols import DEFAULT_SYMBOLS
from propnet.core.graph import Graph
from propnet import ureg
import logging
import re

logger = logging.getLogger(__name__)


class CorrelationBuilder(Builder):
    """
    A class to calculate the correlation between properties derived by or used in propnet
    using a suite of regression tools. Uses the Builder architecture for optional parallel
    processing of data.

    Note: serialization of builder does not work with custom correlation functions, although
    interactive use does support them.

    """
    def __init__(self, propnet_store, mp_store,
                 correlation_store, out_file=None,
                 funcs='linlsq', **kwargs):
        """
        Constructor for the correlation builder.

        Args:
github materialsintelligence / propnet / propnet / dbtools / correlation.py View on Github external
import numpy as np
import json
from collections import defaultdict
from propnet.core.graph import Graph
from propnet import ureg
import logging
import re

# noinspection PyUnresolvedReferences
import propnet.models
from propnet.core.registry import Registry

logger = logging.getLogger(__name__)


class CorrelationBuilder(Builder):
    """
    A class to calculate the correlation between properties derived by or used in propnet
    using a suite of regression tools. Uses the Builder architecture for optional parallel
    processing of data.

    Note: serialization of builder does not work with custom correlation functions, although
    interactive use does support them.

    """
    PROPNET_PROPS = [v.name for v in Registry("symbols").values()
                     if (v.category == 'property' and v.shape == 1)]
    
    def __init__(self, propnet_store,
                 correlation_store, out_file=None,
                 funcs='linlsq', props=None,
                 sample_size=None, from_quantity_db=True,
github materialsintelligence / propnet / propnet / dbtools / separation.py View on Github external
from maggma.builders import Builder
from maggma.utils import grouper
from pymongo import InsertOne
import pydash
from itertools import chain
from propnet import ureg
from propnet.core.registry import Registry
# noinspection PyUnresolvedReferences
import propnet.symbols


class SeparationBuilder(Builder):
    """
    Converts old-style propnet database into separate quantity-centered
    and materials-centered databases.
    """

    def __init__(self, propnet_store, quantity_store, material_store=None,
                 criteria=None, props=None, chunk_size=100, insert_only=False):
        """

        Args:
            propnet_store (Mongolike Store): old-style propnet store
            quantity_store (Mongolike Store): store for quantities
            material_store (Mongolike Store): store for materials
            criteria (dict): JSON-style criteria for MongoDB find() query
            **kwargs: arguments to Builder parent class
        """
github materialsintelligence / propnet / propnet / dbtools / separation.py View on Github external
def get_items(self):
        """
        Yield documents from the propnet store, queried in chunks of task ids.

        Collects the distinct ``task_id`` values matching ``self.criteria``,
        stores their count in ``self.total``, then queries the store one chunk
        of ``self.chunk_size`` ids at a time. Each query requests only the
        fields in ``self.props`` plus ``'inputs'`` and ``'task_id'``.

        Yields:
            Each document returned by ``self.propnet_store.query()``.
        """
        # Borrowed from MapBuilder
        keys = self.propnet_store.distinct('task_id', criteria=self.criteria)
        containers = self.props + ['inputs']
        self.total = len(keys)
        for chunked_keys in grouper(keys, self.chunk_size, None):
            # grouper is passed None as its third argument (presumably the fill
            # value padding a short final chunk -- confirm against maggma), so
            # None entries are dropped before building the "$in" query.
            # An explicit identity test is used instead of
            # filter(None.__ne__, ...): None.__ne__(x) returns NotImplemented
            # for every non-None x, and relying on its truthiness is deprecated.
            chunked_keys = [key for key in chunked_keys if key is not None]
            # The query result is materialized with list() before yielding, as
            # in the original code.
            for doc in list(
                    self.propnet_store.query(
                        criteria={'task_id': {
                            "$in": chunked_keys
                        }},
                        properties=containers + ['task_id'],
                    )):
                yield doc
github hackingmaterials / robocrystallographer / dev_scripts / download_pubchem.py View on Github external
def process_sdf_file(filename):
    mp_pubchem = MongograntStore("rw:knowhere.lbl.gov/mp_pubchem", "mp_pubchem",
                                 key="pubchem_id")
    mp_pubchem.connect()
    coll = mp_pubchem.collection

    skipped = 0
    pubchem_molecules = []
    for i, mol in enumerate(pybel.readfile('sdf', filename)):
        try:
            pubchem_id = int(mol.data['PUBCHEM_COMPOUND_CID'])
            xyz = mol.write(format="xyz")

            data = {'pubchem_id': pubchem_id,
                    'xyz': xyz}
            for key in keys:
                if key in mol.data:
                    data[key_map[key]] = mol.data[key]

maggma

Framework to develop data pipelines from files on disk to a full dissemination API

BSD-3-Clause
Latest version published 7 days ago

Package Health Score

81 / 100
Full package analysis