How to use the biothings.hub.dataload.uploader.MergerSourceUploader class in biothings

To help you get started, we’ve selected a few biothings examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biothings / mygene.info / src / hub / dataload / sources / uniprot / pdb_upload.py View on Github external
import biothings.hub.dataload.uploader as uploader
from .parser import load_pdb


class UniprotPDBUploader(uploader.MergerSourceUploader):
    """Uploader for the ``uniprot_pdb`` source, built on ``MergerSourceUploader``."""

    name = "uniprot_pdb"

    def load_data(self, data_folder):
        """Return whatever ``load_pdb`` produces for *data_folder*."""
        pdb_docs = load_pdb(data_folder)
        return pdb_docs

    @classmethod
    def get_mapping(klass):
        """Return the field mapping for ``pdb``.

        The field is declared as a ``keyword`` that uses the
        ``keyword_lowercase_normalizer`` normalizer.
        """
        pdb_field = {
            "type": "keyword",
            "normalizer": "keyword_lowercase_normalizer",
        }
        return {"pdb": pdb_field}
github biothings / mygene.info / src / hub / dataload / sources / ensembl / gene_upload.py View on Github external
from .parser import EnsemblParser
import biothings.hub.dataload.uploader as uploader
from biothings.utils.common import dump2gridfs

class EnsemblGeneUploader(uploader.MergerSourceUploader):

    name = "ensembl_gene"
    main_source = "ensembl"
    __metadata__ = {"mapper" : 'ensembl2entrez'}

    def load_data(self, data_folder):
        """Return the result of ``EnsemblParser.load_ensembl_main()``.

        The parser is created for *data_folder* with
        ``load_ensembl2entrez=False``.
        """
        parser = EnsemblParser(data_folder, load_ensembl2entrez=False)
        return parser.load_ensembl_main()

    def get_mapping_to_entrez(self, data_folder):
        """Return ``ensembl2entrez_li`` from an ``EnsemblParser`` for *data_folder*.

        ``_load_ensembl2entrez_li()`` is invoked on the parser before the
        attribute is read.
        """
        parser = EnsemblParser(data_folder)
        parser._load_ensembl2entrez_li()
        ensembl2entrez = parser.ensembl2entrez_li
        return ensembl2entrez

    def post_update_data(self,*args,**kwargs):
github biothings / mygene.info / src / hub / dataload / sources / ensembl / genomic_pos_upload.py View on Github external
from .parser import EnsemblParser
import biothings.hub.dataload.uploader as uploader

class EnsemblGenomicPosUploader(uploader.MergerSourceUploader):

    name = "ensembl_genomic_pos"
    main_source = "ensembl"

    def load_data(self, data_folder):
        """Return the result of ``EnsemblParser.load_ensembl2pos()``.

        The parser is built from ``self.main_source`` and *data_folder*.
        """
        parser = EnsemblParser(self.main_source, data_folder)
        return parser.load_ensembl2pos()

    @classmethod
    def get_mapping(klass):
        mapping = {
            "genomic_pos": {
                "dynamic": False,
                "type": "nested",                 # index as nested
                "properties": {
github biothings / mygene.info / src / hub / dataload / sources / refseq / ec_upload.py View on Github external
from .parser import Gene2ECParser
import biothings.hub.dataload.uploader as uploader

class EntrezECUploader(uploader.MergerSourceUploader):
    """Uploader for the ``entrez_ec`` source (``main_source`` is ``refseq``)."""

    name = "entrez_ec"
    main_source = "refseq"

    def load_data(self, data_folder):
        """Return the result of ``Gene2ECParser(data_folder).load()``."""
        gene2ec = Gene2ECParser(data_folder).load()
        return gene2ec

    @classmethod
    def get_mapping(klass):
        """Return the field mapping for ``ec``.

        The field is declared as a ``keyword`` that uses the
        ``keyword_lowercase_normalizer`` normalizer.
        """
        mapping = {
            "ec": {
                "type": "keyword",
                "normalizer" : "keyword_lowercase_normalizer",
            },
        }
        # Without an explicit return the method would yield None instead of
        # the mapping it just built.
        return mapping
github biothings / mygene.info / src / hub / dataload / sources / uniprot / pir_upload.py View on Github external
import biothings.hub.dataload.uploader as uploader
from .parser import load_pir


class UniprotPIRUploader(uploader.MergerSourceUploader):
    """Uploader for the ``uniprot_pir`` source, built on ``MergerSourceUploader``."""

    name = "uniprot_pir"

    def load_data(self, data_folder):
        """Return whatever ``load_pir`` produces for *data_folder*."""
        pir_docs = load_pir(data_folder)
        return pir_docs

    @classmethod
    def get_mapping(klass):
        """Return the field mapping for ``pir``.

        The field is declared as a ``keyword`` that uses the
        ``keyword_lowercase_normalizer`` normalizer and is copied to ``all``.
        """
        pir_field = {
            "type": "keyword",
            "normalizer": "keyword_lowercase_normalizer",
            "copy_to": ["all"],
        }
        return {"pir": pir_field}
github biothings / mygene.info / src / hub / dataload / sources / ensembl / pfam_upload.py View on Github external
from .parser import EnsemblParser
import biothings.hub.dataload.uploader as uploader

class EnsemblPfamUploader(uploader.MergerSourceUploader):

    name = "ensembl_pfam"
    main_source = "ensembl"

    def load_data(self, data_folder):
        """Return the result of ``EnsemblParser.load_ensembl2pfam()``.

        The parser is built from ``self.main_source`` and *data_folder*.
        """
        parser = EnsemblParser(self.main_source, data_folder)
        return parser.load_ensembl2pfam()

    @classmethod
    def get_mapping(klass):
        mapping = {
            "pfam": {
                "type": "keyword",
                "normalizer" : "keyword_lowercase_normalizer",
                'copy_to': ['all'],
github biothings / mygene.info / src / hub / dataload / sources / ensembl / interpro_upload.py View on Github external
from .parser import EnsemblParser
import biothings.hub.dataload.uploader as uploader

class EnsemblInterproUploader(uploader.MergerSourceUploader):

    name = "ensembl_interpro"
    main_source = "ensembl"
    __metadata__ = {"mapper" : 'ensembl2entrez'}

    def load_data(self, data_folder):
        """Return the result of ``EnsemblParser.load_ensembl2interpro()``.

        The parser is built from ``self.main_source`` and *data_folder*.
        """
        parser = EnsemblParser(self.main_source, data_folder)
        return parser.load_ensembl2interpro()

    @classmethod
    def get_mapping(klass):
        mapping = {
            "interpro": {
                "dynamic": False,
                "properties": {
github biothings / mygene.info / src / hub / dataload / sources / entrez / gene_upload.py View on Github external
from .parser import GeneInfoParser
from .parser import get_geneid_d
import biothings.hub.dataload.uploader as uploader
from biothings.utils.common import dump2gridfs

class EntrezGeneUploader(uploader.MergerSourceUploader):

    name = "entrez_gene"
    main_source = "entrez"
    ENTREZ_GENEDOC_ROOT = True

    def load_data(self, data_folder):
        """Return the result of ``GeneInfoParser.load()`` for *data_folder*.

        The parser is stored on ``self.parser`` and ``set_all_species()`` is
        called on it before loading.
        """
        self.parser = GeneInfoParser(data_folder)
        self.parser.set_all_species()
        return self.parser.load()

    def get_geneid_d(self, *args, **kwargs):
        """Call the module-level ``get_geneid_d`` with ``self.data_folder``.

        Any extra positional and keyword arguments are passed through as-is.
        """
        folder = self.data_folder
        return get_geneid_d(folder, *args, **kwargs)

    def post_update_data(self,*args,**kwargs):
        # Logs that "geneid_d" is being uploaded to GridFS.
        # NOTE(review): only the log call is visible here; the upload step
        # itself is not shown in this excerpt -- confirm against the full file.
        self.logger.info('Uploading "geneid_d" to GridFS...')
github biothings / mygene.info / src / hub / dataload / sources / entrez / accession_upload.py View on Github external
from .parser import Gene2AccessionParser
import biothings.hub.dataload.uploader as uploader

class EntrezAccessionUploader(uploader.MergerSourceUploader):

    name = "entrez_accession"
    main_source = "entrez"

    def load_data(self, data_folder):
        """Return the result of ``Gene2AccessionParser.load()`` for *data_folder*.

        The parser is stored on ``self.parser`` and ``set_all_species()`` is
        called on it before loading.
        """
        self.parser = Gene2AccessionParser(data_folder)
        self.parser.set_all_species()
        return self.parser.load()

    @classmethod
    def get_mapping(klass):
        mapping = {
                "accession": {
                    "dynamic": False,
                    #make both fields, accession.rna and rna, work
github biothings / mygene.info / src / hub / dataload / sources / ensembl / prosite_upload.py View on Github external
from .parser import EnsemblParser
import biothings.hub.dataload.uploader as uploader

class EnsemblPrositeUploader(uploader.MergerSourceUploader):

    name = "ensembl_prosite"
    main_source = "ensembl"
    __metadata__ = {"mapper" : 'ensembl2entrez'}

    def load_data(self, data_folder):
        """Return the result of ``EnsemblParser.load_ensembl2prosite()``.

        The parser is built from ``self.main_source`` and *data_folder*.
        """
        parser = EnsemblParser(self.main_source, data_folder)
        return parser.load_ensembl2prosite()

    @classmethod
    def get_mapping(klass):
        mapping = {
            "prosite": {
                "type": "keyword",
                "normalizer" : "keyword_lowercase_normalizer",