How to use the hub.dataload.uploader.SnpeffPostUpdateUploader class in hub

To help you get started, we’ve selected a few hub examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biothings / myvariant.info / src / hub / dataload / sources / dbsnp / dbsnp_upload.py View on Github external
from .dbsnp_json_parser import load_data_file
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader


# Source-level metadata for dbSNP: the source's landing page, its license
# page, and a shortened link to that license page.
SRC_META = {
    "url": "https://www.ncbi.nlm.nih.gov/projects/SNP/",
    "license_url": "https://www.ncbi.nlm.nih.gov/home/about/policies/",
    "license_url_short": "http://bit.ly/2AqoLOc",
}


class DBSNPBaseUploader(uploader.IgnoreDuplicatedSourceUploader,
                    uploader.ParallelizedSourceUploader,
                    SnpeffPostUpdateUploader):

    def jobs(self):
        """Return one job-argument tuple per dbSNP input file.

        Looks in ``self.data_folder`` for files matching
        ``refsnp-chr*.json.bz2`` and wraps each path in a 1-tuple
        (presumably unpacked as the arguments of ``load_data`` --
        confirm against the parallelized uploader base class).
        """
        pattern = os.path.join(self.data_folder, "refsnp-chr*.json.bz2")
        return [(path,) for path in glob.glob(pattern)]

    def load_data(self, input_file):
        """Parse a single input file with ``load_data_file``.

        Args:
            input_file: path of the file to parse.

        Returns:
            Whatever ``load_data_file`` returns for ``input_file`` and the
            assembly declared in the class-level ``__metadata__``.
        """
        self.logger.info("Load data from '%s'", input_file)
        # The assembly comes from the concrete class, so subclasses select
        # it through their own ``__metadata__``.
        assembly = self.__class__.__metadata__["assembly"]
        return load_data_file(input_file, assembly)

    def post_update_data(self, *args, **kwargs):
        """Run the inherited post-update step, then index ``dbsnp.rsid``.

        All positional and keyword arguments are forwarded unchanged to the
        parent implementation.
        """
        super(DBSNPBaseUploader, self).post_update_data(*args, **kwargs)
        rsid_field = "dbsnp.rsid"
        self.logger.info("Indexing 'rsid'")
        # Build the index in the background; a foreground build would lock
        # the whole database.
        self.collection.create_index(rsid_field, background=True)

    @classmethod
github biothings / myvariant.info / src / hub / dataload / sources / gwassnps / gwassnps_upload.py View on Github external
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader

class GwassnpsUploader(uploader.DummySourceUploader,SnpeffPostUpdateUploader):

    name = "gwassnps"
    __metadata__ = {
        "mapper" : 'observed',
        "assembly" : "hg19",
        "src_meta" : {
            "url" : "http://www.ebi.ac.uk/gwas/",
            "license_url" : "http://www.ebi.ac.uk/gwas/docs/about",
            "license_url_short": "http://bit.ly/2M3dDMC"
        }
    }

    @classmethod
    def get_mapping(klass):
        mapping = {
            "gwassnps": {
github biothings / myvariant.info / src / hub / dataload / sources / evs / evs_upload.py View on Github external
import zipfile

from .evs_parser import load_data
import biothings.hub.dataload.storage as storage
from hub.dataload.uploader import SnpeffPostUpdateUploader


# Source metadata for EVS, shared by the hg19 and hg38 uploaders: landing
# page, license page, and a shortened link to the license page.
SRC_META = {
    "url": "http://evs.gs.washington.edu/EVS/",
    "license_url": "http://evs.gs.washington.edu/EVS/",
    "license_url_short": "http://bit.ly/2QAcvkh",
}


class EVSBaseUploader(SnpeffPostUpdateUploader):

    storage_class = storage.IgnoreDuplicatedStorage

    def load_data(self, data_folder):
        """Parse the EVS data located in ``data_folder``.

        Args:
            data_folder: location handed to the parser's ``load_data``.

        Returns:
            Whatever the parser's ``load_data`` returns for ``data_folder``
            and the assembly declared in the class-level ``__metadata__``.
        """
        # Hand the value to the logger as an argument instead of
        # pre-formatting with ``%``: the message is only built if the record
        # is actually emitted.
        self.logger.info("Load data from '%s'", data_folder)
        return load_data(data_folder,
                         self.__class__.__metadata__["assembly"])

    @classmethod
    def get_mapping(klass):
        mapping = {
            "evs": {
                "properties": {
                    "chrom": {
                        "type": "text"
github biothings / myvariant.info / src / hub / dataload / sources / gnomad / gnomad_upload.py View on Github external
import biothings.hub.dataload.uploader as uploader

from .gnomad_parser_genomes import load_data as load_data_genomes
from .gnomad_parser_exomes import load_data as load_data_exomes
from .mapping import exomes_mapping, genomes_mapping
from hub.dataload.uploader import SnpeffPostUpdateUploader
from hub.dataload.storage import MyVariantIgnoreDuplicatedStorage

# Source metadata for gnomAD: landing page, license terms page, a shortened
# link to those terms, and the license name.
SRC_META = {
    "url": "http://gnomad.broadinstitute.org",
    "license_url": "http://gnomad.broadinstitute.org/terms",
    "license_url_short": "http://bit.ly/2I1cl1I",
    "license": "ODbL",
}

class GnomadBaseUploader(SnpeffPostUpdateUploader):
    """Common base for the gnomAD uploaders; only sets the storage class."""

    storage_class = MyVariantIgnoreDuplicatedStorage


class GnomadBaseHg19Uploader(GnomadBaseUploader):
    """gnomAD uploader base whose metadata declares the hg19 assembly."""

    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg19",
        "src_meta": SRC_META,
    }


class GnomadBaseHg38Uploader(GnomadBaseUploader):
    """gnomAD uploader base whose metadata declares the hg38 assembly."""

    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg38",
        "src_meta": SRC_META,
    }
github biothings / myvariant.info / src / hub / dataload / sources / dbnsfp / dbnsfp_upload.py View on Github external
from .dbnsfp_parser import load_data_file as load_common
import biothings.hub.dataload.uploader as uploader
from biothings.hub.dataload.storage import IgnoreDuplicatedStorage
from hub.dataload.uploader import SnpeffPostUpdateUploader
from hub.dataload.storage import MyVariantIgnoreDuplicatedStorage


# Source metadata for dbNSFP: landing page, license page, and a shortened
# link to the license page.
SRC_META = {
    "url": "https://sites.google.com/site/jpopgen/dbNSFP",
    "license_url": "https://sites.google.com/site/jpopgen/dbNSFP",
    "license_url_short": "http://bit.ly/2VLnQBz",
}


class DBNSFPBaseUploader(uploader.ParallelizedSourceUploader,
                         SnpeffPostUpdateUploader):

    storage_class = MyVariantIgnoreDuplicatedStorage
    GLOB_PATTERN = "dbNSFP*_variant.chr*"

    @classmethod
    def get_mapping(klass):
        mapping = {
            "dbnsfp": {
                "properties": {
                    "chrom": {
                        "type": "keyword",
                        "normalizer": "keyword_lowercase_normalizer"
                    },
                    "hg19": {
                        "properties": {
                            "start": {
github biothings / myvariant.info / src / hub / dataload / sources / mutdb / mutdb_upload.py View on Github external
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader

class MutDBUploader(uploader.DummySourceUploader,SnpeffPostUpdateUploader):
    """Originally downloaded from: http://www.mutdb.org/"""

    name = "mutdb"
    __metadata__ = {
        "mapper" : 'observed',
        "assembly" : "hg19",
        "src_meta" : {
            "url" : "http://www.mutdb.org/",
            "license_url" : "http://www.mutdb.org/",
            "license_url_short": "http://bit.ly/2SQ6fXA"
        }
    }

    @classmethod
    def get_mapping(klass):
        mapping = {
github biothings / myvariant.info / src / hub / dataload / sources / grasp / grasp_upload.py View on Github external
import os
import glob
import zipfile

from .grasp_parser import load_data
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader
from hub.dataload.storage import MyVariantIgnoreDuplicatedStorage


class GraspUploader(SnpeffPostUpdateUploader):

    name = "grasp"
    storage_class = MyVariantIgnoreDuplicatedStorage
    __metadata__ = {"mapper" : 'observed',
            "assembly" : "hg19",
            "src_meta" : {
                "url" : "https://grasp.nhlbi.nih.gov/Updates.aspx",
                "license_url" : "https://grasp.nhlbi.nih.gov/Terms.aspx",
                "license_url_short": "http://bit.ly/2RJN30i"
                }
            }

    def load_data(self,data_folder):
        # there's one zip there, let's get the zipped filename
        zgrasp = glob.glob(os.path.join(data_folder,"*.zip"))
        if len(zgrasp) != 1:
github biothings / myvariant.info / src / hub / dataload / sources / geno2mp / geno2mp_upload.py View on Github external
import os
import glob
import zipfile

from .geno2mp_parser import load_data
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader

class Geno2MPUploader(SnpeffPostUpdateUploader):

    name = "geno2mp"
    __metadata__ = {
        "mapper" : 'observed',
        "assembly" : "hg19",
        "src_meta" : {
            "url" : "http://geno2mp.gs.washington.edu",
            "license_url" : "http://geno2mp.gs.washington.edu/Geno2MP/#/terms",
            "license_url_short": "http://bit.ly/2QyGCIN"
        }
    }

    def load_data(self,data_folder):
        # there's one vcf file there, let's get it
        input_file = glob.glob(os.path.join(data_folder,"*.vcf"))
        if len(input_file) != 1:
github biothings / myvariant.info / src / hub / dataload / sources / snpedia / snpedia_upload.py View on Github external
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader

class SnpediaUploader(uploader.DummySourceUploader,SnpeffPostUpdateUploader):
    """Originally downloaded from: http://www.snpedia.org/"""

    name = "snpedia"
    __metadata__ = {
        "mapper" : 'observed',
        "assembly" : "hg19",
        "src_meta" : {
            "url" : "https://www.snpedia.com/",
            "license" : "CC BY-NC-SA",
            "license_url" : "https://www.snpedia.com/index.php/SNPedia:General_disclaimer",
            "license_url_short": "http://bit.ly/2VJ3TeR"
        }
    }

    @classmethod
    def get_mapping(klass):
github biothings / myvariant.info / src / hub / dataload / sources / cgi / cgi_upload.py View on Github external
import os

from hub.dataload.uploader import SnpeffPostUpdateUploader
from .cgi_parser import load_data


class CGIUploader(SnpeffPostUpdateUploader):
    name = "cgi"
    # Uploader metadata for the CGI source: mapper name, genome assembly,
    # and source/licensing information.
    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg19",
        "src_meta": {
            "url": "https://www.cancergenomeinterpreter.org/home",
            "license_url": "https://creativecommons.org/publicdomain/zero/1.0/",
            "license_url_short": "http://bit.ly/2FqS871",
            # Key is spelled "license" (not "licence") so it matches the key
            # other sources use for the license name in their src_meta.
            "license": "CC0 1.0 Universal"
        }
    }

    def load_data(self,data_folder):
        # the input is a single TSV file with a fixed name, let's build its path
        input_file = os.path.join(data_folder,"cgi_biomarkers_per_variant.tsv")
        assert os.path.exists(input_file), "Can't find input file '%s'" % input_file