# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
import glob
import os

import biothings.hub.dataload.uploader as uploader

from hub.dataload.uploader import SnpeffPostUpdateUploader

from .dbsnp_json_parser import load_data_file
# Source metadata for the dbSNP datasource: data origin URL and licensing info.
SRC_META = {
    "url": "https://www.ncbi.nlm.nih.gov/projects/SNP/",
    "license_url": "https://www.ncbi.nlm.nih.gov/home/about/policies/",
    "license_url_short": "http://bit.ly/2AqoLOc"
}
class DBSNPBaseUploader(uploader.IgnoreDuplicatedSourceUploader,
uploader.ParallelizedSourceUploader,
SnpeffPostUpdateUploader):
def jobs(self):
files = glob.glob(os.path.join(self.data_folder,"refsnp-chr*.json.bz2"))
return [(f,) for f in files]
def load_data(self,input_file):
self.logger.info("Load data from '%s'",input_file)
return load_data_file(input_file,self.__class__.__metadata__["assembly"])
def post_update_data(self, *args, **kwargs):
super(DBSNPBaseUploader,self).post_update_data(*args,**kwargs)
self.logger.info("Indexing 'rsid'")
# background=true or it'll lock the whole database...
self.collection.create_index("dbsnp.rsid",background=True)
@classmethod
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader
class GwassnpsUploader(uploader.DummySourceUploader, SnpeffPostUpdateUploader):
    """Uploader for the "gwassnps" datasource (EBI GWAS catalog, per src_meta).

    Inherits DummySourceUploader — NOTE(review): presumably the data is
    already loaded and no parsing happens here; confirm against hub docs.
    """

    name = "gwassnps"
    # hub metadata: mapper name, genome assembly, and source/license info
    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg19",
        "src_meta": {
            "url": "http://www.ebi.ac.uk/gwas/",
            "license_url": "http://www.ebi.ac.uk/gwas/docs/about",
            "license_url_short": "http://bit.ly/2M3dDMC"
        }
    }
@classmethod
def get_mapping(klass):
mapping = {
"gwassnps": {
import zipfile
from .evs_parser import load_data
import biothings.hub.dataload.storage as storage
from hub.dataload.uploader import SnpeffPostUpdateUploader
# common to both hg19 and hg38
# Source metadata for the EVS datasource, common to both hg19 and hg38:
# data origin URL and licensing info.
SRC_META = {
    "url": "http://evs.gs.washington.edu/EVS/",
    "license_url": "http://evs.gs.washington.edu/EVS/",
    "license_url_short": "http://bit.ly/2QAcvkh"
}
class EVSBaseUploader(SnpeffPostUpdateUploader):
    """Base uploader for EVS data; subclasses are expected to provide
    ``__metadata__["assembly"]`` (read by ``load_data``)."""

    # storage that ignores duplicated documents (per its class name)
    storage_class = storage.IgnoreDuplicatedStorage
def load_data(self,data_folder):
#self.prepare()
self.logger.info("Load data from '%s'" % data_folder)
return load_data(data_folder,
self.__class__.__metadata__["assembly"])
@classmethod
def get_mapping(klass):
mapping = {
"evs": {
"properties": {
"chrom": {
"type": "text"
import biothings.hub.dataload.uploader as uploader
from .gnomad_parser_genomes import load_data as load_data_genomes
from .gnomad_parser_exomes import load_data as load_data_exomes
from .mapping import exomes_mapping, genomes_mapping
from hub.dataload.uploader import SnpeffPostUpdateUploader
from hub.dataload.storage import MyVariantIgnoreDuplicatedStorage
# Source metadata for the gnomAD datasource, shared by the hg19/hg38
# uploaders below: data origin URL and licensing info (ODbL).
SRC_META = {
    "url": "http://gnomad.broadinstitute.org",
    "license_url": "http://gnomad.broadinstitute.org/terms",
    "license_url_short": "http://bit.ly/2I1cl1I",
    "license": "ODbL"
}
class GnomadBaseUploader(SnpeffPostUpdateUploader):
    """Common base for the gnomAD uploaders; only fixes the storage class."""

    # storage that ignores duplicated variant documents (per its class name)
    storage_class = MyVariantIgnoreDuplicatedStorage
class GnomadBaseHg19Uploader(GnomadBaseUploader):
    """gnomAD base uploader pinned to the hg19 assembly."""

    # hub metadata: mapper name, genome assembly, shared source/license info
    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg19",
        "src_meta": SRC_META,
    }
class GnomadBaseHg38Uploader(GnomadBaseUploader):
    """gnomAD base uploader pinned to the hg38 assembly."""

    # hub metadata: mapper name, genome assembly, shared source/license info
    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg38",
        "src_meta": SRC_META,
    }
from .dbnsfp_parser import load_data_file as load_common
import biothings.hub.dataload.uploader as uploader
from biothings.hub.dataload.storage import IgnoreDuplicatedStorage
from hub.dataload.uploader import SnpeffPostUpdateUploader
from hub.dataload.storage import MyVariantIgnoreDuplicatedStorage
# Source metadata for the dbNSFP datasource: data origin URL and licensing info.
SRC_META = {
    "url": "https://sites.google.com/site/jpopgen/dbNSFP",
    "license_url": "https://sites.google.com/site/jpopgen/dbNSFP",
    "license_url_short": "http://bit.ly/2VLnQBz"
}
class DBNSFPBaseUploader(uploader.ParallelizedSourceUploader,
                         SnpeffPostUpdateUploader):
    """Base uploader for dbNSFP, parallelized over the per-chromosome
    variant files matched by GLOB_PATTERN."""

    # storage that ignores duplicated variant documents (per its class name)
    storage_class = MyVariantIgnoreDuplicatedStorage
    # filename pattern of the per-chromosome dbNSFP variant files
    GLOB_PATTERN = "dbNSFP*_variant.chr*"
@classmethod
def get_mapping(klass):
mapping = {
"dbnsfp": {
"properties": {
"chrom": {
"type": "keyword",
"normalizer": "keyword_lowercase_normalizer"
},
"hg19": {
"properties": {
"start": {
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader
class MutDBUploader(uploader.DummySourceUploader, SnpeffPostUpdateUploader):
    """Originally downloaded from: http://www.mutdb.org/

    Inherits DummySourceUploader — NOTE(review): presumably the data is
    already loaded and no parsing happens here; confirm against hub docs.
    """

    name = "mutdb"
    # hub metadata: mapper name, genome assembly, and source/license info
    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg19",
        "src_meta": {
            "url": "http://www.mutdb.org/",
            "license_url": "http://www.mutdb.org/",
            "license_url_short": "http://bit.ly/2SQ6fXA"
        }
    }
@classmethod
def get_mapping(klass):
mapping = {
import os
import glob
import zipfile
from .grasp_parser import load_data
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader
from hub.dataload.storage import MyVariantIgnoreDuplicatedStorage
class GraspUploader(SnpeffPostUpdateUploader):
    """Uploader for the "grasp" datasource (GRASP, per src_meta URL)."""

    name = "grasp"
    # storage that ignores duplicated variant documents (per its class name)
    storage_class = MyVariantIgnoreDuplicatedStorage
    # hub metadata: mapper name, genome assembly, and source/license info
    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg19",
        "src_meta": {
            "url": "https://grasp.nhlbi.nih.gov/Updates.aspx",
            "license_url": "https://grasp.nhlbi.nih.gov/Terms.aspx",
            "license_url_short": "http://bit.ly/2RJN30i"
        }
    }
def load_data(self,data_folder):
# there's one zip there, let's get the zipped filename
zgrasp = glob.glob(os.path.join(data_folder,"*.zip"))
if len(zgrasp) != 1:
import os
import glob
import zipfile
from .geno2mp_parser import load_data
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader
class Geno2MPUploader(SnpeffPostUpdateUploader):
    """Uploader for the "geno2mp" datasource (Geno2MP, per src_meta URL)."""

    name = "geno2mp"
    # hub metadata: mapper name, genome assembly, and source/license info
    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg19",
        "src_meta": {
            "url": "http://geno2mp.gs.washington.edu",
            "license_url": "http://geno2mp.gs.washington.edu/Geno2MP/#/terms",
            "license_url_short": "http://bit.ly/2QyGCIN"
        }
    }
def load_data(self,data_folder):
# there's one vcf file there, let's get it
input_file = glob.glob(os.path.join(data_folder,"*.vcf"))
if len(input_file) != 1:
import biothings.hub.dataload.uploader as uploader
from hub.dataload.uploader import SnpeffPostUpdateUploader
class SnpediaUploader(uploader.DummySourceUploader, SnpeffPostUpdateUploader):
    """Originally downloaded from: http://www.snpedia.org/

    Inherits DummySourceUploader — NOTE(review): presumably the data is
    already loaded and no parsing happens here; confirm against hub docs.
    """

    name = "snpedia"
    # hub metadata: mapper name, genome assembly, and source/license info
    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg19",
        "src_meta": {
            "url": "https://www.snpedia.com/",
            "license": "CC BY-NC-SA",
            "license_url": "https://www.snpedia.com/index.php/SNPedia:General_disclaimer",
            "license_url_short": "http://bit.ly/2VJ3TeR"
        }
    }
@classmethod
def get_mapping(klass):
import os
from hub.dataload.uploader import SnpeffPostUpdateUploader
from .cgi_parser import load_data
class CGIUploader(SnpeffPostUpdateUploader):
    """Uploader for the "cgi" datasource (Cancer Genome Interpreter)."""

    name = "cgi"
    # hub metadata: mapper name, genome assembly, and source/license info
    __metadata__ = {
        "mapper": 'observed',
        "assembly": "hg19",
        "src_meta": {
            "url": "https://www.cancergenomeinterpreter.org/home",
            "license_url": "https://creativecommons.org/publicdomain/zero/1.0/",
            "license_url_short": "http://bit.ly/2FqS871",
            # fix: key was misspelled "licence", inconsistent with the
            # "license" key used by the other uploaders' src_meta (gnomad,
            # snpedia) in this codebase
            "license": "CC0 1.0 Universal"
        }
    }
def load_data(self,data_folder):
# there's one vcf file there, let's get it
input_file = os.path.join(data_folder,"cgi_biomarkers_per_variant.tsv")
assert os.path.exists(input_file), "Can't find input file '%s'" % input_file