How to use the biothings.config_for_app function in biothings

To help you get started, we’ve selected a few biothings examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biothings / myvariant.info / src / hub / dataload / sources / dbsnp / dbsnp_dump.py View on Github external
import os
import os.path
import sys, re
import time
from datetime import datetime

import biothings, config
biothings.config_for_app(config)

from config import DATA_ARCHIVE_ROOT, logger as logging
from biothings.hub.dataload.dumper import FTPDumper

class DBSNPDumper(FTPDumper):
    SRC_NAME = "dbsnp"
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    FTP_HOST = 'ftp.ncbi.nlm.nih.gov'
    CWD_DIR = '/snp/latest_release/JSON'
    VERSIONS_DIR = '/snp/archive'
    FILE_RE = 'refsnp-chr*.json.bz2'
    MAX_PARALLEL_DUMP = 10

    SCHEDULE = "0 9 * * *"

    def set_release(self):
github biothings / mychem.info / src / hub / dataload / sources / umls / umls_dump.py View on Github external
import os
import os.path
import sys
import time

import biothings, config
biothings.config_for_app(config)

from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import ManualDumper
from biothings.utils.common import unzipall


class UMLSDumper(ManualDumper):
    """Dumper for the "umls" source, built on ``ManualDumper``."""

    # Source identifier; also reused as the folder name just below.
    SRC_NAME = "umls"
    # Per-source data folder located under the configured archive root.
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    # NOTE(review): a pinned version was left disabled here — confirm whether
    # it is still needed before removing it.
    #VERSION = '2020-4-7'
github biothings / mygene.info / src / hub / dataload / sources / umls / dump.py View on Github external
import os
import os.path
import sys
import time

import biothings, config
biothings.config_for_app(config)

from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import ManualDumper
from biothings.utils.common import unzipall


class UMLSDumper(ManualDumper):

    SRC_NAME = "umls"
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)

    def __init__(self, *args, **kwargs):
        super(UMLSDumper,self).__init__(*args,**kwargs)
        self.logger.info("""
Assuming manual download from: https://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html
- umls-2017AA-full.zip
github biothings / mygene.info / src / hub / dataload / sources / uniprot / dump.py View on Github external
import os
import os.path
import sys
import time
from datetime import datetime

import biothings, config
biothings.config_for_app(config)

from config import DATA_ARCHIVE_ROOT, logger as logging
from biothings.hub.dataload.dumper import FTPDumper, FilesystemDumper
from biothings.utils.hub_db import get_src_dump


class UniprotDumper(FTPDumper):

    SRC_NAME = "uniprot"
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    FTP_HOST = 'ftp.uniprot.org'
    CWD_DIR = '/pub/databases/uniprot/current_release/knowledgebase/idmapping'

    SCHEDULE = "30 7 * * *"

    def get_newest_info(self):
github biothings / mychem.info / src / hub / dataload / sources / sider / sider_dump.py View on Github external
import os
import sys
import time
import ftplib
import re
import pandas as pd

import biothings, config
biothings.config_for_app(config)

from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import FTPDumper, DumperException
from biothings.utils.common import gunzipall


class SiderDumper(FTPDumper):

    SRC_NAME = "sider"
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    FTP_HOST = 'xi.embl.de'
    CWD_DIR = '/SIDER'
    SCHEDULE = "0 12 * * *"

    def get_release(self):
        # only dir with dates
github biothings / mygene.info / src / hub / dataload / sources / entrez / dump.py View on Github external
import os
import os.path
import sys
import time
from datetime import datetime

import biothings, config
biothings.config_for_app(config)

from config import DATA_ARCHIVE_ROOT, logger as logging
from biothings.hub.dataload.dumper import FTPDumper


class EntrezGeneDumper(FTPDumper):

    SRC_NAME = "entrez"
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    FTP_HOST = 'ftp.ncbi.nih.gov'
    CWD_DIR = '/gene/DATA'

    SCHEDULE = "0 22  * * 6"

    def get_newest_info(self):
        res = self.client.sendcmd("MDTM gene_info.gz") # pick one, assuming all other on the same data
github biothings / myvariant.info / src / dataload / sources / cadd / cadd_dump.py View on Github external
import os
import os.path
import sys
import time

import biothings, config
biothings.config_for_app(config)

from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import ManualDumper


class CADDDumper(ManualDumper):

    SRC_NAME = "cadd"
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)

    def __init__(self, *args, **kwargs):
        super(CADDDumper,self).__init__(*args,**kwargs)
        self.logger.info("""
Assuming manual download from: http://cadd.gs.washington.edu
of files (.tsv.gz and .tsv.gz.tbi) looking like:
- HumanExome-12v1-1_A_inclAn"no"
github biothings / mygene.info / src / dataload / data_dump / dl_exac.py View on Github external
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import os
import os.path
import time
from datetime import datetime
from ftplib import FTP

import biothings, config
biothings.config_for_app(config)

from biothings.utils.common import ask, timesofar, safewfile, setup_logfile
from biothings.utils.hipchat import hipchat_msg
from biothings.utils.mongo import get_src_dump
from config import DATA_ARCHIVE_ROOT, logger as logging


# Host name of the FTP server (presumably where the files listed below are
# fetched from — confirm against the download code).
FTP_SERVER = 'ftp.broadinstitute.org'

# Paths of the ExAC functional gene constraint files of interest.
DATAFILES_PATH = [
    'pub/ExAC_release/current/functional_gene_constraint/fordist_cleaned_exac_nonTCGA_z_pli_rec_null_data.txt',
    'pub/ExAC_release/current/functional_gene_constraint/fordist_cleaned_exac_r03_march16_z_pli_rec_null_data.txt',
    'pub/ExAC_release/current/functional_gene_constraint/fordist_cleaned_nonpsych_z_pli_rec_null_data.txt'
]

# Date stamp (YYYYMMDD) evaluated once, when the module is imported; it names
# the dated sub-folder under the archive root.
timestamp = time.strftime('%Y%m%d')
DATA_FOLDER = os.path.join(
    DATA_ARCHIVE_ROOT,
    'by_resources/exac',
    timestamp,
)
github biothings / myvariant.info / src / dataload / sources / emv / emv_dump.py View on Github external
import os
import os.path
import sys
import time

import biothings, config
biothings.config_for_app(config)

from config import DATA_ARCHIVE_ROOT
from biothings.hub.dataload.dumper import ManualDumper


class EMVDumper(ManualDumper):
    """Manual dumper for the "emv" source.

    Construction forwards every argument to ``ManualDumper`` and then logs a
    reminder of where the data files are assumed to have been downloaded
    from and what they are named.
    """

    SRC_NAME = "emv"
    # Per-source data folder located under the configured archive root.
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)

    def __init__(self, *args, **kwargs):
        """Initialise the base dumper, then log the manual-download notice."""
        super(EMVDumper, self).__init__(*args, **kwargs)
        # The text is kept flush-left so the logged message carries no
        # leading indentation.
        download_notice = """
Assuming manual download from: http://geneticslab.emory.edu/emvclass/emvclass.php
- EmVClass.*.csv
"""
        self.logger.info(download_notice)
github biothings / mygene.info / src / hub / dataload / sources / homologene / dump.py View on Github external
import os
import os.path
import sys
import time

import biothings, config
biothings.config_for_app(config)

from config import DATA_ARCHIVE_ROOT, logger as logging
from biothings.hub.dataload.dumper import FTPDumper


class HomologeneDumper(FTPDumper):

    SRC_NAME = "homologene"
    SRC_ROOT_FOLDER = os.path.join(DATA_ARCHIVE_ROOT, SRC_NAME)
    FTP_HOST = 'ftp.ncbi.nih.gov'
    CWD_DIR = '/pub/HomoloGene/current'

    SCHEDULE = "0 6 * * *"

    def get_newest_info(self):
        rel = None