How to use kipoi - 10 common examples

To help you get started, we’ve selected a few kipoi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kipoi / models / tests / test_all_models.py View on Github external
@pytest.mark.parametrize("model_name", models_to_test(kipoi.get_source("kipoi")))
def test_model(model_name, caplog):
    """Start a per-model test run by clearing any stale test environment.

    Parametrized over ``models_to_test(kipoi.get_source("kipoi"))``.

    Args:
        model_name: name of the model under test (one parametrize value).
        caplog: pytest log-capture fixture; its level is set to INFO.
    """
    caplog.set_level(logging.INFO)

    # The source is hard-coded; the assert below only restates that.
    source_name = "kipoi"
    assert source_name == "kipoi"

    # model_name is passed for both of the first two arguments.
    env_name = conda_env_name(model_name, model_name, source_name)
    env_name = "test-" + env_name  # prepend "test-"

    # if environment already exists, remove it
    if env_exists(env_name):
        print("Removing the environment: {0}".format(env_name))
        remove_env(env_name)
github kipoi / kipoiseq / tests / dont_test_4_integration.py View on Github external
def test_deepsea():
    """Build a ``ModelInfoExtractor`` for the DeepSEA variant-effects model.

    The test passes when loading the model and constructing the extractor
    with ``SeqIntervalDl`` raise no exception; nothing is asserted on the
    resulting object.
    """
    deepsea = kipoi.get_model("DeepSEA/variantEffects")
    extractor = ModelInfoExtractor(deepsea, SeqIntervalDl)
github kipoi / kipoiseq / tests / dont_test_4_integration.py View on Github external
def test_var_eff_pred_varseq(tmpdir):
    """Set up variant-effect prediction inputs for DeepSEA/variantEffects.

    Loads the model, builds the dataloader arguments relative to the model's
    source directory, and creates a VCF writer that targets a file inside
    ``tmpdir``.

    Args:
        tmpdir: pytest temporary-directory fixture used for the output VCF.
    """
    model_name = "DeepSEA/variantEffects"
    # Installing requirements is optional and controlled by INSTALL_REQ.
    if INSTALL_REQ:
        install_model_requirements(model_name, "kipoi", and_dataloaders=True)
    # Load the model from the "kipoi" source.
    model = kipoi.get_model(model_name, source="kipoi")
    # The preprocessor
    Dataloader = SeqIntervalDl
    # File arguments are given relative to the model directory here and made
    # absolute in the comprehension below.
    dataloader_arguments = {"intervals_file": "example_files/intervals.bed",
                            "fasta_file": "example_files/hg38_chr22.fa",
                            "required_seq_len": 1000, "alphabet_axis": 1, "dummy_axis": 2, "label_dtype": str}
    # Prefix only string values with model.source_dir; the ints and the
    # ``str`` type object under "label_dtype" are passed through unchanged.
    dataloader_arguments = {k: model.source_dir + "/" + v if isinstance(v, str) else v for k, v in
                            dataloader_arguments.items()}

    vcf_path = "tests/data/variants.vcf"
    out_vcf_fpath = str(tmpdir.mkdir("variants_generated", ).join("out.vcf"))
    # vcf_path is rebound to whatever ensure_tabixed_vcf returns.
    # NOTE(review): presumably the path of a tabix-indexed copy - confirm.
    vcf_path = kipoi_veff.ensure_tabixed_vcf(vcf_path)
    model_info = kipoi_veff.ModelInfoExtractor(model, Dataloader)
    writer = kipoi_veff.VcfWriter(
        model, vcf_path, out_vcf_fpath, standardise_var_id=True)
github kipoi / models / Basset / test_basset_model.py View on Github external
def test_ref_seq():
    """Compare model predictions on reference sequence with stored values.

    Loads the model from ``model_root`` (``source="dir"``), runs the model
    pipeline on a fixed FASTA/BED pair and asserts that every prediction is
    within ``atol=1e-3`` of the tab-separated reference table.

    Raises:
        AssertionError: if any prediction differs from the stored value by
            more than the tolerance.
    """
    # Get pure fasta predictions
    model_dir = model_root + "./"
    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor. The factory is not used below; the lookup is kept so
    # the test still performs every call it did before.
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")
    dataloader_arguments = {
        "fasta_file": "/nfs/research1/stegle/users/rkreuzhu/opt/manuscript_code/data/raw/dataloader_files/shared/hg19.fa",
        "intervals_file": "test_files/test_encode_roadmap.bed"
    }
    # predict using results
    preds = model.pipeline.predict(dataloader_arguments)
    # The separator must be passed by keyword: pandas 2.0 made every
    # read_csv argument after the path keyword-only.
    res_orig = pd.read_csv(
        "/nfs/research1/stegle/users/rkreuzhu/deeplearning/Basset/data/test_encode_roadmap_short_pred.txt",
        sep="\t",
        header=None,
    )
    assert np.isclose(preds, res_orig.values, atol=1e-3).all()
github kipoi / models / Basset / test_basset_model.py View on Github external
def test_ref_seq():
    """Compare model predictions on reference sequence with stored values.

    Loads the model from ``model_root`` (``source="dir"``), runs the model
    pipeline on a fixed FASTA/BED pair and asserts that every prediction is
    within ``atol=1e-3`` of the tab-separated reference table.

    Raises:
        AssertionError: if any prediction differs from the stored value by
            more than the tolerance.
    """
    # Get pure fasta predictions
    model_dir = model_root + "./"
    model = kipoi.get_model(model_dir, source="dir")
    # The preprocessor. The factory is not used below; the lookup is kept so
    # the test still performs every call it did before.
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")
    dataloader_arguments = {
        "fasta_file": "/nfs/research1/stegle/users/rkreuzhu/opt/manuscript_code/data/raw/dataloader_files/shared/hg19.fa",
        "intervals_file": "test_files/test_encode_roadmap.bed"
    }
    # predict using results
    preds = model.pipeline.predict(dataloader_arguments)
    # The separator must be passed by keyword: pandas 2.0 made every
    # read_csv argument after the path keyword-only.
    res_orig = pd.read_csv(
        "/nfs/research1/stegle/users/rkreuzhu/deeplearning/Basset/data/test_encode_roadmap_short_pred.txt",
        sep="\t",
        header=None,
    )
    assert np.isclose(preds, res_orig.values, atol=1e-3).all()
github kipoi / models / tests / test_all_models.py View on Github external
def test_model(model_name, caplog):
    """Create a fresh conda test environment for ``model_name``.

    Removes a pre-existing environment of the same name, then runs
    ``kipoi env create`` in a subprocess and asserts that it exits with
    return code 0.

    Args:
        model_name: name of the model under test.
        caplog: pytest log-capture fixture; its level is set to INFO.
    """
    caplog.set_level(logging.INFO)

    # The source is hard-coded; the assert below only restates that.
    source_name = "kipoi"
    assert source_name == "kipoi"

    # model_name is passed for both of the first two arguments.
    env_name = conda_env_name(model_name, model_name, source_name)
    env_name = "test-" + env_name  # prepend "test-"

    # if environment already exists, remove it
    if env_exists(env_name):
        print("Removing the environment: {0}".format(env_name))
        remove_env(env_name)

    # create the model test environment
    args = ["kipoi", "env", "create",
            "--source", source_name,
            "--env", env_name,
            model_name]
    # Argument list, no shell: the command is executed directly.
    returncode = subprocess.call(args=args)
    assert returncode == 0

    # NOTE(review): the body of the following branch is not visible here.
    if model_name == "basenji":
github kipoi / models / Basenji / test_model.py View on Github external
def test_tf_model():
    """Load the TensorFlow model files and inspect the default graph's ops.

    Resets the default graph, builds a ``TensorFlowModel`` from the files
    under ``model_files/``, exports the meta graph as text and collects the
    outputs of placeholder ops.
    """
    tf.reset_default_graph()
    # input_nodes, target_nodes and index are assigned but not read in the
    # code shown; the constructor call below repeats the literals instead.
    input_nodes = "inputs"
    target_nodes = "preds"
    meta_graph = "model_files/model.tf.meta"
    # meta_graph = 'model_files/model.tf-modified.meta'
    checkpoint = "model_files/model.tf"
    index = "model_files/model.tf.index"
    pkl_file = "model_files/const_feed_dict.pkl"

    from kipoi.model import TensorFlowModel

    m = TensorFlowModel(input_nodes="inputs",
                        target_nodes="preds",
                        meta_graph=meta_graph,
                        checkpoint=checkpoint,
                        const_feed_dict_pkl=pkl_file)
    # Operations of the default graph, read after the model was constructed.
    ops = tf.get_default_graph().get_operations()



    # TODO - modify the meta graph (the original note was left unfinished)
    # Writes a text-format meta graph next to the original model files.
    out = tf.train.export_meta_graph(filename='model_files/model.tf-modified.meta', as_text=True)
    # NOTE(review): item assignment on a tensor's shape, done after the
    # export above - confirm the shape object supports it and that the
    # ordering is intended.
    ops[0].outputs[0].shape[0] = None

    # First output of every op of type "Placeholder" whose name also starts
    # with "Placeholder".
    pops = [op.outputs[0] for op in ops
           if op.type == "Placeholder" and
           op.name.startswith("Placeholder")]
github kipoi / kipoiseq / tests / dont_test_4_integration.py View on Github external
def test_var_eff_pred_varseq(tmpdir):
    """Set up variant-effect prediction inputs for DeepSEA/variantEffects.

    Loads the model, builds the dataloader arguments relative to the model's
    source directory, and prepares the input VCF and model info extractor.

    Args:
        tmpdir: pytest temporary-directory fixture used for the output VCF.
    """
    model_name = "DeepSEA/variantEffects"
    # Installing requirements is optional and controlled by INSTALL_REQ.
    if INSTALL_REQ:
        install_model_requirements(model_name, "kipoi", and_dataloaders=True)
    # Load the model from the "kipoi" source.
    model = kipoi.get_model(model_name, source="kipoi")
    # The preprocessor
    Dataloader = SeqIntervalDl
    # File arguments are given relative to the model directory here and made
    # absolute in the comprehension below.
    dataloader_arguments = {"intervals_file": "example_files/intervals.bed",
                            "fasta_file": "example_files/hg38_chr22.fa",
                            "required_seq_len": 1000, "alphabet_axis": 1, "dummy_axis": 2, "label_dtype": str}
    # Prefix only string values with model.source_dir; the ints and the
    # ``str`` type object under "label_dtype" are passed through unchanged.
    dataloader_arguments = {k: model.source_dir + "/" + v if isinstance(v, str) else v for k, v in
                            dataloader_arguments.items()}

    vcf_path = "tests/data/variants.vcf"
    out_vcf_fpath = str(tmpdir.mkdir("variants_generated", ).join("out.vcf"))
    # vcf_path is rebound to whatever ensure_tabixed_vcf returns.
    # NOTE(review): presumably the path of a tabix-indexed copy - confirm.
    vcf_path = kipoi_veff.ensure_tabixed_vcf(vcf_path)
    model_info = kipoi_veff.ModelInfoExtractor(model, Dataloader)
github kipoi / models / MMSplice / modules / exon_5prime / dataloader.py View on Github external
def __next__(self):
        """Return the next exon as a dict with ``inputs`` and ``metadata``.

        The sequence is fetched from ``self.fasta`` and upper-cased; when
        ``self.split_seq`` is truthy it is replaced by the first ``'exon'``
        element returned by ``self.split``. Exhaustion of
        ``self.exonGenerator`` propagates as raised by ``next``.
        """
        exon = next(self.exonGenerator)

        sequence = exon.get_seq(self.fasta).upper()
        if self.split_seq:
            pieces = self.split(sequence, exon.overhang)
            sequence = pieces['exon'][0]

        ranges = GenomicRanges(
            exon.chrom,
            exon.Exon_Start,
            exon.Exon_End,
            exon.transcript_id,
            exon.strand)

        return {
            'inputs': {'input_3': sequence},
            'metadata': {'ranges': ranges},
        }
github kipoi / models / FactorNet / FOXA1 / multiTask_DGF / dataloader.py View on Github external
rf.get_file(output_file)


class BedToolLinecache(BedTool):
    """BedTool with line-addressed item access (by Ziga Avsec).

    A plain BedTool loops through the whole file to reach the line of
    interest, so access is O(n). This subclass reads the requested line
    through ``linecache`` instead.

    Note: this might load the whole bed file into memory.
    """

    def __getitem__(self, idx):
        # linecache numbers lines from 1, while idx is 0-based.
        raw_line = linecache.getline(self.fn, idx + 1)
        fields = raw_line.strip().split("\t")
        return pybedtools.create_interval_from_list(fields)


class SeqDataset(Dataset):
    """
    Args:
        intervals_file: bed3 file containing intervals
        fasta_file: file path; Genome sequence
        target_file: file path; path to the targets in the csv format
    """

    SEQ_WIDTH = 1002

    def __init__(self,
                 intervals_file,
                 fasta_file,
                 dnase_file,
                 use_linecache=True):

        # intervals