import kipoi
from kipoi_veff import ModelInfoExtractor
from kipoiseq.dataloaders import SeqIntervalDl

def test_deepsea():
    model = kipoi.get_model("DeepSEA/variantEffects")
    mie = ModelInfoExtractor(model, SeqIntervalDl)
import kipoi
import kipoi_veff
from kipoi.pipeline import install_model_requirements
from kipoiseq.dataloaders import SeqIntervalDl

def test_var_eff_pred_varseq(tmpdir):
    model_name = "DeepSEA/variantEffects"
    if INSTALL_REQ:  # INSTALL_REQ: module-level flag defined elsewhere in the test suite
        install_model_requirements(model_name, "kipoi", and_dataloaders=True)
    model = kipoi.get_model(model_name, source="kipoi")
    # The dataloader (preprocessor)
    Dataloader = SeqIntervalDl
    dataloader_arguments = {"intervals_file": "example_files/intervals.bed",
                            "fasta_file": "example_files/hg38_chr22.fa",
                            "required_seq_len": 1000, "alphabet_axis": 1,
                            "dummy_axis": 2, "label_dtype": str}
    # Resolve relative file paths against the model's source directory
    dataloader_arguments = {k: model.source_dir + "/" + v if isinstance(v, str) else v
                            for k, v in dataloader_arguments.items()}
    vcf_path = "tests/data/variants.vcf"
    out_vcf_fpath = str(tmpdir.mkdir("variants_generated").join("out.vcf"))
    # bgzip + tabix-index the VCF so it can be queried by region
    vcf_path = kipoi_veff.ensure_tabixed_vcf(vcf_path)
    model_info = kipoi_veff.ModelInfoExtractor(model, Dataloader)
    writer = kipoi_veff.VcfWriter(
        model, vcf_path, out_vcf_fpath, standardise_var_id=True)
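The snippet above stops after constructing the writer. A hedged sketch of how scoring could proceed from here via kipoi_veff's high-level API (the `scores` value is an illustrative assumption; see the kipoi-veff docs):

import kipoi_veff.snv_predict as sp

# Hedged sketch: predict on reference vs. alternative sequences for every VCF
# variant and annotate the output VCF with the "diff" score.
sp.score_variants(model="DeepSEA/variantEffects",
                  dl_args=dataloader_arguments,
                  input_vcf=vcf_path,
                  output_vcf=out_vcf_fpath,
                  scores=["diff"])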
import kipoi
import numpy as np
import pandas as pd

def test_ref_seq():
    # Get pure fasta-based predictions
    model_dir = model_root + "./"  # model_root: defined elsewhere in the test module
    model = kipoi.get_model(model_dir, source="dir")
    # The dataloader (preprocessor)
    Dataloader = kipoi.get_dataloader_factory(model_dir, source="dir")
    dataloader_arguments = {
        "fasta_file": "/nfs/research1/stegle/users/rkreuzhu/opt/manuscript_code/data/raw/dataloader_files/shared/hg19.fa",
        "intervals_file": "test_files/test_encode_roadmap.bed"
    }
    # Run the full pipeline: dataloading, batching and prediction
    preds = model.pipeline.predict(dataloader_arguments)
    # Compare against previously computed reference predictions
    res_orig = pd.read_csv("/nfs/research1/stegle/users/rkreuzhu/deeplearning/Basset/data/test_encode_roadmap_short_pred.txt",
                           sep="\t", header=None)
    assert np.isclose(preds, res_orig.values, atol=1e-3).all()
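As a hedged aside: when the hard-coded NFS paths are unavailable, kipoi models bundle small example files, and the pipeline can run on them directly (this is what `kipoi test` exercises):

# Hedged smoke test using the model's bundled example files.
preds_example = model.pipeline.predict_example()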
import logging
import subprocess

import pytest
import kipoi

# models_to_test, conda_env_name, env_exists, remove_env are helpers defined
# elsewhere in the kipoi test suite.
@pytest.mark.parametrize("model_name", models_to_test(kipoi.get_source("kipoi")))
def test_model(model_name, caplog):
    """kipoi test ...
    """
    caplog.set_level(logging.INFO)
    source_name = "kipoi"
    assert source_name == "kipoi"
    env_name = conda_env_name(model_name, model_name, source_name)
    env_name = "test-" + env_name  # prepend "test-"
    # If the environment already exists, remove it
    if env_exists(env_name):
        print("Removing the environment: {0}".format(env_name))
        remove_env(env_name)
    # Create the model test environment
    args = ["kipoi", "env", "create",
            "--source", source_name,
            "--env", env_name,
            model_name]
    returncode = subprocess.call(args=args)
    assert returncode == 0
    if model_name == "basenji":
        pass  # model-specific handling is truncated in the original snippet
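The snippet ends at the model-specific branch. As a hedged continuation sketch (not necessarily the original test's mechanism), the freshly created environment could be exercised with `kipoi test` via `conda run`:

# Hedged continuation sketch: run the model's own tests inside the new env.
args = ["conda", "run", "-n", env_name,
        "kipoi", "test", "--source", source_name, model_name]
assert subprocess.call(args=args) == 0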
import tensorflow as tf  # TF1 graph-mode API (use tf.compat.v1 on TF2)

def test_tf_model():
    tf.reset_default_graph()
    input_nodes = "inputs"
    target_nodes = "preds"
    meta_graph = "model_files/model.tf.meta"
    # meta_graph = 'model_files/model.tf-modified.meta'
    checkpoint = "model_files/model.tf"
    index = "model_files/model.tf.index"
    pkl_file = "model_files/const_feed_dict.pkl"
    from kipoi.model import TensorFlowModel
    m = TensorFlowModel(input_nodes=input_nodes,
                        target_nodes=target_nodes,
                        meta_graph=meta_graph,
                        checkpoint=checkpoint,
                        const_feed_dict_pkl=pkl_file)
    ops = tf.get_default_graph().get_operations()
    # TODO - modify the batch dimension of the input placeholder
    out = tf.train.export_meta_graph(filename='model_files/model.tf-modified.meta', as_text=True)
    # The next line was the original attempt to relax the batch dimension; it
    # raises as written, since tensor shapes are not item-assignable:
    # ops[0].outputs[0].shape[0] = None
    pops = [op.outputs[0] for op in ops
            if op.type == "Placeholder" and
            op.name.startswith("Placeholder")]
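Once constructed, the TensorFlowModel behaves like any other kipoi model. A hedged usage sketch; the (1, 131072, 4) input shape is an assumption for Basenji-style one-hot DNA input, and the model's schema is authoritative:

import numpy as np

# Hedged sketch: push a zero batch through the loaded graph; check
# m.schema.inputs for the real input shape instead of the assumed one.
x = np.zeros((1, 131072, 4), dtype=np.float32)
preds = m.predict_on_batch(x)
print(preds.shape)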
def __next__(self):
    # Pull the next exon record from the generator
    ss = next(self.exonGenerator)
    out = {}
    out['inputs'] = {}
    # Extract the exon sequence from the fasta file
    seq = ss.get_seq(self.fasta).upper()
    if self.split_seq:
        seq = self.split(seq, ss.overhang)['exon'][0]
    out['inputs']['input_3'] = seq
    # Record where the sequence came from
    out['metadata'] = {}
    out['metadata']['ranges'] = GenomicRanges(
        ss.chrom,
        ss.Exon_Start,
        ss.Exon_End,
        ss.transcript_id,
        ss.strand)
    return out
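A hedged sketch of consuming such a dataloader through the iterator protocol (`dl` stands in for an instance of the surrounding, unnamed dataloader class):

sample = next(dl)                      # dl: dataloader instance (assumed name)
seq = sample['inputs']['input_3']      # upper-cased exon sequence
ranges = sample['metadata']['ranges']  # kipoi GenomicRanges record
print(len(seq), ranges.chr, ranges.start, ranges.end)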
rf.get_file(output_file)  # rf: presumably a kipoi RemoteFile; downloads the remote resource to output_file
import linecache
import pybedtools
from pybedtools import BedTool

class BedToolLinecache(BedTool):
    """Faster BedTool accessor by Ziga Avsec

    A normal BedTool loops through the whole file to fetch the
    line of interest, so access is O(n).
    Note: this might load the whole bed file into memory.
    """
    def __getitem__(self, idx):
        # linecache uses 1-based line numbers
        line = linecache.getline(self.fn, idx + 1)
        return pybedtools.create_interval_from_list(line.strip().split("\t"))
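A hedged usage sketch ("intervals.bed" is a placeholder path):

bt = BedToolLinecache("intervals.bed")
iv = bt[0]                             # O(1) lookup via linecache, no file scan
print(iv.chrom, iv.start, iv.end)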
from kipoi.data import Dataset

class SeqDataset(Dataset):
    """
    Args:
        intervals_file: bed3 file containing intervals
        fasta_file: file path; genome sequence
        dnase_file: file path; DNase accessibility data
    """
    SEQ_WIDTH = 1002

    def __init__(self,
                 intervals_file,
                 fasta_file,
                 dnase_file,
                 use_linecache=True):
        # intervals
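        # Hedged sketch of a possible continuation (an assumption modeled on
        # similar kipoi dataloaders, not the original code): pick the interval
        # accessor and keep the file paths for lazy opening.
        if use_linecache:
            self.bt = BedToolLinecache(intervals_file)  # O(1) line access
        else:
            self.bt = BedTool(intervals_file)
        self.fasta_file = fasta_file
        self.dnase_file = dnase_file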