How to use snakemake - 10 common examples

To help you get started, we’ve selected a few snakemake examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github broadinstitute / viral-ngs / test / unit / test_snake.py View on Github external
def call_snakemake(workdir, targets=None):
    return snakemake.snakemake(
        os.path.join(workdir, 'Snakefile'),
        configfile=os.path.join(workdir, 'config.yaml'),
        workdir=workdir,
        dryrun=True,
        targets=targets)
github emanuega / MERlin / test / test_snakemake.py View on Github external
'parameters': {'dependencies': ['Task1']}},
        {'task': 'SimpleParallelAnalysisTask',
         'module': 'testtask',
         'analysis_name': 'Task3',
         'parameters': {'dependencies': ['Task2']}}
    ]}

    generator = snakewriter.SnakefileGenerator(taskDict, simple_merfish_data)
    workflow = generator.generate_workflow()
    outputTask1 = simple_merfish_data.load_analysis_task('Task1')
    outputTask2 = simple_merfish_data.load_analysis_task('Task2')
    outputTask3 = simple_merfish_data.load_analysis_task('Task3')
    assert not outputTask1.is_complete()
    assert not outputTask2.is_complete()
    assert not outputTask3.is_complete()
    snakemake.snakemake(workflow)
    assert outputTask1.is_complete()
    assert outputTask2.is_complete()
    assert outputTask3.is_complete()

    shutil.rmtree('.snakemake')
github percyfal / pytest-ngsfixtures / pytest_ngsfixtures / helpers.py View on Github external
def make_targets(rules, config, application, **kw):
    TARGETS = []
    all_versions = get_versions(config[application])
    for r in rules:
        p = {}
        if not r.name.startswith(application.replace("-", "_")):
            continue
        for label, out in r.output.items():
            if "{end}" in str(out):
                p['end'] = config[application][r.name].get("_end", kw['end'])
            if "{version}" in str(out):
                versions = get_versions(config[application][r.name], all_versions)
                p['version'] = versions
            TMP = expand(out, **p)
            TARGETS = TARGETS + list(TMP)
    return TARGETS
github percyfal / snakemake-rules / tests / helpers / utils.py View on Github external
all_files = []
        for wc, filename in inputmap:
            try:
                wc = eval(wc)
            except:
                pass
            wc = update_wildcard_constraints(wc, wildcard_constraints, {})
            all_wc.append(wc)
            if filename is None:
                continue
            if isinstance(filename, str):
                filename = [filename]
            all_files = all_files + filename
        for f in all_files:
            for wc in all_wc:
                wildcards = glob_wildcards(wc, [os.path.basename(f)])
                for k, v in wildcards._asdict().items():
                    if len(v) > 0:
                        d[k] = v[0]
    except:
        logger.debug("Failed to get wildcards for inputmap ", inputmap)
        raise
    return d
github lcdb / lcdb-wf / wrappers / wrappers / sicer / wrapper.py View on Github external
redundancy_threshold = get_value('redundancy_threshold')
window_size = get_value('window_size')
fragment_size = get_value('fragment_size')
effective_genome_fraction = get_value('effective_genome_fraction', 'reference_effective_genome_fraction')
gap_size = get_value('gap_size')
fdr = get_value('fdr')
genome_build = get_value('genome_build', 'reference_genome_build')

outdir, basebed = os.path.split(snakemake.output.bed)
label = snakemake.params.block['label']

tmpdir = tempfile.mkdtemp()
cwd = os.getcwd()

# SICER expects bed input format, not bam as in other peak callers
shell(
    'bamToBed -i {snakemake.input.ip} > {tmpdir}/ip.bed ; '
    'bamToBed -i {snakemake.input.control} > {tmpdir}/in.bed '
)

# SICER emits a single hard-coded file that does not respect output directory.
# So move each run into its own temp directory to avoid collisions with
# other processes.
os.chdir(tmpdir)

shell(
    # there is a CI-specific bug, in which the python symlink is not correctly resolved to python2.7;
    # so as a really desperate hack, modify SICER's python calls to directly touch 2.7
    """sed 's/^python/$CONDA_PREFIX\/bin\/python2.7/' """
    """$CONDA_PREFIX/share/sicer*/SICER.sh > {tmpdir}/SICER.sh && chmod u+x {tmpdir}/SICER.sh """
)
shell(
github lcdb / lcdb-wf / wrappers / wrappers / macs2 / backgrounds / wrapper.py View on Github external
lock_file.close()
                break
            
            cmds = (
                'macs2 predictd '
                '-i {ip_bam} '
                '{genome_count_flag} '
                '-m {mfold_lower} {mfold_upper} '
                '-f BAM '
            )
            shell(cmds + ' {log}')
            inlog = log.split(' ')[-2]
            d_estimate_temporary_file = tempfile.NamedTemporaryFile().name
            cmds = ('awk \'/predicted fragment length is/ {{print "{mfold_lower} {mfold_upper} "$(NF-1)}}\' '
                    '< {inlog} > {d_estimate_temporary_file}')
            shell(cmds)
            with open(d_estimate_temporary_file, "r") as f:
                lines = f.readlines()
                # try to get access to d estimate file
                with open(d_estimate_file, "a") as d_file:
                    # if there was less than one matching line from the awk extraction,
                    # it's because there was an MFOLD failure. Fall back to default extsize.
                    if len(lines) < 1:
                        d_file.write("{0} {1} {2}\n".format(mfold_lower, mfold_upper, d_estimate_default_value))
                    else:
                        d_file.write("{0} {1} {2}\n".format(mfold_lower, mfold_upper, lines[0].strip()))
            cmds = ('awk \'/tag size is determined as/ {{print $(NF-1)}}\' < {inlog} > {read_length_file}')
            shell(cmds)
            shell("rm {0}".format(d_estimate_temporary_file))

            # release the d estimate lock
            fcntl.flock(lock_file, fcntl.LOCK_UN)
github metagenome-atlas / atlas / atlas / cluster / slurm / slurm-submit.py View on Github external
"-t", "--time", help="time limit")
slurm_parser.add_argument(
    "--wrap", help="wrap command string in a sh script and submit")
slurm_parser.add_argument(
    "-C", "--constraint", help="specify a list of constraints")
slurm_parser.add_argument(
    "--mem", help="minimum amount of real memory")

args = parser.parse_args()

if args.help:
    parser.print_help()
    sys.exit(0)

jobscript = sys.argv[-1]
job_properties = read_job_properties(jobscript)

extras = ""
if args.positional:
    for m in args.positional:
        if m is not None:
            extras = extras + " " + m

arg_dict = dict(args.__dict__)


# Process resources
if "resources" in job_properties:
    resources = job_properties["resources"]
    if arg_dict["time"] is None:
        if "runtime" in resources:
            arg_dict["time"] = resources["runtime"]
github spraakbanken / sparv-pipeline / sparv / core / snake_utils.py View on Github external
rule.missing_config.update(annotation.expand_variables(rule.full_name))
                export_annotations[i] = (annotation, export_name)
                plain_name, attr = annotation.split()
                if not attr:
                    plain_annotations.add(plain_name)
                else:
                    possible_plain_annotations.add(plain_name)
            # Add plain annotations where needed
            for a in possible_plain_annotations.difference(plain_annotations):
                export_annotations.append((annotation_type(a), None))

            for annotation, export_name in export_annotations:
                if param.default.is_input:
                    if param_type == ExportAnnotationsAllDocs:
                        rule.inputs.extend(
                            expand(escape_wildcards(paths.annotation_dir / get_annotation_path(annotation.name)),
                                   doc=get_source_files(storage.source_files)))
                    else:
                        rule.inputs.append(paths.annotation_dir / get_annotation_path(annotation.name))
                rule.parameters[param_name].append((annotation, export_name))
        # Corpus
        elif param.annotation == Corpus:
            rule.parameters[param_name] = Corpus(sparv_config.get("metadata.id"))
        # Language
        elif param.annotation == Language:
            rule.parameters[param_name] = Language(sparv_config.get("metadata.language"))
        # Document
        elif param.annotation == Document:
            rule.docs.append(param_name)
        # AllDocuments (all source documents)
        elif param_type == AllDocuments:
            rule.parameters[param_name] = AllDocuments(get_source_files(storage.source_files))
github chenfeiwang / MAESTRO / MAESTRO / scATAC_report.py View on Github external
report_html_tempfile = os.path.join(SCRIPT_PATH, "html", "scATAC_template.html")
report_html_temp = open(report_html_tempfile, "r").read()

# readdistrplot_link = '''"Plot/%s_scATAC_read_distr.png"'''%outpre
# fragplot_link = '''"Plot/%s_scATAC_fragment_size.png"'''%outpre
# mapplot_link = '''"Plot/%s_scATAC_mapping_summary.png"'''%outpre
# fripplot_link = '''"Plot/%s_scATAC_cell_filtering.png"'''%outpre
# peakcluster_link = '''"Plot/%s_cluster.png"'''%outpre
# rpannotate_link = '''"Plot/%s_annotated.png"'''%outpre
# bulkqc_file = "Result/QC/%s_bam_stat.txt"%outpre
cluster_regulator_file = "Result/Analysis/%s.PredictedTFTop10.txt"%outpre



fragplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scATAC_fragment_size.png"%outpre)[0]
# mapplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scATAC_mapping_summary.png"%outpre)[0]
fripplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scATAC_cell_filtering.png"%outpre)[0]
peakcluster_link = snakemake.report.data_uri_from_file("Result/Analysis/%s_cluster.png"%outpre)[0]
rpannotate_link = snakemake.report.data_uri_from_file("Result/Analysis/%s_annotated.png"%outpre)[0]
readdistrplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scATAC_read_distr.png"%outpre)[0]

# line_id = 0
# total,mapped,duplicate,mito,uniq,promoters = 0,0,0,0,0,0
# for line in open(bulkqc_file).readlines():
#     line = line.strip().split(' ')
#     line_id += 1
#     if line_id == 1:
#         total = int(line[0])
#     if line_id == 5:
#         mapped = int(line[0])
#     if line_id == 4:
github chenfeiwang / MAESTRO / MAESTRO / scRNA_report.py View on Github external
items_str_list = ["                                                            " + i + "" for i in items]
            items_str = "                                                        \n" + "\n".join(items_str_list) + "\n                                                        "
            td_list.append(items_str)
    td_str = "\n".join(td_list)

    #"totalreads":stat_list[0],"dupreads":stat_list[1],"mapreads":stat_list[2],"maptags":stat_list[3],"exontags":stat_list[4],"introntags":stat_list[5],
    report_html_instance = report_html_temp % {"outprefix":outpre, "fastqdir":fastqdir, "species":species,"platform":platform, "readdistr":readdistrplot_link,"readqual":readqualplot_link, "nvc":nvcplot_link, "gc":gcplot_link, "genecov":genecovplot_link, "countgene":countgeneplot_link, "genecluster":genecluster_link, "geneannotate":geneannotate_link, "regtable":td_str}

elif platform == "10x-genomics":
    report_html_tempfile = os.path.join(SCRIPT_PATH, "html", "scRNA_10x_noqc_template.html")
    report_html_temp = open(report_html_tempfile, "r").read()

    cluster_regulator_file = "Result/Analysis/%s.PredictedTFTop10.txt"%outpre

    readdistrplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scRNA_read_distr.png"%outpre)[0]
    countgeneplot_link = snakemake.report.data_uri_from_file("Result/QC/%s_scRNA_cell_filtering.png"%outpre)[0]
    genecluster_link = snakemake.report.data_uri_from_file("Result/Analysis/%s_cluster.png"%outpre)[0]
    geneannotate_link = snakemake.report.data_uri_from_file("Result/Analysis/%s_annotated.png"%outpre)[0]

    td_list = []
    for line in open(cluster_regulator_file,"r").readlines():
        if not line.startswith("Cluster"):
            items = line.strip().split("\t")
            items_str_list = ["                                                            " + i + "" for i in items]
            items_str = "                                                        \n" + "\n".join(items_str_list) + "\n                                                        "
            td_list.append(items_str)
    td_str = "\n".join(td_list)

    #"totalreads":stat_list[0],"dupreads":stat_list[1],"mapreads":stat_list[2],"maptags":stat_list[3],"exontags":stat_list[4],"introntags":stat_list[5],
    report_html_instance = report_html_temp % {"outprefix":outpre, "fastqdir":fastqdir, "species":species,"platform":platform, "readdistr":readdistrplot_link, "countgene":countgeneplot_link, "genecluster":genecluster_link, "geneannotate":geneannotate_link, "regtable":td_str}