How to use the snakemake.shell function in snakemake

To help you get started, we’ve selected a few examples of snakemake.shell, based on popular ways it is used in public projects.

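Before the project snippets, here is a minimal sketch of calling snakemake.shell on its own; the file names and commands are made up for illustration.

from snakemake import shell

sample = "reads.fastq.gz"            # hypothetical input
trimmed = "reads.trimmed.fastq.gz"   # hypothetical output

# {braces} in the command are expanded against variables visible in the calling
# scope, which is why the wrappers below can reference snakemake.input, tmpdir,
# and similar names directly inside the command string.
shell("zcat {sample} | head -n 4000 | gzip > {trimmed}")

# read=True captures the command's stdout instead of streaming it to the terminal
n_lines = shell("zcat {trimmed} | wc -l", read=True)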

github lcdb / lcdb-wf / wrappers / wrappers / sicer / wrapper.py View on Github external
redundancy_threshold = get_value('redundancy_threshold')
window_size = get_value('window_size')
fragment_size = get_value('fragment_size')
effective_genome_fraction = get_value('effective_genome_fraction', 'reference_effective_genome_fraction')
gap_size = get_value('gap_size')
fdr = get_value('fdr')
genome_build = get_value('genome_build', 'reference_genome_build')

outdir, basebed = os.path.split(snakemake.output.bed)
label = snakemake.params.block['label']

tmpdir = tempfile.mkdtemp()
cwd = os.getcwd()

# SICER expects bed input format, not bam as in other peak callers
shell(
    'bamToBed -i {snakemake.input.ip} > {tmpdir}/ip.bed ; '
    'bamToBed -i {snakemake.input.control} > {tmpdir}/in.bed '
)

# SICER emits a single hard-coded file that does not respect output directory.
# So move each run into its own temp directory to avoid collisions with
# other processes.
os.chdir(tmpdir)

shell(
    # there is a CI-specific bug in which the python symlink is not correctly resolved to python2.7;
    # as a desperate hack, rewrite SICER's python calls to invoke python2.7 directly
    """sed 's/^python/$CONDA_PREFIX\/bin\/python2.7/' """
    """$CONDA_PREFIX/share/sicer*/SICER.sh > {tmpdir}/SICER.sh && chmod u+x {tmpdir}/SICER.sh """
)
shell(
github lcdb / lcdb-wf / wrappers / wrappers / macs2 / backgrounds / wrapper.py View on Github external
                lock_file.close()
                break
            
            cmds = (
                'macs2 predictd '
                '-i {ip_bam} '
                '{genome_count_flag} '
                '-m {mfold_lower} {mfold_upper} '
                '-f BAM '
            )
            shell(cmds + ' {log}')
            inlog = log.split(' ')[-2]
            d_estimate_temporary_file = tempfile.NamedTemporaryFile().name
            cmds = ('awk \'/predicted fragment length is/ {{print "{mfold_lower} {mfold_upper} "$(NF-1)}}\' '
                    '< {inlog} > {d_estimate_temporary_file}')
            shell(cmds)
            with open(d_estimate_temporary_file, "r") as f:
                lines = f.readlines()
                # try to get access to d estimate file
                with open(d_estimate_file, "a") as d_file:
                    # if the awk extraction produced no matching lines, there was
                    # an MFOLD failure; fall back to the default extsize
                    if len(lines) < 1:
                        d_file.write("{0} {1} {2}\n".format(mfold_lower, mfold_upper, d_estimate_default_value))
                    else:
                        d_file.write("{0} {1} {2}\n".format(mfold_lower, mfold_upper, lines[0].strip()))
            cmds = ('awk \'/tag size is determined as/ {{print $(NF-1)}}\' < {inlog} > {read_length_file}')
            shell(cmds)
            shell("rm {0}".format(d_estimate_temporary_file))

            # release the d estimate lock
            fcntl.flock(lock_file, fcntl.LOCK_UN)
github karel-brinda / rnftools / rnftools / utils / __init__.py View on Github external
def shell(
		cmd,
		remove_spaces=True,
		async=False,
		iterable=False,
		read=False,
):
	if remove_spaces:
		# print("removing spaces from command")
		cmd = re.sub(r'[ \t\f\v]+', ' ', cmd).strip()

	return snakemake.shell(
		cmd=cmd,
		async=async,
		iterable=iterable,
		read=read,
	)
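
The wrapper above forwards its arguments to snakemake.shell after collapsing runs of spaces and tabs; note that it uses async as a parameter name, which has been a reserved keyword since Python 3.7, so the definition as written only parses on older interpreters. A hypothetical call that exercises the whitespace-collapsing behaviour:

# hypothetical usage: the runs of spaces and tabs inside the command are
# collapsed to single spaces before snakemake.shell executes it
shell(
    "samtools   sort "
    "\t-o sorted.bam    input.bam"
)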
github sequana / sequana / sequana / scripts / mapping.py View on Github external
    from sequana import FastQ
    from sequana import FastA
    S = 0
    for this in FastQ(options.file1):
        S += len(this['sequence'])
    if options.file2:
        for this in FastQ(options.file2):
            S += len(this['sequence'])
    ref = FastA(options.reference)
    coverage = float(S) / len(ref.sequences[0])
    print('Theoretical Depth of Coverage : %s' % coverage)

    params = {"reference": reference, "fastq": fastq, "thread": options.thread}

    # indexing
    shellcmd("bwa index %(reference)s " % params)
    cmd = "samtools faidx %(reference)s " % params

    # mapping
    cmd = "bwa mem -M "  # mark shorter split read as secondary; -M is not compulsary but recommended
    if options.pacbio:
        cmd += "-x pacbio "
    cmd += r" -t %(thread)s -R @RG\\tID:1\\tSM:1\\tPL:illumina -T 30 %(reference)s %(fastq)s  "

    # Samtools options:
    #   S:ignore input format
    #   h:include header
    #   b:bam output
    if options.sambamba is False:
        cmd += "| samtools view -Sbh | "
        # sorting BAM
        cmd += "samtools sort -@ %(thread)s -o %(reference)s.sorted.bam -"
github prophyle / prophyle / deprec / taxonomy_old_code / karel / build_index.py View on Github external
cmd = """
			"{assembler}" \
				-i "{i}" \
				-o "{o}" \
				-x "{x}" \
				-k {k} \
			""".format(
					assembler="../../bin/assembler",
					i='" -i "'.join(input_files),
					o='" -o "'.join(output_files),
					x=intersection_file,
					k=k,
				)
	logger.info('Cmd to be run: {}'.format(cmd))

	snakemake.shell(cmd)
	logger.info('Finished assembly')
github sequana / sequana / sequana / pacbio.py View on Github external
    def _to_fastX(self, mode, output_filename, threads=2):
        """Convert the BAM file to FASTA or FASTQ using samtools.

        :param mode: fastq or fasta
        :param output_filename: name of the converted file
        :param threads: number of samtools threads (-@)
        """
        # for now, we use samtools
        # bamtools could be used as well, but it takes just as long and its output
        # is ~10% larger (sequences are wrapped at 80 characters per line)
        from snakemake import shell
        cmd = "samtools %s  -@ %s %s > %s" % (mode, threads,
            self.filename, output_filename)
        logger.info("Please be patient")
        logger.info("This may be long depending on your input data file: ")
        logger.info("typically, a minute per  500,000 reads")
        shell(cmd)
        logger.info("done")
github sunbeam-labs / sunbeam / sunbeamlib / igv.py View on Github external
    def _control_script(igvcommands, igv_fp, igv_prefs):
        igvscript = tempfile.NamedTemporaryFile()
        igvscript.writelines(map(lambda x: bytes(x+'\n', 'ascii'), igvcommands))
        igvscript.flush()
        igvprefsfile = _write_prefs(igv_prefs)
        shell("xvfb-run -a -s '-screen 1 1920x1080x24' %s -o %s -b %s" % (igv_fp, igvprefsfile.name, igvscript.name))
github lcdb / lcdb-wf / wrappers / wrappers / sicer / wrapper.py View on Github external
# "library of raw redundancy-removed reads on significant islands
redundancy_removed = glob.glob(os.path.join(tmpdir, '*-W{0}-G{1}-FDR*-islandfiltered.bed'.format(window_size, gap_size)))
if len(redundancy_removed) == 1:
    redundancy_removed = redundancy_removed[0]
else:
    raise ValueError("SICER redundancy removed library file not found")

# "wig file for the island-filtered redundancy-removed reads
normalized_postfilter_wig = glob.glob(os.path.join(tmpdir, '*-W{0}-G{1}-FDR*-islandfiltered-normalized.wig'.format(window_size, gap_size)))
if len(normalized_postfilter_wig) == 1:
    normalized_postfilter_wig = normalized_postfilter_wig[0]
else:
    raise ValueError("SICER normalized postfilter wig file not found")

shell(
    "export LC_COLLATE=C; "
    # format the output in broadPeak format
github karel-brinda / smbl / smbl / __init__.py View on Github external
return [
			fastafile.get() for fastafile in smbl.fasta.get_registered_fastas()
		]

def all_programs():
	return [
			plugin.get_installation_files() for plugin in smbl.prog.plugins.get_registered_plugins()
		]

def all_compatible_programs():
	return [
			plugin.get_installation_files() for plugin in smbl.prog.plugins.get_registered_plugins()
				if plugin.is_platform_supported()
		]

snakemake.shell(
		"""
			mkdir -p "{}" "{}" "{}"
		""".format(bin_dir,fa_dir,src_dir)
	)
github lcdb / lcdb-wf / wrappers / wrappers / macs2 / bdgpeakcall / wrapper.py View on Github external
    '-l {d_estimate} '
    '-g {read_length} '
    '-o {bed} '
)
shell(cmds + '{log}')

bed_cleaning_intermediate = tempfile.NamedTemporaryFile().name
# Fix the output file so that it doesn't have negative numbers and so it fits
# inside the genome
shell(
    """awk -F "\\t" '{{OFS="\\t"; print $1, "0", $2}}' """
    "{snakemake.input.chromsizes} "
    "> {bed_cleaning_intermediate}"
)
unsorted_bed_intermediate = tempfile.NamedTemporaryFile().name
shell(
    "export LC_COLLATE=C; "
    """awk -F "\\t" '{{OFS="\\t"; if (($2>0) && ($3>0)) print $0}}' {bed} | """
    "bedtools intersect -a - -b {bed_cleaning_intermediate} > {unsorted_bed_intermediate} "
    "&& bedSort {unsorted_bed_intermediate} {bed}"
)
shell("rm {0} {1} {2}".format(qvalue_intermediate,
                              bed_cleaning_intermediate,
                              unsorted_bed_intermediate))