How to use the jcvi.apps.base.sh function in jcvi

To help you get started, we’ve selected a few jcvi examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tanghaibao / jcvi / assembly / preprocess.py View on Github external
@depends
def run_FastbAndQualb2Fastq(infile=None, outfile=None, rc=False):
    corr = op.basename(infile).rsplit(".", 1)[0]
    cmd = "FastbQualbToFastq HEAD_IN={0} HEAD_OUT={0}".format(corr)
    cmd += " PAIRED=False PHRED_OFFSET=33"
    if rc:
        cmd += " FLIP=True"
    sh(cmd)
github tanghaibao / jcvi / formats / cas.py View on Github external
p.add_option("-m", dest="multi", default=False, action="store_true",
        help="report multi-matches [default: %default]")
    opts, args = p.parse_args(args)

    if len(args) != 1:
        sys.exit(p.print_help())

    casfile, = args
    txtfile = casfile.replace(".cas", ".txt")
    assert op.exists(casfile)

    cmd = "assembly_table -n -s -p "
    if opts.multi:
        cmd += "-m "
    cmd += casfile
    sh(cmd, outfile=txtfile)

    return txtfile
github tanghaibao / jcvi / jcvi / apps / gmap.py View on Github external
def check_index(dbfile, supercat=False, go=True):
    if supercat:
        updated = False
        pf = dbfile.rsplit(".", 1)[0]
        supercatfile = pf + ".supercat"
        coordsfile = supercatfile + ".coords"
        if go and need_update(dbfile, supercatfile):
            cmd = "tGBS-Generate_Pseudo_Genome.pl"
            cmd += " -f {0} -o {1}".format(dbfile, supercatfile)
            sh(cmd)
            # Rename .coords file since gmap_build will overwrite it
            coordsbak = backup(coordsfile)
            updated = True
        dbfile = supercatfile + ".fasta"

    #dbfile = get_abs_path(dbfile)
    dbdir, filename = op.split(dbfile)
    if not dbdir:
        dbdir = "."
    dbname = filename.rsplit(".", 1)[0]
    safile = op.join(dbdir, "{0}/{0}.genomecomp".format(dbname))
    if dbname == filename:
        dbname = filename + ".db"

    if not go:
        return dbdir, dbname
github tanghaibao / jcvi / jcvi / formats / coords.py View on Github external
pctid = opts.pctid
    hitlen = opts.hitlen

    filename, = args
    if pctid == 0 and hitlen == 0:
        return filename

    pf, suffix = filename.rsplit(".", 1)
    outfile = "".join((pf, ".P{0}L{1}.".format(int(pctid), int(hitlen)), suffix))
    if not need_update(filename, outfile):
        return outfile

    if suffix == "delta":
        cmd = "delta-filter -i {0} -l {1} {2}".format(pctid, hitlen, filename)
        sh(cmd, outfile=outfile)
        return outfile

    fp = open(filename)
    fw = must_open(outfile, "w")
    for row in fp:
        try:
            c = CoordsLine(row)
        except AssertionError:
            continue

        if c.identity < pctid:
            continue
        if c.len2 < hitlen:
            continue
        if opts.overlap and not c.overlap:
            continue
github tanghaibao / jcvi / assembly / preprocess.py View on Github external
opts, args = p.parse_args(args)

    if len(args) not in (1, 2):
        sys.exit(not p.print_help())

    path = op.expanduser(opts.path)
    url = \
    "http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-{0}.zip"\
    .format(tv)

    if not op.exists(path):
        path = download(url)
        TrimUnzipped = "Trimmomatic-" + tv
        if not op.exists(TrimUnzipped):
            sh("unzip " + path)
        os.remove(path)
        path = op.join(TrimUnzipped, TrimJar)

    assert op.exists(path), \
        "Couldn't find Trimmomatic jar file at `{0}`".\
        format(path)

    adaptersfile = "adapters.fasta"
    write_file(adaptersfile, Adapters, skipcheck=True)

    assert op.exists(adaptersfile), \
        "Please place the illumina adapter sequence in `{0}`".\
        format(adaptersfile)

    if opts.phred is None:
        offset = guessoffset([args[0]])
github tanghaibao / jcvi / jcvi / assembly / automaton.py View on Github external
"""
    from jcvi.assembly.soap import prepare

    logging.debug("Work on {0} ({1})".format(pf, ','.join(p)))
    asm = "{0}.closed.scafSeq".format(pf)
    if not need_update(p, asm):
        logging.debug("Assembly found: {0}. Skipped.".format(asm))
        return

    slink(p, pf, tag, extra)

    cwd = os.getcwd()
    os.chdir(pf)
    prepare(sorted(glob("*.fastq") + glob("*.fastq.gz")) + \
            ["--assemble_1st_rank_only", "-K 31"])
    sh("./run.sh")
    sh("cp asm31.closed.scafSeq ../{0}".format(asm))

    logging.debug("Assembly finished: {0}".format(asm))
    os.chdir(cwd)
github tanghaibao / jcvi / jcvi / annotation / train.py View on Github external
mkdir(augdir)
    os.chdir(augdir)
    target = "{0}/config/species/{1}".format(mhome, species)

    if op.exists(target):
        logging.debug("Removing existing target `{0}`".format(target))
        sh("rm -rf {0}".format(target))
        
    config_path = "{0}/config".format(mhome)
    sh("{0}/scripts/new_species.pl --species={1} --AUGUSTUS_CONFIG_PATH={2}".format(mhome, species, config_path))
    sh("{0}/scripts/gff2gbSmallDNA.pl {1} {2} 1000 raw.gb".\
            format(mhome, gffile, fastafile))
    sh("{0}/bin/etraining --species={1} raw.gb 2> train.err".\
            format(mhome, species))
    sh("cat train.err | perl -pe 's/.*in sequence (\S+): .*/$1/' > badgenes.lst")
    sh("{0}/scripts/filterGenes.pl badgenes.lst raw.gb > training.gb".\
            format(mhome))
    sh("grep -c LOCUS raw.gb training.gb")

    # autoAugTrain failed to execute, disable for now
    if opts.autotrain:
        sh("rm -rf {0}".format(target))
        sh("{0}/scripts/autoAugTrain.pl --trainingset=training.gb --species={1}".\
                format(mhome, species))

    os.chdir(cwd)
    sh("cp -r {0} augustus/".format(target))
github tanghaibao / jcvi / jcvi / apps / align.py View on Github external
def run_formatdb(infile=None, outfile=None, dbtype="nucl"):
    cmd = "makeblastdb"
    cmd += " -dbtype {0} -in {1}".format(dbtype, infile)
    sh(cmd)
github tanghaibao / jcvi / assembly / preprocess.py View on Github external
if need_update(filtfastb, editfastb):
        cmd = "FindErrors HEAD_IN={0} HEAD_OUT={1}".format(filt, edit)
        cmd += " PLOIDY_FILE=data/ploidy"
        cmd += nthreads
        sh(cmd)

    corr = datadir + "/{0}_corr".format(tag)
    corrfastb = corr + ".fastb"
    if need_update(editfastb, corrfastb):
        cmd = "CleanCorrectedReads DELETE=True"
        cmd += " HEAD_IN={0} HEAD_OUT={1}".format(edit, corr)
        cmd += " PLOIDY_FILE={0}/ploidy".format(datadir)
        if haploidify:
            cmd += " HAPLOIDIFY=True"
        cmd += nthreads
        sh(cmd)

    pf = op.basename(corr)

    cwd = os.getcwd()
    os.chdir(datadir)
    corrfastq = pf + ".fastq"
    run_FastbAndQualb2Fastq(infile=op.basename(corrfastb), outfile=corrfastq)
    os.chdir(cwd)

    pairsfile = pf + ".pairs"
    fragsfastq = pf + ".corr.fastq"
    run_pairs(infile=[op.join(datadir, pairsfile), op.join(datadir, corrfastq)],
                      outfile=fragsfastq)
github tanghaibao / jcvi / jcvi / assembly / kmer.py View on Github external
def merylhistogram(merylfile):
    """
    Run meryl to dump histogram to be used in kmer.histogram(). The merylfile
    are the files ending in .mcidx or .mcdat.
    """
    pf, sf = op.splitext(merylfile)
    outfile = pf + ".histogram"
    if need_update(merylfile, outfile):
        cmd = "meryl -Dh -s {0}".format(pf)
        sh(cmd, outfile=outfile)

    return outfile