How to use the dxpy.entry_point function in dxpy

To help you get started, we’ve selected a few dxpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ENCODE-DCC / long-rna-seq-pipeline / dnanexus / mad-qc / src / mad-qc.py View on Github external
@dxpy.entry_point("main")
def main(quants_a, quants_b):
    """Entry point: fetch two quantification files and derive output names.

    quants_a and quants_b are each wrapped in a dxpy.DXFile and downloaded
    to the fixed local names "quants_a" and "quants_b".
    """

    # tool_versions.py --applet $script_name --appver $script_ver
    # Capture whatever tool_versions.py prints for this executable's JSON.
    sw_versions = subprocess.check_output(['tool_versions.py', '--dxjson', 'dnanexus-executable.json'])

    dxfile_a = dxpy.DXFile(quants_a)
    dxfile_b = dxpy.DXFile(quants_b)

    # NOTE: print statement syntax -- this code targets Python 2.
    print "* Downloading files..."
    dxpy.download_dxfile(dxfile_a.get_id(), "quants_a")
    dxpy.download_dxfile(dxfile_b.get_id(), "quants_b")

    # Create an appropriate name for output files.
    # Only the part of each file name before its first '.' is passed on.
    out_root = root_name_from_pair(dxfile_a.name.split('.')[0],dxfile_b.name.split('.')[0])
    mad_plot_file = out_root + '_mad_plot.png'
github ENCODE-DCC / chip-seq-pipeline / dnanexus / xcor / src / xcor.py View on Github external
@dxpy.entry_point('main')
def main(input_bam, paired_end, spp_version):
    """Entry point: download the input BAM and derive intermediate names.

    input_bam is wrapped in a dxpy.DXFile and downloaded under its own
    platform name. The tagAlign file name is built from the BAM name with a
    trailing ".bam" removed. paired_end selects the 'PE2SE' or 'SE' infix.
    """

    # The following line(s) initialize your data object inputs on the platform
    # into dxpy.DXDataObject instances that you can start using immediately.

    input_bam_file = dxpy.DXFile(input_bam)

    input_bam_filename = input_bam_file.name
    # Remove only a literal trailing ".bam".  str.rstrip('.bam') would strip
    # any trailing run of the characters '.', 'b', 'a', 'm', so a name such
    # as "rep1_b.bam" would be mangled to "rep1_".
    bam_suffix = '.bam'
    if input_bam_filename.endswith(bam_suffix):
        input_bam_basename = input_bam_filename[:-len(bam_suffix)]
    else:
        input_bam_basename = input_bam_filename
    dxpy.download_dxfile(input_bam_file.get_id(), input_bam_filename)

    intermediate_TA_filename = input_bam_basename + ".tagAlign"
    if paired_end:
        end_infix = 'PE2SE'
    else:
        end_infix = 'SE'
github ENCODE-DCC / chip-seq-pipeline / dnanexus / encode_map / src / encode_map.py View on Github external
@dxpy.entry_point("postprocess")
def postprocess(indexed_reads, unmapped_reads, reference_tar,
                bwa_version, samtools_version, debug):
    """Entry point "postprocess": set the log level and resolve tool paths.

    debug selects logging.DEBUG when truthy, logging.INFO otherwise.
    samtools_version and bwa_version are looked up in SAMTOOLS_PATH and
    BWA_PATH; a missing (or falsy) entry fails the corresponding assert.
    """

    if debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    # NOTE(review): these asserts are the only validation of the requested
    # versions and are skipped when Python runs with -O -- confirm that is
    # acceptable for this applet.
    samtools = SAMTOOLS_PATH.get(samtools_version)
    assert samtools, "samtools version %s is not supported" % (samtools_version)
    bwa = BWA_PATH.get(bwa_version)
    assert bwa, "BWA version %s is not supported" % (bwa_version)
    logger.info("In postprocess with samtools %s and bwa %s" % (samtools, bwa))

    # Accumulators for local file names, filled in further down.
    indexed_reads_filenames = []
    unmapped_reads_filenames = []
github ENCODE-DCC / chip-seq-pipeline / dnanexus / pseudoreplicator / src / pseudoreplicator.py View on Github external
@dxpy.entry_point('main')
def main(input_tags, prefix=None):
    """Entry point: download the input tags file and inspect its first line.

    input_tags is wrapped in a dxpy.DXFile and downloaded under its own
    platform name; the file is then opened with gzip and its first line is
    logged.
    """

    input_tags_file = dxpy.DXFile(input_tags)

    input_tags_filename = input_tags_file.name
    dxpy.download_dxfile(input_tags_file.get_id(), input_tags_filename)

    # introspect the file to determine tagAlign (thus SE) or BEDPE (thus PE)
    # strip extension as appropriate

    # NOTE(review): the output of 'ls' is discarded -- looks like a debugging
    # leftover; confirm before removing.
    subprocess.check_output('ls', shell=True)
    # Only the first line is needed; the file is closed right after reading it.
    with gzip.open(input_tags_filename) as f:
        firstline = f.readline()
    logger.info('First line of input_tags:\n%s' % (firstline))

    # presumably the column count of a single-end tagAlign line -- verify
    # against the code that uses it
    se_cols = 6
github ENCODE-DCC / chip-seq-pipeline / dnanexus / accessionator / src / accessionator.py View on Github external
@dxpy.entry_point('main')
def main(folder_name, key_name, assembly, noupload, force, debug):
	"""Entry point: accession the bams found under folder_name/bams.

	The requirements and skip rules are listed in the comments below.
	"""

	#accessions bams contained within the folder named folder_name/bams

	#Requires
	#. directory structure folder_name/bams/ENCSRxxxabc/ ... /basename[.anything].bam
	#. basename contains one or more ENCFF numbers from which the bam is derived
	#. bam_filename.flagstat.qc exists
	#. raw bam flagstat file exists in folder_name/raw_bams/ENCSRxxxabc/ ... /basename[.anything].flagstat.qc

	#if bam file's tags on DNAnexus already contain an ENCFF number, assume it's already accessioned and skip
	#create a fully qualified project:filename for submitted_file_name and calculate the file size
	#if an ENCFF object exists with the same submitted_file_name, AND it has the same size, skip

	#**INFER the experiment accession number from the bam's containing folder
	#calculate the md5
github ENCODE-DCC / chip-seq-pipeline / dnanexus / encode_map / src / encode_map.py View on Github external
@dxpy.entry_point("crop")
def crop(reads1_file, reads2_file, crop_length, debug):

    if debug:
        logger.setLevel(logging.DEBUG)
    else:
        logger.setLevel(logging.INFO)

    logger.setLevel(logging.INFO)
    if crop_length == 'native':
        output = dict(zip(
            ["cropped_reads1", "cropped_reads2"], [reads1_file, reads2_file]))
    else:
        reads1_filename = dxpy.describe(reads1_file)['name']
        reads1_basename = strip_extensions(reads1_filename, STRIP_EXTENSIONS)
        dxpy.download_dxfile(reads1_file, reads1_filename)
        if reads2_file:
github ENCODE-DCC / chip-seq-pipeline / dnanexus / input_shield / src / input_shield.py View on Github external
@dxpy.entry_point('main')
def main(reads1, reads2, crop_length, reference_tar,
         bwa_aln_params, bwa_version, samtools_version,
         keyfile, debug, key=None):
    """Entry point: the expected shape of reads1/reads2 is described below.

    debug selects logging.DEBUG when truthy.
    """

    # reads1 and reads2 are expected to be arrays of file identifiers
    # identifiers can be DNAnexus files or ENCODE file accession numbers
    # For SE, reads2 is empty
    # For PE, len(reads1) = len(reads2)
    # Multiple PE pairs or SE files are just catted before mapping
    # Error on mixed SE/PE - although this can be implemented as just a
    # "" entry at that position in reads2 array
    # TODO: Add option to down-sample mixed PE/SE to SE

    if debug:
        logger.setLevel(logging.DEBUG)
    else:
github ENCODE-DCC / chip-seq-pipeline / dnanexus / scrub / src / scrub.py View on Github external
@dxpy.entry_point('main')
def main(input_bams):
    """Entry point: wrap every input in a dxpy.DXFile and set up a worker pool.

    input_bams is iterated once and rebound to the list of DXFile objects.
    """

    # Initialize data object inputs on the platform
    # into dxpy.DXDataObject instances.

    # From here on input_bams holds DXFile handles, not the raw inputs.
    input_bams = [dxpy.DXFile(item) for item in input_bams]

    # Download each file input to a new directory in the local file system
    # using variable names for the filenames.
    # Construct output filenames.
    # Dispatch jobs to a pool of workers.

    out_paths = []
    pool = Pool()  # default is pool of cpu_count() workers

    for i, bam in enumerate(input_bams):
github ENCODE-DCC / chip-seq-pipeline / dnanexus / idr / src / idr.py View on Github external
@dxpy.entry_point('main')
def main(rep1_peaks, rep2_peaks, pooled_peaks, idr_threshold, rank, interactive):
    """Entry point: wrap the three peak inputs and record their file names.

    rep1_peaks, rep2_peaks and pooled_peaks are each wrapped in a
    dxpy.DXFile; the platform name of each is kept as the local file name.
    """

    # Initialize the data object inputs on the platform into
    # dxpy.DXDataObject instances.

    # Hard-coded here rather than taken from the inputs.
    idr_version = 1

    rep1_peaks_file = dxpy.DXFile(rep1_peaks)
    rep2_peaks_file = dxpy.DXFile(rep2_peaks)
    pooled_peaks_file = dxpy.DXFile(pooled_peaks)

    rep1_peaks_filename = rep1_peaks_file.name
    rep2_peaks_filename = rep2_peaks_file.name
    pooled_peaks_filename = pooled_peaks_file.name

    # Download the file inputs to the local file system.