Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# filePath = filePath.replace("'","").replace('"','').replace("}","").replace("{","")
# Best-effort decode: filePath may be a JSON document wrapped in single
# quotes (presumably a serialized dxlink -- confirm against the caller).
# Anything that does not decode leaves dxlink as None.
try:
    dxlink = json.loads(filePath.strip("'"))
except (AttributeError, TypeError, ValueError):
    # AttributeError/TypeError: filePath is not a text string.
    # ValueError: the text is not valid JSON (json.JSONDecodeError is a
    # ValueError subclass).
    dxlink = None
if project != None:
try:
if dxlink != None:
dxfile = dxpy.get_handler(dxlink,project=project)
else:
dxfile = dxpy.get_handler(filePath,project=project)
except:
try:
dxlink = dxpy.dxlink(filePath,project=project)
dxfile = dxpy.get_handler(dxlink)
except:
try:
proj_id = env_get_current_project_id()
dxfile = dxpy.DXFile(filePath,project=proj_id)
except:
sys.stderr.write('ERROR: unable to find file "' + filePath + '": \n')
sys.exit(0) # Do not error on tool run in dx script
else:
try:
if dxlink != None:
dxfile = dxpy.get_handler(dxlink)
else:
dxfile = dxpy.get_handler(filePath)
# Remove the read inputs from align_input before the alignment stages are
# added: either the single 'reads' entry or the 'reads_1'/'reads_2' pair.
if align_input.get('reads'):
    del align_input['reads']
elif align_input.get('reads_1'):
    del align_input['reads_1']
    del align_input['reads_2']

# Add the stage for the applet named by star_step and keep a link to its
# 'annotation_bam' output for the quantification inputs below.
stage_id = wf.add_stage(
    find_applet_by_name(star_step, applets_project_id),
    stage_input=align_input,
    folder=experiment)
star_annotation_output = dxpy.dxlink({
    'stage': stage_id,
    'outputField': 'annotation_bam'
})

# The applet named by th_step reuses the same input dict, with the
# 'star_index' entry swapped for 'tophat_index'.
del align_input['star_index']
align_input['tophat_index'] = prep_tophat_output
stage_id = wf.add_stage(
    find_applet_by_name(th_step, applets_project_id),
    stage_input=align_input,
    folder=experiment)
tophat_genome_output = dxpy.dxlink({
    'stage': stage_id,
    'outputField': 'genome_bam'
})
## above file is needed for bam-to-bigwig (unimplemented)

# 'quant-rsem'
quant_input = {
    'rsem_index': prep_rsem_output,
    'annotation_bam': star_annotation_output,
    'paired': inputs['paired'],
    'stranded': inputs['stranded'],
    'nthreads': inputs['nthreads']
}
# NOTE(review): the source arrived with its indentation stripped; both
# assignments are assumed to belong to the `if not export:` body -- confirm.
if not export:
    quant_input['read_prefix'] = index_prefix
    quant_input['rnd_seed'] = inputs['rnd_seed']
if line[5] == '+':
UTR_type = "3' UTR"
else:
UTR_type = "5' UTR"
UTR_size = (min(blockSizes[i], exon_hi - thickEnd))
UTR_start = blockStarts[i] + thickEnd - exon_lo
span.add_row(generate_gene_row(line,
UTR_size,
UTR_start,
UTR_type,
default_row,
current_parent_id,
current_span_id))
current_span_id += 1
return dxpy.dxlink(span.get_id())
idr_stages.append({'name': 'IDR Pooled Pseudoreplicates', 'stage_id': idr_stage_id})

# Inputs for the final IDR stage: the 'IDR_peaks' output of each of the four
# earlier IDR stages, looked up by name in idr_stages.  next() is called
# without a default, so a missing stage name raises StopIteration.
stage_input = {
    'reps_peaks': dxpy.dxlink(
        {'stage': next(ss.get('stage_id') for ss in idr_stages if ss['name'] == 'IDR True Replicates'),
         'outputField': 'IDR_peaks'}),
    'r1pr_peaks': dxpy.dxlink(
        {'stage': next(ss.get('stage_id') for ss in idr_stages if ss['name'] == 'IDR Rep 1 Self-pseudoreplicates'),
         'outputField': 'IDR_peaks'}),
    'r2pr_peaks': dxpy.dxlink(
        {'stage': next(ss.get('stage_id') for ss in idr_stages if ss['name'] == 'IDR Rep 2 Self-pseudoreplicates'),
         'outputField': 'IDR_peaks'}),
    'pooledpr_peaks': dxpy.dxlink(
        {'stage': next(ss.get('stage_id') for ss in idr_stages if ss['name'] == 'IDR Pooled Pseudoreplicates'),
         'outputField': 'IDR_peaks'}),
    'as_file': dxpy.dxlink(resolve_file(args.narrowpeak_as))
}

# The blacklist is optional and only passed through when one was given.
if blacklist:
    stage_input['blacklist'] = blacklist

# Use the explicit chrom_sizes when provided; otherwise link to the
# 'chrom_sizes' input of the stage identified by encode_spp_stage_id.
if chrom_sizes:
    stage_input['chrom_sizes'] = chrom_sizes
else:
    stage_input['chrom_sizes'] = dxpy.dxlink(
        {'stage': encode_spp_stage_id, 'inputField': 'chrom_sizes'})

idr_stage_id = workflow.add_stage(
    encode_idr_applet,
    name='Final IDR peak calls',
    folder=idr_output_folder,
    stage_input=stage_input
)
idr_stages.append({'name': 'Final IDR peak calls', 'stage_id': idr_stage_id})
if not (args.nomap):
# Reference genome selected by the organism and gender inputs.
genome = find_reference_file_by_name(GENOME_REFERENCES[inputs['organism']][inputs['gender']]['genome'], ENCODE_REFERENCES_PROJECT)
# RSEM always takes male genome
rsem_genome = find_reference_file_by_name(GENOME_REFERENCES[inputs['organism']]['m']['genome'], ENCODE_REFERENCES_PROJECT)
index_prefix = inputs['spec_name']

# 'merge-annotation': when exporting, the stage is added with no inputs bound.
if export:
    merge_input = {}
else:
    merge_input = {
        'gene_annotation': gene_annotation,
        'trna_annotation': trna_annotation,
        'spike_in': spike_in
    }
stage_id = wf.add_stage(
    find_applet_by_name('merge-annotation', applets_project_id),
    stage_input=merge_input,
    folder=experiment)
merge_output = dxpy.dxlink({
    'stage': stage_id,
    'outputField': 'combined_gtf'
})

# 'prep-star': always consumes the merged annotation produced above.
prep_input = {
    'annotations': merge_output
}
# NOTE(review): the source arrived with its indentation stripped; all three
# assignments are assumed to belong to the `if not export:` body -- confirm.
if not export:
    prep_input['genome'] = genome
    prep_input['spike_in'] = spike_in
    prep_input['index_prefix'] = index_prefix
stage_id = wf.add_stage(
    find_applet_by_name('prep-star', applets_project_id),
    stage_input=prep_input,
    folder=experiment)
prep_star_output = dxpy.dxlink({
    'stage': stage_id,
    'outputField': 'star_index'
})
else:
entry.append('')
spansTable.add_rows([entry])
spanId += 1
if hasGenes:
types = ["Genes", "gri"]
else:
types = ["Spans", "gri"]
for x in additional_type:
types.append(x)
spansTable.add_types(types)
spansTable.flush()
spansTable.close()
print(spansTable.get_id())
job_outputs = dxpy.dxlink(spansTable.get_id())
return job_outputs
# Every mapping stage other than Rep1 links its 'reference_tar' input to the
# Rep1 stage's 'reference_tar' input.  next() is called without a default,
# so a missing 'Rep1' entry raises StopIteration.
rep1_stage_id = next(ss.get('map_stage_id') for ss in mapping_superstages if ss['name'] == 'Rep1')
for mapping_superstage in mapping_superstages:
    superstage_name = mapping_superstage.get('name')
    superstage_id = mapping_superstage.get('map_stage_id')

    # NOTE(review): nesting below was reconstructed from a source whose
    # indentation had been stripped; update_stage is assumed to sit inside
    # this `if` because mapping_stage_input is only built here -- confirm.
    if mapping_superstage.get('input_args') or blank_workflow:
        mapping_stage_input = {}
        if superstage_name != "Rep1":
            mapping_stage_input['reference_tar'] = dxpy.dxlink(
                {'stage': rep1_stage_id,
                 'inputField': 'reference_tar'})
        elif args.reference:
            # Rep1 gets the reference file itself, when one was supplied.
            mapping_stage_input['reference_tar'] = dxpy.dxlink(
                reference_tar.get_id())

        if not blank_workflow:
            # Read pairs are assumed to be in order read1, read2; they are
            # bound to the inputs 'reads1', 'reads2', ...
            for arg_index, input_arg in enumerate(mapping_superstage['input_args']):
                reads = dxpy.dxlink(resolve_file(input_arg).get_id())
                mapping_stage_input['reads%d' % (arg_index + 1)] = reads

        # The mapping stage itself was already added in an earlier pass, so
        # only its inputs are updated here.
        workflow.update_stage(superstage_id, stage_input=mapping_stage_input)
filter_qc_stage_input = {
ctl_rep2_ta = dxpy.dxlink(
{'stage' : next(ss.get('xcor_stage_id') for ss in mapping_superstages if ss['name'] == 'Ctl2'),
'outputField': 'tagAlign_file'})
else:
ctl_rep2_ta = ctl_rep1_ta
rep1_paired_end = dxpy.dxlink(
{'stage': next(ss.get('xcor_stage_id') for ss in mapping_superstages if ss['name'] == 'Rep1'),
'outputField': 'paired_end'})
rep2_paired_end = dxpy.dxlink(
{'stage': next(ss.get('xcor_stage_id') for ss in mapping_superstages if ss['name'] == 'Rep2'),
'outputField': 'paired_end'})
else: #skipped the mapping, so just bring in the inputs from arguments
exp_rep1_ta = dxpy.dxlink(resolve_file(args.rep1[0]).get_id())
exp_rep2_ta = dxpy.dxlink(resolve_file(args.rep2[0]).get_id())
ctl_rep1_ta = dxpy.dxlink(resolve_file(args.ctl1[0]).get_id())
ctl_rep2_ta = dxpy.dxlink(resolve_file(args.ctl2[0]).get_id())
rep1_paired_end = args.rep1pe
rep2_paired_end = args.rep2pe
#here we need to calculate the cc scores files, because we're only being supplied tagAligns
#if we had mapped everything above we'd already have a handle to the cc file
xcor_only_applet = find_applet_by_name(XCOR_ONLY_APPLET_NAME, applet_project.get_id())
xcor_output_folder = resolve_folder(output_project, output_folder + '/' + xcor_only_applet.name)
xcor_only_stages = []
exp_rep1_cc_stage_id = workflow.add_stage(
xcor_only_applet,
name="Rep1 cross-correlation",
folder=xcor_output_folder,
stage_input={
'input_tagAlign': exp_rep1_ta,
'paired_end': rep1_paired_end
# 'prep-star': always consumes the merged annotation link.
prep_input = {
    'annotations': merge_output
}
# NOTE(review): the source arrived with its indentation stripped; all three
# assignments are assumed to belong to the `if not export:` body -- confirm.
if not export:
    prep_input['genome'] = genome
    prep_input['spike_in'] = spike_in
    prep_input['index_prefix'] = index_prefix
stage_id = wf.add_stage(
    find_applet_by_name('prep-star', applets_project_id),
    stage_input=prep_input,
    folder=experiment)
prep_star_output = dxpy.dxlink({
    'stage': stage_id,
    'outputField': 'star_index'
})

# 'prep-tophat': added with the same prep_input as 'prep-star'.
stage_id = wf.add_stage(
    find_applet_by_name('prep-tophat', applets_project_id),
    stage_input=prep_input,
    folder=experiment)
prep_tophat_output = dxpy.dxlink({
    'stage': stage_id,
    'outputField': 'tophat_index'
})

# 'prep-rsem': same inputs, except that the genome is replaced by
# rsem_genome when not exporting.
if not export:
    ## overwrite with male only
    prep_input['genome'] = rsem_genome
stage_id = wf.add_stage(
    find_applet_by_name('prep-rsem', applets_project_id),
    stage_input=prep_input,
    folder=experiment)
prep_rsem_output = dxpy.dxlink({
    'stage': stage_id,
    'outputField': 'rsem_index'
})
## alignment steps
align_input = {
'outputField': 'CC_scores_file'})
ctl_rep1_ta = dxpy.dxlink(
{'stage' : next(ss.get('xcor_stage_id') for ss in mapping_superstages if ss['name'] == 'Ctl1'),
'outputField': 'tagAlign_file'})
ctl_rep2_ta = dxpy.dxlink(
{'stage' : next(ss.get('xcor_stage_id') for ss in mapping_superstages if ss['name'] == 'Ctl2'),
'outputField': 'tagAlign_file'})
rep1_paired_end = dxpy.dxlink(
{'stage': next(ss.get('xcor_stage_id') for ss in mapping_superstages if ss['name'] == 'Rep1'),
'outputField': 'paired_end'})
rep2_paired_end = dxpy.dxlink(
{'stage': next(ss.get('xcor_stage_id') for ss in mapping_superstages if ss['name'] == 'Rep2'),
'outputField': 'paired_end'})
else: #skipped the mapping, so just bring in the inputs from arguments
exp_rep1_ta = dxpy.dxlink(resolve_file(args.rep1[0]).get_id())
exp_rep2_ta = dxpy.dxlink(resolve_file(args.rep2[0]).get_id())
ctl_rep1_ta = dxpy.dxlink(resolve_file(args.ctl1[0]).get_id())
ctl_rep2_ta = dxpy.dxlink(resolve_file(args.ctl2[0]).get_id())
rep1_paired_end = args.rep1pe
rep2_paired_end = args.rep2pe
#here we need to calculate the cc scores files, because we're only being supplied tagAligns
#if we had mapped everything above we'd already have a handle to the cc file
xcor_only_applet = find_applet_by_name(XCOR_ONLY_APPLET_NAME, applet_project.get_id())
xcor_output_folder = resolve_folder(output_project, output_folder + '/' + xcor_only_applet.name)
xcor_only_stages = []
exp_rep1_cc_stage_id = workflow.add_stage(
xcor_only_applet,
name="Rep1 cross-correlation",
folder=xcor_output_folder,
stage_input={