Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# -------------------------------------------------------------------------
# Process user defined annotations
# -------------------------------------------------------------------------
# Store every user-supplied BED annotation so that they can be reused
# together if multiple overlaps are needed.
# NOTE(review): indentation is missing in this copy of the code, so the
# nesting described below is inferred from the statements themselves.
all_more_beds = list()
if more_bed is not None:
message("Processing user-defined regions (bed format).")
# more_bed and more_bed_labels are walked in lockstep: one label per file.
for bed_anno, bed_lab in zip(more_bed, more_bed_labels):
message("Processing " + str(bed_lab), type="INFO")
# Strict mode: every chromosome seen in the BED file must be a key of
# chrom_len, otherwise an ERROR message is emitted for it.
if not force_chrom_more_bed:
# Collect the distinct chromosome names first so that each one is
# checked (and reported) only once.
chrom_list = set()
for i in BedTool(bed_anno.name):
chrom_list.add(i.chrom)
for i in chrom_list:
if i not in chrom_len:
message("Chromosome " + str(
i) + " is undefined in --more-bed with label " + bed_lab + ". Maybe use --force-chrom-more-bed.",
type="ERROR")
else:
# Permissive mode: copy only the records whose chromosome is a key of
# chrom_len into a temporary BED file; n counts the records kept.
bed_anno_sub = make_tmp_file(prefix='more_bed_x_chrom_info' + bed_lab, suffix='.bed')
n = 0
for i in BedTool(bed_anno.name):
if i.chrom in chrom_len:
bed_anno_sub.write("\t".join(i.fields) + "\n")
n += 1
# n == 0 means that no record of this BED file was kept.
if n == 0:
# Restrict the peaks to the chromosomes that are keys of chrom_len and write
# the retained records to a temporary BED file.
peak_file_sub = make_tmp_file(prefix='peaks_x_chrom_info', suffix='.bed')
n = 0
for peak in peak_file:
    if peak.chrom not in chrom_len:
        continue
    peak_file_sub.write("\t".join(peak.fields) + "\n")
    n += 1
peak_file_sub.close()

# Nothing was retained: report it as an error.
if n == 0:
    message("The --peak-file file does not contain any genomic feature "
            "falling in chromosomes declared in --chrom-info.",
            type="ERROR")

# From here on, peak_file refers to the filtered records only.
peak_file = BedTool(peak_file_sub.name)
# -------------------------------------------------------------------------
# Sort and merge the peaks
# -------------------------------------------------------------------------
# The peaks may not arrive sorted or merged, so do both unconditionally:
# sort first, then merge the sorted result. The cost should be small
# compared to the expected total running time.
sorted_peaks = peak_file.sort()
peak_file = sorted_peaks.merge()
# -------------------------------------------------------------------------
# Region exclusion
# -------------------------------------------------------------------------
# If there is an exclusion of certain regions to be done, do it.
# Here, we do exclusion on the peak file ('bedA') and the chrom sizes.
# Exclusion on the other bed files or gtf extracted bed files ('bedsB') is
# Report that the --more-bed file has no record on a chromosome declared in
# --chrom-info.
# NOTE(review): indentation is missing in this copy; this call is presumably
# the body of an `if n == 0:` check — confirm against the full file.
message("The --more-bed file does not contain any genomic feature "
"falling in chromosomes declared in --chrom-info.",
type="ERROR")
# Close the filtered temporary file and use it as the annotation from now on.
bed_anno_sub.close()
bed_anno = bed_anno_sub
# Store all BED annotations in case a multiple overlap is needed
all_more_beds += [BedTool(bed_anno.name)]
# Save a copy of the annotation into a temporary file prefixed by its label.
tmp_bed = make_tmp_file(prefix=bed_lab, suffix=".bed")
bed_anno_tosave = BedTool(bed_anno.name)
bed_anno_tosave.saveas(tmp_bed.name)
# Call overlap_partial with the peaks as bedA and this annotation as bedsB;
# the result is stored in hits under the annotation label.
hits[bed_lab] = overlap_partial(bedA=peak_file,
bedsB=BedTool(bed_anno.name),
ft_type=bed_lab)
# TODO: prepare another possibility where, when an option such as
# `-all-more-beds-together` is used, we do the multiple overlap of the
# region with ALL of the more beds.
# Now, bedsB can be a list!
# The draft call below is disabled: it is a bare string literal, not code
# that runs.
"""
hits['multiple_beds'] = overlap_partial(bedA=peak_file, bedsB=all_more_beds)
"""
# ------------------ Treating the 'hits' dictionary --------------------- #
# An empty 'hits' mapping means that no overlap result was stored at all.
if not hits:
    message("No feature found.", type="ERROR")
### Print the 'hits' dictionary into a tabulated file
# Label for the i-th entry of bw_list: its file name without the directory
# part and without the extension.
bw_file_name = os.path.basename(bw_list[i])
labels += [os.path.splitext(bw_file_name)[0]]
# -------------------------------------------------------------------------
# Check input file is in bed or GTF format
#
# -------------------------------------------------------------------------
message("Loading input file...")
# Decide whether the input must be handled as GTF or as BED.
# NOTE(review): this copy compared the name with '' and would then pass an
# empty path to GTF(). '<stdin>' is the .name of sys.stdin; the sentinel
# looks to have been stripped as markup during extraction — confirm against
# the upstream file.
if inputfile.name == '<stdin>':
    # Input read from standard input is loaded as GTF without inspection.
    gtf = GTF(inputfile.name)
    is_gtf = True
else:
    region_bo = BedTool(inputfile.name)
    # A file without any record cannot provide regions.
    if len(region_bo) == 0:
        message("Unable to find requested regions",
                type="ERROR")
    # The file type reported by BedTool selects the GTF or the BED path.
    if region_bo.file_type == 'gff':
        gtf = GTF(inputfile.name)
        is_gtf = True
    else:
        is_gtf = False
# -------------------------------------------------------------------------
# Get regions of interest
#
# -------------------------------------------------------------------------
if is_gtf:
# Copy to bed_anno_sub only the records whose chromosome is a key of
# chrom_len; n counts the records kept.
# NOTE(review): indentation is missing in this copy of the code, so nesting
# must be inferred from the statements themselves.
n = 0
for i in BedTool(bed_anno.name):
if i.chrom in chrom_len:
bed_anno_sub.write("\t".join(i.fields) + "\n")
n += 1
# Nothing was kept: report it as an error.
if n == 0:
message("The --more-bed file does not contain any genomic feature "
"falling in chromosomes declared in --chrom-info.",
type="ERROR")
# Close the filtered temporary file and use it as the annotation from now on.
bed_anno_sub.close()
bed_anno = bed_anno_sub
# Store all BED annotations in case a multiple overlap is needed
all_more_beds += [BedTool(bed_anno.name)]
# Save a copy of the annotation into a temporary file prefixed by its label.
tmp_bed = make_tmp_file(prefix=bed_lab, suffix=".bed")
bed_anno_tosave = BedTool(bed_anno.name)
bed_anno_tosave.saveas(tmp_bed.name)
# Call overlap_partial with the peaks as bedA and this annotation as bedsB;
# the result is stored in hits under the annotation label.
hits[bed_lab] = overlap_partial(bedA=peak_file,
bedsB=BedTool(bed_anno.name),
ft_type=bed_lab)
# TODO: prepare another possibility where, when an option such as
# `-all-more-beds-together` is used, we do the multiple overlap of the
# region with ALL of the more beds.
# Now, bedsB can be a list!
# The draft call below is disabled: it is a bare string literal, not code
# that runs.
"""
hits['multiple_beds'] = overlap_partial(bedA=peak_file, bedsB=all_more_beds)
"""
# Keep the current record i only when its chromosome is a key of chrom_len;
# n counts the records kept.
# NOTE(review): this fragment starts in the middle of a loop whose header is
# not contiguous here, and indentation is missing in this copy.
if i.chrom in chrom_len:
bed_anno_sub.write("\t".join(i.fields) + "\n")
n += 1
# Nothing was kept: report it as an error.
if n == 0:
message("The --more-bed file does not contain any genomic feature "
"falling in chromosomes declared in --chrom-info.",
type="ERROR")
# Close the filtered temporary file and use it as the annotation from now on.
bed_anno_sub.close()
bed_anno = bed_anno_sub
# Store all BED annotations in case a multiple overlap is needed
all_more_beds += [BedTool(bed_anno.name)]
# Save a copy of the annotation into a temporary file prefixed by its label.
tmp_bed = make_tmp_file(prefix=bed_lab, suffix=".bed")
bed_anno_tosave = BedTool(bed_anno.name)
bed_anno_tosave.saveas(tmp_bed.name)
# Call overlap_partial with the peaks as bedA and this annotation as bedsB;
# the result is stored in hits under the annotation label.
hits[bed_lab] = overlap_partial(bedA=peak_file,
bedsB=BedTool(bed_anno.name),
ft_type=bed_lab)
# TODO: prepare another possibility where, when an option such as
# `-all-more-beds-together` is used, we do the multiple overlap of the
# region with ALL of the more beds.
# Now, bedsB can be a list!
# The draft call below is disabled: it is a bare string literal, not code
# that runs.
"""
hits['multiple_beds'] = overlap_partial(bedA=peak_file, bedsB=all_more_beds)
"""
# ------------------ Treating the 'hits' dictionary --------------------- #