How to use the pygtftk.bedtool_extension.BedTool function in pygtftk

To help you get started, we’ve selected a few pygtftk examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github dputhier / pygtftk / pygtftk / plugins / ologram.py View on Github external
# -------------------------------------------------------------------------
    # Process user defined annotations
    # -------------------------------------------------------------------------

    # Store all of the more_beds in case they are needed for multiple overlaps
    all_more_beds = list()

    if more_bed is not None:
        message("Processing user-defined regions (bed format).")
        for bed_anno, bed_lab in zip(more_bed, more_bed_labels):
            message("Processing " + str(bed_lab), type="INFO")

            if not force_chrom_more_bed:
                # Gather the distinct chromosome names used in this bed file.
                chrom_list = {feat.chrom for feat in BedTool(bed_anno.name)}

                # Report every chromosome that is absent from chrom_len.
                for i in chrom_list:
                    if i in chrom_len:
                        continue
                    message("Chromosome " + str(i)
                            + " is undefined in --more-bed with label " + bed_lab
                            + ". Maybe use --force-chrom-more-bed.",
                            type="ERROR")
            else:
                bed_anno_sub = make_tmp_file(prefix='more_bed_x_chrom_info' + bed_lab, suffix='.bed')

                n = 0
                for i in BedTool(bed_anno.name):
                    if i.chrom in chrom_len:
                        bed_anno_sub.write("\t".join(i.fields) + "\n")
                        n += 1
                if n == 0:
github dputhier / pygtftk / pygtftk / plugins / ologram.py View on Github external
peak_file_sub = make_tmp_file(prefix='peaks_x_chrom_info', suffix='.bed')

        # Copy to peak_file_sub only the peaks whose chromosome is a key of
        # chrom_len, counting how many lines are written.
        n = 0
        for i in peak_file:
            if i.chrom not in chrom_len:
                continue
            peak_file_sub.write("\t".join(i.fields) + "\n")
            n += 1

        peak_file_sub.close()

        # Nothing was kept: report it as an error.
        if not n:
            message("The --peak-file file does not contain any genomic feature "
                    "falling in chromosomes declared in --chrom-info.",
                    type="ERROR")

        # From here on, work with the filtered copy.
        peak_file = BedTool(peak_file_sub.name)

    # -------------------------------------------------------------------------
    # Sort and merge the peaks
    # -------------------------------------------------------------------------
    # Sort and merge the file in case this has not been done already.
    # The cost of doing so should be small compared to the
    # expected total running time.
    peak_file = peak_file.sort().merge()

    # -------------------------------------------------------------------------
    # Region exclusion
    # -------------------------------------------------------------------------

    # If there is an exclusion of certain regions to be done, do it.
    # Here, we do exclusion on the peak file ('bedA') and the chrom sizes.
    # Exclusion on the other bed files or gtf extracted bed files ('bedsB') is
github dputhier / pygtftk / pygtftk / plugins / ologram.py View on Github external
message("The --more-bed file does not contain any genomic feature "
                            "falling in chromosomes declared in --chrom-info.",
                            type="ERROR")

                bed_anno_sub.close()
                bed_anno = bed_anno_sub

                # Store every bed annotation in case a multiple overlap is needed
                all_more_beds += [BedTool(bed_anno.name)]

            # Write a copy of this annotation to a temporary bed file whose
            # name is prefixed with the annotation label.
            tmp_bed = make_tmp_file(prefix=bed_lab, suffix=".bed")
            bed_anno_tosave = BedTool(bed_anno.name)
            bed_anno_tosave.saveas(tmp_bed.name)

            # Record, under the annotation label, the result of overlap_partial
            # for the peak file (bedA) against this annotation (bedsB).
            hits[bed_lab] = overlap_partial(bedA=peak_file,
                                            bedsB=BedTool(bed_anno.name),
                                            ft_type=bed_lab)

    # TODO: add an alternative mode, enabled by an option such as
    # `-all-more-beds-together`, in which the multiple overlap of the
    # regions is computed against ALL of the more beds at once.
    # Note that bedsB can now be a list.
    """
    hits['multiple_beds'] = overlap_partial(bedA=peak_file, bedsB=all_more_beds)
    """

    # ------------------ Treating the 'hits' dictionary --------------------- #

    if len(hits) == 0:
        message("No feature found.", type="ERROR")

    ### Print the 'hits' dictionary into a tabulated file
github dputhier / pygtftk / pygtftk / plugins / cov_around_midpoints.py View on Github external
labels += [
                os.path.splitext(
                    os.path.basename(
                        bw_list[i]))[0]]

    # -------------------------------------------------------------------------
    # Check input file is in bed or GTF format
    #
    # -------------------------------------------------------------------------

    message("Loading input file...")
    # Input read from standard input is loaded directly as a GTF; a named
    # file is first opened with BedTool so that its format can be inspected.
    # NOTE(review): the literal compared here was empty in this listing,
    # which looks like '<stdin>' lost when angle brackets were stripped
    # during HTML extraction (sys.stdin.name is '<stdin>'). Confirm against
    # the upstream plugin source.
    if inputfile.name == '<stdin>':
        gtf = GTF(inputfile.name)
        is_gtf = True
    else:
        region_bo = BedTool(inputfile.name)
        # An input without any record is reported as an error.
        if len(region_bo) == 0:
            message("Unable to find requested regions",
                    type="ERROR")

        # Content detected as 'gff' is reloaded as a GTF object; anything
        # else is flagged as non-GTF input (is_gtf is False).
        if region_bo.file_type == 'gff':
            gtf = GTF(inputfile.name)
            is_gtf = True
        else:
            is_gtf = False

    # -------------------------------------------------------------------------
    # Get regions of interest
    #
    # -------------------------------------------------------------------------

    if is_gtf:
github dputhier / pygtftk / pygtftk / plugins / ologram.py View on Github external
n = 0
                for i in BedTool(bed_anno.name):
                    if i.chrom in chrom_len:
                        bed_anno_sub.write("\t".join(i.fields) + "\n")
                        n += 1
                if n == 0:
                    message("The --more-bed file does not contain any genomic feature "
                            "falling in chromosomes declared in --chrom-info.",
                            type="ERROR")

                bed_anno_sub.close()
                bed_anno = bed_anno_sub

                # Store every bed annotation in case a multiple overlap is needed
                all_more_beds += [BedTool(bed_anno.name)]

            # Write a copy of this annotation to a temporary bed file whose
            # name is prefixed with the annotation label.
            tmp_bed = make_tmp_file(prefix=bed_lab, suffix=".bed")
            bed_anno_tosave = BedTool(bed_anno.name)
            bed_anno_tosave.saveas(tmp_bed.name)

            # Record, under the annotation label, the result of overlap_partial
            # for the peak file (bedA) against this annotation (bedsB).
            hits[bed_lab] = overlap_partial(bedA=peak_file,
                                            bedsB=BedTool(bed_anno.name),
                                            ft_type=bed_lab)

    # TODO: add an alternative mode, enabled by an option such as
    # `-all-more-beds-together`, in which the multiple overlap of the
    # regions is computed against ALL of the more beds at once.
    # Note that bedsB can now be a list.
    """
    hits['multiple_beds'] = overlap_partial(bedA=peak_file, bedsB=all_more_beds)
    """
github dputhier / pygtftk / pygtftk / plugins / ologram.py View on Github external
if i.chrom in chrom_len:
                        bed_anno_sub.write("\t".join(i.fields) + "\n")
                        n += 1
                if n == 0:
                    message("The --more-bed file does not contain any genomic feature "
                            "falling in chromosomes declared in --chrom-info.",
                            type="ERROR")

                bed_anno_sub.close()
                bed_anno = bed_anno_sub

                # Store every bed annotation in case a multiple overlap is needed
                all_more_beds += [BedTool(bed_anno.name)]

            # Write a copy of this annotation to a temporary bed file whose
            # name is prefixed with the annotation label.
            tmp_bed = make_tmp_file(prefix=bed_lab, suffix=".bed")
            bed_anno_tosave = BedTool(bed_anno.name)
            bed_anno_tosave.saveas(tmp_bed.name)

            # Record, under the annotation label, the result of overlap_partial
            # for the peak file (bedA) against this annotation (bedsB).
            hits[bed_lab] = overlap_partial(bedA=peak_file,
                                            bedsB=BedTool(bed_anno.name),
                                            ft_type=bed_lab)

    # TODO: add an alternative mode, enabled by an option such as
    # `-all-more-beds-together`, in which the multiple overlap of the
    # regions is computed against ALL of the more beds at once.
    # Note that bedsB can now be a list.
    """
    hits['multiple_beds'] = overlap_partial(bedA=peak_file, bedsB=all_more_beds)
    """

    # ------------------ Treating the 'hits' dictionary --------------------- #