How to use the pyensembl.normalization.normalize_chromosome function in pyensembl

To help you get started, we’ve selected a few pyensembl examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github openvax / pyensembl / test / test_locus.py View on Github external
assert normalize_chromosome("chrmt") == "chrMT"

    with assert_raises(TypeError):
        normalize_chromosome({"a": "b"})

    with assert_raises(TypeError):
        normalize_chromosome([])

    with assert_raises(TypeError):
        normalize_chromosome(None)

    with assert_raises(ValueError):
        normalize_chromosome("")

    with assert_raises(ValueError):
        normalize_chromosome(0)
github openvax / pyensembl / test / test_locus.py View on Github external
def test_normalize_chromosome():
    """
    Check normalize_chromosome on a table of known inputs.

    The table covers sex chromosome, numeric, and mitochondrial names,
    with and without the "chr" prefix, in upper and lower case, plus an
    integer input. Dict and list inputs are expected to raise TypeError.
    """
    # (input, expected output) pairs, checked in the order listed
    expected_by_input = [
        ("X", "X"),
        ("chrX", "chrX"),
        ("x", "X"),
        ("chrx", "chrX"),
        (1, "1"),
        ("1", "1"),
        ("chr1", "chr1"),
        ("chrM", "chrM"),
        ("chrMT", "chrMT"),
        ("M", "M"),
        ("MT", "MT"),
        ("m", "M"),
        ("chrm", "chrM"),
        ("mt", "MT"),
        ("chrmt", "chrMT"),
    ]
    for raw_name, expected_name in expected_by_input:
        assert normalize_chromosome(raw_name) == expected_name

    # container types are rejected with a TypeError
    for invalid_name in ({"a": "b"}, []):
        with assert_raises(TypeError):
            normalize_chromosome(invalid_name)
github openvax / pyensembl / pyensembl / gtf.py View on Github external
def dataframe(
            self,
            contig=None,
            feature=None,
            strand=None,
            save_to_disk=False):
        """
        Load genome entries as a DataFrame, optionally restricted to
        particular contig or feature type.
        """
        if contig:
            contig = normalize_chromosome(contig)

        if strand:
            strand = normalize_strand(strand)

        if feature is not None:
            require_string(feature, "feature")

        key = (contig, feature, strand)

        if key not in self._dataframes:
            def _construct_df():
                full_df = self._load_full_dataframe_cached()

                assert len(full_df) > 0, \
                    "Dataframe representation of genomic database empty!"
github openvax / pyensembl / pyensembl / locus.py View on Github external
def on_contig(self, contig):
        """
        Return whether the given contig, after being passed through
        normalize_chromosome, compares equal to this object's contig.
        """
        normalized_contig = normalize_chromosome(contig)
        return normalized_contig == self.contig
github openvax / pyensembl / pyensembl / database.py View on Github external
contig,
            position,
            end=None,
            strand=None,
            distinct=False,
            sorted=False):
        """
        Get the non-null values of a column from the database
        at a particular range of loci
        """

        # TODO: combine with the query method, since they overlap
        # significantly
        require_string(column_name, "column_name", nonempty=True)

        contig = normalize_chromosome(contig)

        require_integer(position, "position")

        if end is None:
            end = position

        require_integer(end, "end")

        if not self.column_exists(feature, column_name):
            raise ValueError("Table %s doesn't have column %s" % (
                feature, column_name,))

        if distinct:
            distinct_string = "DISTINCT "
        else:
            distinct_string = ""
github openvax / pyensembl / pyensembl / gtf.py View on Github external
feature : str, optional
            Path for subset of data restrict to given feature

        column : str, optional
            Restrict to single column

        strand : str, optional
            Positive ("+") or negative ("-") DNA strand. Default = either.

        distinct : bool, optional
            Only keep unique values (default=False)
        """
        csv_filename = self.gtf_base_filename + ".expanded"
        if contig:
            contig = normalize_chromosome(contig)
            csv_filename += ".contig.%s" % (contig,)
        if feature:
            csv_filename += ".feature.%s" % (feature,)
        if column:
            csv_filename += ".column.%s" % (column,)
        if strand:
            if strand == "+":
                strand_string = "positive"
            elif strand == "-":
                strand_string = "negative"
            else:
                raise ValueError("Invalid strand value: %s" % strand)
            csv_filename += ".strand.%s" % strand_string
        if distinct:
            csv_filename += ".distinct"
        csv_filename += extension