Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def hyp14():
    """Return a PyRanges holding one interval: chr1, Start 0, End 1, Strand +."""
    c = """chr1 0 1 +"""
    # Raw string: "\s" in a plain literal is an invalid escape sequence and
    # triggers a warning on modern Python.
    return PyRanges(
        pd.read_table(
            StringIO(c),
            sep=r"\s+",
            header=None,
            names="Chromosome Start End Strand".split(),
        )
    )
def hyp8():
    """Return a PyRanges holding one interval: chr1, Start 0, End 1, Strand +."""
    c = """chr1 0 1 +"""
    # Raw string: "\s" in a plain literal is an invalid escape sequence and
    # triggers a warning on modern Python.
    return PyRanges(
        pd.read_table(
            StringIO(c),
            sep=r"\s+",
            header=None,
            names="Chromosome Start End Strand".split(),
        )
    )
def expected_result_previous_bed_opposite_stranded(names):
    """Build the expected PyRanges for the previous/opposite-strand case.

    The two expected rows are parsed from an inline space-separated table,
    printed, and wrapped in a PyRanges. ``names`` is accepted but not used.
    """
    column_names = "Chromosome Start End Name Score Strand Start_b End_b Name_b Score_b Strand_b Distance".split()
    raw_rows = """chr1 8 9 h 0 + 6 7 f 0 - 2
chr1 5 7 h 0 - 1 2 f 0 + 4"""
    expected = pd.read_table(
        StringIO(raw_rows), sep=" ", header=None, names=column_names
    )
    print(expected)
    return PyRanges(expected)
def test_init(gr, strand):
    """Construct a PyRanges from column data, with strands only if ``strand`` is truthy.

    ``gr`` is unpacked into chromosomes, starts, ends and strands. The test
    passes when construction does not raise.
    """
    chromosomes, starts, ends, strands = gr
    init_kwargs = {"chromosomes": chromosomes, "starts": starts, "ends": ends}
    if strand:
        init_kwargs["strands"] = strands
    pr.PyRanges(**init_kwargs)
# Tail of a join comparison against bedtools. The enclosing definition is not
# visible here; gr, gr2, f1, f2, join_command, bedtools_strand and
# strandedness are bound outside this fragment.
gr2.df.to_csv(f2, sep="\t", header=False, index=False)
cmd = join_command.format(bedtools_strand, f1, f2)
print(cmd)
# NOTE(review): the command is run through a shell; this assumes the formatted
# values are test-controlled, not external input.
result = subprocess.check_output(cmd, shell=True, executable="/bin/bash").decode()
# `squeeze=True` was dropped: it only affects single-column results and was
# removed from the pandas readers in 2.0.
bedtools_df = pd.read_table(
    StringIO(result),
    header=None,
    names="Chromosome Start End Name Score Strand Chromosome_b Start_b End_b Name_b Score_b Strand_b Overlap".split(),
    dtype={"Chromosome": "category", "Strand": "category"},
)
# Name the columns explicitly; the positional `axis` argument to drop() was
# removed in pandas 2.0.
bedtools_df = bedtools_df.drop(columns=["Overlap", "Chromosome_b"])
print("gr\n", gr)
print("gr2\n", gr2)
result = gr.join(gr2, strandedness=strandedness)
print("result\n", result)
print("bedtools\n", PyRanges(bedtools_df))
if result.df.empty:
    assert bedtools_df.empty
else:
    assert_df_equal(result.df, bedtools_df)
def test_read_gtf():
    """Read the Ensembl GTF fixture with and without ``duplicate_attr``.

    Both reads must give 28 columns and a ``tag`` of 'basic' on the row at
    position 1. The ``tag`` of exon ENSE00003812156 is 'basic' by default and
    'CCDS,basic' when ``duplicate_attr=True``.
    """
    gtf_path = "tests/test_data/ensembl.gtf"
    exon_id = 'ENSE00003812156'

    # (extra read_gtf keyword arguments, expected tag of the selected exon)
    scenarios = (
        ({}, 'basic'),
        ({"duplicate_attr": True}, 'CCDS,basic'),
    )
    for extra_kwargs, expected_exon_tag in scenarios:
        gr = pr.read_gtf(gtf_path, full=True, **extra_kwargs)
        assert len(gr.columns) == 28

        frame = gr.df
        second_row = frame.iloc[1]
        assert second_row['tag'] == 'basic'

        exon_row = frame[frame['exon_id'] == exon_id].iloc[0]
        assert exon_row['tag'] == expected_exon_tag
# assert list(gr.df.columns[:4]) == "Chromosome Start End Strand".split()
def test_stranded():
    """Check that ``stranded`` follows the values assigned to ``Strand``.

    The cpg/exons join starts out stranded, stops being stranded once every
    Strand is ".", and is stranded again after random "+"/"-" values are
    assigned. Each frame yielded by iteration must then hold one strand value.
    """
    joined = pr.data.cpg().join(pr.data.exons())
    assert joined.stranded

    joined.Strand = "."
    assert not joined.stranded

    joined.Strand = np.random.choice("+ -".split(), size=len(joined))
    assert joined.stranded

    for _, frame in joined:
        unique_strands = frame.Strand.drop_duplicates()
        assert len(unique_strands) == 1
@settings(
    max_examples=max_examples,
    deadline=deadline,
    # list(HealthCheck) is the non-deprecated equivalent of HealthCheck.all().
    suppress_health_check=list(HealthCheck))
@given(gr=df_data())  # pylint: disable=no-value-for-parameter
def test_init(gr, strand):
    """Construct a PyRanges from generated column data, with or without strands.

    ``gr`` is unpacked into chromosomes, starts, ends and strands. The test
    passes when construction does not raise.

    NOTE(review): ``strand`` is not supplied by ``@given`` in this view;
    presumably a parametrize decorator or fixture outside it provides the
    value. Confirm.
    """
    c, s, e, strands = gr
    if strand:
        pr.PyRanges(chromosomes=c, starts=s, ends=e, strands=strands)
    else:
        pr.PyRanges(chromosomes=c, starts=s, ends=e)
# Dataset that test_getitem indexes with generated selectors.
chipseq = pr.data.chipseq()
@settings(
    max_examples=max_examples,
    deadline=deadline,
    # list(HealthCheck) is the non-deprecated equivalent of HealthCheck.all().
    suppress_health_check=list(HealthCheck))
@given(selector=selector())  # pylint: disable=no-value-for-parameter
def test_getitem(selector):
    """Index ``chipseq`` with a generated two- or three-part selector.

    The test passes when the indexing does not raise. Selectors of any other
    length are ignored and the function returns None.
    """
    # have these weird returns to avoid being flagged as unused code
    if len(selector) == 3:
        a, b, c = selector
        return chipseq[a, b, c]
    elif len(selector) == 2:
        a, b = selector
        return chipseq[a, b]
def test_stranded():
    """Check that ``stranded`` follows the values assigned to ``Strand``.

    The cpg/exons join starts out stranded, stops being stranded once every
    Strand is ".", and is stranded again after random "+"/"-" values are
    assigned. Each frame yielded by iteration must then hold one strand value.
    """
    joined = pr.data.cpg().join(pr.data.exons())
    assert joined.stranded

    joined.Strand = "."
    assert not joined.stranded

    joined.Strand = np.random.choice("+ -".split(), size=len(joined))
    assert joined.stranded

    for _, frame in joined:
        unique_strands = frame.Strand.drop_duplicates()
        assert len(unique_strands) == 1
def expected_result_unstranded():
    """Build the expected PyRanges for the unstranded case.

    The inline space-separated table carries its own header row, and each
    data row starts with an extra leading label field.
    """
    raw_table = """Chromosome Start End Name Score Strand Start_b End_b Name_b Score_b Strand_b Distance
0 chr1 3 6 h 0 + 6 7 f 0 - 1
1 chr1 5 7 h 0 - 6 7 f 0 - 0"""
    expected = pd.read_table(StringIO(raw_table), sep=" ")
    return PyRanges(expected)