Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def expected_result_previous_bed_opposite_stranded(names):
    """Return the hard-coded expected table for this case as a PyRanges.

    The two space-separated rows below are parsed into a DataFrame with a
    fixed set of twelve column names, printed, and wrapped in ``PyRanges``.
    The ``names`` argument is accepted but not used by this function.
    """
    column_names = "Chromosome Start End Name Score Strand Start_b End_b Name_b Score_b Strand_b Distance".split()
    # Keep the second row flush-left: it is part of the string literal.
    bed_text = """chr1 8 9 h 0 + 6 7 f 0 - 2
chr1 5 7 h 0 - 1 2 f 0 + 4"""
    expected = pd.read_table(StringIO(bed_text), sep=" ", header=None, names=column_names)
    print(expected)
    return PyRanges(expected)
def test_init(gr, strand):
    """Check that a PyRanges can be constructed with or without strands.

    ``gr`` is unpacked into four values (chromosomes, starts, ends,
    strands). The strands are passed to the constructor only when
    ``strand`` is truthy. The constructed object is discarded, so the test
    passes as long as construction does not raise.
    """
    chromosomes, starts, ends, strands = gr
    init_kwargs = {"chromosomes": chromosomes, "starts": starts, "ends": ends}
    if strand:
        init_kwargs["strands"] = strands
    pr.PyRanges(**init_kwargs)
# Final part of a comparison between a bedtools command and PyRanges' join.
# Names that are used but not assigned here (gr, gr2, f1, f2, join_command,
# bedtools_strand, strandedness, assert_df_equal) come from outside this
# excerpt.

# Write the second interval set as a headerless, tab-separated file.
gr2.df.to_csv(f2, sep="\t", header=False, index=False)

cmd = join_command.format(bedtools_strand, f1, f2)
print(cmd)
bedtools_output = subprocess.check_output(cmd, shell=True, executable="/bin/bash").decode()

bedtools_columns = "Chromosome Start End Name Score Strand Chromosome_b Start_b End_b Name_b Score_b Strand_b Overlap".split()
# `squeeze=True` was dropped: it only matters for single-column input
# (thirteen columns are named here) and pandas 2.0 removed the argument.
bedtools_df = pd.read_table(
    StringIO(bedtools_output),
    header=None,
    names=bedtools_columns,
    dtype={"Chromosome": "category", "Strand": "category"},
)
# Use the `columns=` keyword: pandas >= 2.0 rejects a positional axis in drop().
bedtools_df = bedtools_df.drop(columns=["Overlap", "Chromosome_b"])

print("gr\n", gr)
print("gr2\n", gr2)

result = gr.join(gr2, strandedness=strandedness)

print("result\n", result)
print("bedtools\n", PyRanges(bedtools_df))

# An empty join must correspond to empty bedtools output; otherwise the two
# tables are compared with assert_df_equal.
if result.df.empty:
    assert bedtools_df.empty
else:
    assert_df_equal(result.df, bedtools_df)
# NOTE(review): tail of a helper whose `def` line is outside this excerpt;
# `df` and `ids` are assigned before this point.
# strand = draw(use_strand)
# Add "Start" to "End" (presumably "End" holds a length up to here, so this
# makes it an absolute coordinate — confirm against the strategy that builds df).
df.loc[:, "End"] += df.Start
# Insert constant Name/Score columns, then gene_id (from `ids`) and a running
# exon_id (0 .. len(df) - 1) at fixed positions 3-6.
df.insert(3, "Name", "a")
df.insert(4, "Score", 0)
df.insert(5, "gene_id", ids)
df.insert(6, "exon_id", list(range(len(df))))
# df.Start = df.Start.astype(np.int32)
# df.End = df.End.astype(np.int32)
# print(df.dtypes)
# stranded = draw(st.booleans())
# if not strand:
# df = df.drop("Strand", axis=1)
# Wrap the finished frame in a PyRanges; this is the value returned below.
gr = PyRanges(df)
# print(gr)
# raise
# gr = PyRanges(df)
# do not sort like this, use pyranges sort
# np.random.seed(draw(st.integers(min_value=0, max_value=int(1e6))))
# gr.df = df.reindex(np.random.permutation(df.index.values))
return gr
def bioframe_to_pyranges(df):
    """Convert a frame with ``chrom``/``start``/``end`` columns to a PyRanges.

    The three columns are renamed to ``Chromosome``/``Start``/``End`` on a
    copy of ``df``; the frame passed in is left unmodified.
    """
    column_map = {"chrom": "Chromosome", "start": "Start", "end": "End"}
    # rename() without inplace returns a new frame, so no explicit copy is needed.
    renamed = df.rename(columns=column_map)
    return pr.PyRanges(renamed)
def dfs_no_min(draw): # nosec
    """Draw a frame from ``better_dfs_no_min`` and return it as a PyRanges.

    "Start" is added onto "End", constant ``Name`` ("a") and ``Score`` (0)
    columns are inserted at positions 3 and 4, and the frame is wrapped with
    ``PyRanges(..., int64=True)``.
    """
    frame = draw(better_dfs_no_min)
    frame.loc[:, "End"] += frame.Start

    # Insert in ascending position order so each column lands at its index.
    for position, (label, fill) in enumerate((("Name", "a"), ("Score", 0)), start=3):
        frame.insert(position, label, fill)

    # Rows are not shuffled or sorted here; use PyRanges' own sort if needed.
    return PyRanges(frame, int64=True)
"Chromosome": "category",
"TranscriptID": "category",
"GeneID": "category",
"Strand": "category",
"Start": int,
"End": int
})
# NOTE(review): tail of a function whose `def` line is outside this excerpt;
# `df` is produced by the call that ends just above.
# Derive the exon rows from `df` via the `_exons` helper (defined elsewhere).
exons = _exons(df)
# Transcript rows: drop the "XS" and "XE" columns and tag each row with
# Feature="transcript".
_df = (df.drop("XS XE".split(), axis=1).assign(Feature="transcript"))
# Stack transcript and exon rows, order them by Chromosome/Start/End, and
# keep only the seven listed columns, in this order.
df = pd.concat([_df, exons], sort=False).sort_values(
"Chromosome Start End".split()
)["Chromosome Start End Strand Feature TranscriptID ExonNumber".split()]
return pr.PyRanges(df)
def df_to_pyranges(df, start_col='start', end_col='end', chr_col='chr', start_slop=0, end_slop=0):
    """Build a PyRanges from a DataFrame with custom coordinate column names.

    Args:
        df: Source DataFrame; it is not modified.
        start_col: Name of the column holding interval starts.
        end_col: Name of the column holding interval ends.
        chr_col: Name of the column holding chromosome labels.
        start_slop: Amount subtracted from every start.
        end_slop: Amount added to every end.

    Returns:
        A ``pr.PyRanges`` built from a copy of ``df`` with added (or
        overwritten) ``Chromosome``, ``Start`` and ``End`` columns.
    """
    # Work on a copy so the helper columns are not written into the caller's
    # frame as a side effect.
    ranges_df = df.copy()
    ranges_df['Chromosome'] = ranges_df[chr_col]
    ranges_df['Start'] = ranges_df[start_col] - start_slop
    ranges_df['End'] = ranges_df[end_col] + end_slop
    return pr.PyRanges(ranges_df)
# NOTE(review): tail of a method whose `def` line is outside this excerpt.
# Indentation was lost, so the `if` that pairs with the final `else:` is not
# visible here (presumably a check on an `as_pyranges` flag — confirm).
# Falsy `result`: return an empty PyRanges.
if not result:
return pr.PyRanges()
# Look at the first value in `result` to decide how to package the output.
first_hit = list(result.values())[0]
if isinstance(first_hit, pd.Series):
# Boolean Series together with a truthy `subset`: index `self` with `result`.
if first_hit.dtype == bool and subset:
return self[result]
# Otherwise, with a truthy `col`: set `result` as attribute `col` on `self`
# and return `self`.
elif col:
self.__setattr__(col, result)
return self
else:
# A Series result with neither option set cannot be turned into a PyRanges.
raise Exception(
"Cannot return PyRanges when function returns a Series! Use as_pyranges=False."
)
# Presumably reached only when the first value is not a Series: wrap `result`.
return pr.PyRanges(result)
else:
return result
# NOTE(review): excerpt from the middle of a routine whose header, loop and
# the conditions pairing with the first two `else:` lines are outside this
# view. Indentation was lost, so the nesting below cannot be confirmed here.
_column = column[k].values
else:
_column = column[k]
df.insert(pos, column_name, _column)
else:
df.insert(pos, column_name, column)
start_length = end_length
# Store the updated frame back under its key.
dfs[k] = df
# For any column other than Chromosome/Strand the updated mapping is stored
# as-is. The write goes through `self.__dict__` (presumably to bypass a custom
# attribute setter — confirm).
if column_name not in ["Chromosome", "Strand"]:
self.__dict__["dfs"] = dfs
else:
self.__dict__["dfs"] = pr.PyRanges(pr.PyRanges(dfs).df).dfs # will merge the dfs, then split on keys again to ensure they are correct