Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
tmp_downstream = self.graph.successors(self.target)[0] if self.strand == '+' else self.graph.predecessors(self.target)[0]
if self.upstream and self.downstream:
if self.upstream != tmp_upstream or self.downstream != tmp_downstream:
# raise error if the user defined exon does not match expectation
raise utils.PrimerSeqError('Error: Flanking exon choice too far from target exon')
self.upstream = tmp_upstream # assign upstream after user-defined exon check
self.downstream = tmp_downstream # assign downstream after user-defined exon check
# defining two attributes as the same thing seems silly but in a
# different case with two biconnected components the two components
# need to be merged into a single self.total_components
self.total_components = [self.upstream, self.target, self.downstream]
self.component = self.total_components
# create a dummy all paths variable even though there is only one path
self.all_paths = algs.AllPaths(self.splice_graph, self.component, self.target, self.splice_graph.chr)
self.all_paths.trim_tx_paths()
self.all_paths.set_all_path_coordinates()
# only one isoform, so read counts do not really matter
self.paths, self.counts = self.all_paths.estimate_counts()
utils.save_path_info(self.id, self.paths, self.counts)
# since the upstream, target, and downstream exon are constitutive then
# they all have inclusion of 1.0
self.psi_target, self.psi_upstream, self.psi_downstream = 1.0, 1.0, 1.0
"""
Strategy:
1. Use All Paths (then trim)
2. Save counts/paths to file
3. get sequence information
"""
# get possible exons for primer amplification
tmp_exons = copy.deepcopy(sGraph.get_graph().nodes())
tmp = sorted(tmp_exons, key=lambda x: (x[0], x[1]))
if sGraph.strand == '+':
my_exons = tmp[tmp.index(upstream_exon):tmp.index(downstream_exon) + 1]
else:
my_exons = tmp[tmp.index(downstream_exon):tmp.index(upstream_exon) + 1]
# Use correct tx's and estimate counts/psi
all_paths = algs.AllPaths(sGraph, my_exons, target, chr=sGraph.chr, strand=sGraph.strand)
# all_paths.trim_tx_paths()
#all_paths.trim_tx_paths_using_flanking_exons(sGraph.strand, upstream_exon, downstream_exon)
all_paths.trim_tx_paths_using_flanking_exons_and_target(sGraph.strand, target, upstream_exon, downstream_exon)
all_paths.set_all_path_coordinates()
# all_paths.keep_weakly_connected() # hack to prevent extraneous exons causing problems in EM alg
paths, counts = all_paths.estimate_counts() # run EM algorithm
# psi_target = algs.estimate_psi(target, paths, counts)
psi_target = mem.estimate_psi(target, paths, counts)
utils.save_path_info(id, paths, counts) # save paths/counts in tmp/isoforms/id.json
# get sequence of upstream/target/downstream combo
genome_chr = genome[sGraph.chr] # chr object from pygr
upstream_seq, target_seq, downstream_seq = genome_chr[upstream_exon[0]:upstream_exon[1]], genome_chr[target[0]:target[1]], genome_chr[downstream_exon[0]:downstream_exon[1]] # get sequence using pygr
if sGraph.strand == '-':
upstream_seq, target_seq, downstream_seq = -upstream_seq, -target_seq, -downstream_seq # get reverse-complement if necessary
'''
graph = sGraph.get_graph() # nx.DiGraph
# search through each biconnected component
for component in algs.get_biconnected(graph):
component = sorted(component, key=lambda x: (x[0], x[1])) # ensure first component is first exon, etc
if target in component[1:-1]:
# define upstream/downstream flanking exon
if sGraph.strand == '+':
upstream = component[0]
downstream = component[-1]
else:
upstream = component[-1]
downstream = component[0]
# get possible lengths
all_paths = algs.AllPaths(sGraph, component, target,
chr=sGraph.chr, strand=sGraph.strand)
# all_paths.set_all_path_lengths() # should no longer need this since it is done in primer.py
all_paths.set_all_path_coordinates()
# get sequence of upstream/target/downstream combo
genome_chr = genome[sGraph.chr] # chr object from pygr
upstream_seq, target_seq, downstream_seq = genome_chr[upstream[0]:upstream[1]], genome_chr[target[0]:target[1]], genome_chr[downstream[0]:downstream[1]]
if sGraph.strand == '-':
upstream_seq, target_seq, downstream_seq = \
-upstream_seq, -target_seq, -downstream_seq
return [sGraph.strand, name[1:], 'NA',
sGraph.chr + ':' + '-'.join(map(str, upstream)), '1.0',
sGraph.chr + ':' + '-'.join(map(str, downstream)), '1.0',
all_paths, str(upstream_seq).upper(),
str(target_seq).upper(), str(downstream_seq).upper()]
after_counts,
after_component[1:])
else:
self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(after_paths,
after_counts,
after_component[1:])
self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(before_paths,
before_counts,
list(reversed(before_component[:-1])))
self.total_components = before_component[:-1] + after_component
self.psi_target = 1.0
# handle the combined components
tmp_start_ix = self.total_components.index(self.upstream) if self.splice_graph.strand == '+' else self.total_components.index(self.downstream)
tmp_end_ix = self.total_components.index(self.downstream) if self.splice_graph.strand == '+' else self.total_components.index(self.upstream)
self.all_paths = algs.AllPaths(self.splice_graph, self.total_components[tmp_start_ix:tmp_end_ix], self.target, self.splice_graph.chr)
self.all_paths.trim_tx_paths()
self.all_paths.set_all_path_coordinates()
self.paths, self.counts = self.all_paths.estimate_counts() # used to be self.before_all_paths
utils.save_path_info(self.id, self.paths, self.counts)
upstream_exon = utils.get_pos(line[utils.UPSTREAM_EXON]) # get user-defined flanking exons
downstream_exon = utils.get_pos(line[utils.DOWNSTREAM_EXON])
first_primer, second_primer = utils.get_primer_coordinates(line[utils.PRIMER_COORD])
# get possible exons for primer amplification
tmp = sorted(tmp_sg.get_graph().nodes(), key=lambda x: (x[0], x[1]))
first_ex = utils.find_first_exon(first_primer, tmp)
last_ex = utils.find_last_exon(second_primer, tmp)
my_exons = tmp[first_ex:last_ex + 1]
# if tmp_sg.strand == '+':
# my_exons = tmp[tmp.index(upstream_exon):tmp.index(downstream_exon) + 1]
# else:
# my_exons = tmp[tmp.index(downstream_exon):tmp.index(upstream_exon) + 1]
# Use correct tx's and estimate counts/psi
all_paths = algs.AllPaths(tmp_sg,
my_exons,
utils.get_pos(line[utils.TARGET]), # tuple (start, end)
chr=chr,
strand=strand)
# all_paths.trim_tx_paths()
fexon = upstream_exon if strand == "+" else downstream_exon
lexon = downstream_exon if strand == "+" else upstream_exon
all_paths.trim_tx_paths_using_primers(first_primer, second_primer, fexon, lexon)
all_paths.set_all_path_coordinates()
paths, counts = all_paths.estimate_counts() # run EM algorithm
return paths, counts
def first_exon_case(self):
'''
Case where the target and one flanking exon is constitutive.
'''
print 'first exon case'
if len(self.graph.predecessors(self.target)) > 1:
logging.debug('Error: Conflict between biconnected components and predecessors')
# get tx path information
self.all_paths = algs.AllPaths(self.splice_graph, self.component, self.target, self.splice_graph.chr)
self.all_paths.trim_tx_paths()
self.all_paths.set_all_path_coordinates()
self.paths, self.counts = self.all_paths.estimate_counts()
if self.upstream and self.downstream:
# user defined flanking exon case
if self.strand == '+' and self.graph.predecessors(self.target)[0] == self.upstream:
self.psi_upstream = 1.0
self.psi_downsteam = mem.estimate_psi(self.downstream, self.paths, self.counts)
elif self.strand == '-' and self.graph.predecessors(self.target)[0] == self.downstream:
self.psi_downstream = 1.0
self.psi_upstream = mem.estimate_psi(self.upstream, self.paths, self.counts)
else:
raise utils.PrimerSeqError('Error: Flanking exon choice too far from target exon')
elif self.strand == '+':
self.upstream = self.graph.predecessors(self.target)[0]
def non_constitutive_case(self):
'''
In this case, I also estimate the psi for the target exon since
it is alternatively spliced. Both upstream and downstream exons are
checked for the closest sufficiently included exon.
'''
print 'non-constitutive case'
index = self.component.index(self.target)
# get tx path information
self.all_paths = algs.AllPaths(self.splice_graph, self.component, self.target, self.splice_graph.chr)
self.all_paths.trim_tx_paths()
self.all_paths.set_all_path_coordinates()
self.paths, self.counts = self.all_paths.estimate_counts()
if self.upstream and self.downstream:
# known flanking exon case
self.psi_upstream = mem.estimate_psi(self.upstream, self.paths, self.counts)
self.psi_downstream = mem.estimate_psi(self.downstream, self.paths, self.counts)
elif self.strand == '-':
self.upstream, self.psi_upstream = self.find_closest_exon_above_cutoff(self.paths,
self.counts,
self.component[index + 1:])
self.downstream, self.psi_downstream = self.find_closest_exon_above_cutoff(self.paths,
self.counts,
list(reversed(self.component[:index])))
else: