Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
continue
try:
row = OrderedDict(zip(header, line.split('\t')))
except:
continue
if self.genelist:
PIDD_GENE = []
Inheritance = []
Phenotype = []
try:
for genepanel_line in gene_list.fetch(row['#chr'],
int(row['start']),
int(row['end']),
parser=pysam.asTuple()):
PIDD_GENE += [genepanel_line[3]]
Inheritance += [genepanel_line[4]]
Phenotype += [genepanel_line[5]]
row['PIDD_GENE'] = "|".join(PIDD_GENE) if PIDD_GENE else 'NA'
row['Inheritance'] = "|".join(Inheritance) if Inheritance else 'NA'
row['Phenotype'] ="|".join(Phenotype) if Phenotype else 'NA'
except ValueError:
pass
if self.filter_line(row):
fout.write('\t'.join(row.values()) + '\n')
tbx = pysam.TabixFile(tsv_file)
# format annotations
window_list = []
if genome:
for chrom in sorted(annotation.keys()):
window_list += [["".join([genome, '_chr', chrom]), int(n[0]), int(n[1])] for n in annotation[chrom]]
else:
for chrom in sorted(annotation.keys()):
window_list += [["".join(['chr', chrom]), int(n[0]), int(n[1])] for n in annotation[chrom]]
print('building count matrix')
mtx = lil_matrix((nb_barcodes, len(window_list)), dtype=np.float32)
for i, tmp_feat in enumerate(tqdm(window_list)):
for row in tbx.fetch(tmp_feat[0], tmp_feat[1], tmp_feat[2], parser=pysam.asTuple()):
mtx[dict_barcodes[str(row).split('\t')[-2]], i] += 1
print('building AnnData object')
mtx = ad.AnnData(mtx.tocsr(),
obs=pd.DataFrame(index=barcodes),
var=pd.DataFrame(index=['_'.join([str(p) for p in n]) for n in window_list]))
if csv_file:
print('filtering barcodes')
df = pd.read_csv(csv_file)
if genome == 'mm10':
df_filtered = df[(df.is_mm10_cell_barcode == 1)]
elif genome == 'hg19':
df_filtered = df[(df.is_hg19_cell_barcode == 1)]
else:
df_filtered = df[(df.is_cell_barcode == 1)]
else:
skip = False
# if a header line
if line.startswith('>'):
# process prev sequence
if sequence != '' and len(sequence) % 3 == 0 and start_stop_ok(sequence):
align_dict = {}
# extract positions from alignment
for codon in [coords[i: i + 3] for i in range(0, len(coords), 3)]:
codon_dict = {}
for pos in codon:
align_pos = list(wga.fetch(chromo, pos - 1, pos, parser=pysam.asTuple()))
# get align data in form [('dmel', 'T'), ('dsim', '?'), ('dyak', 'T')]
try:
spp_bases = zip(align_pos[0][4].split(','), align_pos[0][7].split(','))
for spp in spp_bases:
if spp[0] not in codon_dict.keys():
codon_dict[spp[0]] = ''
if len(align_pos) == 0:
codon_dict[spp[0]] += 'N'
else:
codon_dict[spp[0]] += spp[1][0]
except IndexError:
continue
# if entire codon is not in alignment skip
def __init__(self, inFile, parser=pysam.asTuple()):
# because of odd Cython weirdness, we don't actually want to call super.__init__ here..
#super(TabixReader, self).__init__(inFile, parser=parser)
self.parser = parser
def __enter__(self):
except IOError:
raise IOError("Gemini cannot open this annotation file: %s. \n"
"Have you installed the annotation files? If so, "
"have they been moved or deleted? Exiting...\n\n"
"For more details:\n\t"
"http://gemini.readthedocs.org/en/latest/content/"
"#installation.html\#installing-annotation-files\n"
% anno_files[anno])
# ## Standard access to Tabix indexed files
PARSERS = {"bed": pysam.asBed(),
"vcf": pysam.asVCF(),
"tuple": pysam.asTuple(),
None: None}
def _get_hits(coords, annotation, parser_type, _parsers=PARSERS):
"""Retrieve BED information, recovering if BED annotation file does have a chromosome.
"""
try:
parser = _parsers[parser_type]
except KeyError:
raise ValueError("Unexpected parser type: %s" % parser)
chrom, start, end = coords
if isinstance(annotation, pysam.VariantFile):
return annotation.fetch(chrom, start, end)
elif isinstance(annotation, cyvcf2.VCF):
return annotation("%s:%d-%d" % (chrom, start - 1, end))
try:
hit_iter = annotation.fetch(str(chrom), start, end, parser=parser)
def __init__(self, inFile, parser=pysam.asTuple()):
# inFile is passed in but is never used, and yet the TabixReader works. How?!
# This inFile magic is because Tabixfile is a Cython object that uses def __cinit__
# rather than def __init__; the former is called automatically exactly once for the
# base class prior to any use of __init__. Use of subsequent __init__ should obey
# normal inheritance rules (assuming inheritance from object and it's not an old-style class).
# So __cinit__ sets the input file, and then our __init__ (which doesn't override a base method)
# is called and sets the parser.
# This could all break in the future if pysam moves away from __cinit__, but doing so would
# reduce performance and so it seems unlikely.
# See:
# https://github.com/cython/cython/blob/master/docs/src/userguide/special_methods.rst#id19
# https://github.com/pysam-developers/pysam/blob/master/pysam/libctabix.pyx#L331
#super(TabixReader, self).__init__(inFile, parser=parser)
self.parser = parser
#LOG.debug("index of '>' is {}".format(new_sequence.getvalue().find('>')))
start = mappings[0].from_start
LOG.debug("Setting start to {} (mappings[0].from_start)".format(start))
found = False
new_sequence_len = 0
LOG.debug('start={}'.format(start))
LOG.debug('last_pos={}'.format(last_pos))
LOG.debug("VCI Fetch {}".format(fasta_transform_params.vci_query))
local_vci_file = vci.VCIFile(fasta_transform_params.vci_file)
for line in local_vci_file.fetch(fasta_transform_params.vci_query, parser=pysam.asTuple()):
aline = line
if line[5] == '.':
continue
found = True
LOG.debug('')
LOG.debug("LINE: {}".format(line))
#new_sequence_value = new_sequence.getvalue()
#if len(new_sequence_value) > 50:
# LOG.debug('current={}...{}'.format(new_sequence_value[:25], new_sequence_value[-25:]))
#else:
# LOG.debug('current={}'.format(new_sequence_value))
# chromosome, position, shared_bases, deleted_bases, inserted_bases, fragment_size
tree = IntervalTree()
try:
if line_no % 10000 == 0:
print("CONTIG {} {}".format(contig, line_no))
pos_from = 0
pos_to = 0
tabix_file = pysam.TabixFile(filename_vci)
iterator = None
try:
iterator = tabix_file.fetch(contig, parser=pysam.asTuple())
except:
LOG.debug("Exception for {}".format(contig))
if iterator is None:
LOG.debug("No iterator")
return ret
for rec in iterator:
if len(rec) != 6:
raise exceptions.G2GError("Unexpected line in G2G file. Line #{0:,}: {1}".format(line_no, rec))
if rec[2] == '.':
continue
"""
1 3000019 G . A 3000019