Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _load_fasta(db, id_regex):
    """
    Load a FASTA database into a `protein id -> sequence` dictionary.

    Every sequence is upper-cased and each isoleucine (`I`) is rewritten as
    leucine (`L`).

    db: FASTA source, handed unchanged to `fasta.read`
    id_regex: optional regular expression applied to the full header with
    `re.findall`; when it matches, the first match replaces the default
    protein id (the first whitespace-delimited token of the header). Pass
    `None` to always use the default id.
    Returns: dict mapping protein id to the converted sequence. When two
    entries resolve to the same id, the later one overwrites the earlier one.
    """
    prot_dict = dict()
    for header, seq in fasta.read(db):
        # Upper-case BEFORE the I -> L substitution: doing it afterwards left
        # lower-case "i" residues as "I" in the stored sequence.
        seq = seq.upper().replace("I", "L")
        prot_id = header.split()[0]
        if id_regex is not None:
            find_id = re.findall(id_regex, header)
            if find_id:
                prot_id = find_id[0]
        prot_dict[prot_id] = seq
    return prot_dict
"""
Calculate precursor mass and mz for given peptide and modification list,
using Pyteomics.
peptide: stripped peptide sequence
modifications: MS2PIP-style formatted modifications list (e.g.
`0|Acetyl|2|Oxidation`)
mass_shifts: dictionary with `modification_name -> mass_shift` pairs
Returns: tuple(prec_mass, prec_mz)
Note: This method does not use the built-in Pyteomics modification handling, as
that would require a known atomic composition of the modification.
"""
charge = int(charge)
unmodified_mass = mass.fast_mass(peptide)
mods_massses = sum([mass_shifts[mod] for mod in modifications.split('|')[1::2]])
prec_mass = unmodified_mass + mods_massses
prec_mz = (prec_mass + charge * PROTON_MASS) / charge
return prec_mass, prec_mz
# Group the scan numbers from the PSM file by the first field of each record:
# scanDict[each[0]] collects int(each[1]) for every record in scanFileNumber.
for each in scanFileNumber:
    scanDict.setdefault(each[0], []).append(int(each[1]))
# End of Reading Scans from PSM file

inputPath = args.msms
outPath = args.out

# Read all scan numbers using indexedmzML/indexList/index/offset tags.
# The pattern is a raw string (a bare "\d" in a normal string literal is an
# invalid escape sequence) and is compiled once instead of being searched
# twice per offset element.
scan_id_pattern = re.compile(r"scan=(\d+)")
allScanList = []
for k in mzml.read(inputPath).iterfind('indexedmzML/indexList/index/offset'):
    scan_match = scan_id_pattern.search(k['idRef'])
    if scan_match:
        allScanList.append(int(scan_match.group(1)))
# De-duplicate the collected scan numbers.
allScanList = list(set(allScanList))
# End of Reading mzML file

fraction_name = args.filestring
# Scans listed for this fraction in the PSM file (empty when the fraction
# name does not occur there) and the remaining scans of the mzML file.
scansInList = scanDict.get(fraction_name, [])
scansNotInList = list(set(allScanList) - set(scansInList))
flag = 0
if removeORretain == "remove":
    # "remove" mode keeps every scan that is NOT listed for this fraction.
    scan2retain = scansNotInList
    scan2retain = list(set(scan2retain))
import numpy as np
from six import string_types as basestring
from .scan import (
ScanFileMetadataBase, RandomAccessScanSource, ScanDataSource,
PrecursorInformation, _FakeGroupedScanIteratorImpl,
ChargeNotProvided)
from .metadata.file_information import (
FileInformation, MS_MSn_Spectrum)
from ._compression import test_if_file_has_fast_random_access
class _MGFParser(mgf.IndexedMGF):
    '''An indexed MGF reader with a specialized charge parser.'''

    def parse_charge(self, charge_text, list_only=False):
        '''Convert MGF charge text into an integer, or a list of integers.

        Pyteomics _parse_charge is very general-purpose, and
        can't be sped up, so we specialize it here.

        When ``list_only`` is false, ``'+'`` characters are dropped and the
        remainder is converted with ``int``. When it is true, the text is
        split on single spaces and each piece is parsed on its own. If the
        conversion fails and the text contains ``'-'``, the ``'-'``
        characters are dropped and the negated integer is returned; any
        other failure is re-raised.
        '''
        try:
            if list_only:
                return [self.parse_charge(piece)
                        for piece in charge_text.split(" ")]
            return int(charge_text.replace('+', ''))
        except Exception:
            if '-' not in charge_text:
                raise
            # A sign that int() could not read in place (e.g. trailing):
            # parse the magnitude and flip it.
            return -int(charge_text.replace("-", ''))
class _MGFMetadata(ScanFileMetadataBase):
"""Objects implementing this interface can describe the original source
if word == 'P':
values['peak_mode'] = 'profile'
else:
values['peak_mode'] = 'centroid'
word = words[i]
i += 1
ionization_info = ionization_pat.search(word)
if ionization_info is not None:
values['ionization'] = ionization_info.group(0)
word = words[i]
i += 1
cv_info = compensation_voltage_pat.search(word)
if cv_info is not None:
values['compensation_voltage'] = unitfloat(cv_info.group(1), None)
word = words[i]
i += 1
return values
except IndexError:
return values
def _acquisition_information(self, scan):
    """Describe how ``scan`` was acquired.

    Gathers the results of ``_filter_string``, ``_get_scan_event`` and
    ``_trailer_values`` for the scan, wraps them in one
    ``ScanEventInformation`` and returns a ``ScanAcquisitionInformation``
    whose combination is always ``"no combination"``.
    """
    filter_line = self._filter_string(scan)
    scan_event = self._get_scan_event(scan)
    trailer = self._trailer_values(scan)

    event_traits = {
        'preset scan configuration': scan_event,
        'filter string': filter_line,
    }
    compensation_voltage = filter_line.get("compensation_voltage")
    # The FAIMS trait is only recorded when `_filter_string` supplied a value.
    if compensation_voltage is not None:
        event_traits[FAIMS_compensation_voltage] = compensation_voltage

    start_time = self._scan_time(scan)
    # Missing trailer entry falls back to an injection time of 0.0 ms.
    injection_time = unitfloat(
        trailer.get('Ion Injection Time (ms)', 0.0), 'millisecond')
    bounds = filter_line.get("scan_window")
    window = ScanWindow(bounds[0], bounds[1])

    acquisition_event = ScanEventInformation(
        start_time,
        injection_time=injection_time,
        window_list=[window],
        traits=event_traits)
    return ScanAcquisitionInformation("no combination", [acquisition_event])
combination = "no combination"
elif "sum of spectra" in scan_list_struct:
combination = "sum of spectra"
elif "median of spectra" in scan_list_struct:
combination = "median of spectra"
elif "mean of spectra" in scan_list_struct:
combination = "mean of spectra"
scan_info['combination'] = combination
scan_info_scan_list = []
misplaced_FAIMS_value = scan.get(FAIMS_compensation_voltage.name, None)
for i, scan_ in enumerate(scan_list_struct.get("scan", [])):
scan_ = scan_.copy()
if misplaced_FAIMS_value is not None and i == 0:
scan[FAIMS_compensation_voltage.name] = misplaced_FAIMS_value
struct = {}
struct['start_time'] = scan_.pop('scan start time', unitfloat(0, 'minute'))
struct['injection_time'] = scan_.pop("ion injection time", unitfloat(0, 'millisecond'))
windows = []
for window in scan_.pop("scanWindowList", {}).get("scanWindow", []):
windows.append(ScanWindow(
window['scan window lower limit'],
window['scan window upper limit']))
struct['window_list'] = windows
scan_.pop("instrumentConfigurationRef", None)
struct['traits'] = scan_
scan_info_scan_list.append(ScanEventInformation(**struct))
scan_info['scan_list'] = scan_info_scan_list
return ScanAcquisitionInformation(**scan_info)
elif "sum of spectra" in scan_list_struct:
combination = "sum of spectra"
elif "median of spectra" in scan_list_struct:
combination = "median of spectra"
elif "mean of spectra" in scan_list_struct:
combination = "mean of spectra"
scan_info['combination'] = combination
scan_info_scan_list = []
misplaced_FAIMS_value = scan.get(FAIMS_compensation_voltage.name, None)
for i, scan_ in enumerate(scan_list_struct.get("scan", [])):
scan_ = scan_.copy()
if misplaced_FAIMS_value is not None and i == 0:
scan[FAIMS_compensation_voltage.name] = misplaced_FAIMS_value
struct = {}
struct['start_time'] = scan_.pop('scan start time', unitfloat(0, 'minute'))
struct['injection_time'] = scan_.pop("ion injection time", unitfloat(0, 'millisecond'))
windows = []
for window in scan_.pop("scanWindowList", {}).get("scanWindow", []):
windows.append(ScanWindow(
window['scan window lower limit'],
window['scan window upper limit']))
struct['window_list'] = windows
scan_.pop("instrumentConfigurationRef", None)
struct['traits'] = scan_
scan_info_scan_list.append(ScanEventInformation(**struct))
scan_info['scan_list'] = scan_info_scan_list
return ScanAcquisitionInformation(**scan_info)
def _acquisition_information(self, scan):
    """Build the acquisition record for a single scan.

    The scan's filter string, scan event and trailer values are read
    through the corresponding private helpers and combined into one
    ``ScanEventInformation``; that single event is returned inside a
    ``ScanAcquisitionInformation`` labelled ``"no combination"``.
    """
    parsed_filter = self._filter_string(scan)
    preset_event = self._get_scan_event(scan)
    trailer_values = self._trailer_values(scan)

    trait_map = {
        'preset scan configuration': preset_event,
        'filter string': parsed_filter,
    }
    voltage = parsed_filter.get("compensation_voltage")
    # Attach the FAIMS compensation voltage only when one was reported.
    if voltage is not None:
        trait_map[FAIMS_compensation_voltage] = voltage

    scan_time = self._scan_time(scan)
    # Defaults to 0.0 ms when the trailer has no injection time entry.
    injection = unitfloat(
        trailer_values.get('Ion Injection Time (ms)', 0.0), 'millisecond')
    window_limits = parsed_filter.get("scan_window")
    windows = [ScanWindow(window_limits[0], window_limits[1])]

    single_event = ScanEventInformation(
        scan_time, injection_time=injection, window_list=windows,
        traits=trait_map)
    return ScanAcquisitionInformation("no combination", [single_event])
def generate_aa_comp():
    """
    Return an amino-acid composition table with two modified residues.

    The table starts as a copy of `pyteomics.mass.std_aa_comp`. Using the
    compositions looked up by title in `pyteomics.mass.Unimod()`:
    "Z" is added as "M" plus "Oxidation", and "C" is replaced by "C" plus
    "Carbamidomethyl".

    >>> aa_comp = generate_aa_comp()
    >>> aa_comp["M"]
    Composition({'H': 9, 'C': 5, 'S': 1, 'O': 1, 'N': 1})
    >>> aa_comp["Z"]
    Composition({'H': 9, 'C': 5, 'S': 1, 'O': 2, 'N': 1})
    """
    unimod = pyteomics.mass.Unimod()
    table = dict(pyteomics.mass.std_aa_comp)
    # (key to store, residue to start from, Unimod title of the modification)
    modified_residues = (
        ("Z", "M", "Oxidation"),
        ("C", "C", "Carbamidomethyl"),
    )
    for target, base, title in modified_residues:
        delta = unimod.by_title(title)["composition"]
        # Plain `+` builds a new composition, so the entries shared with
        # std_aa_comp are never modified in place.
        table[target] = table[base] + delta
    return table