Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
modfile2,
PTMmap,
model,
fragerror,
tableau,
):
"""
Function for each worker to process a list of spectra. Each peptide's
sequence is extracted from the mgf file. Then models are chosen based on
model. PTMmap, Ntermmap and Ctermmap determine the modifications
applied to each peptide sequence and the spectrum is predicted. Then either
the feature vectors are returned, or a DataFrame with the predicted and
empirical intensities.
"""
ms2pip_pyx.ms2pip_init(
bytearray(afile.encode()),
bytearray(modfile.encode()),
bytearray(modfile2.encode()),
)
# transform pandas datastructure into dictionary for easy access
if "ce" in data.columns:
specdict = (
data[["spec_id", "peptide", "modifications", "ce"]]
.set_index("spec_id")
.to_dict()
)
ces = specdict["ce"]
else:
specdict = (
data[["spec_id", "peptide", "modifications"]].set_index("spec_id").to_dict()
def process_peptides(worker_num, data, afile, modfile, modfile2, PTMmap, model):
"""
Function for each worker to process a list of peptides. The prediction
models are chosen based on model, and PTMmap determines the modifications
applied to each peptide sequence. NOTE: Ntermmap and Ctermmap are not
parameters of this function; only PTMmap is passed in. Returns the
predicted spectra for all the peptides.
"""
ms2pip_pyx.ms2pip_init(
bytearray(afile.encode()),
bytearray(modfile.encode()),
bytearray(modfile2.encode()),
)
pcount = 0
# Prepare output variables
mz_buf = []
prediction_buf = []
peplen_buf = []
charge_buf = []
pepid_buf = []
# transform pandas dataframe into dictionary for easy access
if "ce" in data.columns:
output_filename="{}".format(params['output_filename']),
write_mode=write_mode,
)
if 'mgf' in params['output_filetype']:
logging.info("Writing MGF file")
spectrum_output.write_mgf(
all_preds,
peprec=peprec_batch,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode
)
if 'bibliospec' in params['output_filetype']:
logging.info("Writing BiblioSpec SSL and MS2 files")
spectrum_output.write_bibliospec(
all_preds,
peprec_batch,
ms2pip_params,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode
)
if 'spectronaut' in params['output_filetype']:
logging.info("Writing Spectronaut CSV file")
spectrum_output.write_spectronaut(
all_preds,
peprec_batch,
ms2pip_params,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode
)
if not return_results:
if "mgf" in out_formats:
print("writing MGF file {}_predictions.mgf...".format(output_filename))
spectrum_output.write_mgf(
all_preds, peprec=data, output_filename=output_filename
)
if "msp" in out_formats:
print("writing MSP file {}_predictions.msp...".format(output_filename))
spectrum_output.write_msp(
all_preds, data, output_filename=output_filename
)
if "bibliospec" in out_formats:
print("writing SSL/MS2 files...")
spectrum_output.write_bibliospec(
all_preds, data, params, output_filename=output_filename
)
if "spectronaut" in out_formats:
print("writing Spectronaut CSV files...")
spectrum_output.write_spectronaut(
all_preds, data, params, output_filename=output_filename
)
if "csv" in out_formats:
print("writing CSV {}_predictions.csv...".format(output_filename))
all_preds.to_csv(
"{}_predictions.csv".format(output_filename), index=False
)
sys.stdout.write("done!\n")
]
ionnumbers.extend([x + 1 for x in range(pl - 1)] * num_ion_types)
charges.extend([charge_bufs[pi]] * (num_ion_types * (pl - 1)))
pepids.extend([pepid_bufs[pi]] * (num_ion_types * (pl - 1)))
all_preds = pd.DataFrame()
all_preds["spec_id"] = pepids
all_preds["charge"] = charges
all_preds["ion"] = ions
all_preds["ionnumber"] = ionnumbers
all_preds["mz"] = np.concatenate(mz_bufs, axis=None)
all_preds["prediction"] = np.concatenate(prediction_bufs, axis=None)
if not return_results:
if "mgf" in out_formats:
print("writing MGF file {}_predictions.mgf...".format(output_filename))
spectrum_output.write_mgf(
all_preds, peprec=data, output_filename=output_filename
)
if "msp" in out_formats:
print("writing MSP file {}_predictions.msp...".format(output_filename))
spectrum_output.write_msp(
all_preds, data, output_filename=output_filename
)
if "bibliospec" in out_formats:
print("writing SSL/MS2 files...")
spectrum_output.write_bibliospec(
all_preds, data, params, output_filename=output_filename
)
if "spectronaut" in out_formats:
output_filename="{}_unmodified".format(params['output_filename']),
write_mode=write_mode,
)
"""
logging.info("Writing MSP file")
spectrum_output.write_msp(
all_preds,
peprec_batch,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode,
)
if 'mgf' in params['output_filetype']:
logging.info("Writing MGF file")
spectrum_output.write_mgf(
all_preds,
peprec=peprec_batch,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode
)
if 'bibliospec' in params['output_filetype']:
logging.info("Writing BiblioSpec SSL and MS2 files")
spectrum_output.write_bibliospec(
all_preds,
peprec_batch,
ms2pip_params,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode
)
mode=write_mode, append=append, min_itemsize=50
)
if 'msp' in params['output_filetype']:
"""
logging.info("Writing MSP file with unmodified peptides")
write_msp(
all_preds,
peprec_batch[peprec_batch['modifications'] == '-'],
output_filename="{}_unmodified".format(params['output_filename']),
write_mode=write_mode,
)
"""
logging.info("Writing MSP file")
spectrum_output.write_msp(
all_preds,
peprec_batch,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode,
)
if 'mgf' in params['output_filetype']:
logging.info("Writing MGF file")
spectrum_output.write_mgf(
all_preds,
peprec=peprec_batch,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode
)
if 'bibliospec' in params['output_filetype']:
all_preds["charge"] = charges
all_preds["ion"] = ions
all_preds["ionnumber"] = ionnumbers
all_preds["mz"] = np.concatenate(mz_bufs, axis=None)
all_preds["prediction"] = np.concatenate(prediction_bufs, axis=None)
if not return_results:
if "mgf" in out_formats:
print("writing MGF file {}_predictions.mgf...".format(output_filename))
spectrum_output.write_mgf(
all_preds, peprec=data, output_filename=output_filename
)
if "msp" in out_formats:
print("writing MSP file {}_predictions.msp...".format(output_filename))
spectrum_output.write_msp(
all_preds, data, output_filename=output_filename
)
if "bibliospec" in out_formats:
print("writing SSL/MS2 files...")
spectrum_output.write_bibliospec(
all_preds, data, params, output_filename=output_filename
)
if "spectronaut" in out_formats:
print("writing Spectronaut CSV files...")
spectrum_output.write_spectronaut(
all_preds, data, params, output_filename=output_filename
)
if "csv" in out_formats:
write_mode=write_mode
)
if 'bibliospec' in params['output_filetype']:
logging.info("Writing BiblioSpec SSL and MS2 files")
spectrum_output.write_bibliospec(
all_preds,
peprec_batch,
ms2pip_params,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode
)
if 'spectronaut' in params['output_filetype']:
logging.info("Writing Spectronaut CSV file")
spectrum_output.write_spectronaut(
all_preds,
peprec_batch,
ms2pip_params,
output_filename="{}".format(params['output_filename']),
write_mode=write_mode
)
del all_preds
del peprec_batch
if "msp" in out_formats:
print("writing MSP file {}_predictions.msp...".format(output_filename))
spectrum_output.write_msp(
all_preds, data, output_filename=output_filename
)
if "bibliospec" in out_formats:
print("writing SSL/MS2 files...")
spectrum_output.write_bibliospec(
all_preds, data, params, output_filename=output_filename
)
if "spectronaut" in out_formats:
print("writing Spectronaut CSV files...")
spectrum_output.write_spectronaut(
all_preds, data, params, output_filename=output_filename
)
if "csv" in out_formats:
print("writing CSV {}_predictions.csv...".format(output_filename))
all_preds.to_csv(
"{}_predictions.csv".format(output_filename), index=False
)
sys.stdout.write("done!\n")
else:
return all_preds