Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def validate(self, mzqc: MzQcFile):
    """Semantically validate the CV parameters of an mzQC file.

    Loads every controlled vocabulary declared in
    ``mzqc.controlled_vocabularies`` and then checks, for each CV
    parameter returned by ``self._get_cv_parameters(mzqc)``, that its
    ``cvRef`` names a loaded vocabulary and that its ``accession`` can
    be found in that vocabulary.

    Arguments:
        mzqc: the mzQC file object to validate.

    Raises:
        SemanticError: if a vocabulary cannot be loaded, if a parameter
            references a vocabulary that was not declared, or if its
            accession is not found in the referenced vocabulary.
    """
    # Load the mzqc file specific ontologies.
    cvs: Dict[str, TermList] = dict()
    for cv in mzqc.controlled_vocabularies:
        try:
            cvs[cv.ref] = Ontology(cv.uri, False)
        except Exception as err:
            # The error was previously built but never raised, so a
            # vocabulary that failed to load was silently dropped.
            raise SemanticError(
                f'Failed to load cv {cv.name} from {cv.uri}. Does {cv.ref} exist?'
            ) from err
    # For all cv terms involved:
    for cv_parameter in self._get_cv_parameters(mzqc):
        # Verify that cvRefs are valid.
        if cv_parameter.cvRef not in cvs:
            # Use `cvRef` here as everywhere else in this method; the
            # message previously read a differently spelled attribute.
            raise SemanticError(f'Unknown CV reference <{cv_parameter.cvRef}> in '
                                f'element `{str(type(cv_parameter))}`')
        # Verify that the term exists in the CV.
        cv_term = cvs[cv_parameter.cvRef].get(cv_parameter.accession)
        if cv_term is None:
            raise SemanticError(f'Term {cv_parameter.name} not found in CV <{cv_parameter.cvRef}>')
        # Verify that the term name is correct.
        # NOTE(review): no name comparison is present in the visible code.
# Command-line entry point: parses the arguments, loads the optional UMLS
# metathesaurus, the Disease Ontology and the optional stopword list, then
# starts iterating over the ontology terms.
# NOTE(review): the `print` statements below use Python 2 syntax.
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Generate term list from Disease Ontology and UMLS Metathesarus for terms')
parser.add_argument('--diseaseOntologyFile', required=True, type=str, help='Path to the Disease Ontology OBO file')
parser.add_argument('--stopwords',required=False,type=str,help='File containing terms to ignore')
parser.add_argument('--umlsConceptFile', required=False, type=str, help='Path on the MRCONSO.RRF file in UMLS metathesaurus')
parser.add_argument('--outFile', required=True, type=str, help='Path to output wordlist file')
args = parser.parse_args()
# The metathesaurus is only loaded when --umlsConceptFile was supplied.
if args.umlsConceptFile:
print "Loading metathesaurus..."
metathesaurus = loadMetathesaurus(args.umlsConceptFile)
print "Loading disease ontology..."
ont = pronto.Ontology(args.diseaseOntologyFile)
# NOTE(review): cancerTerm is looked up here, but the loop below iterates
# over the whole ontology (the rchildren variants are commented out).
cancerTerm = findTerm(ont,'cancer')
# Default to an empty stopword set when no --stopwords file is given.
stopwords = set()
if args.stopwords:
print "Loading stopwords..."
with codecs.open(args.stopwords,'r','utf8') as f:
# One stopword per line, stripped and lower-cased.
stopwords = [ line.strip().lower() for line in f ]
stopwords = set(stopwords)
print "Processing..."
allterms = {}
# Skip down to the children of the cancer term and then find all their descendants (recursive children)
for term in ont: #.rchildren(): #cancerTerm.children.rchildren():
if args.umlsConceptFile:
# Get the CUIDs for this term
# NOTE(review): the visible snippet ends here; the rest of the loop body is not shown.
def get_ontology(name):
    """Import the requested ontology with pronto.

    Tries to reach the online version, and if it fails then
    use the local version instead (emitting a warning that names the
    version of the local copy).

    Arguments:
        name (str): the name of the ontology to import (either
            'MS' or 'IMS')

    Returns:
        The ontology object built by ``pronto.Ontology``.

    Raises:
        ValueError: if ``name`` is neither 'MS' nor 'IMS'.
    """
    # Reject unknown names first, before touching the global warning filters.
    if name not in ('MS', 'IMS'):
        raise ValueError("Unknow ontology to import: {}".format(name))

    warnings.simplefilter('ignore', pronto.utils.ProntoWarning)

    # Online location, bundled fallback file, and the extra positional
    # arguments passed to pronto.Ontology for each supported ontology.
    if name == 'MS':
        url, local_file, extra = MS_CV_URL, "psi-ms.obo", (False,)
    else:
        url, local_file, extra = IMS_CV_URL, "imagingMS.obo", (True, 1)

    try:
        obo = pronto.Ontology(url, *extra)
    except Exception:
        # Catch Exception rather than BaseException so that
        # KeyboardInterrupt and SystemExit are not swallowed by the fallback.
        obo = pronto.Ontology(os.path.join(ONTOLOGIES_DIR, local_file), *extra)
        warnings.warn("Could not use latest online {} ontology, "
                      "using local (version {})".format(name, obo.meta['version']))
    return obo
# Subclass of MzMLFile that adds its own XPath templates and replaces the
# default vocabulary with the packaged imagingMS ontology.
class ImzMLFile(MzMLFile):
# Work on a copy so that updating the mapping does not modify
# MzMLFile._XPATHS itself.
_XPATHS = copy.copy(MzMLFile._XPATHS)
_XPATHS.update(
{
# Path templates; `{root}`, `{instrument}`, `{spectrum}` and `{scanList}`
# are placeholders filled in elsewhere (not visible in this chunk).
"scan_settings": "{root}/s:scanSettingsList/s:scanSettings/s:cvParam",
"source": "{root}/{instrument}List/{instrument}/s:componentList/s:source/s:cvParam",
"scan_dimensions": "{root}/s:run/{spectrum}List/{spectrum}/{scanList}/s:scan/s:cvParam",
"scan_ref": "{root}/s:run/{spectrum}List/{spectrum}/s:referenceableParamGroupRef",
"ref_param_list": "{root}/s:referenceableParamGroupList/s:referenceableParamGroup",
}
)
# Vocabulary loaded from the `ontologies/imagingMS.obo` resource of the
# `mzml2isa` package when the class body is executed.
_VOCABULARY = pronto.Ontology(
pkg_resources.resource_stream("mzml2isa", "ontologies/imagingMS.obo"),
import_depth=1,
)
# Starts from the parent's assay parameters and sets the "file_content" entry.
@classmethod
def _assay_parameters(cls):
terms = super(ImzMLFile, cls)._assay_parameters()
terms["file_content"] = [
_CVParameter(
accession="MS:1000525",
cv=True,
name="Spectrum representation",
plus1=True,
value=False,
software=False,
# NOTE(review): the visible snippet ends here, inside the unclosed
# `_CVParameter(` call; the rest of the method is not shown.
# NOTE(review): the opening of this mapping and the enclosing class header
# are not visible in this chunk. The entries below map a short key to a path
# template; `{root}` and `{scanList}` are placeholders.
"cv": "{root}/s:cvList/s:cv",
"raw_file": "{root}/s:fileDescription/s:sourceFileList/s:sourceFile",
"scan_sp": "s:cvParam",
"scan_combination": "{scanList}/s:cvParam",
"scan_configuration": "{scanList}/s:scan/s:cvParam",
"scan_binary": "s:binaryDataArrayList/s:binaryDataArray/s:cvParam",
"scan_activation": "s:precursorList/s:precursor/s:activation/s:cvParam",
"scan_isolation_window": "s:precursorList/s:precursor/s:isolationWindow/s:cvParam",
"scan_selected_ion": "s:precursorList/s:precursor/s:selectedIonList/s:selectedIon/s:cvParam",
"ref_sp": "s:referenceableParamGroupRef",
# NOTE(review): "ref_combination" and "ref_binary" share the same template.
"ref_combination": "{scanList}/s:scan/s:referenceableParamGroupRef",
"ref_binary": "{scanList}/s:scan/s:referenceableParamGroupRef",
}
# `~pronto.Ontology`: the default MS controlled vocabulary to use.
# Loaded from the `ontologies/psi-ms.obo` resource of the `mzml2isa` package.
_VOCABULARY = pronto.Ontology(
pkg_resources.resource_stream("mzml2isa", "ontologies/psi-ms.obo"),
imports=False,
)
def __init__(self, filesystem, path, vocabulary=None):
"""Open an ``mzML`` file from the given filesystem and path.
Arguments:
filesystem (`str` or `~fs.base.FS`): the filesystem the file is
located on, either as a filesystem instance or an FS URL.
path (str): the path to the file on the provided filesystem.
vocabulary (`~pronto.Ontology`, optional): a controlled vocabulary
to use (or `None` to use the default one).
Raises:
`~fs.errors.ResourceNotFound`: when the path does not exist
name (str): the name of the ontology to import (either
'MS' or 'IMS')
"""
# NOTE(review): the statements below read `name`, which is not a parameter
# of this signature (filesystem, path, vocabulary), and never use those
# parameters. The body looks like it belongs to a different function; confirm
# against the original file.
# Ignore pronto warnings while the ontology is loaded.
warnings.simplefilter('ignore', pronto.utils.ProntoWarning)
if name == 'MS':
try:
# First attempt: the online MS ontology.
obo = pronto.Ontology(MS_CV_URL, False)
except BaseException as be:
# Fallback: the local psi-ms.obo under ONTOLOGIES_DIR, with a warning.
# NOTE(review): BaseException also catches KeyboardInterrupt/SystemExit.
obo = pronto.Ontology(os.path.join(ONTOLOGIES_DIR,"psi-ms.obo"), False)
warnings.warn("Could not use latest online MS ontology, "
"using local (version {})".format(obo.meta['version']))
elif name == 'IMS':
try:
# First attempt: the online IMS ontology.
obo = pronto.Ontology(IMS_CV_URL, True, 1)
except BaseException as be:
# Fallback: the local imagingMS.obo under ONTOLOGIES_DIR, with a warning.
obo = pronto.Ontology(os.path.join(ONTOLOGIES_DIR,"imagingMS.obo"), True, 1)
warnings.warn("Could not use latest online IMS ontology, "
"using local (version {})".format(obo.meta['version']))
else:
# Any other name is rejected.
raise ValueError("Unknow ontology to import: {}".format(name))
return obo
Arguments:
name (str): the name of the ontology to import (either
'MS' or 'IMS')
"""
# NOTE(review): the `def` line for this body is not visible in this chunk; the
# statements read a variable called `name` and return the loaded ontology.
# Ignore pronto warnings while the ontology is loaded.
warnings.simplefilter('ignore', pronto.utils.ProntoWarning)
if name == 'MS':
try:
# First attempt: the online MS ontology.
obo = pronto.Ontology(MS_CV_URL, False)
except BaseException as be:
# Fallback: the local psi-ms.obo under ONTOLOGIES_DIR, with a warning.
# NOTE(review): BaseException also catches KeyboardInterrupt/SystemExit.
obo = pronto.Ontology(os.path.join(ONTOLOGIES_DIR,"psi-ms.obo"), False)
warnings.warn("Could not use latest online MS ontology, "
"using local (version {})".format(obo.meta['version']))
elif name == 'IMS':
try:
# First attempt: the online IMS ontology.
obo = pronto.Ontology(IMS_CV_URL, True, 1)
except BaseException as be:
# Fallback: the local imagingMS.obo under ONTOLOGIES_DIR, with a warning.
obo = pronto.Ontology(os.path.join(ONTOLOGIES_DIR,"imagingMS.obo"), True, 1)
warnings.warn("Could not use latest online IMS ontology, "
"using local (version {})".format(obo.meta['version']))
else:
# Any other name is rejected.
raise ValueError("Unknow ontology to import: {}".format(name))
return obo
def get_ontology(name):
    """Import the requested ontology with pronto.

    Tries to reach the online version, and if it fails then
    use the local version instead (emitting a warning that names the
    version of the local copy).

    Arguments:
        name (str): the name of the ontology to import (either
            'MS' or 'IMS')

    Returns:
        The ontology object built by ``pronto.Ontology``.

    Raises:
        ValueError: if ``name`` is neither 'MS' nor 'IMS'.
    """
    # Reject unknown names first, before touching the global warning filters.
    if name not in ('MS', 'IMS'):
        raise ValueError("Unknow ontology to import: {}".format(name))

    warnings.simplefilter('ignore', pronto.utils.ProntoWarning)

    # Online location, bundled fallback file, and the extra positional
    # arguments passed to pronto.Ontology for each supported ontology.
    if name == 'MS':
        url, local_file, extra = MS_CV_URL, "psi-ms.obo", (False,)
    else:
        url, local_file, extra = IMS_CV_URL, "imagingMS.obo", (True, 1)

    try:
        obo = pronto.Ontology(url, *extra)
    except Exception:
        # Catch Exception rather than BaseException so that
        # KeyboardInterrupt and SystemExit are not swallowed by the fallback.
        obo = pronto.Ontology(os.path.join(ONTOLOGIES_DIR, local_file), *extra)
        warnings.warn("Could not use latest online {} ontology, "
                      "using local (version {})".format(name, obo.meta['version']))
    return obo
def get_name_id_dict(hp_obo_file_name):
    """Map each term name in an OBO file to its identifier.

    The ontology is loaded with pronto and its JSON export is parsed;
    every top-level key of that JSON is treated as a term identifier
    whose entry carries a ``'name'`` field. When several identifiers
    share a name, the one encountered last wins.

    Arguments:
        hp_obo_file_name: location of the OBO file handed to
            ``pronto.Ontology``.

    Returns:
        dict: term name -> term identifier.
    """
    ontology = pronto.Ontology(hp_obo_file_name)
    terms_by_id = json.loads(ontology.json)
    return {
        entry['name']: term_id
        for term_id, entry in terms_by_id.items()
    }