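# Module-level imports assumed by the functions below (added for completeness;
# the exact import layout of the original module may differ). Manubot-internal
# helpers referenced here (get_manubot_user_agent, _get_eutils_rate_limiter,
# base_url, _passthrough_zotero_data, csl_item_from_pubmed_article) are defined
# elsewhere in manubot and are not reproduced in this excerpt.
import collections
import json
import logging
import xml.etree.ElementTree

import requests

from manubot.util import get_manubot_user_agent
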
def get_pmid_for_doi(doi):
    """
    Query NCBI's E-utilities ESearch to retrieve the PMID for a DOI.
    Return None when the query fails or does not identify a single PMID.
    """
    assert isinstance(doi, str)
    assert doi.startswith("10.")
    params = {"db": "pubmed", "term": f"{doi}[DOI]"}
    headers = {"User-Agent": get_manubot_user_agent()}
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    with _get_eutils_rate_limiter():
        response = requests.get(url, params=params, headers=headers)
    if not response.ok:
        logging.warning(f"Status code {response.status_code} querying {response.url}\n")
        return None
    try:
        element_tree = xml.etree.ElementTree.fromstring(response.text)
    except Exception:
        logging.warning(
            f"Error in ESearch XML for DOI: {doi}.\n"
            f"Response from {response.url}:\n{response.text}"
        )
        return None
    id_elems = element_tree.findall("IdList/Id")
    if len(id_elems) != 1:
        # A DOI search should match exactly one PubMed record; anything else is
        # ambiguous, so give up rather than guess.
        logging.warning(f"Found {len(id_elems)} PMIDs for DOI {doi} at {response.url}")
        return None
    (id_elem,) = id_elems
    return id_elem.text
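
# Usage sketch (added for illustration; not part of the original module): the
# ESearch endpoint wrapped by get_pmid_for_doi can be queried for any PubMed
# search term. The helper name below is hypothetical.
def _esearch_pmids_sketch(term):
    """Return every PMID that ESearch reports for an arbitrary query term."""
    params = {"db": "pubmed", "term": term}
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    response = requests.get(url, params=params)
    tree = xml.etree.ElementTree.fromstring(response.text)
    return [id_elem.text for id_elem in tree.findall("IdList/Id")]
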
def get_isbn_csl_item_citoid(isbn):
    """
    Return CSL JSON Data for an ISBN using the Wikipedia Citoid API.
    https://en.wikipedia.org/api/rest_v1/#!/Citation/getCitation
    """
    import requests

    from manubot.util import get_manubot_user_agent

    headers = {"User-Agent": get_manubot_user_agent()}
    url = f"https://en.wikipedia.org/api/rest_v1/data/citation/mediawiki/{isbn}"
    response = requests.get(url, headers=headers)
    result = response.json()
    if isinstance(result, dict):
        # Citoid returns a dict (rather than a one-element list) on failure.
        if result["title"] == "Not found.":
            raise KeyError(f"Metadata for ISBN {isbn} not found at {url}")
        else:
            raise Exception(
                f"Unable to extract CSL from JSON metadata for ISBN {isbn}:\n"
                f"{json.dumps(result, indent=2)}"
            )
    (mediawiki,) = result
    csl_item = collections.OrderedDict()
    csl_item["type"] = mediawiki.get("itemType", "book")
    if "title" in mediawiki:
        csl_item["title"] = mediawiki["title"]
    # Further fields from the Citoid response (creators, publisher, date, ISBN,
    # URL, ...) can be mapped onto csl_item here.
    return csl_item
def search_query(identifier):
    """
    Retrieve Zotero metadata for a DOI, ISBN, PMID, or arXiv ID.
    Example usage:
    ```shell
    curl --silent \
        --data '10.2307/4486062' \
        --header 'Content-Type: text/plain' \
        http://127.0.0.1:1969/search
    ```
    """
    api_url = f"{base_url}/search"
    headers = {"User-Agent": get_manubot_user_agent(), "Content-Type": "text/plain"}
    response = requests.post(api_url, headers=headers, data=str(identifier))
    try:
        zotero_data = response.json()
    except Exception as error:
        logging.warning(
            f"Error parsing search_query output as JSON for {identifier}:\n{response.text}"
        )
        raise error
    zotero_data = _passthrough_zotero_data(zotero_data)
    return zotero_data
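
# Usage sketch (added for illustration; not part of the original module): the
# same /search query issued against a locally running Zotero translation-server,
# mirroring the curl command in the docstring above. The helper name and default
# server address are assumptions.
def _search_query_sketch(identifier, server="http://127.0.0.1:1969"):
    """POST an identifier to translation-server /search and return its JSON."""
    response = requests.post(
        f"{server}/search",
        headers={"Content-Type": "text/plain"},
        data=str(identifier),
    )
    response.raise_for_status()
    return response.json()
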
def export_as_csl(zotero_data):
    """
    Export Zotero JSON data to CSL JSON using a translation-server /export query.
    Performs a similar query to the following curl command:
    ```
    curl --verbose \
        --data @items.json \
        --header 'Content-Type: application/json' \
        'https://translate.manubot.org/export?format=csljson'
    ```
    """
    api_url = f"{base_url}/export"
    params = {"format": "csljson"}
    headers = {"User-Agent": get_manubot_user_agent()}
    response = requests.post(api_url, params=params, headers=headers, json=zotero_data)
    if not response.ok:
        message = f"export_as_csl: translation-server returned status code {response.status_code}"
        logging.warning(f"{message} with the following output:\n{response.text}")
        raise requests.HTTPError(message)
    try:
        csl_json = response.json()
    except Exception as error:
        logging.warning(f"Error parsing export_as_csl output as JSON:\n{response.text}")
        raise error
    return csl_json
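
# Usage sketch (added for illustration; not part of the original module): chain
# the two translation-server helpers above to go from an identifier to CSL JSON.
# The helper name is hypothetical; it assumes base_url points at a running
# translation-server instance.
def _identifier_to_csl_sketch(identifier):
    """Resolve an identifier to CSL JSON via /search followed by /export."""
    zotero_data = search_query(identifier)
    return export_as_csl(zotero_data)
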
def _get_literature_citation_exporter_csl_item(database, identifier):
    """
    Fetch a CSL Item from the NCBI Literature Citation Exporter.
    https://api.ncbi.nlm.nih.gov/lit/ctxp
    """
    if database not in {"pubmed", "pmc"}:
        logging.error(
            f"Error calling _get_literature_citation_exporter_csl_item.\n"
            f'database must be either "pubmed" or "pmc", not {database}'
        )
        assert False
    if not identifier:
        logging.error(
            f"Error calling _get_literature_citation_exporter_csl_item.\n"
            f"identifier cannot be blank"
        )
        assert False
    params = {"format": "csl", "id": identifier}
    headers = {"User-Agent": get_manubot_user_agent()}
    url = f"https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/{database}/"
    response = requests.get(url, params=params, headers=headers)
    try:
        csl_item = response.json()
    except Exception as error:
        logging.error(
            f"Error fetching {database} metadata for {identifier}.\n"
            f"Invalid JSON response from {response.url}:\n{response.text}"
        )
        raise error
    assert isinstance(csl_item, dict)
    if csl_item.get("status", "okay") == "error":
        logging.error(
            f"Error fetching {database} metadata for {identifier}.\n"
            f"Literature Citation Exporter returned JSON indicating an error for {response.url}\n"
            f"{json.dumps(csl_item, indent=2)}"
        )
        # Treat an error payload as fatal, mirroring the parameter checks above.
        assert False
    return csl_item
def web_query(url):
    """
    Return Zotero citation metadata for a URL as a list containing a single
    element that is a dictionary with the URL's metadata.
    """
    headers = {"User-Agent": get_manubot_user_agent(), "Content-Type": "text/plain"}
    params = {"single": 1}
    api_url = f"{base_url}/web"
    response = requests.post(api_url, params=params, headers=headers, data=str(url))
    try:
        zotero_data = response.json()
    except Exception as error:
        logging.warning(
            f"Error parsing web_query output as JSON for {url}:\n{response.text}"
        )
        raise error
    if response.status_code == 300:
        # When single=1 is specified, multiple results should never be returned
        logging.warning(
            f"web_query returned multiple results for {url}:\n"
            + json.dumps(zotero_data, indent=2)
        )
    return zotero_data
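
# Usage sketch (added for illustration; not part of the original module): the
# /web endpoint queried directly with single=1, mirroring web_query above. The
# helper name and default server address are assumptions.
def _web_query_sketch(page_url, server="http://127.0.0.1:1969"):
    """POST a URL to translation-server /web and return its JSON metadata."""
    response = requests.post(
        f"{server}/web",
        params={"single": 1},
        headers={"Content-Type": "text/plain"},
        data=str(page_url),
    )
    response.raise_for_status()
    return response.json()
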
def get_pubmed_csl_item(pmid):
    """
    Query NCBI E-Utilities to create CSL Items for PubMed IDs.
    https://github.com/manubot/manubot/issues/21
    https://github.com/ncbi/citation-exporter/issues/3#issuecomment-355313143
    """
    pmid = str(pmid)
    params = {"db": "pubmed", "id": pmid, "rettype": "full"}
    headers = {"User-Agent": get_manubot_user_agent()}
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    with _get_eutils_rate_limiter():
        response = requests.get(url, params=params, headers=headers)
    try:
        element_tree = xml.etree.ElementTree.fromstring(response.text)
        # EFetch wraps the record in a PubmedArticleSet; unwrap the single
        # PubmedArticle element.
        (element_tree,) = list(element_tree)
    except Exception as error:
        logging.error(
            f"Error fetching PubMed metadata for {pmid}.\n"
            f"Invalid XML response from {response.url}:\n{response.text}"
        )
        raise error
    try:
        csl_item = csl_item_from_pubmed_article(element_tree)
    except Exception as error:
        msg = f"Error parsing the following PubMed metadata for PMID {pmid}:\n{response.text}"
        logging.error(msg)
        raise error
    return csl_item