Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def standardize_citation(*args, **kwargs):
    """
    Deprecated alias for standardize_citekey.

    Emits a FutureWarning announcing the rename, then forwards all positional
    and keyword arguments to standardize_citekey and returns its result.
    """
    import warnings

    warnings.warn(
        "'standardize_citation' has been renamed to 'standardize_citekey'"
        " and will be removed in a future release.",
        category=FutureWarning,
        # Point the warning at the caller's line rather than at this wrapper,
        # so users can locate the call site they need to update.
        stacklevel=2,
    )
    return standardize_citekey(*args, **kwargs)
If extracting the citation from the "id" field, uses the infer_citekey_prefix function to set the prefix.
For example, if the extracted standard_id does not begin with a supported prefix (e.g. "doi:", "pmid:" or "raw:"),
the citation is assumed to be raw and given a "raw:" prefix.
The extracted citation is checked for validity and standardized, after which it is the final "standard_id".
Regarding csl_item modification, the csl_item "id" field is set to the standard_citation and the note field
is created or updated with key-value pairs for standard_id and original_id.
Note that the Manubot software generally refers to the "id" of a CSL Item as a citekey.
However, in this context, we use "id" rather than "citekey" for consistency with CSL's "id" field.
"""
original_id = self.get("id")
self.infer_id()
original_standard_id = self["id"]
assert is_valid_citekey(original_standard_id, allow_raw=True)
standard_id = standardize_citekey(original_standard_id, warn_if_changed=False)
add_to_note = {}
note_dict = self.note_dict
if original_id and original_id != standard_id:
if original_id != note_dict.get("original_id"):
add_to_note["original_id"] = original_id
if original_standard_id and original_standard_id != standard_id:
if original_standard_id != note_dict.get("original_standard_id"):
add_to_note["original_standard_id"] = original_standard_id
if standard_id != note_dict.get("standard_id"):
add_to_note["standard_id"] = standard_id
self.note_append_dict(dictionary=add_to_note)
self.set_id(standard_id)
return self
def cli_cite(args):
    """
    Main function for the manubot cite command-line interface.

    Converts each citekey in ``args.citekeys`` to a CSL Item. When
    ``args.render`` is false, the collected items are written as CSL JSON to
    ``args.output`` if set, otherwise to stdout.

    Citekeys rejected by ``is_valid_citekey`` are skipped. A failure while
    processing one citekey is logged and does not abort the remaining ones.

    Does not allow user to directly specify Pandoc's --to argument, due to
    inconsistent citation rendering by output format. See
    https://github.com/jgm/pandoc/issues/4834
    """
    # generate CSL JSON data
    csl_list = []
    for citekey in args.citekeys:
        try:
            if not is_valid_citekey(citekey):
                continue
            citekey = standardize_citekey(citekey)
            csl_item = citekey_to_csl_item(citekey, prune=args.prune_csl)
            csl_list.append(csl_item)
        except Exception as error:
            # Deliberately broad: one bad citekey should not prevent the
            # remaining citekeys from being processed.
            logging.error(
                f"citekey_to_csl_item for {citekey!r} failed "
                f"due to a {error.__class__.__name__}:\n{error}"
            )
            logging.info(error, exc_info=True)

    # output CSL JSON data, if --render is False
    if not args.render:
        if args.output:
            with args.output.open("w", encoding="utf-8") as write_file:
                json.dump(csl_list, write_file, ensure_ascii=False, indent=2)
        else:
            # stdout is not ours to close: writing without a `with` block
            # leaves it usable for the rest of the process.
            json.dump(csl_list, sys.stdout, ensure_ascii=False, indent=2)
tag_df = tag_df.rename(columns={"citation": "detagged_citekey"})
for detagged_citekey in tag_df.detagged_citekey:
is_valid_citekey(detagged_citekey, allow_raw=True)
citekeys_df = citekeys_df.merge(
tag_df[["manuscript_citekey", "detagged_citekey"]], how="left"
)
else:
citekeys_df["detagged_citekey"] = None
logging.info(
f"missing {args.citation_tags_path} file: no citation tags (citekey aliases) set"
)
citekeys_df.detagged_citekey.fillna(
citekeys_df.manuscript_citekey.astype(str), inplace=True
)
citekeys_df["standard_citekey"] = citekeys_df.detagged_citekey.map(
standardize_citekey
)
citekeys_df["short_citekey"] = citekeys_df.standard_citekey.map(shorten_citekey)
citekeys_df = citekeys_df.sort_values(["standard_citekey", "detagged_citekey"])
citekeys_df.to_csv(args.citations_path, sep="\t", index=False)
check_collisions(citekeys_df)
check_multiple_citation_strings(citekeys_df)
return citekeys_df