Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
lambda x: is_valid_citekey(
x, allow_tag=True, allow_raw=True, allow_pandoc_xnos=True
),
The standard_id is extracted from a "standard_citation" field, the "note" field, or the "id" field.
If extracting the citation from the "id" field, uses the infer_citekey_prefix function to set the prefix.
For example, if the extracted standard_id does not begin with a supported prefix (e.g. "doi:", "pmid:" or "raw:"),
the citation is assumed to be raw and given a "raw:" prefix.
The extracted citation is checked for validity and standardized, after which it is the final "standard_id".
Regarding csl_item modification, the csl_item "id" field is set to the standard_id and the note field
is created or updated with key-value pairs for standard_id, original_standard_id, and original_id.
Note that the Manubot software generally refers to the "id" of a CSL Item as a citekey.
However, in this context, we use "id" rather than "citekey" for consistency with CSL's "id" field.
"""
original_id = self.get("id")
self.infer_id()
original_standard_id = self["id"]
assert is_valid_citekey(original_standard_id, allow_raw=True)
standard_id = standardize_citekey(original_standard_id, warn_if_changed=False)
add_to_note = {}
note_dict = self.note_dict
if original_id and original_id != standard_id:
if original_id != note_dict.get("original_id"):
add_to_note["original_id"] = original_id
if original_standard_id and original_standard_id != standard_id:
if original_standard_id != note_dict.get("original_standard_id"):
add_to_note["original_standard_id"] = original_standard_id
if standard_id != note_dict.get("standard_id"):
add_to_note["standard_id"] = standard_id
self.note_append_dict(dictionary=add_to_note)
self.set_id(standard_id)
return self
citekeys_df = pandas.DataFrame({"manuscript_citekey": get_citekeys(text)})
if args.citation_tags_path.is_file():
tag_df = pandas.read_csv(args.citation_tags_path, sep="\t")
na_rows_df = tag_df[tag_df.isnull().any(axis="columns")]
if not na_rows_df.empty:
logging.error(
f"{args.citation_tags_path} contains rows with missing values:\n"
f"{na_rows_df}\n"
"This error can be caused by using spaces rather than tabs to delimit fields.\n"
"Proceeding to reread TSV with delim_whitespace=True."
)
tag_df = pandas.read_csv(args.citation_tags_path, delim_whitespace=True)
tag_df["manuscript_citekey"] = "tag:" + tag_df.tag
tag_df = tag_df.rename(columns={"citation": "detagged_citekey"})
for detagged_citekey in tag_df.detagged_citekey:
is_valid_citekey(detagged_citekey, allow_raw=True)
citekeys_df = citekeys_df.merge(
tag_df[["manuscript_citekey", "detagged_citekey"]], how="left"
)
else:
citekeys_df["detagged_citekey"] = None
logging.info(
f"missing {args.citation_tags_path} file: no citation tags (citekey aliases) set"
)
citekeys_df.detagged_citekey.fillna(
citekeys_df.manuscript_citekey.astype(str), inplace=True
)
citekeys_df["standard_citekey"] = citekeys_df.detagged_citekey.map(
standardize_citekey
)
citekeys_df["short_citekey"] = citekeys_df.standard_citekey.map(shorten_citekey)
citekeys_df = citekeys_df.sort_values(["standard_citekey", "detagged_citekey"])
def cli_cite(args):
"""
Main function for the manubot cite command-line interface.
Does not allow user to directly specify Pandoc's --to argument, due to
inconsistent citation rendering by output format. See
https://github.com/jgm/pandoc/issues/4834
"""
# generate CSL JSON data
csl_list = list()
for citekey in args.citekeys:
try:
if not is_valid_citekey(citekey):
continue
citekey = standardize_citekey(citekey)
csl_item = citekey_to_csl_item(citekey, prune=args.prune_csl)
csl_list.append(csl_item)
except Exception as error:
logging.error(
f"citekey_to_csl_item for {citekey!r} failed "
f"due to a {error.__class__.__name__}:\n{error}"
)
logging.info(error, exc_info=True)
# output CSL JSON data, if --render is False
if not args.render:
write_file = (
args.output.open("w", encoding="utf-8") if args.output else sys.stdout
)