Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def get_citation_df(args, text):
    """
    Generate citation_df and save it as a tab-separated file.

    Parameters
    ----------
    args
        Object exposing `citation_tags_path` (a path-like object with an
        `is_file()` method, pointing at an optional TSV of citation tags)
        and `citations_path` (destination of the generated TSV).
    text
        Passed to `get_citation_strings` to extract the citation strings.

    Returns
    -------
    pandas.DataFrame
        One row per citation string with the columns `string`, `citation`,
        `standard_citation` and `citation_id`, sorted by
        `standard_citation` then `citation`.
    """
    citation_df = pandas.DataFrame(
        {'string': get_citation_strings(text)}
    )
    if args.citation_tags_path.is_file():
        tag_df = pandas.read_table(args.citation_tags_path)
        na_rows_df = tag_df[tag_df.isnull().any(axis='columns')]
        if not na_rows_df.empty:
            logging.error(
                f'{args.citation_tags_path} contains rows with missing values:\n'
                f'{na_rows_df}\n'
                'This error can be caused by using spaces rather than tabs to delimit fields.\n'
                'Proceeding to reread TSV with delim_whitespace=True.'
            )
            # sep=r'\s+' is the documented equivalent of the deprecated
            # delim_whitespace=True option.
            tag_df = pandas.read_table(args.citation_tags_path, sep=r'\s+')
        tag_df['string'] = '@tag:' + tag_df['tag']
        # Called for its side effects only; the return value is not used here.
        for citation in tag_df['citation']:
            is_valid_citation_string('@' + citation)
        # 'string' is the only column shared by both frames, so naming it
        # explicitly matches the previous implicit merge key.
        citation_df = citation_df.merge(
            tag_df[['string', 'citation']],
            how='left',
            on='string',
        )
    else:
        citation_df['citation'] = None
        logging.info(f'missing {args.citation_tags_path} file: no citation tags set')
    # Citation strings without a tag mapping fall back to the string itself
    # minus its leading '@'. Assign the result back instead of using a chained
    # `inplace=True`, which does not update the frame under pandas
    # Copy-on-Write.
    fallback = citation_df['string'].astype(str).str.lstrip('@')
    citation_df['citation'] = citation_df['citation'].fillna(fallback)
    citation_df['standard_citation'] = citation_df['citation'].map(standardize_citation)
    citation_df['citation_id'] = citation_df['standard_citation'].map(get_citation_id)
    citation_df = citation_df.sort_values(['standard_citation', 'citation'])
    citation_df.to_csv(args.citations_path, sep='\t', index=False)
    check_collisions(citation_df)
    check_multiple_citation_strings(citation_df)
    return citation_df