Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_get_citation_strings_1():
    """
    Check get_citation_strings against a short manuscript excerpt.

    The excerpt holds three '@doi:' citations inside one bracketed group and
    one '@url:' citation. It also contains '{@fig:citescore}' and
    '@10.5061/bad_doi', neither of which is listed in the expected output.
    The result is compared to the expected strings in sorted order.
    """
    # The literal's content stays flush-left so the string passed to
    # get_citation_strings is unchanged.
    manuscript = '''
Sci-Hub has released article request records from its server logs, covering 165 days from September 2015 through February 2016 [@doi:10.1126/science.352.6285.508; @doi:10.1126/science.aaf5664; @doi:10.5061/dryad.q447c/1].
We filtered for valid requests by excluding DOIs not included in our literature catalog and omitting requests that occurred before an article's publication date.
Figure {@fig:citescore}B shows that articles from highly cited journals were visited much more frequently on average.
@10.5061/bad_doi says blah but @url:https://www.courtlistener.com/docket/4355308/1/elsevier-inc-v-sci-hub/ disagrees.
'''
    found = get_citation_strings(manuscript)
    wanted = [
        '@doi:10.1126/science.352.6285.508',
        '@doi:10.1126/science.aaf5664',
        '@doi:10.5061/dryad.q447c/1',
        '@url:https://www.courtlistener.com/docket/4355308/1/elsevier-inc-v-sci-hub/',
    ]
    # Sort in place so the comparison is made against sorted order.
    wanted.sort()
    assert found == wanted
def get_citation_df(args, text):
"""
Generate citation_df and save it to 'citations.tsv'.
"""
citation_df = pandas.DataFrame(
{'string': get_citation_strings(text)}
)
if args.citation_tags_path.is_file():
tag_df = pandas.read_table(args.citation_tags_path)
na_rows_df = tag_df[tag_df.isnull().any(axis='columns')]
if not na_rows_df.empty:
logging.error(f'{args.citation_tags_path} contains rows with missing values:\n{na_rows_df}\nThis error can be caused by using spaces rather than tabs to delimit fields.\nProceeding to reread TSV with delim_whitespace=True.') # noqa: E501
tag_df = pandas.read_table(args.citation_tags_path, delim_whitespace=True)
tag_df['string'] = '@tag:' + tag_df.tag
for citation in tag_df.citation:
is_valid_citation_string('@' + citation)
citation_df = citation_df.merge(tag_df[['string', 'citation']], how='left')
else:
citation_df['citation'] = None
logging.info(f'missing {args.citation_tags_path} file: no citation tags set')
citation_df.citation.fillna(citation_df.string.astype(str).str.lstrip('@'), inplace=True)
citation_df['standard_citation'] = citation_df.citation.map(standardize_citation)