Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
doi_patterns = [
    # The capture group must be a well-formed named group: the previous text
    # "(?P10." is rejected by re.compile ("unknown extension ?P1").
    # NOTE(review): the group name "doi" is restored on the assumption that
    # consumers read match.group('doi') -- confirm against the substitution
    # code that uses these patterns.
    Matchable(
        ['10.1145/0001234.1234567'],
        re.compile(r'(?P<doi>10.\d{4,9}/[-._;()/:A-Z0-9]+)', re.I),
    ),
]
"""List of Matchable for DOIs in text.
We should probably match DOIs first because they are the source of a
lot of false positives for arxiv matches.
Only using the most general expression from
https://www.crossref.org/blog/dois-and-matching-regular-expressions/
"""
_archive = '|'.join(map(re.escape, taxonomy.definitions.ARCHIVES.keys()))
"""Regex alternation of every arXiv archive id, each id regex-escaped."""
# Same construction as _archive, built from the category ids instead.
_category = '|'.join(map(re.escape, taxonomy.definitions.CATEGORIES.keys()))
_arxiv_id_prefix = r'(?Par[xX]iv:)?'
"""Attempt to catch the arxiv prefix in front of arxiv ids so it can be
included in the <a> tag anchor. ARXIVNG-1284"""
basic_arxiv_id_patterns = [
Matchable(['math/0501233', 'hep-ph/0611734', 'gr-qc/0112123'],
re.compile(_arxiv_id_prefix + r'(?P(%s)\/\d{2}[01]\d{4}(v\d*)?)'
% _archive, re.I)),
Matchable(['1609.05068', '1207.1234v1', '1207.1234', '1807.12345',
'1807.12345v1', '1807.12345v12'],
re.compile(r'(?\d{4}\.\d{4,5}(v\d*)?)',
re.I)),</a>
Parameters
----------
arxiv_identifier : :class:`Identifier`
primary_category : :class: `Category`
Returns
-------
Dict of values to add to response_data
"""
# Set up the context
context = None
if ('context' in request.args and (
request.args['context'] == 'arxiv'
or request.args['context'] in taxonomy.definitions.CATEGORIES
or request.args['context'] in taxonomy.definitions.ARCHIVES)):
context = request.args['context']
elif primary_category:
pc = primary_category.canonical or primary_category
if not arxiv_identifier.is_old_id: # new style IDs
context = pc.id
else: # Old style id
if pc.id in taxonomy.definitions.ARCHIVES:
context = pc.id
else:
context = arxiv_identifier.archive
else:
context = None
response_data['browse_context'] = context
next_url = None
"""Attempt to validate the provided arXiv ID.
Parse constituent parts.
"""
self.ids = arxiv_id
"""The ID as specified."""
self.id: str = arxiv_id
self.archive: Optional[str] = None
self.filename: Optional[str] = None
self.year: Optional[int] = None
self.month: Optional[int] = None
self.is_old_id: Optional[bool] = None
if self.ids in taxonomy.definitions.ARCHIVES:
raise IdentifierIsArchiveException(
taxonomy.definitions.ARCHIVES[self.ids]['name'])
for subtup in SUBSTITUTIONS:
arxiv_id = re.sub(subtup[0],
subtup[1],
arxiv_id,
count=subtup[2],
flags=subtup[3])
self.version = 0
parse_actions = ((RE_ARXIV_OLD_ID, self._parse_old_id),
(RE_ARXIV_NEW_ID, self._parse_new_id))
id_match = None
for regex, parse_action in parse_actions:
id_match = re.match(regex, arxiv_id)
if id_match:
Raises
------
BadRequest
Raised when request parameters are missing, invalid, or when an ID
redirect cannot be returned even when the request parameters are valid.
"""
if id is None or not id:
raise BadRequest('Missing article identifier')
if function not in ['prev', 'next']:
raise BadRequest('Missing or invalid function request')
if context is None or not context:
raise BadRequest('Missing context')
if not (context in CATEGORIES_ACTIVE
or context in ARCHIVES or context == 'all'):
raise BadRequest('Invalid context')
try:
arxiv_id = Identifier(id)
except IdentifierException:
raise BadRequest(escape(f"Invalid article identifier {id}"))
seq_id = get_sequential_id(paper_id=arxiv_id,
is_next=function == 'next',
context=context)
if not seq_id:
raise BadRequest(
escape(f'No {function} article found for '
f'{arxiv_id.id} in {context}'))
redirect_url = url_for('browse.abstract', arxiv_id=seq_id, context=context)
def _to_classification(value: str) -> Tuple[Classification, ...]:
clsns = []
if value in taxonomy.definitions.GROUPS:
klass = taxonomy.Group
field = "group"
elif value in taxonomy.definitions.ARCHIVES:
klass = taxonomy.Archive
field = "archive"
elif value in taxonomy.definitions.CATEGORIES:
klass = taxonomy.Category
field = "category"
else:
raise ValueError("not a valid classification")
cast_value = klass(value)
clsns.append(Classification(**{field: {"id": value}})) # type: ignore
if cast_value.unalias() != cast_value:
clsns.append(
Classification( # type: ignore # noqa: E501 # fmt: off
**{field: {"id": cast_value.unalias()}}
)
)
if (
def archive_index(archive_id: str, status: int) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
    """Build the response for the page that lists all archives.

    Parameters
    ----------
    archive_id : str
        Archive id taken from the request. It is echoed back unchanged under
        the ``bad_archive`` key.
    status : int
        HTTP status code to return; passed through untouched.

    Returns
    -------
    Tuple of (data, status, headers)
        ``data`` holds ``bad_archive``, ``archives`` (``(id, name)`` pairs for
        archives that are neither subsumed nor test archives), ``defunct``
        (``(id, name, subsumed_by)`` triples for archives that have an
        ``end_date``) and ``template``. Both lists are sorted by id.
        ``headers`` is always an empty dict.
    """
    # Iterate over items() so the archive record is not looked up a second
    # time, and avoid naming the loop variable ``id`` (shadows the builtin).
    archives = sorted(
        (
            (arch_id, arch["name"])
            for arch_id, arch in ARCHIVES.items()
            if arch_id not in ARCHIVES_SUBSUMED and not arch_id.startswith("test")
        ),
        key=lambda entry: entry[0],
    )
    # An archive counts as defunct as soon as it carries an "end_date" key;
    # the third element is "" when no subsuming category is recorded.
    defunct = sorted(
        (
            (arch_id, arch["name"], ARCHIVES_SUBSUMED.get(arch_id, ""))
            for arch_id, arch in ARCHIVES.items()
            if "end_date" in arch
        ),
        key=lambda entry: entry[0],
    )
    data: Dict[str, Any] = {
        "bad_archive": archive_id,
        "archives": archives,
        "defunct": defunct,
        "template": "archive/archive_list_all.html",
    }
    return data, status, {}
"""Landing page for when there is no archive specified."""
data: Dict[str, Any] = {}
data["bad_archive"] = archive_id
archives = [
(id, ARCHIVES[id]["name"])
for id in ARCHIVES.keys()
if id not in ARCHIVES_SUBSUMED and not id.startswith("test")
]
archives.sort(key=lambda tpl: tpl[0])
data["archives"] = archives
defunct = [
(id, ARCHIVES[id]["name"], ARCHIVES_SUBSUMED.get(id, ""))
for id in ARCHIVES.keys()
if "end_date" in ARCHIVES[id]
]
defunct.sort(key=lambda tpl: tpl[0])
data["defunct"] = defunct
data["template"] = "archive/archive_list_all.html"
return data, status, {}
def get_archive(archive_id: str) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
"""Gets archive page."""
data: Dict[str, Any] = {}
response_headers: Dict[str, Any] = {}
if archive_id == "list":
return archive_index(archive_id, status=status.HTTP_200_OK)
archive = ARCHIVES.get(archive_id, None)
if not archive:
cat_id = CATEGORIES.get(archive_id, {}).get("in_archive", None)
archive = ARCHIVES.get(cat_id, None)
if not archive:
return archive_index(archive_id,
status=status.HTTP_404_NOT_FOUND)
else:
archive_id = cat_id
_write_expires_header(response_headers)
subsumed_by = ARCHIVES_SUBSUMED.get(archive_id, None)
if subsumed_by:
data["subsumed_id"] = archive_id
data["subsumed_category"] = CATEGORIES.get(archive_id, {})
data["subsumed_by"] = subsumed_by