How to use the arxiv.taxonomy.definitions.ARCHIVES mapping in arxiv

To help you get started, we’ve selected a few arxiv examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github arXiv / arxiv-browse / browse / util / id_patterns.py View on Github external
doi_patterns = [
    Matchable(['10.1145/0001234.1234567'],
              re.compile(r'(?P10.\d{4,9}/[-._;()/:A-Z0-9]+)', re.I))
]
"""List of Matchable for DOIs in text.

We should probably match DOIs first because they are the source of a
lot of false positives for arxiv matches.

Only using the most general expression from
https://www.crossref.org/blog/dois-and-matching-regular-expressions/
"""

_archive = '|'.join([re.escape(key) for key in taxonomy.definitions.ARCHIVES.keys()])
"""string for use in Regex for all arXiv archives"""

_category = '|'.join([re.escape(key) for key in taxonomy.definitions.CATEGORIES.keys()])

_arxiv_id_prefix = r'(?Par[xX]iv:)?'
"""Attempt to catch the arxiv prefix in front of arxiv ids so it can be
included in the <a> tag anchor. ARXIVNG-1284"""

basic_arxiv_id_patterns = [
    Matchable(['math/0501233', 'hep-ph/0611734', 'gr-qc/0112123'],
              re.compile(_arxiv_id_prefix + r'(?P(%s)\/\d{2}[01]\d{4}(v\d*)?)'
                         % _archive, re.I)),
    Matchable(['1609.05068', '1207.1234v1', '1207.1234', '1807.12345',
               '1807.12345v1', '1807.12345v12'],
              re.compile(r'(?\d{4}\.\d{4,5}(v\d*)?)',
                         re.I)),</a>
github arXiv / arxiv-browse / browse / controllers / abs_page / __init__.py View on Github external
Parameters
    ----------
    arxiv_identifier : :class:`Identifier`
    primary_category : :class:`Category`

    Returns
    -------
    Dict of values to add to response_data
    """
    # Set up the context
    context = None
    if ('context' in request.args and (
            request.args['context'] == 'arxiv'
            or request.args['context'] in taxonomy.definitions.CATEGORIES
            or request.args['context'] in taxonomy.definitions.ARCHIVES)):
        context = request.args['context']
    elif primary_category:
        pc = primary_category.canonical or primary_category
        if not arxiv_identifier.is_old_id:  # new style IDs
            context = pc.id
        else:  # Old style id
            if pc.id in taxonomy.definitions.ARCHIVES:
                context = pc.id
            else:
                context = arxiv_identifier.archive
    else:
        context = None

    response_data['browse_context'] = context

    next_url = None
github arXiv / arxiv-browse / browse / domain / identifier.py View on Github external
"""Attempt to validate the provided arXiv ID.

        Parse constituent parts.
        """
        self.ids = arxiv_id
        """The ID as specified."""
        self.id: str = arxiv_id
        self.archive: Optional[str] = None
        self.filename: Optional[str] = None
        self.year: Optional[int] = None
        self.month: Optional[int] = None
        self.is_old_id: Optional[bool] = None

        if self.ids in taxonomy.definitions.ARCHIVES:
            raise IdentifierIsArchiveException(
                taxonomy.definitions.ARCHIVES[self.ids]['name'])

        for subtup in SUBSTITUTIONS:
            arxiv_id = re.sub(subtup[0],
                              subtup[1],
                              arxiv_id,
                              count=subtup[2],
                              flags=subtup[3])

        self.version = 0
        parse_actions = ((RE_ARXIV_OLD_ID, self._parse_old_id),
                         (RE_ARXIV_NEW_ID, self._parse_new_id))

        id_match = None
        for regex, parse_action in parse_actions:
            id_match = re.match(regex, arxiv_id)
            if id_match:
github arXiv / arxiv-browse / browse / controllers / prevnext / __init__.py View on Github external
Raises
    ------
    BadRequest
        Raised when request parameters are missing, invalid, or when an ID
        redirect cannot be returned even when the request parameters are valid.

    """
    if id is None or not id:
        raise BadRequest('Missing article identifier')
    if function not in ['prev', 'next']:
        raise BadRequest('Missing or invalid function request')
    if context is None or not context:
        raise BadRequest('Missing context')
    if not (context in CATEGORIES_ACTIVE
            or context in ARCHIVES or context == 'all'):
        raise BadRequest('Invalid context')

    try:
        arxiv_id = Identifier(id)
    except IdentifierException:
        raise BadRequest(escape(f"Invalid article identifier {id}"))

    seq_id = get_sequential_id(paper_id=arxiv_id,
                               is_next=function == 'next',
                               context=context)
    if not seq_id:
        raise BadRequest(
            escape(f'No {function} article found for '
                   f'{arxiv_id.id} in {context}'))

    redirect_url = url_for('browse.abstract', arxiv_id=seq_id, context=context)
github arXiv / arxiv-search / search / controllers / api / __init__.py View on Github external
def _to_classification(value: str) -> Tuple[Classification, ...]:
    clsns = []
    if value in taxonomy.definitions.GROUPS:
        klass = taxonomy.Group
        field = "group"
    elif value in taxonomy.definitions.ARCHIVES:
        klass = taxonomy.Archive
        field = "archive"
    elif value in taxonomy.definitions.CATEGORIES:
        klass = taxonomy.Category
        field = "category"
    else:
        raise ValueError("not a valid classification")
    cast_value = klass(value)
    clsns.append(Classification(**{field: {"id": value}}))  # type: ignore
    if cast_value.unalias() != cast_value:
        clsns.append(
            Classification(  # type: ignore # noqa: E501 # fmt: off
                **{field: {"id": cast_value.unalias()}}
            )
        )
    if (
github arXiv / arxiv-browse / browse / controllers / archive_page / __init__.py View on Github external
def archive_index(archive_id: str, status: int) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
    """Landing-page data listing all archives.

    Parameters
    ----------
    archive_id : str
        The requested archive identifier; echoed back as ``bad_archive``.
    status : int
        Status code passed through unchanged as the second return value.

    Returns
    -------
    Tuple of the template data, the given status, and an empty dict.
    """
    def first_field(row: Tuple[Any, ...]) -> Any:
        # Both listings are ordered by archive id, the first tuple element.
        return row[0]

    # Listed archives: not subsumed and not a "test" archive.
    listed = sorted(
        (
            (key, ARCHIVES[key]["name"])
            for key in ARCHIVES.keys()
            if not (key in ARCHIVES_SUBSUMED or key.startswith("test"))
        ),
        key=first_field,
    )

    # Archives carrying an "end_date", paired with whatever
    # ARCHIVES_SUBSUMED maps them to ("" when absent).
    ended = sorted(
        (
            (key, ARCHIVES[key]["name"], ARCHIVES_SUBSUMED.get(key, ""))
            for key in ARCHIVES.keys()
            if "end_date" in ARCHIVES[key]
        ),
        key=first_field,
    )

    page: Dict[str, Any] = {
        "bad_archive": archive_id,
        "archives": listed,
        "defunct": ended,
        "template": "archive/archive_list_all.html",
    }
    return page, status, {}
github arXiv / arxiv-browse / browse / controllers / archive_page / __init__.py View on Github external
"""Landing page for when there is no archive specified."""
    data: Dict[str, Any] = {}
    data["bad_archive"] = archive_id

    archives = [
        (id, ARCHIVES[id]["name"])
        for id in ARCHIVES.keys()
        if id not in ARCHIVES_SUBSUMED and not id.startswith("test")
    ]
    archives.sort(key=lambda tpl: tpl[0])
    data["archives"] = archives

    defunct = [
        (id, ARCHIVES[id]["name"], ARCHIVES_SUBSUMED.get(id, ""))
        for id in ARCHIVES.keys()
        if "end_date" in ARCHIVES[id]
    ]
    defunct.sort(key=lambda tpl: tpl[0])
    data["defunct"] = defunct
    
    data["template"] = "archive/archive_list_all.html"
    return data, status, {}
github arXiv / arxiv-browse / browse / controllers / archive_page / __init__.py View on Github external
def get_archive(archive_id: str) -> Tuple[Dict[str, Any], int, Dict[str, Any]]:
    """Gets archive page."""
    data: Dict[str, Any] = {}
    response_headers: Dict[str, Any] = {}

    if archive_id == "list":
        return archive_index(archive_id, status=status.HTTP_200_OK)

    archive = ARCHIVES.get(archive_id, None)
    if not archive:
        cat_id = CATEGORIES.get(archive_id, {}).get("in_archive", None)
        archive = ARCHIVES.get(cat_id, None)
        if not archive:
            return archive_index(archive_id,
                                 status=status.HTTP_404_NOT_FOUND)
        else:
            archive_id = cat_id

    _write_expires_header(response_headers)

    subsumed_by = ARCHIVES_SUBSUMED.get(archive_id, None)
    if subsumed_by:
        data["subsumed_id"] = archive_id
        data["subsumed_category"] = CATEGORIES.get(archive_id, {})
        data["subsumed_by"] = subsumed_by