How to use the arxiv.base.logging module in arxiv

To help you get started, we’ve selected a few arxiv examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github arXiv / arxiv-search / tests / stubs / docmeta.py View on Github external
"""Stub for the docmeta service."""
import os
import json
from flask import Flask
from flask.json import jsonify
from werkzeug.exceptions import NotFound, InternalServerError

from arxiv.base import Base
from arxiv.base.converter import ArXivConverter
from arxiv.base import logging

# Module-level logger obtained through arxiv.base.logging.
logger = logging.getLogger(__name__)

# Read once at import time; None when the METADATA_DIR variable is not set.
METADATA_DIR = os.environ.get("METADATA_DIR")


# Flask application for this stub, passed to arxiv.base's Base.
app = Flask("metadata")
Base(app)

# Register ArXivConverter under the converter name "arxiv" on the URL map.
app.url_map.converters["arxiv"] = ArXivConverter


@app.route("/docmeta/", methods=["GET"])
def docmeta(document_id):
    """Retrieve document metadata."""
    logger.debug(f"Get metadata for {document_id}")
    logger.debug(f"Metadata base is {METADATA_DIR}")
    if not METADATA_DIR:
github arXiv / arxiv-search / search / services / index / authors.py View on Github external
"""Query-builders and helpers for searching by author name."""

from typing import Tuple, Optional, List
import re
from functools import reduce, wraps
from operator import ior, iand

from elasticsearch_dsl import Search, Q, SF

from arxiv.base import logging

from .util import wildcard_escape, escape, STRING_LITERAL, \
    remove_single_characters, has_wildcard

logger = logging.getLogger(__name__)
# NOTE(review): with standard-library logger semantics this keeps records from
# being handed to ancestor loggers' handlers -- confirm arxiv.base.logging
# returns a stdlib-style logger.
logger.propagate = False

# We don't remove stopwords from author names at index time because
# institutions and collaborations are often treated as authors just like
# people. These words are stripped from query terms instead.
STOP = ["and", "or", "the", "of", "a", "for"]


def _remove_stopwords(term: str) -> str:
    """
    Remove common stopwords, except in literal queries.

    The term is split on ``STRING_LITERAL``. In every resulting part that
    does not begin with a quote character, each whitespace-delimited
    occurrence of a word from ``STOP`` (together with its surrounding
    whitespace) is replaced by a single space. Parts that start with ``"``
    or ``'`` are kept verbatim.

    Parameters
    ----------
    term : str
        Query term to clean.

    Returns
    -------
    str
        The parts joined back together after stopword removal.

    """
    parts = re.split(STRING_LITERAL, term)
    for stopword in STOP:
        # Raw f-string: ``\s`` inside a non-raw literal is an invalid escape
        # sequence (DeprecationWarning; SyntaxWarning on Python 3.12+).
        pattern = rf"(^|\s+){stopword}(\s+|$)"
        parts = [part if part.startswith(('"', "'"))
                 else re.sub(pattern, " ", part)
                 for part in parts]
    return "".join(parts)
github arXiv / arxiv-browse / browse / controllers / prevnext / __init__.py View on Github external
"""Handle requests to support sequential navigation between arXiv IDs."""

from flask import url_for, escape
from typing import Tuple, Dict, Any
from werkzeug.exceptions import BadRequest

from browse.domain.identifier import Identifier, IdentifierException
from browse.services.database import get_sequential_id
from arxiv import status
from arxiv.taxonomy.definitions import ARCHIVES, CATEGORIES_ACTIVE
from arxiv.base import logging


Response = Tuple[Dict[str, Any], int, Dict[str, Any]]
logger = logging.getLogger(__name__)


def get_prevnext(id: str, function: str, context: str) -> Response:
    """
    Get the next or previous arXiv ID in the browse context.

    The 'site' parameter from the classic prevnext is no longer supported.

    Parameters
    ----------
    id
        arxiv id
    function
        prev or next
    context
        which archive or category to browse
github arXiv / arxiv-search / search / agent / base.py View on Github external
.. _todo: This should move to arXiv-base, per ARXIVNG-281.
"""

import time
import json
import os
from typing import Any, Optional, Tuple, Generator, Callable
from contextlib import contextmanager
import signal

import boto3
from botocore.exceptions import WaiterError, NoCredentialsError, \
    PartialCredentialsError, BotoCoreError, ClientError

from arxiv.base import logging
logger = logging.getLogger(__name__)
logger.propagate = False

class CheckpointError(RuntimeError):
    """Signals that a checkpointing operation did not succeed."""


class StreamNotAvailable(RuntimeError):
    """Signals that the stream could not be found or connected to."""


class KinesisRequestFailed(RuntimeError):
    """Signals a Kinesis request that has failed permanently."""


class StopProcessing(RuntimeError):
    """Signals a graceful halt of processing after an unrecoverable error."""
github arXiv / arxiv-search / search / services / index / __init__.py View on Github external
MappingError,
)
from search.services.index.util import MAX_RESULTS
from search.services.index.advanced import advanced_search
from search.services.index.simple import simple_search
from search.services.index.api import api_search
from search.services.index.api_classic import classic_search
from search.services.index import highlighting
from search.services.index import results

logger = logging.getLogger(__name__)

# Disable the Elasticsearch logger. When enabled, the Elasticsearch logger
# dumps entire tracebacks prior to propagating exceptions. Thus we end up with
# tracebacks in the logs even for handled exceptions.
logging.getLogger("elasticsearch").disabled = True


# NOTE(review): presumably the document fields covered by an "all fields"
# search -- confirm against the code that consumes this list.
ALL_SEARCH_FIELDS = [
    "author",
    "title",
    "abstract",
    "comments",
    "journal_ref",
    "acm_class",
    "msc_class",
    "report_num",
    "paper_id",
    "doi",
    "orcid",
    "author_id",
]
github arXiv / arxiv-search / search / services / index / __init__.py View on Github external
QueryError,
    IndexConnectionError,
    DocumentNotFound,
    IndexingError,
    OutsideAllowedRange,
    MappingError,
)
from search.services.index.util import MAX_RESULTS
from search.services.index.advanced import advanced_search
from search.services.index.simple import simple_search
from search.services.index.api import api_search
from search.services.index.api_classic import classic_search
from search.services.index import highlighting
from search.services.index import results

logger = logging.getLogger(__name__)

# Disable the Elasticsearch logger. When enabled, the Elasticsearch logger
# dumps entire Tracebacks prior to propagating exceptions. Thus we end up with
# tracebacks in the logs even for handled exceptions.
logging.getLogger("elasticsearch").disabled = True


ALL_SEARCH_FIELDS = [
    "author",
    "title",
    "abstract",
    "comments",
    "journal_ref",
    "acm_class",
    "msc_class",
    "report_num",
github arXiv / arxiv-search / search / services / index / prepare.py View on Github external
from string import punctuation

from elasticsearch_dsl import Search, Q, SF

from arxiv.base import logging

from search.domain import SimpleQuery, Query, AdvancedQuery, Classification, \
    ClassificationList
from .util import strip_tex, Q_, is_tex_query, is_literal_query, escape, \
    wildcard_escape, remove_single_characters, has_wildcard, is_old_papernum, \
    parse_date, parse_date_partial

from .highlighting import HIGHLIGHT_TAG_OPEN, HIGHLIGHT_TAG_CLOSE
from .authors import author_query, author_id_query, orcid_query

logger = logging.getLogger(__name__)

# NOTE(review): END_YEAR is evaluated once, when this module is imported, so a
# process that stays up across a year boundary keeps the old value until it is
# restarted. It also uses the local, timezone-naive clock.
START_YEAR = 1991
END_YEAR = datetime.now().year


def _query_title(term: str, default_operator: str = 'AND') -> Q:
    """
    Build the query part that matches ``term`` against the title fields.

    Parameters
    ----------
    term : str
        Term to look for.
    default_operator : str
        Passed through as ``default_operator`` of the ``query_string`` query.

    Returns
    -------
    :class:`.Q`
        A ``match`` query on ``title.tex`` when ``is_tex_query(term)`` is
        true. Otherwise a ``query_string`` query over ``title.english`` --
        plus ``title`` when ``is_literal_query(term)`` is true -- using the
        escaped term, with leading wildcards disallowed.

    """
    if is_tex_query(term):
        # The field name contains a dot, so it cannot be written as a plain
        # keyword argument and is passed through ** unpacking instead.
        return Q("match", **{'title.tex': {'query': term}})
    fields = ['title.english']
    if is_literal_query(term):
        fields.append('title')
    return Q("query_string", fields=fields, default_operator=default_operator,
             allow_leading_wildcard=False, query=escape(term))


def _query_abstract(term: str, default_operator: str = 'AND') -> Q:
github arXiv / arxiv-search / wsgi-classic-api.py View on Github external
"""Web Server Gateway Interface entry-point for classic API."""

import os
from arxiv.base import logging

from search.factory import create_classic_api_web_app

logger = logging.getLogger(__name__)

__flask_app__ = None


def application(environ, start_response):
    """
    WSGI application factory.

    Copies every string-valued entry of ``environ`` into ``os.environ``,
    leaving ``SERVER_NAME`` untouched.

    Parameters
    ----------
    environ : dict
        Request environ supplied by the WSGI server.
    start_response : callable
        WSGI ``start_response`` callable; not used by the environ-copying
        step.

    """
    for key, value in environ.items():
        # In some deployment scenarios (e.g. uWSGI on k8s), uWSGI will pass in
        # the hostname as part of the request environ. This will usually just
        # be a container ID, which is not helpful for things like building
        # URLs. We want to keep ``SERVER_NAME`` explicitly configured, either
        # in config.py or via an os.environ var loaded by config.py.
        if key == "SERVER_NAME":
            continue
        # isinstance() rather than an exact type comparison, so that str
        # subclasses are copied too; non-string values (streams, tuples,
        # callables) are still skipped because os.environ only holds strings.
        if isinstance(value, str):
            os.environ[key] = value
github arXiv / arxiv-search / agent / pre_start_agent.py View on Github external
"""Creates the Kinesis stream if it does not already exist."""

import boto3
from botocore.exceptions import ClientError
from botocore.vendored.requests.exceptions import ConnectionError
from botocore.client import Config
import os
from search.factory import create_ui_web_app
from search.context import get_application_config
import sys
import time

from arxiv.base import logging
logger = logging.getLogger(__name__)


if __name__ == '__main__':
    logger.debug('Pre-start routine for indexing agent')
    app = create_ui_web_app()
    app.app_context().push()
    config = get_application_config()
    logger.debug('App context initialized')

    endpoint = config.get('KINESIS_ENDPOINT')
    region = config.get('AWS_REGION', 'us-east-1')
    access_key = config.get('AWS_ACCESS_KEY_ID', "")
    aws_secret = config.get('AWS_SECRET_ACCESS_KEY', "")
    verify = config.get('KINESIS_VERIFY') == 'true'
    stream_name = config.get('KINESIS_STREAM', 'MetadataIsAvailable')
    logger.debug(f'Kinesis endpoint: {endpoint}')
github arXiv / arxiv-browse / browse / controllers / abs_page / __init__.py View on Github external
from browse.services.util.response_headers import abs_expires_header, \
    mime_header_date
from browse.services.document import metadata
from browse.services.document.metadata import AbsException,\
    AbsNotFoundException, AbsVersionNotFoundException, AbsDeletedException
from arxiv.browse.domain.identifier import Identifier, IdentifierException,\
    IdentifierIsArchiveException
from browse.services.database import count_trackback_pings,\
    get_trackback_ping_latest_date, has_sciencewise_ping, \
    get_dblp_listing_path, get_dblp_authors
from browse.services.util.external_refs_cits import include_inspire_link,\
    include_dblp_section, get_computed_dblp_listing_path, get_dblp_bibtex_path
from browse.services.document.config.external_refs_cits import DBLP_BASE_URL,\
    DBLP_BIBTEX_PATH, DBLP_AUTHOR_SEARCH_PATH

logger = logging.getLogger(__name__)

Response = Tuple[Dict[str, Any], int, Dict[str, Any]]

truncate_author_list_size = 100


def get_abs_page(arxiv_id: str) -> Response:
    """
    Get abs page data from the document metadata service.

    Parameters
    ----------
    arxiv_id : str
        The arXiv identifier as provided in the request.
    download_format_pref: str
        Download format preference.