How to use the perceval.backend.Backend class in perceval

To help you get started, we’ve selected a few perceval examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github chaoss / grimoirelab-perceval / tests / test_backend.py View on Github external
def test_metadata_id(self):
        """Check that `metadata_id` raises NotImplementedError on a plain Backend"""

        backend = Backend('test')

        # Callable form of assertRaises: invokes backend.metadata_id(None)
        self.assertRaises(NotImplementedError, backend.metadata_id, None)
github chaoss / grimoirelab-perceval / tests / test_backend.py View on Github external
def test_archive(self):
        """Check that the archive attribute is initialized and can be reassigned"""

        path = os.path.join(self.test_path, 'myarchive')
        stored = Archive.create(path)

        # Archive given at construction time
        with_archive = Backend('test', archive=stored)
        self.assertEqual(with_archive.archive, stored)

        # No archive given: the attribute compares equal to None
        without_archive = Backend('test')
        self.assertEqual(without_archive.archive, None)

        # The attribute can be assigned after construction
        without_archive.archive = stored
        self.assertEqual(without_archive.archive, stored)
github chaoss / grimoirelab-perceval / tests / test_backend.py View on Github external
def test_init_client_not_implemented(self):
        """Check that `_init_client` raises NotImplementedError on a plain Backend"""

        backend = Backend('test')

        # Callable form of assertRaises: invokes backend._init_client()
        self.assertRaises(NotImplementedError, backend._init_client)
github chaoss / grimoirelab-perceval / perceval / backends / remo2.py View on Github external
def remo_metadata(func):
    """ReMo metadata decorator.

    Wraps a method that returns an iterable of items. For every item
    produced, the `offset` entry is removed from the item's `data`
    dictionary and stored at the top level of the item under the
    `offset` key. Items are yielded lazily, one by one.

    :param func: method to wrap; called as `func(self, *args, **kwargs)`

    :returns: a generator function with the metadata of `func`
    """
    @functools.wraps(func)
    def wrapper(self, *args, **kwargs):
        produced = func(self, *args, **kwargs)
        for entry in produced:
            payload = entry['data']
            # Move the offset out of the payload and up to the item level
            entry['offset'] = payload.pop('offset')
            yield entry
    return wrapper


class ReMo(Backend):
    """ReMo backend for Perceval.

    This class retrieves the events from a ReMo URL. To initialize
    this class an URL may be provided. If not, https://reps.mozilla.org
    will be used. The origin of the data will be set to this URL.

    It uses v2 API to get events, people and activities data.

    :param url: ReMo URL
    :param tag: label used to mark the data
    :param cache: cache object to store raw data
    """
    version = '0.4.0'

    def __init__(self, url=None, tag=None, cache=None):
        if not url:
github chaoss / grimoirelab-perceval / perceval / backends / core / dockerhub.py View on Github external
BackendCommand,
                        BackendCommandArgumentParser)
from ...client import HttpClient

# Category label for DockerHub items
# NOTE(review): presumably the category this backend produces -- confirm
CATEGORY_DOCKERHUB_DATA = "dockerhub-data"

# Docker Hub site URL, and the API URL built by joining it with 'v2'
DOCKERHUB_URL = "https://hub.docker.com/"
DOCKERHUB_API_URL = urijoin(DOCKERHUB_URL, 'v2')

# Official Docker repositories belong to the `library` owner;
# `_` is the shortcut that stands for that owner
DOCKER_OWNER = 'library'
DOCKER_SHORTCUT_OWNER = '_'

# Module-level logger, named after this module
logger = logging.getLogger(__name__)


class DockerHub(Backend):
    """DockerHub backend for Perceval.

    This class retrieves data from a repository stored
    in the Docker Hub site. To initialize this class, the owner
    and the repository from which data will be fetched must be
    provided. The origin of the data will be built with both
    parameters.

    The shortcut owner `_`, used for official Docker repositories,
    will be replaced by its long name: `library`.

    :param owner: DockerHub owner
    :param repository: DockerHub repository owned by `owner`
    :param tag: label used to mark the data
    :param archive: archive to store/retrieve items
    """
    # Version of this backend implementation
    version = '0.5.0'
github chaoss / grimoirelab-perceval / perceval / backends / core / gitlab.py View on Github external
# NOTE(review): presumably an upper bound related to the API rate
# limit handling -- confirm against where it is used
MAX_RATE_LIMIT = 500

# NOTE(review): presumably the number of items requested per page -- confirm
PER_PAGE = 100

# Default sleep time and retries to deal with connection/server problems
DEFAULT_SLEEP_TIME = 1
MAX_RETRIES = 5

# HTTP status codes (500, 502) which, judging by the name, trigger
# a retry -- confirm against the client code
DEFAULT_RETRY_AFTER_STATUS_CODES = [500, 502]

# Names of issue fields; how they are used is not visible in this excerpt
TARGET_ISSUE_FIELDS = ['user_notes_count', 'award_emoji']

# Module-level logger, named after this module
logger = logging.getLogger(__name__)


class GitLab(Backend):
    """GitLab backend for Perceval.

    This class allows the fetch the issues stored in GitLab
    repository.

    :param owner: GitLab owner
    :param repository: GitLab repository from the owner
    :param api_token: GitLab auth token to access the API
    :param is_oauth_token: True if the token is OAuth (default False)
    :param base_url: GitLab URL in enterprise edition case;
        when no value is set the backend will be fetch the data
        from the GitLab public site.
    :param tag: label used to mark the data
    :param archive: archive to store/retrieve items
    :param sleep_for_rate: sleep until rate limit is reset
    :param min_rate_to_sleep: minimun rate needed to sleep until
github chaoss / grimoirelab-perceval / perceval / backends / core / mediawiki.py View on Github external
from ...backend import (Backend,
                        BackendCommand,
                        BackendCommandArgumentParser)
from ...client import HttpClient
from ...errors import BackendError
from ...utils import DEFAULT_DATETIME

# Category label for MediaWiki items
# NOTE(review): presumably the category this backend produces -- confirm
CATEGORY_PAGE = 'page'

# Module-level logger, named after this module
logger = logging.getLogger(__name__)

MAX_RECENT_DAYS = 30  # max number of days included in MediaWiki recent changes


class MediaWiki(Backend):
    """MediaWiki backend for Perceval.

    This class retrieves the wiki pages and edits from a  MediaWiki site.
    To initialize this class the URL must be provided. The origin
    of the data will be set to this URL.

    It uses different APIs to support pre and post 1.27 MediaWiki versions.
    The pre 1.27 approach performance is better but it needs different
    logic for full an incremental retrieval.

    In pre 1.27 the incremental approach uses the recent changes API which just
    covers MAX_RECENT_DAYS. If the from_date used is older, all the pages must
    be retrieved and the consumer of the items must filter itself.

    Both approach return a common format: a page with all its revisions. It
    is different how the pages list is generated.
github chaoss / grimoirelab-perceval / perceval / backends / core / mbox.py View on Github external
datetime_to_utc,
                                          str_to_datetime)

from ...backend import (Backend,
                        BackendCommand,
                        BackendCommandArgumentParser)
from ...utils import (DEFAULT_DATETIME,
                      check_compressed_file_type,
                      message_to_dict)

# Category label for email message items
CATEGORY_MESSAGE = "message"

# Module-level logger, named after this module
logger = logging.getLogger(__name__)


class MBox(Backend):
    """MBox backend.

    This class fetches the email messages stored in one or several
    mbox files. Initialize this class passing the directory path where
    the mbox files are stored. The origin of the data will be set to
    the value of `uri`.

    :param uri: URI of the mboxes; typically, the URL of their
        mailing list
    :param dirpath: directory path where the mboxes are stored
    :param tag: label used to mark the data
    :param archive: archive to store/retrieve items
    """
    # Version of this backend implementation
    version = '0.12.0'

    # Item categories declared by this backend
    CATEGORIES = [CATEGORY_MESSAGE]
github chaoss / grimoirelab-perceval / perceval / backends / remo.py View on Github external
from ..backend import Backend, BackendCommand, metadata
from ..cache import Cache
from ..errors import BackendError, CacheError, ParseError

from ..utils import (DEFAULT_DATETIME,
                     datetime_to_utc,
                     str_to_datetime,
                     urljoin)


# Module-level logger, named after this module
logger = logging.getLogger(__name__)

# URL of the Mozilla Reps site
MOZILLA_REPS_URL = "https://reps.mozilla.org"

class ReMo(Backend):
    """ReMo backend for Perceval.

    This class retrieves the events from a ReMo URL. To initialize
    this class a URL may be provided. If not, https://reps.mozilla.org
    will be used. The origin of the data will be set to this URL.

    :param url: ReMo URL
    :param tag: label used to mark the data
    :param cache: cache object to store raw data
    """
    # Version of this backend implementation
    version = '0.3.0'

    def __init__(self, url=None, tag=None, cache=None):
        # Fall back to the Mozilla Reps site when no URL is given
        if not url:
            url = MOZILLA_REPS_URL
        # The origin of the data is the URL itself
        origin = url
github chaoss / grimoirelab-perceval / perceval / backends / core / slack.py View on Github external
BackendCommand,
                        BackendCommandArgumentParser)
from ...client import HttpClient
from ...errors import BaseError
from ...utils import DEFAULT_DATETIME

# Category label for Slack message items
CATEGORY_MESSAGE = "message"

# Base URL of Slack; the origin of the data is this URL plus the channel id
SLACK_URL = 'https://slack.com/'
# NOTE(review): presumably the default for `max_items` -- confirm
MAX_ITEMS = 1000
# Format spec that renders a float with six decimal places
FLOAT_FORMAT = '{:.6f}'

# Module-level logger, named after this module
logger = logging.getLogger(__name__)


class Slack(Backend):
    """Slack backend.

    This class retrieves the messages sent to a Slack channel.
    To access the server an API token is required, which must
    have enough permissions to read from the given channel.

    The origin of the data will be set to the `SLACK_URL` plus the
    identifier of the channel; e.g. 'https://slack.com/C01234ABC'.

    :param channel: identifier of the channel where data will be fetched
    :param api_token: token or key needed to use the API
    :param max_items: maximum number of messages requested on the same query
    :param tag: label used to mark the data
    :param archive: archive to store/retrieve items
    """
    # Version of this backend implementation
    version = '0.8.0'