How to use the hepcrawl.testlib.celery_monitor.CeleryMonitor.do_crawl function in hepcrawl

To help you get started, we've selected a few examples showing how CeleryMonitor.do_crawl is used in public projects; all of them come from hepcrawl's own functional test suite.
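
Each test below assumes the same test-harness setup. A minimal import sketch (the module paths are assumptions inferred from how the snippets reference these names; they are not shown verbatim in the snippets):

from hepcrawl.testlib.celery_monitor import CeleryMonitor
from hepcrawl.testlib.tasks import app as celery_app
from hepcrawl.testlib.utils import get_crawler_instance

Roughly, do_crawl schedules the named spider on a running scrapyd service and then watches the Celery event stream: monitor_timeout is how long (in seconds) each polling iteration waits for events, monitor_iter_limit caps the number of polling iterations before the monitor gives up, and events_limit is the number of completed-crawl events to collect before returning the accumulated crawl results.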

From inspirehep/hepcrawl: tests/functional/wsp/test_wsp.py
def test_wsp_ftp_crawl_twice(expected_results, settings, cleanup):
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL'),
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=20,
        events_limit=1,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='WSP',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]

    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]

From inspirehep/hepcrawl: tests/functional/arxiv/test_arxiv.py
def test_arxiv(
    expected_results,
    config,
    spider,
):
    crawler = get_crawler_instance(config['CRAWLER_HOST_URL'])

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=config['CRAWLER_PROJECT'],
        spider=spider,
        settings={},
        **config['CRAWLER_ARGUMENTS']
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]

    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]

From inspirehep/hepcrawl: tests/functional/pos/test_pos.py
def test_pos_conference_paper_record_and_proceedings_record(
    expected_results,
    config,
):
    crawler = get_crawler_instance(config['CRAWLER_HOST_URL'])

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=2,
        crawler_instance=crawler,
        project=config['CRAWLER_PROJECT'],
        spider='pos',
        settings={},
        **config['CRAWLER_ARGUMENTS']
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]

    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]

From inspirehep/hepcrawl: tests/functional/wsp/test_wsp.py
def test_wsp(expected_results, settings, cleanup):
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL'),
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='WSP',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]

    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]

From inspirehep/hepcrawl: tests/functional/desy/test_desy.py
def test_desy_crawl_twice(expected_results, settings, cleanup):
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL')
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='desy',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]

    gotten_records = [
        result['record'] for result in crawl_result['results_data']
    ]

From inspirehep/hepcrawl: tests/functional/cds/test_cds.py
def test_cds(
    expected_results,
    config,
    spider,
):
    crawler = get_crawler_instance(config['CRAWLER_HOST_URL'])

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=config['CRAWLER_PROJECT'],
        spider=spider,
        settings={},
        **config['CRAWLER_ARGUMENTS']
    )

    assert len(crawl_results) == 1

    crawl_result = crawl_results[0]

    gotten_results = [
        result['record'] for result in crawl_result['results_data']
    ]

From inspirehep/hepcrawl: tests/functional/desy/test_desy.py
def test_desy(
    expected_results,
    settings,
    cleanup,
):
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL')
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=1,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='desy',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )

    crawl_result = crawl_results[0]

    gotten_records = [
        result['record'] for result in crawl_result['results_data']
    ]

From inspirehep/hepcrawl: tests/functional/desy/test_desy.py
def test_desy_broken_xml(get_local_settings_for_broken, cleanup):
    settings = get_local_settings_for_broken
    crawler = get_crawler_instance(
        settings.get('CRAWLER_HOST_URL')
    )

    crawl_results = CeleryMonitor.do_crawl(
        app=celery_app,
        monitor_timeout=5,
        monitor_iter_limit=100,
        events_limit=2,
        crawler_instance=crawler,
        project=settings.get('CRAWLER_PROJECT'),
        spider='desy',
        settings={},
        **settings.get('CRAWLER_ARGUMENTS')
    )
    crawl_result = crawl_results[0]
    result_records = crawl_result['results_data']

    assert not crawl_result['errors']
    assert len(result_records) == 1
    res = result_records[0]
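
Across all of these tests the crawl results share the same shape: each entry in crawl_results is a dict carrying the scraped records under 'results_data' and any failures under 'errors'. A minimal inspection sketch, using only the field names that appear in the tests above:

crawl_result = crawl_results[0]
assert not crawl_result['errors']  # the crawl finished without failures
records = [entry['record'] for entry in crawl_result['results_data']]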