How to use the hepcrawl.testlib.fixtures.fake_response_from_file function in hepcrawl

To help you get started, we've selected a few hepcrawl examples that show popular ways fake_response_from_file is used in public projects.

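Before the full excerpts, here is a minimal sketch of calling fake_response_from_file directly, distilled from the first example below. The function builds a fake Scrapy response from a file under the test suite's responses/ directory, so spider callbacks can be exercised without any network access. The import paths are assumptions based on the repository layout implied by the excerpts; check hepcrawl/testlib/fixtures.py for the exact signature and defaults.

from hepcrawl.spiders import infn_spider
from hepcrawl.testlib.fixtures import fake_response_from_file


def test_scrape_splash_offline():
    """Exercise a spider callback against a canned response file."""
    spider = infn_spider.InfnSpider()
    # file_name is resolved relative to the test suite's responses/
    # directory; no HTTP request is made
    response = fake_response_from_file('infn/test_splash.html')

    parsed_item = spider.scrape_splash(response)
    assert parsed_item.record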

From inspirehep/hepcrawl, tests/unit/test_infn.py:
def record():
    """Return scraping results from the INFN spider."""
    spider = infn_spider.InfnSpider()
    response = fake_response_from_file('infn/test_splash.html')

    parsed_item = spider.scrape_splash(response)
    assert parsed_item
    assert parsed_item.record

    return parsed_item.record

From inspirehep/hepcrawl, tests/unit/test_world_scientific.py:
def get_records(response_file_name):
    """Return all results generator from the WSP spider via pipelines."""
    # environmental variables needed for the pipelines payload
    os.environ['SCRAPY_JOB'] = 'scrapy_job'
    os.environ['SCRAPY_FEED_URI'] = 'scrapy_feed_uri'
    os.environ['SCRAPY_LOG_FILE'] = 'scrapy_log_file'

    spider = create_spider()
    records = spider.parse(
        fake_response_from_file(
            file_name=response_file_name,
            response_type=TextResponse
        )
    )

    pipeline = InspireCeleryPushPipeline()
    pipeline.open_spider(spider)

    return (
        pipeline.process_item(record, spider)['record']
        for record in records
    )

From inspirehep/hepcrawl, tests/unit/test_desy.py:
def get_records(response_file_name):
    """Return all results generator from the ``Desy`` spider via pipelines."""
    # environmental variables needed for the pipelines payload
    os.environ['SCRAPY_JOB'] = 'scrapy_job'
    os.environ['SCRAPY_FEED_URI'] = 'scrapy_feed_uri'
    os.environ['SCRAPY_LOG_FILE'] = 'scrapy_log_file'

    spider = create_spider()
    records = spider.parse(
        fake_response_from_file(
            file_name='desy/' + response_file_name,
            response_type=TextResponse
        )
    )

    pipeline = InspireCeleryPushPipeline()
    pipeline.open_spider(spider)

    return (
        pipeline.process_item(record, spider)['record']
        for record in records
    )

From inspirehep/hepcrawl, tests/unit/test_phil.py:
def record():
    """Return results generator from the Phil spider.

    Thesis specific.
    """
    spider = phil_spider.PhilSpider()
    response = fake_response_from_file('phil/test_thesis.json')
    jsonrecord = json.loads(response.body_as_unicode())
    response.meta["jsonrecord"] = jsonrecord[0]
    response.meta["direct_links"] = [
        "http://philpapers.org/go.pl?id=BROBB&proxyId=none&u=http%3A%2F%2Fanalysis.oxfordjournals.org%2Fcontent%2F66%2F3%2F194.full.pdf%2Bhtml%3Fframe%3Dsidebar",
        "http://philpapers.org/go.pl?id=BROBB&proxyId=none&u=http%3A%2F%2Fbrogaardb.googlepages.com%2Ftensedrelationsoffprint.pdf"
    ]

    parsed_item = spider.build_item(response)
    assert parsed_item
    assert parsed_item.record

    return parsed_item.record

From inspirehep/hepcrawl, tests/unit/test_iop.py:
def record():
    """Return results generator from the WSP spider."""
    spider = iop_spider.IOPSpider()
    response = fake_response_from_file('iop/xml/test_standard.xml')
    node = get_node(spider, "Article", response)
    spider.pdf_files = TEST_PDF_DIR

    parsed_item = spider.parse_node(response, node)
    assert parsed_item
    assert parsed_item.record

    return parsed_item.record

From inspirehep/hepcrawl, tests/unit/test_aps.py:
def results_from_json():
    """Return results by parsing a JSON file."""
    from scrapy.http import TextResponse

    crawler = Crawler(spidercls=aps_spider.APSSpider)
    spider = aps_spider.APSSpider.from_crawler(crawler)
    parsed_items = list(
        spider.parse(
            fake_response_from_file(
                'aps/aps_single_response.json',
                response_type=TextResponse,
            )
        )
    )

    class MockFailure:
        """Mock twisted.python.failure.Failure, failure on JATS request."""
        def __init__(self):
            self.request = parsed_items[0]

    records = [spider._parse_json_on_failure(MockFailure()).record]

    assert records
    return records

From inspirehep/hepcrawl, tests/unit/test_dnb.py:
        mock.head(
            'http://d-nb.info/1079912991/34',
            headers={
                'Content-Type': 'application/pdf;charset=base64',
            }
        )
        mock.head(
            'http://publikationen.ub.uni-frankfurt.de/frontdoor/index/index/docId/38625',
            headers={
                'Content-Type': 'text/html',
            }
        )
        # spider.parse yields a Request; build the response its callback expects
        request = next(spider.parse(
            fake_response_from_file('dnb/test_1.xml')
        ))
        response = HtmlResponse(
            url=request.url,
            request=request,
            body=scrape_pos_page_body,
            encoding='utf-8',
        )

        parsed_item = request.callback(response)
        assert parsed_item
        assert parsed_item.record

        return parsed_item.record

From inspirehep/hepcrawl, tests/unit/test_edp.py:
def record_rich(package_rich):
    """Return results from the EDP spider with 'rich' format.

    This is not an open access journal, so no splash scraping.
    """
    spider = edp_spider.EDPSpider()
    xml_path = package_rich.url.replace("file://", "")
    fake_resp = fake_response_from_file(xml_path)
    fake_resp.meta["rich"] = True
    node = get_node(spider, "//EDPSArticle", fake_resp)[0]

    parsed_item = spider.parse_node(fake_resp, node)
    assert parsed_item
    assert parsed_item.record

    return parsed_item.record

From inspirehep/hepcrawl, tests/unit/test_crossref.py:
def record():
    """Return results generator from the crossref spider. All fields, one record.
    """
    def _get_record_from_processed_item(item, spider):
        crawl_result = pipeline.process_item(item, spider)
        validate(crawl_result['record'], 'hep')
        assert crawl_result
        return crawl_result['record']

    crawler = Crawler(spidercls=crossref_spider.CrossrefSpider)
    spider = crossref_spider.CrossrefSpider.from_crawler(crawler, 'fakedoi')
    fake_response = fake_response_from_file(
        'crossref/sample_crossref_record.json',
        response_type=TextResponse,
    )
    parsed_items = spider.parse(fake_response)

    pipeline = InspireCeleryPushPipeline()
    pipeline.open_spider(spider)

    yield _get_record_from_processed_item(parsed_items, spider)

    clean_dir()

From inspirehep/hepcrawl, tests/unit/test_base.py:
def record():
    """Return built HEPRecord from the BASE spider."""
    spider = base_spider.BaseSpider()
    response = fake_response_from_file('base/test_1.xml')

    selector = Selector(response, type='xml')
    spider._register_namespaces(selector)
    nodes = selector.xpath('.//%s' % spider.itertag)
    response.meta["record"] = nodes[0].extract()
    response.meta["urls"] = ["http://hdl.handle.net/1885/10005"]

    parsed_item = spider.build_item(response)
    assert parsed_item
    assert parsed_item.record

    return parsed_item.record
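
Note that the record and get_records functions above are written as pytest fixtures in the original test modules; the @pytest.fixture decorators appear to have been stripped when the snippets were extracted. A minimal sketch of restoring the decorator and consuming the fixture, reusing the BASE example above (the asserted field name is illustrative, not a guaranteed key of the scraped record):

import pytest

from scrapy.selector import Selector

from hepcrawl.spiders import base_spider
from hepcrawl.testlib.fixtures import fake_response_from_file


@pytest.fixture
def record():
    """Same body as the BASE excerpt above, restored as a fixture."""
    spider = base_spider.BaseSpider()
    response = fake_response_from_file('base/test_1.xml')

    selector = Selector(response, type='xml')
    spider._register_namespaces(selector)
    nodes = selector.xpath('.//%s' % spider.itertag)
    response.meta["record"] = nodes[0].extract()
    response.meta["urls"] = ["http://hdl.handle.net/1885/10005"]

    return spider.build_item(response).record


def test_record_has_title(record):
    # pytest injects the fixture's return value (the scraped record)
    assert record.get('title')  # field name is an assumption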