How to use the scrapy.http.Response class in Scrapy

To help you get started, we’ve selected a few scrapy.http.Response examples, based on popular ways the class is used in public projects.

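Before looking at the project snippets, here is a minimal sketch of constructing a Response by hand, the way the tests below do. All values are illustrative, and for parsing HTML you would normally work with HtmlResponse (a Response subclass) rather than the base class:

from scrapy.http import Response

# build a bare Response the way unit tests typically do; all values are illustrative
response = Response(
    url='http://www.example.com/',
    status=200,
    headers={'Content-Type': 'text/html'},
    body=b'<html><body>Hello</body></html>',
)

print(response.status)                   # 200
print(response.url)                      # http://www.example.com/
print(response.headers['Content-Type'])  # b'text/html'

# replace() returns a copy with selected attributes swapped out
moved = response.replace(status=301, headers={'Location': 'http://www.example.com/new'})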

github scrapy / scrapy / tests / test_downloadermiddleware_redirect.py
def test_dont_redirect(self):
        url = 'http://www.example.com/301'
        url2 = 'http://www.example.com/redirected'
        req = Request(url, meta={'dont_redirect': True})
        rsp = Response(url, headers={'Location': url2}, status=301)

        r = self.mw.process_response(req, rsp, self.spider)
        assert isinstance(r, Response)
        assert r is rsp

        # Test that it redirects when dont_redirect is False
        req = Request(url, meta={'dont_redirect': False})
        rsp = Response(url2, status=200)

        r = self.mw.process_response(req, rsp, self.spider)
        assert isinstance(r, Response)
        assert r is rsp
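
The excerpt above assumes a setUp similar to the one in Scrapy's own test suite; a rough sketch of that wiring (the spider name 'foo' is an assumption, not part of the snippet) looks like this:

import unittest

from scrapy.downloadermiddlewares.redirect import RedirectMiddleware
from scrapy.http import Request, Response
from scrapy.spiders import Spider
from scrapy.utils.test import get_crawler

class RedirectMiddlewareTest(unittest.TestCase):
    def setUp(self):
        # build a crawler/spider pair and the middleware instance used as self.mw
        crawler = get_crawler(Spider)
        self.spider = crawler._create_spider('foo')
        self.mw = RedirectMiddleware.from_crawler(crawler)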

github scrapy / scrapy / tests / test_pipeline_media.py
def test_results_are_cached_across_multiple_items(self):
        rsp1 = Response('http://url1')
        req1 = Request('http://url1', meta=dict(response=rsp1))
        item = dict(requests=req1)
        new_item = yield self.pipe.process_item(item, self.spider)
        self.assertTrue(new_item is item)
        self.assertEqual(new_item['results'], [(True, rsp1)])

        # rsp2 is ignored, rsp1 must be in results because request fingerprints are the same
        req2 = Request(req1.url, meta=dict(response=Response('http://donot.download.me')))
        item = dict(requests=req2)
        new_item = yield self.pipe.process_item(item, self.spider)
        self.assertTrue(new_item is item)
        self.assertEqual(request_fingerprint(req1), request_fingerprint(req2))
        self.assertEqual(new_item['results'], [(True, rsp1)])
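
The caching behaviour asserted here hinges on request fingerprints: two Requests for the same URL produce the same fingerprint even when their meta differs, so the second download is skipped. A small illustration (the URL is arbitrary):

from scrapy import Request
from scrapy.utils.request import request_fingerprint

req1 = Request('http://url1')
req2 = Request('http://url1', meta={'response': 'something else'})

# meta is not part of the fingerprint, so both requests count as the same download
assert request_fingerprint(req1) == request_fingerprint(req2)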

github scrapinghub / scrapinghub-entrypoint-scrapy / tests / test_extension.py
def test_hs_mware_process_spider_output_filter_request(hs_mware):
    response = Response('http://resp-url')
    # provide a response and a new request in result
    child_response = Response('http://resp-url-child')
    child_response.request = Request('http://resp-url-child-req')
    child_request = Request('http://req-url-child')
    hs_mware._seen = WeakKeyDictionary({response: 'riq'})
    result = list(hs_mware.process_spider_output(
        response, [child_response, child_request], Spider('test')))
    assert len(result) == 2
    # make sure that we update hsparent meta only for requests
    assert result[0].meta.get(HS_PARENT_ID_KEY) is None
    assert result[1].meta[HS_PARENT_ID_KEY] == 'riq'

github scrapy / scrapy / tests / test_downloadermiddleware_httpcache.py
            assert mw.storage.retrieve_response(self.spider, req) is None
            assert mw.process_request(req, self.spider) is None

        # s3 scheme response is cached by default
        req, res = Request('s3://bucket/key'), Response('http://bucket/key')
        with self._middleware() as mw:
            assert mw.process_request(req, self.spider) is None
            mw.process_response(req, res, self.spider)

            cached = mw.process_request(req, self.spider)
            assert isinstance(cached, Response), type(cached)
            self.assertEqualResponse(res, cached)
            assert 'cached' in cached.flags

        # ignore s3 scheme
        req, res = Request('s3://bucket/key2'), Response('http://bucket/key2')
        with self._middleware(HTTPCACHE_IGNORE_SCHEMES=['s3']) as mw:
            assert mw.process_request(req, self.spider) is None
            mw.process_response(req, res, self.spider)

            assert mw.storage.retrieve_response(self.spider, req) is None
            assert mw.process_request(req, self.spider) is None
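
Outside of tests, the same behaviour is controlled from project settings. A plausible settings.py fragment (the 's3' entry is an assumption for illustration; 'file' is the documented default for HTTPCACHE_IGNORE_SCHEMES):

# settings.py -- enable the HTTP cache and skip caching for selected URL schemes
HTTPCACHE_ENABLED = True
HTTPCACHE_IGNORE_SCHEMES = ['file', 's3']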

github scrapy / scrapy / tests / test_spider.py
def test_get_sitemap_body_xml_url_compressed(self):
        r = Response(url="http://www.example.com/sitemap.xml.gz", body=self.GZBODY)
        self.assertSitemapBody(r, self.BODY)
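
The GZBODY fixture can be built with the standard library; a minimal sketch (the XML payload is illustrative, not the fixture Scrapy actually ships):

import gzip

from scrapy.http import Response

BODY = b'<?xml version="1.0" encoding="UTF-8"?><urlset></urlset>'
GZBODY = gzip.compress(BODY)

r = Response(url='http://www.example.com/sitemap.xml.gz', body=GZBODY)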

github scrapy-plugins / scrapy-crawlera / tests / test_crawlera.py
def _mock_crawlera_response(self, url, headers=None, **kwargs):
        crawlera_headers = {"X-Crawlera-Version": "1.36.3-cd5e44"}
        if headers:
            crawlera_headers.update(headers)
        return Response(url, headers=crawlera_headers, **kwargs)
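
A call site for this helper might look like the following; the extra header and the status code are illustrative, not taken from the test suite:

# hypothetical usage inside the same test class
resp = self._mock_crawlera_response(
    'http://example.com',
    headers={'X-Crawlera-Error': 'banned'},
    status=503,
)
assert resp.status == 503
assert resp.headers['X-Crawlera-Version'] == b'1.36.3-cd5e44'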

github scrapy / scrapy / scrapy / trunk / scrapy / core / downloader / responsetypes / __init__.py
def from_headers(self, headers):
        """Return the most appropiate Response class by looking at the HTTP
        headers"""
        cls = Response
        if 'Content-Type' in headers:
            cls = self.from_content_type(headers['Content-Type'])
        if cls is Response and 'Content-Disposition' in headers:
            cls = self.from_content_disposition(headers['Content-Disposition'])
        return cls
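
In more recent Scrapy versions this logic lives in scrapy.responsetypes, where a module-level responsetypes instance exposes the same method. A quick sketch, passing a Headers mapping because the lookup is case-insensitive on that type:

from scrapy.http import Headers
from scrapy.responsetypes import responsetypes

headers = Headers({'Content-Type': 'text/html; charset=utf-8'})
cls = responsetypes.from_headers(headers)
# cls is HtmlResponse here; unknown content types fall back to the plain Response class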

github scrapy / scrapy / scrapy / contrib / response / soup.py
def __init__(self):
        # patch Response so every response gains a getsoup() method and a
        # soup property, both backed by the getsoup helper
        setattr(Response, 'getsoup', getsoup)
        setattr(Response, 'soup', property(getsoup))
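
The getsoup helper patched in above is defined elsewhere in that (long-removed) contrib module. A purely hypothetical sketch of what such a helper could look like, caching a BeautifulSoup tree on the response:

from bs4 import BeautifulSoup

def getsoup(response):
    # hypothetical implementation: parse the body once and cache the soup on the response
    if not hasattr(response, '_cached_soup'):
        response._cached_soup = BeautifulSoup(response.body, 'html.parser')
    return response._cached_soup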

github scrapy-plugins / scrapy-jsonrpc / scrapy_jsonrpc / serialize.py
def default(self, o):
        if isinstance(o, datetime.datetime):
            return o.strftime("%s %s" % (self.DATE_FORMAT, self.TIME_FORMAT))
        elif isinstance(o, datetime.date):
            return o.strftime(self.DATE_FORMAT)
        elif isinstance(o, datetime.time):
            return o.strftime(self.TIME_FORMAT)
        elif isinstance(o, decimal.Decimal):
            return str(o)
        elif isinstance(o, defer.Deferred):
            return str(o)
        elif isinstance(o, BaseItem):
            return dict(o)
        elif isinstance(o, Request):
            return "<%s %s %s>" % (type(o).__name__, o.method, o.url)
        elif isinstance(o, Response):
            return "<%s %s %s>" % (type(o).__name__, o.status, o.url)
        else:
            return super(ScrapyJSONEncoder, self).default(o)
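
Used on a structure that contains Request or Response objects, the encoder degrades them to readable placeholders instead of failing. For example, assuming the class above is importable from scrapy_jsonrpc.serialize:

from scrapy.http import Request, Response
from scrapy_jsonrpc.serialize import ScrapyJSONEncoder

encoder = ScrapyJSONEncoder()
data = {
    'request': Request('http://example.com/'),
    'response': Response('http://example.com/', status=200),
}
print(encoder.encode(data))
# roughly: {"request": "<Request GET http://example.com/>", "response": "<Response 200 http://example.com/>"}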