How to use the ruia.Middleware class in ruia

To help you get started, we’ve selected a few ruia examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github howie6879 / ruia / tests / test_middleware.py View on Github external
#!/usr/bin/env python

from ruia import Middleware

# Two separate Middleware instances; the hook functions below register on them.
middleware01 = Middleware()

middleware02 = Middleware()


@middleware01.request
async def print_on_request01(spider_ins, request):
    """Replace the request's headers with a single fixed User-Agent entry."""
    fixed_headers = {"User-Agent": "ruia ua"}
    request.headers = fixed_headers


@middleware01.response
async def print_on_response01(spider_ins, request, response):
    """Assert that ``response.html`` is a ``str``."""
    html = response.html
    assert isinstance(html, str)


@middleware02.request
async def print_on_request02(spider_ins, request):
    """Request hook with an empty body; it leaves ``request`` untouched."""
    return None
github howie6879 / ruia / tests / test_spider.py View on Github external
def test_spider_with_error_middleware():
    """Start a spider with a middleware whose hooks are faulty.

    The request hook is a plain (non-async) function declared with an extra
    ``response`` parameter, and the response hook always raises ``TypeError``.
    The test makes no assertions; it only calls ``SpiderDemo.start``.

    NOTE(review): presumably this checks that the spider tolerates broken
    middleware without crashing -- confirm against ruia's error handling.
    """
    error_middleware = Middleware()

    # Registered as a request hook, but it is not a coroutine and takes three
    # parameters rather than ``(spider_ins, request)``.
    @error_middleware.request
    def error_request(spider_ins, request, response):
        pass

    # Response hook that unconditionally raises.
    @error_middleware.response
    async def error_response(spider_ins, request, response):
        raise TypeError("error")

    class SpiderDemo(Spider):
        start_urls = ["https://httpbin.org/get?p=0"]

        # No-op parse callback; the response is ignored.
        async def parse(self, response):
            pass

    SpiderDemo.start(middleware=error_middleware)
github howie6879 / ruia / tests / test_spider.py View on Github external
#!/usr/bin/env python

import asyncio
import os

from ruia import Item, Middleware, Response, Request, Spider, TextField

# Path of the HTML fixture, resolved relative to this file's real directory.
html_path = os.path.join(
    os.path.dirname(os.path.realpath(__file__)), "data", "for_spider_testing.html"
)
# Read the fixture into memory once, at import time, as UTF-8 text.
with open(html_path, mode="r", encoding="utf-8") as file:
    HTML = file.read()

# Middleware instance that the hook functions below register on.
middleware = Middleware()


async def retry_func(request):
    """Set the ``TIMEOUT`` entry of ``request.request_config`` to 10.

    The mapping is modified in place; nothing is returned.
    """
    config = request.request_config
    config["TIMEOUT"] = 10


@middleware.request
async def print_on_request(spider_ins, request):
    """Replace the request's headers with a single fixed User-Agent entry."""
    fixed_headers = {"User-Agent": "ruia ua"}
    request.headers = fixed_headers


@middleware.response
async def print_on_response(spider_ins, request, response):
    """Assert the response HTML is a ``str`` and the request headers match.

    The headers are compared against the single User-Agent entry
    ``{"User-Agent": "ruia ua"}``.
    """
    expected_headers = {"User-Agent": "ruia ua"}
    assert isinstance(response.html, str)
    assert request.headers == expected_headers
github howie6879 / ruia / tests / test_middleware.py View on Github external
#!/usr/bin/env python

from ruia import Middleware

# Two separate Middleware instances; the hook functions below register on them.
middleware01 = Middleware()

middleware02 = Middleware()


@middleware01.request
async def print_on_request01(spider_ins, request):
    """Replace the request's headers with a single fixed User-Agent entry."""
    fixed_headers = {"User-Agent": "ruia ua"}
    request.headers = fixed_headers


@middleware01.response
async def print_on_response01(spider_ins, request, response):
    """Assert that ``response.html`` is a ``str``."""
    html = response.html
    assert isinstance(html, str)


@middleware02.request
async def print_on_request02(spider_ins, request):
github howie6879 / ruia / examples / topics_examples / middleware_demo.py View on Github external
#!/usr/bin/env python

from ruia import Spider, Middleware

# Middleware instance that the request/response hooks below register on.
middleware = Middleware()


@middleware.request
async def print_on_request(spider_ins, request):
    """Store the request URL as the request's metadata and print it.

    The request object is only mutated in place; nothing is returned.
    """
    url_info = {"url": request.url}
    request.metadata = url_info
    print(f"request: {request.metadata}")


@middleware.response
async def print_on_response(spider_ins, request, response):
    """Print the metadata carried by the response."""
    message = f"response: {response.metadata}"
    print(message)


class MiddlewareSpiderDemo(Spider):
    # Single URL listed for this demo spider.
    start_urls = ["https://httpbin.org/get"]
github howie6879 / ruia / examples / hacker_news_spider / middlewares.py View on Github external
#!/usr/bin/env python

from ruia import Middleware

# Middleware instance that the request hook below registers on.
middleware = Middleware()


@middleware.request
async def print_on_request(spider_ins, request):
    """Set the ``User-Agent`` entry on the request's existing headers.

    Other header entries are left as they are, because the headers mapping is
    updated in place rather than replaced.
    """
    user_agent_header = {"User-Agent": "ruia user-agent"}
    request.headers.update(user_agent_header)
    # Disabled example of passing a proxy through the request kwargs:
    # request.kwargs.update({"proxy": "http://0.0.0.0:8118"})