How to use the apify.openRequestList function in apify

To help you get started, we’ve selected a few apify examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github apifytech / apify-cli / src / templates / basic_crawler / main.js View on Github external
Apify.main(async () => {
    // Get input of the actor (here only for demonstration purposes).
    // If you'd like to have your input checked and have Apify display
    // a user interface for it, add INPUT_SCHEMA.json file to your actor.
    // For more information, see https://apify.com/docs/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    if (!input || !input.sources) throw new Error('Input must be a JSON object with the "sources" field!');

    const requestList = await Apify.openRequestList('my-request-list', input.sources);

    // Create a basic crawler that will use request-promise to download
    // web pages from a given list of URLs
    const basicCrawler = new Apify.BasicCrawler({
        requestList,
        handleRequestFunction: async ({ request }) => {
            await Apify.pushData({
                request,
                finishedAt: new Date(),
                html: await rp(request.url),
                '#debug': Apify.utils.createRequestDebugInfo(request),
            });
        },

        handleFailedRequestFunction: async ({ request }) => {
            await Apify.pushData({
github apifytech / actor-scraper / puppeteer-scraper / src / crawler_setup.js View on Github external
async _initializeAsync() {
        // RequestList
        const startUrls = this.input.startUrls.map((req) => {
            req.useExtendedUniqueKey = true;
            req.keepUrlFragment = this.input.keepUrlFragments;
            return req;
        });
        this.requestList = await Apify.openRequestList('PUPPETEER_SCRAPER', startUrls);

        // RequestQueue if selected
        if (this.input.useRequestQueue) this.requestQueue = await Apify.openRequestQueue();

        // Dataset
        this.dataset = await Apify.openDataset();
        const { itemsCount } = await this.dataset.getInfo();
        this.pagesOutputted = itemsCount || 0;

        // KeyValueStore
        this.keyValueStore = await Apify.openKeyValueStore();
    }
github apifytech / actor-scraper / web-scraper / src / crawler_setup.js View on Github external
async _initializeAsync() {
        // RequestList
        const startUrls = this.input.startUrls.map((req) => {
            req.useExtendedUniqueKey = true;
            req.keepUrlFragment = this.input.keepUrlFragments;
            return req;
        });
        this.requestList = await Apify.openRequestList('WEB_SCRAPER', startUrls);

        // RequestQueue if selected
        if (this.input.useRequestQueue) this.requestQueue = await Apify.openRequestQueue();

        // Dataset
        this.dataset = await Apify.openDataset();
        const { itemsCount } = await this.dataset.getInfo();
        this.pagesOutputted = itemsCount || 0;

        // KeyValueStore
        this.keyValueStore = await Apify.openKeyValueStore();
    }
github apifytech / actor-scraper / cheerio-scraper / src / crawler_setup.js View on Github external
async _initializeAsync() {
        // RequestList
        const startUrls = this.input.startUrls.map((req) => {
            req.useExtendedUniqueKey = true;
            req.keepUrlFragment = this.input.keepUrlFragments;
            return req;
        });
        this.requestList = await Apify.openRequestList('CHEERIO_SCRAPER', startUrls);

        // RequestQueue if selected
        if (this.input.useRequestQueue) this.requestQueue = await Apify.openRequestQueue();

        // Dataset
        this.dataset = await Apify.openDataset();
        const { itemsCount } = await this.dataset.getInfo();
        this.pagesOutputted = itemsCount || 0;

        // KeyValueStore
        this.keyValueStore = await Apify.openKeyValueStore();
    }

apify

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

Apache-2.0
Latest version published 25 days ago

Package Health Score

84 / 100
Full package analysis