How to use the apify.launchPuppeteer function in apify

To help you get started, we’ve selected a few apify examples, based on popular ways it is used in public projects.

github apifytech / apify-js / examples / puppeteer_with_proxy.js View on Github external
Apify.main(async () => {
    // Apify.launchPuppeteer() is similar to Puppeteer's launch() function.
    // It accepts the same parameters and returns a preconfigured Puppeteer.Browser instance.
    // Moreover, it accepts several additional options, such as useApifyProxy.
    const options = {
        useApifyProxy: true,
    };
    const browser = await Apify.launchPuppeteer(options);

    try {
        console.log('Running Puppeteer script...');

        // Proceed with a plain Puppeteer script.
        const page = await browser.newPage();
        const url = 'https://en.wikipedia.org/wiki/Main_Page';
        await page.goto(url);
        const title = await page.title();

        console.log(`Page title: ${title}`);
    } finally {
        // Cleaning up after yourself is always good. Doing it in `finally`
        // closes the browser even when opening the page, navigating or
        // reading the title throws; the error still propagates afterwards.
        await browser.close();
        console.log('Puppeteer closed.');
    }
});
github apifytech / actor-page-analyzer / src / index.js View on Github external
}

        const tests = input.tests || ['SCHEMA.ORG', 'JSON-LD', 'WINDOW', 'XHR', 'META', 'HTML'];
        output = new OutputGenerator(tests);

        const launchPuppeteerOptions = {
            stealth: true,
            headless: true,
        };

        if (process.env.PROXY_GROUP && process.env.PROXY_PASSWORD) {
            const { PROXY_PASSWORD, PROXY_GROUP, PROXY_ADDRESS } = process.env;
            const proxyAddress = PROXY_ADDRESS || 'proxy.apify.com:8000';
            launchPuppeteerOptions.proxyUrl = `http://groups-${PROXY_GROUP}:${PROXY_PASSWORD}@${proxyAddress}`;
        }
        const browser = await Apify.launchPuppeteer(launchPuppeteerOptions);

        let pageToAnalyze = null;
        for (let i = 0; i < input.pages.length; i++) {
            pageToAnalyze = input.pages[i];
            // eslint-disable-next-line no-await-in-loop
            await analysePage(browser, pageToAnalyze.url, pageToAnalyze.searchFor, pageToAnalyze.tests || tests);
        }

        log('Analyzer finished');
    } catch (error) {
        log('Top level error');
        console.error(error);
    }
});
github cermak-petr / actor-booking-scraper / src / util.js View on Github external
module.exports.getWorkingBrowser = async (startUrl, input) => {
    const sortBy = input.sortBy || 'bayesian_review_score';
    for (let i = 0; i < 1000; i++) {
        console.log('testing proxy...');
        const config = Object.assign({
            apifyProxySession: 'BOOKING_' + Math.random()
        }, input.proxyConfig || {});
        const browser = await Apify.launchPuppeteer(config);
        const page = await browser.newPage();
        try{
            await Apify.utils.puppeteer.hideWebDriver(page);
            await page.goto(startUrl, { timeout: 60000 });
            //await page.waitForNavigation({ timeout: 60000 });
        } catch(e) {
            console.log('invalid proxy, retrying...');
            console.log(e);
            continue;
        }
        const pageUrl = await page.url();
        if (pageUrl.indexOf(sortBy) > -1 || i === 999) {
            console.log('valid proxy found');
            await page.close();
            return browser;
        }
github apifytech / actor-scraper / src / modules / puppeteer_pool.js View on Github external
_createBrowser() {
        const puppeteerPromise = Apify.launchPuppeteer(this.puppeteerConfig);
        const browser = new Browser(this.browserCounter++, puppeteerPromise);

        browser.browserPromise.then((puppeteerBrowser) => {
            puppeteerBrowser.on('disconnected', () => {
                logError('Puppeteer sent "disconnect" event. Crashed???');

                if (!browser.retired) this._retireCurrentBrowser();
            });
        });

        return browser;
    }
github cermak-petr / actor-booking-scraper / src / main.js View on Github external
launchPuppeteerFunction: () => {
            if (!input.testProxy) {
                return Apify.launchPuppeteer(input.proxyConfig || {});
            }
            return getWorkingBrowser(startUrl, input);
        },

apify

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

Apache-2.0
Latest version published 2 months ago

Package Health Score

84 / 100
Full package analysis