How to use the Apify.getInput function in the apify package

To help you get started, we’ve selected a few apify examples, based on popular ways it is used in public projects.

github apifytech / apify-cli / src / templates / puppeteer_single_page / main.js View on Github external
// Single-page Puppeteer template: opens one URL from INPUT and reports its title.
// NOTE(review): this snippet is truncated by the page scraper — the
// Apify.setValue('OUTPUT', { ... }) call at the end is cut off mid-object;
// the complete template lives in apify-cli's puppeteer_single_page sources.
Apify.main(async () => {
    // Get input of the actor (here only for demonstration purposes).
    // If you'd like to have your input checked and have Apify display
    // a user interface for it, add INPUT_SCHEMA.json file to your actor.
    // For more information, see https://apify.com/docs/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // getInput() resolves to a falsy value when no INPUT record exists,
    // so validate before dereferencing input.url below.
    if (!input || !input.url) throw new Error('Input must be a JSON object with the "url" field!');

    console.log('Launching Puppeteer...');
    const browser = await Apify.launchPuppeteer();

    console.log(`Opening page ${input.url}...`);
    const page = await browser.newPage();
    await page.goto(input.url);
    const title = await page.title();
    console.log(`Title of the page "${input.url}" is "${title}".`);

    // Persist the result under the conventional "OUTPUT" key.
    console.log('Saving output...');
    await Apify.setValue('OUTPUT', {
github apifytech / apify-js / examples / screenshots.js View on Github external
// Screenshot example: crawls the URLs listed in INPUT.sources and captures
// a screenshot of each page.
// NOTE(review): this snippet is truncated by the page scraper — the
// PuppeteerCrawler options object and the handlePageFunction body are cut
// off after the screenshot is taken; see apify-js examples/screenshots.js
// for the complete code.
Apify.main(async () => {
    // Read the actor input configuration containing the URLs for the screenshot.
    // By convention, the input is present in the actor's default key-value store under the "INPUT" key.
    const input = await Apify.getInput();
    if (!input) throw new Error('Have you passed the correct INPUT ?');

    // Destructure the list of request sources (URLs) from the input.
    const { sources } = input;

    // A RequestList must be initialized before the crawler can consume it.
    const requestList = new Apify.RequestList({ sources });
    await requestList.initialize();

    const crawler = new Apify.PuppeteerCrawler({
        requestList,
        handlePageFunction: async ({ page, request }) => {
            console.log(`Processing ${request.url}...`);

            // This is a Puppeteer function that takes a screenshot of the page and returns its buffer.
            const screenshotBuffer = await page.screenshot();

            // The record key may only include the following characters: a-zA-Z0-9!-_.'()
github apifytech / apify-cli / src / templates / hello_world / main.js View on Github external
// Hello-world actor template: echoes the received input back as OUTPUT.
Apify.main(async () => {
    // Fetch the actor input from the default key-value store. Add an
    // INPUT_SCHEMA.json file to the actor if you want Apify to validate
    // the input and render a UI for it — see
    // https://apify.com/docs/actor/input-schema for details.
    const actorInput = await Apify.getInput();
    console.log('Input:');
    console.dir(actorInput);

    // Do something useful here...

    // Persist the result under the conventional "OUTPUT" key.
    const actorOutput = {
        receivedInput: actorInput,
        message: 'Hello sir!',
    };
    console.log('Output:');
    console.dir(actorOutput);
    await Apify.setValue('OUTPUT', actorOutput);
});
github apifytech / apify-cli / src / templates / cheerio_crawler / main.js View on Github external
// Recursive-crawl template: seeds a request queue and crawls matching links.
// NOTE(review): the page header labels this "cheerio_crawler" but the code
// constructs an Apify.PuppeteerCrawler — possibly a scraper mix-up; verify
// against the original template. The snippet is also truncated: the crawler
// options object is left open after L92.
Apify.main(async () => {
    // Get input of the actor (here only for demonstration purposes).
    // If you'd like to have your input checked and have Apify display
    // a user interface for it, add INPUT_SCHEMA.json file to your actor.
    // For more information, see https://apify.com/docs/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // Open a request queue and add a start URL to it
    const requestQueue = await Apify.openRequestQueue();
    await requestQueue.addRequest({ url: 'https://www.iana.org/' });

    // Define a pattern of URLs that the crawler should visit
    const pseudoUrls = [new Apify.PseudoUrl('https://www.iana.org/[.*]')];

    // Create a crawler that will use headless Chrome / Puppeteer to extract data
    // from pages and recursively add links to newly-found pages
    const crawler = new Apify.PuppeteerCrawler({
        requestQueue,

        // This function is called for every page the crawler visits
github apifytech / apify-cli / src / templates / basic_crawler / main.js View on Github external
// Basic-crawler template: downloads each URL from INPUT.sources with
// request-promise (`rp`, imported elsewhere) and pushes the HTML to the dataset.
// NOTE(review): this snippet is truncated by the page scraper — the
// pushData object literal and the BasicCrawler options are cut off after L115.
Apify.main(async () => {
    // Get input of the actor (here only for demonstration purposes).
    // If you'd like to have your input checked and have Apify display
    // a user interface for it, add INPUT_SCHEMA.json file to your actor.
    // For more information, see https://apify.com/docs/actor/input-schema
    const input = await Apify.getInput();
    console.log('Input:');
    console.dir(input);

    // getInput() resolves to a falsy value when no INPUT record exists,
    // so validate before dereferencing input.sources below.
    if (!input || !input.sources) throw new Error('Input must be a JSON object with the "sources" field!');

    // Named request list persists its state across actor migrations/restarts.
    const requestList = await Apify.openRequestList('my-request-list', input.sources);

    // Create a basic crawler that will use request-promise to download
    // web pages from a given list of URLs
    const basicCrawler = new Apify.BasicCrawler({
        requestList,
        handleRequestFunction: async ({ request }) => {
            await Apify.pushData({
                request,
                finishedAt: new Date(),
                html: await rp(request.url),
github apifytech / actor-scraper / web-scraper / src / actor.js View on Github external
// Web Scraper entry point: reads INPUT, builds a crawler from it, runs it.
Apify.main(async () => {
    log.debug('Reading INPUT.');
    const actorInput = await Apify.getInput();
    if (!actorInput) {
        throw new Error('INPUT cannot be empty!');
    }

    // Translate the raw input into a fully configured crawler instance.
    log.info('Configuring Web Scraper.');
    const crawlerSetup = new CrawlerSetup(actorInput);
    const crawler = await crawlerSetup.createCrawler();

    log.info('Configuration completed. Starting the scrape.');
    await crawler.run();
    log.info('Web Scraper finished.');
});
github apifytech / actor-scraper / cheerio-scraper / src / actor.js View on Github external
// Cheerio Scraper entry point: reads INPUT, builds a crawler from it, runs it.
Apify.main(async () => {
    log.debug('Reading INPUT.');
    const actorInput = await Apify.getInput();
    if (!actorInput) {
        throw new Error('INPUT cannot be empty!');
    }

    // Translate the raw input into a fully configured crawler instance.
    log.info('Configuring Cheerio Scraper.');
    const crawlerSetup = new CrawlerSetup(actorInput);
    const crawler = await crawlerSetup.createCrawler();

    log.info('Configuration completed. Starting the scrape.');
    await crawler.run();
    log.info('Cheerio Scraper finished.');
});
github apifytech / actor-scraper / puppeteer-scraper / src / actor.js View on Github external
// Puppeteer Scraper entry point: reads INPUT, builds a crawler from it, runs it.
Apify.main(async () => {
    log.debug('Reading INPUT.');
    const actorInput = await Apify.getInput();
    if (!actorInput) {
        throw new Error('INPUT cannot be empty!');
    }

    // Translate the raw input into a fully configured crawler instance.
    log.info('Configuring Puppeteer Scraper.');
    const crawlerSetup = new CrawlerSetup(actorInput);
    const crawler = await crawlerSetup.createCrawler();

    log.info('Configuration completed. Starting the scrape.');
    await crawler.run();
    log.info('Puppeteer Scraper finished.');
});
github apifytech / apify-cli / src / templates / basic / main.js View on Github external
// Basic actor template: logs a field from INPUT and stores a sample OUTPUT.
Apify.main(async () => {
    const input = await Apify.getInput();

    // Guard against a missing input record: Apify.getInput() resolves to a
    // falsy value when the default key-value store has no INPUT, in which
    // case `input.test` below would throw an unhelpful TypeError. Every
    // other template on this page validates the input the same way.
    if (!input) throw new Error('Input must be a JSON object with the "test" field!');

    console.log(`My test input: ${input.test}`);

    // Persist a sample result under the conventional "OUTPUT" key.
    await Apify.setValue('OUTPUT', { foo: 'bar' });

    console.log('Done.');
});

apify

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

Apache-2.0
Latest version published 2 months ago

Package Health Score

84 / 100
Full package analysis