How to use the apify.getValue function in apify

To help you get started, we've selected a few apify examples that show popular ways Apify.getValue is used in public projects.
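Before diving into the examples, here is a minimal sketch of the basic pattern. Apify.getValue(key) reads a record from the actor's default key-value store (the special key 'INPUT' holds the actor input) and resolves to the parsed value, or null when no such record exists. The 'STATE' key and the fallback object below are illustrative choices, not SDK conventions.

const Apify = require('apify');

Apify.main(async () => {
    // Read the actor input; resolves to the parsed JSON value,
    // or null when no input record exists.
    const input = await Apify.getValue('INPUT');
    if (!input) throw new Error('Missing INPUT!');

    // Restore previously persisted state, falling back to a default.
    const state = await Apify.getValue('STATE') || { crawled: {} };

    // ... do the actual work, updating state as you go ...

    // Persist the state so a restarted run can resume where it left off.
    await Apify.setValue('STATE', state);
});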


github apifytech / actor-page-analyzer / src / index.js
Apify.main(async () => {
    log('Loading data from input');
    try {
        // Fetch the input and check it has a valid format
        // You don't need to check the input, but it's a good practice.
        let input = await Apify.getValue('INPUT');

        const isSinglePageInput = typeCheck(PAGE_INPUT_TYPE, input);
        const isMultiPageInput = typeCheck(INPUT_TYPE, input);

        if (!isMultiPageInput && !isSinglePageInput) {
            log('Expected input:');
            log(INPUT_TYPE);
            log('or');
            log(PAGE_INPUT_TYPE);
            log('Received input:');
            console.dir(input);
            throw new Error('Received invalid input');
        }
        if (isMultiPageInput) {
            input.pages.forEach(page => {
                if (!typeCheck(PAGE_INPUT_TYPE, page) && !isSinglePageInput) {
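Worth noting in this example: Apify.getValue('INPUT') resolves to null when no input record exists, so the typeCheck calls double as a guard against a missing input, and console.dir prints the offending value before the error is thrown.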
github drobnikj / crawler-google-places / src / enqueue_places_crawler.js
const enqueueAllPlaceDetails = async (page, searchString, requestQueue, maxCrawledPlaces, request) => {
    page.on('response', enqueuePlacesFromResponse(requestQueue, searchString, maxCrawledPlaces));
    // Save state of listing pagination
    // NOTE: If the pageFunction fails, the crawler skips pagination pages that were already scraped.
    const listingStateKey = `${LISTING_PAGINATION_KEY}-${request.id}`;
    const listingPagination = await Apify.getValue(listingStateKey) || {};

    await page.type('#searchboxinput', searchString);
    await sleep(5000);
    await page.click('#searchbox-searchbutton');
    await sleep(5000);
    await waitForGoogleMapLoader(page);
    try {
        await page.waitForSelector(PLACE_TITLE_SEL);
        // If there is a place detail, the search returned just one result
        // and was redirected straight to it, so there are no other places to enqueue.
        log.debug(`Search string ${searchString} has just one place to scrape.`);
        return;
    } catch (e) {
        // This can happen when the search returns a list of results instead.
    }
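The await Apify.getValue(key) || {} idiom above supplies a per-request default when no pagination state has been persisted yet. Its natural counterpart, not shown in this excerpt, is writing the state back as listing pages are processed; a rough sketch (the field name is an assumption, not taken from the project):

// Illustrative: persist the pagination state under the same per-request
// key so a resumed run can skip already scraped pages.
listingPagination.isFinished = true; // assumed field name
await Apify.setValue(listingStateKey, listingPagination);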
github VaclavRut / actor-amazon-crawler / src / main.js
Apify.main(async () => {
    // Get queue and enqueue first url.
    const requestQueue = await Apify.openRequestQueue();
    const input = await Apify.getValue('INPUT');
    const env = await Apify.getEnv();
    // based on the input country and keywords, generate the search urls
    const urls = await createSearchUrls(input);
    for (const searchUrl of urls) {
        await requestQueue.addRequest(searchUrl);
    }

    const config = {
        maxConcurrency: input.maxConcurrency || 40,
        maxRequestsPerCrawl: input.maxRequestsPerCrawl || null,
        useApifyProxy: true,
        apifyProxyGroups: input.apifyProxyGroups || null,
        maxRequestRetries: 6,
        handlePageTimeoutSecs: 2.5 * 60, // the option takes seconds; the original 2.5 * 60 * 1000 would be ~41 hours
        liveView: input.liveView ? input.liveView : true,
        country: input.country,
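The excerpt ends inside the config object, but in actors of this style the options are typically passed on to a crawler. A rough sketch of how that might look (the handler body is an assumption, not taken from the project):

const crawler = new Apify.PuppeteerCrawler({
    requestQueue,
    maxConcurrency: config.maxConcurrency,
    maxRequestRetries: config.maxRequestRetries,
    handlePageTimeoutSecs: config.handlePageTimeoutSecs,
    handlePageFunction: async ({ page, request }) => {
        // ... extract data from the loaded page ...
    },
});
await crawler.run();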
github cermak-petr / actor-booking-scraper / src / main.js
Apify.main(async () => {
    // Actor INPUT variable
    const input = await Apify.getValue('INPUT');

    // Actor STATE variable
    const state = await Apify.getValue('STATE') || { crawled: {} };

    // Migrating flag
    let migrating = false;
    Apify.events.on('migrating', () => { migrating = true; });

    // Check if all required input attributes are present.
    if (!input.search && !input.startUrls) {
        throw new Error('Missing "search" or "startUrls" attribute in INPUT!');
    } else if (input.search && input.startUrls && input.search.trim().length > 0 && input.startUrls.length > 0) {
        throw new Error('It is not possible to use both "search" and "startUrls" attributes in INPUT!');
    }
    if (!(input.proxyConfig && input.proxyConfig.useApifyProxy)) {
        throw new Error('This actor cannot be used without Apify proxy.');
    }
    if (input.useFilters && input.propertyType != 'none') {
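The migrating flag set above is one half of a common persistence pattern: when the platform fires the 'migrating' event, the actor stops taking on new work and keeps its STATE record current so the migrated run can resume. A minimal sketch of how the flag and the state object might be used inside a page handler (illustrative, not taken from the project):

const handlePageFunction = async ({ request }) => {
    // Skip new work once a migration has been announced.
    if (migrating) return;
    // ... scrape the page ...
    state.crawled[request.url] = true;
    // Persist progress so the migrated run can pick up where this one left off.
    await Apify.setValue('STATE', state);
};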
github apifytech / actor-scraper / src / bootstrap.js
const fetchInput = async () => {
    const input = await Apify.getValue('INPUT');

    const crawler = input.crawlerId
        ? await Apify.client.crawlers.getCrawlerSettings({ crawlerId: input.crawlerId })
        : {};

    // NOTE: Old crawler settings may contain null values; replace them with defaults.
    deleteNullProperties(crawler);
    deleteNullProperties(input);

    const mergedInput = _.defaults(input, crawler, INPUT_DEFAULTS, {
        actId: APIFY_ACT_ID,
        runId: APIFY_ACT_RUN_ID,
    });

    mergedInput.crawlPurls = mergedInput.crawlPurls || [];
    mergedInput.crawlPurls.forEach((purl) => {
github apifytech / actor-scraper / src / main.js
const fetchInput = async () => {
    const input = await Apify.getValue('INPUT');

    if (!input.crawlerId) return input;

    const crawler = await Apify.client.crawlers.getCrawlerSettings({ crawlerId: input.crawlerId });

    return Object.assign({}, input, crawler);
};
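Note the precedence here: Object.assign({}, input, crawler) lets the fetched crawler settings overwrite same-named input attributes, the opposite of the _.defaults(input, crawler, ...) call in the previous example, where values already present on the input win.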
github apifytech / actor-scraper / scraper-tools / src / context.js
async getValue(...args) {
    return Apify.getValue(...args);
}
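This thin wrapper simply re-exposes Apify.getValue on the scraper's context object, so user-supplied page functions can read from the key-value store without importing the SDK themselves.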
github apifytech / actor-scraper / src / modules / utils.js
export const getValueOrUndefined = async (key) => {
    const value = await Apify
        .getValue(key)
        .catch(() => undefined);

    return value || undefined;
};
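Two normalizations happen in this helper: any rejection from Apify.getValue is swallowed and mapped to undefined, and falsy stored values (including the null returned for a missing record) are coerced to undefined as well, so callers only ever see a usable value or undefined.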
github drobnikj / crawler-google-places / src / main.js
Apify.main(async () => {
    const input = await Apify.getValue('INPUT');
    const { searchString, searchStringsArray, proxyConfig, lat, lng, maxCrawledPlaces, regularTestRun,
        includeReviews = true, includeImages = true, includeHistogram = true, includeOpeningHours = true,
        walker, debug } = input;

    if (debug) log.setLevel(log.LEVELS.DEBUG);
    if (!searchString && !searchStringsArray) throw new Error('Attribute searchString or searchStringsArray is missing in input.');
    if (proxyConfig && proxyConfig.apifyProxyGroups
        && (proxyConfig.apifyProxyGroups.includes('GOOGLESERP') || proxyConfig.apifyProxyGroups.includes('GOOGLE_SERP'))) {
        throw new Error('It is not possible to crawl google places with GOOGLE SERP proxy group. Please use a different one and rerun crawler.');
    }

    log.info('Scraping Google Places for search string:', searchString);

    const startRequests = [];
    let startUrlSearch = 'https://www.google.com/maps/search/';
    if (lat || lng) {
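The destructuring with defaults (includeReviews = true and friends) makes the optional input attributes opt-out rather than opt-in, while searchString and searchStringsArray are validated as required. The excerpt breaks off where the search start URL is being built, branching on whether lat/lng coordinates were supplied.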

apify

The scalable web crawling and scraping library for JavaScript/Node.js. Enables development of data extraction and web automation jobs (not only) with headless Chrome and Puppeteer.

License: Apache-2.0