Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// When the browser drops its connection, report it unless the disconnect
// was caused by us, and retire the instance in either case.
browser.on('disconnected', () => {
    // `instance.killed` being truthy means we killed the instance ourselves,
    // so the disconnect is expected and is not logged.
    const disconnectWasExpected = Boolean(instance.killed);
    if (!disconnectWasExpected) {
        log.error('PuppeteerPool: Puppeteer sent "disconnect" event. Maybe it crashed???', { id });
    }
    this._retireInstance(instance);
});
// This one is done manually in PuppeteerPool.newPage() so that it happens immediately.
/**
 * Listener registered for the process-wide 'uncaughtException' event.
 * It duck-types the error against the shape of the assertion error that
 * originates in tunnel-agent: a match is logged and dropped, anything else
 * is thrown again untouched.
 *
 * @param {Error} err The uncaught exception passed in by the process.
 * @throws Rethrows `err` whenever it does not match the tunnel-agent assertion.
 */
this.tunnelAgentExceptionListener = (err) => {
    try {
        // Every probe is evaluated eagerly, in this order. If any of them
        // throws (e.g. `err.stack` is missing), control lands in the catch
        // block below and the original error is rethrown.
        const fingerprint = [
            err.code === 'ERR_ASSERTION',
            err.name === 'AssertionError [ERR_ASSERTION]',
            err.operator === '==',
            err.expected === 0,
            err.stack.includes('/tunnel-agent/index.js'),
        ];
        // With all probes passing we can be reasonably sure this is
        // the assertion error coming from tunnel-agent.
        if (fingerprint.every(Boolean)) {
            log.error('CheerioCrawler: Tunnel-Agent assertion error intercepted. The affected request will timeout.');
            return;
        }
    } catch (probeFailure) {
        // A failure while probing only tells us the error has a different
        // shape than the one we are looking for; fall through and rethrow.
    }
    // Not a match: hand the original error back.
    throw err;
};
process.on('uncaughtException', this.tunnelAgentExceptionListener);
checkParamOrThrow(keyValueStoreName, 'keyValueStoreName', 'Maybe String');
checkParamOrThrow(screenshotQuality, 'screenshotQuality', 'Number');
const store = await openKeyValueStore(keyValueStoreName);
if (saveScreenshot) {
const screenshotBuffer = await page.screenshot({ fullPage: true, screenshotQuality, type: 'jpeg' });
await store.setValue(`${key}.jpg`, screenshotBuffer, { contentType: 'image/jpeg' });
}
if (saveHtml) {
const html = await page.content();
await store.setValue(`${key}.html`, html, { contentType: 'text/html' });
}
} catch (e) {
// I like this more than having to investigate stack trace
log.error(`saveSnapshot with key ${key || ''} failed with error:`);
throw e;
}
};
async _defaultHandleFailedRequestFunction({ request }) { // eslint-disable-line class-methods-use-this
const details = _.pick(request, 'id', 'url', 'method', 'uniqueKey');
log.error('PuppeteerCrawler: Request failed and reached maximum retries', details);
}
}
.catch((err) => {
this.emit(id, {
id,
url: page.url(),
error: err,
});
log.error(err);
});
};
handleFailedRequestFunction: ({ request }) => {
const details = _.pick(request, 'id', 'url', 'method', 'uniqueKey');
log.error('CheerioCrawler: Request failed and reached maximum retries', details);
},
ignoreSslErrors: false,
.catch(err => log.error(err));
}
.catch((err) => {
if (err.message.includes('Cannot persist state.')) {
log.error('BasicCrawler: The crawler attempted to persist its request list\'s state and failed due to missing or '
+ 'invalid config. Make sure to use either Apify.openRequestList() or the "stateKeyPrefix" option of RequestList '
+ 'constructor to ensure your crawling state is persisted through host migrations and restarts.');
} else {
log.exception(err, 'BasicCrawler: An unexpected error occured when the crawler '
+ 'attempted to persist its request list\'s state.');
}
});
}
handleFailedRequestFunction: ({ request }) => {
const details = _.pick(request, 'id', 'url', 'method', 'uniqueKey');
log.error('BasicCrawler: Request failed and reached maximum retries', details);
},
autoscaledPoolOptions: {},