Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
/**
 * Reports a fatal error through the logger and terminates the process.
 *
 * @param {Error} err - The error that caused the shutdown.
 * @param {number} exitCode - Exit code handed to `process.exit()`.
 * @param {string} message - Message logged alongside the error.
 */
const exitWithError = (err, exitCode, message) => {
    // Log first: nothing after process.exit() is guaranteed to run.
    log.exception(err, message);

    // `exited` is declared outside this function; presumably other shutdown
    // paths read it to avoid exiting twice - confirm against the enclosing scope.
    exited = true;

    process.exit(exitCode);
};
async _maybeFinish() {
if (this.queryingIsFinished) return;
if (this._currentConcurrency > 0) return;
this.queryingIsFinished = true;
try {
const isFinished = await this.isFinishedFunction();
if (isFinished && this.resolve) this.resolve();
} catch (err) {
if (this.reject) {
// No need to log all concurrent errors.
log.exception(err, 'AutoscaledPool: isFinishedFunction failed.');
this.reject(err);
}
} finally {
this.queryingIsFinished = false;
}
}
async _initBrowser(browserPromise, instance) {
const { id } = instance;
let browser;
try {
browser = await browserPromise;
} catch (err) {
log.exception(err, 'PuppeteerPool: Browser launch failed', { id });
delete this.activeInstances[id];
return;
}
instance.childProcess = browser.process();
instance.recycleDiskCacheDir = browser.recycleDiskCacheDir;
browser.on('disconnected', () => {
// If instance.killed === true then we killed the instance so don't log it.
if (!instance.killed) log.error('PuppeteerPool: Puppeteer sent "disconnect" event. Maybe it crashed???', { id });
this._retireInstance(instance);
});
// This one is done manually in PuppeteerPool.newPage() so that it happens immediately.
// browser.on('targetcreated', () => instance.activePages++);
browser.on('targetdestroyed', (target) => {
// The event is also called for service workers and Chromium extensions, which must be ignored!
/**
 * Compiles a script body into an async function of the form
 * `async ({ page, request }) => { ...scriptString... }` by evaluating it
 * in a new `vm` context.
 *
 * NOTE(review): Node's documentation states that `vm` is not a security
 * mechanism; the prototype-less default context only limits what the script
 * can reach by accident. Do not treat this as a sandbox for untrusted code.
 *
 * @param {string} scriptString - Source of the function body.
 * @param {Object} [context] - Object used as the global of the new context.
 *   Defaults to a prototype-less object, which "secures" the context by
 *   removing prototypes unless a custom context is provided.
 * @returns {Function} The compiled async function.
 * @throws {Error} Re-throws any compilation error after logging it, and throws
 *   if the evaluated result is not a function.
 */
const compileScript = (scriptString, context = Object.create(null)) => {
    // The closing brace goes on its own line so that a script ending with a
    // `//` comment cannot comment the brace out and break compilation.
    const funcString = `async ({ page, request }) => {${scriptString}\n}`;
    let func;
    try {
        func = vm.runInNewContext(funcString, context);
    } catch (err) {
        log.exception(err, 'Cannot compile script!');
        throw err;
    }
    if (!_.isFunction(func)) throw new Error('Compilation result is not a function!'); // This should not happen...
    return func;
};
/**
 * Handles a failure of the request-handling function: either hands the
 * request back to its source for another attempt, or marks it as handled
 * and delegates to `handleFailedRequestFunction`.
 *
 * @param {Error} error - The error raised while handling the request.
 * @param {Object} request - The failed request; its error list and
 *   `retryCount` are updated in place.
 * @param {Object} source - The object the request came from; must provide
 *   `reclaimRequest()` and `markRequestHandled()`.
 * @returns {Promise<*>} Result of `source.reclaimRequest()` when retrying,
 *   otherwise the result of `handleFailedRequestFunction()`.
 */
async _requestFunctionErrorHandler(error, request, source) {
    request.pushErrorMessage(error);

    // A request is retried only when it is not flagged `noRetry` and has
    // retries left.
    const canRetry = !request.noRetry && request.retryCount < this.maxRequestRetries;

    if (!canRetry) {
        // Terminal failure: count the request as handled, record the failed
        // job and let the failure handler report it.
        this.handledRequestsCount++;
        await source.markRequestHandled(request);
        this.stats.failJob(request.id || request.url);
        return this.handleFailedRequestFunction({ request, error }); // This function prints an error message.
    }

    request.retryCount++;
    const requestInfo = _.pick(request, 'url', 'retryCount', 'id');
    log.exception(
        error,
        'BasicCrawler: handleRequestFunction failed, reclaiming failed request back to the list or queue',
        requestInfo,
    );
    return source.reclaimRequest(request);
}
await this.liveViewServer.stop().catch(err => log.exception(err, 'PuppeteerPool: Cannot close LiveViewServer.'));
}
.catch((err) => {
if (err.message.includes('Cannot persist state.')) {
log.error('BasicCrawler: The crawler attempted to persist its request list\'s state and failed due to missing or '
+ 'invalid config. Make sure to use either Apify.openRequestList() or the "stateKeyPrefix" option of RequestList '
+ 'constructor to ensure your crawling state is persisted through host migrations and restarts.');
} else {
log.exception(err, 'BasicCrawler: An unexpected error occured when the crawler '
+ 'attempted to persist its request list\'s state.');
}
});
}