Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
page.on('request', onRequest);
console.log(` * goto: ${urlToVisit}`);
const status = await page.goto(urlToVisit, {
timeout: 120000,
waitUntil: 'networkidle2',
});
const pageUrl = page.url();
if (pageUrl !== urlToVisit) {
console.log(` > ${page.url()}`);
}
// We do not collect URLs unless we are on the home-page
if (status.ok && url === undefined) {
const domainOfPage = getDomain(pageUrl);
const urlsOnPage = await page.evaluate(() => [...document.querySelectorAll('a')].map(a => a.href).filter(Boolean));
const sameDomainUrls = urlsOnPage.filter(
href => href
&& (href.startsWith('https://')
|| href.startsWith('http://')
|| href.startsWith('ws://')
|| href.startsWith('wss://'))
&& getDomain(href) === domainOfPage,
);
return [...new Set(sameDomainUrls)];
}
} catch (ex) {
console.log(`Could not fetch: ${urlToVisit}`, ex);
} finally {
await page.removeAllListeners('request');
await page.close();
const { cpt, sourceUrl, url } = request;
if (cpt === 'document' || url.length > 200) {
return;
}
const t0 = Date.now();
const { exception, filter } = engine.match({
cpt: cpt.toLowerCase(),
sourceUrl,
url,
});
const total = Date.now() - t0;
const sourceDomain = getDomain(sourceUrl);
if (total > 5) {
console.log('SLOW', total, cpt, sourceUrl, url.slice(0, 25), '...');
slowRequests.push(request);
}
if (filter !== undefined && !networkFilters.has(filter.rawLine)) {
networkFilters.add(filter.rawLine);
// console.log('> f', removeExtraHostnames(filter));
addFilter(sourceDomain, removeExtraHostnames(filter));
}
if (exception !== undefined && !networkFilters.has(exception.rawLine)) {
networkFilters.add(exception.rawLine);
// console.log('> e', removeExtraHostnames(exception));
addFilter(sourceDomain, removeExtraHostnames(exception));
private onFrame = async (frame: puppeteer.Frame): Promise => {
// DOM features
const { ids, hrefs, classes } = await frame.$$eval(
'[id],[class],[href]',
extractFeaturesFromDOM,
);
// Source features
const url = frame.url();
const parsed = parse(url);
const hostname = parsed.hostname || '';
const domain = parsed.domain || '';
// Get cosmetics to inject into the Frame
const { active, scripts, styles } = this.getCosmeticsFilters({
domain,
hostname,
url,
// DOM information
classes,
hrefs,
ids,
});
// Abort if cosmetics are disabled
public static fromRawDetails({
requestId = '0',
tabId = 0,
url = '',
hostname,
domain,
sourceUrl = '',
sourceHostname,
sourceDomain,
type = 'main_frame',
}: Partial): Request {
url = url.toLowerCase();
if (hostname === undefined || domain === undefined) {
const parsed = parse(url, TLDTS_OPTIONS);
hostname = hostname || parsed.hostname || '';
domain = domain || parsed.domain || '';
}
// Initialize source URL
if (sourceHostname === undefined || sourceDomain === undefined) {
const parsed = parse(sourceUrl, TLDTS_OPTIONS);
sourceHostname = sourceHostname || parsed.hostname || '';
sourceDomain = sourceDomain || parsed.domain || '';
}
// source URL
return new Request({
requestId,
tabId,
private onRuntimeMessage = (
msg: IBackgroundCallback & { action?: string },
sender: chrome.runtime.MessageSender,
sendResponse: (response?: any) => void,
): void => {
if (sender.tab === undefined || sender.tab.id === undefined || sender.frameId === undefined) {
return;
}
// Make sure we only listen to messages coming from our content-script
// based on the value of `action`.
if (msg.action === 'getCosmeticsFilters') {
// Extract hostname from sender's URL
const { url = '', frameId } = sender;
const parsed = parse(url);
const hostname = parsed.hostname || '';
const domain = parsed.domain || '';
// Once per tab/page load we inject base stylesheets. These are always
// the same for all frames of a given page because they do not depend on
// a particular domain and cannot be cancelled using unhide rules.
// Because of this, we specify `allFrames: true` when injecting them so
// that we do not need to perform this operation for sub-frames.
if (frameId === 0 && msg.lifecycle === 'start') {
const { active, styles } = this.getCosmeticsFilters({
domain,
hostname,
url,
classes: msg.classes,
hrefs: msg.hrefs,
sourceUrl = '',
sourceHostname,
sourceDomain,
type = 'main_frame',
}: Partial): Request {
url = url.toLowerCase();
if (hostname === undefined || domain === undefined) {
const parsed = parse(url, TLDTS_OPTIONS);
hostname = hostname || parsed.hostname || '';
domain = domain || parsed.domain || '';
}
// Initialize source URL
if (sourceHostname === undefined || sourceDomain === undefined) {
const parsed = parse(sourceUrl, TLDTS_OPTIONS);
sourceHostname = sourceHostname || parsed.hostname || '';
sourceDomain = sourceDomain || parsed.domain || '';
}
// source URL
return new Request({
requestId,
tabId,
domain,
hostname,
url,
sourceDomain,
sourceHostname,
sourceUrl,
private onGetCosmeticFilters = (
event: Electron.IpcMainEvent,
url: string,
msg: IBackgroundCallback,
): void => {
// Extract hostname from sender's URL
const parsed = parse(url);
const hostname = parsed.hostname || '';
const domain = parsed.domain || '';
const { active, styles, scripts } = this.getCosmeticsFilters({
domain,
hostname,
url,
classes: msg.classes,
hrefs: msg.hrefs,
ids: msg.ids,
// This needs to be done only once per frame
getBaseRules: msg.lifecycle === 'start',
getInjectionRules: msg.lifecycle === 'start',
getRulesFromHostname: msg.lifecycle === 'start',
/**
 * Parses the request URL and, only when that result is truthy, the frame URL.
 *
 * Evaluates to the parsed frame URL when the parsed request URL is truthy;
 * otherwise evaluates to the (falsy) result of parsing the request URL and
 * never parses `frameUrl`.
 */
match({ url, frameUrl }) {
  const parsedUrl = parse(url);
  return parsedUrl ? parse(frameUrl) : parsedUrl;
}
};
/**
 * Lazily computed parse result for `this.hostname`.
 *
 * When `this._domainInfo` is falsy, `parse` is called with
 * `extractHostname: false` and its result is stored in `this._domainInfo`;
 * the stored value is then returned. Subsequent accesses reuse the stored
 * value for as long as it stays truthy.
 */
get domainInfo() {
  const alreadyParsed = Boolean(this._domainInfo);
  if (!alreadyParsed) {
    const { hostname } = this;
    this._domainInfo = parse(hostname, { extractHostname: false });
  }
  return this._domainInfo;
}
href => href
&& (href.startsWith('https://')
|| href.startsWith('http://')
|| href.startsWith('ws://')
|| href.startsWith('wss://'))
&& getDomain(href) === domainOfPage,
);