How to use the jsdom.JSDOM.fromURL function in jsdom

To help you get started, we’ve selected a few jsdom examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github aarmea / readability-scrape / index.js View on Github external
.arguments("")
  .option("--html", "Print the stripped HTML content")
  .option("--json", "Print the full Readability output as JSON")
  .description("Retrieve and print the primary article text from the URL")
  .action(function(cmdUrl, cmdOptions) {
    url = cmdUrl;
    options = cmdOptions;
  })
  .parse(process.argv);

if (typeof url === "undefined") {
  Commander.outputHelp()
  process.exit(1);
}

JSDOM.fromURL(url, JSDOM_OPTIONS)
  .then(dom => {
    // Readability relies on a global Node object to work properly
    // https://github.com/mozilla/readability/issues/346
    global.Node = dom.window.Node;

    // TODO: Extract all of the links before calling Readability

    var document = dom.window.document;
    var article =
      new Readability(document.documentURI, dom.window.document).parse();
    if (options.json) {
      console.log(JSON.stringify(article));
    } else if (options.html) {
      console.log(article.content);
    } else {
      console.log(article.textContent);
github feross / studynotes.org / liveupdater / index.js View on Github external
LiveUpdater.prototype.getTitle = function (url) {
  var self = this

  var title = self.titles[url]
  if (title) {
    return title
  } else {
    debug('getTitle: Fetching page title for ' + url)
    JSDOM.fromURL(config.siteOrigin + url).then(dom => {
      const { window } = dom
      const { document } = window

      window.addEventListener('load', () => {
        title = document.querySelector('title').textContent
        if (title === 'Site is under maintenance!') return
        var index = title.indexOf('- Study Notes')
        if (index !== -1) title = title.substring(0, index)

        self.titles[url] = title
        self.sendStatsUpdates(url)
        window.close()
      })
    }, _ => {
      // Handle error, or 404 page
      self.titles[url] = '404 Page Not Found'
github pjolayres / michelin-guide-crawler / src / crawler.js View on Github external
static async fetchLinks(url) {
    let links = [];
    let currentUrl = url;

    logger.info('Fetching list of restaurant URLs');

    do {
      const dom = await JSDOM.fromURL(currentUrl); // eslint-disable-line no-await-in-loop
      const currentLinks = Utilities.mapDOM(dom.window.document, '#restaurantList li.part .rname a', a => a.href);

      links = [...links, ...currentLinks];

      currentUrl = Utilities.firstOrDefaultDOM(dom.window.document, '#resultPager .pager li:nth-child(2) a', a => a.href);

      logger.info(`Acquired ${links.length} URLs`);
    } while (currentUrl);

    return links;
  }
github LenoxBot / LenoxBot / commands / nsfw / hentaihaven.js View on Github external
JSDOM.fromURL('https://hentaihaven.org').then((res) => {
        const elements = res.window.document.querySelectorAll('head > script');
        const element = Array.from(elements).filter((elem) => elem.text.startsWith('/*  */\nvar Pukka = '))[0];
        const Pukka = JSON.parse(element.text.replace(/^\/\* {2}\*\/\nvar Pukka = |\;\n\/\* {2}\*\/$/g, ''));
        const random_category = Pukka.category_links[Math.floor(Math.random() * (Pukka.category_links.length - 1))];
        JSDOM.fromURL(random_category).then((resource) => {
          const { document } = resource.window;
          const title = document.getElementsByClassName('archive-title')[0].textContent;
          const videos = document.getElementsByClassName('hidden animate_video');
          const video_selection = Math.floor(Math.random() * (videos.length - 1)); // select a random video off the page
          const video_url = videos[video_selection].attributes[1].value;
          const thumbnail_url = document.getElementsByClassName('hidden animate_image')[video_selection].attributes[1].value || document.getElementsByClassName('hidden solid_image')[video_selection].attributes[1].value || document.getElementsByClassName('lazy attachment-medium post-image')[video_selection].attributes[4].value;
          const embed = new Discord.MessageEmbed()
            .setImage(thumbnail_url)
            .setURL(video_url)
            .setFooter(video_url)
            .setColor('BLUE')
            .setTitle(title);

          return msg.channel.send({
            embed
          });
github pjolayres / michelin-guide-crawler / src / crawler.js View on Github external
static async fetchRestaurantMapInfo(mapUrl) {
    let retries = RetryCount;

    while (retries > 0) {
      try {
        const mapDom = await JSDOM.fromURL(mapUrl); // eslint-disable-line no-await-in-loop

        const mapHtml = mapDom.serialize();
        const [, latitude] = mapHtml.match(/lat: ([0-9.]+),/);
        const [, longitude] = mapHtml.match(/lng: ([0-9.]+),/);
        const [, mapUri] = mapHtml.match(/mapuri:'([^']+)'/);

        const result = {
          latitude,
          longitude,
          mapUri
        };

        return result;
      }
      catch (ex) {
        retries -= 1;
github LenoxBot / LenoxBot / src / commands / NSFW / hentai-haven.js View on Github external
async run(message) {
		try {
			/* eslint no-undef: 0 */
			await JSDOM.fromURL('https://hentaihaven.org').then(async(res) => {
				const elements = res.window.document.querySelectorAll('head > script');
				const element = Array.from(elements).filter(elem => elem.text.startsWith('/*  */\nvar Pukka = '))[0];
				const Pukka = JSON.parse(element.text.replace(/^\/\* {2}\*\/\nvar Pukka = |\;\n\/\* {2}\*\/$/g, ''));
				const random_category = Pukka.category_links[Math.floor(Math.random() * (Pukka.category_links.length - 1))];
				await JSDOM.fromURL(random_category).then(resource => {
					const document = resource.window.document;
					const title = document.getElementsByClassName('archive-title')[0].textContent;
					const videos = document.getElementsByClassName('hidden animate_video');
					const video_selection = Math.floor(Math.random() * (videos.length - 1)); // select a random video off the page
					const video_url = videos[video_selection].attributes[1].value;
					const thumbnail_url = document.getElementsByClassName('hidden animate_image')[video_selection].attributes[1].value || document.getElementsByClassName('hidden solid_image')[video_selection].attributes[1].value || document.getElementsByClassName('lazy attachment-medium post-image')[video_selection].attributes[4].value;
					return message.channel.send(new MessageEmbed()
						.setImage(thumbnail_url)
						.setURL(video_url)
						.setFooter(video_url)
						.setColor('BLUE')
github gucong3000 / free_ss / free_ss.js View on Github external
return Promise.all(Object.keys(srvs).map(url => (
		JSDOM.fromURL(url, {
			referrer: url,
		}).then(dom => (
			Array.from(
				dom.window.document.querySelectorAll(srvs[url])
			).map(node2config)
		), console.error)
	))).then(servers => (
		[].concat.apply([], servers).filter(server => {
github bbyars / mountebank / functionalTest / html / docsTester / docs.js View on Github external
/**
 * Loads the page at `api.url + endpoint` with jsdom and exposes its window
 * through a Q promise.
 *
 * @param {string} endpoint Path appended to `api.url`.
 * @returns {Object} The promise of a `Q.defer()`; resolved with `dom.window`
 *     on success and rejected with whatever `JSDOM.fromURL` rejects with.
 */
function getDOM (endpoint) {
    const pending = Q.defer();
    const pageUrl = api.url + endpoint;

    // Bridge the promise returned by JSDOM.fromURL into the Q deferred.
    JSDOM.fromURL(pageUrl)
        .then(dom => {
            pending.resolve(dom.window);
        })
        .catch(errors => {
            pending.reject(errors);
        });

    return pending.promise;
}
github gucong3000 / gulp-reporter / shorturl.js View on Github external
/**
 * Loads `url` with jsdom (sending the same URL as the referrer) and
 * collects the `href` of every element matching `selector`.
 *
 * @param {string} url Page to load.
 * @param {string} selector CSS selector for the elements of interest.
 * @returns {Promise<Array>} The `href` of each matched element, in document order.
 */
function get(url, selector) {
	const jsdomOptions = {
		referrer: url
	};
	const loading = JSDOM.fromURL(url, jsdomOptions);

	return loading.then(dom => {
		const matches = dom.window.document.querySelectorAll(selector);
		return Array.from(matches, a => a.href);
	});
}
github OpenByteDev / SourceScraper / packages / source-scraper-dom-runner / lib / DomRunner.ts View on Github external
/**
 * Loads `url` into a jsdom instance (configured by `options.jsdomConfig`)
 * and passes it to `scraper` together with its window and document.
 *
 * @param url Address of the page to load.
 * @param scraper Callback that receives the loaded DOM context.
 * @param options Runner options; `jsdomConfig` is forwarded to `JSDOM.fromURL`.
 * @returns Whatever `scraper` returns.
 */
protected async exec(
        url: string,
        scraper: (args: IDomRunnerArgs) => Promise,
        options: IDomRunnerOptions): Promise {
        // NOTE(review): both `Promise` annotations above carry no type argument;
        // they look like they were stripped at some point - confirm the intended type.
        const jsdom = await JSDOM.fromURL(url, options.jsdomConfig);
        const { window } = jsdom;
        const { document } = window;

        const args: IDomRunnerArgs = { url, options, jsdom, document, window };
        return scraper(args);
    }
}