How to use the cheerio.browser function in cheerio

To help you get started, we’ve selected a few cheerio examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github postlight / mercury-parser / src / test-helpers.js View on Github external
after: done => {
      if (!has_fixtures && !cheerio.browser) {
        has_fixtures = nock.recorder.play();
        // eslint-disable-next-line no-console
        console.log(
          `This is disabled for browser/node interop. To capture fixutres,
          open ${'`src/test-helpers.js`'} and uncomment lines 58 and 59 and
          the fs import at top of file.`
        );
        // const text = `const nock = require('nock');\n${has_fixtures.join('\n')}`;
        // fs.writeFile(fp, text, done);
      } else {
        done();
      }
    },
  };
github postlight / mercury-parser / src / test-helpers.js View on Github external
before: () => {
      if (cheerio.browser) return;
      if (!has_fixtures) {
        try {
          require(`../${fp}`); // eslint-disable-line global-require, import/no-dynamic-require, max-len
          has_fixtures = true;
        } catch (e) {
          nock.recorder.rec({
            dont_print: true,
          });
        }
      } else {
        has_fixtures = false;
        nock.recorder.rec({
          dont_print: true,
        });
      }
    },
github postlight / mercury-parser / src / resource / utils / constants.js View on Github external
import cheerio from 'cheerio';

// Headers sent outside the browser build.
const NODE_REQUEST_HEADERS = {
  'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
};

// Browser does not like us setting user agent, so the browser build
// sends no custom headers at all.
export const REQUEST_HEADERS = cheerio.browser ? {} : NODE_REQUEST_HEADERS;

// How long to keep trying to fetch a resource before timing out:
// 10 seconds, expressed in milliseconds.
export const FETCH_TIMEOUT = 10 * 1000;

// Content types that we do not extract content from.
const BAD_CONTENT_TYPES = [
  'audio/mpeg',
  'image/gif',
  'image/jpeg',
  'image/jpg',
];

// Alternation of every entry above, e.g. `audio/mpeg|image/gif|...`.
const badContentTypeAlternation = BAD_CONTENT_TYPES.join('|');

// Case-insensitive pattern that matches only when the whole string is
// exactly one of the listed content types.
export const BAD_CONTENT_TYPES_RE = new RegExp(`^(${badContentTypeAlternation})$`, 'i');
github postlight / mercury-parser / src / mercury.js View on Github external
total_pages: 1,
        rendered_pages: 1,
      };
    }

    if (contentType === 'markdown') {
      const turndownService = new TurndownService();
      result.content = turndownService.turndown(result.content);
    } else if (contentType === 'text') {
      result.content = $.text($(result.content));
    }

    return { ...result, ...extendedTypes };
  },

  // `cheerio.browser` coerced with `!!`, so this property is always a strict
  // boolean (false when `cheerio.browser` is undefined or otherwise falsy).
  browser: !!cheerio.browser,

  /**
   * A convenience method for getting a resource to work with,
   * e.g., for a custom extractor generator.
   *
   * Delegates directly to `Resource.create`, passing only the URL.
   *
   * @param url - Forwarded unchanged to `Resource.create`
   *   (presumably a URL string; confirm against `Resource.create`).
   * @returns Whatever `Resource.create(url)` returns
   *   (NOTE(review): likely a promise, since `parse` awaits the same
   *   call; confirm).
   */
  fetchResource(url) {
    return Resource.create(url);
  },

  /**
   * Registers a custom extractor by delegating to `addCustomExtractor`.
   *
   * @param extractor - Forwarded unchanged to `addCustomExtractor`; its
   *   required shape is defined there, not here.
   * @returns Whatever `addCustomExtractor(extractor)` returns.
   */
  addExtractor(extractor) {
    return addCustomExtractor(extractor);
  },
};

export default Mercury;
github postlight / mercury-parser / src / mercury.js View on Github external
async parse(url, { html, ...opts } = {}) {
    const {
      fetchAllPages = true,
      fallback = true,
      contentType = 'html',
      headers = {},
      extend,
      customExtractor,
    } = opts;

    // if no url was passed and this is the browser version,
    // set url to window.location.href and load the html
    // from the current page
    if (!url && cheerio.browser) {
      url = window.location.href; // eslint-disable-line no-undef
      html = html || cheerio.html();
    }

    const parsedUrl = URL.parse(url);

    if (!validateUrl(parsedUrl)) {
      return {
        error: true,
        message:
          'The url parameter passed does not look like a valid URL. Please check your URL and try again.',
      };
    }

    const $ = await Resource.create(url, html, parsedUrl, headers);
github postlight / mercury-parser / src / resource / index.js View on Github external
encodeDoc({ content, contentType }) {
    const encoding = getEncoding(contentType);
    let decodedContent = iconv.decode(content, encoding);
    let $ = cheerio.load(decodedContent);

    // after first cheerio.load, check to see if encoding matches
    const contentTypeSelector = cheerio.browser
      ? 'meta[http-equiv=content-type]'
      : 'meta[http-equiv=content-type i]';
    const metaContentType =
      $(contentTypeSelector).attr('content') ||
      $('meta[charset]').attr('charset');
    const properEncoding = getEncoding(metaContentType);

    // if encodings in the header/body dont match, use the one in the body
    if (metaContentType && properEncoding !== encoding) {
      decodedContent = iconv.decode(content, properEncoding);
      $ = cheerio.load(decodedContent);
    }

    return $;
  },
};