How to use the metascraper.scrapeUrl function in metascraper

To help you get started, we've selected a few metascraper examples based on popular ways it is used in public projects.

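All of the excerpts below use the promise-based scrapeUrl API from pre-2.x releases of metascraper, which fetches the page itself and resolves with an object of extracted fields. As a minimal standalone sketch (the field names follow the excerpts below; the exact set depends on the rules your installed version ships with):

const Metascraper = require('metascraper'); // pre-2.x API assumed

Metascraper.scrapeUrl('https://example.com/some-article')
  .then(metadata => {
    // Fields seen in the examples on this page; anything not found comes back undefined.
    console.log(metadata.title, metadata.description, metadata.image, metadata.publisher);
  })
  .catch(err => console.error('scrape failed:', err));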

From NullVoxPopuli/tanqueReact, js/components/chat/message-list/message-row/message-content/index.jsx (view on GitHub):
// React component method: scrape metadata for `url` with metascraper's
// promise-based scrapeUrl API and store it in component state (`_` is lodash).
getTags(url) {
    Metascraper
      .scrapeUrl(url)
      .then(metadata => {
        // An empty result object means no tags could be extracted.
        const hasTags = !_.isEmpty(metadata);
        this.setState({ tags: metadata, hasTags });
      })
      .catch(console.info);
  }
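Because scrapeUrl resolves with a plain object of whatever fields its rules could extract, the component simply treats an empty object as "no tags found" via lodash's isEmpty.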
From olymp/olymp, packages/scrape/server/graphql.es6 (view on GitHub):
scrape: async (source, { url }) => {
        // Newer metascraper calling convention: fetch the HTML with got,
        // then hand metascraper the { html, url } pair.
        const { body: html } = await got(url);
        const metadata = await Metascraper({ html, url });
        return {
          ...metadata,
          id: url,
        };
        // Unreachable in the original source: the older scrapeUrl-based path,
        // which lets metascraper fetch the page itself with custom rules.
        return Metascraper.scrapeUrl(url, getRules(new URL(url)))
          .then(getImages)
          .then(metadata => ({
            ...metadata,
            id: url,
          }))
          .catch(() => ({}));
      }
    },
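The first return in this resolver uses the newer metascraper calling convention: you fetch the HTML yourself and pass { html, url } in. For comparison, a minimal sketch of that style against recent releases, assuming the metascraper-title, metascraper-description, and metascraper-image rule packages are installed (the rule selection here is illustrative):

const got = require('got'); // CommonJS-compatible got (v11) assumed
const metascraper = require('metascraper')([
  require('metascraper-title')(),
  require('metascraper-description')(),
  require('metascraper-image')(),
]);

async function scrape(targetUrl) {
  // Fetch the HTML ourselves, then let metascraper run its rule bundles over it.
  const { body: html, url } = await got(targetUrl);
  return metascraper({ html, url });
}

scrape('https://example.com/some-article').then(console.log);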
From jaredpalmer/react-email-workflow, web/extract.js (view on GitHub):
// Express route: validate the incoming url, scrape it with scrapeUrl,
// cache the extracted payload for a day, and return it as JSON.
router.post('/', (req, res, next) => {
  if (!req.body.url) {
    return res.status(400).json({
      type: 'error',
      error_code: 400,
      error_message: 'Invalid request. Missing url',
    });
  }
  const timer = logger.time('extract.post').namespace(req.body.url);
  Metascraper.scrapeUrl(req.body.url).then(
    data => {
      const payload = {
        url: data.url || req.body.url || '',
        title: data.title || 'Unable to scrape title.',
        content:
          data.description ||
          "Error: Unable to scrape description from the provided url. You'll have to do this on your own.",
        author: data.publisher || 'Unable to scrape author.',
        image: data.image || '',
      };
      cache.put(req.body.url, payload, TWENTY_FOUR_HOURS);
      logger.log(Object.assign({}, { type: 'info' }, payload));
      res.status(200).json(payload);
    },
    e => {
      timer.log();
      // ...the rest of the error handler is truncated in this excerpt
    }
  );
});
From jaredpalmer/react-email-workflow, services/extract.js (view on GitHub):
// Message-handler variant of the same extraction: serve from the cache when
// possible, otherwise scrape with scrapeUrl, cache the payload, and reply.
function onRequestDataExtraction(message, reply) {
    logger.log(message);
    const timer = logger.time('extract.post').namespace(message);
    const cachedResult = cache.get(message.url);
    if (cachedResult) {
      return reply(cachedResult);
    }
    Metascraper.scrapeUrl(message.url)
      .then(data => {
        timer.log();
        const payload = {
          url: data.url || message.url,
          title: data.title || 'Unable to scrape title.',
          content: data.description ||
            "Error: Unable to scrape description from the provided url. You'll have to do this on your own.",
          author: data.publisher || 'Unable to scrape author.',
          image: data.image || '',
        };
        cache.put(message.url, payload, TWENTY_FOUR_HOURS);
        logger.log(Object.assign({}, { type: 'info' }, payload));
        reply(payload);
      })
      .catch(e => {
        timer.log();
        // ...the rest of the error handler is truncated in this excerpt
      });
  }

metascraper

A library to easily scrape metadata from an article on the web using Open Graph, JSON-LD, regular HTML metadata, and a series of fallbacks.

License: MIT
Latest version published 1 month ago

Package Health Score: 89 / 100