How to use the fathom-web.out function in fathom-web

To help you get started, we’ve selected a few fathom-web examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github mozilla / price-tracker / src / extraction / fathom / ruleset_factory.js View on Github external
// no background images, even ones that have reasonable aspect ratios
      // TODO: If necessary, also look at parents. I've seen them say
      // "background" in their IDs as well.
      rule(type('image'), score(this.hasBackgroundInID.bind(this)), {name: 'hasBackgroundInID'}),
      // return image element(s) with max score
      rule(type('image').max(), out('image')),

      /**
       * Title rules
       */
      // consider all eligible h1 elements
      rule(dom('h1').when(this.isEligibleTitle.bind(this)), type('title')),
      // better score based on y-axis proximity to max scoring image element
      rule(type('title'), score(this.isNearImageTopOrBottom.bind(this)), {name: 'isNearImageTopOrBottom'}),
      // return title element(s) with max score
      rule(type('title').max(), out('title')),

      /**
       * Price rules
       */
      // 72% by itself, at [4, 4, 4, 4...]!:
      // consider all eligible span and h2 elements
      rule(dom('span, h2').when(this.isEligiblePrice.bind(this)), type('price')),
      // check if the element has a '$' in its innerText
      rule(type('price'), score(this.hasDollarSign.bind(this)), {name: 'hasDollarSign'}),
      // better score the closer the element is to the top of the page
      rule(type('price'), score(this.isAboveTheFold.bind(this)), {name: 'isAboveTheFoldPrice'}),
      // check if the id has "price" in it
      rule(type('price'), score(this.hasPriceInID.bind(this)), {name: 'hasPriceInID'}),
      rule(type('price'), score(this.hasPriceInParentID.bind(this)), {name: 'hasPriceInParentID'}),
      // check if any class names have "price" in them
      rule(type('price'), score(this.hasPriceInClassName.bind(this)), {name: 'hasPriceInClassName'}),
github mozilla / price-tracker / src / extraction / fathom / ruleset_factory.js View on Github external
// TODO: Consider a bonus for <img> tags.
      rule(dom('div').when(fnode => this.isVisible(fnode) && this.hasBackgroundImage(fnode)), type('image')),
      // better score the closer the element is to the top of the page
      rule(type('image'), score(this.isAboveTheFold.bind(this)), {name: 'isAboveTheFoldImage'}),
      // better score for larger images
      rule(type('image'), score(this.isBig.bind(this)), {name: 'isBig'}),
      // bonus for non-extreme aspect ratios, to filter out banners or nav elements
      // TODO: Meant to make this a penalty, but it turns out to work as is.
      // Try as a penalty.
      rule(type('image'), score(this.hasSquareAspectRatio.bind(this)), {name: 'hasSquareAspectRatio'}),
      // no background images, even ones that have reasonable aspect ratios
      // TODO: If necessary, also look at parents. I've seen them say
      // "background" in their IDs as well.
      rule(type('image'), score(this.hasBackgroundInID.bind(this)), {name: 'hasBackgroundInID'}),
      // return image element(s) with max score
      rule(type('image').max(), out('image')),

      /**
       * Title rules
       */
      // consider all eligible h1 elements
      rule(dom('h1').when(this.isEligibleTitle.bind(this)), type('title')),
      // better score based on y-axis proximity to max scoring image element
      rule(type('title'), score(this.isNearImageTopOrBottom.bind(this)), {name: 'isNearImageTopOrBottom'}),
      // return title element(s) with max score
      rule(type('title').max(), out('title')),

      /**
       * Price rules
       */
      // 72% by itself, at [4, 4, 4, 4...]!:
      // consider all eligible span and h2 elements
github mozilla / fathom-trainees / src / trainees.js View on Github external
const lightness = (cMax + cMin) / 2;
                const denom = (1 - (Math.abs(2 * lightness - 1)));
                // Return 0 if it's black (R, G, and B all 0).
                return (denom === 0) ? 0 : delta / denom;
            }

            /* The actual ruleset */

            const rules = ruleset([
                rule(dom('div'), type('overlay')),
                rule(type('overlay'), score(big), {name: 'big'}),
                rule(type('overlay'), score(nearlyOpaque), {name: 'nearlyOpaque'}),
                rule(type('overlay'), score(monochrome), {name: 'monochrome'}),
                rule(type('overlay'), score(suspiciousClassOrId), {name: 'classOrId'}),
                rule(type('overlay'), score(visible), {name: 'visible'}),
                rule(type('overlay').max(), out('overlay'))
            ]);
            return rules;
        }
    }
github mozilla / price-tracker / src / extraction / fathom / ruleset_factory.js View on Github external
// better score the closer the element is to the top of the page
      rule(type('price'), score(this.isAboveTheFold.bind(this)), {name: 'isAboveTheFoldPrice'}),
      // check if the id has "price" in it
      rule(type('price'), score(this.hasPriceInID.bind(this)), {name: 'hasPriceInID'}),
      rule(type('price'), score(this.hasPriceInParentID.bind(this)), {name: 'hasPriceInParentID'}),
      // check if any class names have "price" in them
      rule(type('price'), score(this.hasPriceInClassName.bind(this)), {name: 'hasPriceInClassName'}),
      rule(type('price'), score(this.hasPriceInParentClassName.bind(this)), {name: 'hasPriceInParentClassName'}),
      // better score for larger font size
      rule(type('price'), score(this.fontIsBig.bind(this)), {name: 'fontIsBig'}),
      // better score based on x-axis proximity to max scoring image element
      rule(type('price'), score(this.isNearImage.bind(this)), {name: 'isNearImage'}),
      // check if innerText has a price pattern
      rule(type('price'), score(this.hasPriceishPattern.bind(this)), {name: 'hasPriceishPattern'}),
      // return price element(s) with max score
      rule(type('price').max(), out('price')),
    ],
    coeffs,
    biases);
  }
}
github WorldBrain / Memex / src / util / fathom-extractor.js View on Github external
const paragraphishNote = fnode.noteFor('paragraphish')
            return paragraphishNote
                ? (1 - linkDensity(fnode, paragraphishNote.inlineLength)) * 1.5
                : (1 - linkDensity(fnode)) * 1.5
        }),
    ),
    rule(dom('p'), score(4.5).type('paragraphish')),
    rule(
        type('paragraphish').bestCluster({
            splittingDistance: 3,
            differentDepthCost: 6.5,
            differentTagCost: 2,
            sameTagCost: 0.5,
            strideCost: 0,
        }),
        out('content').allThrough(domSort),
    ),
)

export default rules