How to use the difflib.SequenceMatcher function in difflib

To help you get started, we’ve selected a few difflib examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github postlight / mercury-parser / src / extractors / generic / next-page-url / scoring / utils / score-similarity.js View on Github external
/**
 * Adjust a candidate link's score according to how similar its URL is to
 * the article URL.
 *
 * @param score - Current score of the candidate link.
 * @param articleUrl - URL of the article being parsed.
 * @param href - URL of the candidate link.
 * @returns `score` plus a similarity-based modifier when `score` is
 *   positive; otherwise 0.
 */
export default function scoreSimilarity(score, articleUrl, href) {
  // Do this last and only if we have a real candidate, because it's
  // potentially expensive computationally. Compare the link to this
  // URL using difflib to get the % similarity of these URLs. On a
  // sliding scale, subtract points from this link based on
  // similarity.
  if (score > 0) {
    const similarity = new difflib.SequenceMatcher(
      null,
      articleUrl,
      href
    ).ratio();
    // Subtract .2 from diffPercent when calculating the modifier,
    // which means that if the link is less than 20% different, we
    // give a bonus instead. Ex:
    //  3% different = +42.5 points
    // 20% different = 0 points
    // 30% different = -25 points
    // NOTE(review): this comment previously described a .1 offset
    // (10% different = 0 points); the code uses .2 -- confirm which
    // threshold is intended.
    const diffPercent = 1.0 - similarity;
    const diffModifier = -(250 * (diffPercent - 0.2));
    return score + diffModifier;
  }

  return 0;
github hoschi / yode / packages / core / src / ast / compareFunctions.js View on Github external
/**
 * Find the index of the entry in `possibilities` that best matches
 * `searchTerm`.
 *
 * An exact (`===`) match is returned immediately. Otherwise each entry is
 * compared with difflib's SequenceMatcher against a 0.6 cutoff.
 * NOTE(review): the remainder of this function is not visible in this
 * excerpt, so how scored candidates are ranked and what is returned when
 * nothing passes the cutoff must be confirmed against the full source.
 *
 * @param searchTerm - Text to look for.
 * @param possibilities - Array of candidate texts.
 */
export function getClosestMatchIndex (searchTerm, possibilities) {
    let matcher = new difflib.SequenceMatcher()
    // The search term stays fixed as sequence 2; each candidate is swapped
    // in as sequence 1 inside the loop below.
    matcher.setSeq2(searchTerm)
    let cutoff = 0.6
    let results = []

    // check identity match first, ratio computation takes time
    let identityMatchIndex = possibilities.findIndex(text => text === searchTerm)
    if (identityMatchIndex >= 0) {
        return identityMatchIndex
    }

    // search for close match
    possibilities.forEach(function (testText, i) {
        matcher.setSeq1(testText)
        // Check the two quick ratio estimates against the cutoff first and
        // only call ratio() for candidates that pass both.
        if (matcher.realQuickRatio() >= cutoff &&
            matcher.quickRatio() >= cutoff) {
            let score = matcher.ratio()
github mockee / istatic / istatic.js View on Github external
/**
 * Report whether the local copy at `dst` differs from the file at `src`.
 *
 * Both files are read as UTF-8 text and compared for exact equality.
 * The previous implementation tested `SequenceMatcher.quickRatio() !== 1`,
 * but quickRatio() is only an upper bound derived from character counts:
 * two files containing the same characters in a different order score 1
 * and were wrongly reported as unmodified. A direct comparison is exact
 * and avoids building the matcher.
 *
 * @param {string} src - Path of the reference file; it must exist.
 * @param {string} dst - Path of the local copy to check.
 * @returns {boolean} `false` when `dst` does not exist or has the same
 *   text as `src`; `true` when the contents differ.
 */
function localModified(src, dst) {
  // A missing destination is treated as "not modified".
  if (!fs.existsSync(dst)) {
    return false;
  }
  var srcStr = fs.readFileSync(src, 'utf8')
    , dstStr = fs.readFileSync(dst, 'utf8')

  return srcStr !== dstStr
}
github nol13 / fuzzball.js / fuzzball_browser.js View on Github external
function _partial_ratio(str1, str2, options) {
        // Score the shorter string against same-length windows of the longer
        // one, aligned on each matching block, and return the best score.
        // Returns 0 when either input fails validation.
        if (!_validate(str1) || !_validate(str2)) return 0;

        // Order the inputs by length; on a tie str1 is treated as the shorter.
        var firstIsShorter = str1.length <= str2.length;
        var shorter = firstIsShorter ? str1 : str2;
        var longer = firstIsShorter ? str2 : str1;

        var matcher = new difflib.SequenceMatcher(null, shorter, longer);
        var matchingBlocks = matcher.getMatchingBlocks();
        var best = -Infinity;
        for (var i = 0; i < matchingBlocks.length; i++) {
            var block = matchingBlocks[i];
            // Start of the window in the longer string, clamped at 0.
            var offset = block[1] - block[0];
            var windowStart = offset > 0 ? offset : 0;
            var windowText = longer.substring(windowStart, windowStart + shorter.length);
            var score = _ratio(shorter, windowText, options);
            // Anything above 99.5 counts as a perfect match; stop early.
            if (score > 99.5) return 100;
            best = Math.max(best, score);
        }
        return best;
    }
github Tarnadas / smmdb / website / src / similarity / index.ts View on Github external
// Start every course with an empty list of similar courses.
for (const courseId in courses) {
    similarCourses[courseId] = []
  }
  const alreadyAssignedCourseIds: string[] = []
  const progressBar = new ProgressBar(
    'Comparing courses (:current/:total) [:bar] :percent',
    { total: courseList.length }
  )
  for (const courseId in courses) {
    const course = courses[courseId]
    if (course.isBroken) continue
    if (!course.hash) {
      throw new Error(`Hash for course with ID ${course._id} was not defined`)
    }
    // Candidate matches for this course's hash from the LSH index.
    const matches = lshIndex.query(course.hash)
    // Sequence 2 is fixed to this course's hashbands; sequence 1 is
    // swapped in per candidate below.
    const sequenceMatcher = new difflib.SequenceMatcher(
      null,
      null,
      course.hash.hashbands
    )
    for (const matchId of matches) {
      if (courseId === matchId) continue
      if (alreadyAssignedCourseIds.includes(matchId)) continue
      const matchedCourse = courses[matchId]
      if (!matchedCourse.hash) {
        // Report the matched course here: it is the one missing a hash.
        throw new Error(`Hash for course with ID ${matchedCourse._id} was not defined`)
      }
      sequenceMatcher.setSeq1(matchedCourse.hash.hashbands)
      const sim = sequenceMatcher.ratio()
      // Ignore pairs with a similarity ratio below 0.1.
      if (sim < 0.1) continue
      // Record the pair on both courses.
      similarCourses[courseId].push({ sim, courseId: matchedCourse._id })
      similarCourses[matchId].push({ sim, courseId: course._id })

difflib

text diff library ported from Python's difflib module

Unknown
Latest version published 13 years ago

Package Health Score

53 / 100
Full package analysis