Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
/**
 * Adjusts a candidate link's score according to how closely its URL
 * resembles the article URL.
 *
 * @param score - Current score of the candidate link.
 * @param articleUrl - URL the candidate is compared against.
 * @param href - URL of the candidate link.
 * @returns `score` plus a similarity-based modifier when `score > 0`;
 * otherwise 0.
 */
export default function scoreSimilarity(score, articleUrl, href) {
// Do this last and only if we have a real candidate, because it's
// potentially expensive computationally. Compare the link to this
// URL using difflib to get the % similarity of these URLs. On a
// sliding scale, adjust this link's points based on similarity.
if (score > 0) {
const similarity = new difflib.SequenceMatcher(
null,
articleUrl,
href
).ratio();
// The modifier is -(250 * (diffPercent - 0.2)), so a link that is
// less than 20% different gets a bonus and one that is more than
// 20% different gets a penalty. With the offset used below:
// 10% different = +25 points
// 20% different = 0 points
// 30% different = -25 points
// NOTE(review): an earlier version of this comment described a 0.1
// offset (break-even at 10% different); the code uses 0.2 — confirm
// which value is intended.
const diffPercent = 1.0 - similarity;
const diffModifier = -(250 * (diffPercent - 0.2));
return score + diffModifier;
}
// Non-positive scores are not real candidates; they always yield 0.
return 0;
/**
 * Looks up `searchTerm` among `possibilities` and returns an index.
 *
 * An exact (`===`) match is returned immediately. Otherwise each entry is
 * compared to `searchTerm` with a difflib SequenceMatcher and screened
 * against a fixed cutoff.
 *
 * NOTE(review): the remainder of this function is not visible here, so how
 * `results` and `score` are used, and what is returned when nothing matches,
 * is not documented — confirm against the full source.
 */
export function getClosestMatchIndex (searchTerm, possibilities) {
let matcher = new difflib.SequenceMatcher()
// The search term is set once as sequence 2; each candidate becomes sequence 1.
matcher.setSeq2(searchTerm)
// Threshold the quick similarity checks below must reach.
let cutoff = 0.6
let results = []
// check identity match first, ratio computation takes time
let identityMatchIndex = possibilities.findIndex(text => text === searchTerm)
if (identityMatchIndex >= 0) {
return identityMatchIndex
}
// search for close match
possibilities.forEach(function (testText, i) {
matcher.setSeq1(testText)
// Screen with realQuickRatio() and quickRatio() before calling ratio();
// presumably these are cheaper estimates, as in Python's difflib — confirm.
if (matcher.realQuickRatio() >= cutoff &&
matcher.quickRatio() >= cutoff) {
let score = matcher.ratio()
/**
 * Reports whether the file at `dst` exists and differs from the file at `src`.
 *
 * The two files are compared byte for byte, so any difference in content
 * (including reordered content) counts as a modification.
 *
 * @param src - Path of the reference file; it must exist and be readable.
 * @param dst - Path of the file that may have been modified locally.
 * @returns `false` when `dst` does not exist or has exactly the same content
 * as `src`; `true` otherwise.
 * @throws Whatever `fs.readFileSync` throws when either file cannot be read.
 */
function localModified(src, dst) {
  if (!fs.existsSync(dst)) {
    return false;
  }
  // Exact comparison on purpose: a similarity estimate such as difflib's
  // quickRatio() only looks at character counts, so content with the same
  // characters in a different order would be reported as unchanged.
  return !fs.readFileSync(src).equals(fs.readFileSync(dst));
}
function _partial_ratio(str1, str2, options) {
if (!_validate(str1)) return 0;
if (!_validate(str2)) return 0;
if (str1.length <= str2.length) {
var shorter = str1
var longer = str2
}
else {
var shorter = str2
var longer = str1
}
var m = new difflib.SequenceMatcher(null, shorter, longer);
var blocks = m.getMatchingBlocks();
var scores = [];
for (var b = 0; b < blocks.length; b++) {
var long_start = (blocks[b][1] - blocks[b][0]) > 0 ? (blocks[b][1] - blocks[b][0]) : 0;
var long_end = long_start + shorter.length;
var long_substr = longer.substring(long_start,long_end);
var r = _ratio(shorter,long_substr,options);
if (r > 99.5) return 100;
else scores.push(r);
}
return Math.max.apply(null, scores);
}
// Build, for every course, a list of similar courses with their similarity.
// NOTE(review): this fragment continues past the visible lines; the comments
// below describe only what is shown here.

// Start every course with an empty list of similar courses.
for (const courseId in courses) {
similarCourses[courseId] = []
}
// Ids that must no longer be considered as matches; nothing in the visible
// lines adds to this list.
const alreadyAssignedCourseIds: string[] = []
const progressBar = new ProgressBar(
'Comparing courses (:current/:total) [:bar] :percent',
{ total: courseList.length }
)
for (const courseId in courses) {
const course = courses[courseId]
// Broken courses are not compared at all.
if (course.isBroken) continue
if (!course.hash) {
throw new Error(`Hash for course with ID ${course._id} was not defined`)
}
// Candidate ids for this course's hash come from the LSH index.
const matches = lshIndex.query(course.hash)
// This course's hashbands are fixed as the second sequence; the first
// sequence is left null here and supplied per candidate via setSeq1 below.
const sequenceMatcher = new difflib.SequenceMatcher(
null,
null,
course.hash.hashbands
)
for (const matchId of matches) {
// Skip the course itself and ids already marked as assigned.
if (courseId === matchId) continue
if (alreadyAssignedCourseIds.includes(matchId)) continue
const matchedCourse = courses[matchId]
if (!matchedCourse.hash) {
// NOTE(review): the check is on `matchedCourse`, but the message
// interpolates `course._id` — probably meant `matchedCourse._id`; confirm.
throw new Error(`Hash for course with ID ${course._id} was not defined`)
}
sequenceMatcher.setSeq1(matchedCourse.hash.hashbands)
const sim = sequenceMatcher.ratio()
// Discard candidates whose ratio is below 0.1.
if (sim < 0.1) continue
// Record the similarity on both courses.
similarCourses[courseId].push({ sim, courseId: matchedCourse._id })
similarCourses[matchId].push({ sim, courseId: course._id })