Skip to content

Commit

Permalink
dev: Support alternate word break notation. (#2395)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jason3S committed Feb 3, 2022
1 parent 6888d48 commit 7d7a449
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 14 deletions.
15 changes: 14 additions & 1 deletion packages/cspell-trie-lib/src/lib/distance/weightedMaps.test.ts
@@ -1,4 +1,5 @@
import type { SuggestionCostMapDef } from '../models/suggestionCostsDef';
import { DEFAULT_COMPOUNDED_WORD_SEPARATOR } from '../suggestions/suggestCollector';
import {
addDefToWeightMap,
CostPosition,
Expand All @@ -8,7 +9,7 @@ import {
__testing__,
} from './weightedMaps';

const { splitMapSubstrings, splitMap, findTrieCostPrefixes } = __testing__;
const { splitMapSubstrings, splitMap, findTrieCostPrefixes, normalizeDef } = __testing__;

// const u = undefined; cspell:

Expand Down Expand Up @@ -188,8 +189,20 @@ describe('Validate weightedMaps', () => {
const results = lookupReplaceCost(map, wordA, wordB);
expect(results).toEqual(expected);
});

test.each`
def | expected
${defIns('(+A)(+B)', 50, { penalty: 150 })} | ${defIns(sep('(|A)(|B)'), 50, { penalty: 150 })}
${defIns('abc', 95)} | ${defIns('abc', 95)}
`('normalizeDef for compound separators $def', ({ def, expected }) => {
expect(normalizeDef(def)).toEqual(expected);
});
});

/**
 * Test helper: builds an expected map string by swapping every `|` or `+`
 * placeholder found in `s` for `DEFAULT_COMPOUNDED_WORD_SEPARATOR`.
 */
function sep(s: string): string {
    const placeholders = /[|+]/g;
    const expanded = s.replace(placeholders, DEFAULT_COMPOUNDED_WORD_SEPARATOR);
    return expanded;
}

// function mo(...opts: Partial<SuggestionCostMapDef>[]): Partial<SuggestionCostMapDef> {
// return mergeOps(opts);
// }
Expand Down
20 changes: 17 additions & 3 deletions packages/cspell-trie-lib/src/lib/distance/weightedMaps.ts
@@ -1,7 +1,10 @@
import { SuggestionCostMapDef } from '../models/suggestionCostsDef';
import { DEFAULT_COMPOUNDED_WORD_SEPARATOR } from '../suggestions/suggestCollector';

export type WeightedRepMapTrie = Record<string, WeightedRepTrieNode>;

// Alternate characters accepted as a word-break marker inside a map definition.
// `normalizeMap` rewrites every match to DEFAULT_COMPOUNDED_WORD_SEPARATOR.
const matchPossibleWordSeparators = /[+∙•・●]/g;

interface WeightedRepTrieNode {
/** The nested Trie nodes */
r?: WeightedRepMapTrie | undefined;
Expand Down Expand Up @@ -86,7 +89,8 @@ export function addDefsToWeightMap(map: WeightMap, defs: SuggestionCostMapDef[])
addSetToTrieTrieCost(map.swap, set, def.swap, def.penalty);
}

for (const def of defs) {
for (const _def of defs) {
const def = normalizeDef(_def);
const mapSets = splitMap(def);
mapSets.forEach((s) => addSet(s, def));
}
Expand Down Expand Up @@ -369,9 +373,19 @@ export function lookupReplaceCost(map: WeightMap, a: string, b: string): undefin
return t?.c;
}

/**
 * Produce a copy of a cost map definition whose `map` string has been passed
 * through `normalizeMap`. All other fields are copied over unchanged and the
 * incoming `def` object is not modified.
 * @param def - the definition to normalize.
 * @returns a new definition object with the normalized `map`.
 */
function normalizeDef(def: SuggestionCostMapDef): SuggestionCostMapDef {
    const { map: rawMap, ...others } = def;
    const map = normalizeMap(rawMap);
    return { ...others, map };
}

/**
 * Rewrite every character matched by `matchPossibleWordSeparators` in a map
 * string to `DEFAULT_COMPOUNDED_WORD_SEPARATOR`.
 * @param map - the map string from a cost definition.
 * @returns the map string using a single separator notation.
 */
function normalizeMap(map: string): string {
    const normalized = map.replace(matchPossibleWordSeparators, DEFAULT_COMPOUNDED_WORD_SEPARATOR);
    return normalized;
}

/**
 * Internal helpers exposed so the unit tests can exercise them directly.
 * Each helper is listed exactly once (duplicate keys in an object literal are
 * rejected by the TypeScript compiler), in alphabetical order.
 */
export const __testing__ = {
    findTrieCostPrefixes,
    findTrieTrieCostPrefixes,
    normalizeDef,
    splitMap,
    splitMapSubstrings,
};
Expand Up @@ -4,7 +4,7 @@ import { RequireOptional } from '../types';
import { createTimer } from '../utils/timer';
import { clean, regexQuote, replaceAllFactory } from '../utils/util';
import { GenSuggestionOptions, GenSuggestionOptionsStrict } from './genSuggestionsOptions';
import { JOIN_SEPARATOR, WORD_SEPARATOR } from './walker';
import { WORD_SEPARATOR } from './walker';

const defaultMaxNumberSuggestions = 10;

Expand All @@ -16,7 +16,9 @@ const MAX_ALLOWED_COST_SCALE = 1.03 * MAX_COST_SCALE;

const collator = new Intl.Collator();

const regexSeparator = new RegExp(`[${regexQuote(JOIN_SEPARATOR + WORD_SEPARATOR)}]`, 'g');
// TODO: This is incomplete: it was also supposed to match JOIN_SEPARATOR (`+`).
// Add JOIN_SEPARATOR back to this expression later.
const regexSeparator = new RegExp(`[${regexQuote(WORD_SEPARATOR)}]`, 'g');

const wordLengthCost = [0, 50, 25, 5, 0];
const EXTRA_WORD_COST = 5;
Expand Down
25 changes: 18 additions & 7 deletions packages/cspell-trie-lib/src/lib/utils/util.test.ts
@@ -1,4 +1,4 @@
import { replaceAll, replaceAllFactory } from './util';
import { regexQuote, replaceAll, replaceAllFactory } from './util';

describe('util', () => {
// cspell:ignore aabbaab
Expand All @@ -13,12 +13,23 @@ describe('util', () => {
});

test.each`
texts | match | replaceWith | expected
${['']} | ${''} | ${''} | ${['']}
${['hello']} | ${''} | ${''} | ${['hello']}
${['hello']} | ${''} | ${'-'} | ${['-h-e-l-l-o-']}
${['aabbaab']} | ${'ab'} | ${'AB'} | ${['aABbaAB']}
${['aabbaab', 'aa', 'aba']} | ${'a'} | ${'B'} | ${['BBbbBBb', 'BB', 'BbB']}
text | expected
${'hello'} | ${'hello'}
${'+'} | ${'\\+'}
`('regexQuote $text', ({ text, expected }) => {
const r = regexQuote(text);
expect(r).toBe(expected);
expect(RegExp(r).test(text)).toBe(true);
});

test.each`
texts | match | replaceWith | expected
${['']} | ${''} | ${''} | ${['']}
${['hello']} | ${''} | ${''} | ${['hello']}
${['hello']} | ${''} | ${'-'} | ${['-h-e-l-l-o-']}
${['aabbaab']} | ${'ab'} | ${'AB'} | ${['aABbaAB']}
${['aabbaab', 'aa', 'aba']} | ${'a'} | ${'B'} | ${['BBbbBBb', 'BB', 'BbB']}
${['aa+bb+aab', 'a+a', 'aba']} | ${'+'} | ${'_'} | ${['aa_bb_aab', 'a_a', 'aba']}
`('replaceAllFactory [$texts, $match, $replaceWith]', ({ texts, match, replaceWith, expected }) => {
const fn = replaceAllFactory(match, replaceWith);
expect(texts.map(fn)).toEqual(expected);
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/utils/util.ts
Expand Up @@ -50,7 +50,7 @@ export function replaceAll(text: string, match: string, withText: string): strin
* @returns text that can be used in a regexp.
*/
export function regexQuote(text: string): string {
    // `$&` inserts the matched character itself, so no capture group is needed.
    // (Using `$1` without a capture group would emit the literal text `$1`.)
    // `^`, `$` and `?` are escaped as well so the result is always matched literally.
    return text.replace(/[[\]\-+(){},|*.\\^$?]/g, '\\$&');
}

/**
Expand Down

0 comments on commit 7d7a449

Please sign in to comment.