Skip to content

Commit

Permalink
fix: Fix first letter insert costs on suggestions. (#2385)
Browse files Browse the repository at this point in the history
* refactor: move genSuggestionOptions
* fix: Fix first letter insert cost.
* refactor walker
* fix tests
* split walker and hinted walker tests
* Update createSpellingDictionary.test.ts
  • Loading branch information
Jason3S committed Feb 2, 2022
1 parent 7e72b91 commit e5b7ed5
Show file tree
Hide file tree
Showing 21 changed files with 204 additions and 132 deletions.
Expand Up @@ -74,8 +74,8 @@ describe('Validate createSpellingDictionary', () => {
word | ignoreCase | expected
${'Geschäft'} | ${false} | ${[c('Geschäft', 0)]}
${'Geschaft'} | ${false} | ${[c('Geschäft', 1)]}
${'fone'} | ${false} | ${[c('phone', 70), c('gone', 100)]}
${'failor'} | ${false} | ${[c('failure', 70), c('sailor', 100), c('failed', 175), c('fail', 200)]}
${'fone'} | ${false} | ${[c('phone', 70), c('gone', 104)]}
${'failor'} | ${false} | ${[c('failure', 70), c('sailor', 104), c('failed', 175), c('fail', 200)]}
`('createSpellingDictionary with dictionaryInformation "$word" "$ignoreCase"', ({ word, ignoreCase, expected }) => {
const words = sampleWords();
const options = { ...opts(), dictionaryInformation: sampleDictionaryInformation({}) };
Expand Down
Expand Up @@ -13,8 +13,8 @@ Array [
"replace": 1,
},
Object {
"map": "(^A)(^B)(^C)(^D)(^E)(^F)(^G)(^H)(^I)(^J)(^K)(^L)(^M)(^N)(^O)(^P)(^Q)(^R)(^S)(^T)(^U)(^V)(^W)(^X)(^Y)(^Z)(^a)(^b)(^c)(^d)(^e)(^f)(^g)(^h)(^i)(^j)(^k)(^l)(^m)(^n)(^o)(^p)(^q)(^r)(^s)(^t)(^u)(^v)(^w)(^x)(^y)(^z)",
"penalty": 4,
"map": "(^A)(^B)(^C)(^D)(^E)(^F)(^G)(^H)(^I)(^J)(^K)(^L)(^M)(^N)(^O)(^P)(^Q)(^R)(^S)(^T)(^U)(^V)(^W)(^X)(^Y)(^Z)(^a)(^b)(^c)(^d)(^e)(^f)(^g)(^h)(^i)(^j)(^k)(^l)(^m)(^n)(^o)(^p)(^q)(^r)(^s)(^t)(^u)(^v)(^w)(^x)(^y)(^z)(^)",
"penalty": 8,
"replace": 96,
},
Object {
Expand All @@ -38,8 +38,8 @@ Array [
"replace": 1,
},
Object {
"map": "(^a)(^b)(^c)(^d)(^e)",
"penalty": 4,
"map": "(^a)(^b)(^c)(^d)(^e)(^)",
"penalty": 8,
"replace": 96,
},
]
Expand All @@ -58,8 +58,8 @@ Array [
"replace": 1,
},
Object {
"map": "(^A)(^B)(^C)(^D)(^E)(^a)(^b)(^c)(^d)(^e)",
"penalty": 4,
"map": "(^A)(^B)(^C)(^D)(^E)(^a)(^b)(^c)(^d)(^e)(^)",
"penalty": 8,
"replace": 96,
},
Object {
Expand Down
Expand Up @@ -15,8 +15,8 @@ Array [
"replace": 1,
},
Object {
"map": "(^a)(^b)(^c)",
"penalty": 4,
"map": "(^a)(^b)(^c)(^)",
"penalty": 8,
"replace": 96,
},
]
Expand Down Expand Up @@ -66,8 +66,8 @@ Array [
"replace": 1,
},
Object {
"map": "(^a)(^b)(^c)",
"penalty": 4,
"map": "(^a)(^b)(^c)(^)",
"penalty": 8,
"replace": 96,
},
Object {
Expand Down
Expand Up @@ -112,9 +112,9 @@ describe('mapHunspellInformation', () => {
line | costs | expected
${''} | ${{}} | ${undefined}
${'MAP aàâäAÀÂÄ'} | ${{}} | ${undefined}
${'TRY abc'} | ${c({ mapCost: 1 })} | ${{ map: '(^a)(^b)(^c)', replace: 96, penalty: 4 }}
${'TRY abc'} | ${c({ tryCharCost: 90 })} | ${{ map: '(^a)(^b)(^c)', replace: 86, penalty: 4 }}
${'TRY abc'} | ${c({ firstLetterPenalty: 10 })} | ${{ map: '(^a)(^b)(^c)', replace: 90, penalty: 10 }}
${'TRY abc'} | ${c({ mapCost: 1 })} | ${{ map: '(^a)(^b)(^c)(^)', replace: 96, penalty: 8 }}
${'TRY abc'} | ${c({ tryCharCost: 90 })} | ${{ map: '(^a)(^b)(^c)(^)', replace: 86, penalty: 8 }}
${'TRY abc'} | ${c({ firstLetterPenalty: 10 })} | ${{ map: '(^a)(^b)(^c)(^)', replace: 90, penalty: 20 }}
`('affTryFirstCharacterReplace "$line" $costs', ({ line, costs, expected }) => {
expect(affTryFirstCharacterReplace(line, calcCosts(costs))).toEqual(expected);
});
Expand Down
21 changes: 11 additions & 10 deletions packages/cspell-trie-lib/src/lib/mappers/mapToSuggestionCostDef.ts
Expand Up @@ -77,15 +77,16 @@ export function calcFirstCharacterReplaceDefs(
}

export function calcFirstCharacterReplace(cs: CharacterSetCosts, editCost: EditCostsRequired): SuggestionCostMapDef {
const mapOfFirstLetters = [
...pipe(
expandCharacterSet(cs.characters),
opUnique(),
opMap((letter) => `(^${letter})`)
),
]
.sort()
.join('');
const mapOfFirstLetters =
[
...pipe(
expandCharacterSet(cs.characters),
opUnique(),
opMap((letter) => `(^${letter})`)
),
]
.sort()
.join('') + '(^)';

const penalty = editCost.firstLetterPenalty;
// Make it a bit cheaper so it will match
Expand All @@ -94,7 +95,7 @@ export function calcFirstCharacterReplace(cs: CharacterSetCosts, editCost: EditC
return {
map: mapOfFirstLetters,
replace: cost,
penalty,
penalty: penalty * 2,
};
}

Expand Down
@@ -1,5 +1,5 @@
import { WeightMap } from '.';
import { CompoundWordsMethod } from './walker';
import { WeightMap } from '..';
import { CompoundWordsMethod } from '../walker';

export interface GenSuggestionOptionsStrict {
/**
Expand All @@ -18,6 +18,12 @@ export interface GenSuggestionOptionsStrict {
* 3 is a good number. Above 5 can be very slow.
*/
changeLimit: number;

/**
* Include the `+` compound character when returning results.
* @default false
*/
includeCompoundChar?: boolean;
}

export type GenSuggestionOptions = Partial<GenSuggestionOptionsStrict>;
Expand Down Expand Up @@ -57,6 +63,7 @@ export const defaultGenSuggestionOptions: GenSuggestionOptionsStrict = {
compoundMethod: CompoundWordsMethod.NONE,
ignoreCase: true,
changeLimit: 5,
includeCompoundChar: false,
};

export const defaultSuggestionOptions: SuggestionOptionsStrict = {
Expand All @@ -82,6 +89,7 @@ const keyMapOfGenSuggestionOptionsStrict: KeyMapOfGenSuggestionOptionsStrict = {
changeLimit: 'changeLimit',
compoundMethod: 'compoundMethod',
ignoreCase: 'ignoreCase',
includeCompoundChar: 'includeCompoundChar',
} as const;

const keyMapOfSuggestionOptionsStrict: KeyMapOfSuggestionOptionsStrict = {
Expand Down
@@ -1,5 +1,5 @@
import { readTrie } from '../../test/dictionaries.test.helper';
import { GenSuggestionOptionsStrict } from '../genSuggestionsOptions';
import { GenSuggestionOptionsStrict } from './genSuggestionsOptions';
import { genCompoundableSuggestions, suggest } from './suggestAStar';
import {
suggestionCollector,
Expand Down
Expand Up @@ -7,7 +7,7 @@ import { clean } from '../trie-util';
import { DictionaryInformation } from '../models/DictionaryInformation';
import { mapDictionaryInformationToWeightMap, WeightMap } from '..';
import assert from 'assert';
import { SuggestionOptions } from '../genSuggestionsOptions';
import { SuggestionOptions } from './genSuggestionsOptions';

function getTrie() {
return readTrie('@cspell/dict-en_us/cspell-ext.json');
Expand Down
@@ -1,4 +1,4 @@
import { GenSuggestionOptions, SuggestionOptions } from '../genSuggestionsOptions';
import { GenSuggestionOptions, SuggestionOptions } from './genSuggestionsOptions';
import { parseDictionary } from '../SimpleDictionaryParser';
import { Trie } from '../trie';
import { cleanCopy } from '../utils/util';
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/suggestions/suggest.ts
@@ -1,4 +1,4 @@
import { createSuggestionOptions, GenSuggestionOptions, SuggestionOptions } from '../genSuggestionsOptions';
import { createSuggestionOptions, GenSuggestionOptions, SuggestionOptions } from './genSuggestionsOptions';
import { visualLetterMaskMap } from './orthography';
import {
MaxCost,
Expand Down
@@ -1,4 +1,4 @@
import { GenSuggestionOptionsStrict, SuggestionOptions } from '../genSuggestionsOptions';
import { GenSuggestionOptionsStrict, SuggestionOptions } from './genSuggestionsOptions';
import { parseDictionary } from '../SimpleDictionaryParser';
import * as Sug from './suggestAStar';
import { SuggestionCollector, suggestionCollector, SuggestionCollectorOptions } from './suggestCollector';
Expand Down
Expand Up @@ -3,7 +3,7 @@ import { CompoundWordsMethod, JOIN_SEPARATOR, WORD_SEPARATOR } from '../walker';
import { SuggestionGenerator, suggestionCollector, SuggestionResult } from './suggestCollector';
import { PairingHeap } from '../utils/PairingHeap';
import { visualLetterMaskMap } from './orthography';
import { createSuggestionOptions, GenSuggestionOptionsStrict, SuggestionOptions } from '../genSuggestionsOptions';
import { createSuggestionOptions, GenSuggestionOptionsStrict, SuggestionOptions } from './genSuggestionsOptions';

export function* genCompoundableSuggestions(
root: TrieRoot,
Expand Down
Expand Up @@ -121,7 +121,7 @@ describe('Validate suggestCollector', () => {
${'word'} | ${[s('word', 0), s('work', 100), s('words', 100)]}
${'words'} | ${[s('words', 0), s('word', 100), s('works', 100)]}
${'joy'} | ${[s('joy', 0)]}
${'joyo'} | ${[s('joy', 75), s('joyous', 155), s('yo-yo', 301)]}
${'joyo'} | ${[s('joy', 75), s('joyous', 155), s('yo-yo', 305)]}
${'woudt'} | ${[s('word', 200), s('words', 200), s('would', 200), s("won't", 210)]}
${'aple'} | ${[s('apple', 55), s('apples', 155)]}
${'cafe'} | ${[s('cafe', 0), s('café', 1), s('cafés', 101)]}
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/trie.test.ts
@@ -1,4 +1,4 @@
import { SuggestionOptions } from './genSuggestionsOptions';
import { SuggestionOptions } from './suggestions/genSuggestionsOptions';
import { CompoundWordsMethod, suggestionCollector } from './index';
import { parseDictionary } from './SimpleDictionaryParser';
import { SuggestionCollectorOptions } from './suggestions/suggestCollector';
Expand Down
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/trie.ts
Expand Up @@ -10,7 +10,7 @@ import {
isForbiddenWord,
PartialFindOptions,
} from './find';
import { SuggestionOptions } from './genSuggestionsOptions';
import { SuggestionOptions } from './suggestions/genSuggestionsOptions';
import { genSuggestions, suggest } from './suggest';
import { SuggestionCollector, SuggestionResult } from './suggestCollector';
import { PartialTrieOptions, TrieNode, TrieOptions, TrieRoot } from './TrieNode';
Expand Down
@@ -1,25 +1,9 @@
import { walker, YieldResult, hintedWalker } from './walker';
import { orderTrie, createTriFromList } from './trie-util';
import { parseLinesToDictionary } from './SimpleDictionaryParser';
import type { YieldResult } from './walkerTypes';
import { hintedWalker } from './hintedWalker';
import { orderTrie, createTriFromList } from '../trie-util';
import { parseLinesToDictionary } from '../SimpleDictionaryParser';

describe('Validate Util Functions', () => {
test('Tests Walker', () => {
const root = createTriFromList(sampleWords);
orderTrie(root);
const i = walker(root);
let goDeeper = true;
let ir: IteratorResult<YieldResult>;
const result: string[] = [];
while (!(ir = i.next(goDeeper)).done) {
const { text, node } = ir.value;
if (node.f) {
result.push(text);
}
goDeeper = text.length < 4;
}
expect(result).toEqual(sampleWords.filter((a) => a.length <= 4).sort());
});

test('Hinted Walker', () => {
const root = createTriFromList(sampleWords);
orderTrie(root);
Expand Down
@@ -1,78 +1,6 @@
import { isDefined } from './trie-util';
import { TrieNode, ChildMap, TrieRoot } from './TrieNode';

export const JOIN_SEPARATOR = '+';
export const WORD_SEPARATOR = ' ';

export interface YieldResult {
text: string;
node: TrieNode;
depth: number;
}

export enum CompoundWordsMethod {
/**
* Do not compound words.
*/
NONE = 0,
/**
* Create word compounds separated by spaces.
*/
SEPARATE_WORDS,
/**
* Create word compounds without separation.
*/
JOIN_WORDS,
}

export type WalkerIterator = Generator<YieldResult, void, boolean | undefined>;

/**
* Walks the Trie and yields a value at each node.
* next(goDeeper: boolean):
*/
export function* walker(
root: TrieNode,
compoundingMethod: CompoundWordsMethod = CompoundWordsMethod.NONE
): WalkerIterator {
const roots: { [index: number]: ChildMap | [string, TrieNode][] } = {
[CompoundWordsMethod.NONE]: [],
[CompoundWordsMethod.JOIN_WORDS]: [[JOIN_SEPARATOR, root]],
[CompoundWordsMethod.SEPARATE_WORDS]: [[WORD_SEPARATOR, root]],
};

function* children(n: TrieNode): IterableIterator<[string, TrieNode]> {
if (n.c) {
yield* n.c;
}
if (n.f) {
yield* roots[compoundingMethod];
}
}

let depth = 0;
const stack: { t: string; c: Iterator<[string, TrieNode]> }[] = [];
stack[depth] = { t: '', c: children(root) };
let ir: IteratorResult<[string, TrieNode]>;
while (depth >= 0) {
let baseText = stack[depth].t;
while (!(ir = stack[depth].c.next()).done) {
const [char, node] = ir.value;
const text = baseText + char;
const goDeeper = yield { text, node, depth };
if (goDeeper || goDeeper === undefined) {
depth++;
baseText = text;
stack[depth] = { t: text, c: children(node) };
}
}
depth -= 1;
}
}

export interface Hinting {
goDeeper: boolean;
}
import { isDefined } from '../trie-util';
import { TrieNode, TrieRoot } from '../TrieNode';
import { CompoundWordsMethod, JOIN_SEPARATOR, WORD_SEPARATOR, YieldResult } from './walkerTypes';

/**
* Ask for the next result.
Expand All @@ -87,6 +15,7 @@ export type HintedWalkerIterator = Generator<YieldResult, void, Hinting | undefi
* Walks the Trie and yields a value at each node.
* next(goDeeper: boolean):
*/

export function* hintedWalker(
root: TrieRoot,
ignoreCase: boolean,
Expand Down Expand Up @@ -191,7 +120,11 @@ export function* hintedWalker(
}
}

function existMap(values: string[]): Record<string, true> {
export interface Hinting {
goDeeper: boolean;
}

export function existMap(values: string[]): Record<string, true> {
const m: Record<string, true> = Object.create(null);
for (const v of values) {
m[v] = true;
Expand Down
3 changes: 3 additions & 0 deletions packages/cspell-trie-lib/src/lib/walker/index.ts
@@ -0,0 +1,3 @@
export * from './walker';
export * from './hintedWalker';
export * from './walkerTypes';

0 comments on commit e5b7ed5

Please sign in to comment.