Skip to content

Commit

Permalink
fix: Hunspell make sure COMPOUNDFLAG is supported (#2088)
Browse files Browse the repository at this point in the history
* fix: Hunspell make sure COMPOUNDFLAG is supported
* Make sure `isCompoundPermitted` is honored
  • Loading branch information
Jason3S committed Dec 11, 2021
1 parent 591860e commit 3bd772e
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 103 deletions.
6 changes: 3 additions & 3 deletions packages/cspell-tools/src/compiler/Reader.ts
Expand Up @@ -194,9 +194,9 @@ function* _mapAffWords(affWords: Iterable<AffWord>): Generator<AnnotatedWord> {
const compound = flags.isCompoundForbidden ? '' : COMPOUND_FIX;
const forbid = flags.isForbiddenWord ? FORBID_PREFIX : '';
if (!forbid) {
if (flags.canBeCompoundBegin) yield word + compound;
if (flags.canBeCompoundEnd) yield compound + word;
if (flags.canBeCompoundMiddle) yield compound + word + compound;
if (flags.canBeCompoundBegin || flags.isCompoundPermitted) yield word + compound;
if (flags.canBeCompoundEnd || flags.isCompoundPermitted) yield compound + word;
if (flags.canBeCompoundMiddle || flags.isCompoundPermitted) yield compound + word + compound;
if (!flags.isOnlyAllowedInCompound) yield word;
} else {
yield forbid + word;
Expand Down
48 changes: 1 addition & 47 deletions packages/cspell-trie-lib/src/lib/TrieBuilder.test.ts
@@ -1,8 +1,5 @@
import { countNodes, isCircular } from './util';
import { TrieBuilder, buildTrie, __testing__ } from './TrieBuilder';
import { TrieNode } from '.';

const { trimSignatures, trimMap } = __testing__;
import { TrieBuilder, buildTrie } from './TrieBuilder';

describe('Validate TrieBuilder', () => {
test('builder explicit consolidateSuffixes', () => {
Expand Down Expand Up @@ -34,49 +31,6 @@ describe('Validate TrieBuilder', () => {
const trie = buildTrie(sampleWords);
expect([...trie.words()]).toEqual(sampleWords.sort());
});

test('trimSignatures', () => {
const n: TrieNode = {};
const sigs = sampleWords;
const soloSigs = sigs.filter((_, i) => !!(i & 1));
const signatures = new Map(sigs.map((w) => [w, n]));
const solo = new Set(soloSigs);

// verify preconditions
expect(signatures.size).toBe(sigs.length);
expect(solo.size).toBe(soloSigs.length);

// Nothing should change, solo is within bounds.
trimSignatures(signatures, solo, sampleWords.length);
expect(signatures.size).toBe(sigs.length);
expect(solo.size).toBe(soloSigs.length);

// trim and make sure the newest values are left.
trimSignatures(signatures, solo, 5, 10);
expect(signatures.size).toBe(sigs.length - soloSigs.length + 5);
expect(solo.size).toBe(5);
// verify newest are left
expect([...solo]).toEqual(soloSigs.slice(-5));
});

test('trimMap', () => {
const n: TrieNode = {};
const values = sampleWords;
const mapOfValues = new Map(values.map((w) => [w, n]));

// verify preconditions
expect(mapOfValues.size).toBe(values.length);

// Nothing should change, solo is within bounds.
trimMap(mapOfValues, sampleWords.length);
expect(mapOfValues.size).toBe(values.length);

// trim and make sure the newest values are left.
trimMap(mapOfValues, 5, 10);
expect(mapOfValues.size).toBe(5);
// verify newest are left
expect([...mapOfValues.keys()]).toEqual(values.slice(-5));
});
});

const sampleWords = [
Expand Down
54 changes: 9 additions & 45 deletions packages/cspell-trie-lib/src/lib/TrieBuilder.ts
Expand Up @@ -2,6 +2,7 @@ import { TrieNode, TrieRoot } from './TrieNode';
import { Trie, PartialTrieOptions, TrieOptions } from './trie';
import { consolidate } from './consolidate';
import { createTriFromList, mergeOptionalWithDefaults, trieNodeToRoot, createTrieRoot } from './util';
import { SecondChanceCache } from './secondChanceCache';

/**
* Builds an optimized Trie from an Iterable<string>. It attempts to reduce the size of the trie
Expand Down Expand Up @@ -31,16 +32,15 @@ interface PathNode {
}

// cspell:words sigs
const MAX_NUM_SOLO_SIGS = 100000;
const MAX_NUM_SIGS = 100000;
const MAX_TRANSFORMS = 1000000;
const CACHE_PADDING = 1000;
const MAX_CACHE_SIZE = 1000000;

export class TrieBuilder {
private count = 0;
private readonly signatures = new Map<string, TrieNode>();
private readonly soloSignatures = new Set<string>();
private readonly cached = new Map<TrieNode, number>();
private readonly transforms = new Map<TrieNode, Map<string, TrieNode>>();
private readonly signatures = new SecondChanceCache<string, TrieNode>(MAX_NUM_SIGS);
private readonly cached = new SecondChanceCache<TrieNode, number>(MAX_CACHE_SIZE);
private readonly transforms = new SecondChanceCache<TrieNode, Map<string, TrieNode>>(MAX_TRANSFORMS);
private _eow: TrieNode = Object.freeze({ f: 1 });
/** position 0 of lastPath is always the root */
private lastPath: PathNode[] = [{ s: '', n: { f: undefined, c: undefined } }];
Expand Down Expand Up @@ -81,7 +81,9 @@ export class TrieBuilder {
}

private tryCacheFrozen(n: TrieNode) {
if (this.cached.has(n)) return n;
if (this.cached.has(n)) {
return n;
}
this.cached.set(n, this.count++);
return n;
}
Expand All @@ -106,11 +108,8 @@ export class TrieBuilder {
const sig = this.signature(n);
const ref = this.signatures.get(sig);
if (ref !== undefined) {
this.soloSignatures.delete(sig);
return this.tryCacheFrozen(ref);
}
this.soloSignatures.add(sig);
trimSignatures(this.signatures, this.soloSignatures, MAX_NUM_SOLO_SIGS);
this.signatures.set(sig, this.freeze(n));
return n;
}
Expand All @@ -119,7 +118,6 @@ export class TrieBuilder {
if (!Object.isFrozen(result) || !Object.isFrozen(src)) return;
const t = this.transforms.get(src) ?? new Map<string, TrieNode>();
t.set(s, result);
trimMap(this.transforms, MAX_TRANSFORMS);
this.transforms.set(src, t);
}

Expand Down Expand Up @@ -219,7 +217,6 @@ export class TrieBuilder {
this.cached.clear();
this.signatures.clear();
this.signatures.set(this.signature(this._eow), this._eow);
this.soloSignatures.clear();
this.count = 0;
this.cached.set(this._eow, this.count++);
}
Expand All @@ -237,36 +234,3 @@ function copyIfFrozen(n: TrieNode): TrieNode {
const c = n.c ? new Map(n.c) : undefined;
return { f: n.f, c };
}

/**
 * Evict "solo" signatures once too many of them have accumulated.
 *
 * Nothing happens until `soloSignatures` holds at least `size + padding`
 * entries. Past that threshold, entries are removed in the Set's iteration
 * order (insertion order, so oldest first) from BOTH collections until
 * `soloSignatures` has shrunk to `size`.
 *
 * @param signatures - signature -> node lookup; evicted solo signatures are removed from it as well.
 * @param soloSignatures - the signatures eligible for eviction.
 * @param size - number of solo signatures to keep after a trim.
 * @param padding - slack allowed above `size` before a trim is triggered.
 */
function trimSignatures(
  signatures: Map<string, TrieNode>,
  soloSignatures: Set<string>,
  size: number,
  padding = CACHE_PADDING
): void {
  // Still within the allowed slack: leave everything untouched.
  if (soloSignatures.size < size + padding) return;

  // Deleting the current entry while iterating a Set is well defined in JS.
  for (const sig of soloSignatures) {
    signatures.delete(sig);
    soloSignatures.delete(sig);
    if (soloSignatures.size <= size) return;
  }
}

/**
 * Shrink `map` back down to `size` entries once it has grown to at least
 * `size + padding` entries. Keys are dropped in the Map's iteration order
 * (insertion order), so the most recently inserted keys are the ones kept.
 *
 * @param map - the map to trim in place.
 * @param size - number of entries to keep after a trim.
 * @param padding - slack allowed above `size` before a trim is triggered.
 */
function trimMap(map: Map<unknown, unknown>, size: number, padding = CACHE_PADDING) {
  // Below the trigger threshold: nothing to do.
  if (map.size < size + padding) return;

  for (const oldestKey of map.keys()) {
    map.delete(oldestKey);
    if (map.size <= size) return;
  }
}

/** Exposes the otherwise module-private trim helpers so they can be called from outside this module (e.g. unit tests). */
export const __testing__ = {
trimSignatures,
trimMap,
};
47 changes: 47 additions & 0 deletions packages/cspell-trie-lib/src/lib/secondChanceCache.test.ts
@@ -0,0 +1,47 @@
import { SecondChanceCache } from './secondChanceCache';

// Walks a SecondChanceCache with a primary-generation limit of 3 through fills,
// rotations and promotions. `size0` is the primary (newest) generation, `size1`
// the secondary (older) one, and `size` is their sum.
describe('Validate SecondChanceCache', () => {
test('SecondChanceCache', () => {
const cache = new SecondChanceCache<string, number>(3);
let cnt = 0;
cache.set('a', ++cnt);
expect(cache.has('a')).toBe(true);
cache.set('b', ++cnt);
expect(cache.size).toBe(2);
// Re-setting an existing key must not grow the cache.
cache.set('a', 1);
expect(cache.size).toBe(2);
cache.set('c', ++cnt);
expect(cache.size).toBe(3);
expect(cache.get('c')).toBe(3);
expect(cache.size).toBe(3);
// Primary generation is full (a, b, c): adding 'd' rotates it into the
// secondary generation and starts a fresh primary generation.
cache.set('d', ++cnt);
expect(cache.size).toBe(4);
cache.set('e', ++cnt);
expect(cache.size).toBe(5);
cache.set('f', ++cnt);
expect(cache.size).toBe(6);
// 'b' is only in the secondary generation. Reading it promotes it, but the
// primary generation (d, e, f) is full, so it rotates first: the old
// secondary generation (a, c) is dropped, leaving secondary = d, e, f and
// primary = b.
expect(cache.get('b')).toBe(2);
expect(cache.size).toBe(4);
expect(cache.size0).toBe(1);
expect(cache.size1).toBe(3);
// toArray lists the secondary generation first, then the primary one.
expect(cache.toArray()).toEqual([
['d', 4],
['e', 5],
['f', 6],
['b', 2],
]);
cache.set('g', ++cnt);
expect(cache.size).toBe(5);
expect(cache.size0).toBe(2);
// 'a' was discarded by the rotation above.
expect(cache.has('a')).toBe(false);
expect(cache.get('a')).toBe(undefined);
// has() on a secondary-generation key promotes it: 'f' moves from the
// secondary to the primary generation (no rotation, primary had room).
expect(cache.has('f')).toBe(true);
expect(cache.size0).toBe(3);
expect(cache.size1).toBe(2);
// 'f' is already in the primary generation, so reading it changes nothing.
expect(cache.get('f')).toBe(6);
expect(cache.size0).toBe(3);
expect(cache.size1).toBe(2);
// clear() empties both generations and returns the cache for chaining.
expect(cache.clear()).toBe(cache);
expect(cache.size).toBe(0);
});
});
65 changes: 65 additions & 0 deletions packages/cspell-trie-lib/src/lib/secondChanceCache.ts
@@ -0,0 +1,65 @@
/**
 * A bounded cache made of two generations of `Map`:
 *
 * - `map0` is the primary (newest) generation; new and recently used keys live here.
 * - `map1` is the secondary (older) generation; it is the previous `map0`.
 *
 * When `map0` already holds `maxL0Size` keys and a new key arrives, `map0`
 * becomes `map1` (the old `map1` is discarded) and a fresh `map0` is started.
 * Looking up a key that is only in `map1` moves it back into `map0`, giving it
 * a "second chance" before it is discarded.
 *
 * A key lives in at most one generation at a time, so `size` and `toArray()`
 * never count a key twice. The cache holds at most roughly `2 * maxL0Size` keys.
 */
export class SecondChanceCache<Key, Value> {
  private map0: Map<Key, Value>;
  private map1: Map<Key, Value>;

  /**
   * @param maxL0Size - number of keys the primary generation may hold before
   *   the next new key triggers a rotation.
   */
  constructor(readonly maxL0Size: number) {
    this.map0 = new Map<Key, Value>();
    this.map1 = new Map<Key, Value>();
  }

  /**
   * Report whether `key` is cached. A hit in the secondary generation promotes
   * the key into the primary generation (which may trigger a rotation).
   */
  public has(key: Key): boolean {
    if (this.map0.has(key)) return true;
    if (this.map1.has(key)) {
      // get1 performs the promotion; no second `set` is needed.
      this.get1(key);
      return true;
    }
    return false;
  }

  /**
   * Return the cached value for `key`, or `undefined` when the key is absent.
   * A hit in the secondary generation promotes the key into the primary one.
   */
  public get(key: Key): Value | undefined {
    const value = this.map0.get(key);
    // Only fall back to `has` when the value is `undefined`, so the common hit
    // costs a single lookup while stored `null`/`undefined` values are still
    // returned as-is instead of being mistaken for a miss.
    if (value !== undefined || this.map0.has(key)) return value;
    return this.get1(key);
  }

  /**
   * Store `value` under `key` in the primary generation, rotating the
   * generations first when the primary one is full and `key` is new to it.
   *
   * @returns this cache, for chaining.
   */
  public set(key: Key, value: Value): this {
    if (this.map0.size >= this.maxL0Size && !this.map0.has(key)) {
      this.map1 = this.map0;
      this.map0 = new Map<Key, Value>();
    }
    // Drop any older copy so the key never exists in both generations
    // (otherwise `size`/`toArray` double count it and a stale value lingers).
    this.map1.delete(key);
    this.map0.set(key, value);
    return this;
  }

  /** Total number of keys across both generations. */
  public get size(): number {
    return this.map0.size + this.map1.size;
  }

  /** Number of keys in the primary (newest) generation. */
  public get size0(): number {
    return this.map0.size;
  }

  /** Number of keys in the secondary (older) generation. */
  public get size1(): number {
    return this.map1.size;
  }

  /**
   * Remove every entry from both generations.
   *
   * @returns this cache, for chaining.
   */
  public clear(): this {
    this.map0.clear();
    this.map1.clear();
    return this;
  }

  /**
   * Look `key` up in the secondary generation only; on a hit, move the entry
   * into the primary generation and return its value.
   */
  private get1(key: Key): Value | undefined {
    if (!this.map1.has(key)) return undefined;
    // `has` was just checked, so the entry exists; `Value` itself may include `undefined`.
    const v = this.map1.get(key) as Value;
    // `set` either removes the key from map1 or discards map1 entirely on rotation.
    this.set(key, v);
    return v;
  }

  /** All entries as `[key, value]` pairs: secondary generation first, then primary. */
  public toArray(): [Key, Value][] {
    return [...this.map1, ...this.map0];
  }
}
11 changes: 6 additions & 5 deletions packages/hunspell-reader/cSpell.json
Expand Up @@ -6,21 +6,22 @@
"language": "en",
// words - list of words to be always considered correct
"words": [
"combinable",
"CHECKCOMPOUNDCASE",
"CHECKCOMPOUNDDUP",
"CHECKCOMPOUNDPATTERN",
"FORCEUCASE",
"COMPOUNDPERMITFLAG",
"CHECKCOMPOUNDREP",
"combinable",
"COMPOUNDFLAG",
"COMPOUNDPERMITFLAG",
"conv",
"denom",
"FORCEUCASE",
"MAXCPDSUGS",
"NOSPLITSUGS",
"NOSUGGEST",
"OCONV",
"ONLYINCOMPOUND",
"ONLYMAXDIFF",
"conv",
"denom",
"tsbuildinfo"
],
"ignorePaths": [
Expand Down
6 changes: 4 additions & 2 deletions packages/hunspell-reader/src/aff.ts
Expand Up @@ -54,10 +54,11 @@ export interface AffTransformFlags {
FORBIDDENWORD?: string;
NOSUGGEST?: string;
COMPOUNDBEGIN?: string;
COMPOUNDMIDDLE?: string;
COMPOUNDEND?: string;
COMPOUNDPERMITFLAG?: string;
COMPOUNDFLAG?: string;
COMPOUNDFORBIDFLAG?: string;
COMPOUNDMIDDLE?: string;
COMPOUNDPERMITFLAG?: string;
ONLYINCOMPOUND?: string;
}

Expand Down Expand Up @@ -409,6 +410,7 @@ const affFlag: Mapping<AffTransformFlags, AffWordFlags> = {
COMPOUNDBEGIN: { canBeCompoundBegin: true },
COMPOUNDMIDDLE: { canBeCompoundMiddle: true },
COMPOUNDEND: { canBeCompoundEnd: true },
COMPOUNDFLAG: { isCompoundPermitted: true },
COMPOUNDPERMITFLAG: { isCompoundPermitted: true },
COMPOUNDFORBIDFLAG: { isCompoundForbidden: true },
ONLYINCOMPOUND: { isOnlyAllowedInCompound: true },
Expand Down
3 changes: 2 additions & 1 deletion packages/hunspell-reader/src/affReader.ts
Expand Up @@ -211,7 +211,7 @@ interface AffFieldFunctionTable {
}

/*
cspell:ignore COMPOUNDBEGIN COMPOUNDEND COMPOUNDMIDDLE COMPOUNDMIN COMPOUNDPERMITFLAG COMPOUNDRULE COMPOUNDFORBIDFLAG
cspell:ignore COMPOUNDBEGIN COMPOUNDEND COMPOUNDMIDDLE COMPOUNDMIN COMPOUNDPERMITFLAG COMPOUNDRULE COMPOUNDFORBIDFLAG COMPOUNDFLAG
cspell:ignore FORBIDDENWORD KEEPCASE
cspell:ignore MAXDIFF NEEDAFFIX WORDCHARS
*/
Expand All @@ -228,6 +228,7 @@ const affTableField: AffFieldFunctionTable = {
COMPOUNDEND : asString,
COMPOUNDMIDDLE : asString,
COMPOUNDMIN : asNumber,
COMPOUNDFLAG : asString,
COMPOUNDPERMITFLAG : asString,
COMPOUNDFORBIDFLAG : asString,
COMPOUNDRULE : simpleTable,
Expand Down

0 comments on commit 3bd772e

Please sign in to comment.