Skip to content

Commit

Permalink
fix: cspell-tools - limit memory usage when build dictionaries (#2087)
Browse files Browse the repository at this point in the history
When compiling Estonian, it would cause node to run out of keys due to caching highly repetitive suffixes.
  • Loading branch information
Jason3S committed Dec 10, 2021
1 parent 245f35e commit 591860e
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 3 deletions.
1 change: 1 addition & 0 deletions cspell-dict.txt
Expand Up @@ -47,6 +47,7 @@ repo
repos
retryable
serializers
sigs
specberus
streetsidesoftware
submodule
Expand Down
1 change: 1 addition & 0 deletions packages/cspell-trie-lib/cspell.json
Expand Up @@ -3,6 +3,7 @@
"tsbuildinfo"
],
"import": [
"../../cspell.json",
"@cspell/dict-es-es/cspell-ext.json"
]
}
48 changes: 47 additions & 1 deletion packages/cspell-trie-lib/src/lib/TrieBuilder.test.ts
@@ -1,5 +1,8 @@
import { countNodes, isCircular } from './util';
import { TrieBuilder, buildTrie } from './TrieBuilder';
import { TrieBuilder, buildTrie, __testing__ } from './TrieBuilder';
import { TrieNode } from '.';

const { trimSignatures, trimMap } = __testing__;

describe('Validate TrieBuilder', () => {
test('builder explicit consolidateSuffixes', () => {
Expand Down Expand Up @@ -31,6 +34,49 @@ describe('Validate TrieBuilder', () => {
const trie = buildTrie(sampleWords);
expect([...trie.words()]).toEqual(sampleWords.sort());
});

test('trimSignatures', () => {
const n: TrieNode = {};
const sigs = sampleWords;
const soloSigs = sigs.filter((_, i) => !!(i & 1));
const signatures = new Map(sigs.map((w) => [w, n]));
const solo = new Set(soloSigs);

// verify preconditions
expect(signatures.size).toBe(sigs.length);
expect(solo.size).toBe(soloSigs.length);

// Nothing should change, solo is within bounds.
trimSignatures(signatures, solo, sampleWords.length);
expect(signatures.size).toBe(sigs.length);
expect(solo.size).toBe(soloSigs.length);

// trim and make sure the newest values are left.
trimSignatures(signatures, solo, 5, 10);
expect(signatures.size).toBe(sigs.length - soloSigs.length + 5);
expect(solo.size).toBe(5);
// verify newest are left
expect([...solo]).toEqual(soloSigs.slice(-5));
});

test('trimMap', () => {
const n: TrieNode = {};
const values = sampleWords;
const mapOfValues = new Map(values.map((w) => [w, n]));

// verify preconditions
expect(mapOfValues.size).toBe(values.length);

// Nothing should change, solo is within bounds.
trimMap(mapOfValues, sampleWords.length);
expect(mapOfValues.size).toBe(values.length);

// trim and make sure the newest values are left.
trimMap(mapOfValues, 5, 10);
expect(mapOfValues.size).toBe(5);
// verify newest are left
expect([...mapOfValues.keys()]).toEqual(values.slice(-5));
});
});

const sampleWords = [
Expand Down
48 changes: 47 additions & 1 deletion packages/cspell-trie-lib/src/lib/TrieBuilder.ts
Expand Up @@ -30,9 +30,15 @@ interface PathNode {
n: TrieNode;
}

// cspell:words sigs
const MAX_NUM_SOLO_SIGS = 100000;
const MAX_TRANSFORMS = 1000000;
const CACHE_PADDING = 1000;

export class TrieBuilder {
private count = 0;
private readonly signatures = new Map<string, TrieNode>();
private readonly soloSignatures = new Set<string>();
private readonly cached = new Map<TrieNode, number>();
private readonly transforms = new Map<TrieNode, Map<string, TrieNode>>();
private _eow: TrieNode = Object.freeze({ f: 1 });
Expand Down Expand Up @@ -100,9 +106,11 @@ export class TrieBuilder {
const sig = this.signature(n);
const ref = this.signatures.get(sig);
if (ref !== undefined) {
this.soloSignatures.delete(sig);
return this.tryCacheFrozen(ref);
}

this.soloSignatures.add(sig);
trimSignatures(this.signatures, this.soloSignatures, MAX_NUM_SOLO_SIGS);
this.signatures.set(sig, this.freeze(n));
return n;
}
Expand All @@ -111,6 +119,7 @@ export class TrieBuilder {
if (!Object.isFrozen(result) || !Object.isFrozen(src)) return;
const t = this.transforms.get(src) ?? new Map<string, TrieNode>();
t.set(s, result);
trimMap(this.transforms, MAX_TRANSFORMS);
this.transforms.set(src, t);
}

Expand Down Expand Up @@ -209,6 +218,10 @@ export class TrieBuilder {
this._root = createTrieRoot(this.trieOptions);
this.cached.clear();
this.signatures.clear();
this.signatures.set(this.signature(this._eow), this._eow);
this.soloSignatures.clear();
this.count = 0;
this.cached.set(this._eow, this.count++);
}

build(consolidateSuffixes = false): Trie {
Expand All @@ -224,3 +237,36 @@ function copyIfFrozen(n: TrieNode): TrieNode {
const c = n.c ? new Map(n.c) : undefined;
return { f: n.f, c };
}

function trimSignatures(
signatures: Map<string, TrieNode>,
soloSignatures: Set<string>,
size: number,
padding = CACHE_PADDING
): void {
if (soloSignatures.size >= size + padding) {
for (const soloSig of soloSignatures) {
signatures.delete(soloSig);
soloSignatures.delete(soloSig);
if (soloSignatures.size <= size) {
break;
}
}
}
}

function trimMap(map: Map<unknown, unknown>, size: number, padding = CACHE_PADDING) {
if (map.size >= size + padding) {
for (const key of map.keys()) {
map.delete(key);
if (map.size <= size) {
break;
}
}
}
}

export const __testing__ = {
trimSignatures,
trimMap,
};
2 changes: 1 addition & 1 deletion packages/cspell-trie-lib/src/lib/index.ts
Expand Up @@ -3,7 +3,7 @@ export { TrieNode, FLAG_WORD, ChildMap, TrieRoot } from './TrieNode';
export * from './util';
export * from './walker';
export * from './importExport';
export * from './TrieBuilder';
export { buildTrie, buildTrieFast, TrieBuilder } from './TrieBuilder';
export * from './consolidate';
export { SuggestionResult, MaxCost, suggestionCollector, SuggestionCollector } from './suggestCollector';
export { parseDictionaryLines, parseDictionary } from './SimpleDictionaryParser';

0 comments on commit 591860e

Please sign in to comment.