How to use the genie-toolkit.DatasetAugmenter function in genie-toolkit

To help you get started, we’ve selected a few genie-toolkit examples, based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: stanford-oval / almond-cloud / training / tasks / prepare-training-set.js (View on GitHub)
} else {
            // assume that the progress of synthetic generation is the overall progress, because
            // synthetic generation is the biggest part of the process, and augmentation happens in parallel
            basicSynthetic.on('progress', (value) => {
                this._task.setProgress(value).catch((e) => {
                    console.error(`Failed to update task progress: ${e.message}`);
                });
            });

            source = StreamUtils.chain([basicParaphrase, basicSynthetic], { objectMode: true });
        }

        const constProvider = new DatabaseParameterProvider(this._language, this._dbClient);
        const ppdb = await Genie.BinaryPPDB.mapFile(this._options.ppdbFile);

        const augmenter = new Genie.DatasetAugmenter(this._schemas, constProvider, this._tpClient, {
            quotedProbability: this._options.quotedProbability,
            untypedStringProbability: 0,
            maxSpanLength: MAX_SPAN_LENGTH,
            ppdbProbabilitySynthetic: this._options.ppdbProbabilitySynthetic,
            ppdbProbabilityParaphrase: this._options.ppdbProbabilityParaphrase,
            syntheticExpandFactor: 1,
            paraphrasingExpandFactor: 30,
            noQuoteExpandFactor: 10,

            ppdbFile: ppdb,

            locale: this._language,
            rng: this._rng,
            debug: this._options.debug,
        });