How to use genie-toolkit - 10 common examples

To help you get started, we've selected a few genie-toolkit examples based on popular ways it is used in public projects. All ten below come from the stanford-oval/almond-cloud repository, which uses genie-toolkit to generate, augment, and prepare its natural-language training datasets.

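Most of the examples share one streaming pattern: a line-delimited dataset is parsed into example objects with Genie.DatasetParser, transformed, and serialized back out with Genie.DatasetStringifier. As a minimal sketch of that skeleton (./dataset.tsv is a placeholder path; we assume Genie's usual tab-separated dataset format):

const fs = require('fs');
const byline = require('byline');
const Genie = require('genie-toolkit');

fs.createReadStream('./dataset.tsv', { encoding: 'utf8' })
    .pipe(byline())                        // split the file into individual lines
    .pipe(new Genie.DatasetParser())       // one parsed example object per line
    .pipe(new Genie.DatasetStringifier())  // back to one line per example
    .pipe(process.stdout);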

Example 1: stanford-oval/almond-cloud/training/sandboxed_synthetic_gen.js
for (let f of options.flags)
        args.push('--set-flag', f);

    if (options.debug)
        console.log(args.join(' '));

    const child = child_process.spawn(processPath, args, {
        stdio: stdio,
        cwd: process.cwd(),
        env: env
    });

    child.stdout.setEncoding('utf8');
    const stream = child.stdout
        .pipe(byline())
        .pipe(new Genie.DatasetParser());

    // propagate errors from the child process to the stream
    child.on('error', (e) => stream.emit('error', e));
    child.on('exit', (code, signal) => {
        if (code === null)
            stream.emit('error', new InternalError(signal, `Synthetic generation worker died with signal ${signal}.`));
        else if (code !== 0)
            stream.emit('error', new InternalError('E_BAD_EXIT_CODE', `Synthetic generation worker exited with status ${code}.`));
    });

    return stream;
};
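This function turns a sandboxed generation worker into a stream: the child's stdout is split into lines with byline and parsed into example objects by Genie.DatasetParser, and child-process errors are re-emitted on the resulting stream so downstream consumers see them. A hedged usage sketch (spawnSandboxedGenerator is a hypothetical name for the function excerpted above):

const stream = spawnSandboxedGenerator(options); // hypothetical wrapper around the code above
stream.on('data', (ex) => {
    // each `ex` is one parsed dataset example
    console.log(ex.preprocessed, ex.target_code);
});
stream.on('error', (e) => console.error('generation failed:', e.message));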
Example 2: stanford-oval/almond-cloud/nlp/query.js
if (isTokenized) {
        tokenized = {
            tokens: query.split(' '),
            entities: {},
        };
        if (data.entities) {
            // safety against weird properties
            for (let key of Object.getOwnPropertyNames(data.entities)) {
                if (/^(.+)_([0-9]+)$/.test(key))
                    tokenized.entities[key] = data.entities[key];
            }
        }
    } else {
        tokenized = await service.tokenizer.tokenize(languageTag, query, expect);
        if (data.entities)
            Genie.Utils.renumberEntities(tokenized, data.entities);
    }

    let result = null;
    let exact = null;

    const tokens = tokenized.tokens;
    if (tokens.length === 0) {
        result = [{
            code: ['bookkeeping', 'special', 'special:failed'],
            score: 'Infinity'
        }];
    } else if (tokens.length === 1 && (/^[A-Z]/.test(tokens[0]) || tokens[0] === '1' || tokens[0] === '0')) {
        // if the whole input is just an entity, return that as an answer
        result = [{
            code: ['bookkeeping', 'answer', tokens[0]],
            score: 'Infinity'
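The excerpt is cut off here, but the pattern is visible: when the client sends pre-tokenized input, the entity values it already numbered are copied over (the regex guards against unrelated properties), while raw queries are tokenized server-side and Genie.Utils.renumberEntities rewrites the new entity placeholders to stay consistent with the numbering in data.entities. Empty inputs and bare entity tokens get immediate bookkeeping answers instead of a round trip through the parser; the score is the string 'Infinity', presumably because JSON has no literal for infinity. A hedged sketch of the renumbering step (the utterance and entity value are made up):

const tokenized = await service.tokenizer.tokenize('en-US', 'call bob at 7', undefined);
Genie.Utils.renumberEntities(tokenized, { NUMBER_0: 42 });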
Example 3: stanford-oval/almond-cloud/training/tasks/prepare-training-set.js
async process(ex) {
        if (ex.flags.synthetic) {
            // skip typechecking synthetic examples, we know they are correct
            this.push(ex);
            return;
        }

        try {
            const entities = Genie.Utils.makeDummyEntities(ex.preprocessed);
            const program = ThingTalk.NNSyntax.fromNN(ex.target_code.split(' '), entities);
            await program.typecheck(this._schemas);
            this.push(ex);
            return;
        } catch(e) {
            this._dropped++;
        }
    }
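This transform passes synthetic examples straight through and silently drops paraphrases whose ThingTalk code no longer typechecks against the current schemas. The same check works standalone; a minimal sketch, assuming a ThingTalk SchemaRetriever in `schemas`:

const ThingTalk = require('thingtalk');
const Genie = require('genie-toolkit');

async function typechecks(preprocessed, targetCode, schemas) {
    try {
        // dummy entity values let the parser handle placeholders like QUOTED_STRING_0
        const entities = Genie.Utils.makeDummyEntities(preprocessed);
        const program = ThingTalk.NNSyntax.fromNN(targetCode.split(' '), entities);
        await program.typecheck(schemas);
        return true;
    } catch(e) {
        return false;
    }
}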
Example 4: stanford-oval/almond-cloud/training/tasks/prepare-training-set.js
} else {
            // assume that the progress of synthetic generation is the overall progress, because
            // synthetic generation is the biggest part of the process, and augmentation happens in parallel
            basicSynthetic.on('progress', (value) => {
                this._task.setProgress(value).catch((e) => {
                    console.error(`Failed to update task progress: ${e.message}`);
                });
            });

            source = StreamUtils.chain([basicParaphrase, basicSynthetic], { objectMode: true });
        }

        const constProvider = new DatabaseParameterProvider(this._language, this._dbClient);
        const ppdb = await Genie.BinaryPPDB.mapFile(this._options.ppdbFile);

        const augmenter = new Genie.DatasetAugmenter(this._schemas, constProvider, this._tpClient, {
            quotedProbability: this._options.quotedProbability,
            untypedStringProbability: 0,
            maxSpanLength: MAX_SPAN_LENGTH,
            ppdbProbabilitySynthetic: this._options.ppdbProbabilitySynthetic,
            ppdbProbabilityParaphrase: this._options.ppdbProbabilityParaphrase,
            syntheticExpandFactor: 1,
            paraphrasingExpandFactor: 30,
            noQuoteExpandFactor: 10,

            ppdbFile: ppdb,

            locale: this._language,
            rng: this._rng,
            debug: this._options.debug,
        });
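Judging from its options, the augmenter draws parameter values from the database-backed constProvider and applies PPDB paraphrasing, with separate probabilities and expansion factors for synthetic and paraphrased data. A hedged sketch of wiring it up (assuming the augmenter is consumed as an object-mode stream, as elsewhere in this file; the output path is a placeholder):

source
    .pipe(augmenter)
    .pipe(new Genie.DatasetStringifier())
    .pipe(fs.createWriteStream('augmented.tsv'));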
Example 5: stanford-oval/almond-cloud/training/tasks/prepare-training-set.js
source = StreamUtils.chain([contextualParaphrase, contextualized, contextualSynthetic],
                { objectMode: true });
        } else {
            // assume that the progress of synthetic generation is the overall progress, because
            // synthetic generation is the biggest part of the process, and augmentation happens in parallel
            basicSynthetic.on('progress', (value) => {
                this._task.setProgress(value).catch((e) => {
                    console.error(`Failed to update task progress: ${e.message}`);
                });
            });

            source = StreamUtils.chain([basicParaphrase, basicSynthetic], { objectMode: true });
        }

        const constProvider = new DatabaseParameterProvider(this._language, this._dbClient);
        const ppdb = await Genie.BinaryPPDB.mapFile(this._options.ppdbFile);

        const augmenter = new Genie.DatasetAugmenter(this._schemas, constProvider, this._tpClient, {
            quotedProbability: this._options.quotedProbability,
            untypedStringProbability: 0,
            maxSpanLength: MAX_SPAN_LENGTH,
            ppdbProbabilitySynthetic: this._options.ppdbProbabilitySynthetic,
            ppdbProbabilityParaphrase: this._options.ppdbProbabilityParaphrase,
            syntheticExpandFactor: 1,
            paraphrasingExpandFactor: 30,
            noQuoteExpandFactor: 10,

            ppdbFile: ppdb,

            locale: this._language,
            rng: this._rng,
            debug: this._options.debug,
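This excerpt overlaps with the previous one; the part worth noting is the first branch, where the contextual paraphrase, contextualized, and contextual synthetic streams are chained into a single source before the same DatasetAugmenter configuration is applied.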
Example 6: stanford-oval/almond-cloud/training/synthetic-gen-process.js
locale: args.locale,
        thingpedia: './thingpedia.tt',
        entities: './entities.json',
        dataset: './dataset.tt',
        flags: args.flags,
        template: 'contextual.genie',
        random_seed: 'almond is awesome',
        maxDepth: args.maxdepth,
        targetPruningSize: args.target_pruning_size,
        debug: false, // no debugging, ever, because debugging also goes to stdout
    };

    inputFile
        .pipe(Genie.parallelize(PARALLEL_GENERATION,
            require.resolve('./workers/generate-contextual-worker.js'), options))
        .pipe(new Genie.DatasetStringifier())
        .pipe(process.stdout);

    await StreamUtils.waitFinish(process.stdout);

    process.disconnect();
}
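Here Genie.parallelize spreads contextual generation across PARALLEL_GENERATION worker processes (the worker module is resolved with require.resolve) and merges their output back into a single stream for stringification. Two details matter: the random seed is a fixed string so runs are reproducible, and debug output is disabled because the worker's stdout doubles as the data channel, as the inline comment warns.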
Example 7: stanford-oval/almond-cloud/training/synthetic-gen-process.js
templateFile: 'index.genie',

        rng: rng,
        locale: args.locale,
        flags: args.flags || {},
        maxDepth: args.maxdepth,
        targetPruningSize: args.target_pruning_size,
        debug: false, // no debugging, ever, because debugging also goes to stdout
    };

    const generator = new Genie.BasicSentenceGenerator(options);
    generator.on('progress', (value) => {
        process.send({ cmd:'progress', v: value });
    });
    const stringifier = new Genie.DatasetStringifier();

    generator.pipe(stringifier).pipe(process.stdout);
    await StreamUtils.waitFinish(process.stdout);

    process.disconnect();
}
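The worker builds a BasicSentenceGenerator from a template file, forwards its progress events to the parent process, and streams the stringified dataset to stdout. The generator also works outside a worker; a minimal standalone sketch (tpClient and schemas are assumed to be an already-constructed Thingpedia client and SchemaRetriever, seedrandom is our assumption for the RNG, and the values are illustrative):

const fs = require('fs');
const seedrandom = require('seedrandom');
const Genie = require('genie-toolkit');

const generator = new Genie.BasicSentenceGenerator({
    thingpediaClient: tpClient,
    schemaRetriever: schemas,
    templateFile: 'index.genie',
    rng: seedrandom('a fixed seed'),
    locale: 'en-US',
    flags: {},
    maxDepth: 8,
    debug: false
});
generator
    .pipe(new Genie.DatasetStringifier())
    .pipe(fs.createWriteStream('synthetic.tsv'));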
Example 8: stanford-oval/almond-cloud/training/tasks/prepare-training-set.js
const contextualParaphrase = this._downloadParaphrase(true)
                .pipe(new TypecheckStream(this._schemas));

            const basicSource = StreamUtils.chain([basicParaphrase, basicSynthetic], { objectMode: true });

            // Spool the basic (non-contextual, not augmented) dataset to disk
            // We need to do this because:
            // 1) We don't want to run too many generation/processing steps as a pipeline, because that
            //    would use too much memory
            // 2) We need to do multiple passes over the basic dataset for different reasons, and
            //    we can't cache it in memory
            const { path: basicDataset, fd: basicDatasetFD } =
                await tmp.file({ mode: 0o600, dir: '/var/tmp' });

            await StreamUtils.waitFinish(basicSource
                .pipe(new Genie.DatasetStringifier())
                .pipe(fs.createWriteStream(basicDataset, { fd: basicDatasetFD })));
            // basicDatasetFD is closed here

            let contexts = await
                fs.createReadStream(basicDataset, { encoding: 'utf8' })
                .pipe(byline())
                .pipe(new Genie.DatasetParser({ contextual: false }))
                .pipe(new Genie.ContextExtractor(this._schemas))
                .read();

            const contextualized =
                fs.createReadStream(basicDataset, { encoding: 'utf8' })
                .pipe(byline())
                .pipe(new Genie.DatasetParser({ contextual: false }))
                .pipe(new Genie.Contextualizer(contexts, {
                    locale: this._language,
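The excerpt stops inside the Contextualizer options, but the shape of the pass is clear: the basic dataset is spooled to a temporary file (for the two reasons listed in the comment), read once through Genie.ContextExtractor to collect the set of contexts (the await suggests read() returns a promise that resolves once the stream has been fully consumed), then read a second time through Genie.Contextualizer, which attaches those contexts to each example.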
Example 9: stanford-oval/almond-cloud/training/tasks/update-dataset.js
remote_commands: true,
                aggregation: true,
                bookkeeping: true,
                triple_commands: true,
                configure_actions: true,
                timer: true,
                projection: true,
                undefined_filter: true,
                projection_with_filter: false,
                extended_timers: false
            },
            maxDepth: this._options.maxDepth,
            debug: this._options.debug,
        };

        let generator = new Genie.BasicSentenceGenerator(options);
        if (this._forDevicesRegexp !== null)
            generator = generator.pipe(new ForDevicesFilter(this._forDevicesRegexp));

        const transform = new Stream.Transform({
            readableObjectMode: true,
            writableObjectMode: true,

            transform(ex, encoding, callback) {
                ex.type = 'generated';
                // do not set the training flag, we will regenerate the synthetic portion of the dataset
                // for training later
                ex.flags.exact = true;
                callback(null, ex);
            },

            flush(callback) {
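The flush body is cut off in the excerpt, but the transform itself only tags each generated example before it is written out. A self-contained sketch of the same idea (here flush simply signals completion):

const Stream = require('stream');

const transform = new Stream.Transform({
    readableObjectMode: true,
    writableObjectMode: true,

    transform(ex, encoding, callback) {
        ex.type = 'generated';
        // trust these as exact matches, but leave the training flag unset:
        // the synthetic portion is regenerated for training later
        ex.flags.exact = true;
        callback(null, ex);
    },

    flush(callback) {
        callback(); // nothing buffered; just finish
    }
});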
Example 10: stanford-oval/almond-cloud/training/synthetic-gen-process.js
const options = {
        thingpediaClient: tpClient,
        schemaRetriever: schemas,

        templateFile: 'index.genie',

        rng: rng,
        locale: args.locale,
        flags: args.flags || {},
        maxDepth: args.maxdepth,
        targetPruningSize: args.target_pruning_size,
        debug: false, // no debugging, ever, because debugging also goes to stdout
    };

    const generator = new Genie.BasicSentenceGenerator(options);
    generator.on('progress', (value) => {
        process.send({ cmd:'progress', v: value });
    });
    const stringifier = new Genie.DatasetStringifier();

    generator.pipe(stringifier).pipe(process.stdout);
    await StreamUtils.waitFinish(process.stdout);

    process.disconnect();
}
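Like the previous worker examples, this process streams its dataset to stdout, reports progress with process.send, and disconnects from the parent when done. A hedged sketch of the parent side (the fork target comes from the header above; the message shape matches what the worker sends):

const child_process = require('child_process');

const child = child_process.fork('./training/synthetic-gen-process.js');
child.on('message', (msg) => {
    if (msg.cmd === 'progress')
        console.log('generation progress:', msg.v);
});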