How to use the genie-toolkit.DatasetParser class in genie-toolkit

To help you get started, we’ve selected a few genie-toolkit examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: stanford-oval / almond-cloud / training / sandboxed_synthetic_gen.js (View on GitHub, external)
for (let f of options.flags)
        args.push('--set-flag', f);

    if (options.debug)
        console.log(args.join(' '));

    const child = child_process.spawn(processPath, args, {
        stdio: stdio,
        cwd: process.cwd(),
        env: env
    });

    child.stdout.setEncoding('utf8');
    const stream = child.stdout
        .pipe(byline())
        .pipe(new Genie.DatasetParser());

    // propagate errors from the child process to the stream
    child.on('error', (e) => stream.emit('error', e));
    child.on('exit', (code, signal) => {
        if (code === null)
            stream.emit('error', new InternalError(signal, `Synthetic generation worker died with signal ${signal}.`));
        else if (code !== 0)
            stream.emit('error', new InternalError('E_BAD_EXIT_CODE', `Synthetic generation worker exited with status ${code}.`));
    });

    return stream;
};
GitHub: stanford-oval / almond-cloud / training / tasks / prepare-training-set.js (View on GitHub, external)
await StreamUtils.waitFinish(basicSource
                .pipe(new Genie.DatasetStringifier())
                .pipe(fs.createWriteStream(basicDataset, { fd: basicDatasetFD })));
            // basicDatasetFD is closed here

            let contexts = await
                fs.createReadStream(basicDataset, { encoding: 'utf8' })
                .pipe(byline())
                .pipe(new Genie.DatasetParser({ contextual: false }))
                .pipe(new Genie.ContextExtractor(this._schemas))
                .read();

            const contextualized =
                fs.createReadStream(basicDataset, { encoding: 'utf8' })
                .pipe(byline())
                .pipe(new Genie.DatasetParser({ contextual: false }))
                .pipe(new Genie.Contextualizer(contexts, {
                    locale: this._language,
                    numSamples: 20,
                    nullOnly: false,
                }));

            const contextualSynthetic = genSynthetic.generate(tmpDir, {
                contextual: true,
                contexts,

                language: this._language,
                flags: this._options.flags,
                maxDepth: this._options.maxDepth,
                targetPruningSize: this._options.contextualTargetPruningSize,
                debug: this._options.debug,
            });
GitHub: stanford-oval / almond-cloud / training / tasks / evaluate.js (View on GitHub, external)
module.exports = async function main(task, argv) {
    task.handleKill();

    const jobdir = await AbstractFS.download(task.jobDir + '/');
    const datadir = path.resolve(jobdir, 'dataset');
    const outputdir = path.resolve(jobdir, 'output');

    const tpClient = new AdminThingpediaClient(task.language);
    const schemas = new ThingTalk.SchemaRetriever(tpClient, null, true);
    const parser = new LocalParserClient(outputdir, task.language);
    await parser.start();

    const output = fs.createReadStream(path.resolve(datadir, 'eval.tsv'))
        .setEncoding('utf8')
        .pipe(byline())
        .pipe(new Genie.DatasetParser({
            contextual: task.modelInfo.contextual,
            preserveId: true,
            parseMultiplePrograms: true
        }))
        .pipe(new Genie.SentenceEvaluatorStream(parser, schemas, true /* tokenized */, argv.debug))
        .pipe(new Genie.CollectSentenceStatistics());

    const result = await output.read();
    await task.setMetrics(result);

    await Promise.all([
        parser.stop(),
        TokenizerService.tearDown(),
        AbstractFS.removeTemporary(jobdir)
    ]);
};
GitHub: stanford-oval / almond-cloud / scripts / compile-exact-btrie.js (View on GitHub, external)
async main(argv) {
        const matcher = new ExactMatcher;

        const output = readAllLines(argv.input_file)
            .pipe(new Genie.DatasetParser({ contextual: argv.contextual }))
            .pipe(new Stream.Writable({
                objectMode: true,

                write(ex, encoding, callback) {
                    matcher.add(ex.preprocessed, ex.target_code);
                    callback();
                },
            }));
        await StreamUtils.waitFinish(output);

        const builder = new BTrie.Builder((existing, newValue) => {
            assert(typeof newValue === 'string');
            if (existing === undefined)
                return newValue;
            else
                return existing + '\0' + newValue;