    for (let f of options.flags)
        args.push('--set-flag', f);
    if (options.debug)
        console.log(args.join(' '));

    const child = child_process.spawn(processPath, args, {
        stdio: stdio,
        cwd: process.cwd(),
        env: env
    });
    child.stdout.setEncoding('utf8');
    const stream = child.stdout
        .pipe(byline())
        .pipe(new Genie.DatasetParser());

    // propagate errors from the child process to the stream
    child.on('error', (e) => stream.emit('error', e));
    child.on('exit', (code, signal) => {
        if (code === null)
            stream.emit('error', new InternalError(signal, `Synthetic generation worker died with signal ${signal}.`));
        else if (code !== 0)
            stream.emit('error', new InternalError('E_BAD_EXIT_CODE', `Synthetic generation worker exited with status ${code}.`));
    });
    return stream;
};
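// Caller-side sketch (not part of the snippet above): the function returns an object-mode
// stream of parsed examples from Genie.DatasetParser, with child-process failures surfaced
// as 'error' events. The name generateSynthetic below is hypothetical, used only to
// illustrate how the returned stream might be consumed.
const synthetic = generateSynthetic(options);
synthetic.on('data', (ex) => {
    // each `ex` is one parsed dataset example
});
synthetic.on('error', (e) => console.error('synthetic generation failed:', e.message));
synthetic.on('end', () => console.log('synthetic generation finished'));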
if (isTokenized) {
    tokenized = {
        tokens: query.split(' '),
        entities: {},
    };

    if (data.entities) {
        // safety against weird properties
        for (let key of Object.getOwnPropertyNames(data.entities)) {
            if (/^(.+)_([0-9]+)$/.test(key))
                tokenized.entities[key] = data.entities[key];
        }
    }
} else {
    tokenized = await service.tokenizer.tokenize(languageTag, query, expect);
    if (data.entities)
        Genie.Utils.renumberEntities(tokenized, data.entities);
}
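// Illustration only (hypothetical values): both branches above are expected to yield an
// object of this shape, which the code below reads via tokenized.tokens and
// tokenized.entities. The entity-key regex accepts names like QUOTED_STRING_0 or NUMBER_1
// and rejects arbitrary properties, which is what the "safety against weird properties"
// comment refers to.
// tokenized = {
//     tokens: ['show', 'me', 'QUOTED_STRING_0', 'pictures'],
//     entities: { QUOTED_STRING_0: 'cats' }
// };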
let result = null;
let exact = null;

const tokens = tokenized.tokens;
if (tokens.length === 0) {
    result = [{
        code: ['bookkeeping', 'special', 'special:failed'],
        score: 'Infinity'
    }];
} else if (tokens.length === 1 && (/^[A-Z]/.test(tokens[0]) || tokens[0] === '1' || tokens[0] === '0')) {
    // if the whole input is just an entity, return that as an answer
    result = [{
        code: ['bookkeeping', 'answer', tokens[0]],
        score: 'Infinity'
async process(ex) {
    if (ex.flags.synthetic) {
        // skip typechecking synthetic examples, we know they are correct
        this.push(ex);
        return;
    }

    try {
        const entities = Genie.Utils.makeDummyEntities(ex.preprocessed);
        const program = ThingTalk.NNSyntax.fromNN(ex.target_code.split(' '), entities);
        await program.typecheck(this._schemas);
        this.push(ex);
        return;
    } catch(e) {
        this._dropped++;
    }
}
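// A minimal sketch of how a process() method like the one above could be wrapped in an
// object-mode Transform, so the typechecking filter can be used with .pipe() (as
// TypecheckStream is used further down this page). Class and field names here are
// assumptions for illustration, not necessarily the repository's exact implementation;
// Genie and ThingTalk are assumed to be required at the top of the file, as in the
// surrounding code.
const Stream = require('stream');

class TypecheckStreamSketch extends Stream.Transform {
    constructor(schemas) {
        super({ objectMode: true });
        this._schemas = schemas;
        this._dropped = 0;
    }

    async process(ex) {
        if (ex.flags.synthetic) {
            // synthetic examples are trusted, pass them through unchanged
            this.push(ex);
            return;
        }
        try {
            const entities = Genie.Utils.makeDummyEntities(ex.preprocessed);
            const program = ThingTalk.NNSyntax.fromNN(ex.target_code.split(' '), entities);
            await program.typecheck(this._schemas);
            this.push(ex);
        } catch(e) {
            // drop examples that fail to typecheck instead of failing the whole stream
            this._dropped++;
        }
    }

    _transform(ex, encoding, callback) {
        this.process(ex).then(() => callback(), callback);
    }

    _flush(callback) {
        if (this._dropped > 0)
            console.error(`dropped ${this._dropped} examples that failed typechecking`);
        callback();
    }
}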
} else {
    // assume that the progress of synthetic generation is the overall progress, because
    // synthetic generation is the biggest part of the process, and augmentation happens in parallel
    basicSynthetic.on('progress', (value) => {
        this._task.setProgress(value).catch((e) => {
            console.error(`Failed to update task progress: ${e.message}`);
        });
    });
    source = StreamUtils.chain([basicParaphrase, basicSynthetic], { objectMode: true });
}

const constProvider = new DatabaseParameterProvider(this._language, this._dbClient);
const ppdb = await Genie.BinaryPPDB.mapFile(this._options.ppdbFile);

const augmenter = new Genie.DatasetAugmenter(this._schemas, constProvider, this._tpClient, {
    quotedProbability: this._options.quotedProbability,
    untypedStringProbability: 0,
    maxSpanLength: MAX_SPAN_LENGTH,
    ppdbProbabilitySynthetic: this._options.ppdbProbabilitySynthetic,
    ppdbProbabilityParaphrase: this._options.ppdbProbabilityParaphrase,
    syntheticExpandFactor: 1,
    paraphrasingExpandFactor: 30,
    noQuoteExpandFactor: 10,
    ppdbFile: ppdb,
    locale: this._language,
    rng: this._rng,
    debug: this._options.debug,
});
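// One plausible way the augmenter built above is then wired up: the chained source is
// piped through the augmenter and serialized back to the Genie dataset format. This is a
// hedged sketch of the surrounding code, not necessarily the exact wiring in this file;
// `output` is a hypothetical writable destination.
// source
//     .pipe(augmenter)
//     .pipe(new Genie.DatasetStringifier())
//     .pipe(output);
// await StreamUtils.waitFinish(output);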
source = StreamUtils.chain([contextualParaphrase, contextualized, contextualSynthetic],
    { objectMode: true });
        locale: args.locale,
        thingpedia: './thingpedia.tt',
        entities: './entities.json',
        dataset: './dataset.tt',
        flags: args.flags,
        template: 'contextual.genie',
        random_seed: 'almond is awesome',
        maxDepth: args.maxdepth,
        targetPruningSize: args.target_pruning_size,
        debug: false, // no debugging, ever, because debugging also goes to stdout
    };

    inputFile
        .pipe(Genie.parallelize(PARALLEL_GENERATION,
            require.resolve('./workers/generate-contextual-worker.js'), options))
        .pipe(new Genie.DatasetStringifier())
        .pipe(process.stdout);

    await StreamUtils.waitFinish(process.stdout);
    process.disconnect();
}
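// StreamUtils.waitFinish, used above to wait for stdout to drain before disconnecting
// from the parent process, can be thought of as a small promise wrapper over the stream
// 'finish' event. A minimal sketch of such a helper (the repository's implementation may
// differ):
function waitFinishSketch(stream) {
    return new Promise((resolve, reject) => {
        stream.once('finish', resolve);
        stream.once('error', reject);
    });
}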
const contextualParaphrase = this._downloadParaphrase(true)
    .pipe(new TypecheckStream(this._schemas));
const basicSource = StreamUtils.chain([basicParaphrase, basicSynthetic], { objectMode: true });
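// StreamUtils.chain, used here and elsewhere on this page, combines several object-mode
// streams into a single readable stream. A minimal sketch of such a helper, assuming the
// inputs are read one after another in order (the repository's actual implementation may
// differ):
const { PassThrough } = require('stream');

function chainSketch(streams, options) {
    const out = new PassThrough(options);
    let i = 0;
    const next = () => {
        if (i >= streams.length) {
            out.end();
            return;
        }
        const s = streams[i++];
        s.once('end', next);
        s.once('error', (e) => out.emit('error', e));
        s.pipe(out, { end: false });
    };
    next();
    return out;
}
// e.g. chainSketch([basicParaphrase, basicSynthetic], { objectMode: true });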
// Spool the basic (non-contextual, not augmented) dataset to disk
// We need to do this because:
// 1) We don't want to run too many generation/processing steps as a pipeline, because that
//    would use too much memory
// 2) We need to do multiple passes over the basic dataset for different reasons, and
//    we can't cache it in memory
const { path: basicDataset, fd: basicDatasetFD } =
    await tmp.file({ mode: 0o600, dir: '/var/tmp' });

await StreamUtils.waitFinish(basicSource
    .pipe(new Genie.DatasetStringifier())
    .pipe(fs.createWriteStream(basicDataset, { fd: basicDatasetFD })));
// basicDatasetFD is closed here

let contexts = await
    fs.createReadStream(basicDataset, { encoding: 'utf8' })
        .pipe(byline())
        .pipe(new Genie.DatasetParser({ contextual: false }))
        .pipe(new Genie.ContextExtractor(this._schemas))
        .read();
const contextualized =
    fs.createReadStream(basicDataset, { encoding: 'utf8' })
        .pipe(byline())
        .pipe(new Genie.DatasetParser({ contextual: false }))
        .pipe(new Genie.Contextualizer(contexts, {
            locale: this._language,
            remote_commands: true,
            aggregation: true,
            bookkeeping: true,
            triple_commands: true,
            configure_actions: true,
            timer: true,
            projection: true,
            undefined_filter: true,
            projection_with_filter: false,
            extended_timers: false
        },
    maxDepth: this._options.maxDepth,
    debug: this._options.debug,
};

let generator = new Genie.BasicSentenceGenerator(options);
if (this._forDevicesRegexp !== null)
    generator = generator.pipe(new ForDevicesFilter(this._forDevicesRegexp));

const transform = new Stream.Transform({
    readableObjectMode: true,
    writableObjectMode: true,

    transform(ex, encoding, callback) {
        ex.type = 'generated';
        // do not set the training flag, we will regenerate the synthetic portion of the dataset
        // for training later
        ex.flags.exact = true;
        callback(null, ex);
    },

    flush(callback) {
    const options = {
        thingpediaClient: tpClient,
        schemaRetriever: schemas,
        templateFile: 'index.genie',
        rng: rng,
        locale: args.locale,
        flags: args.flags || {},
        maxDepth: args.maxdepth,
        targetPruningSize: args.target_pruning_size,
        debug: false, // no debugging, ever, because debugging also goes to stdout
    };

    const generator = new Genie.BasicSentenceGenerator(options);
    generator.on('progress', (value) => {
        process.send({ cmd: 'progress', v: value });
    });

    const stringifier = new Genie.DatasetStringifier();
    generator.pipe(stringifier).pipe(process.stdout);

    await StreamUtils.waitFinish(process.stdout);
    process.disconnect();
}
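// For context, the process.send()/process.disconnect() calls in the worker above rely on a
// Node.js IPC channel to the parent process. A hedged sketch of what the parent side of
// that protocol could look like (the worker path and handler are illustrative, not the
// repository's exact code; the actual parent uses Genie.parallelize and child_process.spawn
// as shown earlier on this page):
const child_process = require('child_process');

const worker = child_process.fork(require.resolve('./workers/generate-basic-worker.js'));
worker.on('message', (msg) => {
    if (msg.cmd === 'progress')
        console.log(`generation progress: ${(msg.v * 100).toFixed(1)}%`);
});
worker.on('exit', (code) => {
    if (code !== 0)
        console.error(`worker exited with status ${code}`);
});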