Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// would use too much memory
// 2) We need to do multiple passes over the basic dataset for different reasons, and
// we can't cache it in memory
// Spill the basic dataset to a temporary file created under /var/tmp with
// mode 0o600 (owner read/write only). The result of `tmp.file` is unpacked
// into the file's path and a file descriptor for it.
const { path: basicDataset, fd: basicDatasetFD } =
await tmp.file({ mode: 0o600, dir: '/var/tmp' });
// Serialize `basicSource` through Genie.DatasetStringifier into that file and
// wait for the write to complete before the file is read back below. The
// write stream is handed the descriptor, so Node writes through `fd` instead
// of re-opening the path.
// NOTE(review): presumably StreamUtils.waitFinish resolves on the write
// stream's 'finish' event — confirm. `.pipe()` does not forward upstream
// errors, so verify that failures in `basicSource` or the stringifier are
// surfaced somewhere.
await StreamUtils.waitFinish(basicSource
.pipe(new Genie.DatasetStringifier())
.pipe(fs.createWriteStream(basicDataset, { fd: basicDatasetFD })));
// basicDatasetFD is closed here: the write stream closes the descriptor it
// was given once it ends (Node's `autoClose` option defaults to true).
// Pass 1 over the file: read it as UTF-8, pipe it through byline() and a
// non-contextual Genie.DatasetParser, then into a Genie.ContextExtractor
// constructed with this._schemas.
// NOTE(review): the result of `.read()` is awaited, so ContextExtractor
// presumably overrides read() to return a promise for the extracted contexts
// rather than behaving like a plain Readable#read — confirm in Genie.
let contexts = await
fs.createReadStream(basicDataset, { encoding: 'utf8' })
.pipe(byline())
.pipe(new Genie.DatasetParser({ contextual: false }))
.pipe(new Genie.ContextExtractor(this._schemas))
.read();
// Pass 2 over the same file: identical byline/parser front end, piped into a
// Genie.Contextualizer configured with the contexts obtained in pass 1.
// `contextualized` holds the resulting stream; it is not awaited or consumed
// by this statement.
const contextualized =
fs.createReadStream(basicDataset, { encoding: 'utf8' })
.pipe(byline())
.pipe(new Genie.DatasetParser({ contextual: false }))
.pipe(new Genie.Contextualizer(contexts, {
locale: this._language,
numSamples: 20, // presumably the number of samples drawn per example — TODO confirm
nullOnly: false,
}));
const contextualSynthetic = genSynthetic.generate(tmpDir, {
contextual: true,
contexts,