// Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// Module setup: required libraries, data-file paths, and the shared
// tokenizer/WordNet instances used by the functions below.
var fs = require('fs');
var natural = require('natural');
var Tagger = require('simple-pos-tagger');
var ChartParsers = require('../index');
var parserFactory = new ChartParsers.ParserFactory();
var GrammarParser = ChartParsers.GrammarParser;

// Data files: one sentence per line, plus a context-free grammar file.
// NOTE(review): "path" here is a plain string, yet a later fragment calls
// path.join()/path.dirname() as if it were Node's 'path' module — confirm
// which was intended before relying on either.
var path = './data/';
var sentences_file = path + 'sentences.txt';
var grammar_file = path + 'English grammar using Wordnet tags.txt';
var tagger_config_file = '../node_modules/simple-pos-tagger/data/English/lexicon_files.json';

// BUG FIX: "tokenizer" was previously assigned without var/let/const,
// creating an implicit global (and a ReferenceError in strict mode).
var tokenizer = new natural.TreebankWordTokenizer();
var wordnet = new natural.WordNet();

// Populated asynchronously by initialise().
var sentences;
// Reads the sentences file and the grammar file asynchronously.
// NOTE(review): "callback" is accepted but never invoked in the visible
// code, and "logger" is used below without being defined anywhere in this
// file — both look like defects inherited from the snippet's origin.
function initialise(callback) {
// read sentences from file
fs.readFile(sentences_file, 'utf8', function (error, sentences_text) {
if (error) {
// NOTE(review): logs and then falls through; sentences_text will be
// undefined on error and the next line will throw.
logger.error(error);
}
sentences = sentences_text.split('\n');
// read grammar from file
fs.readFile(grammar_file, 'utf8', function (error, grammar_text) {
if (error) {
logger.error(error);
}
// parse the grammar
// NOTE(review): this comma-chained declaration shadows the module-level
// natural/wordnet/tokenizer bindings inside the callback.
var natural = require('natural'),
pos = require('pos'),
wordnet = new natural.WordNet(),
_ = require('underscore'),
NGrams = natural.NGrams,
classifier = new natural.BayesClassifier(),
tokenizer = new natural.TreebankWordTokenizer(); // natural.WordTokenizer();
natural.PorterStemmer.attach();
// load the classifier data and learn the schema
// NOTE(review): the block comment opened on the next line is NEVER
// terminated — there is no matching */ anywhere in this file, so
// everything below (including the obfuscate function) is dead text and
// the file does not parse. This must be resolved before the code can run.
/*natural.BayesClassifier.load('./classifier.json', null, function(err, classifier) {
// if the classifier hasn't been saved, then calculate it now
if (err) {
var traindata = require('./trainingdata.json');
for (i in traindata)
{
classifier.addDocument(traindata[i].query, traindata[i].category);
}
classifier.train();
classifier.save('classifier.json', function(err, classifier) {
});
// NOTE(review): this async obfuscate() function appears to be pasted in
// from a different project: WNet and wnetdb are not defined here, and it
// calls path.join() although "path" is a string in this file.
async function obfuscate(previous) {
const { notice, status, stageHandler, useObfuscation } = previous.params;
if (!useObfuscation) {
notice("Skipping synonyms replacing process.");
return previous;
}
await notice("Obfuscating sentences...");
await status("Preparing obfuscator...");
let dummy = d => d;
let wnet = new WNet({
dataDir: wnetdb.path
});
let tzr = (new natural.TreebankWordTokenizer()).tokenize;
let isWord = /\w+/;
let ntrBf = path.join(path.dirname(require.resolve("natural")), "brill_pos_tagger");
let defaultCat = '?';
let lex = new natural.Lexicon(ntrBf + "/data/English/lexicon_from_posjs.json", defaultCat);
let rules = new natural.RuleSet(ntrBf + "/data/English/tr_from_posjs.txt");
let tagger = new natural.BrillPOSTagger(lex, rules);
let np = new natural.NounInflector();
let pvp = new natural.PresentVerbInflector();
let stmr = natural.PorterStemmer;
let _accumunator = 0;
let _total = 0;
let _st = "";
let _swp = (w) => ( isWord.test(w[0]) );
test();
});
}
// Shallow array equality: true when both arrays have the same length and
// strictly-equal (===) elements at every index. Nested arrays/objects are
// compared by reference, not by value.
function arraysEqual(arr1, arr2) {
    if (arr1.length !== arr2.length) {
        return false;
    }
    for (var idx = 0; idx < arr1.length; idx++) {
        if (arr1[idx] !== arr2[idx]) {
            return false;
        }
    }
    return true;
}
// Shared NLP helpers for the markov-matching code below.
var tokenizer = new natural.TreebankWordTokenizer();
var NGrams = natural.NGrams;

// Lower-case the input and split it into Penn-Treebank-style word tokens.
function tokenize(text) {
    var lowered = text.toLowerCase();
    return tokenizer.tokenize(lowered);
}
// Counts how many n-grams of the tokenized "search" string also occur in
// the tokenized "text" string.
// NOTE(review): this function is truncated/garbled — "count" is never
// returned, the closing braces are missing, and the quoted word list below
// is the tail of an array literal pasted in from an unrelated snippet
// (note the stray ']' inside ']total'). Also, for...in over the two arrays
// assigns to undeclared textgramNum/searchgramNum, creating implicit globals.
function matchMarkov(search, text, n){
// Shadow the parameters with their tokenized forms.
var search = tokenize(search);
var text = tokenize(text);
var searchgrams = NGrams.ngrams(search, n);
var textgrams = NGrams.ngrams(text, n);
var count = 0;
for (textgramNum in textgrams){
for (searchgramNum in searchgrams){
if (arraysEqual(searchgrams[searchgramNum], textgrams[textgramNum])){
count += 1;
}
// NOTE(review): orphaned fragment begins here — tail of a word-list array
// from a different file; it does not belong to matchMarkov.
'especially', 'fig', 'afraid', 'huge', 'sister', 'steel', 'discuss',
'forward', 'similar', 'guide', 'experience', 'score', 'apple',
'bought', 'led', 'pitch', 'coat', 'mass', 'card', 'band', 'rope',
'slip', 'win', 'dream', 'evening', 'condition', 'feed', 'tool',
']total', 'basic', 'smell', 'valley', 'nor', 'double', 'seat',
'arrive', 'master', 'track', 'parent', 'shore', 'division', 'sheet',
'substance', 'favor', 'connect', 'post', 'spend', 'chord', 'fat',
'glad', 'original', 'share', 'station', 'dad', 'bread', 'charge',
'proper', 'bar', 'offer', 'segment', 'slave', 'duck', 'instant',
'market', 'degree', 'populate', 'chick', 'dear', 'enemy', 'reply',
'drink', 'occur', 'support', 'speech', 'nature', 'range', 'steam',
'motion', 'path', 'liquid', 'log', 'meant', 'quotient', 'teeth',
'shell', 'neck' ];
var stemmer = natural.PorterStemmer;
var tokenizer = new natural.TreebankWordTokenizer();
// Collects DOM text nodes beneath "node", optionally skipping
// whitespace-only nodes.
// NOTE(review): this is browser/jQuery code ($, node.nodeType === 3 for
// text nodes) pasted into a Node.js file — "$" is not defined here. The
// function is also truncated: it never calls getTextNodes(node), never
// returns textNodes, and its closing braces are missing.
var getTextNodesIn = function (node, includeWhitespaceNodes) {
var textNodes = [], whitespace = /^\s*$/;
function getTextNodes(node) {
// Skip the FiveUI overlay element so the tool doesn't scan itself.
if ($(node).attr('id') == 'fiveui-top') {
return;
}
if (node.nodeType == 3) {
if (includeWhitespaceNodes || !whitespace.test(node.nodeValue)) {
textNodes.push(node);
}
} else {
// Recurse into element children.
for (var i = 0, len = node.childNodes.length; i < len; ++i) {
getTextNodes(node.childNodes[i]);
}
// Checks whether any candidate word in _w occurs in the record string _r,
// comparing the Porter-stemmed, lower-cased record tokens against the
// lower-cased candidates.
// Returns {chk: true, w: <candidate>} on a hit (the LAST matching
// candidate wins, preserving the original behaviour), or
// {chk: false, w: null} when nothing matches.
function checkForWords(_r, _w) {
// Tokenize the record (break it into words).
// BUG FIX: the original assigned to "tokenizer" without var, clobbering
// the module-level tokenizer via an implicit global; use a local instead.
var wordTokenizer = new natural.TreebankWordTokenizer();
// (The original wrapped this in a pointless one-element array: [x][0].)
var words = wordTokenizer.tokenize(_r);
var chk = {chk: false, w: null};
for (var i = 0; i < _w.length; i++) {
for (var j = 0; j < words.length; j++) {
// Stem the word we're checking so that dogs becomes dog, etc.
var stemmedWord = natural.PorterStemmer.stem(words[j]);
if (stemmedWord.toLowerCase() === _w[i].toLowerCase()) {
chk.chk = true;
chk.w = _w[i];
}
}
}
return chk;
}
// Extractive summarizer: tokenizes "text" into sentences, ranks them with
// a PageRank-style similarity matrix, and keeps the top "lines" sentences
// in their original order.
// NOTE(review): garbled fragment — Tokenizer, constructMatrix, pageRank,
// parser and onParseFinished are not defined anywhere in this file,
// "callback" is never invoked, topLines is computed but never used, and
// the stray "});" before the parser.on handler breaks the brace balance.
function summarize(text, lines, callback) {
var sentenceTokenizer = new Tokenizer('utterer');
sentenceTokenizer.setEntry(text);
var sentences = sentenceTokenizer.getSentences();
// Keep an untouched copy: the working copy is lower-cased/tokenized below.
var sentencesOriginal = sentences.slice();
var wordTokenizer = new natural.TreebankWordTokenizer();
sentences.forEach(function (sentence, index, array) {
array[index] = wordTokenizer.tokenize(sentence.toLowerCase());
});
var matrix = constructMatrix(sentences);
var sortedSentences = pageRank(matrix, sentencesOriginal);
var topLines = [];
for (var i = 0; i < Math.min(lines, sortedSentences.length); i++) {
topLines.push(sortedSentences[i]);
}
// Restore document order for the selected sentences.
topLines.sort(function (a, b) {
return a.index - b.index;
});
});
//And what happens when it finishes parsing all of the records.
parser.on('end', function() {
onParseFinished();
});
}
//------------------CHECK FOR MATCHES FUNCTION ---------------------------------------------------------------------------!!
//This function checks any string (input) against any list of candidate strings (candidates)
//Uses NLP to split the sentence into words and also to stem
var tokenizer = new natural.TreebankWordTokenizer();
//Used to singularize the words so that frogs matches frog. Whether or not you have to do this will depend on what data you're trying to match.
//For example if it's something *already* standardized (i.e. Subjects) you won't have to.
//This function is SLOW if there are a lot of words to check against
var nounInflector = new natural.NounInflector();
function checkForMatches(input, candidates) {
//Tokenize the record (break it into words)
var words = [tokenizer.tokenize(input)][0];
//Set up our return object, this is the state that is returned with no matches
var chk = {chk:false, words:[]};
for (var i = 0; i < candidates.length; i++) {
var cand = nounInflector.singularize(candidates[i].toLowerCase());
for (var j = 0; j < words.length; j++) {