Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// Demo script: loads three sample texts, then shows tokenizing, Porter
// stemming and Bayesian classification with the `natural` library.
// NOTE(review): `fs` is not required in the visible code — presumably it is
// declared earlier in the file; confirm.

// Sample texts. The leading `' ' +` coerces each readFileSync result to a
// string (and prepends a single space).
var economy = ' ' + fs.readFileSync('data/texts/economy.txt');
var politics = ' ' + fs.readFileSync('data/texts/politics.txt');
var sports = ' ' + fs.readFileSync('data/texts/sports.txt');

var natural = require('natural');
var tokenizer = new natural.WordTokenizer();

// 1. Tokenize one of the sample texts.
console.log("\n-- Tokenized sample text in politics.txt:");
console.log(tokenizer.tokenize(politics));

// 2. Stem a single word through the stemmer object directly.
console.log("\n-- Use Porter Stemmer on a single word:");
console.log(natural.PorterStemmer.stem("dogs"));

// 3. attach() adds the stemmer's methods to strings; the tokenizeAndStem()
//    calls below rely on it having run first.
natural.PorterStemmer.attach();
console.log("\n-- Use Porter Stemmer text in file sports.txt:");
console.log(sports.tokenizeAndStem());
console.log("dog dogs Dog dogged".tokenizeAndStem());

// 4. Train a Bayes classifier with one document per label, then classify
//    an unseen sentence.
var classifier = new natural.BayesClassifier();
[
  [economy, 'economy'],
  [politics, 'politics'],
  [sports, 'sports']
].forEach(function (sample) {
  classifier.addDocument(sample[0], sample[1]);
});
classifier.train();
console.log("\n-- Bayesian classifier test results:");
console.log(classifier.classify('The President and Congress went on vacation.'));
function createHashedIndexFiles(lang, indexPath, indexData, type) {
var words_to_stem = {};
var stem_to_words = {};
var stemmer = null;
switch (lang) {
case 'eng':
stemmer = natural.PorterStemmer;
break;
case 'esp':
stemmer = natural.PorterStemmerEs;
break;
}
//console.log('trying to create index', stemmer);
if (type == 'words' && stemmer != null) {
// make stems
for (var key in indexData) {
var wordData = indexData[key],
stemmedWord = stemmer.stem(key);
return new Promise((resolve, reject) => {
const tokens = tokenizer.tokenize(phrase);
tokens.map(token => {
console.log(token, Natural.PorterStemmer.stem(token))
});
Natural.LancasterStemmer.attach();
console.log(phrase.tokenizeAndStem());
resolve({
engine: 'compromise',
ms: (new Date() - time),
tokens: tokenizer.tokenize(phrase),
stemmers: Natural.PorterStemmer.stem(phrase)
// glossary: glossary.parse(phrase),
// sentiment: analyser.classify(phrase),
});
});
};
/*!
* reds
* Copyright(c) 2011 TJ Holowaychuk
* cn-search
* Copyright(c) 2013 Sxyizhiren <786647787@qq.com>
* MIT Licensed
*/
/**
* Module dependencies.
*/
var natural = require('natural');
var metaphone = natural.Metaphone.process;
var stem = natural.PorterStemmer.stem;
var stopwords = natural.stopwords;
var cnstopwords = require('./cnstopWords');
// default chinese segment
var Segment = require('segment').Segment;
var segment = new Segment();
segment.useDefault();
/**
* Chinese Segment
* @type {*}
*/
var segmentSync = function(str){
var words=segment.doSegment(str);
var result=[];
for(var i= 0,len=words.length;i
return;
}
// Check the static Emoji mappings whitelist
var whitelist = emojiMappings.whitelist[t.toLowerCase()];
if (whitelist) {
args = args.replace(new RegExp(`\\b${t}\\b(?!:)`, 'g'), whitelist);
return;
}
var stem = natural.PorterStemmer.stem(t.toLowerCase());
// Check to see if the word directly matches an Emoji shortname
for (var key in emoji) {
var stemmedShortname = natural.PorterStemmer.stem(key.toLowerCase());
if (stemmedShortname === stem) {
args = args.replace(new RegExp(`\\b${t}\\b(?!:)`, 'g'), emoji[key].shortname);
return;
}
}
// Check to see if the word matches an Emoji alias
for (key in emoji) {
var _emoji = emoji[key];
if (_emoji.category === 'flags') {
continue;
}
if (_emoji.stemmedAliases.indexOf(stem) !== -1) {
/**
 * Stem every entry of `words` with natural's Porter stemmer.
 *
 * Entries are read by index from 0 up to `words.length` (sampled once,
 * before the loop starts).
 *
 * @param {Array<string>} words - Words to stem.
 * @returns {Array<string>} A new array with the stem of each word, in the
 *   same order as the input.
 */
function stem (words) {
  const stemmed = [];
  const total = words.length;
  let idx = 0;
  while (idx < total) {
    stemmed.push(natural.PorterStemmer.stem(words[idx]));
    idx += 1;
  }
  return stemmed;
}
// Normalize the token list in place: each token is lower-cased and then
// replaced by its Porter stem.
tokens.forEach((token, position) => {
  tokens[position] = natural.PorterStemmer.stem(token.toLowerCase());
});
this.apply( function( text ) {
if ( type === 'Lancaster' ) {
return natural.LancasterStemmer.stem( text );
} else {
return natural.PorterStemmer.stem( text );
}
});
return this;
// Load the `natural` library and keep a direct reference to its Porter stemmer.
const natural = require("natural");
var PorterStemmer = natural.PorterStemmer;
// Bayes classifier constructed with the Porter stemmer.
// NOTE(review): the second argument (0.1) is presumably a smoothing value — confirm against the natural docs.
let specClassifier = new natural.BayesClassifier(PorterStemmer,0.1);
// attach() is called for its side effect; presumably it adds the stemmer's
// helper methods to strings — verify against the natural docs.
natural.PorterStemmer.attach();
/*
* This classifier assumes one area of specialization per professor.
* When the classifier receives a criterion to classify, there is a problem:
* because the professors' names are stemmed and each part of a name is treated as a separate token,
* the classifier considers David De Matos and Ana Matos to be similar (shared surname), which conflicts with the classification process.
*
* Quick-fix:
* -Make professors' names unique (chosen technique) or:
* -Cross classify using keywords
* */
class MEIC_MODULE_3 {
constructor() {
/*!
* reds
* Copyright(c) 2011 TJ Holowaychuk
* MIT Licensed
*/
/**
* Module dependencies.
*/
var natural = require('natural');
var metaphone = natural.Metaphone.process;
var stem = natural.PorterStemmer.stem;
var stopwords = natural.stopwords;
var redis = require('redis');
/**
 * Do nothing.
 *
 * @returns {undefined} Always `undefined`; any arguments are ignored.
 */
function noop() {
  return undefined;
}
/**
* Library version.
*/
exports.version = '1.0.0';
/**
* Expose `Search`.
*/
exports.Search = Search;