Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
// NOTE(review): truncated snippet — the forEach callback and the enclosing
// function are never closed in this excerpt, so it is not valid JavaScript
// on its own. `natural` and `tokenizer` are defined outside this excerpt.
// Builds a TF-IDF score for each distinct token in `content`.
function frequency(content) {
var tfidf = new natural.TfIdf(),
processed = [],
words = [];
// Add the current content.
content = content.toLowerCase();
tfidf.addDocument(content);
tokenizer.tokenize(content).forEach(function wordFrequency(word) {
// Return early if word is already processed, too short, or only a number.
// `+word` is truthy for any numeric string (note: "0" slips through, since
// +"0" is 0/falsy); `~indexOf` is non-zero when the word was found.
if (+word || word.length < 3 || ~processed.indexOf(word)) return;
words.push({
word: word,
// Score against document 0 — the single document added above.
score: tfidf.tfidf(word, 0)
});
// NOTE(review): duplicate of the `frequency` snippet above, equally
// truncated — callback and function are never closed in this excerpt.
// Builds a TF-IDF score for each distinct token in `content`.
function frequency(content) {
var tfidf = new natural.TfIdf(),
processed = [],
words = [];
// Add the current content.
content = content.toLowerCase();
tfidf.addDocument(content);
tokenizer.tokenize(content).forEach(function wordFrequency(word) {
// Return early if word is already processed, too short, or only a number.
// `+word` is truthy for numeric strings; `~indexOf` is non-zero on a hit.
if (+word || word.length < 3 || ~processed.indexOf(word)) return;
words.push({
word: word,
// Score against document 0 — the single document added above.
score: tfidf.tfidf(word, 0)
});
// NOTE(review): truncated and buggy as excerpted — flagged, not fixed,
// because the function's tail is missing from this view:
//  * the `;` after `processedDocs = {}` ends the var statement, so the
//    following comma-chained assignments create implicit globals;
//  * `wordBag` is assigned but `wordBags` (plural) is indexed below —
//    one of the two names is wrong;
//  * the parameter `n` is immediately clobbered to 0 and `n` is never
//    incremented in the visible loop body.
var process = function(docs, n, percentage) {
var processedDocs = {};
wordBag = {},
vocab = {},
words = [],
cut = 0,
len = 0,
n = 0;
tfidf = new natural.TfIdf;
// Build a vocabulary from the middle `percentage` of each doc's terms,
// trimming the highest- and lowest-ranked tails.
for (d in docs) {
wordBags[n] = natural.PorterStemmer.tokenizeAndStem(d);
tfidf.addDocument(wordBags[n]);
len = tfidf.listTerms(n).length-1;
// NOTE(review): `cut` is a fraction, but slice() expects integer
// indices — presumably this should be scaled by `len`; verify.
cut = (1-percentage)/2;
words = tfidf.listTerms(n).slice(cut, (len-cut));
for (w in words) {
// Skip terms already present in the vocabulary.
if (!!vocab[words[w].term]) continue;
vocab[words[w].term] = {word: words[w].term, count: 0};
}
n = 0;
/**
 * Extracts the most relevant keywords from an article and scores them.
 *
 * Relies on module-scope dependencies: `gramophone` (keyword extraction),
 * `TfIdf` (from `natural`), `_` (lodash) and `commonWordsArray` (stop list).
 *
 * @param {string} title - Article title.
 * @param {string} content - Article body.
 * @returns {{totalWords: number, relevance: Array<Object>}}
 *   `relevance` is the top-20 extracted terms, each merged with a TF-IDF
 *   score computed against the combined title+content document.
 */
function wordAnalysis(title, content) {
  const text = [title, content].join(' ');

  // Keyword extraction: stemmed, stop words removed, top 20 by score.
  const graph = gramophone.extract(text, {
    score: true,
    stopWords: commonWordsArray,
    stem: true,
    limit: 20
  });

  // Score each extracted term against the single document (index 0).
  const tfidf = new TfIdf();
  tfidf.addDocument(text);

  // Merge order matters: `item`'s own keys win over the seeded score.
  let tfGraph = graph.map((item) =>
    _.merge({ score: tfidf.tfidf(item.term, 0) }, item)
  );
  tfGraph = _.filter(tfGraph, (item) => item.term !== '');

  return {
    // Fix: split on whitespace runs and drop empty tokens so repeated,
    // leading or trailing spaces do not inflate the word count
    // (the original `split(' ').length` counted empty strings).
    totalWords: content.split(/\s+/).filter(Boolean).length,
    relevance: tfGraph
  };
}
// NOTE(review): fragment from the middle of a larger document-indexing
// routine — `doc`, `facets`, `value`, `maxStringFieldLength`,
// `indexMetaDataGlobal`, `id`, `TfIdf` are all defined outside this
// excerpt, and the trailing loops are never closed, so this is not valid
// JavaScript on its own. Comments below are best-effort reading.
facetValues = doc[facets[0]];
}
// Copy each field into the stored `value`, truncating string fields.
for (fieldKey in doc) {
if( Object.prototype.toString.call(doc[fieldKey]) === '[object Array]' ) {
value['fields'][fieldKey] = doc[fieldKey];
} else {
value['fields'][fieldKey] = doc[fieldKey].substring(0, maxStringFieldLength);
}
}
// Index every field: record the field name globally, then build a
// per-field TF-IDF term-frequency vector.
for (fieldKey in doc) {
if (indexMetaDataGlobal['indexedFieldNames'].indexOf(fieldKey) == -1) {
indexMetaDataGlobal['indexedFieldNames'].push(fieldKey);
}
tfidf = new TfIdf();
tfidf.addDocument(doc[fieldKey], fieldKey + '~' + id);
// `documents` entries map term -> frequency (plus a `__key` label).
docVector = tfidf.documents[tfidf.documents.length - 1];
var highestFrequencyCount = 0;
// Find the peak term frequency — presumably for normalisation; the
// consumer is outside this excerpt, so confirm against the caller.
for (var k in docVector) {
if (docVector[k] > highestFrequencyCount)
highestFrequencyCount = docVector[k];
}
var deleteKeys = [];
for (var k in docVector) {
// Skip the metadata key injected by addDocument().
if (k != '__key') {
// Every term is indexed under a catch-all facet plus one key per
// facet value present on the document.
var facetIndexKey = ['NO~FACETING'];
for (var l = 0; l < facets.length; l++) {
if (doc[facets[l]]) {
var thisFacetValue = doc[facets[l]];
for (var m = 0; m < thisFacetValue.length; m++) {
facetIndexKey.push(facets[l] + '~' + thisFacetValue[m]);