/* tslint:disable */
import * as natural from 'natural';
const tokenizer = new natural.WordTokenizer();
console.log(tokenizer.tokenize('your do a dog dog has fleas.'));
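// => [ 'your', 'do', 'a', 'dog', 'dog', 'has', 'fleas' ]
// (WordTokenizer splits on non-alphanumeric characters and drops the punctuation)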
const NGrams = natural.NGrams;
console.log(NGrams.ngrams('This is a text document to analyze.', 5));
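// => the three 5-grams of the seven tokens, e.g. [ 'This', 'is', 'a', 'text', 'document' ],
// [ 'is', 'a', 'text', 'document', 'to' ], [ 'a', 'text', 'document', 'to', 'analyze' ]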
import { CountVectorizer } from './text';
//
const cv = new CountVectorizer();
const text1 = ['deep learning ian good fellow learning jason shin shin', 'yoshua bengio'];
console.log('original text', text1);
const vocabCounts = cv.fit_transform(text1);
console.log(vocabCounts);
console.log(cv.vocabulary);
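// vocabulary maps each distinct token to a column index, and vocabCounts is the
// matching per-document term-count matrix (assuming ./text exposes a
// scikit-learn-style CountVectorizer, as the fit_transform name suggests)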
// -- More info: https://github.com/NaturalNode/natural
'use strict';
import Natural from 'natural';
// -- Internal
const tokenizer = new Natural.WordTokenizer()
export default (phrase) => {
const time = new Date();
return new Promise((resolve, reject) => {
const tokens = tokenizer.tokenize(phrase);
tokens.forEach(token => {
console.log(token, Natural.PorterStemmer.stem(token));
});
Natural.LancasterStemmer.attach();
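// attach() patches String.prototype with tokenizeAndStem(), here backed by the Lancaster stemmer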
console.log(phrase.tokenizeAndStem());
resolve({
engine: 'compromise',
ms: (new Date() - time)
});
});
};
'use strict';
var path = require('path'),
fs = require('fs'),
natural = require('natural'),
lunr = require('lunr'),
tokenizer = new natural.WordTokenizer(),
loc = path.resolve(__dirname, 'content'),
scraper = {
title: /\[meta:title\]:\s<>\s\((.+?)\)(?!\))/,
description: /\[meta:description\]:\s<>\s\((.+?)\)(?!\))/,
firstlines: /^((.*\n){2}){1,3}/
};
//
// ### @private function scrape()
// #### @content {String} document content
// #### @key {String} scraper key
// #### @n {Number} index of match that should be returned
// Scrapes the [key] from the content by Regular Expression
//
function scrape(content, key, n) {
if (!content) return '';
var match = content.match(scraper[key]);
return match ? match[n] : '';
}
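//
// Example: for content containing "[meta:title]: <> (Hello world)",
// scrape(content, 'title', 1) returns the first capture group, "Hello world".
//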
async add(filename, document) {
const PUNCTUATION = ['.', ',', ':', ''];
const tokenizer = new natural.WordTokenizer();
const tokens = tokenizer.tokenize(document);
// filter out punctuation, then add all tokens to a redis set.
await Promise.all(
tokens
.filter(token => PUNCTUATION.indexOf(token) === -1)
.map(token => {
const sadd = promisify(this.tokenClient.sadd).bind(this.tokenClient);
return sadd(token, filename);
})
);
const set = promisify(this.docsClient.set).bind(this.docsClient);
await set(filename, document);
}
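// Usage sketch (assuming this method lives on an indexer class wired to redis
// `tokenClient` and `docsClient` connections, with `promisify` from util):
// after `await indexer.add(filename, text)`, SMEMBERS <token> lists every
// filename whose document contains that token.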
var _ = require('lodash');
var unfluff = require('unfluff');
var natural = require('natural');
var tokenizer = new natural.WordTokenizer();
var sentiment = require('sentiment');
var Stats = require('text-statistics');
var glossary = require('glossary')({
minFreq: 2,
collapse: true,
verbose: true
});
var DEFAULTS = {
ok: false,
sentiment: 0,
title: null,
topics: [],
words: 0,
difficulty: 0,
minutes: 0
};
let getTokens = (data) => {
let tokenizer = new natural.WordTokenizer();
let tokens = tokenizer.tokenize(data);
// lowercase then Porter-stem every token
tokens = tokens.map(word => natural.PorterStemmer.stem(word.toLowerCase()));
return tokens;
};
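// e.g. getTokens('Running dogs run') => [ 'run', 'dog', 'run' ]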
export const getTokensWithOutNumbersAndStopWords = (textContent) => {
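// pipeline: strip digits, tokenize, lowercase, singularize, drop English stopwords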
const noNumbers = removeNumbers(textContent)
const tokenizer = new natural.WordTokenizer();
const tokens = tokenizer.tokenize(noNumbers);
const tokensInLowerCase = toLowerCase(tokens);
const singularizedTokens = getSingularizedWord(tokensInLowerCase);
return arrayDiff(singularizedTokens, stopwords.english);
}
async function getCategory(packageName) {
if (getInCategoryMap(packageName)) {
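// a hard-coded mapping wins outright, hence the sentinel score below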
return {
label: getInCategoryMap(packageName),
score: 999,
}
}
const { description, keywords } = await getPackageDetails(packageName)
const tokenizer = new natural.WordTokenizer();
const tokenString = await stripMarkdown(description) + ' ' + keywords.join(' ')
const packageTokens =
tokenizer.tokenize(tokenString)
.map(token => token.toLowerCase())
.map(natural.PorterStemmer.stem)
.concat(
tokenizer.tokenize(packageName)
.map(natural.PorterStemmer.stem)
)
const scores = {}
let maxScoreCategory = {
category: '',
score: 0,
}
'160x600': {}
},
categories: require('../config/sonobi-codes.js')
})
this.config.oip.options = underscore.defaults(this.config.oip.options || {},
{ refillInterval: 2 * 1000,
retryInterval: 60 * 1000,
emptyInterval: 60 * 1000,
maxFlights: 8,
lowWater: 5,
highWater: 15
})
this.pqs = {}
this.tokenizer = new natural.WordTokenizer()
underscore.keys(this.config.oip.categories).forEach(category => {
var trie = new Trie()
trie.addStrings(this.tokenizer.tokenize(this.config.oip.categories[category]))
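// natural's Trie indexes the tokens of the category name; keysWithPrefix('')
// below then enumerates them all as the category's intent keywords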
this.pqs[category] = { category: category,
name: this.config.oip.categories[category],
errors: 0,
sizes: {},
trie: trie,
intents: trie.keysWithPrefix(''),
query: { cat: {} }
}
this.pqs[category].query.cat[category] = 24 * 60 * 60
underscore.keys(this.config.oip.sizes).forEach(size => {
this.pqs[category].sizes[size] = { queue: new PriorityQ(pqComparator),
lowWater: this.config.oip.options.lowWater,
eleventyConfig.addLiquidFilter("getSentimentValue", function(content) {
if( content ) {
const tokenizer = new Natural.WordTokenizer();
return analyze.getSentiment(tokenizer.tokenize(content));
}
return 0;
});
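//
// -- `analyze` is not defined in this snippet; a minimal setup sketch, assuming it
// -- is natural's AFINN-backed SentimentAnalyzer:
// const Natural = require('natural');
// const analyze = new Natural.SentimentAnalyzer('English', Natural.PorterStemmer, 'afinn');
// The filter is then available in Liquid templates as {{ content | getSentimentValue }}.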