Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
/**
 * Tokenises `text`, strips stopwords, and hands the resulting tokens to
 * getTermVectorForNgramLength for the requested n-gram length(s).
 *
 * @param {string} text - Input to process; any non-string value is rejected.
 * @param {Object} [options] - Optional overrides; missing keys are filled in
 *   from the `defaults` object below via _.defaults.
 * @param {number|Array} [options.nGramLength=1] - A single n-gram length, or
 *   an array of lengths whose per-length results are concatenated in order.
 * @param {RegExp} [options.separator] - Forwarded to sw.removeStopwords as
 *   `inputSeparator`.
 * @param {*} [options.stopwords] - Forwarded to sw.removeStopwords as
 *   `stopwords`; defaults to whatever sw.getStopwords() returns.
 * @returns {*} For a numeric nGramLength, the value returned by
 *   getTermVectorForNgramLength(tokens, nGramLength).
 * @throws {Error} If `text` is not a string, or if nGramLength == 0.
 *
 * NOTE(review): the tail of this function (the close of the array branch and
 * whatever follows it) is not visible in this excerpt.
 */
exports.getVector = function(text, options) {
// Guard: only string input is accepted.
if (typeof text != "string")
throw new Error("error: input must be a string");
var defaults = {
nGramLength: 1,
// One or more of: | ' space . , - ( newline )
separator: /[\|' \.,\-|(\n)]+/,
stopwords: sw.getStopwords()
}
// NOTE(review): if `_` is lodash/underscore, _.defaults assigns into its
// first argument, so a caller-supplied options object is modified - confirm.
options = _.defaults(options || {}, defaults)
// Loose equality: this is also true for "0", "" and false. Negative lengths
// are NOT rejected here even though the message says "greater than 0".
if (options.nGramLength == 0)
throw new Error("error: nGramLength must be greater than 0");
// Remove stopwords, then split the result on single spaces to get tokens
// (removeStopwords presumably returns a space-joined string - verify).
var tokens = sw.removeStopwords(text, {
inputSeparator: options.separator,
stopwords: options.stopwords
}).split(' ');
// Accumulator used by the array branch below.
var vec = []
// Numeric case. The global isNaN coerces its argument, so numeric strings
// and single-element arrays such as [2] also take this branch.
if (!isNaN(options.nGramLength)) {
return getTermVectorForNgramLength(tokens, options.nGramLength);
}
// Array case: compute one result per requested length and append each to vec.
else if (options.nGramLength.constructor === Array) {
for (var i = 0; i < options.nGramLength.length; i++)
vec = vec.concat(getTermVectorForNgramLength(tokens, options.nGramLength[i]))
exports.getStopwords = function(lang) {
return sw.getStopwords(lang);
}