// Vue component excerpt: gather the text of each child node from the store and build a word-frequency map.
const textItems = this.$store.state.leotext
children.forEach(child => {
const t = textItems[child.t]
let textData = {}
try {
textData = JSON.parse(t)
} catch (e) {
console.log(e, child.id)
}
items.push(_.get(textData, this.from, ''))
})
let text = items.join()
// text = stripchar.RSspecChar(text.toLowerCase())
text = text.replace(/[[\]&,;'"”’().*?]/g, ' ')
let words = split(text) // `split` is a tokenizer helper defined or imported elsewhere in the original module (not shown in this excerpt)
words = sw.removeStopwords(words)
const wf = {}
_.remove(words, word => /\d/.test(word))
words.forEach(word => {
if (word.length < 4) { return }
word = word.toLowerCase()
wf[word] = wf[word] ? wf[word] + 1 : 1
})
// debugger
const wordFreq = {}
Object.keys(wf).forEach(k => {
const v = wf[k]
if (v > this.threshold) wordFreq[k] = v
})
const keys = Object.keys(wordFreq)
keys.forEach(k => {
if (wordFreq[k + 's']) {
// completion of the truncated excerpt: fold the plural form's count into the singular key
wordFreq[k] += wordFreq[k + 's']
delete wordFreq[k + 's']
}
})
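// --- Minimal standalone sketch of the stop word step used above (illustrative values only). ---
// stopword's removeStopwords takes an array of tokens and, with no second argument, strips the
// default English stop words.
const sw = require('stopword')
const sampleTokens = 'the quick brown fox jumps over the lazy dog'.split(' ')
console.log(sw.removeStopwords(sampleTokens)) // common words such as "the" are dropped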
// --- next excerpt (its opening line is truncated): the chain below builds item.uri from a source file path ---
// `sourcePath` is a placeholder for the truncated receiver in the original code.
item.uri = sourcePath
.split("/")
.splice(2)
.join("/")
.replace(/\.[^/.]+$/, "");
// Remove _index and index files from the uri
const compUriArray = item.uri.split("/");
const lastItemInCompArray = compUriArray[compUriArray.length - 1];
if (
lastItemInCompArray.includes("index") ||
lastItemInCompArray.includes("_index")
) {
compUriArray.pop();
item.uri = compUriArray.join("/");
}
let content = stopword
.removeStopwords(meta.content.split(/\s+/))
.join(" ")
.replace(/\W/g, " ")
.trim();
let truncatedContent = truncate(content, _this.contentSize); // 20kB limit
item.content = truncatedContent;
// If this is a partial index, remove everything but the props we want
if (self.partial) {
item = _.pick(item, self.customInd);
}
// Include an objectID to prevent duplicated entries in the index.
item.objectID = meta.data.objectID
? meta.data.objectID
: item.uri;
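// For orientation, an illustrative shape of one index record after the processing above
// (field names come from the excerpt; the values here are made up):
// {
//   uri: "posts/my-first-post",      // extension and trailing index/_index segment removed
//   content: "quick brown fox ...",  // stop words stripped, truncated to contentSize
//   objectID: "posts/my-first-post"  // falls back to the uri when none is set in meta.data
// }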
const withoutStopWords = str => {
// turn the string into an array of words
const arr = strToArray(str);
// filter out any words that are considered stop words
const cleaned = stopword.removeStopwords(arr);
// join the array back into a string
const joined = cleaned.join(' ');
// return the string
return joined;
};
export const withoutStopWords = (str: string): string => {
// turn the string into an array of words
const arr = strToArray(str);
// filter out any words that are considered stop words
const cleaned = stopword.removeStopwords(arr);
// join the array back into a string
const joined = cleaned.join(' ');
// return the string
return joined;
};
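// A hedged usage sketch for the helper above, assuming `strToArray` simply splits on whitespace
// (that helper is not part of these excerpts):
const stopword = require('stopword');
const strToArray = (str) => str.trim().split(/\s+/);
const withoutStopWords = (str) => stopword.removeStopwords(strToArray(str)).join(' ');
console.log(withoutStopWords('this is the text we want to clean'));
// -> a shorter string such as "text clean" (exact output depends on the default English stop word list)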
// Tokenise the input with natural's WordTokenizer, optionally removing stop words first.
private preprocess(text: string, { removeSW = false }): string[] {
const tokenizer = new WordTokenizer();
let tokens = text.split(' ');
if (removeSW) {
tokens = sw.removeStopwords(tokens, ENGLISH_STOP_WORDS);
}
return tokenizer.tokenize(tokens.join(' '));
}
}
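// The method above assumes imports along these lines (a sketch; names outside the excerpt are
// assumptions, including the ENGLISH_STOP_WORDS list):
// import { WordTokenizer } from 'natural';
// import * as sw from 'stopword';
// const ENGLISH_STOP_WORDS: string[] = [ /* custom list passed as removeStopwords' second argument */ ];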
exports.getVector = function(text, options) {
if (typeof text != "string")
throw new Error("error: input must be a string");
var defaults = {
nGramLength: 1,
separator: /[\|' \.,\-|(\n)]+/,
stopwords: sw.getStopwords()
}
options = _.defaults(options || {}, defaults)
if (options.nGramLength == 0)
throw new Error("error: nGramLength must be greater than 0");
// remove stop words from the raw string, then split into tokens (this excerpt uses an older string-based stopword API)
var tokens = sw.removeStopwords(text, {
inputSeparator: options.separator,
stopwords: options.stopwords
}).split(' ');
var vec = []
if (!isNaN(options.nGramLength)) {
return getTermVectorForNgramLength(tokens, options.nGramLength);
}
else if (options.nGramLength.constructor === Array) {
for (var i = 0; i < options.nGramLength.length; i++)
vec = vec.concat(getTermVectorForNgramLength(tokens, options.nGramLength[i]))
return vec;
}
else if (typeof options.nGramLength === "object"
&& (parseInt(options.nGramLength.gte) <= parseInt(options.nGramLength.lte))) {
var j = parseInt(options.nGramLength.gte);
while (j <= options.nGramLength.lte) {
// completion of the truncated excerpt: accumulate vectors for every n-gram length in the range
vec = vec.concat(getTermVectorForNgramLength(tokens, j))
j++
}
return vec;
}
}
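// Hedged usage sketch for getVector (getTermVectorForNgramLength is defined elsewhere in the
// original module, so the exact output shape is not shown here):
// getVector("the quick brown fox jumps", { nGramLength: 1 })
// getVector("the quick brown fox jumps", { nGramLength: { gte: 1, lte: 3 } })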
// Build a de-duplicated term set for each paper from its title and abstract.
const terms = uniqBy(papers, 'key').map(paper => {
const title = cleanTerms(paper.title ? paper.title : '')
const abstract = cleanTerms(paper.abstract ? paper.abstract : '')
let termset = uniq(title.concat(abstract))
termset = stopword.removeStopwords(termset)
termset = stopword.removeStopwords(termset, stopwords)
return termset.map(term => {
if (term === 'cells') return 'cell'
if (term === 'genes') return 'gene'
return term
})
})
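// Note on the excerpt above: removeStopwords runs twice, first with the default English list and
// then with a project-specific `stopwords` array (not shown in the excerpt), e.g.:
// const stopwords = ['via', 'using', 'towards']  // hypothetical domain-specific list
// termset = stopword.removeStopwords(termset, stopwords)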