How to use the stopword.removeStopwords function in stopword

To help you get started, we’ve selected a few stopword examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github kaleguy / leovue / src / components / WordCloud.vue View on Github external
const textItems = this.$store.state.leotext
        children.forEach(child => {
          const t = textItems[child.t]
          let textData = {}
          try {
            textData = JSON.parse(t)
          } catch (e) {
            console.log(e, child.id)
          }
          items.push(_.get(textData, this.from, ''))
        })
        let text = items.join()
        // text = stripchar.RSspecChar(text.toLowerCase())
        text = text.replace(/[[\]&,;'"”’().*?]/g, ' ')
        let words = split(text)
        words = sw.removeStopwords(words)
        const wf = {}
        _.remove(words, word => /\d/.test(word))
        words.forEach(word => {
          if (word.length < 4) { return }
          word = word.toLowerCase()
          wf[word] = wf[word] ? wf[word] + 1 : 1
        })
        // debugger
        const wordFreq = {}
        Object.keys(wf).forEach(k => {
          const v = wf[k]
          if (v > this.threshold) wordFreq[k] = v
        })
        const keys = Object.keys(wordFreq)
        keys.forEach(k => {
          if (wordFreq[k + 's']) {
github replicatedhq / hugo-algolia / lib / index.js View on Github external
.splice(2)
              .join("/")
              .replace(/\.[^/.]+$/, "");

        //Remove _index + index files from uri
        const compUriArray = item.uri.split("/");
        const lastItemInCompArray = compUriArray[compUriArray.length - 1];
        if (
          lastItemInCompArray.includes("index") ||
          lastItemInCompArray.includes("_index")
        ) {
          compUriArray.pop();
          item.uri = compUriArray.join("/");
        }

        let content = stopword
          .removeStopwords(meta.content.split(/\s+/))
          .join(" ")
          .replace(/\W/g, " ")
          .trim();
        let truncatedContent = truncate(content, _this.contentSize); // 20kB limit
        item.content = truncatedContent;

        // If this is a partial index, remove everything but the props we want
        if (self.partial) {
          item = _.pick(item, self.customInd);
        }
        
        // Include an objectID to prevent duplicated entries in the index.
        item.objectID = meta.data.objectID
          ? meta.data.objectID
          : item.uri
github withspectrum / spectrum / iris / migrations / 20171208223206-index-messages-for-search.js View on Github external
const withoutStopWords = str => {
  // Tokenize the input, drop stop words, and rebuild a space-separated string.
  const words = strToArray(str);
  const kept = stopword.removeStopwords(words);
  return kept.join(' ');
};
github withspectrum / spectrum / vulcan / utils / text-parsing.js View on Github external
export const withoutStopWords = (str: string): string => {
  // turn the string into an array of words
  const arr = strToArray(str);
  // filter out any words that are considered stop words
  const cleaned = stopword.removeStopwords(arr);
  // join the array back into a string
  const joined = cleaned.join(' ');
  // return the string
  return joined;
};
github specfm / spec-next / servers / search / lib / utils / text-parsing.ts View on Github external
export const withoutStopWords = (str) => {
  // Convert to a word array, remove stop words, and join back into a string.
  return stopword.removeStopwords(strToArray(str)).join(' ');
};
github machinelearnjs / machinelearnjs / src / lib / feature_extraction / text.ts View on Github external
private preprocess(text: string, { removeSW = false }): string[] {
    // Split on single spaces; optionally filter English stop words first.
    let tokens = text.split(' ');
    if (removeSW) {
      tokens = sw.removeStopwords(tokens, ENGLISH_STOP_WORDS);
    }
    // Re-join and run the word tokenizer for the final token list.
    const tokenizer = new WordTokenizer();
    return tokenizer.tokenize(tokens.join(' '));
  }
}
github fergiemcdowall / term-vector / lib / term-vector.js View on Github external
exports.getVector = function(text, options) {
  if (typeof text != "string")
    throw new Error("error: input must be a string");
  var defaults = {
    nGramLength: 1,
    separator: /[\|' \.,\-|(\n)]+/,
    stopwords: sw.getStopwords()
  }
  options = _.defaults(options || {}, defaults)
  if (options.nGramLength == 0)
    throw new Error("error: nGramLength must be greater than 0");
  //tokenise string, remove stopwords
  var tokens = sw.removeStopwords(text, {
    inputSeparator: options.separator,
    stopwords: options.stopwords
  }).split(' ');
  var vec = []
  if (!isNaN(options.nGramLength)) {
    return getTermVectorForNgramLength(tokens, options.nGramLength);
  }
  else if (options.nGramLength.constructor === Array) {
    for (var i = 0; i < options.nGramLength.length; i++)
      vec = vec.concat(getTermVectorForNgramLength(tokens, options.nGramLength[i]))
    return vec;
  }
  else if (typeof(options.nGramLength)
           && (parseInt(options.nGramLength.gte) <= parseInt(options.nGramLength.lte))) {
    var j = parseInt(options.nGramLength.gte);
    while (j <= options.nGramLength.lte) {
github sciencefair-land / sciencefair / app / client / views / detail_multi_terms.js View on Github external
const terms = uniqBy(papers, 'key').map(paper => {
    // Clean title and abstract text, falling back to empty strings.
    const titleTerms = cleanTerms(paper.title || '')
    const abstractTerms = cleanTerms(paper.abstract || '')

    // Merge, dedupe, then strip stop words (default list, then custom list).
    let termset = stopword.removeStopwords(uniq(titleTerms.concat(abstractTerms)))
    termset = stopword.removeStopwords(termset, stopwords)

    // Normalize a couple of common plural forms.
    return termset.map(term =>
      term === 'cells' ? 'cell' : term === 'genes' ? 'gene' : term
    )
  })
github sciencefair-land / sciencefair / app / client / views / detail_multi_terms.js View on Github external
const terms = uniqBy(papers, 'key').map(paper => {
    // Sanitize both text fields; missing fields become empty strings.
    const cleanedTitle = cleanTerms(paper.title || '')
    const cleanedAbstract = cleanTerms(paper.abstract || '')

    // Unique terms across both fields, with stop words removed twice:
    // once with the default list, once with the project-specific list.
    let termset = uniq(cleanedTitle.concat(cleanedAbstract))
    termset = stopword.removeStopwords(stopword.removeStopwords(termset), stopwords)

    // Collapse known plurals to their singular forms.
    return termset.map(term => {
      if (term === 'cells') return 'cell'
      if (term === 'genes') return 'gene'
      return term
    })
  })

stopword

A module for node.js and the browser that takes in text and returns text that is stripped of stopwords. Has pre-defined stopword lists for 62 languages and also takes lists with custom stopwords as input.

MIT
Latest version published 3 months ago

Package Health Score

84 / 100
Full package analysis