How to use nodejieba - 10 common examples

To help you get started, we’ve selected a few nodejieba examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github alvinhui / machine-learning / 03_naive_bayes / chatbot / app.js View on Github external
// Handle one line of user input: segment it with nodejieba, turn the words
// into a vector over vocabList, classify that vector and print the response.
// Empty input gets a generic prompt instead.
listen((text) => {
  if (!text.length) {
    console.log('Any help?');
    return;
  }
  // nodejieba.cut is used here in place of the earlier custom tokenizer.
  const tokens = nodejieba.cut(text);
  const inputVec = words2Vec(vocabList, tokens);
  console.log(response(classify(inputVec, weights, pAbusive)));
});
github gitbook-plugins / gitbook-plugin-search-pro / main.js View on Github external
"page": function(page){

            // console.log(page);

            // 建立页面内容索引
            pageIndex[pageId] = {
                path : page.path.replace(/readme\.md$/i,'index.html').replace(/\.md$/,'.html'),
                title : page.progress.current.title,
                level : page.progress.current.level
            }

            // 分词
            var words = _.uniq( nodejieba.cut(page.content) );

            // 去重
            _(words).forEach(function(word) {

                // 不索引1个字的词
                //if(word.length > 1){

                    // 转为大写
                    word = word.toUpperCase();

                    // 如果没有这个词的索引空间
                    if(!searchIndexMap[word]) {
                        searchIndexMap[word] = [];
                    }

                    // 搜索词容器推入
github alibaba / ice / scripts / participle / index.js View on Github external
function cut(str = '', splitBy = ' ', limit = 500) {
  return jieba
    .extract(str, limit)
    .map(({ word }) => word)
    .filter((word) => {
      if (word.trim() === '') {
        return false;
      }
      if (ignoreWordsHash[word]) {
        return false;
      }
      // 过滤 CDN 图片地址
      if (/^TB\w+/.test(word)) {
        return false;
      }
      return true;
    })
    .join(splitBy);
github alvinhui / machine-learning / 03_naive_bayes / chatbot / training.js View on Github external
patterns.forEach((pattern) => {
    // Segment the pattern with nodejieba (used in place of the earlier custom
    // tokenizer) and record the word list together with the current tag.
    document.push(nodejieba.cut(pattern));
    classes.push(tag);
  });
  responses[tag] = intent.responses;
github ccforward / zhihu / spider / util / tag.js View on Github external
var cheerio = require('cheerio')
var co = require("co");
var ArticleDAO = require('../db/models/article');
var TagDAO = require('../db/models/tag');
var jieba = require("nodejieba");


// Data-access objects used by the code below.
var articleDAO = new ArticleDAO();
var tagDAO = new TagDAO();

// Tell jieba which stop-word dictionary file to load.
jieba.load({
    stopWordDict: './dict/stop_words.utf8',
});

var Tag = {
    saveTags: function(aid){
        // var gen = function* (){
        //     var tags = yield tagDAO.search({aid: aid});
        //     if(tags.length<=0){
        //         var result = yield articleDAO.search(aid);
        //         var $ = cheerio.load(result.body, {decodeEntities: false});
        //         var wordsArr = jieba.extract($.root().text(), 10),
        //             tagArr = [];
        //         for(var i=0,len=wordsArr.length;i
github alibaba / ice / scripts / participle / index.js View on Github external
const jieba = require('nodejieba');
// Load the project's user dictionary into jieba up front, when this module is first evaluated.
jieba.load({
  userDict: './scripts/participle/userDict.utf8',
});
// Lookup table of words to drop from segmentation results; it starts out empty here.
const ignoreWordsHash = {};

// const ignoreWords = [
//   ',',
//   '.',
//   ':',
//   '。',
//   ';',
//   ':',
//   '(',
//   ')',
//   '-',
//   '*',
//   '<',
github weather-bot / WxKitty / lib / segment.js View on Github external
const jieba = require("nodejieba");
const trans = require('chinese-conv');
const path = require("path");

// Configure jieba with a user dictionary whose path is resolved relative to this file's directory.
jieba.load({
    // Custom user-defined words
    userDict: path.join(__dirname, '../data/jieba_userdict.utf8')
});

// nodejieba only supports Simplified Chinese, so the input is converted from
// Traditional to Simplified before segmentation, and every resulting word is
// converted back to Traditional Chinese afterwards.
/**
 * Segment a piece of text into words.
 *
 * @param {string} input - Text to segment (presumably Traditional Chinese).
 * @returns {Array} The segmented words, each passed through `trans.tify`,
 *   in the order returned by `jieba.cut`.
 */
const segment = input =>
    jieba.cut(trans.sify(input)).map(word => trans.tify(word));
module.exports = segment;
github gitbook-plugins / gitbook-plugin-search-pro / main.js View on Github external
"init" : function(){

            config = this.options;

            pluginConfig = this.options.pluginsConfig['search-pro'];

            // 导入book.json里面插件配置中的自定义词典
            // nodejieba.insertWord("word1","word2",....);

            nodejieba.insertWord.apply(this,pluginConfig.defineWord);

        },
github alvinhui / machine-learning / 04_logistic / chatbot_natural / app.js View on Github external
// Classify each line of user input from its nodejieba word list and print
// the result; empty input gets a generic prompt instead.
listen(function(text) {
    if (!text.length) {
      console.log('Any help?');
      return;
    }
    const tokens = nodejieba.cut(text);
    console.log(classifier.classify(tokens));
  });
});
github feix760 / Chrome_12306 / app / lib / jieba.js View on Github external
exports.uniq = function(str) {
  return _.uniq(jieba.tag(str).sort())
    .filter(function(item) {
      return !item.match(/\s/);
    })
    .map(function(item) {
      item = item.split(':');
      return {
        name: item[0],
        type: item[1]
      }
    });
};

nodejieba

Chinese word segmentation for Node.js

MIT
Latest version published 21 days ago

Package Health Score

80 / 100
Full package analysis