How to use tomotopy - 2 common examples

To help you get started, we’ve selected a few tomotopy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github bab2min / tomotopy / example.py View on Github external
def hdp_example(input_file, save_path):
    """Train an HDP topic model on a line-per-document corpus and save it.

    Args:
        input_file: Path to a UTF-8 text file. Each line is stripped, split
            on whitespace, and added to the model as one document.
        save_path: Destination passed to ``mdl.save``.
    """
    mdl = tp.HDPModel(tw=tp.TermWeight.ONE, min_cf=3, rm_top=5)
    # Open the corpus in a context manager so the file handle is closed
    # deterministically, even if add_doc raises part-way through.
    with open(input_file, encoding='utf-8') as corpus:
        for line in corpus:
            mdl.add_doc(line.strip().split())
    mdl.burn_in = 100
    # Zero-iteration train call before reading the corpus statistics below
    # (presumably this finalizes the vocabulary -- confirm against the
    # tomotopy documentation).
    mdl.train(0)
    print('Num docs:', len(mdl.docs), ', Vocab size:', mdl.num_vocabs, ', Num words:', mdl.num_words)
    print('Removed top words:', mdl.removed_top_words)
    print('Training...', file=sys.stderr, flush=True)
    # 100 rounds of 10 iterations each; progress is reported after every round.
    for i in range(0, 1000, 10):
        mdl.train(10)
        print('Iteration: {}\tLog-likelihood: {}\tNum. of topics: {}'.format(i, mdl.ll_per_word, mdl.live_k))

    print('Saving...', file=sys.stderr, flush=True)
    mdl.save(save_path, True)

    # Topic ids ordered from the largest per-topic count to the smallest.
    # NOTE(review): not consumed within this excerpt; the snippet appears to
    # be cut off here, so the list is kept for whatever code follows.
    important_topics = [k for k, v in sorted(enumerate(mdl.get_count_by_topics()), key=lambda x:x[1], reverse=True)]
github bab2min / tomotopy / example.py View on Github external
def lda_example(input_file, save_path):
    mdl = tp.LDAModel(tw=tp.TermWeight.ONE, min_cf=3, rm_top=5, k=20)
    for n, line in enumerate(open(input_file, encoding='utf-8')):
        ch = line.strip().split()
        mdl.add_doc(ch)
    mdl.burn_in = 100
    mdl.train(0)
    print('Num docs:', len(mdl.docs), ', Vocab size:', mdl.num_vocabs, ', Num words:', mdl.num_words)
    print('Removed top words:', mdl.removed_top_words)
    print('Training...', file=sys.stderr, flush=True)
    for i in range(0, 1000, 10):
        mdl.train(10)
        print('Iteration: {}\tLog-likelihood: {}'.format(i, mdl.ll_per_word))

    print('Saving...', file=sys.stderr, flush=True)
    mdl.save(save_path, True)

    for k in range(mdl.k):

tomotopy

Tomoto, Topic Modeling Tool for Python

MIT
Latest version published 4 months ago

Package Health Score

67 / 100
Full package analysis