How to use the ngram.group_text_by_category function in ngram

To help you get started, we’ve selected a few examples based on popular ways the ngram module is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

GitHub: stanfordnlp / cocoa / src / analysis / negotiation / get_data_statistics.py (View on GitHub, external link)
def analyze_ngrams(transcripts, grouping_fn=ngram.group_text_by_category, output_dir=None, n=5):
    if output_dir is None:
        output_dir = stats_output

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    top_ngrams_by_agent = []
    for agent_type in args.agent_types:
        grouped_utterances = grouping_fn(transcripts, agent_type)
        analyzer = ngram.NgramAnalyzer(grouped_utterances, n=n, agent_type=agent_type)

        top_ngrams_by_cat = analyzer.analyze()
        top_ngrams_by_agent.append(top_ngrams_by_cat)

    ngram.plot_top_ngrams(top_ngrams_by_agent, agents=args.agent_types,
                          output_dir=output_dir,
GitHub: stanfordnlp / cocoa / src / analysis / negotiation / get_data_statistics.py (View on GitHub, external link)
# tf_idf_by_winner(transcripts)
        tf_idf_dir = os.path.join(stats_output, 'tfidf')
        if not os.path.exists(tf_idf_dir):
            os.makedirs(tf_idf_dir)
        n_range = xrange(2, 4)
        for i in n_range:
            analyze_tf_idf(transcripts,
                           grouping_fn=tf_idf.group_by_category_role_winner,
                           n=i,
                           output_dir=os.path.join(tf_idf_dir, 'by_winner'))

    if args.ngram:
        ngram_dir = os.path.join(stats_output, 'ngram')
        if not os.path.exists(ngram_dir):
            os.makedirs(ngram_dir)
        analyze_ngrams(transcripts, grouping_fn=ngram.group_text_by_category,
                       output_dir=os.path.join(ngram_dir, 'by_category'))
        analyze_ngrams(transcripts, grouping_fn=ngram.group_text_by_role,
                       output_dir=os.path.join(ngram_dir, 'by_role'))
        analyze_ngrams(transcripts, grouping_fn=ngram.group_text_by_winner,
                       output_dir=os.path.join(ngram_dir, 'by_winner'))