How to use the sumy.summarizers.lex_rank.LexRankSummarizer class in sumy

To help you get started, we’ve selected a few sumy examples based on popular ways it is used in public projects.
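
Before the project examples, here is a minimal, self-contained sketch of the typical LexRank workflow. The input text is a placeholder; the parser, stemmer, and stop-word calls follow the same sumy API used in the examples below:

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.utils import get_stop_words

text = "Your document text goes here. It should contain several sentences."  # placeholder

parser = PlaintextParser.from_string(text, Tokenizer("english"))
summarizer = LexRankSummarizer(Stemmer("english"))
summarizer.stop_words = get_stop_words("english")

# Print the 2 highest-ranked sentences.
for sentence in summarizer(parser.document, 2):
    print(sentence)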


github miso-belica / sumy / tests / test_summarizers / test_lex_rank.py (View on GitHub)
from sumy.summarizers.lex_rank import LexRankSummarizer


def test_cosine_similarity_of_identical_sentences():  # name is illustrative; the excerpt omits the original def line
    """
    We compute similarity of the same sentences. These should be exactly the same and
    therefore have similarity close to 1.0.
    see https://github.com/miso-belica/sumy/issues/58
    """
    sentence1 = ["this", "sentence", "is", "simple", "sentence"]
    tf1 = {"this": 1/2, "sentence": 1.0, "is": 1/2, "simple": 1/2}
    sentence2 = ["this", "sentence", "is", "simple", "sentence"]
    tf2 = {"this": 1/2, "sentence": 1.0, "is": 1/2, "simple": 1/2}
    idf = {
        "this": 2/2,
        "sentence": 2/2,
        "is": 2/2,
        "simple": 2/2,
    }

    summarizer = LexRankSummarizer()
    cosine = summarizer.cosine_similarity(sentence1, sentence2, tf1, tf2, idf)

    assert abs(1.0 - cosine) < 0.00001
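
Under the hood, LexRank scores sentence pairs with an idf-modified cosine similarity (Erkan & Radev, 2004). A rough sketch of that computation over the tf/idf dictionaries used above, as an illustration rather than sumy's exact implementation:

import math

def idf_modified_cosine(tf1, tf2, idf):
    # Numerator: words shared by both sentences, weighted by squared idf.
    shared = set(tf1) & set(tf2)
    numerator = sum(tf1[w] * tf2[w] * idf[w] ** 2 for w in shared)
    # Denominator: product of the two sentences' tf-idf vector norms.
    norm1 = math.sqrt(sum((tf1[w] * idf[w]) ** 2 for w in tf1))
    norm2 = math.sqrt(sum((tf2[w] * idf[w]) ** 2 for w in tf2))
    return numerator / (norm1 * norm2)

For the identical sentences in the test, this returns exactly 1.0, which is what the assertion checks.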

github miso-belica / sumy / tests / test_summarizers / test_lex_rank.py (View on GitHub)
def test_tf_metrics():
    summarizer = LexRankSummarizer()

    sentences = [
        ("this", "sentence", "is", "simple", "sentence"),
        ("this", "is", "simple", "sentence", "yes", "is", "too", "too", "too"),
    ]
    metrics = summarizer._compute_tf(sentences)

    expected = [
        {"this": 1/2, "is": 1/2, "simple": 1/2, "sentence": 1.0},
        {"this": 1/3, "is": 2/3, "yes": 1/3, "simple": 1/3, "sentence": 1/3, "too": 1.0},
    ]
    assert expected == metrics
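
The expected values show how sumy normalizes term frequency: each term's count is divided by the count of the sentence's most frequent term, so in the second sentence "too" (3 occurrences) maps to 1.0 and "is" (2 occurrences) to 2/3. A rough equivalent, as an illustration rather than the library's code:

from collections import Counter

def max_normalized_tf(sentence):
    counts = Counter(sentence)
    max_count = counts.most_common(1)[0][1]
    return {word: count / max_count for word, count in counts.items()}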

github amyxzhang / wikum / wikum / website / views.py (View on GitHub)
def auto_summarize_comment(request):
    
    from sumy.nlp.stemmers import Stemmer
    #from sumy.utils import get_stop_words
    from sumy.parsers.html import HtmlParser
    from sumy.nlp.tokenizers import Tokenizer
    #from sumy.summarizers.lsa import LsaSummarizer as Summarizer
    #from sumy.summarizers.text_rank import TextRankSummarizer as Summarizer
    from sumy.summarizers.lex_rank import LexRankSummarizer as Summarizer
         
    stemmer = Stemmer("english")
    summarizer = Summarizer(stemmer)
    
    comment_ids = request.POST.getlist('d_ids[]')
    
    sent_list = []
    
    for comment_id in comment_ids:
        comment = Comment.objects.get(id=comment_id)
        text = comment.text
        
        text = re.sub('<br>', ' ', text)
        
        parser = HtmlParser.from_string(text, '', Tokenizer("english"))
        
        num_sents = request.GET.get('num_sents', None)
        if not num_sents:

github Jcharis / Streamlit_DataScience_Apps / NLP_App_with_Streamlit_Python / app.py (View on GitHub)
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lex_rank import LexRankSummarizer

def sumy_summarizer(docx):
	parser = PlaintextParser.from_string(docx, Tokenizer("english"))
	lex_summarizer = LexRankSummarizer()
	summary = lex_summarizer(parser.document, 3)
	summary_list = [str(sentence) for sentence in summary]
	result = ' '.join(summary_list)
	return result
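
A quick way to exercise this helper (the sample text is a placeholder):

text = ("Sumy extracts the most salient sentences from a document. "
        "It ships several algorithms, including LexRank. "
        "LexRank ranks sentences by graph centrality. "
        "The three highest-ranked sentences form the summary.")
print(sumy_summarizer(text))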

github BernhardWenzel / scraping-microservice-java-python-rabbitmq / python-scraping-service / scraper.py (View on GitHub)
def scrape(self, url):
        complete_url = url
        try:
            # get summary
            print("Retrieving page summary of %s... " % url)

            parser = HtmlParser.from_url(complete_url, Tokenizer(LANGUAGE))
            stemmer = Stemmer(LANGUAGE)

            summarizer = Summarizer(stemmer)
            summarizer.stop_words = get_stop_words(LANGUAGE)

            url_summary = ''.join(str(sentence) for sentence in summarizer(parser.document, SENTENCES_COUNT))

        except Exception as e:
            url_summary = "Could not scrape summary. Reason: %s" % e

        print("Done: %s = %s" % (url, url_summary))

        # create scraping result
        scraping_result = ScrapingResult()

        scraping_result.summary = url_summary
        scraping_result.url = url

        return scraping_result
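
The snippet above relies on module-level constants (and on Summarizer being aliased to LexRankSummarizer) defined elsewhere in scraper.py. A plausible configuration, with illustrative values:

LANGUAGE = "english"       # assumed value
SENTENCES_COUNT = 10       # assumed value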

github adityasarvaiya / Automatic_Question_Generation / aqg / utils / summarizer.py (View on GitHub)
def summarize_from_file(self, file_name):
        parser = PlaintextParser.from_file(file_name, Tokenizer(self.LANGUAGE))
        stemmer = Stemmer(self.LANGUAGE)
        summarizer = Summarizer(stemmer)
        # Write the summary twice: one sentence per line (file_1)
        # and as a single unbroken block (file_2).
        with open("summarizer_output.txt", "w+") as file_1, \
                open("summarizer_output2.txt", "w+") as file_2:
            for sentence in summarizer(parser.document, self.SENTENCES_COUNT):
                file_2.write(str(sentence))
                file_1.write(str(sentence))
                file_1.write("\n")

github DeFacto / DeFacto / python / trustworthiness / features_core.py (View on GitHub)
def get_summary_lex_rank(self, num_sentence):
        from sumy.parsers.html import HtmlParser
        from sumy.parsers.plaintext import PlaintextParser  # other parsers available for HTML etc.
        from sumy.nlp.tokenizers import Tokenizer
        from sumy.summarizers.lex_rank import LexRankSummarizer  # we're choosing LexRank; other algorithms are also built in

        try:
            # Prefer fetching and parsing the page itself.
            parser = HtmlParser.from_url(self.url, Tokenizer("english"))
        except Exception:
            # Fall back to the plain-text body already stored on the object.
            try:
                parser = PlaintextParser.from_string(self.body, Tokenizer("english"))
            except Exception as e:
                raise e

        summarizer = LexRankSummarizer()
        summary = summarizer(parser.document, num_sentence)
        out = ''
        for sentence in summary:
            out += str(sentence)
        return out
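
Note the fallback pattern here: the method first tries to fetch and parse self.url as HTML, and only if that fails does it parse the already-stored self.body as plain text. Also note that concatenating sentences with no separator runs them together; joining with ' ' as in the Streamlit example above usually reads better.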

github miso-belica / sumy / sumy / __main__.py (View on GitHub)
from .parsers.html import HtmlParser
from .parsers.plaintext import PlaintextParser
from .summarizers.luhn import LuhnSummarizer
from .summarizers.edmundson import EdmundsonSummarizer
from .summarizers.lsa import LsaSummarizer
from .summarizers.text_rank import TextRankSummarizer
from .summarizers.lex_rank import LexRankSummarizer
from .summarizers.sum_basic import SumBasicSummarizer
from .summarizers.kl import KLSummarizer
from .nlp.stemmers import Stemmer

PARSERS = {
    "html": HtmlParser,
    "plaintext": PlaintextParser,
}

AVAILABLE_METHODS = {
    "luhn": LuhnSummarizer,
    "edmundson": EdmundsonSummarizer,
    "lsa": LsaSummarizer,
    "text-rank": TextRankSummarizer,
    "lex-rank": LexRankSummarizer,
    "sum-basic": SumBasicSummarizer,
    "kl": KLSummarizer,
}


def main(args=None):
    args = docopt(to_string(__doc__), args, version=__version__)
    summarizer, parser, items_count = handle_arguments(args)

    for sentence in summarizer(parser.document, items_count):
        if PY3:
            print(to_unicode(sentence))
        else:
            print(to_bytes(sentence))

    return 0
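
This module backs sumy's command-line interface, which maps the method names above to summarizer classes. A typical invocation, with an illustrative URL:

$ sumy lex-rank --length=10 --url=https://en.wikipedia.org/wiki/Automatic_summarization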

github ucfnlp / multidoc_summarization / src / sumy_summarize.py (View on GitHub)
    for x in ["1","2","su4"]:
        for y in ["precision","recall","f_score"]:
            key = "rouge_%s_%s" % (x,y)
            val = results_dict[key]
            log_str += "%.4f\t" % (val)
    log_str += "\n"
    print(log_str)
    results_file = os.path.join(dir_to_write, "ROUGE_results.txt")
    print("Writing final ROUGE results to %s..." % results_file)
    with open(results_file, "w") as f:
        f.write(log_str)

for summary_method in summary_methods:
    print('Summarizing using the method: ' + summary_method)
    if summary_method == 'lexrank':
        summary_fn = LexRankSummarizer
    elif summary_method == 'kl':
        summary_fn = KLSummarizer
    elif summary_method == 'sumbasic':
        summary_fn = SumBasicSummarizer
    else:
        raise Exception('Could not find summary method ' + summary_method)

    if not os.path.exists(os.path.join(out_dir, summary_method, reference_folder)):
        os.makedirs(os.path.join(out_dir, summary_method, reference_folder))
    if not os.path.exists(os.path.join(out_dir, summary_method, decoded_folder)):
        os.makedirs(os.path.join(out_dir, summary_method, decoded_folder))
    print(os.path.join(out_dir, summary_method))
    article_names = sorted(os.listdir(articles_dir))
    for art_idx, article_name in enumerate(tqdm(article_names)):
        file = os.path.join(articles_dir, article_name)
        parser = HtmlParser.from_file(file, "", Tokenizer("english"))