How to use sumy - 10 common examples

To help you get started, we’ve selected a few sumy examples based on popular ways the library is used in public projects.

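Before diving into the examples, here is a minimal getting-started sketch of the usual sumy pipeline: parse plain text, pick a summarizer, set stop words, summarize. The input text and the one-sentence summary length are illustrative only:

from sumy.nlp.stemmers import Stemmer
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.lsa import LsaSummarizer
from sumy.utils import get_stop_words

text = "Sumy extracts summaries from text. It selects the most informative sentences."
parser = PlaintextParser.from_string(text, Tokenizer("english"))
summarizer = LsaSummarizer(Stemmer("english"))
summarizer.stop_words = get_stop_words("english")

# Ask for a one-sentence summary and print it.
for sentence in summarizer(parser.document, 1):
    print(sentence)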

From miso-belica/sumy, tests/test_models/test_tf.py:

import pytest

from sumy.models import TfDocumentModel


def test_normalized_words_frequencies():
    words = "a b c d e c b d c e e d e d e".split()
    model = TfDocumentModel(tuple(words))

    # Frequencies are normalized by the count of the most frequent term ("e", 5x).
    assert model.normalized_term_frequency("a") == pytest.approx(1/5)
    assert model.normalized_term_frequency("b") == pytest.approx(2/5)
    assert model.normalized_term_frequency("c") == pytest.approx(3/5)
    assert model.normalized_term_frequency("d") == pytest.approx(4/5)
    assert model.normalized_term_frequency("e") == pytest.approx(5/5)
    # Unknown terms have zero frequency.
    assert model.normalized_term_frequency("z") == pytest.approx(0.0)

    # Terms are ordered from most to least frequent.
    assert model.most_frequent_terms() == ("e", "d", "c", "b", "a")
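
As this test and the later test_terms example show, TfDocumentModel accepts either an already-tokenized sequence of words or a raw string together with a tokenizer. A small sketch of both construction forms (variable names are illustrative):

from sumy.models import TfDocumentModel
from sumy.nlp.tokenizers import Tokenizer

model_from_words = TfDocumentModel(("a", "b", "b"))               # pre-tokenized words
model_from_text = TfDocumentModel("a b b", Tokenizer("english"))  # raw text + tokenizer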

From miso-belica/sumy, tests/test_summarizers/test_kl.py:

from sumy.models.dom import Sentence
from sumy.nlp.tokenizers import Tokenizer


# "summarizer" is a pytest fixture providing a KLSummarizer instance;
# build_document is a helper from the project's test suite.
def test_single_sentence(summarizer):
    s = Sentence("I am one slightly longer sentence.", Tokenizer("english"))
    document = build_document([s])

    returned = summarizer(document, 10)

    assert len(returned) == 1

From miso-belica/sumy, tests/test_models/test_tf.py:

from sumy.models import TfDocumentModel
from sumy.nlp.tokenizers import Tokenizer


def test_most_frequent_terms_empty():
    tokenizer = Tokenizer("english")
    model = TfDocumentModel("", tokenizer)

    # An empty document has no terms, with or without an explicit limit.
    assert model.most_frequent_terms() == ()
    assert model.most_frequent_terms(10) == ()

From miso-belica/sumy, tests/test_tokenizers.py:

from sumy.nlp.tokenizers import Tokenizer


# sumy has no separate Slovak tokenizer: "slovak" is aliased to the Czech one,
# while the reported language stays "slovak".
def test_slovak_alias_into_czech_tokenizer():
    tokenizer = Tokenizer("slovak")
    assert tokenizer.language == "slovak"

    sentences = tokenizer.to_sentences("""
        Je to veľmi fajn. Bodaj by nie.
        Ale na druhej strane čo je to oproti inému?
        To nechám na čitateľa.
    """)

    expected = (
        "Je to veľmi fajn.",
        "Bodaj by nie.",
        "Ale na druhej strane čo je to oproti inému?",
        "To nechám na čitateľa.",
    )
    assert expected == sentences

From miso-belica/sumy, tests/test_models/test_tf.py:

from sumy.models import TfDocumentModel
from sumy.nlp.tokenizers import Tokenizer


def test_terms():
    tokenizer = Tokenizer("english")
    text = "wA wB wC wD wB wD wE"
    model = TfDocumentModel(text, tokenizer)

    # Terms are lowercased and deduplicated.
    terms = tuple(sorted(model.terms))
    assert terms == ("wa", "wb", "wc", "wd", "we")

From miso-belica/sumy, tests/test_summarizers/test_text_rank.py:

from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.text_rank import TextRankSummarizer


# Excerpted from a unittest.TestCase class; build_document is a test helper.
def test_empty_document(self):
    document = build_document()
    summarizer = TextRankSummarizer(Stemmer("english"))

    # Summarizing an empty document returns no sentences.
    returned = summarizer(document, 10)
    self.assertEqual(len(returned), 0)

From miso-belica/sumy, tests/test_summarizers/test_reduction.py:

from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.reduction import ReductionSummarizer


# build_document is a helper from the project's test suite.
def test_empty_document():
    document = build_document()
    summarizer = ReductionSummarizer(Stemmer("english"))

    returned = summarizer(document, 10)
    assert len(returned) == 0

From miso-belica/sumy, tests/test_summarizers/test_lsa.py:

from sumy.nlp.stemmers import Stemmer
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.lsa import LsaSummarizer
from sumy.utils import get_stop_words


def test_real_example():
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    # load_resource is a test helper that reads a fixture file from the repo.
    parser = PlaintextParser.from_string(
        load_resource("snippets/prevko.txt"),
        Tokenizer("czech")
    )
    summarizer = LsaSummarizer(Stemmer("czech"))
    summarizer.stop_words = get_stop_words("czech")

    sentences = summarizer(parser.document, 2)
    assert len(sentences) == 2

From miso-belica/sumy, tests/test_parsers.py:

from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser


# Excerpted from a unittest.TestCase class; the original test's assertions
# are not shown in this excerpt.
def test_parse_plaintext_long(self):
        parser = PlaintextParser.from_string("""
            Ako sa máš? Ja dobre! A ty? No
            mohlo to byť aj lepšie!!! Ale pohodička.

            TOTO JE AKOŽE NADPIS
            A toto je text pod ním, ktorý je textový.
            A tak ďalej...

            VEĽKOLEPÉ PREKVAPENIE
            Tretí odstavec v tomto texte je úplne o ničom. Ale má
            vety a to je hlavné. Takže sa majte na pozore ;-)

            A tak ďalej...


            A tak este dalej!
        """, Tokenizer("czech"))

From miso-belica/sumy, tests/test_summarizers/test_lex_rank.py:

from sumy.nlp.stemmers.czech import stem_word
from sumy.nlp.tokenizers import Tokenizer
from sumy.parsers.plaintext import PlaintextParser
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.utils import get_stop_words


def test_article_example():
    """Source: http://www.prevko.cz/dite/skutecne-pribehy-deti"""
    # load_resource is a test helper that reads a fixture file from the repo.
    parser = PlaintextParser.from_string(
        load_resource("articles/prevko_cz_1.txt"),
        Tokenizer("czech")
    )
    summarizer = LexRankSummarizer(stem_word)
    summarizer.stop_words = get_stop_words("czech")

    sentences = summarizer(parser.document, 20)
    assert len(sentences) == 20
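
Note that the summarizers take their stemmer as a callable: the LSA example above passes a Stemmer("czech") instance, while this LexRank test passes the bare stem_word function. A sketch of the two forms, assuming both remain supported:

from sumy.nlp.stemmers import Stemmer
from sumy.nlp.stemmers.czech import stem_word
from sumy.summarizers.lex_rank import LexRankSummarizer

summarizer_a = LexRankSummarizer(Stemmer("czech"))  # stemmer object (callable)
summarizer_b = LexRankSummarizer(stem_word)         # plain stemming function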