How to use the sumy.models.dom._sentence.Sentence class in sumy

To help you get started, we’ve selected a few sumy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github miso-belica / sumy / tests / test_summarizers / test_kl.py View on Github external
def test_single_sentence(summarizer):
    """A one-sentence document must summarize to exactly that sentence."""
    only_sentence = Sentence("I am one slightly longer sentence.", Tokenizer("english"))
    doc = build_document([only_sentence])

    # Requesting up to 10 sentences cannot yield more than the document holds.
    summary = summarizer(doc, 10)

    assert len(summary) == 1
github miso-belica / sumy / tests / test_summarizers / test_sum_basic.py View on Github external
def test_get_all_content_words_in_doc():
    """_get_all_content_words_in_doc returns every normalized word, repeats included.

    With an empty stop-word list, two identical three-word sentences must
    produce each word exactly twice.
    """
    # Function-scope stdlib import: replaces the hand-rolled counting loop.
    from collections import Counter

    summarizer = _build_summarizer(EMPTY_STOP_WORDS)
    s0 = Sentence("One two three.", Tokenizer("english"))
    s1 = Sentence("One two three.", Tokenizer("english"))
    document = build_document([s0, s1])

    content_words = summarizer._get_all_content_words_in_doc(document.sentences)

    # Counter(content_words) builds the same word -> occurrence mapping the
    # original manual dict.get(..., 0) + 1 loop produced, in one idiomatic pass.
    assert Counter(content_words) == {"one": 2, "two": 2, "three": 2}
github miso-belica / sumy / tests / test_summarizers / test_sum_basic.py View on Github external
# NOTE(review): scraped fragment — the enclosing "def test_..." line was lost
# during extraction, so this snippet is not valid stand-alone Python (the first
# line is unindented while the rest keep the function-body indent). The code
# exercises SumBasic's _compute_ratings: lower (more negative) rating means the
# sentence was picked later, after its words' frequencies were discounted.
summarizer = _build_summarizer(EMPTY_STOP_WORDS)

    s0 = Sentence("Dog cat fish.", Tokenizer("english"))
    s1 = Sentence("Dog cat camel.", Tokenizer("english"))
    s2 = Sentence("Fish frog horse.", Tokenizer("english"))
    document = build_document([s0, s1, s2])

    ratings = summarizer._compute_ratings(document.sentences)
    assert ratings[s0] == 0
    assert ratings[s1] == -2
    assert ratings[s2] == -1

    # Due to the frequency discounting, after finding sentence s0,
    # s2 should come before s1 since only two of its words get discounted
    # rather than all 3 of s1's
    s0 = Sentence("one two three", Tokenizer("english"))
    s1 = Sentence("one two four", Tokenizer("english"))
    s2 = Sentence("three five six", Tokenizer("english"))
    document = build_document([s0, s1, s2])

    ratings = summarizer._compute_ratings(document.sentences)
    assert ratings[s0] == 0
    assert ratings[s1] == -2
    assert ratings[s2] == -1
github miso-belica / sumy / tests / test_summarizers / test_sum_basic.py View on Github external
def test_compute_tf():
    """Term frequency must equal count(word) / total words over all sentences."""
    summarizer = _build_summarizer(EMPTY_STOP_WORDS)
    kicking = Sentence("kicking soccer balls.", Tokenizer("english"))
    eating = Sentence("eating chicken dumplings.", Tokenizer("english"))

    # Six words in total, each occurring once -> uniform frequency of 1/6.
    freq = summarizer._compute_tf(build_document([kicking, eating]).sentences)
    for word in ("kicking", "soccer", "balls", "eating", "chicken", "dumplings"):
        assert freq[word] == 1 / 6

    # Repeating the first sentence makes nine words; its words now occur twice.
    freq = summarizer._compute_tf(build_document([kicking, kicking, eating]).sentences)
    for word in ("kicking", "soccer", "balls"):
        assert freq[word] == 2 / 9
    for word in ("eating", "chicken"):
        assert freq[word] == 1 / 9
github miso-belica / sumy / tests / test_summarizers / test_sum_basic.py View on Github external
# NOTE(review): duplicate scraped fragment of the _compute_ratings test — the
# "def" header and the first statement were lost in extraction, so the first
# line below is unindented while the remainder keep the body indent; not valid
# stand-alone Python. Kept byte-identical; see the sumy repository for the
# original test_sum_basic.py source.
s0 = Sentence("Dog cat fish.", Tokenizer("english"))
    s1 = Sentence("Dog cat camel.", Tokenizer("english"))
    s2 = Sentence("Fish frog horse.", Tokenizer("english"))
    document = build_document([s0, s1, s2])

    ratings = summarizer._compute_ratings(document.sentences)
    assert ratings[s0] == 0
    assert ratings[s1] == -2
    assert ratings[s2] == -1

    # Due to the frequency discounting, after finding sentence s0,
    # s2 should come before s1 since only two of its words get discounted
    # rather than all 3 of s1's
    s0 = Sentence("one two three", Tokenizer("english"))
    s1 = Sentence("one two four", Tokenizer("english"))
    s2 = Sentence("three five six", Tokenizer("english"))
    document = build_document([s0, s1, s2])

    ratings = summarizer._compute_ratings(document.sentences)
    assert ratings[s0] == 0
    assert ratings[s1] == -2
    assert ratings[s2] == -1
github miso-belica / sumy / tests / test_summarizers / test_sum_basic.py View on Github external
def test_single_sentence():
    """Summarizing a document that holds one sentence returns that sentence."""
    lone = Sentence("I am one slightly longer sentence.", Tokenizer("english"))
    doc = build_document([lone])
    summarizer = _build_summarizer(EMPTY_STOP_WORDS)

    # A 10-sentence budget is capped by the single sentence available.
    assert len(summarizer(doc, 10)) == 1
github miso-belica / sumy / tests / test_summarizers / test_sum_basic.py View on Github external
def test_get_all_content_words_in_doc():
    """_get_all_content_words_in_doc returns every normalized word, repeats included.

    With an empty stop-word list, two identical three-word sentences must
    produce each word exactly twice.
    """
    # Function-scope stdlib import: replaces the hand-rolled counting loop.
    from collections import Counter

    summarizer = _build_summarizer(EMPTY_STOP_WORDS)
    s0 = Sentence("One two three.", Tokenizer("english"))
    s1 = Sentence("One two three.", Tokenizer("english"))
    document = build_document([s0, s1])

    content_words = summarizer._get_all_content_words_in_doc(document.sentences)

    # Counter(content_words) builds the same word -> occurrence mapping the
    # original manual dict.get(..., 0) + 1 loop produced, in one idiomatic pass.
    assert Counter(content_words) == {"one": 2, "two": 2, "three": 2}
github miso-belica / sumy / tests / test_summarizers / test_kl.py View on Github external
def test_tf_idf_metric_should_be_real_number():
    """https://github.com/miso-belica/sumy/issues/41"""
    summarizer = KLSummarizer()
    sentences = [Sentence("There are five words, jop.", Tokenizer("english"))]

    frequencies = summarizer.compute_tf(sentences)

    # Five distinct words, one occurrence each -> a uniform 1/5 frequency.
    expected = dict.fromkeys(("there", "are", "five", "words", "jop"), 0.2)
    assert frequencies == expected
github miso-belica / sumy / tests / test_summarizers / test_sum_basic.py View on Github external
def test_compute_tf():
    """_compute_tf divides each word's count by the document's total word count."""
    summarizer = _build_summarizer(EMPTY_STOP_WORDS)
    first = Sentence("kicking soccer balls.", Tokenizer("english"))
    second = Sentence("eating chicken dumplings.", Tokenizer("english"))

    # Two sentences, six distinct words -> every frequency is 1/6.
    freq = summarizer._compute_tf(build_document([first, second]).sentences)
    for word in ("kicking", "soccer", "balls", "eating", "chicken", "dumplings"):
        assert freq[word] == 1 / 6

    # Duplicating the first sentence gives nine words; its words appear twice.
    freq = summarizer._compute_tf(build_document([first, first, second]).sentences)
    for word in ("kicking", "soccer", "balls"):
        assert freq[word] == 2 / 9
    assert freq["eating"] == 1 / 9
github miso-belica / sumy / sumy / models / dom / _sentence.py View on Github external
def __eq__(self, sentence):
    """Sentences are equal iff both the heading flag and the text match.

    Note: the isinstance assert (raising AssertionError on a non-Sentence
    operand) is preserved as-is, since callers may rely on that behavior.
    """
    assert isinstance(sentence, Sentence)
    same_kind = self._is_heading is sentence._is_heading
    return same_kind and self._text == sentence._text