How to use the sumy.models.dom.Sentence class in sumy

To help you get started, we’ve selected a few sumy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github miso-belica / sumy / tests / test_models / test_dom.py View on Github external
def test_empty_sentences_equal():
    """Two sentences built from empty text with Czech tokenizers compare equal."""
    # Each sentence gets its own freshly constructed tokenizer.
    left, right = (Sentence("", Tokenizer("czech")) for _ in range(2))

    assert left == right
github miso-belica / sumy / tests / utils.py View on Github external
def build_sentence(sentence_as_string, is_heading=False):
    """Build a ``Sentence`` from raw text using the shared ``_TOKENIZER``.

    ``is_heading`` is forwarded unchanged as the third constructor argument.
    """
    sentence = Sentence(sentence_as_string, _TOKENIZER, is_heading)
    return sentence
github miso-belica / sumy / tests / test_models / test_dom.py View on Github external
def test_same_sentences_equal():
    """Sentences created from identical text compare equal."""
    text = "word another."
    first = Sentence(text, Tokenizer("czech"))
    second = Sentence(text, Tokenizer("czech"))

    assert first == second
github miso-belica / sumy / tests / test_models / test_dom.py View on Github external
def test_same_sentences_equal():
    """Check that two separately built sentences with the same text are equal."""
    # Build both sentences the same way, each with its own tokenizer instance.
    pair = [Sentence("word another.", Tokenizer("czech")) for _ in range(2)]

    assert pair[0] == pair[1]
github miso-belica / sumy / sumy / parsers / plaintext.py View on Github external
def _to_sentence(self, text):
    """Wrap ``text`` in a ``Sentence`` that uses this parser's tokenizer.

    The text must contain at least one non-whitespace character; blank
    input trips the assertion below.
    """
    has_content = bool(text.strip())
    assert has_content
    # The original (unstripped) text is what gets stored in the sentence.
    return Sentence(text, self._tokenizer)
github miso-belica / sumy / sumy / parsers / html.py View on Github external
annotated_text = self._article.main_text

        paragraphs = []
        for paragraph in annotated_text:
            sentences = []

            current_text = ""
            for text, annotations in paragraph:
                if annotations and ("h1" in annotations or "h2" in annotations or "h3" in annotations):
                    sentences.append(Sentence(text, self._tokenizer, is_heading=True))
                # skip <pre> nodes
                elif not (annotations and "pre" in annotations):
                    current_text += " " + text

            new_sentences = self.tokenize_sentences(current_text)
            sentences.extend(Sentence(s, self._tokenizer) for s in new_sentences)
            paragraphs.append(Paragraph(sentences))

        return ObjectDocumentModel(paragraphs)
github miso-belica / sumy / sumy / parsers / plaintext.py View on Github external
def document(self):
    """Parse ``self._text`` into an ``ObjectDocumentModel`` of paragraphs.

    The text is processed line by line after stripping surrounding
    whitespace:

    * a line for which ``str.isupper()`` is true becomes a heading
      ``Sentence`` inside the current paragraph;
    * a blank line closes the current paragraph, if it has any content;
    * any other non-blank line is collected as raw paragraph text.

    Whatever is still pending after the last line is always emitted as a
    final paragraph, even when nothing is pending.
    """
    finished = []
    pending = []  # raw text lines and heading sentences of the open paragraph

    for raw_line in self._text.splitlines():
        stripped = raw_line.strip()

        if stripped.isupper():
            pending.append(Sentence(stripped, self._tokenizer, is_heading=True))
            continue

        if stripped:
            pending.append(stripped)
            continue

        # Blank line: only flush when the open paragraph has content.
        if pending:
            finished.append(Paragraph(self._to_sentences(pending)))
            pending = []

    # The trailing paragraph is appended unconditionally.
    finished.append(Paragraph(self._to_sentences(pending)))

    return ObjectDocumentModel(finished)