Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_empty_sentences_equal():
    """Sentences built from empty text with "czech" tokenizers compare equal."""
    # Each sentence gets its own freshly constructed tokenizer.
    left, right = (Sentence("", Tokenizer("czech")) for _ in range(2))

    assert left == right
def build_sentence(sentence_as_string, is_heading=False):
    """Wrap raw text in a ``Sentence`` using the shared ``_TOKENIZER``.

    ``is_heading`` is forwarded unchanged as the third positional argument
    of ``Sentence``.
    """
    sentence = Sentence(sentence_as_string, _TOKENIZER, is_heading)
    return sentence
def test_same_sentences_equal():
    """Sentences built from identical text compare equal."""
    first = Sentence("word another.", Tokenizer("czech"))
    second = Sentence("word another.", Tokenizer("czech"))

    assert first == second
def test_same_sentences_equal():
    """Sentences built from identical text compare equal."""
    # NOTE(review): a function with this exact name is already defined
    # earlier in this file; this definition rebinds the name — confirm
    # whether one of the two was meant to be a different test.
    lhs, rhs = (Sentence("word another.", Tokenizer("czech")) for _ in range(2))

    assert lhs == rhs
def _to_sentence(self, text):
    """Build a ``Sentence`` from ``text`` using ``self._tokenizer``.

    ``text`` is expected to contain at least one non-whitespace character;
    the assertion below fails otherwise.
    """
    # Guard against blank input before constructing the sentence.
    assert text.strip()

    tokenizer = self._tokenizer
    return Sentence(text, tokenizer)
annotated_text = self._article.main_text
paragraphs = []
for paragraph in annotated_text:
sentences = []
current_text = ""
for text, annotations in paragraph:
if annotations and ("h1" in annotations or "h2" in annotations or "h3" in annotations):
sentences.append(Sentence(text, self._tokenizer, is_heading=True))
# skip <pre> nodes
elif not (annotations and "pre" in annotations):
current_text += " " + text
new_sentences = self.tokenize_sentences(current_text)
sentences.extend(Sentence(s, self._tokenizer) for s in new_sentences)
paragraphs.append(Paragraph(sentences))
return ObjectDocumentModel(paragraphs)
</pre>
def document(self):
    """Build an ``ObjectDocumentModel`` from the lines of ``self._text``.

    Every line is stripped of surrounding whitespace and then handled as
    follows:

    * a blank line closes the paragraph collected so far (if any);
    * a line for which ``isupper()`` is true is stored as a heading
      ``Sentence``;
    * any other line is stored as raw text.

    Collected items are converted with ``self._to_sentences`` when a
    paragraph is closed. Whatever is still pending after the last line is
    always turned into a final paragraph, even when nothing is pending.
    """
    pending = []
    collected = []

    for raw_line in self._text.splitlines():
        stripped = raw_line.strip()

        if not stripped:
            # Blank line: paragraph boundary, but only if something was gathered.
            if pending:
                collected.append(Paragraph(self._to_sentences(pending)))
                pending = []
            continue

        if stripped.isupper():
            pending.append(Sentence(stripped, self._tokenizer, is_heading=True))
        else:
            pending.append(stripped)

    # Unconditional flush of the remaining items into the last paragraph.
    collected.append(Paragraph(self._to_sentences(pending)))

    return ObjectDocumentModel(collected)