@cached_property
def headings(self):
    return tuple(s for s in self._sentences if s.is_heading)
@cached_property
def document(self):
    current_paragraph = []
    paragraphs = []
    for line in self._text.splitlines():
        line = line.strip()
        if line.isupper():
            # an ALL-CAPS line becomes a heading sentence
            heading = Sentence(line, self._tokenizer, is_heading=True)
            current_paragraph.append(heading)
        elif not line and current_paragraph:
            # a blank line closes the paragraph in progress
            sentences = self._to_sentences(current_paragraph)
            paragraphs.append(Paragraph(sentences))
            current_paragraph = []
        elif line:
            current_paragraph.append(line)

    # assumed: the trailing paragraph is flushed and the collected paragraphs
    # are wrapped into the parser's document model (ObjectDocumentModel assumed
    # from the surrounding module)
    sentences = self._to_sentences(current_paragraph)
    paragraphs.append(Paragraph(sentences))
    return ObjectDocumentModel(paragraphs)
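The property above builds the document by scanning raw text line by line: ALL-CAPS lines become heading sentences, blank lines close the current paragraph, and everything else accumulates into the paragraph in progress. Below is a minimal self-contained sketch of the same splitting idea; the function name and the plain-string output are hypothetical stand-ins for the parser's Sentence/Paragraph objects.

# Standalone sketch of the paragraph-splitting logic (hypothetical names,
# plain strings instead of Sentence/Paragraph objects).
def split_paragraphs(text):
    paragraphs, current = [], []
    for line in text.splitlines():
        line = line.strip()
        if line.isupper():
            current.append(('heading', line))   # ALL-CAPS line -> heading
        elif not line and current:
            paragraphs.append(current)          # blank line closes the paragraph
            current = []
        elif line:
            current.append(('body', line))
    if current:                                 # flush the trailing paragraph
        paragraphs.append(current)
    return paragraphs

text = "INTRO\nFirst sentence. Second sentence.\n\nNext paragraph here."
for i, para in enumerate(split_paragraphs(text)):
    print(i, para)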
@cached_property
def document(self):
    # annotation tags that may appear in the article's main text:
    # a abbr acronym b big blink blockquote cite code
    # dd del dfn dir dl dt em h h1 h2 h3 h4
    # h5 h6 i ins kbd li marquee menu ol pre q
    # s samp strike strong sub sup tt u ul var
    headers = ('h1', 'h2', 'h3')
    annotated_text = self._article.main_text

    paragraphs = []
    for paragraph in annotated_text:
        sentences, current_text = [], ''
        for text, annotations in paragraph:
            if annotations and any(h_tag in annotations for h_tag in headers):
                sentences.append(Sentence(text, self._tokenizer, is_heading=True))
            # skip <pre> tags
            elif not (annotations and 'pre' in annotations):
                current_text += ' ' + text

        # assumed: the remaining text is sentence-tokenized and the paragraph
        # is added to the document
        new_sentences = self.tokenize_sentences(current_text)
        sentences.extend(Sentence(s, self._tokenizer) for s in new_sentences)
        paragraphs.append(Paragraph(sentences))

    return ObjectDocumentModel(paragraphs)  # document model class assumed
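This variant works over pre-annotated article text rather than raw lines: each paragraph is a sequence of (text, annotations) pairs, heading tags (h1-h3) become heading sentences, <pre> runs are skipped, and the remaining text is joined for sentence tokenization. A small sketch of that data shape and the filtering step, with hypothetical values:

# Hypothetical annotated paragraph: (text, annotations) pairs, where
# annotations is a tuple of HTML tag names (or None).
paragraph = [
    ("Getting started", ("h2",)),          # becomes a heading sentence
    ("Install the package first.", None),  # plain body text
    ("pip install example", ("pre",)),     # <pre> content is skipped
    ("Then import it.", ("p",)),
]

headers = ("h1", "h2", "h3")
headings, body = [], ""
for text, annotations in paragraph:
    if annotations and any(tag in annotations for tag in headers):
        headings.append(text)
    elif not (annotations and "pre" in annotations):
        body += " " + text

print(headings)        # ['Getting started']
print(body.strip())    # 'Install the package first. Then import it.'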
@cached_property
def significant_words(self):
    words = []
    for paragraph in self._article.main_text:
        for text, annotations in paragraph:
            if not self._contains_any(annotations, *self.SIGNIFICANT_TAGS):
                continue
            words.extend(self.tokenize_words(text))
    return tuple(words) if words else self.SIGNIFICANT_WORDS
@cached_property
def stigma_words(self):
    words = []
    for paragraph in self._article.main_text:
        for text, annotations in paragraph:
            if self._contains_any(annotations, 'a', 'strike', 's', 'span'):
                words.extend(self.tokenize_words(text))
    return tuple(words) if words else self.STIGMA_WORDS
@cached_property
def significant_words(self):
    words = []
    for paragraph in self._article.main_text:
        for text, annotations in paragraph:
            if self._contains_any(annotations, *self.SIGNIFICANT_TAGS):
                words.extend(self.tokenize_words(text))
    if words:
        return tuple(words)
    else:
        return self.SIGNIFICANT_WORDS
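Both the significant_words and stigma_words properties lean on a _contains_any helper that the snippets do not show: it checks whether any of the given tag names appears in the annotation collection attached to a run of text. A plausible sketch of such a helper, labeled hypothetical:

# Hypothetical sketch of the _contains_any helper used above; annotations may
# be None for unannotated text, so that case short-circuits to False.
def _contains_any(annotations, *tags):
    if not annotations:
        return False
    return any(tag in annotations for tag in tags)

# Example: bold/heading runs would count as "significant",
# struck-through or link text as "stigma".
print(_contains_any(("b", "strong"), "b", "h1"))   # True
print(_contains_any(None, "a", "strike"))          # False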
@cached_property
def words(self):
    return self._tokenizer.to_words(self._text)
@cached_property
def words(self):
    # requires: from itertools import chain
    words = (p.words for p in self._paragraphs)
    return tuple(chain(*words))
@cached_property
def headings(self):
    headings = (p.headings for p in self._paragraphs)
    return tuple(chain(*headings))
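The last two properties flatten per-paragraph tuples into one document-level tuple with itertools.chain. A tiny standalone sketch of the same idiom, using made-up stand-in objects:

from itertools import chain

# Stand-in paragraphs (hypothetical) that each expose a tuple of words;
# chain(*...) flattens them into one document-level tuple.
class FakeParagraph:
    def __init__(self, words):
        self.words = tuple(words)

paragraphs = [FakeParagraph(["first", "para"]), FakeParagraph(["second"])]
all_words = tuple(chain(*(p.words for p in paragraphs)))
print(all_words)   # ('first', 'para', 'second')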