# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_real_example():
    """LSA summarizer yields exactly two sentences for a real Czech snippet.

    Source: http://www.prevko.cz/dite/skutecne-pribehy-deti
    """
    tokenizer = Tokenizer("czech")
    document = PlaintextParser.from_string(
        load_resource("snippets/prevko.txt"),
        tokenizer,
    ).document

    summarizer = LsaSummarizer(Stemmer("czech"))
    summarizer.stop_words = get_stop_words("czech")

    summary = summarizer(document, 2)
    assert len(summary) == 2
def test_issue_5_svd_converges():
    """Regression check for an SVD convergence failure.

    Source: https://github.com/miso-belica/sumy/issues/5
    """
    # Skipped: the original convergence failure could not be reproduced.
    pytest.skip("Can't reproduce the issue.")

    tokenizer = Tokenizer("english")
    document = PlaintextParser.from_string(
        load_resource("articles/svd_converges.txt"),
        tokenizer,
    ).document

    summarizer = LsaSummarizer(Stemmer("english"))
    summarizer.stop_words = get_stop_words("english")

    summary = summarizer(document, 20)
    assert len(summary) == 20
def test_article_example():
    """LSA summarizer yields twenty sentences for a full Czech article.

    Source: http://www.prevko.cz/dite/skutecne-pribehy-deti
    """
    tokenizer = Tokenizer("czech")
    document = PlaintextParser.from_string(
        load_resource("articles/prevko_cz_1.txt"),
        tokenizer,
    ).document

    summarizer = LsaSummarizer(Stemmer("czech"))
    summarizer.stop_words = get_stop_words("czech")

    summary = summarizer(document, 20)
    assert len(summary) == 20
def build_lsa(parser, language):
    """Build an LSA summarizer configured for *language*.

    Note: *parser* is currently unused; it is kept for interface
    compatibility with callers that pass (parser, language) pairs.
    """
    lsa = LsaSummarizer(Stemmer(language))
    lsa.stop_words = get_stop_words(language)
    return lsa
# Fragment of a WhatsApp group-summarization handler; the enclosing method and
# the definitions of `message`, `inputLine`, `groupName` and `self` are outside
# this view, and the original nesting was lost (indentation stripped).
# Skip messages containing an escaped slash — presumably media/link payloads;
# TODO confirm against the message schema.
if '\\/' not in message:
inputLine = inputLine + message['message'] + '. '
# Earlier word-frequency approach (TextBlob top-5 words), kept for reference:
# blob = TextBlob(inputLine)
# wordCounts = blob.word_counts
# sortedWordCounts = sorted(wordCounts, key=wordCounts.get, reverse=True)
# outputLine = " ".join(sortedWordCounts[:5])
# outputLine = groupName.capitalize() + " summarized as " + outputLine
# self.send_to_whatsapp_id("WACAO!",outputLine)
LANGUAGE = "english"
# sumy accepts a percentage string: keep 20% of the sentences.
SENTENCES_COUNT = '20%'
outputLine = groupName.capitalize() + " summarized as: \n"
parser = PlaintextParser.from_string(inputLine, Tokenizer(LANGUAGE))
stemmer = Stemmer(LANGUAGE)
summarizer = LsaSummarizer(stemmer)
summarizer.stop_words = get_stop_words(LANGUAGE)
for sentence in summarizer(parser.document, SENTENCES_COUNT):
# NOTE(review): `unicode(...)` is Python 2 only — this fragment predates Python 3.
outputLine = outputLine + unicode(str(sentence), "utf-8") + "\n"
# Push the assembled summary back to the WhatsApp group.
self.send_to_whatsapp_id("WACAO!",outputLine)
# print "sum_basic:"
def get_summary(long_text, sentences=SENTENCES_COUNT):
    """Return up to *sentences* summary sentences of *long_text* as strings."""
    normalized = chinese_normalnize(long_text)
    document = PlaintextParser.from_string(normalized, Tokenizer(LANGUAGE)).document

    summarizer = Summarizer(Stemmer(LANGUAGE))
    summarizer.stop_words = get_stop_words(LANGUAGE)

    return [str(sentence) for sentence in summarizer(document, sentences)]
def summarize(srt_file, n_sentences, language="english"):
    """Generate segmented summary

    Args:
        srt_file(str) : The name of the SRT FILE
        n_sentences(int): No of sentences
        language(str) : Language of subtitles (default to English)

    Returns:
        list: segment of subtitles
    """
    # srt_to_txt tags each sentence with its subtitle index as "(N)".
    # Raw string fixes the invalid "\(" escape sequences (a SyntaxWarning on
    # Python 3.12+); compiling once hoists the pattern out of the loop.
    index_pattern = re.compile(r"\(([0-9]+)\)")

    parser = PlaintextParser.from_string(
        srt_to_txt(srt_file), Tokenizer(language))
    stemmer = Stemmer(language)
    summarizer = LsaSummarizer(stemmer)
    summarizer.stop_words = get_stop_words(language)

    segment = []
    for sentence in summarizer(parser.document, n_sentences):
        # Map the summary sentence back to its subtitle entry via the "(N)" tag.
        index = int(index_pattern.findall(str(sentence))[0])
        item = srt_file[index]
        segment.append(srt_segment_to_range(item))
    return segment
# Compare four sumy summarizers on the same parsed document.
# `parser`, `LANGUAGE` and `SENTENCES_COUNT` are defined earlier in the file.
stemmer = Stemmer(LANGUAGE)

print("\n====== Luhn ======")
summarizerLuhn = LuhnSummarizer(stemmer)
summarizerLuhn.stop_words = get_stop_words(LANGUAGE)
for sentenceLuhn in summarizerLuhn(parser.document, SENTENCES_COUNT):
    print(sentenceLuhn, "\n")

print("====== TextRank ======")
summarizerTR = TextRankSummarizer(stemmer)
summarizerTR.stop_words = get_stop_words(LANGUAGE)
for sentenceTR in summarizerTR(parser.document, SENTENCES_COUNT):
    print(sentenceTR, "\n")

print("====== LSA ======")
summarizerLSA = LsaSummarizer(stemmer)
summarizerLSA.stop_words = get_stop_words(LANGUAGE)
for sentenceLSA in summarizerLSA(parser.document, SENTENCES_COUNT):
    print(sentenceLSA, "\n")

print("====== Edmonson ======")
summarizerEd = EdmundsonSummarizer(stemmer)
summarizerEd.bonus_words = ('focus', 'proposed', 'method', 'describes')
# Bug fix: ('example') is just the string 'example' — a one-element tuple
# needs a trailing comma, otherwise stigma words are iterated per-character.
summarizerEd.stigma_words = ('example',)
summarizerEd.null_words = ('literature', 'however')
for sentenceEd in summarizerEd(parser.document, SENTENCES_COUNT):
    print(sentenceEd, "\n")
def get_summary(self, num_sentence):
    """Return a *num_sentence*-sentence summary of this article.

    Prefers parsing the live URL; falls back to the stored body text if the
    URL cannot be fetched/parsed. On any other failure the raw body is
    returned unchanged (deliberate best-effort behavior).
    """
    try:
        try:
            parser = HtmlParser.from_url(self.url, Tokenizer("english"))
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed. A PlaintextParser failure here
            # propagates to the outer handler, matching the original
            # `raise(e)` flow.
            parser = PlaintextParser.from_string(self.body, Tokenizer("english"))
        stemmer = Stemmer('english')
        summarizer = Summarizer(stemmer)
        summarizer.stop_words = get_stop_words('english')
        # join() replaces quadratic `out += str(sentence)` accumulation.
        return "".join(
            str(sentence)
            for sentence in summarizer(parser.document, num_sentence)
        )
    except Exception:
        # Best effort: never raise to the caller; hand back the raw body.
        return self.body
if '\\/' not in message:
inputLine = inputLine + message['message'] + '. '
# blob = TextBlob(inputLine)
# wordCounts = blob.word_counts
# sortedWordCounts = sorted(wordCounts, key=wordCounts.get, reverse=True)
# outputLine = " ".join(sortedWordCounts[:5])
# outputLine = groupName.capitalize() + " summarized as " + outputLine
# self.send_to_whatsapp_id("WACAO!",outputLine)
LANGUAGE = "english"
SENTENCES_COUNT = '20%'
outputLine = groupName.capitalize() + " summarized as: \n"
parser = PlaintextParser.from_string(inputLine, Tokenizer(LANGUAGE))
stemmer = Stemmer(LANGUAGE)
summarizer = LsaSummarizer(stemmer)
summarizer = Summarizer(stemmer)
summarizer.stop_words = get_stop_words(LANGUAGE)
for sentence in summarizer(parser.document, SENTENCES_COUNT):
outputLine = outputLine + unicode(str(sentence), "utf-8") + "\n"
self.send_to_whatsapp_id("WACAO!",outputLine)
# print "sum_basic:"