# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test_empty_document():
    """An empty document must produce an empty summary (no sentences).

    Fix: the original body was not indented under the ``def`` line, which
    is a SyntaxError in Python; the statements themselves are unchanged.
    """
    document = build_document()
    summarizer = ReductionSummarizer(Stemmer("english"))
    returned = summarizer(document, 10)
    # Asking for up to 10 sentences from an empty document yields none.
    assert len(returned) == 0
# @author: megan squire
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.luhn import LuhnSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.edmundson import EdmundsonSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
# Demo: run several sumy summarizers over the same sample document and
# print the top sentences from each.
# Fix: the original loop bodies were not indented (SyntaxError); the
# statements and their order are unchanged.
LANGUAGE = "english"
SENTENCES_COUNT = 4

# Parse the sample text once; every summarizer reuses the same document.
parser = PlaintextParser.from_file("sampleText.txt", Tokenizer(LANGUAGE))
stemmer = Stemmer(LANGUAGE)

print("\n====== Luhn ======")
summarizerLuhn = LuhnSummarizer(stemmer)
summarizerLuhn.stop_words = get_stop_words(LANGUAGE)
for sentenceLuhn in summarizerLuhn(parser.document, SENTENCES_COUNT):
    print(sentenceLuhn, "\n")

print("====== TextRank ======")
summarizerTR = TextRankSummarizer(stemmer)
summarizerTR.stop_words = get_stop_words(LANGUAGE)
for sentenceTR in summarizerTR(parser.document, SENTENCES_COUNT):
    print(sentenceTR, "\n")

print("====== LSA ======")
# NOTE(review): the LSA summarizer is configured here but its print loop is
# not visible in this chunk — presumably it follows; confirm in the full file.
summarizerLSA = LsaSummarizer(stemmer)
summarizerLSA.stop_words = get_stop_words(LANGUAGE)
def run_summarizer(parser, sentences, language='english'):
    """Summarize a parsed document into a list of sentence strings.

    Fix: the original body was not indented under the ``def`` line
    (SyntaxError); the logic is unchanged.

    :params parser: Parser for selected document type
    :params sentences: Maximum sentences for summarizer.
    :params language: Language name used for stemming and stop words.
    :returns summary: Summarized page as a list of sentence strings.
    """
    # NOTE(review): ``Summarizer`` is not defined in this chunk — it is
    # presumably bound to one of the sumy summarizer classes elsewhere.
    summarizer = Summarizer(Stemmer(language))
    summarizer.stop_words = get_stop_words(language)
    return [str(sentence)
            for sentence in summarizer(parser.document, sentences)]
# NOTE(review): fragment of a larger crawl/summarize method — the `try:`
# matching this first `except`, and the definitions of `url`, `text`,
# `results`, `last_lib`, `detect`, and `self`, are outside this view.
# Original indentation has been stripped, so the block is not runnable
# as shown; comments below describe the visible intent only.
except Exception as e:
# record the failure against the URL being processed and carry on
results.add_error({'url':url,'lib':last_lib,'message':str(e)})
# detect lang of the text — presumably langdetect's detect(), which raises
# on empty/undetectable input; TODO confirm the import in the full file
try:
lang_detect=detect(text)
except Exception as e:
results.add_error({'url':url,'lib':last_lib,'message':str(e)})
# fall back to "no language" so the summarization step below is skipped
lang_detect=""
# generate summary (title = 1 sentence, body = self.SENTENCES_COUNT)
sumy_summary=""
sum_title=""
if lang_detect!="":
# self.LANGUAGES presumably maps detected ISO codes (e.g. "en") to sumy
# language names (e.g. "english") — confirm against the enclosing class
parser = PlaintextParser.from_string(text, Tokenizer(self.LANGUAGES[lang_detect]))
stemmer = Stemmer(self.LANGUAGES[lang_detect])
summarizer = Summarizer(stemmer)
summarizer.stop_words = get_stop_words(self.LANGUAGES[lang_detect])
# build title from summary: the single top-ranked sentence
try:
for sentence in summarizer(parser.document, 1):
sum_title+=sentence.__unicode__()
# build summary: top self.SENTENCES_COUNT sentences, newline-joined
for sentence in summarizer(parser.document, self.SENTENCES_COUNT):
sumy_summary+=sentence.__unicode__()+u"\n"
except:
# bare except: any summarization failure silently yields an empty summary
sumy_summary=""
doc={"link":url,"content":[{"base":url,"language":lang_detect}]}
# NOTE(review): fragment of command-line argument handling (sumy-style) —
# the `if` branch matching the `else:` below and the enclosing function
# signature are outside this view; indentation has been stripped, so the
# block is not runnable as shown.
parser = PARSERS[document_format or "plaintext"]
document_content = args["--text"]
else:
parser = PARSERS[document_format or "plaintext"]
# no --text provided: read the whole document from the input stream
document_content = default_input_stream.read()
items_count = ItemsCount(args["--length"])
language = args["--language"]
if args['--stopwords']:
# a user-supplied stop-word file takes precedence over the built-in list
stop_words = read_stop_words(args['--stopwords'])
else:
stop_words = get_stop_words(language)
parser = parser(document_content, Tokenizer(language))
stemmer = Stemmer(language)
# pick the first summarizer whose CLI flag is set; raises StopIteration
# if none is set — presumably the caller guarantees one is chosen
summarizer_class = next(cls for name, cls in AVAILABLE_METHODS.items() if args[name])
summarizer = build_summarizer(summarizer_class, stop_words, stemmer, parser)
return summarizer, parser, items_count
# NOTE(review): fragment of an interactive summarizer script — the branch
# opening this `if`/`elif` chain and the definitions of `summarizer`,
# `parser`, `textfile`, and `ftype` are outside this view; indentation has
# been stripped, so the block is not runnable as shown.
summarizer.stop_words = get_stop_words(LANGUAGE)
# now summarize: output as [txtfile]_summary.txt
g=open(textfile[0:-4]+'_summary.txt','w')
for sentence in summarizer(parser.document, SENTENCES_COUNT):
print(sentence)
g.write(str(sentence))
g.close()
# macOS-specific: `open` launches the default viewer for the summary file
os.system('open %s'%(textfile[0:-4]+'_summary.txt'))
elif ftype in ['w']:
# for URLs: fetch and summarize a web page (prompt suggests Wikipedia)
url=input('what link would you like to summarize on Wikipedia? \n')
parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
# for plaintext
#parser = PlaintextParser.from_file("poetry.txt", Tokenizer(LANGUAGE))
stemmer = Stemmer(LANGUAGE)
summarizer = Summarizer(stemmer)
summarizer.stop_words = get_stop_words(LANGUAGE)
# now summarize: output as web_summary.txt
g=open('web_summary.txt','w')
for sentence in summarizer(parser.document, SENTENCES_COUNT):
print(sentence)
g.write(str(sentence))
g.close()
# macOS-specific, same as above
os.system('open web_summary.txt')
def main():
    """Summarize the plain-text file named as the first CLI argument.

    The file is first sanitized in place: every non-ASCII byte is replaced
    with a space so the tokenizer only ever sees ASCII text. Then the file
    is parsed and the top SENTENCES_COUNT sentences are printed.

    Fixes vs. original:
    - ``open(filename)`` in text mode returns ``str``, which has no
      ``.decode`` in Python 3 (AttributeError); read bytes instead.
    - ``with`` blocks replace manual open/close so handles are released
      even if an exception is raised mid-way.
    - Restored the indentation that was stripped from the body.
    """
    filename = sys.argv[1]
    # Read raw bytes; decode with 'replace' turns every non-ASCII byte
    # into U+FFFD, which we then swap for a plain space.
    with open(filename, "rb") as fp:
        content = fp.read()
    result = content.decode("ascii", "replace").replace(u"\ufffd", " ")
    # Write the sanitized text back over the original file.
    with open(filename, "w") as fp2:
        fp2.write(result)
    parser = PlaintextParser.from_file(filename, Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    # NOTE(review): `Summarizer` is not defined in this chunk — presumably
    # bound to one of the sumy summarizer classes elsewhere in the file.
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        print(sentence)