How to use the sumy.parsers.plaintext.PlaintextParser class in sumy

To help you get started, we’ve selected a few sumy examples based on popular ways it is used in public projects.

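Before the project snippets below, here is a minimal, self-contained sketch of the pattern they all share: parse raw text with PlaintextParser, then hand the parsed document to a summarizer. The sample text, language, and sentence count are illustrative, not taken from any of the projects.

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.utils import get_stop_words

LANGUAGE = "english"
TEXT = (
    "Sumy is a library for extracting summaries from text. "
    "It splits the input into sentences and scores each one. "
    "The highest-scoring sentences form the summary."
)

# Build a document model from a raw string; the Tokenizer handles
# sentence and word splitting for the given language.
parser = PlaintextParser.from_string(TEXT, Tokenizer(LANGUAGE))

# Any sumy summarizer takes the parsed document plus a sentence count.
summarizer = LsaSummarizer(Stemmer(LANGUAGE))
summarizer.stop_words = get_stop_words(LANGUAGE)

for sentence in summarizer(parser.document, 2):
    print(sentence)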

github miso-belica / sumy / tests / test_parsers.py View on Github
def test_parse_plaintext(self):
        parser = PlaintextParser.from_string("""
            Ako sa máš? Ja dobre! A ty? No
            mohlo to byť aj lepšie!!! Ale pohodička.


            TOTO JE AKOŽE NADPIS
            A toto je text pod ním, ktorý je textový.
            A tak ďalej...
        """, Tokenizer("czech"))

        document = parser.document

        self.assertEqual(len(document.paragraphs), 2)

        self.assertEqual(len(document.paragraphs[0].headings), 0)
        self.assertEqual(len(document.paragraphs[0].sentences), 5)
github sidhusmart / WACAO / build / lib / webwhatsapi / __init__.py View on Github
        inputLine = ''
        for message in messages:
            if '\\/' not in message:
                inputLine = inputLine + message['message'] + '. '
        # blob = TextBlob(inputLine)
        # wordCounts = blob.word_counts
        # sortedWordCounts = sorted(wordCounts, key=wordCounts.get, reverse=True)
        # outputLine = " ".join(sortedWordCounts[:5])
        # outputLine = groupName.capitalize() + " summarized as " + outputLine
        # self.send_to_whatsapp_id("WACAO!",outputLine)

        LANGUAGE = "english"
        SENTENCES_COUNT = '20%'

        outputLine = groupName.capitalize() + " summarized as: \n"
        parser = PlaintextParser.from_string(inputLine, Tokenizer(LANGUAGE))
        stemmer = Stemmer(LANGUAGE)
        summarizer = LsaSummarizer(stemmer)
        summarizer.stop_words = get_stop_words(LANGUAGE)
        for sentence in summarizer(parser.document, SENTENCES_COUNT):
            outputLine = outputLine + unicode(str(sentence), "utf-8") + "\n"
        self.send_to_whatsapp_id("WACAO!",outputLine)
        # print "sum_basic:"
github miso-belica / sumy / sumy / evaluation / __main__.py View on Github
def main(args=None):
    args = docopt(to_string(__doc__), args, version=__version__)
    summarizer, document, items_count, reference_summary = handle_arguments(args)

    evaluated_sentences = summarizer(document, items_count)
    reference_document = PlaintextParser.from_string(reference_summary,
        Tokenizer(args["--language"]))
    reference_sentences = reference_document.document.sentences

    for name, evaluate_document, evaluate in AVAILABLE_EVALUATIONS:
        if evaluate_document:
            result = evaluate(evaluated_sentences, document.sentences)
        else:
            result = evaluate(evaluated_sentences, reference_sentences)
        print("%s: %f" % (name, result))

    return 0
github dataiku / dataiku-contrib / text-summarization / custom-recipes / text-summarization-compute / recipe.py View on Github
def summarize(text):
    if isvalid(text): 
        all_capital = False
        # All-caps input can yield an empty summary, so lowercase it here and re-uppercase the result later.
        if text.upper() == text:
            text = text.lower()
            all_capital = True
        
        if (sys.version_info > (3,0)):
            parser = PlaintextParser.from_string(text, Tokenizer(LANGUAGE))
        else:
            parser = PlaintextParser.from_string(text.decode('ascii', errors='ignore'), Tokenizer(LANGUAGE))

        stemmer = Stemmer(LANGUAGE)

        summarizer = Summarizer(stemmer)
        summarizer.stop_words = get_stop_words(LANGUAGE)

        sentences = [str(s) for s in summarizer(
            parser.document, sentences_count=n_sentences)]
        
        if all_capital:
            output_sentences = ' '.join(sentences).upper()
            all_capital = False
        else:
            output_sentences = ' '.join(sentences)
github miso-belica / sumy / sumy / __main__.py View on Github
from ._compat import to_string, to_unicode, to_bytes, PY3
from .nlp.tokenizers import Tokenizer
from .parsers.html import HtmlParser
from .parsers.plaintext import PlaintextParser
from .summarizers.luhn import LuhnSummarizer
from .summarizers.edmundson import EdmundsonSummarizer
from .summarizers.lsa import LsaSummarizer
from .summarizers.text_rank import TextRankSummarizer
from .summarizers.lex_rank import LexRankSummarizer
from .summarizers.sum_basic import SumBasicSummarizer
from .summarizers.kl import KLSummarizer
from .nlp.stemmers import Stemmer

PARSERS = {
    "html": HtmlParser,
    "plaintext": PlaintextParser,
}

AVAILABLE_METHODS = {
    "luhn": LuhnSummarizer,
    "edmundson": EdmundsonSummarizer,
    "lsa": LsaSummarizer,
    "text-rank": TextRankSummarizer,
    "lex-rank": LexRankSummarizer,
    "sum-basic": SumBasicSummarizer,
    "kl": KLSummarizer,
}


def main(args=None):
    args = docopt(to_string(__doc__), args, version=__version__)
    summarizer, parser, items_count = handle_arguments(args)
github miso-belica / sumy / sumy / parsers / plaintext.py View on Github
def __init__(self, text, tokenizer):
        super(PlaintextParser, self).__init__(tokenizer)
        self._text = to_unicode(text).strip()
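This constructor is why the samacharbot2 snippet below can instantiate PlaintextParser directly: the parser simply stores the tokenizer and the unicode-normalized text. A minimal sketch of the equivalent entry points (the file name is a placeholder):

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer

tokenizer = Tokenizer("english")

# Direct construction, as in the samacharbot2 example below.
parser = PlaintextParser("Some text to summarize.", tokenizer)

# Equivalent classmethod used by most snippets above.
parser = PlaintextParser.from_string("Some text to summarize.", tokenizer)

# Reads the text from disk; "article.txt" is a placeholder path.
parser = PlaintextParser.from_file("article.txt", tokenizer)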
github SalmaanP / samacharbot2 / altsummary.py View on Github
def summary(text):

    stemmer = Stemmer(LANGUAGE)
    parser = PlaintextParser(text, Tokenizer(LANGUAGE))
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    short = ""
    for sentence in summarizer(parser.document, SENTENCES_COUNT):
        short = short + ">" + "* " + str(sentence).decode('ascii','ignore') + "\n\n"
        #print(sentence)
    return short
github adityasarvaiya / Automatic_Question_Generation / aqg / utils / summarizer.py View on Github
def summarize_from_file(self,file_name):

        parser = PlaintextParser.from_file(file_name, Tokenizer(self.LANGUAGE))
        stemmer = Stemmer(self.LANGUAGE)
        summarizer = Summarizer(stemmer)
        file_1 = open("summarizer_output.txt","w+")
        file_2 = open("summarizer_output2.txt","w+")
        for sentence in summarizer(parser.document, self.SENTENCES_COUNT):
            file_2.write(str(sentence))
            file_1.write(str(sentence))
            file_1.write("\n")
        file_1.close()
        file_2.close()
github jjangsangy / ExplainToMe / ExplainToMe / __main__.py View on Github
def main(url, max_sent, language='english'):
    tokenizer = Tokenizer(language)
    article = alt_extract(url)
    parser = PlaintextParser.from_string(article, tokenizer)
    return click.echo(get_summarizer(parser, max_sent, language))
github smileboywtu / MillionHeroAssistant / core / textsummary.py View on Github
def get_summary(long_text, sentences=SENTENCES_COUNT):
    parser = PlaintextParser.from_string(chinese_normalnize(long_text), Tokenizer(LANGUAGE))
    stemmer = Stemmer(LANGUAGE)
    summarizer = Summarizer(stemmer)
    summarizer.stop_words = get_stop_words(LANGUAGE)
    return [str(sentence) for sentence in summarizer(parser.document, sentences)]