How to use the sumy.summarizers._summarizer.AbstractSummarizer class in sumy

To help you get started, we’ve selected a few sumy examples based on popular ways it is used in public projects. All of the snippets below come from the sumy project itself and show summarizers that subclass AbstractSummarizer.

github miso-belica / sumy / sumy / summarizers / sum_basic.py
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from ._summarizer import AbstractSummarizer


class SumBasicSummarizer(AbstractSummarizer):
    """
    SumBasic: a frequency-based summarization system that adjusts word frequencies as 
    sentences are extracted.
    Source: http://www.cis.upenn.edu/~nenkova/papers/ipm.pdf

    """
    _stop_words = frozenset()

    @property
    def stop_words(self):
        return self._stop_words

    @stop_words.setter
    def stop_words(self, words):
        self._stop_words = frozenset(map(self.normalize_word, words))
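
To help get started with the class above, here is a minimal usage sketch, assuming sumy's standard PlaintextParser, Tokenizer, Stemmer and get_stop_words helpers; the input text and sentence count are placeholders:

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
from sumy.summarizers.sum_basic import SumBasicSummarizer

TEXT = "First sentence of the document. Second sentence. Third sentence with more detail."

# parse plain text into a document of sentences
parser = PlaintextParser.from_string(TEXT, Tokenizer("english"))

# the stemmer is passed to the AbstractSummarizer constructor;
# stop words go through the normalizing setter shown above
summarizer = SumBasicSummarizer(Stemmer("english"))
summarizer.stop_words = get_stop_words("english")

for sentence in summarizer(parser.document, 2):
    print(sentence)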

github miso-belica / sumy / sumy / summarizers / edmundson.py
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from collections import defaultdict
from ..nlp.stemmers import null_stemmer
from ._summarizer import AbstractSummarizer
from .edmundson_cue import EdmundsonCueMethod
from .edmundson_key import EdmundsonKeyMethod
from .edmundson_title import EdmundsonTitleMethod
from .edmundson_location import EdmundsonLocationMethod


_EMPTY_SET = frozenset()


class EdmundsonSummarizer(AbstractSummarizer):
    _bonus_words = _EMPTY_SET
    _stigma_words = _EMPTY_SET
    _null_words = _EMPTY_SET

    def __init__(self, stemmer=null_stemmer, cue_weight=1.0, key_weight=0.0,
            title_weight=1.0, location_weight=1.0):
        super(EdmundsonSummarizer, self).__init__(stemmer)

        self._ensure_correct_weights(cue_weight, key_weight, title_weight,
            location_weight)

        self._cue_weight = float(cue_weight)
        self._key_weight = float(key_weight)
        self._title_weight = float(title_weight)
        self._location_weight = float(location_weight)
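
A hedged usage sketch for EdmundsonSummarizer: the bonus, stigma and null word lists have to be assigned before summarizing, and the lists below are made-up placeholders, as are the text and sentence count:

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.summarizers.edmundson import EdmundsonSummarizer

TEXT = "The results are significant. This is just an example. Deadlines matter in practice."

parser = PlaintextParser.from_string(TEXT, Tokenizer("english"))
summarizer = EdmundsonSummarizer(Stemmer("english"), cue_weight=1.0, key_weight=1.0,
                                 title_weight=1.0, location_weight=1.0)

summarizer.bonus_words = ("significant", "important")  # words that raise a sentence's score
summarizer.stigma_words = ("example",)                 # words that lower a sentence's score
summarizer.null_words = ("the", "a", "is", "in")       # words ignored when scoring

for sentence in summarizer(parser.document, 1):
    print(sentence)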

github miso-belica / sumy / sumy / summarizers / kl.py
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

import math

from ._summarizer import AbstractSummarizer


class KLSummarizer(AbstractSummarizer):
    """
    Method that greedily adds sentences to a summary so long as it decreases the 
    KL Divergence.
    Source: http://www.aclweb.org/anthology/N09-1041
    """

    stop_words = frozenset()

    def __call__(self, document, sentences_count):
        ratings = self._get_ratings(document)
        return self._get_best_sentences(document.sentences, sentences_count, ratings)

    def _get_ratings(self, document):
        sentences = document.sentences

        ratings = self._compute_ratings(sentences)
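
KLSummarizer follows the same parser/stemmer pattern; the one wrinkle in this excerpt is that stop_words is a plain frozenset attribute rather than a normalizing property, so the words are used exactly as assigned. A short sketch with placeholder text:

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
from sumy.summarizers.kl import KLSummarizer

TEXT = "Greedy selection keeps the summary close to the document. Each added sentence should reduce the divergence. Redundant sentences are skipped."

parser = PlaintextParser.from_string(TEXT, Tokenizer("english"))
summarizer = KLSummarizer(Stemmer("english"))
summarizer.stop_words = get_stop_words("english")  # plain attribute in this excerpt, no normalize_word pass

for sentence in summarizer(parser.document, 1):
    print(sentence)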

github miso-belica / sumy / sumy / summarizers / random.py
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

import random

from ._summarizer import AbstractSummarizer


class RandomSummarizer(AbstractSummarizer):
    """Summarizer that picks sentences randomly."""

    def __call__(self, document, sentences_count):
        sentences = document.sentences
        ratings = self._get_random_ratings(sentences)

        return self._get_best_sentences(sentences, sentences_count, ratings)

    def _get_random_ratings(self, sentences):
        ratings = list(range(len(sentences)))
        random.shuffle(ratings)

        return dict((s, r) for s, r in zip(sentences, ratings))
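
RandomSummarizer is the smallest complete example of the AbstractSummarizer pattern: rate every sentence, then let the inherited _get_best_sentences helper pick the top-rated ones. A hypothetical custom summarizer following the same pattern (the class and its length-based rating are invented for illustration):

from sumy.summarizers._summarizer import AbstractSummarizer


class LongestSentenceSummarizer(AbstractSummarizer):
    """Toy example: prefer the sentences with the most words."""

    def __call__(self, document, sentences_count):
        # rate each sentence, then reuse the selection helper from AbstractSummarizer,
        # which accepts a dict of ratings just like the snippets above
        ratings = {sentence: len(sentence.words) for sentence in document.sentences}
        return self._get_best_sentences(document.sentences, sentences_count, ratings)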

github miso-belica / sumy / sumy / summarizers / edmundson_key.py
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from .._compat import Counter
from ._summarizer import AbstractSummarizer


class EdmundsonKeyMethod(AbstractSummarizer):
    def __init__(self, stemmer, bonus_words):
        super(EdmundsonKeyMethod, self).__init__(stemmer)
        self._bonus_words = bonus_words

    def __call__(self, document, sentences_count, weight):
        significant_words = self._compute_significant_words(document, weight)

        return self._get_best_sentences(document.sentences,
            sentences_count, self._rate_sentence, significant_words)

    def _compute_significant_words(self, document, weight):
        # keep only stems contained in bonus words
        words = map(self.stem_word, document.words)
        words = filter(self._is_bonus_word, words)

        # compute frequencies of bonus words in document

github miso-belica / sumy / sumy / summarizers / edmundson_title.py
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from operator import attrgetter
from itertools import chain
from .._compat import ffilter
from ._summarizer import AbstractSummarizer


class EdmundsonTitleMethod(AbstractSummarizer):
    def __init__(self, stemmer, null_words):
        super(EdmundsonTitleMethod, self).__init__(stemmer)
        self._null_words = null_words

    def __call__(self, document, sentences_count):
        sentences = document.sentences
        significant_words = self._compute_significant_words(document)

        return self._get_best_sentences(sentences, sentences_count,
            self._rate_sentence, significant_words)

    def _compute_significant_words(self, document):
        heading_words = map(attrgetter("words"), document.headings)

        significant_words = chain(*heading_words)
        significant_words = map(self.stem_word, significant_words)

github miso-belica / sumy / sumy / summarizers / lsa.py
from warnings import warn

try:
    import numpy
except ImportError:
    numpy = None

try:
    from numpy.linalg import svd as singular_value_decomposition
except ImportError:
    singular_value_decomposition = None
from ._summarizer import AbstractSummarizer


class LsaSummarizer(AbstractSummarizer):
    MIN_DIMENSIONS = 3
    REDUCTION_RATIO = 1/1
    _stop_words = frozenset()

    @property
    def stop_words(self):
        return self._stop_words

    @stop_words.setter
    def stop_words(self, words):
        self._stop_words = frozenset(map(self.normalize_word, words))

    def __call__(self, document, sentences_count):
        self._ensure_dependecies_installed()

        dictionary = self._create_dictionary(document)

github miso-belica / sumy / sumy / summarizers / luhn.py
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from ..models import TfDocumentModel
from ._summarizer import AbstractSummarizer


class LuhnSummarizer(AbstractSummarizer):
    max_gap_size = 4
    # TODO: better recognition of significant words (automatic)
    significant_percentage = 1
    _stop_words = frozenset()

    @property
    def stop_words(self):
        return self._stop_words

    @stop_words.setter
    def stop_words(self, words):
        self._stop_words = frozenset(map(self.normalize_word, words))

    def __call__(self, document, sentences_count):
        words = self._get_significant_words(document.words)
        return self._get_best_sentences(document.sentences,

github miso-belica / sumy / sumy / summarizers / edmundson_cue.py
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from ._summarizer import AbstractSummarizer


class EdmundsonCueMethod(AbstractSummarizer):
    def __init__(self, stemmer, bonus_words, stigma_words):
        super(EdmundsonCueMethod, self).__init__(stemmer)
        self._bonus_words = bonus_words
        self._stigma_words = stigma_words

    def __call__(self, document, sentences_count, bunus_word_weight, stigma_word_weight):
        return self._get_best_sentences(document.sentences,
            sentences_count, self._rate_sentence, bunus_word_weight,
            stigma_word_weight)

    def _rate_sentence(self, sentence, bunus_word_weight, stigma_word_weight):
        # count the number of bonus/stigma words in the sentence
        words = map(self.stem_word, sentence.words)
        bonus_words_count, stigma_words_count = self._count_words(words)

        # compute positive & negative rating

github miso-belica / sumy / sumy / summarizers / text_rank.py
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

import math

try:
    import numpy
except ImportError:
    numpy = None

from ._summarizer import AbstractSummarizer


class TextRankSummarizer(AbstractSummarizer):
    """An implementation of TextRank algorithm for summarization.

    Source: https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf
    """
    epsilon = 1e-4
    damping = 0.85
    # small number to prevent zero-division error, see https://github.com/miso-belica/sumy/issues/112
    _delta = 1e-7 
    _stop_words = frozenset()

    @property
    def stop_words(self):
        return self._stop_words

    @stop_words.setter
    def stop_words(self, words):
        self._stop_words = frozenset(map(self.normalize_word, words))
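
TextRankSummarizer needs numpy available (hence the guarded import above). A final hedged end-to-end sketch, again assuming sumy's standard parser and stemmer helpers, with placeholder text and sentence count:

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words
from sumy.summarizers.text_rank import TextRankSummarizer

TEXT = "Sentences that share many words reinforce each other. The highest-ranked sentences form the summary. Unrelated sentences score lower."

parser = PlaintextParser.from_string(TEXT, Tokenizer("english"))
summarizer = TextRankSummarizer(Stemmer("english"))
summarizer.stop_words = get_stop_words("english")

for sentence in summarizer(parser.document, 1):
    print(sentence)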