How to use the sumy.utils.ItemsCount class in sumy

To help you get started, we’ve selected a few sumy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github miso-belica / sumy / tests / test_utils / test_utils.py View on Github external
returned = count([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert returned == [0, 1]

    count = ItemsCount("100%")
    returned = count([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert returned == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    count = ItemsCount("50%")
    returned = count([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert returned == [0, 1, 2, 3, 4]

    count = ItemsCount("30%")
    returned = count([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert returned == [0, 1, 2]

    count = ItemsCount("35%")
    returned = count([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert returned == [0, 1, 2]
github miso-belica / sumy / tests / test_utils / test_utils.py View on Github external
def test_unsupported_items_count():
    """Calling an ``ItemsCount`` built from an unsupported string raises ``ValueError``.

    The object is constructed outside the ``pytest.raises`` block, so the
    error is expected only when the instance is called.
    """
    unsupported = ItemsCount("Hacker")

    with pytest.raises(ValueError):
        unsupported([])
github miso-belica / sumy / tests / test_utils / test_utils.py View on Github external
def test_float_items_count():
    """Check ``ItemsCount`` with a float and with boolean values.

    For a ten-item list, ``3.5`` keeps the first three items, ``True`` keeps
    the first item and ``False`` keeps nothing.
    """
    cases = (
        (3.5, [0, 1, 2]),
        (True, [0]),
        (False, []),
    )

    for value, expected in cases:
        count = ItemsCount(value)
        # Build a fresh input list per case so cases cannot affect each other.
        returned = count(list(range(10)))
        assert returned == expected
github miso-belica / sumy / tests / test_utils / test_utils.py View on Github external
def test_percentage_items_count():
    """Check that percentage strings keep the matching leading share of a ten-item list."""
    # 20% of ten items -> the first two.
    count = ItemsCount("20%")
    returned = count([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert returned == [0, 1]

    # 100% -> the whole list is returned.
    count = ItemsCount("100%")
    returned = count([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert returned == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

    # 50% -> the first five.
    count = ItemsCount("50%")
    returned = count([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert returned == [0, 1, 2, 3, 4]

    # 30% -> the first three.
    count = ItemsCount("30%")
    returned = count([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    assert returned == [0, 1, 2]

    # NOTE(review): this excerpt ends here; the 35% instance is built but
    # never called or asserted on in the visible lines.
    count = ItemsCount("35%")
github miso-belica / sumy / tests / test_utils / test_utils.py View on Github external
def test_percentage_items_count():
    """Check that percentage strings keep the matching leading share of items.

    Each case applies ``ItemsCount(percentage)`` to the list ``0..9`` and
    compares the result with the expected prefix. ``"35%"`` yields the same
    three items as ``"30%"``.
    """
    expectations = (
        ("20%", [0, 1]),
        ("100%", [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        ("50%", [0, 1, 2, 3, 4]),
        ("30%", [0, 1, 2]),
        ("35%", [0, 1, 2]),
    )

    for percentage, expected in expectations:
        count = ItemsCount(percentage)
        # Build a fresh input list per case so cases cannot affect each other.
        returned = count(list(range(10)))
        assert returned == expected
github miso-belica / sumy / tests / test_utils / test_utils.py View on Github external
def test_percentage_items_count():
    """Check the items kept by ``ItemsCount`` for several percentage strings."""

    def select(percentage):
        # Build a new counter and a new ten-item list for every case.
        return ItemsCount(percentage)([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    assert select("20%") == [0, 1]
    assert select("100%") == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    assert select("50%") == [0, 1, 2, 3, 4]
    assert select("30%") == [0, 1, 2]
    # 35% of ten items yields the same three items as 30%.
    assert select("35%") == [0, 1, 2]
github miso-belica / sumy / sumy / __main__.py View on Github external
if args["--url"] is not None:
        parser = PARSERS[document_format or "html"]
        document_content = fetch_url(args["--url"])
    elif args["--file"] is not None:
        parser = PARSERS[document_format or "plaintext"]
        with open(args["--file"], "rb") as file:
            document_content = file.read()
    elif args["--text"] is not None:
        parser = PARSERS[document_format or "plaintext"]
        document_content = args["--text"]
    else:
        parser = PARSERS[document_format or "plaintext"]
        document_content = default_input_stream.read()

    items_count = ItemsCount(args["--length"])

    language = args["--language"]
    if args['--stopwords']:
        stop_words = read_stop_words(args['--stopwords'])
    else:
        stop_words = get_stop_words(language)

    parser = parser(document_content, Tokenizer(language))
    stemmer = Stemmer(language)

    summarizer_class = next(cls for name, cls in AVAILABLE_METHODS.items() if args[name])
    summarizer = build_summarizer(summarizer_class, stop_words, stemmer, parser)

    return summarizer, parser, items_count
github miso-belica / sumy / sumy / summarizers / _summarizer.py View on Github external
def _get_best_sentences(sentences, count, rating, *args, **kwargs):
    """Return the best rated sentences as a tuple, in document order.

    ``rating`` is either a dict looked up by sentence or a callable invoked
    as ``rating(sentence, *args, **kwargs)``. When a dict is given, no extra
    positional or keyword arguments may be passed (checked with ``assert``).

    ``count`` is used as-is when it is already an ``ItemsCount``; otherwise
    it is wrapped in one. It is applied to the sentences after they have been
    sorted by rating in descending order.
    """
    if isinstance(rating, dict):
        assert not args and not kwargs

        def rate(sentence):
            return rating[sentence]
    else:
        rate = rating

    scored = [
        SentenceInfo(sentence, position, rate(sentence, *args, **kwargs))
        for position, sentence in enumerate(sentences)
    ]

    # Highest rating first; the sort is stable, as with ``sorted``.
    scored.sort(key=attrgetter("rating"), reverse=True)

    # Let the ItemsCount object pick from the rating-ordered sentences.
    selector = count if isinstance(count, ItemsCount) else ItemsCount(count)
    chosen = selector(scored)

    # Restore the order in which the sentences appeared in the document.
    in_document_order = sorted(chosen, key=attrgetter("order"))

    return tuple(info.sentence for info in in_document_order)
github miso-belica / sumy / sumy / evaluation / __main__.py View on Github external
document_content = fetch_url(args["--url"])
    elif args["--file"] is not None:
        parser = PARSERS.get(document_format, PlaintextParser)
        with open(args["--file"], "rb") as file:
            document_content = file.read()
    else:
        parser = PARSERS["plaintext"]
        document_content = sys.stdin.read()

    summarizer_builder = AVAILABLE_METHODS["luhn"]
    for method, builder in AVAILABLE_METHODS.items():
        if args[method]:
            summarizer_builder = builder
            break

    items_count = ItemsCount(args["--length"])

    parser = parser(document_content, Tokenizer(args["--language"]))

    with open(args[""], "rb") as file:
        reference_summmary = file.read().decode("utf-8")

    return summarizer_builder(parser, args["--language"]), parser.document, items_count, reference_summmary