How to use the textacy.make_spacy_doc function in textacy

To help you get started, we’ve selected a few textacy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ebursztein / sitefab / tests / test_nlp.py View on Github external
def test_stats():
    """The two-sentence sample must be counted as 2 sentences and 11 words."""
    sample = "the quick fox and the cat. The turtle and the rabbit."
    parsed = make_spacy_doc(sample, lang=SPACY_MODEL)
    counts = nlp.compute_stats(parsed).counts
    assert counts.sentences == 2
    assert counts.words == 11
github chartbeat-labs / textacy / tests / ke / test_sgrank.py View on Github external
def empty_spacy_doc():
    """Return a doc built by ``textacy.make_spacy_doc`` from the empty string."""
    blank_doc = textacy.make_spacy_doc("", lang="en")
    return blank_doc
github ebursztein / sitefab / tests / test_nlp.py View on Github external
def test_terms():
    """Key-term extraction on the sample must include all four animal nouns."""
    sample = "the quick fox and the cat. The turtle and the rabbit."
    parsed = make_spacy_doc(sample, lang=SPACY_MODEL)
    scored = nlp.extract_key_terms(parsed, num_terms=5)
    # Each entry's first element is the term; the score is dropped.
    names = [entry[0] for entry in scored]
    for expected in ('fox', 'cat', 'turtle', 'rabbit'):
        assert expected in names
github chartbeat-labs / textacy / tests / ke / test_textrank.py View on Github external
def empty_spacy_doc():
    """Return a doc built by ``textacy.make_spacy_doc`` from the empty string."""
    blank_doc = textacy.make_spacy_doc("", lang="en")
    return blank_doc
github chartbeat-labs / textacy / tests / ke / test_textrank.py View on Github external
def spacy_doc():
    """Return a doc built from the first text yielded by the CapitolWords dataset.

    The dataset is queried with ``min_len=1500`` and ``limit=1``; only the
    first yielded text is consumed.
    """
    text_stream = datasets.CapitolWords().texts(min_len=1500, limit=1)
    first_text = next(text_stream)
    return textacy.make_spacy_doc(first_text, lang="en")
github dermatologist / nlp-qrmine / src / qrmine / nlp_qrmine.py View on Github external
def filter_content(self, titles):
    """Add the documents whose title matches *titles* to the corpus.

    For each document in ``self._content.documents``, the title at the same
    position in ``self._content.titles`` is checked for containment in any
    element of *titles*. Matching documents have their punctuation removed,
    numbers replaced by ``'NUM'`` and are lower-cased before being turned
    into a doc (with the title as metadata) and added to ``self._corpus``.
    ``self.load_matrix()`` is called once after the loop.

    Nothing happens when ``self._content`` is None.
    """
    if self._content is None:
        return

    for position, document in enumerate(self._content.documents):
        metadata = {}
        try:
            if not any(self._content.titles[position] in wanted for wanted in titles):
                continue
            metadata['title'] = self._content.titles[position]

            without_punct = preprocessing.remove.remove_punctuation(document)
            doc_text = preprocessing.replace.replace_numbers(without_punct, 'NUM').lower()

            doc = textacy.make_spacy_doc((doc_text, metadata), lang=self._en)
            self._corpus.add_doc(doc)
        except IndexError:
            # Raised when there is no title at this position (or by any call
            # above); the placeholder is stored on a dict that is not used
            # again afterwards.
            metadata['title'] = 'Empty'

    self.load_matrix()
github dermatologist / nlp-qrmine / qrmine / qrmine.py View on Github external
def main(input_file):
    """Read *input_file* and echo a summary, per-sentence scores and a network.

    Steps, in order:
      1. Load the file through ``ReadData`` and wrap its ``content`` in ``Content``.
      2. Echo a summary produced by ``generate_summary(2)`` and a separator line.
      3. For every sentence longer than 3 tokens, echo its sentiment score and
         its similarity to the fixed string "Dummy sentence".
      4. Echo the network built from those sentences and the result of
         ``draw_graph(False)``.
    """
    # ReadData exposes the whole text as ``content``.
    data = ReadData()
    data.read_file(input_file)

    q = Qrmine()
    all_interviews = Content(data.content)

    q.content = data

    # --- Summary ---
    summary_text = " ".join(all_interviews.generate_summary(2))
    click.echo(summary_text)
    click.echo("_________________________________________")

    doc = textacy.make_spacy_doc(all_interviews.doc)

    # --- Sentiment ---
    analyzer = Sentiment()
    row_template = "{:-<40} {}\n"
    kept_sentences = []
    for sentence in doc.sents:
        if len(sentence) <= 3:
            # Sentences of 3 tokens or fewer are ignored.
            continue
        kept_sentences.append(sentence.text)
        scored = analyzer.sentiment_analyzer_scores(sentence.text)
        click.echo(row_template.format(scored["sentence"], str(scored["score"])))
        likeness = analyzer.similarity(sentence.text, "Dummy sentence")
        click.echo(row_template.format(sentence.text, str(likeness)))

    # --- Network ---
    graph = Network()
    click.echo(graph.sents_to_network(kept_sentences))
    # graph.draw_graph(True)
    click.echo(graph.draw_graph(False))
github dermatologist / nlp-qrmine / qrmine / qrmine.py View on Github external
if len(tags) > 0:
        ct = 0
        for title in data.titles:
            for tag in tags:
                if title == tag:
                    click.echo(tag)
                    content = data.documents[ct]
            ct += 1
        interview = Content(content)
        doc = textacy.make_spacy_doc(interview.doc)
        return q.print_categories(doc, num)

    else:
        all_interviews = Content(data.content)
        doc = textacy.make_spacy_doc(all_interviews.doc)
        return q.print_categories(doc, num)
github dermatologist / nlp-qrmine / src / qrmine / main.py View on Github external
if len(tags) > 0:
        ct = 0
        for title in data.titles:
            for tag in tags:
                if title == tag:
                    click.echo(tag)
                    content = data.documents[ct]
            ct += 1
        interview = Content(content)
        doc = textacy.make_spacy_doc(interview.doc)
        return q.print_categories(doc, num)

    else:
        all_interviews = Content(data.content)
        doc = textacy.make_spacy_doc(all_interviews.doc)
        return q.print_categories(doc, num)