How to use the nlp.nlp_utils.lemmatize_sentence function in nlp

To help you get started, we’ve selected a few nlp examples based on popular ways nlp_utils.lemmatize_sentence is used in public projects.

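Based on the calls in the examples below, a minimal usage sketch follows. Note that nlp_utils is project-local code from melqkiades/yelp (the import line is an assumption), and the return value of lemmatize_sentence is assumed to be a list of (word, POS-tag) pairs, inferred from how the examples index tagged_word[0] and tagged_word[1]; the optional compiled-regex argument and the min_length/max_length bounds mirror the topic_latex_generator.py call.

import re

import nlp_utils  # project-local module from melqkiades/yelp; the import path is an assumption

review_text = 'The tacos were great but the service was slow.'

# Split the raw review text into sentences.
sentences = nlp_utils.get_sentences(review_text)

for sentence in sentences:
    # Default call, as in reviews_preprocessor.py: returns the lemmatized,
    # POS-tagged words of the sentence (assumed to be (word, tag) pairs).
    tagged_words = nlp_utils.lemmatize_sentence(sentence)

    # Call with an explicit regex and length bounds, as in
    # topic_latex_generator.py (the original passes nltk.re.compile('')).
    filtered_words = nlp_utils.lemmatize_sentence(
        sentence, re.compile(''), min_length=1, max_length=100)

    print(tagged_words)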

melqkiades/yelp: source/python/topicmodeling/context/topic_latex_generator.py (view on GitHub)
def build_text_automatic(self, record):
    text = record[Constants.TEXT_FIELD]
    # Split the review text into sentences and lemmatize each one.
    sentences = nlp_utils.get_sentences(text)
    lemmatized_words = []
    for sentence in sentences:
        lemmatized_words.append(nlp_utils.lemmatize_sentence(
            sentence, nltk.re.compile(''),
            min_length=1, max_length=100))

    doc_parts = []
    itemize = Itemize()

    # Build one itemize entry per lemmatized sentence.
    for sentence in lemmatized_words:
        new_words = []
        itemize.add_item('')
        for tagged_word in sentence:
            tag = tagged_word[1]
            word = tagged_word[0]
            singular = pattern.text.en.singularize(word)
            word_found = False

            # if tag == 'VBD':
melqkiades/yelp: source/python/etl/reviews_preprocessor.py (view on GitHub)
def lemmatize_sentences(records):
    print('%s: lemmatize sentences' % time.strftime("%Y/%m/%d-%H:%M:%S"))

    # Expand each review record into one record per sentence, keeping the
    # POS-tagged lemmas produced by lemmatize_sentence.
    sentence_records = []
    record_index = 0
    document_level = Constants.DOCUMENT_LEVEL
    for record in records:
        sentences = \
            nlp_utils.get_sentences(record[Constants.TEXT_FIELD])
        sentence_index = 0
        for sentence in sentences:
            # When DOCUMENT_LEVEL is numeric, keep only the first
            # DOCUMENT_LEVEL sentences of each review.
            if isinstance(document_level, (int, float)) and \
                    sentence_index >= document_level:
                break
            tagged_words = nlp_utils.lemmatize_sentence(sentence)
            sentence_record = {}
            sentence_record.update(record)
            sentence_record[Constants.TEXT_FIELD] = sentence
            sentence_record['sentence_index'] = sentence_index
            sentence_record[Constants.POS_TAGS_FIELD] = tagged_words
            sentence_records.append(sentence_record)
            sentence_index += 1
            # print(sentence_record)
        record_index += 1
        # print('\rrecord index: %d/%d' % (record_index, len(records))),
    return sentence_records
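For context, here is a hedged sketch of how the sentence-level records returned above might be consumed. The dictionary keys are the same Constants fields used in the function, and treating each tagged word as a (word, tag) pair with Penn Treebank tags (e.g. 'NN', 'VBD') is an inference from the other examples, not something the repository documents.

# Hypothetical follow-up: collect the nouns of every sentence-level record.
sentence_records = lemmatize_sentences(records)

for sentence_record in sentence_records:
    tagged_words = sentence_record[Constants.POS_TAGS_FIELD]
    # Assumes (word, tag) pairs with Penn Treebank tags such as 'NN' or 'VBD'.
    nouns = [word for word, tag in tagged_words if tag.startswith('NN')]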
melqkiades/yelp: source/python/topicmodeling/context/topic_latex_generator.py (view on GitHub)
def build_text_manual(self, record):
    text = record[Constants.TEXT_FIELD]
    sentences = nlp_utils.get_sentences(text)
    lemmatized_words = []
    for sentence in sentences:
        lemmatized_words.append(nlp_utils.lemmatize_sentence(
            sentence, nltk.re.compile(''),
            min_length=1, max_length=100))

    doc_parts = []
    itemize = Itemize()

    for sentence in lemmatized_words:
        new_words = []
        itemize.add_item('')
        for tagged_word in sentence:
            tag = tagged_word[1]
            word = tagged_word[0]
            singular = pattern.text.en.singularize(word)
            word_found = False

            if tag == 'VBD':