How to use the textblob.nltk.corpus.reader.wordnet.WordNetError exception in textblob

To help you get started, we’ve selected a few textblob examples based on popular ways WordNetError is used in public projects.

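WordNetError is the exception NLTK's WordNet corpus reader raises when a lookup cannot be resolved, and the snippets below show the places it is raised inside the reader that older TextBlob releases vendor as textblob.nltk. The following sketch is a minimal, hedged illustration of catching it around a synset lookup; it falls back to the standalone nltk package and assumes the WordNet corpus data has already been downloaded (nltk.download('wordnet')).

try:
    # older TextBlob releases vendor NLTK under textblob.nltk
    from textblob.nltk.corpus.reader.wordnet import WordNetError
except ImportError:
    # current TextBlob versions depend on the standalone nltk package
    from nltk.corpus.reader.wordnet import WordNetError

from nltk.corpus import wordnet

try:
    wordnet.synset("dog.n.99")  # 'dog' has far fewer than 99 noun senses
except WordNetError as exc:
    print("WordNet lookup failed:", exc)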

github sloria / TextBlob / textcorpus / reader / wordnet.py View on Github
def lemma_from_key(self, key):
        # Keys are case sensitive and always lower-case
        key = key.lower()

        lemma_name, lex_sense = key.split('%')
        pos_number, lexname_index, lex_id, _, _ = lex_sense.split(':')
        pos = self._pos_names[int(pos_number)]

        # open the key -> synset file if necessary
        if self._key_synset_file is None:
            self._key_synset_file = self.open('index.sense')

        # Find the synset for the lemma.
        synset_line = _binary_search_file(self._key_synset_file, key)
        if not synset_line:
            raise WordNetError("No synset found for key %r" % key)
        offset = int(synset_line.split()[1])
        synset = self._synset_from_pos_and_offset(pos, offset)

        # return the corresponding lemma
        for lemma in synset.lemmas:
            if lemma.key == key:
                return lemma
        raise WordNetError("No lemma found for for key %r" % key)
github sloria / TextBlob / textcorpus / reader / wordnet.py View on Github
def synset(self, name):
        # split name into lemma, part of speech and synset number
        lemma, pos, synset_index_str = name.lower().rsplit('.', 2)
        synset_index = int(synset_index_str) - 1

        # get the offset for this synset
        try:
            offset = self._lemma_pos_offset_map[lemma][pos][synset_index]
        except KeyError:
            message = 'no lemma %r with part of speech %r'
            raise WordNetError(message % (lemma, pos))
        except IndexError:
            n_senses = len(self._lemma_pos_offset_map[lemma][pos])
            message = "lemma %r with part of speech %r has only %i %s"
            if n_senses == 1:
                tup = lemma, pos, n_senses, "sense"
            else:
                tup = lemma, pos, n_senses, "senses"
            raise WordNetError(message % tup)

        # load synset information from the appropriate file
        synset = self._synset_from_pos_and_offset(pos, offset)

        # some basic sanity checks on loaded attributes
        if pos == 's' and synset.pos == 'a':
            message = ('adjective satellite requested but only plain '
                       'adjective found for lemma %r')
            raise WordNetError(message % lemma)
        assert synset.pos == pos or (pos == 'a' and synset.pos == 's')

        # Return the synset object.
        return synset
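
The two except clauses above produce different messages; here is a quick hedged sketch of triggering each through the public wordnet.synset call (the lemma 'qwzx' is made up, and the sketch assumes the WordNet data is installed).

from nltk.corpus import wordnet
from nltk.corpus.reader.wordnet import WordNetError

for name in ("qwzx.n.01",   # unknown lemma         -> the KeyError branch
             "dog.v.07"):   # sense number too high -> the IndexError branch
    try:
        wordnet.synset(name)
    except WordNetError as exc:
        print(name, "->", exc)
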
github sloria / TextBlob / textcorpus / reader / wordnet.py View on Github
# a lemma number of 0 means the frame applies to every word in the synset
if lemma_number == 0:
                        synset.frame_ids.append(frame_number)
                        for lemma in synset.lemmas:
                            lemma.frame_ids.append(frame_number)
                            lemma.frame_strings.append(frame_string_fmt %
                                                       lemma.name)
                    # only a specific word in the synset
                    else:
                        lemma = synset.lemmas[lemma_number - 1]
                        lemma.frame_ids.append(frame_number)
                        lemma.frame_strings.append(frame_string_fmt %
                                                   lemma.name)

        # raise a more informative error with line text
        except ValueError as e:
            raise WordNetError('line %r: %s' % (data_file_line, e))

        # set sense keys for Lemma objects - note that this has to be
        # done afterwards so that the relations are available
        for lemma in synset.lemmas:
            if synset.pos == ADJ_SAT:
                head_lemma = synset.similar_tos()[0].lemmas[0]
                head_name = head_lemma.name
                head_id = '%02d' % head_lemma._lex_id
            else:
                head_name = head_id = ''
            tup = (lemma.name, WordNetCorpusReader._pos_numbers[synset.pos],
                   lemma._lexname_index, lemma._lex_id, head_name, head_id)
            lemma.key = ('%s%%%d:%02d:%02d:%s:%s' % tup).lower()

        # the canonical name is based on the first lemma
        lemma_name = synset.lemmas[0].name.lower()
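
A brief sketch of how these generated sense keys show up in the public API; it assumes a current NLTK release, where the key is exposed as the Lemma.key() method rather than the .key attribute set above, and the printed key value is only indicative.

from nltk.corpus import wordnet

lemma = wordnet.lemma("dog.n.01.dog")
key = lemma.key()                      # something like 'dog%1:05:00::'
print(wordnet.lemma_from_key(key))     # round-trips to the same lemma
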
github sloria / TextBlob / textcorpus / reader / wordnet.py View on Github
def information_content(synset, ic):
    try:
        icpos = ic[synset.pos]
    except KeyError:
        msg = 'Information content file has no entries for part-of-speech: %s'
        raise WordNetError(msg % synset.pos)

    counts = icpos[synset.offset]
    if counts == 0:
        return _INF
    else:
        return -math.log(counts / icpos[0])
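
A hedged usage sketch for information_content: it assumes the wordnet and wordnet_ic corpora are installed (nltk.download('wordnet'), nltk.download('wordnet_ic')) and relies on the standard IC tables carrying only noun and verb counts, which is what makes the missing part-of-speech branch above reachable.

from nltk.corpus import wordnet, wordnet_ic
from nltk.corpus.reader.wordnet import WordNetError, information_content

brown_ic = wordnet_ic.ic("ic-brown.dat")

print(information_content(wordnet.synset("dog.n.01"), brown_ic))

try:
    # no adjective counts in the Brown IC table
    information_content(wordnet.synset("happy.a.01"), brown_ic)
except WordNetError as exc:
    print("no IC for this part of speech:", exc)
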
github sloria / TextBlob / textcorpus / reader / wordnet.py View on Github
the highest information content value.  If two nodes have no
    explicit common subsumer, assume that they share an artificial
    root node that is the hypernym of all explicit roots.

    :type synset1: Synset
    :param synset1: First input synset.
    :type synset2: Synset
    :param synset2: Second input synset.  Must be the same part of
    speech as the first synset.
    :type  ic: dict
    :param ic: an information content object (as returned by ``load_ic()``).
    :return: The information content of the two synsets and their most
    informative subsumer
    """
    if synset1.pos != synset2.pos:
        raise WordNetError('Computing the least common subsumer requires ' + \
                           '%s and %s to have the same part of speech.' % \
                               (synset1, synset2))

    ic1 = information_content(synset1, ic)
    ic2 = information_content(synset2, ic)
    subsumers = synset1.common_hypernyms(synset2)
    if len(subsumers) == 0:
        subsumer_ic = 0
    else:
        subsumer_ic = max(information_content(s, ic) for s in subsumers)

    if verbose:
        print("> LCS Subsumer by content:", subsumer_ic)

    return ic1, ic2, subsumer_ic
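
The docstring above belongs to the helper that the information-content similarity measures build on, so the same-POS check is easiest to see through Synset.res_similarity. A short sketch, assuming the wordnet and wordnet_ic data are installed:

from nltk.corpus import wordnet, wordnet_ic
from nltk.corpus.reader.wordnet import WordNetError

brown_ic = wordnet_ic.ic("ic-brown.dat")
dog, cat, run = (wordnet.synset(n) for n in ("dog.n.01", "cat.n.01", "run.v.01"))

# Resnik similarity is the IC of the most informative common subsumer
print(dog.res_similarity(cat, brown_ic))

try:
    # mixing parts of speech trips the same-POS check shown above
    dog.res_similarity(run, brown_ic)
except WordNetError as exc:
    print("part-of-speech mismatch:", exc)
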
github sloria / TextBlob / textcorpus / reader / wordnet.py View on Github
_ = [_next_token() for _ in xrange(n_pointers)]

                    # same as number of synsets
                    n_senses = int(_next_token())
                    assert n_synsets == n_senses

                    # get number of senses ranked according to frequency
                    _ = int(_next_token())

                    # get synset offsets
                    synset_offsets = [int(_next_token()) for _ in xrange(n_synsets)]

                # raise more informative error with file name and line number
                except (AssertionError, ValueError) as e:
                    tup = ('index.%s' % suffix), (i + 1), e
                    raise WordNetError('file %s, line %i: %s' % tup)

                # map lemmas and parts of speech to synsets
                self._lemma_pos_offset_map[lemma][pos] = synset_offsets
                if pos == ADJ:
                    self._lemma_pos_offset_map[lemma][ADJ_SAT] = synset_offsets
github sloria / TextBlob / textcorpus / reader / wordnet.py View on Github
def lemma(self, name):
        # e.g.: '.45_caliber.a.01..45_caliber'
        separator = SENSENUM_RE.search(name).start()
        synset_name, lemma_name = name[:separator+3], name[separator+4:]
        synset = self.synset(synset_name)
        for lemma in synset.lemmas:
            if lemma.name == lemma_name:
                return lemma
        raise WordNetError('no lemma %r in %r' % (lemma_name, synset_name))
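
A short sketch of the error path in lemma lookup: the synset name resolves, but the requested spelling doesn't match any of its lemmas ('doggo' is made up; assumes the WordNet data is installed).

from nltk.corpus import wordnet
from nltk.corpus.reader.wordnet import WordNetError

print(wordnet.lemma("dog.n.01.dog"))       # resolves normally

try:
    # the synset exists, but none of its lemmas is spelled this way
    wordnet.lemma("dog.n.01.doggo")
except WordNetError as exc:
    print("no such lemma:", exc)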