How to use the gensim.matutils.unitvec function in gensim

To help you get started, we’ve selected a few gensim examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github davidsbatista / BREDS / BREDS / test-sentence-generation.py View on Github external
def similarity_3_contexts(p, t):
        """Return the weighted sum of the three context similarities of ``p`` and ``t``.

        Each context (``bef``, ``bet``, ``aft``) is scored as the dot product
        of the two vectors after ``matutils.unitvec`` normalisation; a context
        keeps the score 0 when either object has ``None`` for that vector.
        The weights are hard-coded to 0, 1 and 0 respectively.
        """
        bef = bet = aft = 0

        # Compare a context only when neither side is missing its vector.
        if not (t.bef_vector is None or p.bef_vector is None):
            bef = dot(matutils.unitvec(t.bef_vector), matutils.unitvec(p.bef_vector))

        if not (t.bet_vector is None or p.bet_vector is None):
            bet = dot(matutils.unitvec(t.bet_vector), matutils.unitvec(p.bet_vector))

        if not (t.aft_vector is None or p.aft_vector is None):
            aft = dot(matutils.unitvec(t.aft_vector), matutils.unitvec(p.aft_vector))

        return 0*bef + 1*bet + 0*aft
github davidsbatista / BREDS / BREDS-new-sentence.py View on Github external
def similarity_3_contexts(self, p, t):
        """Combine the per-context similarities of ``p`` and ``t`` into one score.

        For each of the ``bef``, ``bet`` and ``aft`` vectors the score is the
        dot product of the ``matutils.unitvec``-normalised vectors, or 0 when
        either object has ``None`` for that vector. The three scores are
        weighted by ``self.config.alpha``, ``self.config.beta`` and
        ``self.config.gamma`` and summed.
        """
        bef = (
            dot(matutils.unitvec(t.bef_vector), matutils.unitvec(p.bef_vector))
            if t.bef_vector is not None and p.bef_vector is not None
            else 0
        )
        bet = (
            dot(matutils.unitvec(t.bet_vector), matutils.unitvec(p.bet_vector))
            if t.bet_vector is not None and p.bet_vector is not None
            else 0
        )
        aft = (
            dot(matutils.unitvec(t.aft_vector), matutils.unitvec(p.aft_vector))
            if t.aft_vector is not None and p.aft_vector is not None
            else 0
        )

        weights = self.config
        return weights.alpha*bef + weights.beta*bet + weights.gamma*aft
github davidsbatista / BREDS / breds-parallel.py View on Github external
def similarity_3_contexts(self, t, p):
        """Return the configured weighted sum of the context similarities of ``t`` and ``p``.

        A context (``bef``, ``bet`` or ``aft``) contributes the dot product of
        the two ``matutils.unitvec``-normalised vectors; it contributes 0 when
        either object has ``None`` for that vector. Weights come from
        ``self.config.alpha``, ``self.config.beta`` and ``self.config.gamma``.
        """
        bef = bet = aft = 0

        if t.bef_vector is not None and p.bef_vector is not None:
            t_unit = matutils.unitvec(t.bef_vector)
            p_unit = matutils.unitvec(p.bef_vector)
            bef = dot(t_unit, p_unit)

        if t.bet_vector is not None and p.bet_vector is not None:
            t_unit = matutils.unitvec(t.bet_vector)
            p_unit = matutils.unitvec(p.bet_vector)
            bet = dot(t_unit, p_unit)

        if t.aft_vector is not None and p.aft_vector is not None:
            t_unit = matutils.unitvec(t.aft_vector)
            p_unit = matutils.unitvec(p.aft_vector)
            aft = dot(t_unit, p_unit)

        return (
            self.config.alpha*bef
            + self.config.beta*bet
            + self.config.gamma*aft
        )
github piskvorky / sim-shootout / prepare_shootout.py View on Github external
else:
        tfidf = gensim.models.TfidfModel(corpus)
        tfidf.save(tfidf_file)

    # build/load LSI model, on top of the TF-IDF model
    lsi_file = outfile('lsi.model')
    if os.path.exists(lsi_file):
        lsi = gensim.models.LsiModel.load(lsi_file)
    else:
        lsi = gensim.models.LsiModel(tfidf[corpus], id2word=corpus.dictionary, num_topics=NUM_TOPICS, chunksize=10000)
        lsi.save(lsi_file)

    # convert all articles to latent semantic space, store the result as a MatrixMarket file
    # normalize all vectors to unit length, to simulate cossim in libraries that only support euclidean distance
    vectors_file = os.path.join(outdir, 'lsi_vectors.mm')
    gensim.corpora.MmCorpus.serialize(vectors_file, (gensim.matutils.unitvec(vec) for vec in lsi[tfidf[corpus]]))

    logger.info("finished running %s" % program)
github davidsbatista / BREDS / BREDS-parallel-fe.py View on Github external
def similarity_3_contexts(self, p, t):
        """Score ``p`` against ``t`` over their ``bef``, ``bet`` and ``aft`` vectors.

        Every context is scored as the dot product of the vectors after
        ``matutils.unitvec`` normalisation, defaulting to 0 when either object
        has ``None`` for that vector. The result is
        ``alpha*bef + beta*bet + gamma*aft`` with weights read from
        ``self.config``.
        """
        scores = {}
        for context in ("bef", "bet", "aft"):
            attr = context + "_vector"
            # Same check order as a hand-written ``t... and p...`` test.
            if getattr(t, attr) is not None and getattr(p, attr) is not None:
                scores[context] = dot(
                    matutils.unitvec(getattr(t, attr)),
                    matutils.unitvec(getattr(p, attr)),
                )
            else:
                scores[context] = 0

        return (
            self.config.alpha*scores["bef"]
            + self.config.beta*scores["bet"]
            + self.config.gamma*scores["aft"]
        )
github RaRe-Technologies / gensim / gensim / interfaces.py View on Github external
Returns
        -------
        {`scipy.sparse.csr.csr_matrix`, list of (int, float)}
            Similarities given document or corpus and objects corpus, depends on `query`.

        """
        is_corpus, query = utils.is_corpus(query)
        if self.normalize:
            # self.normalize only works if the input is a plain gensim vector/corpus (as
            # advertised in the doc). in fact, input can be a numpy or scipy.sparse matrix
            # as well, but in that case assume tricks are happening and don't normalize
            # anything (self.normalize has no effect).
            if not matutils.ismatrix(query):
                if is_corpus:
                    query = [matutils.unitvec(v) for v in query]
                else:
                    query = matutils.unitvec(query)
        result = self.get_similarities(query)

        if self.num_best is None:
            return result

        # if maintain_sparsity is True, result is scipy sparse. Sort, clip the
        # topn and return as a scipy sparse matrix.
        if getattr(self, 'maintain_sparsity', False):
            return matutils.scipy2scipy_clipped(result, self.num_best)

        # if the input query was a corpus (=more documents), compute the top-n
        # most similar for each document in turn
        if matutils.ismatrix(result):
            return [matutils.full2sparse_clipped(v, self.num_best) for v in result]
github davidsbatista / BREDS / Misc. / patterns-word2vec.py View on Github external
def mean_rep(v1, v2):
    """Return the dot product of the unit-normalised mean vectors of ``v1`` and ``v2``.

    Each argument is converted with ``array`` and averaged along axis 0; the
    two means are then normalised with ``matutils.unitvec`` before the dot
    product is taken.
    """
    mean_1 = array(v1).mean(axis=0)
    mean_2 = array(v2).mean(axis=0)
    return dot(matutils.unitvec(mean_1), matutils.unitvec(mean_2))
github davidsbatista / BREDS / BREDS.py View on Github external
def similarity_3_contexts(self, p, t):
        """Return the weighted similarity between ``p`` and ``t`` across three contexts.

        The ``bef``, ``bet`` and ``aft`` vectors of both objects are compared
        pairwise via the dot product of their ``matutils.unitvec``-normalised
        forms. A pair in which either vector is ``None`` scores 0. The scores
        are multiplied by ``self.config.alpha``, ``self.config.beta`` and
        ``self.config.gamma`` respectively and added together.
        """
        sim_bef = sim_bet = sim_aft = 0

        # Skip any context for which one of the two vectors is unavailable.
        if not (t.bef_vector is None or p.bef_vector is None):
            sim_bef = dot(matutils.unitvec(t.bef_vector), matutils.unitvec(p.bef_vector))

        if not (t.bet_vector is None or p.bet_vector is None):
            sim_bet = dot(matutils.unitvec(t.bet_vector), matutils.unitvec(p.bet_vector))

        if not (t.aft_vector is None or p.aft_vector is None):
            sim_aft = dot(matutils.unitvec(t.aft_vector), matutils.unitvec(p.aft_vector))

        cfg = self.config
        return cfg.alpha*sim_bef + cfg.beta*sim_bet + cfg.gamma*sim_aft
github RaRe-Technologies / gensim / gensim / similarities / docsim.py View on Github external
>>> index.add_documents(one_more_corpus)  # add more documents in corpus

        """
        min_ratio = 1.0  # 0.5 to only reopen shards that are <50% complete
        if self.shards and len(self.shards[-1]) < min_ratio * self.shardsize:
            # The last shard was incomplete (fewer than min_ratio * shardsize documents);
            # load it back and add the documents there, don't start a new shard
            self.reopen_shard()
        for doc in corpus:
            if isinstance(doc, numpy.ndarray):
                doclen = len(doc)
            elif scipy.sparse.issparse(doc):
                doclen = doc.nnz
            else:
                doclen = len(doc)
                if doclen < 0.3 * self.num_features:
                    doc = matutils.unitvec(matutils.corpus2csc([doc], self.num_features).T, self.norm)
                else:
                    doc = matutils.unitvec(matutils.sparse2full(doc, self.num_features), self.norm)
            self.fresh_docs.append(doc)
            self.fresh_nnz += doclen
            if len(self.fresh_docs) >= self.shardsize:
                self.close_shard()
            if len(self.fresh_docs) % 10000 == 0:
                logger.info("PROGRESS: fresh_shard size=%i", len(self.fresh_docs))