How to use the scipy.sparse module in scipy

To help you get started, we’ve selected a few scipy.sparse examples, based on popular ways the module is used in public projects.

github pysal / pysal / pysal / model / spreg / diagnostics.py View on Github
    # Check for constant, if none add one, see Greene 2003, pg. 222
    # if constant == False:
    #    X = np.hstack((np.ones((n,1)),X))

    # Check for multicollinearity in the X matrix
    ci = condition_index(reg)
    if ci > 30:
        white_result = "Not computed due to multicollinearity."
        return white_result

    # Compute cross-products and squares of the regression variables
    if type(X).__name__ == 'ndarray':
        A = np.zeros((n, (k * (k + 1)) // 2))
    elif type(X).__name__ == 'csc_matrix' or type(X).__name__ == 'csr_matrix':
        # this is probably inefficient
        A = SP.lil_matrix((n, (k * (k + 1)) // 2))
    else:
        raise Exception("unknown X type, %s" % type(X).__name__)
    counter = 0
    for i in range(k):
        for j in range(i, k):
            v = spmultiply(X[:, i], X[:, j], False)
            A[:, counter] = v
            counter += 1

    # Append the original variables
    A = sphstack(X, A)   # note: this also converts a LIL to CSR
    n, k = A.shape

    # Check to identify any duplicate or constant columns in A
    omitcolumn = []
    for i in range(k):
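The excerpt breaks off mid-loop, but the scipy.sparse idiom it relies on is complete: allocate a lil_matrix when you need cheap column-by-column assignment, then convert to CSR once the matrix is filled. A minimal, self-contained sketch of that pattern follows; the random X is a stand-in for the regression design matrix, not pysal data.

import numpy as np
import scipy.sparse as SP

n, k = 100, 4
X = SP.random(n, k, density=0.3, format='csc', random_state=0)

# LIL tolerates repeated column assignment while the matrix is being built.
A = SP.lil_matrix((n, (k * (k + 1)) // 2))
counter = 0
for i in range(k):
    for j in range(i, k):
        # Elementwise product of columns i and j; densified for the assignment.
        A[:, counter] = X[:, i].multiply(X[:, j]).toarray()
        counter += 1

# Arithmetic and row slicing are fast in CSR, so convert once filled.
A = SP.hstack([X, A]).tocsr()
print(A.shape)  # (100, 14)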
github dmlc / dgl / tests / compute / test_traversal.py View on Github
            if u in layers_nx[-1]:
                frontier.add(v)
                edge_frontier.add(g.edge_id(u, v))
            else:
                layers_nx.append(frontier)
                edges_nx.append(edge_frontier)
                frontier = set([v])
                edge_frontier = set([g.edge_id(u, v)])
        # avoids empty successors
        if len(frontier) > 0 and len(edge_frontier) > 0:
            layers_nx.append(frontier)
            edges_nx.append(edge_frontier)
        return layers_nx, edges_nx

    g = dgl.DGLGraph()
    a = sp.random(n, n, 3 / n, data_rvs=lambda n: np.ones(n))
    g.from_scipy_sparse_matrix(a)
    g_nx = g.to_networkx()
    src = random.choice(range(n))
    layers_nx, _ = _bfs_nx(g_nx, src)
    layers_dgl = dgl.bfs_nodes_generator(g, src)
    assert len(layers_dgl) == len(layers_nx)
    assert all(toset(x) == y for x, y in zip(layers_dgl, layers_nx))

    g_nx = nx.random_tree(n, seed=42)
    g = dgl.DGLGraph()
    g.from_networkx(g_nx)
    src = 0
    _, edges_nx = _bfs_nx(g_nx, src)
    edges_dgl = dgl.bfs_edges_generator(g, src)
    assert len(edges_dgl) == len(edges_nx)
    assert all(toset(x) == y for x, y in zip(edges_dgl, edges_nx))
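The test seeds its graph with scipy.sparse.random. Isolating just that call: data_rvs receives the number of sampled entries and returns their values, so passing np.ones produces an unweighted adjacency matrix. A standalone sketch with an arbitrary n:

import numpy as np
import scipy.sparse as sp

n = 10
# density=3/n targets roughly 3 entries per row; data_rvs gets the number
# of sampled entries and returns their values, so every edge weight is 1.
a = sp.random(n, n, density=3 / n, format='csr',
              data_rvs=lambda nnz: np.ones(nnz))
print(a.nnz, a.data[:5])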
github makarandtapaswi / MovieQA_CVPR2016 / encode_qa_and_text.py View on Github
        else:
            features = sps.dok_matrix((len(sentence_list), desc_dim), dtype='float32')

        for s, sentence in enumerate(sentence_list):
            # NOTE: use both alphanumeric and stemming normalization
            sentence = utils.normalize_stemming(utils.normalize_alphanumeric(sentence.lower())).split(' ')
            # for each word in the normalized sentence
            for word in sentence:
                if word not in model.vocab: continue
                widx = model.vocab.index(word)
                features[s,widx] = model.tfidf[widx][midx]

            if is_qa:  # if not sparse, use numpy.linalg.norm
                features[s] /= (np.linalg.norm(features[s]) + 1e-6)
            else:  # if sparse, use scipy.sparse.linalg.norm
                features[s] /= (sps.linalg.norm(features[s]) + 1e-6)

    elif desc == 'word2vec':
        desc_dim = model.get_vector(model.vocab[-1]).shape[0]
        features = np.zeros((len(sentence_list), desc_dim), dtype='float32')
        for s, sentence in enumerate(sentence_list):
            # NOTE: use only alphanumeric normalization, no stemming
            sentence = utils.normalize_alphanumeric(sentence.lower()).split(' ')
            # for each word in the normalized sentence
            for word in sentence:
                if word not in model.vocab: continue
                features[s] += model.get_vector(word)

            features[s] /= (np.linalg.norm(features[s]) + 1e-6)

    return features
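The TF-IDF branch leans on two scipy.sparse features: dok_matrix for cheap scattered writes while the matrix is filled, and scipy.sparse.linalg.norm for normalizing rows without densifying them. A small sketch of both, with illustrative dimensions and values:

import numpy as np
import scipy.sparse as sps
import scipy.sparse.linalg  # makes sps.linalg available

n_sents, vocab = 3, 8
features = sps.dok_matrix((n_sents, vocab), dtype='float32')

# DOK makes scattered single-entry writes cheap while filling the matrix.
features[0, 2] = 1.5
features[0, 5] = 0.5

# L2-normalize a row without densifying it.
row = features.tocsr()[0]
row = row / (sps.linalg.norm(row) + 1e-6)
print(row.toarray())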
github Rinoahu / SwiftOrtho / bin / deprecate / deprecate2 / mcl_mem.py View on Github
def mcl_xyz(f):
    l2n = {}
    dmx = 0
    # for x,y,z in xyz:
    for i in f:
        x, y = i.split('\t', 3)[:2]
        if x not in l2n:
            l2n[x] = dmx
            dmx += 1
        if y not in l2n:
            l2n[y] = dmx
            dmx += 1

    f.seek(0)
    dmx += 1
    G_d = sparse.lil_matrix((dmx, dmx), dtype='float32')
    # for x,y,z in xyz:
    for i in f:
        x, y, z = i.split('\t', 4)[:3]
        if x > y:
            continue
        X, Y = map(l2n.get, [x, y])
        Z = float(z)
        G_d[X, Y] = Z
        G_d[Y, X] = Z

    # print(G_d.data)
    n2l = {}
    while l2n:
        key, val = l2n.popitem()
        n2l[val] = key
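A condensed sketch of the same construction, using a hypothetical in-memory edge list in place of the tab-separated file f:

import scipy.sparse as sparse

# Hypothetical edge list standing in for the tab-separated input file.
edges = [('a', 'b', 1.0), ('a', 'c', 2.5), ('b', 'c', 0.5)]

l2n = {}  # label -> row/column index
for x, y, _ in edges:
    for v in (x, y):
        l2n.setdefault(v, len(l2n))

dmx = len(l2n)
G_d = sparse.lil_matrix((dmx, dmx), dtype='float32')
for x, y, z in edges:
    X, Y = l2n[x], l2n[y]
    G_d[X, Y] = z  # fill both triangles to keep the matrix symmetric
    G_d[Y, X] = z

# Convert to CSR before matrix products (e.g. MCL's expansion step).
G = G_d.tocsr()
print((G @ G).toarray())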
github ivanistheone / Latent-Dirichlet-Allocation / gensim / matutils.py View on Github
def unitVec(vec):
    """
    Scale a vector to unit length. The only exception is the zero vector, which
    is returned back unchanged.

    If the input is sparse (list of 2-tuples), output will also be sparse. Otherwise,
    output will be a numpy array.
    """
    if scipy.sparse.issparse(vec): # convert scipy.sparse to standard numpy array
        vec = vec.toarray().flatten()

    try:
        first = next(iter(vec))  # is there at least one element?
    except StopIteration:  # empty vector: nothing to scale
        return vec

    if isinstance(first, tuple): # sparse format?
        vecLen = 1.0 * math.sqrt(sum(val * val for _, val in vec))
        assert vecLen > 0.0, "sparse documents must not contain any explicit zero entries"
        if vecLen != 1.0:
            return [(termId, val / vecLen) for termId, val in vec]
        else:
            return list(vec)
    else: # dense format
        vec = numpy.asarray(vec, dtype=float)
        vecLen = numpy.linalg.norm(vec)
        if vecLen > 0.0 and vecLen != 1.0:
            return vec / vecLen
        return vec
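Assuming the unitVec above is importable, all three input kinds route through it: a dense numpy array, a scipy.sparse row vector (densified on entry), and a gensim-style list of (id, value) tuples:

import numpy
import scipy.sparse

print(unitVec(numpy.array([3.0, 4.0])))                 # [0.6 0.8]
print(unitVec(scipy.sparse.csr_matrix([[3.0, 4.0]])))   # densified, then scaled
print(unitVec([(0, 3.0), (1, 4.0)]))                    # [(0, 0.6), (1, 0.8)]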
github PMEAL / OpenPNM / OpenPNM / Network / __DelaunayVoronoiDual__.py View on Github
                raise Exception('Must specify either "points" or "num_points"')
            points = tools.generate_base_points(num_points=num_points,
                                                domain_size=domain_size)

        # Perform tessellation
        vor = sptl.Voronoi(points=points)

        # Combine points
        pts_vor = vor.vertices
        pts_all = sp.vstack((points, pts_vor))
        Npts = sp.size(points, 0)
        Nvor = sp.size(pts_vor, 0)
        Nall = Nvor + Npts

        # Create adjacency matrix in lil format for quick matrix construction
        am = sp.sparse.lil_matrix((Nall, Nall))
        for ridge in vor.ridge_dict.keys():
            # Make Delaunay-to-Delaunay connections
            [am.rows[i].extend([ridge[0], ridge[1]]) for i in ridge]
            row = vor.ridge_dict[ridge]
            if -1 not in row:
                # Index Voronoi vertex numbers by Npts
                row = [i + Npts for i in row]
                # Make Voronoi-to-Delaunay connections
                [am.rows[i].extend(row) for i in ridge]
                # Make Voronoi-to-Voronoi connections
                row.append(row[0])
                [am.rows[row[i]].append(row[i+1]) for i in range(len(row)-1)]
                # Ensure connections are made symmetrically
                [am.rows[row[i+1]].append(row[i]) for i in range(len(row)-1)]
        # Finalize adjacency matrix by assigning data values to each location
        am.data = am.rows  # Values don't matter, only shape, so use 'rows'
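The final line exploits LIL internals: am.rows stores each row's column indices, and am.data must be a parallel structure of values. A toy sketch of the same trick, detached from the Voronoi data:

import scipy.sparse as sp

Nall = 4
am = sp.lil_matrix((Nall, Nall))

# rows[i] is the list of column indices stored in row i.
am.rows[0].extend([1, 2])
am.rows[1].extend([0, 3])
am.rows[2].extend([0])
am.rows[3].extend([1])

# Every stored index needs a matching value; reusing 'rows' supplies
# placeholder values because only the sparsity pattern matters here.
am.data = am.rows
print(am.tocoo().row.tolist(), am.tocoo().col.tolist())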
github epfl-lts2 / pygsp / pygsp / graphs / graph.py View on Github
        if method == self._lmax_method:
            return
        self._lmax_method = method

        if method == 'lanczos':
            try:
                # We need to cast the matrix L to a supported type.
                # TODO: not good for memory. Cast earlier?
                lmax = sparse.linalg.eigsh(self.L.asfptype(), k=1, tol=5e-3,
                                           ncv=min(self.N, 10),
                                           return_eigenvectors=False)
                lmax = lmax[0]
                assert lmax <= self._get_upper_bound() + 1e-12
                lmax *= 1.01  # Increase by 1% to be robust to errors.
                self._lmax = lmax
            except sparse.linalg.ArpackNoConvergence:
                raise ValueError('The Lanczos method did not converge. '
                                 'Try to use bounds.')

        elif method == 'bounds':
            self._lmax = self._get_upper_bound()

        else:
            raise ValueError('Unknown method {}'.format(method))
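The Lanczos branch reduces to a single eigsh call. A self-contained sketch on a small path-graph Laplacian (the graph here is invented for the example):

import numpy as np
import scipy.sparse as sparse
import scipy.sparse.linalg

# Combinatorial Laplacian L = D - A of a path graph on 5 nodes.
A = sparse.diags([1.0, 1.0], offsets=[-1, 1], shape=(5, 5), format='csr')
L = sparse.diags(np.ravel(A.sum(axis=1))) - A

# Largest eigenvalue only: k=1, loose tolerance, no eigenvectors.
lmax = sparse.linalg.eigsh(L.asfptype(), k=1, tol=5e-3,
                           return_eigenvectors=False)[0]
lmax *= 1.01  # pad by 1% so the estimate stays an upper bound
print(lmax)   # ~3.65 for this graph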
github Parallel-in-Time / pySDC / pySDC / tools / transfer_tools.py View on Github
def to_dense(D):
    # Densify scipy.sparse matrices; pass numpy arrays through untouched.
    if sprs.issparse(D):
        return D.toarray()
    elif isinstance(D, np.ndarray):
        return D
    else:
        raise TypeError("expected a sparse matrix or ndarray, got %s" % type(D))
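Usage, assuming to_dense and the sprs/np aliases above are in scope:

import numpy as np
import scipy.sparse as sprs

print(to_dense(sprs.identity(3, format='csr')))  # sparse -> dense ndarray
print(to_dense(np.eye(3)))                       # ndarray passes through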
github scipy / scipy / scipy / optimize / _linprog_ip.py View on Github
            def solve(M, r, sym_pos=False):
                # Sparse least-squares fallback when a direct solve fails.
                return sps.linalg.lsqr(M, r)[0]
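The fragment is heavily truncated, but the lsqr call is easy to demonstrate in isolation: it returns a tuple whose first element is the least-squares solution.

import numpy as np
import scipy.sparse as sps
import scipy.sparse.linalg

# Overdetermined sparse system; lsqr's first return value is the
# least-squares solution x minimizing ||M x - r||.
M = sps.csr_matrix([[1.0, 0.0], [1.0, 1.0], [0.0, 2.0]])
r = np.array([1.0, 2.0, 2.0])
x = sps.linalg.lsqr(M, r)[0]
print(x)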
github wangle1218 / Advertising-algorithm-competition / 2018tengxun / _2_lgb_model.py View on Github
    non_cv_col = training.columns.tolist()
    non_cv_col.remove('index')
    train_cv = training['index']
    predict_cv = predict['index']
    training = training[non_cv_col]
    predict = predict[non_cv_col]

    training = training.astype(np.float16)
    predict = predict.astype(np.float16)

    cv = CountVectorizer(ngram_range=(1, 1), max_df=0.8, min_df=5)
    train_cv = cv.fit_transform(train_cv)
    predict_cv = cv.transform(predict_cv)
    training = sparse.hstack((training, train_cv))
    predict = sparse.hstack((predict, predict_cv))

    del train_cv
    del predict_cv
    gc.collect()

    training = training.tocsr()
    predict = predict.tocsr()

    print("training, predict shape:",training.shape, predict.shape)

    return training, label, predict
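A compact sketch of the dense-plus-text feature assembly above, with toy stand-ins for the training frame and its text column (the column names are invented):

import numpy as np
import pandas as pd
from scipy import sparse
from sklearn.feature_extraction.text import CountVectorizer

# Toy stand-ins for the dense feature frame and its text column.
training = pd.DataFrame({'f1': [0.1, 0.2, 0.3], 'f2': [1.0, 0.0, 1.0]})
texts = ['red blue', 'blue green', 'red green']

cv = CountVectorizer(ngram_range=(1, 1))
text_feats = cv.fit_transform(texts)  # sparse CSR token counts

# hstack yields COO; convert to CSR for row slicing and model training.
X = sparse.hstack((training.values.astype(np.float32), text_feats)).tocsr()
print(X.shape)  # (3, 2 + vocabulary size)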