How to use the karateclub.estimator.Estimator class in karateclub

To help you get started, we’ve selected a few karateclub examples based on popular ways the library is used in public projects.

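Every class in these examples follows the same contract inherited from karateclub.estimator.Estimator: hyperparameters are stored in __init__, a fit method computes the result, and a getter such as get_embedding or get_memberships exposes it, scikit-learn style. As a minimal sketch of that pattern (DegreeEmbedding is a toy class invented here for illustration, not part of karateclub):

import numpy as np
from karateclub.estimator import Estimator

class DegreeEmbedding(Estimator):
    """Toy estimator: embeds each node by its degree."""
    def __init__(self, seed=42):
        self.seed = seed  # hyperparameters live on the instance

    def fit(self, graph):
        # fit() computes and stores the result on the instance
        self._embedding = np.array([[graph.degree(node)] for node in graph.nodes()])

    def get_embedding(self):
        # results are read back through a getter
        return self._embedding

The concrete estimators below are used the same way: construct the model, fit it on a NetworkX graph (or list of graphs) whose nodes are labeled 0, 1, ..., n-1, then call the getter.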

github benedekrozemberczki/karateclub/karateclub/community_detection/overlapping/ego_splitter.py (view on GitHub)
import community
import networkx as nx
from karateclub.estimator import Estimator

class EgoNetSplitter(Estimator):
    r"""An implementation of `"Ego-Splitting" `_
    from the KDD '17 paper "Ego-Splitting Framework: from Non-Overlapping to Overlapping Clusters". The tool first creates
    the ego-nets of nodes. A persona-graph is created which is clustered by the Louvain method. The resulting overlapping
    cluster memberships are stored as a dictionary.

    Args:
        resolution (float): Resolution parameter of Python Louvain. Default 1.0.
        seed (int): Random seed value. Default is 42.
    """
    def __init__(self, resolution=1.0, seed=42):
        self.resolution = resolution
        self.seed = seed

    def _create_egonet(self, node):
        """
        Creating an ego net, extracting personas and partitioning it.
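
A quick usage sketch for the snippet above, assuming the class is imported from the package top level and the input graph carries consecutive integer node labels starting at 0 (a karateclub requirement):

import networkx as nx
from karateclub import EgoNetSplitter

graph = nx.newman_watts_strogatz_graph(100, 10, 0.05)  # nodes labeled 0..99
model = EgoNetSplitter(resolution=1.0, seed=42)
model.fit(graph)
memberships = model.get_memberships()  # dict: node -> list of overlapping community ids
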
github benedekrozemberczki/karateclub/karateclub/graph_embedding/gl2vec.py (view on GitHub)
import numpy as np
import networkx as nx
from karateclub.estimator import Estimator
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from karateclub.utils.treefeatures import WeisfeilerLehmanHashing

class GL2Vec(Estimator):
    r"""An implementation of `"GL2Vec" `_
    from the ICONIP '19 paper "GL2vec: Graph Embedding Enriched by Line Graphs with Edge Features".
    First, the algorithm creates the line graph of each graph in the graph dataset.
    The procedure then creates Weisfeiler-Lehman tree features for the nodes of these line graphs. Using
    these features, a document (graph)-feature co-occurrence matrix is decomposed in order
    to generate representations for the graphs.

    The procedure assumes that nodes have no string features, in which case the WL hashing
    defaults to degree centrality. However, if a node feature with the key "feature" is
    present, feature extraction is based on the values under this key.

    Args:
        wl_iterations (int): Number of Weisfeiler-Lehman iterations. Default is 2.
        dimensions (int): Dimensionality of embedding. Default is 128.
        workers (int): Number of cores. Default is 4.
        down_sampling (float): Down sampling frequency. Default is 0.0001.
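
GL2Vec is a whole-graph embedder, so fit takes a list of graphs rather than a single one. A minimal sketch with synthetic graphs:

import networkx as nx
from karateclub import GL2Vec

graphs = [nx.newman_watts_strogatz_graph(30, 4, 0.2) for _ in range(10)]
model = GL2Vec(wl_iterations=2, dimensions=128)
model.fit(graphs)
embedding = model.get_embedding()  # one 128-dimensional vector per graph: shape (10, 128)
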
github benedekrozemberczki/karateclub/karateclub/graph_embedding/graph2vec.py (view on GitHub)
import numpy as np
import networkx as nx
from karateclub.estimator import Estimator
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from karateclub.utils.treefeatures import WeisfeilerLehmanHashing

class Graph2Vec(Estimator):
    r"""An implementation of `"Graph2Vec" `_
    from the MLGWorkshop '17 paper "Graph2Vec: Learning Distributed Representations of Graphs".
    The procedure creates Weisfeiler-Lehman tree features for the nodes of each graph. Using
    these features, a document (graph)-feature co-occurrence matrix is decomposed in order
    to generate representations for the graphs.

    The procedure assumes that nodes have no string features, in which case the WL hashing
    defaults to degree centrality. However, if a node feature with the key "feature" is
    present, feature extraction is based on the values under this key.

    Args:
        wl_iterations (int): Number of Weisfeiler-Lehman iterations. Default is 2.
        attributed (bool): Presence of graph attributes. Default is False.
        dimensions (int): Dimensionality of embedding. Default is 128.
        workers (int): Number of cores. Default is 4.
        down_sampling (float): Down sampling frequency. Default is 0.0001.
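
Graph2Vec follows the same list-of-graphs interface; the resulting matrix is typically fed to a downstream classifier. A minimal sketch:

import networkx as nx
from karateclub import Graph2Vec

graphs = [nx.newman_watts_strogatz_graph(30, 4, 0.2) for _ in range(10)]
model = Graph2Vec(wl_iterations=2, dimensions=128, attributed=False)
model.fit(graphs)
X = model.get_embedding()  # shape (10, 128); rows can serve as classifier features
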
github benedekrozemberczki/karateclub/karateclub/node_embedding/neighbourhood/grarep.py (view on GitHub)
import math
import numpy as np
import networkx as nx
from scipy import sparse
from sklearn.decomposition import TruncatedSVD
from karateclub.estimator import Estimator

class GraRep(Estimator):
    r"""An implementation of `"GraRep" `_
    from the CIKM '15 paper "GraRep: Learning Graph Representations with Global
    Structural Information". The procedure uses sparse truncated SVD to learn
    embeddings for the powers of the PMI matrix computed from powers of the
    normalized adjacency matrix.

    Args:
        dimensions (int): Number of individual embedding dimensions. Default is 32.
        iteration (int): Number of SVD iterations. Default is 10.
        order (int): Number of PMI matrix powers. Default is 5.
        seed (int): SVD random seed. Default is 42.
    """
    def __init__(self, dimensions=32, iteration=10, order=5, seed=42):
        self.dimensions = dimensions
        self.iterations = iteration
        self.order = order
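
GraRep is a node embedder fitted on a single graph. A minimal sketch; note that the per-order embeddings are concatenated, so expect a final width of dimensions * order:

import networkx as nx
from karateclub import GraRep

graph = nx.newman_watts_strogatz_graph(100, 10, 0.05)
model = GraRep(dimensions=32, order=5)
model.fit(graph)
embedding = model.get_embedding()  # per-order embeddings concatenated: shape (100, 32 * 5)
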
github benedekrozemberczki/karateclub/karateclub/graph_embedding/feathergraph.py (view on GitHub)
import math
import numpy as np
import networkx as nx
import scipy.sparse as sparse
from karateclub.estimator import Estimator

class FeatherGraph(Estimator):
    r"""An implementation of `"FEATHER-G" `_
    from the CIKM '20 paper "Characteristic Functions on Graphs: Birds of a Feather,
    from Statistical Descriptors to Parametric Models". The procedure
    uses characteristic functions of node features with random walk weights to describe
    node neighborhoods. These node-level features are pooled by mean pooling to
    create graph-level statistics.

    Args:
        order (int): Adjacency matrix powers. Default is 5.
        eval_points (int): Number of evaluation points. Default is 25.
        theta_max (float): Maximal evaluation point value. Default is 2.5.
        seed (int): Random seed value. Default is 42.
    """
    def __init__(self, order=5, eval_points=25, theta_max=2.5, seed=42):
        self.order = order
        self.eval_points = eval_points
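
FeatherGraph again uses the list-of-graphs interface; it computes closed-form descriptors rather than training a model. A minimal sketch:

import networkx as nx
from karateclub import FeatherGraph

graphs = [nx.newman_watts_strogatz_graph(30, 4, 0.2) for _ in range(10)]
model = FeatherGraph(order=5, eval_points=25, theta_max=2.5)
model.fit(graphs)
descriptors = model.get_embedding()  # one pooled characteristic-function descriptor per graph
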
github benedekrozemberczki/karateclub/karateclub/node_embedding/attributed/tene.py (view on GitHub)
import numpy as np
import networkx as nx
from scipy import sparse
from karateclub.estimator import Estimator

class TENE(Estimator):
    r"""An implementation of `"TENE" `_
    from the ICPR '18 paper "Enhanced Network Embedding with Text Information". The 
    procedure jointly factorizes the adjacency and node feature matrices using alternating
    least squares.
       
    Args:
        dimensions (int): Number of embedding dimensions. Default is 32.
        lower_control (float): Embedding score minimal value. Default is 10**-15.
        alpha (float): Adjacency matrix regularization coefficient. Default is 0.1. 
        beta (float): Feature matrix regularization coefficient. Default is 0.1.
        iterations (int): ALS iterations. Default is 200.
        seed (int): Random seed value. Default is 42.
    """
    def __init__(self, dimensions=32, lower_control=10**-15,
                 alpha=0.1, beta=0.1, iterations=200, seed=42):
        self.dimensions = dimensions
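
TENE is an attributed node embedder, so fit takes the graph plus a node feature matrix. A sketch with random binary features (the feature matrix here is made up for illustration):

import numpy as np
import networkx as nx
from scipy import sparse
from karateclub import TENE

graph = nx.newman_watts_strogatz_graph(100, 10, 0.05)
features = sparse.coo_matrix(np.random.binomial(1, 0.1, size=(100, 200)))  # synthetic binary node features
model = TENE(dimensions=32, alpha=0.1, beta=0.1)
model.fit(graph, features)
embedding = model.get_embedding()
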
github benedekrozemberczki/karateclub/karateclub/graph_embedding/netlsd.py (view on GitHub)
import numpy as np
import networkx as nx
import scipy.sparse as sps
from karateclub.estimator import Estimator

class NetLSD(Estimator):
    r"""An implementation of `"NetLSD" `_
    from the KDD '18 paper "NetLSD: Hearing the Shape of a Graph". The procedure
    calculate the heat kernel trace of the normalized Laplacian matrix over a
    vector of time scales. If the matrix is large it switches to an approximation
    of the eigenvalues. 

    Args:
        scale_min (float): Time scale interval minimum. Default is -2.0.
        scale_max (float): Time scale interval maximum. Default is 2.0.
        scale_steps (int): Number of steps in time scale. Default is 250.
        approximations (int): Number of eigenvalue approximations. Default is 200.
        seed (int): Random seed value. Default is 42.
    """
    def __init__(self, scale_min=-2.0, scale_max=2.0,
                 scale_steps=250, approximations=200, seed=42):
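
NetLSD produces one heat-trace signature per graph; graphs are then compared directly through their signatures, e.g. with a Euclidean distance. A minimal sketch:

import numpy as np
import networkx as nx
from karateclub import NetLSD

graphs = [nx.newman_watts_strogatz_graph(30, 4, 0.2) for _ in range(2)]
model = NetLSD(scale_min=-2.0, scale_max=2.0, scale_steps=250)
model.fit(graphs)
signatures = model.get_embedding()  # shape (2, 250): one heat-trace curve per graph
distance = np.linalg.norm(signatures[0] - signatures[1])  # graph (dis)similarity
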
github benedekrozemberczki/karateclub/karateclub/node_embedding/structural/role2vec.py (view on GitHub)
import numpy as np
import networkx as nx
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from karateclub.utils.walker import RandomWalker
from karateclub.estimator import Estimator
from karateclub.utils.treefeatures import WeisfeilerLehmanHashing

class Role2Vec(Estimator):
    r"""An implementation of `"Role2vec" `_
    from the IJCAI '18 paper "Learning Role-based Graph Embeddings".
    The procedure uses random walks to approximate the pointwise mutual information
    matrix obtained by multiplying the pooled adjacency power matrix with a 
    structural feature matrix (in this case Weisfeiler-Lehman features). This way
    one gets structural node embeddings.

    Args:
        walk_number (int): Number of random walks. Default is 10.
        walk_length (int): Length of random walks. Default is 80.
        dimensions (int): Dimensionality of embedding. Default is 128.
        workers (int): Number of cores. Default is 4.
        window_size (int): Matrix power order. Default is 2.
        epochs (int): Number of epochs. Default is 1.
        learning_rate (float): HogWild! learning rate. Default is 0.05.
        down_sampling (float): Down sampling frequency. Default is 0.0001.
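
Role2Vec is fitted on a single graph and returns structural node embeddings, so nodes with similar roles (rather than nearby nodes) end up close in the embedding space. A minimal sketch:

import networkx as nx
from karateclub import Role2Vec

graph = nx.newman_watts_strogatz_graph(100, 10, 0.05)
model = Role2Vec(walk_number=10, walk_length=80, dimensions=128)
model.fit(graph)
embedding = model.get_embedding()  # shape (100, 128)
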
github benedekrozemberczki/karateclub/karateclub/community_detection/overlapping/nnsed.py (view on GitHub)
from scipy import sparse
import numpy as np
import networkx as nx
from karateclub.estimator import Estimator

class NNSED(Estimator):
    r"""An implementation of `"NNSED"
    `_
    from the CIKM '17 paper "A Non-negative Symmetric Encoder-Decoder Approach
    for Community Detection". The procedure uses non-negative matrix factorization
    in order to learn an unnormalized cluster membership distribution over nodes.
    The method can be used in an overlapping and non-overlapping way.

    Args:
        dimensions (int): Embedding layer size. Default is 32.
        iterations (int): Number of training epochs. Default 10.
        seed (int): Random seed for weight initializations. Default 42.
    """
    def __init__(self, dimensions=32, iterations=10, seed=42):
        self.dimensions = dimensions
        self.iterations = iterations
        self.seed = seed
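
NNSED exposes both the hard community assignment and the underlying non-negative encoding. A minimal sketch:

import networkx as nx
from karateclub import NNSED

graph = nx.newman_watts_strogatz_graph(100, 10, 0.05)
model = NNSED(dimensions=32, iterations=10, seed=42)
model.fit(graph)
memberships = model.get_memberships()  # dict: node -> strongest community
embedding = model.get_embedding()      # unnormalized membership matrix, usable as a node embedding
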
github benedekrozemberczki/karateclub/karateclub/community_detection/overlapping/danmf.py (view on GitHub)
import numpy as np
import networkx as nx
from sklearn.decomposition import NMF
from karateclub.estimator import Estimator

class DANMF(Estimator):
    r"""An implementation of `"DANMF" `_
    from the CIKM '18 paper "Deep Autoencoder-like Nonnegative Matrix Factorization for
    Community Detection". The procedure uses telescopic non-negative matrix factorization
    in order to learn a cluster membership distribution over nodes. The method can be 
    used in an overlapping and non-overlapping way.

    Args:
        layers (list): Autoencoder layer sizes in a list of integers. Default [32, 8].
        pre_iterations (int): Number of pre-training epochs. Default 100.
        iterations (int): Number of training epochs. Default 100.
        seed (int): Random seed for weight initializations. Default 42.
        lamb (float): Regularization parameter. Default 0.01.
    """
    def __init__(self, layers=[32, 8], pre_iterations=100,
                 iterations=100, seed=42, lamb=0.01):
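
DANMF is used like the other community detectors; the deepest autoencoder layer (8 here) sets the number of communities. A minimal sketch:

import networkx as nx
from karateclub import DANMF

graph = nx.newman_watts_strogatz_graph(100, 10, 0.05)
model = DANMF(layers=[32, 8], pre_iterations=100, iterations=100, lamb=0.01)
model.fit(graph)
memberships = model.get_memberships()  # dict: node -> community index in the deepest layer
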