How to use the pgmpy.base.DAG class in pgmpy

To help you get started, we’ve selected a few pgmpy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github pgmpy / pgmpy / pgmpy / estimators / HillClimbSearch.py View on Github external
... data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 9)), columns=list('ABCDEFGHI'))
        >>> # add 10th dependent variable
        ... data['J'] = data['A'] * data['B']
        >>> est = HillClimbSearch(data, scoring_method=BicScore(data))
        >>> best_model = est.estimate()
        >>> sorted(best_model.nodes())
        ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
        >>> best_model.edges()
        [('B', 'J'), ('A', 'J')]
        >>> # search a model with restriction on the number of parents:
        >>> est.estimate(max_indegree=1).edges()
        [('J', 'A'), ('B', 'J')]
        """
        nodes = self.state_names.keys()
        if start is None:
            start = DAG()
            start.add_nodes_from(nodes)
        elif not isinstance(start, DAG) or not set(start.nodes()) == set(nodes):
            raise ValueError(
                "'start' should be a DAG with the same variables as the data set, or 'None'."
            )

        tabu_list = []
        current_model = start

        iter_no = 0
        while iter_no <= max_iter:
            iter_no += 1

            best_score_delta = 0
            best_operation = None
github pgmpy / pgmpy / pgmpy / models / SEM.py View on Github external
import numpy as np
import warnings
import itertools

from networkx.algorithms.dag import descendants
from pyparsing import OneOrMore, Word, Optional, Suppress, alphanums, nums

from pgmpy.base import DAG
from pgmpy.global_vars import HAS_PANDAS


if HAS_PANDAS:
    import pandas as pd


class SEMGraph(DAG):
    """
    Base class for graphical representation of Structural Equation Models (SEMs).

    All variables are by default assumed to have an associated error latent variable, which therefore
    doesn't need to be specified.

    Attributes
    ----------
    latents: list
        List of all the latent variables in the model except the error terms.

    observed: list
        List of all the observed variables in the model.

    graph: nx.DirectedGraph
        The graphical structure of the latent and observed variables except the error terms.
github pgmpy / pgmpy / pgmpy / estimators / ConstraintBasedEstimator.py View on Github external
>>> # pdag_to_dag is static:
        ... pdag1 = DAG([('A', 'B'), ('C', 'B'), ('C', 'D'), ('D', 'C'), ('D', 'A'), ('A', 'D')])
        >>> ConstraintBasedEstimator.pdag_to_dag(pdag1).edges()
        [('D', 'C'), ('C', 'B'), ('A', 'B'), ('A', 'D')]

        >>> # example of a pdag with no faithful extension:
        ... pdag2 = DAG([('A', 'B'), ('A', 'C'), ('B', 'C'), ('C', 'B')])
        >>> ConstraintBasedEstimator.pdag_to_dag(pdag2).edges()
        UserWarning: PDAG has no faithful extension (= no oriented DAG with the same v-structures as PDAG).
        Remaining undirected PDAG edges oriented arbitrarily.
        [('B', 'C'), ('A', 'B'), ('A', 'C')]
        """

        pdag = pdag.copy()
        dag = DAG()
        dag.add_nodes_from(pdag.nodes())

        # add already directed edges of pdag to dag
        for X, Y in pdag.edges():
            if not pdag.has_edge(Y, X):
                dag.add_edge(X, Y)

        while pdag.number_of_nodes() > 0:
            # find node with (1) no directed outgoing edges and
            #                (2) the set of undirected neighbors is either empty or
            #                    undirected neighbors + parents of X are a clique
            found = False
            for X in pdag.nodes():
                directed_outgoing_edges = set(pdag.successors(X)) - set(
                    pdag.predecessors(X)
                )
github pgmpy / pgmpy / pgmpy / models / BayesianModel.py View on Github external
import pandas as pd
from tqdm import tqdm
from joblib import Parallel, delayed

from pgmpy.base import DAG
from pgmpy.factors.discrete import (
    TabularCPD,
    JointProbabilityDistribution,
    DiscreteFactor,
)
from pgmpy.factors.continuous import ContinuousFactor
from pgmpy.independencies import Independencies
from pgmpy.models.MarkovModel import MarkovModel


class BayesianModel(DAG):
    """
    Base class for bayesian model.

    A model stores nodes and edges with conditional probability
    distribution (cpd) and other attributes.

    Models hold directed edges.  Self loops are not allowed, nor are
    multiple (parallel) edges.

    Nodes can be any hashable python object.

    Edges are represented as links between nodes.

    Parameters
    ----------
    data : input graph
github pgmpy / pgmpy / pgmpy / estimators / HillClimbSearch.py View on Github external
... data['J'] = data['A'] * data['B']
        >>> est = HillClimbSearch(data, scoring_method=BicScore(data))
        >>> best_model = est.estimate()
        >>> sorted(best_model.nodes())
        ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']
        >>> best_model.edges()
        [('B', 'J'), ('A', 'J')]
        >>> # search a model with restriction on the number of parents:
        >>> est.estimate(max_indegree=1).edges()
        [('J', 'A'), ('B', 'J')]
        """
        nodes = self.state_names.keys()
        if start is None:
            start = DAG()
            start.add_nodes_from(nodes)
        elif not isinstance(start, DAG) or not set(start.nodes()) == set(nodes):
            raise ValueError(
                "'start' should be a DAG with the same variables as the data set, or 'None'."
            )

        tabu_list = []
        current_model = start

        iter_no = 0
        while iter_no <= max_iter:
            iter_no += 1

            best_score_delta = 0
            best_operation = None

            for operation, score_delta in self._legal_operations(
                current_model, tabu_list, max_indegree, black_list, white_list
github pgmpy / pgmpy / pgmpy / estimators / ConstraintBasedEstimator.py View on Github external
def model_to_pdag(model):
        """
        Construct the DAG pattern (representing the I-equivalence class) of a DAG.

        The skeleton and separating sets are built from the nodes and
        independencies of `model` and are then handed to `skeleton_to_pdag`.
        This is the "inverse" to pdag_to_dag.

        Parameters
        ----------
        model: DAG instance
            The graph to convert.

        Returns
        -------
        The value returned by `ConstraintBasedEstimator.skeleton_to_pdag`.

        Raises
        ------
        TypeError
            If `model` is not a `DAG` instance.
        """
        if not isinstance(model, DAG):
            expectation = "model: Expected DAG instance, "
            actual = "got type {model_type}".format(model_type=type(model))
            raise TypeError(expectation + actual)

        skeleton, sepsets = ConstraintBasedEstimator.build_skeleton(
            model.nodes(), model.get_independencies()
        )
        return ConstraintBasedEstimator.skeleton_to_pdag(skeleton, sepsets)
github pgmpy / pgmpy / pgmpy / models / DynamicBayesianNetwork.py View on Github external
from itertools import combinations
from collections import defaultdict

import numpy as np
import networkx as nx

from pgmpy.factors.discrete import TabularCPD
from pgmpy.base import DAG


class DynamicBayesianNetwork(DAG):
    def __init__(self, ebunch=None):
        """
        Base class for Dynamic Bayesian Network

        This is a time variant model of the static Bayesian model, where each
        time-slice has some static nodes and is then replicated over a certain
        time period.

        The nodes can be any hashable python objects.

        Parameters
        ----------
        ebunch: Data to initialize graph.  If ebunch=None (default) an empty
              graph is created.  The data can be an edge list, or any NetworkX
              graph object
github pgmpy / pgmpy / pgmpy / estimators / ExhaustiveSearch.py View on Github external
>>> import numpy as np
        >>> from pgmpy.estimators import ExhaustiveSearch
        >>> # create random data sample with 3 variables, where B and C are identical:
        >>> data = pd.DataFrame(np.random.randint(0, 5, size=(5000, 2)), columns=list('AB'))
        >>> data['C'] = data['B']
        >>> est = ExhaustiveSearch(data)
        >>> best_model = est.estimate()
        >>> best_model
        
        >>> best_model.edges()
        [('B', 'C')]
        """

        best_dag = max(self.all_dags(), key=self.scoring_method.score)

        best_model = DAG()
        best_model.add_nodes_from(sorted(best_dag.nodes()))
        best_model.add_edges_from(sorted(best_dag.edges()))
        return best_model