How to use the sdgym.synthesizers.base.BaseSynthesizer class in sdgym

To help you get started, we’ve selected a few sdgym examples based on popular ways it is used in public projects.
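
Every example below follows the same contract: a synthesizer subclasses BaseSynthesizer and implements a fit method (training data plus optional categorical and ordinal column lists) and a sample method that returns n generated rows. The following is a minimal toy sketch of that contract; the MeanSynthesizer name and its mean-based logic are illustrative only and are not part of SDGym:

import numpy as np

from sdgym.synthesizers.base import BaseSynthesizer


class MeanSynthesizer(BaseSynthesizer):
    """Toy synthesizer: every sampled row is the column-wise mean of the training data."""

    def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        # Store the per-column means of the (numeric) training data.
        self.means = data.mean(axis=0)

    def sample(self, n):
        # Repeat the mean row n times to produce an (n, n_columns) array.
        return np.tile(self.means, (n, 1))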


Example from DAI-Lab/SDGym: sdgym/synthesizers/privbn.py (view on GitHub)
import logging
import os
import shutil

import numpy as np

from sdgym.constants import CATEGORICAL, ORDINAL
from sdgym.synthesizers.base import BaseSynthesizer
from sdgym.synthesizers.utils import Transformer

LOGGER = logging.getLogger(__name__)


def try_mkdirs(path):
    if not os.path.isdir(path):
        os.makedirs(path)
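# Note: on Python 3 this helper is effectively equivalent to os.makedirs(path, exist_ok=True).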


class PrivBNSynthesizer(BaseSynthesizer):
    """docstring for IdentitySynthesizer."""

    def __init__(self, theta=20, max_samples=25000):
        assert os.path.exists("privbayes/privBayes.bin")
        self.theta = theta
        self.max_samples = max_samples

    def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.data = data.copy()
        self.meta = Transformer.get_metadata(data, categorical_columns, ordinal_columns)

    def sample(self, n):
        try_mkdirs("__privbn_tmp/data")
        try_mkdirs("__privbn_tmp/log")
        try_mkdirs("__privbn_tmp/output")
        shutil.copy("privbayes/privBayes.bin", "__privbn_tmp/privBayes.bin")

Example from DAI-Lab/SDGym: sdgym/synthesizers/identity.py (view on GitHub)
import pandas as pd

from sdgym.synthesizers.base import BaseSynthesizer


class IdentitySynthesizer(BaseSynthesizer):
    """Trivial synthesizer.

    Returns exactly the same data that was used to fit it.
    """

    def fit(self, train_data, *args):
        self.data = pd.DataFrame(train_data)

    def sample(self, samples):
        return self.data.sample(samples, replace=True).values
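
A quick usage sketch for the snippet above (the toy array is hypothetical, used only for illustration):

import numpy as np

from sdgym.synthesizers.identity import IdentitySynthesizer

train = np.arange(20).reshape(10, 2)
synth = IdentitySynthesizer()
synth.fit(train)
fake = synth.sample(5)  # 5 rows resampled, with replacement, from the training data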

Example from DAI-Lab/SDGym: sdgym/synthesizers/privbn.py (view on GitHub)
import os
import shutil
import subprocess

import numpy as np

from sdgym.constants import CATEGORICAL, ORDINAL
from sdgym.synthesizers.base import BaseSynthesizer
from sdgym.synthesizers.utils import Transformer


def try_mkdirs(path):
    if not os.path.isdir(path):
        os.makedirs(path)


class PrivBNSynthesizer(BaseSynthesizer):
    """docstring for IdentitySynthesizer."""

    def __init__(self):
        assert os.path.exists("privbayes/privBayes.bin")

    def fit(self, data, categoricals=tuple(), ordinals=tuple()):
        self.data = data.copy()
        self.meta = Transformer.get_metadata(data, categoricals, ordinals)

    def sample(self, n):
        try_mkdirs("__privbn_tmp/data")
        try_mkdirs("__privbn_tmp/log")
        try_mkdirs("__privbn_tmp/output")
        shutil.copy("privbayes/privBayes.bin", "__privbn_tmp/privBayes.bin")
        d_cols = []
        with open("__privbn_tmp/data/real.domain", "w") as f:

Example from DAI-Lab/SDGym: sdgym/synthesizers/tablegan.py (view on GitHub)
    layers_C += [Conv2d(layer_dims[-1][0], 1, layer_dims[-1][1], 1, 0)]

    return layers_D, layers_G, layers_C


def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        init.normal_(m.weight.data, 0.0, 0.02)

    elif classname.find('BatchNorm') != -1:
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0)
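# weights_init above applies the standard DCGAN-style initialization:
# convolution weights drawn from N(0, 0.02), batch-norm weights from N(1, 0.02) with zero bias.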


class TableganSynthesizer(BaseSynthesizer):
    """docstring for TableganSynthesizer??"""

    def __init__(self,
                 random_dim=100,
                 num_channels=64,
                 l2scale=1e-5,
                 batch_size=500,
                 epochs=300):

        self.random_dim = random_dim
        self.num_channels = num_channels
        self.l2scale = l2scale

        self.batch_size = batch_size
        self.epochs = epochs

Example from DAI-Lab/SDGym: sdgym/synthesizers/uniform.py (view on GitHub)
import numpy as np

from sdgym.constants import CONTINUOUS
from sdgym.synthesizers.base import BaseSynthesizer
from sdgym.synthesizers.utils import Transformer


class UniformSynthesizer(BaseSynthesizer):
    """UniformSynthesizer."""

    def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.dtype = data.dtype
        self.shape = data.shape
        self.meta = Transformer.get_metadata(data, categorical_columns, ordinal_columns)

    def sample(self, samples):
        data = np.random.uniform(0, 1, (samples, self.shape[1]))

        for i, c in enumerate(self.meta):
            if c['type'] == CONTINUOUS:
                data[:, i] = data[:, i] * (c['max'] - c['min']) + c['min']
            else:
                data[:, i] = (data[:, i] * (1 - 1e-8) * c['size']).astype('int32')
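
The uniform baseline only needs each column's range (or category count) from the metadata. A usage sketch, assuming a small all-continuous dataset (the array below is made up):

import numpy as np

from sdgym.synthesizers.uniform import UniformSynthesizer

real = np.random.rand(100, 3).astype('float32')
synth = UniformSynthesizer()
synth.fit(real)          # no categorical or ordinal columns in this toy example
fake = synth.sample(10)  # each column drawn uniformly over its observed range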

Example from DAI-Lab/SDGym: sdgym/synthesizers/clbn.py (view on GitHub)
import json

import numpy as np
from pomegranate import BayesianNetwork, ConditionalProbabilityTable, DiscreteDistribution

from sdgym.synthesizers.base import BaseSynthesizer
from sdgym.synthesizers.utils import DiscretizeTransformer


class CLBNSynthesizer(BaseSynthesizer):
    """CLBNSynthesizer."""

    def fit(self, data, categoricals=tuple(), ordinals=tuple()):
        self.discretizer = DiscretizeTransformer(n_bins=15)
        self.discretizer.fit(data, categoricals, ordinals)
        discretized_data = self.discretizer.transform(data)
        self.model = BayesianNetwork.from_samples(discretized_data, algorithm='chow-liu')

    def bn_sample(self, num_samples):
        """Sample from the bayesian network.

        Args:
            num_samples(int): Number of samples to generate.
        """
        nodes_parents = self.model.structure
        processing_order = []

Example from DAI-Lab/SDGym: sdgym/synthesizers/veegan.py (view on GitHub)
                ed = st + item[0]
                data_t.append(torch.tanh(data[:, st:ed]))
                st = ed

            elif item[1] == 'softmax':
                ed = st + item[0]
                data_t.append(softmax(data[:, st:ed], dim=1))
                st = ed

            else:
                assert 0

        return torch.cat(data_t, dim=1)


class VEEGANSynthesizer(BaseSynthesizer):
    """VEEGANSynthesizer."""

    def __init__(
        self,
        embedding_dim=32,
        gen_dim=(128, 128),
        dis_dim=(128, ),
        rec_dim=(128, 128),
        l2scale=1e-6,
        batch_size=500,
        epochs=300
    ):

        self.embedding_dim = embedding_dim
        self.gen_dim = gen_dim
        self.dis_dim = dis_dim

Example from DAI-Lab/SDGym: sdgym/synthesizers/ctgan.py (view on GitHub)
    # interpolates = torch.Variable(interpolates, requires_grad=True, device=device)

    disc_interpolates = netD(interpolates)

    gradients = torch.autograd.grad(
        outputs=disc_interpolates, inputs=interpolates,
        grad_outputs=torch.ones(disc_interpolates.size(), device=device),
        create_graph=True, retain_graph=True, only_inputs=True)[0]

    gradient_penalty = (
        (gradients.view(-1, pac * real_data.size(1)).norm(2, dim=1) - 1) ** 2).mean() * lambda_
    return gradient_penalty
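# The code above computes the standard WGAN-GP gradient penalty,
# lambda_ * E[(||grad D(x_hat)||_2 - 1)^2], grouping pac samples per penalty term.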


class CTGANSynthesizer(BaseSynthesizer):
    """docstring for IdentitySynthesizer."""

    def __init__(self,
                 embedding_dim=128,
                 gen_dim=(256, 256),
                 dis_dim=(256, 256),
                 l2scale=1e-6,
                 batch_size=500,
                 epochs=300):

        self.embedding_dim = embedding_dim
        self.gen_dim = gen_dim
        self.dis_dim = dis_dim

        self.l2scale = l2scale
        self.batch_size = batch_size

Example from DAI-Lab/SDGym: sdgym/synthesizers/tvae.py (view on GitHub)
            st = ed

        elif item[1] == 'softmax':
            ed = st + item[0]
            loss.append(cross_entropy(
                recon_x[:, st:ed], torch.argmax(x[:, st:ed], dim=-1), reduction='sum'))
            st = ed

        else:
            assert 0
    assert st == recon_x.size()[1]
    KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return sum(loss) * factor / x.size()[0], KLD / x.size()[0]
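# The loss above returns the averaged reconstruction term and the KL term of the
# standard VAE objective; KLD is the closed-form KL divergence from a unit Gaussian prior.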


class TVAESynthesizer(BaseSynthesizer):
    """TVAESynthesizer."""

    def __init__(
        self,
        embedding_dim=128,
        compress_dims=(128, 128),
        decompress_dims=(128, 128),
        l2scale=1e-5,
        batch_size=500,
        epochs=300
    ):

        self.embedding_dim = embedding_dim
        self.compress_dims = compress_dims
        self.decompress_dims = decompress_dims

Example from DAI-Lab/SDGym: sdgym/synthesizers/medgan.py (view on GitHub)
for item in output_info:
        if item[1] == 'sigmoid':
            ed = st + item[0]
            loss.append(mse_loss(sigmoid(fake[:, st:ed]), real[:, st:ed], reduction='sum'))
            st = ed
        elif item[1] == 'softmax':
            ed = st + item[0]
            loss.append(cross_entropy(
                fake[:, st:ed], torch.argmax(real[:, st:ed], dim=-1), reduction='sum'))
            st = ed
        else:
            assert 0
    return sum(loss) / fake.size()[0]


class MedganSynthesizer(BaseSynthesizer):
    """docstring for IdentitySynthesizer."""

    def __init__(self,
                 embedding_dim=128,
                 random_dim=128,
                 generator_dims=(128, 128),          # 128 -> 128 -> 128
                 discriminator_dims=(256, 128, 1),   # datadim * 2 -> 256 -> 128 -> 1
                 compress_dims=(),                   # datadim -> embedding_dim
                 decompress_dims=(),                 # embedding_dim -> datadim
                 bn_decay=0.99,
                 l2scale=0.001,
                 pretrain_epoch=200,
                 batch_size=1000,
                 epochs=2000):

        self.embedding_dim = embedding_dim