How to use the sdv.sampler.Sampler function in sdv

To help you get started, we’ve selected a few sdv examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test__get_primary_keys_no_pk(self):
        """If no primary key, _get_primary_keys return a duple of None """
        # Setup
        data_navigator = MagicMock(spec=DataNavigator)
        data_navigator.get_meta_data.return_value = {}
        modeler = MagicMock(spec=Modeler)
        sampler = Sampler(data_navigator=data_navigator, modeler=modeler)

        # Run
        result = sampler._get_primary_keys('table', 5)

        # Check
        primary_key, primary_key_values = result
        assert primary_key is None
        assert primary_key_values is None
github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test_sample_table(self, rows_mock):
        """ """
        # Setup
        data_navigator = MagicMock(spec=DataNavigator)
        data_navigator.tables = {
            'table': MagicMock(**{'data.shape': ('rows', 'columns')})
        }
        modeler = MagicMock(spec=Modeler)
        sampler = Sampler(data_navigator=data_navigator, modeler=modeler)

        rows_mock.return_value = 'samples'

        table_name = 'table'
        reset_primary_keys = False

        expected_result = 'samples'

        # Run
        result = sampler.sample_table(table_name, reset_primary_keys=reset_primary_keys)

        # Check
        assert result == expected_result

        rows_mock.assert_called_once_with(
            sampler, 'table', 'rows', sample_children=False, reset_primary_keys=False)
github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test__reset_primary_keys_generators(self):
        """_reset_primary_keys deletes all generators and counters."""
        # Setup
        data_navigator = MagicMock(spec=DataNavigator)
        modeler = MagicMock(spec=Modeler)
        sampler = Sampler(data_navigator=data_navigator, modeler=modeler)

        sampler.primary_key = {
            'table': 'generator for table'
        }
        sampler.remaining_primary_key = {
            'table': 'counter for table'
        }

        # Run
        sampler._reset_primary_keys_generators()

        # Check
        assert sampler.primary_key == dict()
        assert sampler.remaining_primary_key == dict()
github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test__get_missing_valid_rows_excess_rows(self):
        """If more rows than required are passed, the result is cut to num_rows."""
        # Setup
        data_navigator = MagicMock(spec=DataNavigator)
        modeler = MagicMock(spec=Modeler)
        sampler = Sampler(data_navigator, modeler)

        synthesized = pd.DataFrame(columns=list('AB'), index=range(3, 7))
        drop_indices = pd.Series(False, index=range(3, 7))
        valid_rows = pd.DataFrame(columns=list('AB'), index=range(2))
        num_rows = 5

        # Run
        result = sampler._get_missing_valid_rows(synthesized, drop_indices, valid_rows, num_rows)
        missing_rows, valid_rows = result

        # Check
        assert missing_rows == 0
        assert valid_rows.equals(pd.DataFrame(columns=list('AB'), index=range(5)))

        data_navigator.assert_not_called()
        assert data_navigator.method_calls == []
github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test__sample_model_vine(self, qualified_mock):
        """_sample_model sample the number of rows from the given model."""
        # Setup
        data_navigator = MagicMock(spec=DataNavigator)
        modeler = MagicMock(spec=Modeler)

        sampler = Sampler(data_navigator, modeler)
        model = MagicMock()
        values = [
            np.array([1, 1, 1]),
            np.array([2, 2, 2]),
            np.array([3, 3, 3])
        ]

        qualified_mock.return_value = 'copulas.multivariate.vine.VineCopula'

        model.sample.side_effect = values
        num_rows = 3
        columns = list('ABC')

        expected_result = pd.DataFrame(values, columns=columns)

        # Run
github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test_sample_all(self, rows_mock):
        """Check sample_all and returns some value."""
        # Setup
        data_navigator = MagicMock()
        data_navigator.tables = ['TABLE_A', 'TABLE_B']
        data_navigator.get_parents.side_effect = lambda x: x != 'TABLE_A'
        modeler = MagicMock()
        sampler = Sampler(data_navigator, modeler)

        def fake_dataframe(*args, **kwargs):
            kwargs['sampled_data'][args[1]] = 'sampled_data'

        rows_mock.side_effect = fake_dataframe

        expected_get_parents_call_list = [(('TABLE_A',), {}), (('TABLE_B',), {})]
        expected_result = {
            'TABLE_A': 'sampled_data'
        }

        # Run
        result = sampler.sample_all(num_rows=5)

        # Check
        assert result == expected_result
github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test__get_primary_keys_raises_error(self):
        """_get_primary_keys raises an exception if there aren't enough values."""
        # Setup
        data_navigator = MagicMock(spec=DataNavigator)
        data_navigator.get_meta_data.return_value = {
            'primary_key': 'table_pk',
            'fields': {
                'table_pk': {
                    'regex': 'regex for table_pk',
                    'type': 'number',
                    'subtype': 'integer'
                },
            }
        }
        modeler = MagicMock(spec=Modeler)
        sampler = Sampler(data_navigator=data_navigator, modeler=modeler)
        sampler.primary_key['table'] = 'a generator'
        sampler.remaining_primary_key['table'] = 0

        # Run / Check
        with self.assertRaises(ValueError):
            sampler._get_primary_keys('table', 5)
github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test__unflatten_dict_mixed_array(self):
        """unflatten_dict restructure arrays."""
        # Setup
        data_navigator = MagicMock()
        modeler = MagicMock()
        sampler = Sampler(data_navigator, modeler)
        flat = {
            'first_key__0__0': 1,
            'first_key__0__1': 0,
            'first_key__1__0': 0,
            'first_key__1__1': 1,
            'second_key__0__std': 0.5,
            'second_key__0__mean': 0.5,
            'second_key__1__std': 0.25,
            'second_key__1__mean': 0.25
        }

        expected_result = {
            'first_key': [
                [1, 0],
                [0, 1]
            ],
github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test___init__(self):
        """Test create a default instance of Sampler class"""
        # Run
        models = {'test': Mock()}
        sampler = Sampler('test_metadata', models, SDVModel, dict())

        # Asserts
        assert sampler.metadata == 'test_metadata'
        assert sampler.models == models
        assert sampler.primary_key == dict()
        assert sampler.remaining_primary_key == dict()
        assert sampler.model == SDVModel
        assert sampler.model_kwargs == dict()
github HDI-Project / SDV / tests / test_sampler.py View on Github external
def test__unflatten_dict_respect_covariance_matrix(self):
        """unflatten_dict restructures the covariance matrix into an square matrix."""
        # Setup
        data_navigator = MagicMock()
        modeler = MagicMock()
        sampler = Sampler(data_navigator, modeler)

        def fake_values(i, j):
            return '{}, {}'.format(i, j)

        expected_result = {
            'covariance': np.array([
                [fake_values(i, j) for j in range(40)]
                for i in range(40)
            ]).tolist()
        }

        flat = {
            'covariance__{}__{}'.format(i, j): fake_values(i, j)
            for i in range(40) for j in range(40)
        }