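# The snippets below are excerpts from a larger module. To read them standalone,
# assume module-level imports and column-type constants roughly like these (the
# concrete constant values are assumptions; only the names appear in the code):
import shutil

import numpy as np

CONTINUOUS, CATEGORICAL, ORDINAL = 'continuous', 'categorical', 'ordinal'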
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
    self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
    self.output_dim = 0
    for info in self.meta:
        # Continuous and ordinal columns are encoded as a single value;
        # categorical columns expand to one slot per category.
        if info['type'] in [CONTINUOUS, ORDINAL]:
            self.output_dim += 1
        else:
            self.output_dim += info['size']
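# A minimal sketch of how output_dim accumulates for a mix of column types,
# assuming get_metadata returns entries shaped like the dicts below
# (hypothetical values; only the keys are taken from fit() above).
meta = [
    {'type': CONTINUOUS, 'min': 0.0, 'max': 1.0},  # contributes 1
    {'type': ORDINAL, 'size': 5},                  # contributes 1
    {'type': CATEGORICAL, 'size': 3},              # contributes 3 (one slot per category)
]
output_dim = sum(1 if m['type'] in (CONTINUOUS, ORDINAL) else m['size'] for m in meta)
print(output_dim)  # 5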
def sample(self, n):
    # Prepare a scratch directory layout and a local copy of the privBayes binary.
    try_mkdirs("__privbn_tmp/data")
    try_mkdirs("__privbn_tmp/log")
    try_mkdirs("__privbn_tmp/output")
    shutil.copy("privbayes/privBayes.bin", "__privbn_tmp/privBayes.bin")
    d_cols = []
    # Write the domain description consumed by privBayes: discrete columns
    # become "D" lines, continuous columns become "C min max" lines.
    with open("__privbn_tmp/data/real.domain", "w") as f:
        for id_, info in enumerate(self.meta):
            if info['type'] in [CATEGORICAL, ORDINAL]:
                print("D", end='', file=f)
                counter = 0
                for i in range(info['size']):
                    # Open a new brace group every four values; all groups
                    # are closed at the end of the line.
                    if i > 0 and i % 4 == 0:
                        counter += 1
                        print(" {", end='', file=f)
                    print("", i, end='', file=f)
                print(" }" * counter, file=f)
                d_cols.append(id_)
            else:
                # Widen the continuous range by 3% of its span on each side.
                minn = info['min']
                maxx = info['max']
                d = (maxx - minn) * 0.03
                minn = minn - d
                maxx = maxx + d
                print("C", minn, maxx, file=f)
def _compute_distance(train, syn, metadata, sample=300):
    # Mask marking discrete (categorical/ordinal) columns with 1, continuous with 0.
    mask_d = np.zeros(len(metadata['columns']))
    for id_, info in enumerate(metadata['columns']):
        if info['type'] in [CATEGORICAL, ORDINAL]:
            mask_d[id_] = 1
        else:
            mask_d[id_] = 0

    std = np.std(train, axis=0) + 1e-6

    dis_all = []
    for i in range(min(sample, len(train))):
        current = syn[i]
        # Discrete part: count discrete columns where the real value exceeds
        # the synthetic one (elementwise, as written).
        distance_d = (train - current) * mask_d > 0
        distance_d = np.sum(distance_d, axis=1)
        # Continuous part: halved differences scaled by per-column std, squared and summed.
        distance_c = (train - current) * (1 - mask_d) / 2 / std
        distance_c = np.sum(distance_c ** 2, axis=1)
        # Distance from this synthetic row to its nearest real row.
        distance = np.sqrt(np.min(distance_c + distance_d))
        dis_all.append(distance)

    # The excerpt ends without a return; the natural completion is to aggregate
    # the per-row nearest-neighbour distances (assumed).
    return np.mean(dis_all)
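# Hypothetical usage of _compute_distance, assuming the real and synthetic data
# are numeric arrays whose columns line up with metadata['columns'] (illustrative
# values only):
metadata = {'columns': [
    {'type': CONTINUOUS, 'min': 0.0, 'max': 10.0},
    {'type': CATEGORICAL, 'size': 3},
]}
train = np.array([[0.5, 1], [2.0, 0], [7.5, 2]], dtype=float)
syn = np.array([[0.6, 1], [2.1, 2], [7.0, 0]], dtype=float)
score = _compute_distance(train, syn, metadata)  # lower means syn rows lie closer to real rows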
def _get_columns(metadata):
    # Split column indices by type so callers can treat categorical and ordinal
    # columns differently from continuous ones.
    categorical_columns = list()
    ordinal_columns = list()
    for column_idx, column in enumerate(metadata['columns']):
        if column['type'] == CATEGORICAL:
            categorical_columns.append(column_idx)
        elif column['type'] == ORDINAL:
            ordinal_columns.append(column_idx)

    return categorical_columns, ordinal_columns
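# Hypothetical usage of _get_columns on a small metadata dict (illustrative values only):
metadata = {'columns': [
    {'name': 'age', 'type': CONTINUOUS},
    {'name': 'education', 'type': ORDINAL, 'size': 5},
    {'name': 'workclass', 'type': CATEGORICAL, 'size': 7},
]}
categorical_columns, ordinal_columns = _get_columns(metadata)
print(categorical_columns, ordinal_columns)  # [2] [1]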