"max": column.max(),
})
return meta
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
raise NotImplementedError
def transform(self, data):
raise NotImplementedError
def inverse_transform(self, data):
raise NotImplementedError
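

# Illustrative sketch: the smallest useful Transformer subclass, showing the
# contract the concrete transformers below follow: learn column metadata in
# `fit`, then map data to and from the model-friendly representation in
# `transform` / `inverse_transform`. The class name is hypothetical and it is
# not used elsewhere in this module.
class _IdentityTransformerSketch(Transformer):

    def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)

    def transform(self, data):
        return data

    def inverse_transform(self, data):
        return data
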
class DiscretizeTransformer(Transformer):
"""Discretize continuous columns into several bins.
Attributes:
meta
column_index
discretizer(sklearn.preprocessing.KBinsDiscretizer)
Transformation result is a int array.
"""
def __init__(self, n_bins):
self.n_bins = n_bins
self.meta = None
self.column_index = None
self.discretizer = None
p_argmax = np.argmax(v, axis=1)
std_t = stds[p_argmax]
mean_t = means[p_argmax]
tmp = u * 4 * std_t + mean_t
data_t[:, id_] = tmp
else:
current = data[:, st:st + info['size']]
st += info['size']
idx = np.argmax(current, axis=1)
data_t[:, id_] = list(map(info['i2s'].__getitem__, idx))
return data_t
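

# A minimal sketch of the mode-specific de-normalization performed above,
# assuming per-mode `means` and `stds` from a fitted mixture model. The helper
# name, signature and `scale` default are hypothetical and exist only for
# illustration; the method above selects the mode via the argmax of `v` and
# computes u * 4 * std + mean.
def _denormalize_by_mode_sketch(u, v, means, stds, scale=4):
    mode = np.argmax(v, axis=1)                # most likely mode per sample
    return u * scale * stds[mode] + means[mode]
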
class TableganTransformer(Transformer):
def __init__(self, side):
self.height = side
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
self.minn = np.zeros(len(self.meta))
self.maxx = np.zeros(len(self.meta))
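        # per-column value ranges padded by a small epsilon; categorical and
        # ordinal columns span their index range [0, size - 1]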
for i in range(len(self.meta)):
if self.meta[i]['type'] == CONTINUOUS:
self.minn[i] = self.meta[i]['min'] - 1e-3
self.maxx[i] = self.meta[i]['max'] + 1e-3
else:
self.minn[i] = -1e-3
self.maxx[i] = self.meta[i]['size'] - 1 + 1e-3
p_argmax = np.argmax(v, axis=1)
std_t = stds[p_argmax]
mean_t = means[p_argmax]
tmp = u * 2 * std_t + mean_t
data_t[:, id_] = tmp
else:
current = data[:, st:st + info['size']]
st += info['size']
idx = np.argmax(current, axis=1)
data_t[:, id_] = list(map(info['i2s'].__getitem__, idx))
return data_t
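

# Sketch of the one-hot decoding used for categorical and ordinal columns
# above: take the argmax over the one-hot block and map each index back to its
# original value through the column's `i2s` list. The helper is hypothetical
# and only restates that pattern for clarity.
def _one_hot_decode_sketch(block, i2s):
    idx = np.argmax(block, axis=1)
    return [i2s[i] for i in idx]
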
class BGMTransformer(Transformer):
"""Model continuous columns with a BayesianGMM and normalized to a scalar [0, 1] and a vector.
Discrete and ordinal columns are converted to a one-hot vector.
"""
def __init__(self, n_clusters=10, eps=0.005):
"""n_cluster is the upper bound of modes."""
self.meta = None
self.n_clusters = n_clusters
self.eps = eps
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
model = []
self.output_info = []
if self.act == 'tanh':
current = (current + 1) / 2
current = current * info['size']
current = np.round(current).clip(0, info['size'] - 1)
data_t[:, id_] = current
else:
current = data[:, :info['size']]
data = data[:, info['size']:]
idx = np.argmax(current, axis=1)
data_t[:, id_] = list(map(info['i2s'].__getitem__, idx))
return data_t
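

# Sketch of the activation-dependent inverse used above for ordinal columns:
# a 'tanh' output lives in [-1, 1] and is first shifted to [0, 1], then scaled
# by the column size and rounded to a valid index. The helper name is
# hypothetical; only the arithmetic mirrors the code above.
def _activation_to_index_sketch(u, size, act='tanh'):
    if act == 'tanh':
        u = (u + 1) / 2
    return np.round(u * size).clip(0, size - 1)
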
class GMMTransformer(Transformer):
"""
Continuous columns are modeled with a GMM.
and then normalized to a scalor [0, 1] and a n_cluster dimensional vector.
Discrete and ordinal columns are converted to a one-hot vector.
"""
def __init__(self, n_clusters=5):
self.meta = None
self.n_clusters = n_clusters
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
model = []
self.output_info = []
if self.column_index == []:
return data.astype('int')
data[:, self.column_index] = self.discretizer.transform(data[:, self.column_index])
return data.astype('int')
def inverse_transform(self, data):
if self.column_index == []:
return data
data = data.astype('float32')
data[:, self.column_index] = self.discretizer.inverse_transform(data[:, self.column_index])
return data
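

# Hypothetical usage sketch for DiscretizeTransformer: bin the continuous
# columns of a 2-D float array into integer codes and map them back to
# approximate values. Function and variable names are illustrative only.
def _discretize_round_trip_sketch(data, n_bins=5):
    transformer = DiscretizeTransformer(n_bins)
    transformer.fit(data)
    # transform writes bin indices into the array it is given, so pass a copy
    codes = transformer.transform(data.copy())
    return transformer.inverse_transform(codes)
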
class GeneralTransformer(Transformer):
"""Continuous and ordinal columns are normalized to [0, 1].
Discrete columns are converted to a one-hot vector.
"""
def __init__(self, act='sigmoid'):
self.act = act
self.meta = None
self.output_dim = None
def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
self.meta = self.get_metadata(data, categorical_columns, ordinal_columns)
self.output_dim = 0
for info in self.meta:
if info['type'] in [CONTINUOUS, ORDINAL]:
self.output_dim += 1
else: