Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
features = []
labels = []
for index, cinfo in enumerate(self.columns):
col = data[:, index]
if cinfo['name'] == self.label_column:
if self.label_type == 'int':
labels = col.astype(int)
elif self.label_type == 'float':
labels = col.astype(float)
else:
assert 0, 'unkown label type'
continue
if cinfo['type'] == CONTINUOUS:
cmin = cinfo['min']
cmax = cinfo['max']
if cmin >= 0 and cmax >= 1e3:
feature = np.log(np.maximum(col, 1e-2))
else:
feature = (col - cmin) / (cmax - cmin) * 5
elif cinfo['type'] == ORDINAL:
feature = col
else:
if cinfo['size'] <= 2:
feature = col
else:
def sample(self, samples):
    """Generate ``samples`` rows of independent uniform random data.

    A ``(samples, self.shape[1])`` array of uniform draws from
    ``np.random.uniform(0, 1, ...)`` is created first.  Then, for each entry
    of ``self.meta`` (matched to columns by position):

    * if its ``'type'`` equals ``CONTINUOUS``, the column is rescaled
      linearly using the entry's ``'min'`` and ``'max'``;
    * otherwise the column is multiplied by the entry's ``'size'`` and
      truncated to an ``int32`` code.

    Returns:
        The resulting array cast to ``self.dtype``.
    """
    n_columns = self.shape[1]
    table = np.random.uniform(0, 1, (samples, n_columns))

    for position, column in enumerate(self.meta):
        unit = table[:, position]

        if column['type'] == CONTINUOUS:
            high, low = column['max'], column['min']
            table[:, position] = unit * (high - low) + low
            continue

        # The (1 - 1e-8) factor nudges the draw down slightly before it is
        # scaled by the category count and truncated to an integer.
        scaled = unit * (1 - 1e-8) * column['size']
        table[:, position] = scaled.astype('int32')

    return table.astype(self.dtype)
def transform(self, data):
values = []
for id_, info in enumerate(self.meta):
current = data[:, id_]
if info['type'] == CONTINUOUS:
current = current.reshape([-1, 1])
means = self.model[id_].means_.reshape((1, self.n_clusters))
stds = np.sqrt(self.model[id_].covariances_).reshape((1, self.n_clusters))
features = (current - means) / (2 * stds)
probs = self.model[id_].predict_proba(current.reshape([-1, 1]))
argmax = np.argmax(probs, axis=1)
idx = np.arange((len(features)))
features = features[idx, argmax].reshape([-1, 1])
features = np.clip(features, -.99, .99)
values += [features, probs]
else:
col_t = np.zeros([len(data), info['size']])
def sample(self, samples):
    """Generate ``samples`` rows by sampling each column independently.

    The output is a ``(samples, len(self.meta))`` array of ``self.dtype``.
    Column ``i`` is filled from ``self.models[i]``:

    * if the meta entry's ``'type'`` equals ``CONTINUOUS``, values come from
      ``self.models[i].sample(samples)``, are shuffled in place, stored, and
      then clipped to the entry's ``'min'`` / ``'max'``;
    * otherwise ``self.models[i]`` is passed as the probability vector ``p``
      to ``np.random.choice`` over the indices ``0 .. len(model) - 1``.

    Returns:
        The filled array.
    """
    n_columns = len(self.meta)
    output = np.zeros([samples, n_columns], self.dtype)

    for col, spec in enumerate(self.meta):
        model = self.models[col]

        if spec['type'] == CONTINUOUS:
            # The model's sample() returns a pair; only the first item is used.
            drawn, _ = model.sample(samples)
            np.random.shuffle(drawn)
            # Store first, then clip the stored column (same order as before,
            # so any dtype conversion happens ahead of the clipping).
            output[:, col] = drawn.reshape([samples])
            output[:, col] = output[:, col].clip(spec['min'], spec['max'])
            continue

        categories = np.arange(len(model))
        output[:, col] = np.random.choice(categories, samples, p=model)

    return output