Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
class QuartileDiscretizer(BaseDiscretizer):
    """Discretizer whose cut points are each feature's 25th/50th/75th percentiles."""

    def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
        """Forward every argument unchanged to ``BaseDiscretizer.__init__``."""
        BaseDiscretizer.__init__(
            self, data, categorical_features, feature_names,
            labels=labels, random_state=random_state,
        )

    def bins(self, data, labels):
        """Return one array of quartile cut points per feature to discretize.

        Args:
            data: array indexed as ``data[:, feature]``.
            labels: accepted for interface compatibility; not used here.

        Returns:
            A list with one array of three percentile values for each entry
            of ``self.to_discretize``, in the same order.
        """
        quartiles = [25, 50, 75]
        return [
            np.array(np.percentile(data[:, column], quartiles))
            for column in self.to_discretize
        ]
class DecileDiscretizer(BaseDiscretizer):
    """Discretizer whose cut points are each feature's 10th..90th percentiles."""

    def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
        """Forward every argument unchanged to ``BaseDiscretizer.__init__``."""
        BaseDiscretizer.__init__(
            self, data, categorical_features, feature_names,
            labels=labels, random_state=random_state,
        )

    def bins(self, data, labels):
        """Return one array of decile cut points per feature to discretize.

        Args:
            data: array indexed as ``data[:, feature]``.
            labels: accepted for interface compatibility; not used here.

        Returns:
            A list with one array of nine percentile values for each entry
            of ``self.to_discretize``, in the same order.
        """
        deciles = list(range(10, 100, 10))  # 10, 20, ..., 90
        return [
            np.array(np.percentile(data[:, column], deciles))
            for column in self.to_discretize
        ]
class EntropyDiscretizer(BaseDiscretizer):
def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
class DecileDiscretizer(BaseDiscretizer):
    """Discretizer whose cut points are each feature's 10th..90th percentiles."""

    def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
        """Forward every argument unchanged to ``BaseDiscretizer.__init__``."""
        BaseDiscretizer.__init__(
            self, data, categorical_features, feature_names,
            labels=labels, random_state=random_state,
        )

    def bins(self, data, labels):
        """Return one array of decile cut points per feature to discretize.

        Args:
            data: array indexed as ``data[:, feature]``.
            labels: accepted for interface compatibility; not used here.

        Returns:
            A list with one array of nine percentile values for each entry
            of ``self.to_discretize``, in the same order.
        """
        deciles = list(range(10, 100, 10))  # 10, 20, ..., 90
        return [
            np.array(np.percentile(data[:, column], deciles))
            for column in self.to_discretize
        ]
class EntropyDiscretizer(BaseDiscretizer):
def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
    """Require labels, then defer to ``BaseDiscretizer.__init__``.

    Args:
        data, categorical_features, feature_names, random_state: forwarded
            unchanged to ``BaseDiscretizer.__init__``.
        labels: must not be None; also forwarded to the base initializer.

    Raises:
        ValueError: if ``labels`` is None.
    """
    if labels is None:
        # Adjacent string literals rather than a backslash continuation
        # inside the literal: a continuation makes any leading whitespace
        # on the following source line part of the message text.
        raise ValueError('Labels must be not None when using '
                         'EntropyDiscretizer')
    BaseDiscretizer.__init__(self, data, categorical_features,
                             feature_names, labels=labels,
                             random_state=random_state)
def bins(self, data, labels):
bins = []
for feature in self.to_discretize:
# Entropy splitting / at most 8 bins so max_depth=3
dt = sklearn.tree.DecisionTreeClassifier(criterion='entropy',
max_depth=3,
random_state=self.random_state)
x = np.reshape(data[:, feature], (-1, 1))
# NOTE(review): this is a fragment -- the enclosing function header and the
# original indentation are not visible here, so the comments below describe
# only what the individual statements show.
# Select the discretizer: a string name picks one of the built-in classes,
# an existing BaseDiscretizer instance is used as given.
if discretizer == 'quartile':
self.discretizer = QuartileDiscretizer(
training_data, self.categorical_features,
self.feature_names, labels=training_labels,
random_state=self.random_state)
elif discretizer == 'decile':
self.discretizer = DecileDiscretizer(
training_data, self.categorical_features,
self.feature_names, labels=training_labels,
random_state=self.random_state)
elif discretizer == 'entropy':
self.discretizer = EntropyDiscretizer(
training_data, self.categorical_features,
self.feature_names, labels=training_labels,
random_state=self.random_state)
elif isinstance(discretizer, BaseDiscretizer):
self.discretizer = discretizer
else:
# Neither a recognised name nor a BaseDiscretizer instance.
raise ValueError('''Discretizer must be 'quartile',''' +
''' 'decile', 'entropy' or a''' +
''' BaseDiscretizer instance''')
# categorical_features becomes the list of all column indices of training_data.
self.categorical_features = list(range(training_data.shape[1]))
# Get the discretized_training_data when the stats are not provided
if(self.training_data_stats is None):
discretized_training_data = self.discretizer.discretize(
training_data)
# Default kernel width: sqrt(number of columns) * 0.75.
if kernel_width is None:
kernel_width = np.sqrt(training_data.shape[1]) * .75
# Coerce kernel_width to a plain float.
kernel_width = float(kernel_width)
def undiscretize(self, data):
    """Return a copy of ``data`` with discretized features mapped back.

    For every feature key in ``self.means`` the stored values are cast to
    int and passed to ``self.get_undiscretize_values``; the result is
    written back into the copy. The input array itself is not modified.

    Args:
        data: array with a ``shape`` attribute; either 1-D (a single row,
            indexed as ``data[feature]``) or indexed as ``data[:, feature]``.

    Returns:
        The modified copy of ``data``.
    """
    restored = data.copy()
    for column in self.means:
        if len(data.shape) != 1:
            # Multi-row input: convert the whole column in one call.
            codes = restored[:, column].astype(int)
            restored[:, column] = self.get_undiscretize_values(column, codes)
            continue
        # Single-row input: the helper receives the value as a (1, 1) array.
        code = restored[column].astype(int).reshape(-1, 1)
        restored[column] = self.get_undiscretize_values(column, code)
    return restored
class StatsDiscretizer(BaseDiscretizer):
"""
Class to be used to supply the data stats info when discretize_continuous is true
"""
def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None,
             data_stats=None):
    """Forward every argument, including ``data_stats``, to ``BaseDiscretizer.__init__``."""
    forwarded = dict(labels=labels, random_state=random_state, data_stats=data_stats)
    BaseDiscretizer.__init__(
        self, data, categorical_features, feature_names, **forwarded
    )
def bins(self, data, labels):
bins_from_stats = self.data_stats.get("bins")
bins = []
if bins_from_stats is not None:
random_state=random_state,
data_stats=data_stats)
def bins(self, data, labels):
    """Return the precomputed bin edges stored under ``self.data_stats["bins"]``.

    Args:
        data: not used; present for interface compatibility.
        labels: not used; present for interface compatibility.

    Returns:
        A list of arrays, one for each feature in ``self.to_discretize``
        that has a non-None entry in the stored bins mapping. Empty when
        the ``"bins"`` key is missing or None.
    """
    stored = self.data_stats.get("bins")
    if stored is None:
        return []
    # NOTE(review): features without stored edges are skipped, so the result
    # can be shorter than self.to_discretize -- confirm callers do not rely
    # on positional alignment between the two.
    candidates = (stored.get(column) for column in self.to_discretize)
    return [np.array(edges) for edges in candidates if edges is not None]
class QuartileDiscretizer(BaseDiscretizer):
    """Discretizer whose cut points are each feature's 25th/50th/75th percentiles."""

    def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):
        """Forward every argument unchanged to ``BaseDiscretizer.__init__``."""
        BaseDiscretizer.__init__(
            self, data, categorical_features, feature_names,
            labels=labels, random_state=random_state,
        )

    def bins(self, data, labels):
        """Return one array of quartile cut points per feature to discretize.

        Args:
            data: array indexed as ``data[:, feature]``.
            labels: accepted for interface compatibility; not used here.

        Returns:
            A list with one array of three percentile values for each entry
            of ``self.to_discretize``, in the same order.
        """
        quartiles = [25, 50, 75]
        return [
            np.array(np.percentile(data[:, column], quartiles))
            for column in self.to_discretize
        ]
class DecileDiscretizer(BaseDiscretizer):
def __init__(self, data, categorical_features, feature_names, labels=None, random_state=None):