Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Excerpt from the body of a method; the enclosing `def` and all indentation
# are missing from this snippet. It discretizes the class variable of
# self.originalData according to self.classDiscretization and stores the
# resulting table in self.data.
discType = self.classDiscretization
classVar = self.originalData.domain.classVar
# Mode 2: cut points typed by the user into classCustomLineEdit.
if discType == 2:
try:
# ":", "," and "-" all act as separators between the numbers. Because "-" is
# replaced by a space, a leading minus sign is dropped before float() runs.
content = str(self.classCustomLineEdit.text()).replace(":", " ").replace(",", " ").replace("-", " ").split()
# dict.fromkeys() collapses repeated values. Calling .sort() on the result of
# .keys() relies on it being a list (Python 2 behaviour).
customs = dict.fromkeys([float(x) for x in content]).keys() # remove duplicates (except 8.0, 8.000 ...)
customs.sort()
# Bare except: any failure while parsing discards every custom point.
except:
customs = []
# No usable custom points -> fall back to mode 0.
if not customs:
discType = 0
# Mode 0 -> EquiNDiscretization, mode 1 -> EquiDistDiscretization (both with
# self.classIntervals intervals), anything else -> the custom cut points.
if discType == 0:
discretizer = orange.EquiNDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
elif discType == 1:
discretizer = orange.EquiDistDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
else:
discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(classVar)
# Rebuild the table: original attributes, with `discretizer` as the class.
self.data = orange.ExampleTable(orange.Domain(self.originalData.domain.attributes, discretizer), self.originalData)
# Show the resulting cut points, each formatted through the original classVar.
self.classIntervalsLabel.setText("Current splits: " + ", ".join([str(classVar(x)) for x in discretizer.getValueFrom.transformer.points]))
# Excerpt from the body of a method; the enclosing `def`, the assignment of
# `discType` and all indentation are missing from this snippet. It discretizes
# the class variable of self.originalData and returns True on success.
classVar = self.originalData.domain.classVar
# Mode 2: cut points taken from the string self.customClassSplits.
if discType == 2:
try:
# ":", "," and "-" all act as separators between the numbers. Because "-" is
# replaced by a space, a leading minus sign is dropped before float() runs.
content = self.customClassSplits.replace(":", " ").replace(",", " ").replace("-", " ").split()
# dict.fromkeys() collapses repeated values. Calling .sort() on the result of
# .keys() relies on it being a list (Python 2 behaviour).
customs = dict.fromkeys([float(x) for x in content]).keys() # remove duplicates (except 8.0, 8.000 ...)
customs.sort()
# Bare except: any failure while parsing discards every custom point.
except:
customs = []
# No usable custom points -> fall back to mode 0.
if not customs:
discType = 0
try:
# Mode 0 -> EquiNDiscretization, mode 1 -> EquiDistDiscretization (both with
# self.classIntervals intervals), anything else -> the custom cut points.
if discType == 0:
discretizer = orange.EquiNDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
elif discType == 1:
discretizer = orange.EquiDistDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
else:
discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(classVar)
# Table with the original attributes and `discretizer` as the class.
self.discClassData = orange.ExampleTable(orange.Domain(self.originalData.domain.attributes, discretizer), self.originalData)
# self.data is only replaced when it is already set (truthy).
if self.data:
self.data = self.discClassData
# else, the data has no continuous attributes other than the class
# Show the resulting cut points, each formatted through the original classVar.
self.classIntervalsLabel.setText("Current splits: " + ", ".join([str(classVar(x)) for x in discretizer.getValueFrom.transformer.points]))
# Clear error and warning slot 0, then report success.
self.error(0)
self.warning(0)
return True
# Bare except around the whole discretization step. The handler body is cut
# off in this excerpt, so what happens on failure is not visible here.
except:
if self.data:
# Excerpt from the body of a method; the enclosing `def`, the assignment of
# `discType` and all indentation are missing from this snippet. It discretizes
# the class variable of self.originalData and returns True on success.
classVar = self.originalData.domain.classVar
# Mode 2: cut points taken from the string self.customClassSplits.
if discType == 2:
try:
# ":", "," and "-" all act as separators between the numbers. Because "-" is
# replaced by a space, a leading minus sign is dropped before float() runs.
content = self.customClassSplits.replace(":", " ").replace(",", " ").replace("-", " ").split()
# dict.fromkeys() collapses repeated values. Calling .sort() on the result of
# .keys() relies on it being a list (Python 2 behaviour).
customs = dict.fromkeys([float(x) for x in content]).keys() # remove duplicates (except 8.0, 8.000 ...)
customs.sort()
# Bare except: any failure while parsing discards every custom point.
except:
customs = []
# No usable custom points -> fall back to mode 0.
if not customs:
discType = 0
try:
# Mode 0 -> EquiNDiscretization, mode 1 -> EquiDistDiscretization (both with
# self.classIntervals intervals), anything else -> the custom cut points.
if discType == 0:
discretizer = orange.EquiNDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
elif discType == 1:
discretizer = orange.EquiDistDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
else:
discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(classVar)
# Table with the original attributes and `discretizer` as the class.
self.discClassData = orange.ExampleTable(orange.Domain(self.originalData.domain.attributes, discretizer), self.originalData)
# self.data is only replaced when it is already set (truthy).
if self.data:
self.data = self.discClassData
# else, the data has no continuous attributes other than the class
# Show the resulting cut points, each formatted through the original classVar.
self.classIntervalsLabel.setText("Current splits: " + ", ".join([str(classVar(x)) for x in discretizer.getValueFrom.transformer.points]))
# Clear error and warning slot 0, then report success.
self.error(0)
self.warning(0)
return True
# Bare except around the whole discretization step. The handler body is cut
# off in this excerpt, so what happens on failure is not visible here.
except:
if self.data:
# Excerpt from the body of a method; the enclosing `def`, the assignment of
# `discType` and all indentation are missing from this snippet. It discretizes
# the class variable of self.originalData and returns True on success.
classVar = self.originalData.domain.classVar
# Mode 2: cut points taken from the string self.customClassSplits.
if discType == 2:
try:
# ":", "," and "-" all act as separators between the numbers. Because "-" is
# replaced by a space, a leading minus sign is dropped before float() runs.
content = self.customClassSplits.replace(":", " ").replace(",", " ").replace("-", " ").split()
# dict.fromkeys() collapses repeated values. Calling .sort() on the result of
# .keys() relies on it being a list (Python 2 behaviour).
customs = dict.fromkeys([float(x) for x in content]).keys() # remove duplicates (except 8.0, 8.000 ...)
customs.sort()
# Bare except: any failure while parsing discards every custom point.
except:
customs = []
# No usable custom points -> fall back to mode 0.
if not customs:
discType = 0
try:
# Mode 0 -> EquiNDiscretization, mode 1 -> EquiDistDiscretization (both with
# self.classIntervals intervals), anything else -> the custom cut points.
if discType == 0:
discretizer = orange.EquiNDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
elif discType == 1:
discretizer = orange.EquiDistDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
else:
discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(classVar)
# Table with the original attributes and `discretizer` as the class.
self.discClassData = orange.ExampleTable(orange.Domain(self.originalData.domain.attributes, discretizer), self.originalData)
# self.data is only replaced when it is already set (truthy).
if self.data:
self.data = self.discClassData
# else, the data has no continuous attributes other than the class
# Show the resulting cut points, each formatted through the original classVar.
self.classIntervalsLabel.setText("Current splits: " + ", ".join([str(classVar(x)) for x in discretizer.getValueFrom.transformer.points]))
# Clear error and warning slot 0, then report success.
self.error(0)
self.warning(0)
return True
# Bare except around the whole discretization step. The handler body is cut
# off in this excerpt, so what happens on failure is not visible here.
except:
if self.data:
# Excerpt from a demo script; `disc` and `data` are defined before this
# snippet starts, and the indentation of the loop bodies has been lost.
# Apply the current discretization `disc` to every attribute of `data`.
newattrs = [disc(attr, data) for attr in data.domain.attributes]
# New table: the discretized attributes followed by the original class.
# data2 is not read again anywhere in this excerpt.
data2 = data.select(newattrs + [data.domain.classVar])
# List each discretized attribute with its interval values.
for attr in newattrs:
print "%s: %s" % (attr.name, attr.values)
print
# Report the parameters stored on each attribute's transformer.
for attr in newattrs:
print "%15s: first interval at %5.3f, step %5.3f" % (attr.name, attr.getValueFrom.transformer.firstCut, attr.getValueFrom.transformer.step)
print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.getValueFrom.transformer.points])
print
print "\n\nQuartile discretization"
# NOTE(review): the heading says "Quartile" but six intervals are requested;
# confirm which of the two is intended.
disc = orange.EquiNDiscretization(numberOfIntervals = 6)
newattrs = [disc(attr, data) for attr in data.domain.attributes]
data2 = data.select(newattrs + [data.domain.classVar])
for attr in newattrs:
print "%s: %s" % (attr.name, attr.values)
print
# Only the cut-off points are printed for this discretization.
for attr in newattrs:
print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.getValueFrom.transformer.points])
print
print "\nManual construction of EquiDistDiscretizer - all attributes"
# One hand-built discretizer (firstCut 2.0, step 1.0, 5 intervals) is applied
# to every attribute via constructVariable. The snippet ends right after this.
edisc = orange.EquiDistDiscretizer(firstCut = 2.0, step = 1.0, numberOfIntervals = 5)
newattrs = [edisc.constructVariable(attr) for attr in data.domain.attributes]
# Excerpt from the body of a method; the enclosing `def`, surrounding branches
# and all indentation are missing. `indiData`, `onlyDefaults`, `customs`,
# `attr` and `idx` are defined outside this snippet.
# Shifted by one here and shifted back below, after the name is built.
discType = self.discretization+1
intervals = self.intervals
# Label built from two entries of self.shortDiscNames, with characters trimmed
# from their ends by the slices.
discName = "%s ->%s)" % (self.shortDiscNames[indiData[0]][:-1], self.shortDiscNames[discType][2:-1])
defaultUsed = True
# NOTE(review): as flattened here defaultUsed was just set to True, so this
# return looks unreachable; presumably these lines sit in different branches
# of the original method. Confirm against the full source.
if onlyDefaults and not defaultUsed:
return
discType -= 1
# Build the discretizer for `attr`. Two sentinel values are used instead of a
# discretizer: None for D_LEAVE, and False for D_REMOVE or any failure.
try:
if discType == self.D_LEAVE: # leave continuous
discretizer = None
elif discType == self.D_ENTROPY:
discretizer = orange.EntropyDiscretization(attr, self.data)
elif discType == self.D_FREQUENCY:
discretizer = orange.EquiNDiscretization(attr, self.data, numberOfIntervals = intervals)
elif discType == self.D_WIDTH:
discretizer = orange.EquiDistDiscretization(attr, self.data, numberOfIntervals = intervals)
elif discType == self.D_REMOVE:
discretizer = False
else:
discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(attr)
# Bare except: a failed discretization is stored as False, the same value
# that D_REMOVE produces.
except:
discretizer = False
self.discretizers[idx] = discretizer
# No interval text for attributes that are left continuous or removed. The
# remaining branches are cut off in this excerpt.
if discType == self.D_LEAVE:
discInts = ""
elif discType == self.D_REMOVE:
discInts = ""
# Prepares the attribute matrix and the data it will be computed from.
# Only the set-up part of the method is visible: the snippet is truncated
# after the missing-class error branch and the indentation has been lost.
# Returns None when there are fewer than two attributes or when the
# discretization raises orange.KernelException.
def computeMatrix(self):
# Clear any error previously reported in slot 0.
self.error(0)
if self.data:
atts = self.data.domain.attributes
if len(atts) < 2:
self.error(0, "Dataset must contain at least two attributes")
return None
# Symmetric matrix with one row/column per attribute; the attributes are
# attached to it as `items`.
matrix = orange.SymMatrix(len(atts))
matrix.setattr('items', atts)
if self.classInteractions < 3:
# Continuous attributes are discretized first (EquiNDiscretization with four
# intervals); the result is kept in self.discretizedData and reused.
if self.data.domain.hasContinuousAttributes():
if self.discretizedData is None:
try:
self.discretizedData = orange.Preprocessor_discretize(self.data, method=orange.EquiNDiscretization(numberOfIntervals=4))
except orange.KernelException, ex:
# NOTE(review): "ocured" in the message below is a typo for "occurred".
self.error(0, "An error ocured during data discretization: %s" % ex.message)
return None
data = self.discretizedData
else:
data = self.data
# This is ugly, but: Aleks' code which computes Chi2 requires the class attribute because it prepares
# some common stuff for all measures. If we want to use his code, we need the class variable, so we
# prepare a fake one
if not data.domain.classVar:
if self.classInteractions == 0:
# Attach a placeholder two-valued class "foo" to class-less data.
classedDomain = orange.Domain(data.domain.attributes, orange.EnumVariable("foo", values=["0", "1"]))
data = orange.ExampleTable(classedDomain, data)
else:
# Other settings below 3 need a real class; what follows this error call is
# not visible in the excerpt.
self.error(0, "The selected distance measure requires a data set with a class attribute")
# Description: Creates a list of association rules, selects five rules and prints them out
# Category: description
# Uses: imports-85
# Classes: AssociationRulesInducer, orngAssoc.sort, orngAssoc.printRules, Preprocessor_discretize, EquiNDiscretization
# Referenced: assoc.htm
import orange, orngAssoc
# Association-rule demo: discretize "imports-85", induce rules at a fixed
# minimum support, then print the leading rules before and after deleting
# the first three. Each print(...) call takes a single parenthesized
# argument, which prints the same text as the equivalent print statement.
SUPPORT = 0.4

data = orange.ExampleTable("imports-85")
binning = orange.EquiNDiscretization(numberOfIntervals=3)
data = orange.Preprocessor_discretize(data, method=binning)
data = data.select(range(10))

rules = orange.AssociationRulesInducer(data, support=SUPPORT)
found_msg = "%i rules with support higher than or equal to %5.3f found.\n"
print(found_msg % (len(rules), SUPPORT))

# Order the rules by the listed measures and show the first five.
orngAssoc.sort(rules, ["support", "confidence"])
orngAssoc.printRules(rules[:5], ["support", "confidence"])
print("")

# Drop the first three rules and show the new leading five.
del rules[:3]
orngAssoc.printRules(rules[:5], ["support", "confidence"])
print("")
# Description: Attribute-based discretization. Shows how different attributes may be discretized with different categorization methods. Also shows how the resulting domain is put together using orange.select.
# Category: preprocessing
# Uses: iris
# Classes: EquiNDiscretization, EntropyDiscretization
# Referenced: o_categorization.htm
def printexamples(data, inxs, msg="First %i examples"):
    """Print a heading followed by the selected examples of *data*.

    data -- any object indexable with the values in *inxs*.
    inxs -- the indices to print, in the order given.
    msg  -- heading template; it receives ``len(inxs)`` through ``%``.

    One line "<index> <example>" is written per index, followed by a blank
    line. Nothing is returned.
    """
    # Every print call takes one pre-formatted string, so the output is the
    # same whether print is the Python 2 statement or the Python 3 function.
    print(msg % len(inxs))
    for i in inxs:
        print("%s %s" % (i, data[i]))
    print("")
import orange
# Discretize individual iris attributes with different methods and compare
# the same examples before and after.
iris = orange.ExampleTable("iris")

# Two discretization methods: EquiNDiscretization with four intervals, and
# EntropyDiscretization with its default settings.
equiN = orange.EquiNDiscretization(numberOfIntervals=4)
entropy = orange.EntropyDiscretization()

# "sepal length" is discretized twice, once with each method.
pl = equiN("petal length", iris)
sl = equiN("sepal length", iris)
sl_ent = entropy("sepal length", iris)

inxs = [0, 15, 35, 50, 98]

# The new table mixes original attributes (given by name) with the
# discretized ones, and keeps the original class variable.
d_iris = iris.select(["sepal width", pl, "sepal length", sl, sl_ent, iris.domain.classVar])

printexamples(iris, inxs, "%i examples before discretization")
# d_iris holds the discretized attributes, so this heading must say "after";
# the original repeated "before" for both tables.
printexamples(d_iris, inxs, "%i examples after discretization")
# Description: Cloning of association rules, filtering
# Category: description
# Uses: imports-85
# Classes: orngAssoc.build, Preprocessor_discretize, EquiNDiscretization
# Referenced: assoc.htm
import orange, orngAssoc
# Demo of cloning a rule list. The snippet may continue past the last line
# shown here.
# Load "imports-85", discretize it with EquiNDiscretization (three intervals)
# and narrow the table with select(range(10)).
data = orange.ExampleTable("imports-85")
data = orange.Preprocessor_discretize(data, \
method=orange.EquiNDiscretization(numberOfIntervals=3))
data = data.select(range(10))
# Build the rules with orngAssoc.build at the given minimum support.
minSupport = 0.2
rules = orngAssoc.build(data, minSupport)
print "%i rules with support higher than or equal to %5.3f found.\n" % (len(rules), minSupport)
# Sort a clone by confidence; `rules` itself is not sorted in this snippet.
rules2 = rules.clone()
rules2.sortByConfidence()
n = 5
print "Best %i rules:" % n
# NOTE(review): the slice is taken from `rules`, not from the
# confidence-sorted clone `rules2`; confirm this is intended for "Best".
subset = rules[:n]
subset.printMeasures(['support','confidence'])