How to use the orange.EquiNDiscretization function in Orange

To help you get started, we’ve selected a few examples of orange.EquiNDiscretization, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biolab / orange2 / orange / OrangeWidgets / Data / OWInteractiveDiscretization.py View on Github external
discType = self.classDiscretization
            classVar = self.originalData.domain.classVar
            
            if discType == 2:
                try:
                    content = str(self.classCustomLineEdit.text()).replace(":", " ").replace(",", " ").replace("-", " ").split()
                    customs = dict.fromkeys([float(x) for x in content]).keys()  # remove duplicates (except 8.0, 8.000 ...)
                    customs.sort()
                except:
                    customs = []

                if not customs:
                    discType = 0

            if discType == 0:
                discretizer = orange.EquiNDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)

            elif discType == 1:
                discretizer = orange.EquiDistDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)

            else:
                discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(classVar)

            self.data = orange.ExampleTable(orange.Domain(self.originalData.domain.attributes, discretizer), self.originalData)
            
            self.classIntervalsLabel.setText("Current splits: " + ", ".join([str(classVar(x)) for x in discretizer.getValueFrom.transformer.points]))
github biolab / orange2 / orange / OrangeWidgets / Prototypes / OWDiscretizeQt.py View on Github external
classVar = self.originalData.domain.classVar

            if discType == 2:
                try:
                    content = self.customClassSplits.replace(":", " ").replace(",", " ").replace("-", " ").split()
                    customs = dict.fromkeys([float(x) for x in content]).keys()  # remove duplicates (except 8.0, 8.000 ...)
                    customs.sort()
                except:
                    customs = []

                if not customs:
                    discType = 0

            try:
                if discType == 0:
                    discretizer = orange.EquiNDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
                elif discType == 1:
                    discretizer = orange.EquiDistDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
                else:
                    discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(classVar)

                self.discClassData = orange.ExampleTable(orange.Domain(self.originalData.domain.attributes, discretizer), self.originalData)
                if self.data:
                    self.data = self.discClassData
                # else, the data has no continuous attributes other then the class

                self.classIntervalsLabel.setText("Current splits: " + ", ".join([str(classVar(x)) for x in discretizer.getValueFrom.transformer.points]))
                self.error(0)
                self.warning(0)
                return True
            except:
                if self.data:
github biolab / orange2 / Orange / OrangeWidgets / Data / OWDiscretize.py View on Github external
classVar = self.originalData.domain.classVar

            if discType == 2:
                try:
                    content = self.customClassSplits.replace(":", " ").replace(",", " ").replace("-", " ").split()
                    customs = dict.fromkeys([float(x) for x in content]).keys()  # remove duplicates (except 8.0, 8.000 ...)
                    customs.sort()
                except:
                    customs = []

                if not customs:
                    discType = 0

            try:
                if discType == 0:
                    discretizer = orange.EquiNDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
                elif discType == 1:
                    discretizer = orange.EquiDistDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
                else:
                    discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(classVar)

                self.discClassData = orange.ExampleTable(orange.Domain(self.originalData.domain.attributes, discretizer), self.originalData)
                if self.data:
                    self.data = self.discClassData
                # else, the data has no continuous attributes other then the class

                self.classIntervalsLabel.setText("Current splits: " + ", ".join([str(classVar(x)) for x in discretizer.getValueFrom.transformer.points]))
                self.error(0)
                self.warning(0)
                return True
            except:
                if self.data:
github biolab / orange2 / Orange / OrangeWidgets / VisualizeQt / OWDiscretizeQt.py View on Github external
classVar = self.originalData.domain.classVar

            if discType == 2:
                try:
                    content = self.customClassSplits.replace(":", " ").replace(",", " ").replace("-", " ").split()
                    customs = dict.fromkeys([float(x) for x in content]).keys()  # remove duplicates (except 8.0, 8.000 ...)
                    customs.sort()
                except:
                    customs = []

                if not customs:
                    discType = 0

            try:
                if discType == 0:
                    discretizer = orange.EquiNDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
                elif discType == 1:
                    discretizer = orange.EquiDistDiscretization(classVar, self.originalData, numberOfIntervals = self.classIntervals)
                else:
                    discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(classVar)

                self.discClassData = orange.ExampleTable(orange.Domain(self.originalData.domain.attributes, discretizer), self.originalData)
                if self.data:
                    self.data = self.discClassData
                # else, the data has no continuous attributes other then the class

                self.classIntervalsLabel.setText("Current splits: " + ", ".join([str(classVar(x)) for x in discretizer.getValueFrom.transformer.points]))
                self.error(0)
                self.warning(0)
                return True
            except:
                if self.data:
github biolab / orange2 / orange / doc / reference / discretization.py View on Github external
newattrs = [disc(attr, data) for attr in data.domain.attributes]
data2 = data.select(newattrs + [data.domain.classVar])

for attr in newattrs:
    print "%s: %s" % (attr.name, attr.values)
print

for attr in newattrs:
    print "%15s: first interval at %5.3f, step %5.3f" % (attr.name, attr.getValueFrom.transformer.firstCut, attr.getValueFrom.transformer.step)
    print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.getValueFrom.transformer.points])
print



print "\n\nQuartile discretization"
disc = orange.EquiNDiscretization(numberOfIntervals = 6)
newattrs = [disc(attr, data) for attr in data.domain.attributes]
data2 = data.select(newattrs + [data.domain.classVar])

for attr in newattrs:
    print "%s: %s" % (attr.name, attr.values)
print

for attr in newattrs:
    print " "*17 + "cutoffs at " + ", ".join(["%5.3f" % x for x in attr.getValueFrom.transformer.points])
print



print "\nManual construction of EquiDistDiscretizer - all attributes"
edisc = orange.EquiDistDiscretizer(firstCut = 2.0, step = 1.0, numberOfIntervals = 5)
newattrs = [edisc.constructVariable(attr) for attr in data.domain.attributes]
github biolab / orange2 / Orange / OrangeWidgets / VisualizeQt / OWDiscretizeQt.py View on Github external
discType = self.discretization+1
                intervals = self.intervals
                discName = "%s ->%s)" % (self.shortDiscNames[indiData[0]][:-1], self.shortDiscNames[discType][2:-1])
                defaultUsed = True

        if onlyDefaults and not defaultUsed:
            return

        discType -= 1
        try:
            if discType == self.D_LEAVE: # leave continuous
                discretizer = None
            elif discType == self.D_ENTROPY:
                discretizer = orange.EntropyDiscretization(attr, self.data)
            elif discType == self.D_FREQUENCY:
                discretizer = orange.EquiNDiscretization(attr, self.data, numberOfIntervals = intervals)
            elif discType == self.D_WIDTH:
                discretizer = orange.EquiDistDiscretization(attr, self.data, numberOfIntervals = intervals)
            elif discType == self.D_REMOVE:
                discretizer = False
            else:
                discretizer = orange.IntervalDiscretizer(points = customs).constructVariable(attr)
        except:
            discretizer = False


        self.discretizers[idx] = discretizer

        if discType == self.D_LEAVE:
            discInts = ""
        elif discType == self.D_REMOVE:
            discInts = ""
github biolab / orange2 / Orange / OrangeWidgets / Prototypes / OWAttributeDistance.py View on Github external
def computeMatrix(self):
        self.error(0)
        if self.data:
            atts = self.data.domain.attributes
            if len(atts) < 2:
                self.error(0, "Dataset must contain at least two attributes")
                return None
            matrix = orange.SymMatrix(len(atts))
            matrix.setattr('items', atts)
            if self.classInteractions < 3:
                if self.data.domain.hasContinuousAttributes():
                    if self.discretizedData is None:
                        try:
                            self.discretizedData = orange.Preprocessor_discretize(self.data, method=orange.EquiNDiscretization(numberOfIntervals=4))
                        except orange.KernelException, ex:
                            self.error(0, "An error ocured during data discretization: %s" % ex.message)
                            return None
                    data = self.discretizedData
                else:
                    data = self.data

                # This is ugly, but: Aleks' code which computes Chi2 requires the class attribute because it prepares
                # some common stuff for all measures. If we want to use his code, we need the class variable, so we
                # prepare a fake one
                if not data.domain.classVar:
                    if self.classInteractions == 0:
                        classedDomain = orange.Domain(data.domain.attributes, orange.EnumVariable("foo", values=["0", "1"]))
                        data = orange.ExampleTable(classedDomain, data)
                    else:
                        self.error(0, "The selected distance measure requires a data set with a class attribute")
github biolab / orange2 / orange / doc / ofb / assoc1.py View on Github external
# Description: Creates a list of association rules, selects five rules and prints them out
# Category:    description
# Uses:        imports-85
# Classes:     orngAssoc.build, Preprocessor_discretize, EquiNDiscretization
# Referenced:  assoc.htm

import orange, orngAssoc

# Support threshold shared by the rule inducer and the report line below.
min_support = 0.4

# Load the data set and discretize it with EquiNDiscretization (3 intervals).
raw = orange.ExampleTable("imports-85")
three_bins = orange.EquiNDiscretization(numberOfIntervals=3)
discretized = orange.Preprocessor_discretize(raw, method=three_bins)
# NOTE(review): presumably keeps the first ten attributes - confirm against
# the ExampleTable.select documentation.
data = discretized.select(range(10))

rules = orange.AssociationRulesInducer(data, support=min_support)

# Single-argument print(...) gives the same output as the print statement.
print("%i rules with support higher than or equal to %5.3f found.\n" % (len(rules), min_support))

orngAssoc.sort(rules, ["support", "confidence"])

# Report the first five rules after sorting.
orngAssoc.printRules(rules[:5], ["support", "confidence"])
print("")

# Drop the first three rules and report the first five of what remains.
del rules[:3]
orngAssoc.printRules(rules[:5], ["support", "confidence"])
print("")
github biolab / orange2 / docs / tutorial / rst / code / disc2.py View on Github external
# Description: Attribute-based discretization. Shows how different attributes may be discretized with different categorization methods. Also shows how the resulting domain is put together using ExampleTable.select.
# Category:    preprocessing
# Uses:        iris
# Classes:     EquiNDiscretization, EntropyDiscretization
# Referenced:  o_categorization.htm

def printexamples(data, inxs, msg="First %i examples"):
  """Print a heading followed by the selected items of `data`.

  `msg` is a %-format string that receives the number of indices in `inxs`.
  Each following line shows an index from `inxs` and `data[index]`; a blank
  line ends the listing.
  """
  # Single-argument print(...) calls produce the same output under the
  # Python 2 print statement and the Python 3 print function, so the helper
  # no longer fails to parse on Python 3.
  print(msg % len(inxs))
  for i in inxs:
    print("%s %s" % (i, data[i]))
  print("")

import orange
iris = orange.ExampleTable("iris")

# Two discretization methods that are applied to individual attributes below.
equiN = orange.EquiNDiscretization(numberOfIntervals=4)
entropy = orange.EntropyDiscretization()

# Each call takes an attribute name and the data and returns a new attribute.
pl = equiN("petal length", iris)
sl = equiN("sepal length", iris)
sl_ent = entropy("sepal length", iris)

inxs = [0, 15, 35, 50, 98]
# Combine original attributes (given by name), the new discretized attributes
# and the class variable into one table.
d_iris = iris.select(["sepal width", pl, "sepal length",sl, sl_ent, iris.domain.classVar])
printexamples(iris, inxs, "%i examples before discretization")
# d_iris holds the discretized attributes, so it is labelled "after".
printexamples(d_iris, inxs, "%i examples after discretization")
github biolab / orange2 / orange / doc / ofb / assoc3.py View on Github external
# Description: Cloning of association rules, filtering
# Category:    description
# Uses:        imports-85
# Classes:     orngAssoc.build, Preprocessor_discretize, EquiNDiscretization
# Referenced:  assoc.htm

import orange, orngAssoc

# Load the data set and discretize it with EquiNDiscretization (3 intervals).
data = orange.ExampleTable("imports-85")
data = orange.Preprocessor_discretize(data, \
  method=orange.EquiNDiscretization(numberOfIntervals=3))
# NOTE(review): presumably keeps the first ten attributes - confirm against
# the ExampleTable.select documentation.
data = data.select(range(10))

# Build the rules with the given minimal support and report how many there are.
minSupport = 0.2
rules = orngAssoc.build(data, minSupport)
print "%i rules with support higher than or equal to %5.3f found.\n" % (len(rules), minSupport)

# Work on a clone so that sorting does not touch `rules` itself.
rules2 = rules.clone()
rules2.sortByConfidence()

n = 5
print "Best %i rules:" % n
# NOTE(review): the subset is sliced from `rules`, not from the
# confidence-sorted `rules2` - confirm that this is intended.
subset = rules[:n]
subset.printMeasures(['support','confidence'])