How to use the orange.Distribution function in Orange

To help you get started, we’ve selected a few Orange examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github biolab / orange2 / orange / orng / orngVizRank.py View on Github external
prediction[res.actualClass] += res.classes[0]==res.actualClass
                countsByFold[res.iterationNumber] += 1
            prediction = [val*100.0 for val in prediction]
        elif self.qualityMeasure == AUC:
            aucResult = orngStat.AUC(results)
            if aucResult:
                return aucResult[0], None
            else:
                return 0, None

        # compute accuracy only for classes that are selected as interesting. other class values do not participate in projection evaluation
        acc = sum(prediction) / float(max(1, len(results.results)))                 # accuracy over all class values
        classes = self.selectedClasses or range(len(self.graph.data_domain.classVar.values))
        val = sum([prediction[index] for index in classes])    # accuracy over all selected classes

        currentClassDistribution = [int(v) for v in orange.Distribution(table.domain.classVar, table)]
        s = sum([currentClassDistribution[index] for index in classes])

        prediction = [prediction[i] / float(max(1, currentClassDistribution[i])) for i in range(len(prediction))] # turn to probabilities
        
        return val/max(1, float(s)), (acc, prediction, list(currentClassDistribution))
github biolab / orange2 / orange / OrangeWidgets / Associate / OWSOMVisualizer.py View on Github external
def updateHistogramColors(self):
        if self.parent().drawPies():
            return
        attr=self.somMap.examples.domain.variables[self.parent().attribute]
        for n in self.canvasObj:
            if n.hasNode:
                if attr.varType==orange.VarTypes.Discrete:
                    if self.parent().inputSet:
                        dist=orange.Distribution(attr, n.node.mappedExamples)
                    else:
                        dist=orange.Distribution(attr, n.node.examples)
                    colors=OWColorPalette.ColorPaletteHSV(len(dist))
                    maxProb=max(dist)
                    majValInd=filter(lambda i:dist[i]==maxProb, range(len(dist)))[0]
                    if self.parent().discHistMode==1:
                        n.histObj[0].setBrush(QBrush(colors[majValInd]))
                    elif self.parent().discHistMode==2:
                        light=180-80*float(dist[majValInd])/max(sum(dist),1)
                        n.histObj[0].setBrush(QBrush(colors[majValInd].light(light)))
                else:
                    if self.parent().inputSet:
                        dist=orange.Distribution(attr, n.node.mappedExamples)
                        fullDist=orange.Distribution(attr, self.parent().examples)
                    else:
                        dist=orange.Distribution(attr, n.node.examples)
                        fullDist=orange.Distribution(attr, self.somMap.examples)
                    if len(dist)==0:
github biolab / orange2 / orange / orng / orngMosaic.py View on Github external
d = self.data.select([newFeature, self.data.domain.classVar])     # create a dataset that has only this new feature and class info

            if not self.cvIndices:
                if self.testingMethod == PROPORTION_TEST:
                    pick = orange.MakeRandomIndices2(stratified = orange.MakeRandomIndices.StratifiedIfPossible, p0 = 0.7, randomGenerator = 0)
                    self.cvIndices = [pick(d) for i in range(10)]
                elif self.testingMethod == CROSSVALIDATION:
                    ind = orange.MakeRandomIndicesCV(d, 10, randomGenerator = 0, stratified = orange.MakeRandomIndices.StratifiedIfPossible)
                    self.cvIndices = [[val == i for val in ind] for i in range(10)]

            acc = 0.0; count = 0
            for ind in self.cvIndices:
                learnset = d.selectref(ind, 0)
                testset = d.selectref(ind, 1)
                learnDist = orange.Distribution(d.domain.classVar, learnset)
                newFeatureDist = orange.Distribution(newFeature, testset)
                learnConts = orange.ContingencyAttrClass(newFeature, learnset)
                testConts  = orange.ContingencyAttrClass(newFeature, testset)
                for val in testConts.keys():
                    s = sum(learnConts[val])
                    if not s: continue
                    learnClassProb = [v/float(s) for v in learnConts[val]]      # class distribution for each class value (on learning set)
                    testClassDist = [v for v in testConts[val]]                 # number of examples for each class value (on testing set)
                    for i in range(len(testClassDist)):
                        acc   += learnClassProb[i] * testClassDist[i]
                        count += testClassDist[i]
            retVal = 100*acc / max(1, float(count))

        del newFeature, quality
        return retVal
github biolab / orange2 / orange / OrangeWidgets / Associate / OWSOMVisualizer.py View on Github external
if self.parent().inputSet:
                        dist=orange.Distribution(attr, n.node.mappedExamples)
                    else:
                        dist=orange.Distribution(attr, n.node.examples)
                    colors=OWColorPalette.ColorPaletteHSV(len(dist))
                    maxProb=max(dist)
                    majValInd=filter(lambda i:dist[i]==maxProb, range(len(dist)))[0]
                    if self.parent().discHistMode==1:
                        n.histObj[0].setBrush(QBrush(colors[majValInd]))
                    elif self.parent().discHistMode==2:
                        light=180-80*float(dist[majValInd])/max(sum(dist),1)
                        n.histObj[0].setBrush(QBrush(colors[majValInd].light(light)))
                else:
                    if self.parent().inputSet:
                        dist=orange.Distribution(attr, n.node.mappedExamples)
                        fullDist=orange.Distribution(attr, self.parent().examples)
                    else:
                        dist=orange.Distribution(attr, n.node.examples)
                        fullDist=orange.Distribution(attr, self.somMap.examples)
                    if len(dist)==0:
                        continue

                    if self.parent().contHistMode==0:
                        n.histObj[0].setBrush(QBrush(DefColor))
                    if self.parent().contHistMode==1:
                        std=(dist.average()-fullDist.average())/max(fullDist.dev(),1)
                        std=min(max(std,-1),1)
                        #print std
                        n.histObj[0].setBrush(QBrush(QColor(70*(std+1)+50, 70*(std+1)+50, 0)))
                    if self.parent().contHistMode==2:
                        light = 300-200*dist.var()/fullDist.var()
                        n.histObj[0].setBrush(QBrush(QColor(0,0,20).light(light)))
github biolab / orange2 / orange / orng / orngInteractions.py View on Github external
def entropy(x, data):
    """Return the entropy of attribute(s) x estimated from dataset data.

    x may be:
      * a single orange.EnumVariable -- the entropy of its value
        distribution over data is returned;
      * a list of exactly two attributes -- the joint entropy of the pair,
        computed from their attribute-by-attribute contingency, is returned.

    For a list of any other length (joint entropy of a larger attribute set
    is not implemented) and for any other type of x, None is returned.
    """
    if type(x)==orange.EnumVariable:
        return _entropy(p2f(orange.Distribution(x, data)))
    if type(x)==list:
        if len(x)==2: # joint entropy of a pair of attributes
            # Unpack the pair: the contingency needs the two attributes
            # separately (previously the list itself and an undefined
            # name `y` were passed, which raised NameError).
            c = orange.ContingencyAttrAttr(x[0], x[1], data)
            return _entropy(p2f(flatten(c)))
        else: # joint entropy for a set of attributes -- not implemented
            pass
    return None
github biolab / orange2 / Orange / OrangeWidgets / OWClusterOptimization.py View on Github external
diffClass.append(d)
            diffClass.sort()
            dist = sum(diffClass[:5]) / float(len(diffClass[:5]))

            """
            # one way of computing the value
            area = sqrt(sqrt(areaDict[key]))
            if area > 0: value = points * dist / area
            else: value = 0
            """

            # another way of computing value
            #value = points * dist / aveDistDict[key]

            if self.distributionScale:
                d = orange.Distribution(graph.objects.domain.classVar, graph.objects)
                v = d[graph.objects[polygonVerticesDict[key][0]].getclass()]
                if v == 0: continue
                points *= sum(d) / float(v)   # turn the number of points into a percentage of all points that belong to this class value and then multiply by the number of all data points in the data set

            # and another
            #dist = sqrt(dist*1000.0)/sqrt(aveDistDict[key]*1000.0)
            dist = sqrt(dist*1000.0)
            value = points
            if self.considerDistance: value *= dist

            valueDict[key] = value
            #enlargedClosureDict[key] = enlargeClosure(graph, closureDict[key], aveDistDict[key])
            enlargedClosureDict[key] = []

            #otherDict[key] = (graph.objects[polygonVerticesDict[key][0]].getclass(), value, points, dist, area)
            #otherDict[key] = (graph.objects[polygonVerticesDict[key][0]].getclass().value, value, points, dist, aveDistDict[key])
github biolab / orange2 / Orange / orng / orngVizRank.py View on Github external
if not results.results or not results.results[0].probabilities[0]: return 0, 0
                for res in results.results:  val += res.probabilities[0].density(res.actualClass)
                if len(results.results) > 0: val/= float(len(results.results))
                return 100.0*val, (100.0*val)

        # ###############################
        # do we want to use very fast heuristic
        # ###############################
        elif self.evaluationAlgorithm == ALGORITHM_HEURISTIC:
            # if input attributes are continuous (may be discrete for evaluating scatterplots, where we dicretize the whole domain...)
            if testTable.domain[0].varType == orange.VarTypes.Continuous and testTable.domain[1].varType == orange.VarTypes.Continuous:
                discX = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals = NUMBER_OF_INTERVALS)
                discY = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals = NUMBER_OF_INTERVALS)
                testTable = testTable.select([discX, discY, testTable.domain.classVar])

            currentClassDistribution = [int(v) for v in orange.Distribution(testTable.domain.classVar, testTable)]
            prediction = [0.0 for i in range(len(testTable.domain.classVar.values))]

            # create a new attribute that is a cartesian product of the two visualized attributes
            nattr = orange.EnumVariable(values=[str(i) for i in range(NUMBER_OF_INTERVALS*NUMBER_OF_INTERVALS)])
            nattr.getValueFrom = orange.ClassifierByLookupTable2(nattr, testTable.domain[0], testTable.domain[1])
            for i in range(len(nattr.getValueFrom.lookupTable)): nattr.getValueFrom.lookupTable[i] = i

            for dist in orange.ContingencyAttrClass(nattr, testTable):
                dist = list(dist)
                if sum(dist) == 0: continue
                m = max(dist)
                prediction[dist.index(m)] += m * m / float(sum(dist))

            prediction = [val*100.0 for val in prediction]             # turn prediction array into percents
            acc = sum(prediction) / float(max(1, len(testTable)))               # compute accuracy for all classes
            val = 0.0; s = 0.0
github biolab / orange2 / orange / orng / orngVizRank.py View on Github external
if not results.results or not results.results[0].probabilities[0]: return 0, 0
                for res in results.results:  val += res.probabilities[0].density(res.actualClass)
                if len(results.results) > 0: val/= float(len(results.results))
                return 100.0*val, (100.0*val)

        # ###############################
        # do we want to use very fast heuristic
        # ###############################
        elif self.evaluationAlgorithm == ALGORITHM_HEURISTIC:
            # if input attributes are continuous (may be discrete for evaluating scatterplots, where we dicretize the whole domain...)
            if testTable.domain[0].varType == orange.VarTypes.Continuous and testTable.domain[1].varType == orange.VarTypes.Continuous:
                discX = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals = NUMBER_OF_INTERVALS)
                discY = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals = NUMBER_OF_INTERVALS)
                testTable = testTable.select([discX, discY, testTable.domain.classVar])

            currentClassDistribution = [int(v) for v in orange.Distribution(testTable.domain.classVar, testTable)]
            prediction = [0.0 for i in range(len(testTable.domain.classVar.values))]

            # create a new attribute that is a cartesian product of the two visualized attributes
            nattr = orange.EnumVariable(values=[str(i) for i in range(NUMBER_OF_INTERVALS*NUMBER_OF_INTERVALS)])
            nattr.getValueFrom = orange.ClassifierByLookupTable2(nattr, testTable.domain[0], testTable.domain[1])
            for i in range(len(nattr.getValueFrom.lookupTable)): nattr.getValueFrom.lookupTable[i] = i

            for dist in orange.ContingencyAttrClass(nattr, testTable):
                dist = list(dist)
                if sum(dist) == 0: continue
                m = max(dist)
                prediction[dist.index(m)] += m * m / float(sum(dist))

            prediction = [val*100.0 for val in prediction]             # turn prediction array into percents
            acc = sum(prediction) / float(max(1, len(testTable)))               # compute accuracy for all classes
            val = 0.0; s = 0.0