Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# ###############################
# do we want to use very fast heuristic
# ###############################
elif self.evaluationAlgorithm == ALGORITHM_HEURISTIC:
# if input attributes are continuous (may be discrete for evaluating scatterplots, where we discretize the whole domain...)
if testTable.domain[0].varType == orange.VarTypes.Continuous and testTable.domain[1].varType == orange.VarTypes.Continuous:
    discX = orange.EquiDistDiscretization(testTable.domain[0], testTable, numberOfIntervals=NUMBER_OF_INTERVALS)
    # The second axis has to be discretized from the second attribute;
    # building it from domain[0] again would pair the first attribute with
    # itself and ignore the other visualized attribute entirely.
    discY = orange.EquiDistDiscretization(testTable.domain[1], testTable, numberOfIntervals=NUMBER_OF_INTERVALS)
    # Keep only the two discretized attributes and the class.
    testTable = testTable.select([discX, discY, testTable.domain.classVar])
currentClassDistribution = [int(v) for v in orange.Distribution(testTable.domain.classVar, testTable)]
prediction = [0.0 for i in range(len(testTable.domain.classVar.values))]
# create a new attribute that is a cartesian product of the two visualized attributes
nattr = orange.EnumVariable(values=[str(i) for i in range(NUMBER_OF_INTERVALS*NUMBER_OF_INTERVALS)])
nattr.getValueFrom = orange.ClassifierByLookupTable2(nattr, testTable.domain[0], testTable.domain[1])
for i in range(len(nattr.getValueFrom.lookupTable)): nattr.getValueFrom.lookupTable[i] = i
for dist in orange.ContingencyAttrClass(nattr, testTable):
dist = list(dist)
if sum(dist) == 0: continue
m = max(dist)
prediction[dist.index(m)] += m * m / float(sum(dist))
prediction = [val*100.0 for val in prediction] # turn prediction array into percents
acc = sum(prediction) / float(max(1, len(testTable))) # compute accuracy for all classes
val = 0.0; s = 0.0
for index in self.selectedClasses: # compute accuracy for selected classes
val += prediction[index]
s += currentClassDistribution[index]
for i in range(len(prediction)):
# Description: Shows how to construct an orange.ExampleTable out of nothing
# Category: basic classes
# Classes: ExampleTable, Domain
# Uses:
# Referenced: ExampleTable.htm
import orange, random
# Fixed seed so every run generates the same examples.
random.seed(0)

# card[i] is the number of values of the i-th attribute; each attribute takes
# the first card[i] entries of `values` as its value names.
card = [3, 3, 2, 3, 4, 2]
values = ["1", "2", "3", "4"]

# Attributes are named "a", "b", ... (chr(97) is "a").
attributes = []
for i in range(6):
    attributes.append(orange.EnumVariable(chr(97 + i), values=values[:card[i]]))
classattr = orange.EnumVariable("y", values=["0", "1"])
domain = orange.Domain(attributes + [classattr])

# Start from an empty table and append five randomly generated examples.
data = orange.ExampleTable(domain)
for i in range(5):
    ex = []
    for c in card:
        ex.append(random.randint(0, c - 1))
    # The class is true when the first two attributes agree or the fifth is 0.
    ex.append(ex[0] == ex[1] or ex[4] == 0)
    data.append(ex)

for ex in data:
    print(ex)
loe = [
["3", "1", "1", "2", "1", "1", "1"],
# Description: Manual categorization of continuous attributes.
# Category: preprocessing
# Uses: iris
# Classes: ClassifierFromVar, IntervalDiscretizer, getValueFrom
# Referenced: o_categorization.htm
import orange
def printexamples(data, inxs, msg="First %i examples"):
    """Print a header line, the selected examples, and a trailing blank line.

    data -- indexable collection; ``data[i]`` is printed for each ``i`` in inxs
    inxs -- sequence of indices into data; its length is substituted into msg
    msg  -- %-format string that receives ``len(inxs)``
    """
    # Single-argument print calls produce the same output under Python 2's
    # print statement and Python 3's print function.
    print(msg % len(inxs))
    for i in inxs:
        print(data[i])
    # An empty string (rather than a bare ``print``) yields the blank line on
    # both interpreter generations.
    print("")
iris = orange.ExampleTable("iris")

# "pl" is a new discrete attribute whose value is derived from the continuous
# "petal length" attribute of the loaded data.
pl = orange.EnumVariable("pl")

# Interval discretizer with two manually chosen cut points.
discretizer = orange.IntervalDiscretizer()
discretizer.points = [2.0, 4.0]

# The classifier reads "petal length", passes it through the discretizer and
# reports the result as a value of "pl".
getValue = orange.ClassifierFromVar()
getValue.whichVar = iris.domain["petal length"]
getValue.classVar = pl
getValue.transformer = discretizer

pl.getValueFrom = getValue
pl.values = ['low', 'medium', 'high']

# Original attribute, its categorized counterpart, and the class side by side.
d_iris = iris.select(["petal length", pl, iris.domain.classVar])
printexamples(d_iris, [0, 15, 35, 50, 98], "%i examples after discretization")
for i in range(len(all_attributes)):
for j in t:
if j[i].isSpecial():
special_attributes.append(i)
break
# create new attributes
if len(special_attributes) > 0:
# prepare attributes
newatts = []
for i in range(len(all_attributes)):
old = all_attributes[i]
if i in special_attributes:
oldv = [v for v in old.values]
assert('.' not in oldv)
new = orange.EnumVariable(name='M_'+old.name, values=oldv+['.'])
warnings.warn('Removing special values from %s into %s.'%(old.name,new.name))
newatts.append(new)
else:
newatts.append(old)
# convert table
exs = []
# 2006-08-23: added by PJ: add a class variable (if not already existing)
if not t.domain.classVar:
newatts.append(orange.EnumVariable("class", values=["."]))
t = orange.ExampleTable(orange.Domain(t.domain.attributes, newatts[-1]), t)
newd = orange.Domain(newatts)
for ex in t:
nex = []
for i in range(len(newatts)):
def mergeClassValues(data, value):
    """Return a copy of data whose class is replaced by a binary "Selection".

    Examples whose class equals ``value`` come first and get Selection "0";
    all remaining examples follow and get Selection "1". The result's domain
    consists of the original attributes followed by the Selection variable.
    """
    classVar = data.domain.classVar
    selectedClassesStr = [value]
    nonSelectedClassesStr = [val for val in classVar.values
                             if val not in selectedClassesStr]

    # Split the data by class membership.
    shortData1 = data.select({classVar.name: selectedClassesStr})
    shortData2 = data.select({classVar.name: nonSelectedClassesStr})

    selection = orange.EnumVariable("Selection", values=["0", "1"])
    d1 = orange.Domain(shortData1.domain.attributes + [selection])

    def convert(subset, flag):
        # Point getValueFrom at a constant before converting, so the new
        # column is filled with `flag` for this subset.
        selection.getValueFrom = lambda ex, what: orange.Value(selection, flag)
        return orange.ExampleTable(d1, subset)

    merged = convert(shortData1, "0")
    merged.extend(convert(shortData2, "1"))
    return merged
# Remove the currently selected class label: rebuild the graph's data without
# the examples of that class, using a new class variable that lacks the label.
# NOTE(review): indentation is not visible in this view; the statements after
# the copy loop use names bound inside the `if` (newclass, newdata, index), so
# they presumably belong to its body -- confirm, in particular for the final
# setEnabled call.
def removeSelectedClassLabel(self):
index = self.selectedClassLabelIndex()
# Act only when a label is selected and it is not the last remaining one.
if index is not None and len(self.classValuesModel) > 1:
label = self.classValuesModel[index]
# Keep the examples whose class (compared as a string) is not the removed label.
examples = [ex for ex in self.graph.data if str(ex.getclass()) != label]
values = [val for val in self.classValuesModel if val != label]
# New class variable over the remaining labels, same attributes, empty table.
newclass = orange.EnumVariable("Class label", values=values)
newdomain = orange.Domain(self.graph.data.domain.attributes, newclass)
newdata = orange.ExampleTable(newdomain)
# Copy each kept example into the new domain: attribute values followed by
# the class given as a string.
for ex in examples:
if ex[self.classVariable] != label and ex[self.classVariable] in values:
newdata.append(orange.Example(newdomain, [ex[a] for a in ex.domain.attributes] + [str(ex.getclass())]))
# Switch the widget state over to the new class variable and data.
self.classVariable = newclass
self.classValuesModel.wrap(self.classVariable.values)
self.graph.data = newdata
self.graph.updateGraph()
# Move the selection to the label before the removed one (or to the first).
newindex = self.classValuesModel.index(max(0, index - 1))
self.classValuesView.selectionModel().select(newindex, QItemSelectionModel.ClearAndSelect)
# The remove control stays enabled only while more than one label is left.
self.removeClassLabel.setEnabled(len(self.classValuesModel) > 1)
def __loadDataFromES(self, dataType, domain):
    """Create the example table and fetch phrase documents for ``dataType``.

    dataType -- "train", "holdout", or any other value for a single phrase
    domain   -- domain for the table when dataType is not "train"; for
                "train" a new domain is built from ``self.features`` with an
                "is_good" class and a "phrase" string meta attribute

    For "train" and "holdout" the matching documents are counted first and
    the count is passed as ``size`` so one search returns all of them. For any
    other dataType the single document ``self.phraseId`` is fetched and stored
    in ``self.phraseData``.

    NOTE(review): the code visible here neither returns ``table`` nor consumes
    ``phrases``; presumably the remainder of the method does -- confirm.
    """
    table = None
    if dataType != "train":
        table = orange.ExampleTable(domain)
    else:
        attributes = map(self.__getOrangeVariableForFeature, self.features)
        classAttribute = orange.EnumVariable("is_good", values = ["0", "1"])
        domain = orange.Domain(attributes, classAttribute)
        domain.addmeta(orange.newmetaid(), orange.StringVariable("phrase"))
        table = orange.ExampleTable(domain)

    phrases = []
    if dataType in ("train", "holdout"):
        # Both data sets are fetched the same way and differ only in the flag
        # field. Sharing the code removes the misspelled `phraseCount` variable
        # that made the "holdout" path fail with a NameError.
        flagField = "is_training" if dataType == "train" else "is_holdout"
        query = {"query": {"terms": {flagField: ["1", "0"]}}}
        phrasesCount = self.esClient.count(index=self.processorIndex, doc_type=self.processorPhraseType, body=query)
        size = phrasesCount["count"]
        phrases = self.esClient.search(index=self.processorIndex, doc_type=self.processorPhraseType, body=query, size=size)
        phrases = phrases["hits"]["hits"]
    else:
        self.phraseData = self.esClient.get(index=self.processorIndex, doc_type=self.processorPhraseType, id=self.phraseId)
def learnModel(self, X, y):
    """Build a class-weighted Orange table from X, y and set up a tree learner.

    X -- 2-D array of examples; one float attribute "X<i>" is made per column
    y -- labels; must contain exactly two distinct values

    Raises ValueError when y is not binary. Sets ``self.worstResponse``,
    ``self.domain`` and ``self.learner``.
    """
    classes = numpy.unique(y)
    if classes.shape[0] != 2:
        raise ValueError("Can only operate on binary data")
    # Whichever label is not the best response is the worst one.
    self.worstResponse = classes[classes != self.bestResponse][0]

    # The table stores class indices rather than the raw labels.
    labelInds = self.labelsToInds(y)
    tableValues = numpy.c_[X, labelInds]

    variables = [orange.FloatVariable("X" + str(i)) for i in range(X.shape[1])]
    classVar = orange.EnumVariable("y")
    classVar.addValue(str(self.bestResponse))
    classVar.addValue(str(self.worstResponse))
    variables.append(classVar)

    self.domain = orange.Domain(variables)
    eTable = orange.ExampleTable(self.domain, tableValues)

    # Weight the examples per class. Equalizing computes weights such that
    # the weighted number of examples in each class is equivalent.
    weigher = orange.Preprocessor_addClassWeight(equalize=1)
    weigher.classWeights = [1 - self.weight, self.weight]
    eTable, weightID = weigher(eTable)
    eTable.domain.addmeta(weightID, orange.FloatVariable("w"))

    self.learner = orngTree.TreeLearner(m_pruning=self.m, measure="gainRatio")
    self.learner.max_depth = self.maxDepth
def sortAttrValues(self, attr, interattr=None):
    """Return a variant of the attribute whose values are in sorted order.

    attr      -- attribute the new variable's values are looked up from
    interattr -- attribute whose values are sorted; falls back to attr

    When the values are already sorted the (inter)attribute itself is
    returned. Otherwise a new EnumVariable with the same name and sorted
    values is created, computed from attr through a lookup table.
    """
    source = interattr or attr
    current = list(source.values)
    ordered = sorted(current)
    if ordered == current:
        return source

    newattr = orange.EnumVariable(source.name, values=ordered)
    newattr.getValueFrom = orange.ClassifierByLookupTable(newattr, attr)
    classifier = newattr.getValueFrom
    lookupTable = classifier.lookupTable
    distributions = classifier.distributions
    for val in source.values:
        # Position of the value in attr selects the lookup slot; the slot
        # maps to the same value, and its distribution is bumped at the
        # value's position in the sorted order.
        slot = attr.values.index(val)
        lookupTable[slot] = val
        distributions[slot][ordered.index(val)] += 1
    return newattr
newDomain.addmetas(newData.domain.getmetas())
finalData = orange.ExampleTable(newDomain,finalData)
newData = orange.ExampleTable(newDomain,origData)
origData = orange.ExampleTable(newDomain,origData)
for d in origData:
d[atDisc] = 0
for d in finalData:
d[atDisc] = 0
for i,d in enumerate(newData):
d[atDisc] = 1
d[at] = 0
d[weightID] = 100*data[i][weightID]
elif at.varType == orange.VarTypes.Discrete:
# in origData, finalData and newData, add one more value to attribute "at"; the new value is the attribute's name + "X"
atNew = orange.EnumVariable(at.name, values = at.values + [at.name+"X"])
newDomain = orange.Domain(filter(lambda x: x!=at, origData.domain.attributes)+[atNew,origData.domain.classVar])
newDomain.addmetas(origData.domain.getmetas())
temp_finalData = orange.ExampleTable(finalData)
finalData = orange.ExampleTable(newDomain,finalData)
newData = orange.ExampleTable(newDomain,origData)
temp_origData = orange.ExampleTable(origData)
origData = orange.ExampleTable(newDomain,origData)
for i,d in enumerate(origData):
d[atNew] = temp_origData[i][at]
for i,d in enumerate(finalData):
d[atNew] = temp_finalData[i][at]
for i,d in enumerate(newData):
d[atNew] = at.name+"X"
d[weightID] = 10*data[i][weightID]
finalData.extend(newData)
return finalData