Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Description: Ranking and selection of best N attributes
# Category: preprocessing
# Uses: voting
# Referenced: orngFSS.htm
# Classes: orngFSS.attMeasure, orngFSS.bestNAtts
import orange, orngFSS
data = orange.ExampleTable("voting")
print 'Attribute scores for best three attributes:'
ma = orngFSS.attMeasure(data)
for m in ma[:3]:
print "%5.3f %s" % (m[1], m[0])
n = 3
best = orngFSS.bestNAtts(ma, n)
print '\nBest %d attributes:' % n
for s in best:
print s
data.append(ex)
for ex in data:
print ex
loe = [
["3", "1", "1", "2", "1", "1", "1"],
["3", "1", "1", "2", "2", "1", "0"],
["3", "3", "1", "2", "2", "1", "1"]]
d2 = orange.ExampleTable(domain, loe)
d2[0] = ["1", "1", 1, "1", "1", "1", "1"]
import numpy
d = orange.Domain([orange.FloatVariable('a%i'%x) for x in range(5)])
a = numpy.array([[1, 2, 3, 4, 5], [5, 4, 3, 2, 1]])
t = orange.ExampleTable(a)
print len(t)
print t[0]
print t[1]
# Fragment of a query-execution routine: runs self.query on the cursor
# `curs` and copies the fetched rows into self.exampleTable.
# NOTE(review): the enclosing `def`, the definition of `curs`, and the outer
# `try:` that pairs with the final `except` are not visible in this fragment,
# and the original indentation has been lost.
try:
# Call the quirks hook on the cursor, then execute the stored query.
self.quirks.beforeRead(curs)
curs.execute(self.query)
except Exception, e:
# Roll back the connection, then re-raise the same exception.
self.conn.rollback()
raise e
# Keep the cursor's column description for the steps below.
self.desc = curs.description
# for reasons unknown, the attributes get reordered.
# domainIndexes is filled in below but is not read again within this fragment.
domainIndexes = [0] * len(self.desc)
self._createDomain()
# The first field of each description entry is used as the attribute name.
attrNames = []
for i, name in enumerate(self.desc):
# print name[0], '->', self.domain.index(name[0])
# NOTE(review): this line reads self._domain while the rest of the fragment
# uses self.domain -- confirm both refer to the same object.
domainIndexes[self._domain.index(name[0])] = i
attrNames.append(name[0])
# Start with an empty table over the domain and fetch rows one at a time.
self.exampleTable = orange.ExampleTable(self.domain)
r = curs.fetchone()
while r:
# for reasons unknown, domain rearranges the properties
# Values are therefore assigned by attribute name (attrNames[i]).
example = orange.Example(self.domain)
for i in xrange(len(r)):
# None fields are tested for explicitly; other values are converted with str().
if r[i] is not None:
val = str(r[i])
var = example[attrNames[i]].variable
# For an EnumVariable, a value string not yet in var.values is appended to it.
if type(var) == orange.EnumVariable and val not in var.values:
var.values.append(val)
example[attrNames[i]] = str(r[i])
self.exampleTable.append(example)
r = curs.fetchone()
self._dirty = False
# On any exception self.domain is cleared.
# NOTE(review): no re-raise is visible in this fragment -- confirm whether
# the handler continues beyond what is shown here.
except Exception, e:
self.domain = None
def cmatrix(self, matrix=None):
    """Install a new distance matrix and reset the MDS state built from it.

    Closes the current context, stores ``matrix`` in ``self.origMatrix``,
    resets the graph's colour/size/shape/name attribute selections and its
    closest pairs, hands the matrix items to the matching ``set*`` method
    and, when a matrix is given, creates a fresh ``orngMDS.MDS`` with
    random starting points and recomputes ``self.stress``.

    Args:
        matrix: the matrix to use, or ``None`` (the default) to clear the
            current data. It must expose ``dim`` and may expose ``items``.
            NOTE(review): presumably an ``orange.SymMatrix`` -- confirm
            against the caller.

    NOTE(review): the source had lost its indentation; the nesting below
    was reconstructed from the statements themselves.
    """
    self.closeContext()
    self.origMatrix = matrix
    self.data = data = None
    if matrix:
        # The fallback range(matrix.dim) is evaluated eagerly, so
        # matrix.dim is read even when matrix.items exists.
        self.data = data = getattr(matrix, "items", range(matrix.dim))
        matrix.matrixType = orange.SymMatrix.Symmetric

    # Reset the graph's attribute selections and cached closest pairs.
    self.graph.ColorAttr = 0
    self.graph.SizeAttr = 0
    self.graph.ShapeAttr = 0
    self.graph.NameAttr = 0
    self.graph.closestPairs = None

    # Dispatch on the kind of items attached to the matrix.
    if isinstance(data, orange.ExampleTable):
        self.setExampleTable(data)
    elif isinstance(data, orange.VarList):
        self.setVarList(data)
    elif data is not None:
        self.setList(data)

    if matrix:
        self.mds = orngMDS.MDS(matrix)
        # Replace the MDS points with uniformly random coordinates.
        self.mds.points = numpy.random.random(
            size=[self.mds.n, self.mds.dim]
        )
        self.mds.getStress()
        self.stress = self.getAvgStress(self.stressFunc[self.StressFunc][1])
        # isinstance (not an exact type comparison) keeps this check
        # consistent with the dispatch above and accepts subclasses.
        if data and isinstance(data, orange.ExampleTable):
            self.openContext("", self.data)
# Description: Writes a data set to and reads from an SQL database
# Category: file formats
# Classes: ExampleTable, orngSQL.SQLReader, orngSQL.SQLWriter
# Uses: iris.tab
# Referenced: orngSQL.htm
import orange, orngSQL, orngTree
data = orange.ExampleTable("iris")
print "Input data domain:"
for a in data.domain.variables:
print a
r = orngSQL.SQLReader('mysql://user:somepass@localhost/test')
w = orngSQL.SQLWriter('mysql://user:somepass@localhost/test')
# the following line only works with mysql because it uses the enum type.
w.create('iris', data,
renameDict = {'sepal length':'seplen',
'sepal width':'sepwidth',
'petal length':'petlen',
'petal width':'petwidth'},
typeDict = {'iris':"""enum('Iris-setosa', 'Iris-versicolor', 'Iris-virginica')"""})
r.execute("SELECT petwidth, petlen FROM iris WHERE seplen<5.0;")
data = r.data()
import orange
import orngClustering
data = orange.ExampleTable("iris")
sample = data.selectref(orange.MakeRandomIndices2(data, 20), 0)
root = orngClustering.hierarchicalClustering(sample)
reduced = orange.ExampleTable(orange.Domain(sample.domain[:2], False), sample)
my_colors = [(255,0,0), (0,255,0), (0,0,255)]
cls = orngClustering.hierarchicalClustering_topClusters(root, 3)
colors = dict([(cl, col) for cl, col in zip(cls, my_colors)])
print data.native(2)
orngClustering.dendrogram_draw("hclust-colored-dendrogram.png", root, data = reduced, labels=[str(d.getclass()) for d in sample],
cluster_colors=colors, color_palette=[(0, 255, 0), (0, 0, 0), (255, 0, 0)], gamma=0.5, minv=2.0, maxv=7.0)
print "Attributes in favor of %s = %s [%f]"%(t.domain.classVar.name,t.domain.classVar.values[0],1-m.probfunc(m.example_c[idx][0]))
printpie(e0,1-m.probfunc(m.example_c[idx][0]))
print "Attributes in favor of %s = %s [%f]"%(t.domain.classVar.name,t.domain.classVar.values[1],m.probfunc(m.example_c[idx][0]))
printpie(e1,m.probfunc(m.example_c[idx][0]))
print "\nProjection of the example in the basis space:"
j = 0
for i in range(len(m.coeff_names)):
print m.coeff_names[i][0],':'
for x in m.coeff_names[i][1:]:
print '\t',x,'=',vector[j]
j += 1
print "beta:",-m.beta
#t = orange.ExampleTable('c:/proj/domains/voting.tab') # discrete
t = orange.ExampleTable(r"E:\Development\Orange Datasets\UCI\shuttle.tab" ) # discrete
#t = orange.ExampleTable('c_cmc.tab') # continuous
print "NAIVE BAYES"
print "==========="
bl = orange.BayesLearner()
bl.estimatorConstructor = orange.ProbabilityEstimatorConstructor_Laplace()
# prevent too many estimation points
# increase the smoothing level
bl.conditionalEstimatorConstructorContinuous = orange.ConditionalProbabilityEstimatorConstructor_loess(windowProportion=0.5,nPoints = 10)
c = bl(t)
printmodel(t,c,printexamples=0)
print "\n\nLOGISTIC REGRESSION"
print "==================="
c = orngLR_Jakulin.BasicLogisticLearner()(t)
def xtestChinaEuropeSimpler():
    """Count test-set misclassifications of a majority and a kNN classifier.

    Both classifiers are trained on 'ismir2011_fb_folkTrain.tab' and each
    row of 'ismir2011_fb_folkTest.tab' is classified by both; a counter is
    incremented whenever a guess differs from the row's class.

    NOTE(review): in the code visible here the two counters stay local;
    they are neither returned nor printed.
    """
    import orange  # @UnresolvedImport
    import orngTree  # @UnusedImport @UnresolvedImport

    trainingTable = orange.ExampleTable('ismir2011_fb_folkTrain.tab')
    testTable = orange.ExampleTable('ismir2011_fb_folkTest.tab')

    majorityModel = orange.MajorityLearner(trainingTable)
    neighbourModel = orange.kNNLearner(trainingTable)

    majorityMisses = 0
    neighbourMisses = 0

    for row in testTable:
        majorityAnswer = majorityModel(row)
        neighbourAnswer = neighbourModel(row)
        expected = row.getclass()
        if majorityAnswer != expected:
            majorityMisses += 1
        if neighbourAnswer != expected:
            neighbourMisses += 1
# Description: Association rule sorting and filtering
# Category: description
# Uses: imports-85
# Classes: orngAssoc.build, Preprocessor_discretize, EquiNDiscretization
# Referenced: assoc.htm
import orange, orngAssoc
data = orange.ExampleTable("imports-85")
data = orange.Preprocessor_discretize(data, \
method=orange.EquiNDiscretization(numberOfIntervals=3))
data = data.select(range(10))
rules = orange.AssociationRulesInducer(data, support=0.4)
n = 5
print "%i most confident rules:" % (n)
orngAssoc.sort(rules, ["confidence", "support"])
orngAssoc.printRules(rules[0:n], ['confidence', 'support', 'lift'])
conf = 0.8; lift = 1.1
print "\nRules with confidence>%5.3f and lift>%5.3f" % (conf, lift)
rulesC = rules.filter(lambda x: x.confidence > conf and x.lift > lift)
orngAssoc.sort(rulesC, ['confidence'])
orngAssoc.printRules(rulesC, ['confidence', 'support', 'lift'])
# Description: Read data and for each attribute report percent of instances with missing value
# Category: description
# Uses: adult_sample.tab
# Referenced: basic_exploration.htm
import orange
data = orange.ExampleTable("../../datasets/adult_sample")
natt = len(data.domain.attributes)
missing = [0.] * natt
for i in data:
for j in range(natt):
if i[j].isSpecial():
missing[j] += 1
missing = map(lambda x, l=len(data):x/l*100., missing)
print "Missing values per attribute:"
atts = data.domain.attributes
for i in range(natt):
print " %5.1f%s %s" % (missing[i], '%', atts[i].name)