# Initialize FileDataSource to retrieve the pruning input data from a .csv file
pruneDataSource = FileDataSource(
    pruneDatasetFileName, DataSourceIface.doAllocateNumericTable,
    DataSourceIface.doDictionaryFromContext
)
# Create Numeric Tables for pruning data and labels
pruneData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
pruneGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
pruneMergedData = MergedNumericTable(pruneData, pruneGroundTruth)
# Retrieve the data from the input file
pruneDataSource.loadDataBlock(pruneMergedData)
# Create an algorithm object to train the decision tree classification model
algorithm = training.Batch(nClasses)
# Pass the training data set and dependent values to the algorithm
algorithm.input.set(classifier.training.data, trainData)
algorithm.input.set(classifier.training.labels, trainGroundTruth)
algorithm.input.setTable(training.dataForPruning, pruneData)
algorithm.input.setTable(training.labelsForPruning, pruneGroundTruth)
# Train the decision tree classification model and retrieve the results of the training algorithm
trainingResult = algorithm.compute()
model = trainingResult.get(classifier.training.model)
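# A hedged sketch, not part of the original snippet: score the trained decision tree
# on a test table. `testData` and the decision_tree prediction import are assumptions
# that mirror the usual PyDAAL batch prediction pattern.
from daal.algorithms.decision_tree.classification import prediction as dt_prediction

predAlgorithm = dt_prediction.Batch()
predAlgorithm.input.setTable(classifier.prediction.data, testData)
predAlgorithm.input.setModel(classifier.prediction.model, model)
predictionResult = predAlgorithm.compute()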
def printResults():
    printNumericTables(
        testGroundTruth, predictionResult.get(classifier.prediction.prediction),
        "Ground truth\t", "Classification results",
        "SVM classification results (first 20 observations):", 20, flt64=False
    )
# Get the dictionary and update it with additional information about data
dict = trainData.getDictionary()
# Add a feature type to the dictionary
dict[0].featureType = features.DAAL_CONTINUOUS
dict[1].featureType = features.DAAL_CONTINUOUS
dict[2].featureType = features.DAAL_CATEGORICAL
# Create an algorithm object to train the gradient boosted trees classification model
algorithm = training.Batch(nClasses)
algorithm.parameter().maxIterations = maxIterations
algorithm.parameter().minObservationsInLeafNode = minObservationsInLeafNode
algorithm.parameter().featuresPerNode = nFeatures
# Pass the training data set and dependent values to the algorithm
algorithm.input.set(classifier.training.data, trainData)
algorithm.input.set(classifier.training.labels, trainGroundTruth)
# Train the gradient boosted trees classification model and retrieve the results of the training algorithm
trainingResult = algorithm.compute()
model = trainingResult.get(classifier.training.model)
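# A hedged sketch, not part of the original snippet: score the trained gradient
# boosted trees model. `testData` and the gbt prediction import are assumptions.
from daal.algorithms.gbt.classification import prediction as gbt_prediction

predAlgorithm = gbt_prediction.Batch(nClasses)
predAlgorithm.input.setTable(classifier.prediction.data, testData)
predAlgorithm.input.setModel(classifier.prediction.model, model)
predictionResult = predAlgorithm.compute()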
# Create Numeric Tables for training data and labels
trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
mergedData = MergedNumericTable(trainData, trainGroundTruth)
# Retrieve the data from the input file
trainDataSource.loadDataBlock(mergedData)
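# A hedged sketch, assuming the linear kernel typically used with this SVM example;
# the kernel_function import is not shown in the original snippet.
from daal.algorithms import kernel_function
import daal.algorithms.kernel_function.linear

kernel = kernel_function.linear.Batch()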
# Create an algorithm object to train the SVM model
algorithm = svm.training.Batch()
algorithm.parameter.kernel = kernel
algorithm.parameter.cacheSize = 600000000
# Pass a training data set and dependent values to the algorithm
algorithm.input.set(classifier.training.data, trainData)
algorithm.input.set(classifier.training.labels, trainGroundTruth)
# Build the SVM model and get the algorithm results
trainingResult = algorithm.compute()
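# A hedged sketch, not part of the original snippet: the matching SVM prediction
# stage, reusing the same kernel. `testData` and the svm prediction module are
# assumptions following the usual PyDAAL pattern.
predAlgorithm = svm.prediction.Batch()
predAlgorithm.parameter.kernel = kernel
predAlgorithm.input.setTable(classifier.prediction.data, testData)
predAlgorithm.input.setModel(classifier.prediction.model, trainingResult.get(classifier.training.model))
predictionResult = predAlgorithm.compute()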
algorithm = training.Distributed(step2Master, nClasses, method=training.fastCSR)
parts_list = partsRDD.collect()
# Add partial results computed on local nodes to the algorithm on the master node
for key, value in parts_list:
    deserialized_pres = deserializePartialResult(value, training)
    algorithm.input.add(training.partialModels, deserialized_pres)
# Train the Naive Bayes model on the master node
algorithm.compute()
# Finalize computations and retrieve the training results
trainingResult = algorithm.finalizeCompute()
return trainingResult.get(classifier.training.model)
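# A hedged sketch of the matching local (step 1) computation that would produce
# partsRDD. `serializePartialResult` is assumed to be the counterpart of the
# deserializePartialResult helper used above, and `nClasses` is captured from the
# enclosing scope; neither appears in the original snippet.
def computePartialModel(data, labels):
    localAlgorithm = training.Distributed(step1Local, nClasses, method=training.fastCSR)
    localAlgorithm.input.set(classifier.training.data, data)
    localAlgorithm.input.set(classifier.training.labels, labels)
    # Compute a partial Naive Bayes model on this node and serialize it for the master
    return serializePartialResult(localAlgorithm.compute())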
def testModelQuality():
    global predictedLabels, qualityMetricSetResult, groundTruthLabels
    # Retrieve the predicted labels
    predictedLabels = predictionResult.get(classifier.prediction.prediction)
    # Create a quality metric set object to compute quality metrics of the SVM algorithm
    qualityMetricSet = svm.quality_metric_set.Batch()
    input = qualityMetricSet.getInputDataCollection().getInput(svm.quality_metric_set.confusionMatrix)
    input.set(binary_confusion_matrix.predictedLabels, predictedLabels)
    input.set(binary_confusion_matrix.groundTruthLabels, groundTruthLabels)
    # Compute the quality metrics (returns a ResultCollection from svm.quality_metric_set)
    qualityMetricSetResult = qualityMetricSet.compute()
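# A hedged usage sketch, not part of the original snippet: after calling
# testModelQuality(), pull the confusion matrix out of the result collection,
# following the PyDAAL quality-metrics examples.
qualityMetricResult = qualityMetricSetResult.getResult(svm.quality_metric_set.confusionMatrix)
printNumericTable(qualityMetricResult.get(binary_confusion_matrix.confusionMatrix), "Confusion matrix:")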
# Get the dictionary and update it with additional information about data
dict = trainData.getDictionary()
# Add a feature type to the dictionary
dict[0].featureType = features.DAAL_CONTINUOUS
dict[1].featureType = features.DAAL_CONTINUOUS
dict[2].featureType = features.DAAL_CATEGORICAL
# Create an algorithm object to train the decision forest classification model
algorithm = training.Batch(nClasses)
algorithm.parameter.nTrees = nTrees
algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
algorithm.parameter.featuresPerNode = nFeatures
algorithm.parameter.varImportance = decision_forest.training.MDI
algorithm.parameter.resultsToCompute = decision_forest.training.computeOutOfBagError
# Pass the training data set and dependent values to the algorithm
algorithm.input.set(classifier.training.data, trainData)
algorithm.input.set(classifier.training.labels, trainGroundTruth)
# Train the decision forest classification model and retrieve the results of the training algorithm
trainingResult = algorithm.compute()
model = trainingResult.get(classifier.training.model)
printNumericTable(trainingResult.getTable(training.variableImportance), "Variable importance results: ")
printNumericTable(trainingResult.getTable(training.outOfBagError), "OOB error: ")
def trainModel():
    # Create Numeric Tables for training data and dependent variables
    trainData, trainDependentVariable = loadData(trainDatasetFileName)
    # Create an algorithm object to train the decision forest classification model
    algorithm = decision_forest.classification.training.Batch(nClasses)
    # Pass a training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainDependentVariable)
    algorithm.parameter.nTrees = nTrees
    algorithm.parameter.featuresPerNode = nFeatures
    algorithm.parameter.minObservationsInLeafNode = minObservationsInLeafNode
    algorithm.parameter.maxTreeDepth = maxTreeDepth
    # Build the decision forest classification model and return the result
    return algorithm.compute()
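# A hedged companion sketch, not part of the original snippet: score the model
# returned by trainModel(). loadData, testDatasetFileName, and the decision forest
# prediction module are assumptions mirroring the training side.
def testModel(trainingResult):
    testData, testGroundTruth = loadData(testDatasetFileName)
    algorithm = decision_forest.classification.prediction.Batch(nClasses)
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model, trainingResult.get(classifier.training.model))
    # Predict class labels for the test set
    return algorithm.compute()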
def printResults():
    printNumericTable(predictionResult.get(classifier.prediction.prediction),
                      "Logistic regression prediction results (first 10 rows):", 10)
    printNumericTable(testGroundTruth, "Ground truth (first 10 rows):", 10)
    printNumericTable(predictionResult.get(logistic_regression.prediction.probabilities),
                      "Logistic regression prediction probabilities (first 10 rows):", 10)
    printNumericTable(predictionResult.get(logistic_regression.prediction.logProbabilities),
                      "Logistic regression prediction log probabilities (first 10 rows):", 10)