import os
import sys

# Imports needed by this snippet (module paths per the PyDAAL examples)
from daal.algorithms import bacon_outlier_detection
from daal.data_management import FileDataSource, DataSourceIface

utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTables
DAAL_PREFIX = os.path.join('..', 'data')
# Input data set parameters
datasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'outlierdetection.csv')
if __name__ == "__main__":
    # Initialize FileDataSource to retrieve the test data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Retrieve the data from the input file
    dataSource.loadDataBlock()

    # Create an algorithm to detect outliers using the Bacon method
    algorithm = bacon_outlier_detection.Batch()
    algorithm.input.set(bacon_outlier_detection.data, dataSource.getNumericTable())

    # Compute outliers and get the computed results
    res = algorithm.compute()

    # The call was truncated in the snippet; the table titles below follow
    # the stock PyDAAL example
    printNumericTables(
        dataSource.getNumericTable(), res.get(bacon_outlier_detection.weights),
        "Input data", "Weights",
        "Outlier detection result (Bacon method)"
    )
import os
import sys

import daal.algorithms.math.logistic as logistic
from daal.data_management import FileDataSource, DataSourceIface

utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Input data set parameters
datasetName = os.path.join('..', 'data', 'batch', 'covcormoments_dense.csv')
if __name__ == "__main__":
    # Retrieve the input data
    dataSource = FileDataSource(datasetName,
                                DataSourceIface.doAllocateNumericTable,
                                DataSourceIface.doDictionaryFromContext)
    dataSource.loadDataBlock()

    # Create an algorithm
    algorithm = logistic.Batch()

    # Set an input object for the algorithm
    algorithm.input.set(logistic.data, dataSource.getNumericTable())

    # Compute the logistic function
    res = algorithm.compute()

    # Print the results of the algorithm
    printNumericTable(res.get(logistic.value), "Logistic result (first 5 rows):", 5)
import os
import sys

import daal.algorithms.qr as qr
from daal.data_management import FileDataSource, DataSourceIface

utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
DAAL_PREFIX = os.path.join('..', 'data')
# Input data set parameters
datasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'qr.csv')
nRows = 16000
if __name__ == "__main__":
    # Initialize FileDataSource to retrieve input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Retrieve the data from the input file
    dataSource.loadDataBlock(nRows)

    # Create an algorithm to compute QR decomposition
    algorithm = qr.Batch()
    algorithm.input.set(qr.data, dataSource.getNumericTable())

    res = algorithm.compute()

    # Print the results
    printNumericTable(res.get(qr.matrixQ), "Orthogonal matrix Q:", 10)
    printNumericTable(res.get(qr.matrixR), "Triangular matrix R:")
# Imports needed by this snippet (module paths per the PyDAAL examples)
from daal.algorithms import kernel_function
import daal.algorithms.kernel_function.linear

# Input data set parameters: both sides use the same sample data set,
# as in the stock PyDAAL example
leftDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'kernel_function.csv')
rightDatasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'kernel_function.csv')

# Kernel algorithm parameters
k = 1.0  # Multiplier k in the linear kernel k(X,Y) + b
b = 0.0  # Additive constant b in the linear kernel k(X,Y) + b
if __name__ == "__main__":
    # Initialize FileDataSource to retrieve the input data from .csv files
    leftDataSource = FileDataSource(
        leftDatasetFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    rightDataSource = FileDataSource(
        rightDatasetFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Retrieve the data from the input files
    leftDataSource.loadDataBlock()
    rightDataSource.loadDataBlock()

    # Create an algorithm object for the linear kernel using the default method
    algorithm = kernel_function.linear.Batch()

    # Set the kernel algorithm parameters
    algorithm.parameter.k = k
    algorithm.parameter.b = b
    algorithm.parameter.computationMode = kernel_function.matrixMatrix

    # Set the input data tables for the algorithm
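    # The snippet is truncated here; a minimal completion following the stock
    # PyDAAL linear-kernel example (input ids kernel_function.X / kernel_function.Y
    # and result id kernel_function.values assumed)
    algorithm.input.set(kernel_function.X, leftDataSource.getNumericTable())
    algorithm.input.set(kernel_function.Y, rightDataSource.getNumericTable())

    # Compute the kernel matrix and print the result
    res = algorithm.compute()
    printNumericTable(res.get(kernel_function.values), "Linear kernel values (first 5 rows):", 5)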
import os
import sys

import numpy as np

import daal.algorithms.pca as pca
import daal.algorithms.pca.transform as pca_transform
from daal.data_management import DataSourceIface, FileDataSource, NumericTable

utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__)))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Input data set parameters
datasetName = os.path.join('..', 'data', 'batch', 'pca_transform.csv')
if __name__ == "__main__":
    # Retrieve the input data
    dataSource = FileDataSource(datasetName,
                                DataSourceIface.doAllocateNumericTable,
                                DataSourceIface.doDictionaryFromContext)
    dataSource.loadDataBlock()
    data = dataSource.getNumericTable()

    # Create an algorithm
    algorithm = pca.Batch(fptype=np.float64, method=pca.svdDense)

    # Set the algorithm input data
    algorithm.input.setDataset(pca.data, data)

    # Set the normalization parameters (mean and variance) to be exported for
    # the transform step, plus the whitening parameter (eigenvalue).
    # If whitening is not required, remove pca.eigenvalue: the eigenvalues are
    # still calculated in the pca.eigenvalues table of the result, but are not
    # passed to the dataForTransform collection.
    algorithm.parameter.resultsToCompute = pca.mean | pca.variance | pca.eigenvalue
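    # The snippet ends here; a sketch of the remaining steps, following the
    # stock pca_transform example (the ids pca_transform.eigenvectors,
    # pca_transform.dataForTransform, and pca.dataForTransform are assumed)
    result = algorithm.compute()

    # Apply the transform using the eigenvectors and the exported normalization data
    transformAlgorithm = pca_transform.Batch(fptype=np.float64)
    transformAlgorithm.input.setTable(pca_transform.data, data)
    transformAlgorithm.input.setTable(pca_transform.eigenvectors, result.get(pca.eigenvectors))
    transformAlgorithm.input.setCollection(pca_transform.dataForTransform, result.get(pca.dataForTransform))
    transformResult = transformAlgorithm.compute()

    printNumericTable(transformResult.get(pca_transform.transformedData), "Transformed data:", 4)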
def computestep1Local():
    global serializedData, dataFromStep1ForStep3

    # Initialize FileDataSource to retrieve the input data from a .csv file
    dataSource = FileDataSource(datasetFileNames[rankId],
                                DataSourceIface.doAllocateNumericTable,
                                DataSourceIface.doDictionaryFromContext)

    # Retrieve the input data
    dataSource.loadDataBlock()

    # Create an algorithm to compute QR decomposition on local nodes
    alg = qr.Distributed(step1Local)
    alg.input.set(qr.data, dataSource.getNumericTable())

    # Compute QR decomposition
    pres = alg.compute()

    dataFromStep1ForStep2 = pres.get(qr.outputOfStep1ForStep2)
    dataFromStep1ForStep3 = pres.get(qr.outputOfStep1ForStep3)
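    # Truncated in the snippet: the full MPI example serializes the step-1
    # partial result and gathers it on the root node. A sketch, assuming
    # InputDataArchive from daal.data_management and an mpi4py communicator
    # comm are available in the full program:
    dataArch = InputDataArchive()
    dataFromStep1ForStep2.serialize(dataArch)
    nodeResults = dataArch.getArchiveAsArray()

    # Transfer partial results to step 2 on the root node
    serializedData = comm.gather(nodeResults)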
import os
import sys

from daal.algorithms import quantiles
from daal.data_management import FileDataSource, DataSourceIface

utils_folder = os.path.realpath(os.path.abspath(os.path.dirname(os.path.dirname(__file__))))
if utils_folder not in sys.path:
    sys.path.insert(0, utils_folder)
from utils import printNumericTable
DAAL_PREFIX = os.path.join('..', 'data')
# Input data set parameters
dataFileName = os.path.join(DAAL_PREFIX, 'batch', 'quantiles.csv')
if __name__ == "__main__":
    # Initialize FileDataSource to retrieve input data from a .csv file
    dataSource = FileDataSource(
        dataFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Retrieve the data from the input file
    dataSource.loadDataBlock()

    # Create an algorithm to compute quantiles in batch mode
    algorithm = quantiles.Batch()

    # Set input arguments of the algorithm
    algorithm.input.set(quantiles.data, dataSource.getNumericTable())

    # Compute quantiles
    res = algorithm.compute()

    # Print the results
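    # The print calls are truncated in the snippet; the stock example prints
    # the input and the computed quantiles (result id quantiles.quantiles assumed)
    printNumericTable(dataSource.getNumericTable(), "Input data")
    printNumericTable(res.get(quantiles.quantiles), "Quantiles")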
def printResults():
    testGroundTruth = FileDataSource(
        testGroundTruthFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    testGroundTruth.loadDataBlock(nTestObservations)

    printNumericTables(
        testGroundTruth.getNumericTable(),
        predictionResult.get(classifier.prediction.prediction),
        "Ground truth", "Classification results",
        "NaiveBayes classification results (first 20 observations):", 20, 15, flt64=False
    )
def testModel():
    global trainingResult, predictionResult

    # Initialize FileDataSource to retrieve the input data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create numeric tables for the testing data and ground-truth values
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(nDependentVariables, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Retrieve the data from the input file
    testDataSource.loadDataBlock(mergedData)

    # Create an algorithm object to predict values with multiple linear regression
    algorithm = prediction.Batch()

    # Pass the testing data set and the trained model to the algorithm
    algorithm.input.setTable(prediction.data, testData)
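    # Truncated in the snippet; the stock linear regression example finishes
    # prediction like this (ids prediction.model / training.model assumed)
    algorithm.input.setModel(prediction.model, trainingResult.get(training.model))

    # Predict values and store the result for printResults()
    predictionResult = algorithm.compute()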
def trainModel():
    global trainData, trainingResult

    # Create an algorithm object to build the final Naive Bayes model on the master node
    masterAlgorithm = training.Distributed(step2Master, nClasses, method=training.fastCSR)

    for i in range(nBlocks):
        # Read trainDatasetFileNames and create a numeric table to store the input data
        trainData[i] = createSparseTable(trainDatasetFileNames[i])

        # Initialize FileDataSource to retrieve the input data from a .csv file
        trainLabelsSource = FileDataSource(
            trainGroundTruthFileNames[i], DataSourceIface.doAllocateNumericTable,
            DataSourceIface.doDictionaryFromContext
        )

        # Retrieve the data from the input file
        trainLabelsSource.loadDataBlock(nTrainVectorsInBlock)

        # Create an algorithm object to train the Naive Bayes model on the local-node data
        localAlgorithm = training.Distributed(step1Local, nClasses, method=training.fastCSR)

        # Pass the training data set and dependent values to the algorithm
        localAlgorithm.input.set(classifier.training.data, trainData[i])
        localAlgorithm.input.set(classifier.training.labels, trainLabelsSource.getNumericTable())

        # Build the Naive Bayes model on the local node and set it as input
        # for the master-node algorithm
        masterAlgorithm.input.add(training.partialModels, localAlgorithm.compute())
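    # Truncated in the snippet; the stock example merges and finalizes the
    # model on the master node as follows
    masterAlgorithm.compute()
    trainingResult = masterAlgorithm.finalizeCompute()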