Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# Relative location of the example data sets
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
datasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'kmeans_dense.csv')

# K-Means algorithm parameters
nClusters = 20
nIterations = 5

if __name__ == "__main__":

    # Open the .csv file as a data source and load its contents
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )
    dataSource.loadDataBlock()

    # Compute the initial centroids with the randomDense initialization method
    initAlg = kmeans.init.Batch(nClusters, method=kmeans.init.randomDense)
    initAlg.input.set(kmeans.init.data, dataSource.getNumericTable())
    res = initAlg.compute()
    centroidsResult = res.get(kmeans.init.centroids)

    # Build the K-Means algorithm object (lloydDense method)
    algorithm = kmeans.Batch(nClusters, nIterations, method=kmeans.lloydDense)
def loadData(fileName):
    """Load a .csv file into a feature table and a dependent-variable table.

    A table with ``nFeatures`` columns and a table with one column are
    combined into a merged table, and the file is loaded into that merged
    table. Every column index listed in ``categoricalFeaturesIndices`` is then
    flagged as ``features.DAAL_CATEGORICAL`` in the feature table's
    dictionary. ``nFeatures`` and ``categoricalFeaturesIndices`` are read
    from module scope.

    Args:
        fileName: path of the .csv file to read.

    Returns:
        The pair ``(feature table, dependent-variable table)``.
    """
    source = FileDataSource(
        fileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Tables start with zero rows and without allocated storage
    featureTable = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    responseTable = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    combinedTable = MergedNumericTable(featureTable, responseTable)

    # Retrieve the data from the input file into the merged table
    source.loadDataBlock(combinedTable)

    # Mark the configured feature columns as categorical
    featureDictionary = featureTable.getDictionary()
    for columnIndex in categoricalFeaturesIndices:
        featureDictionary[columnIndex].featureType = features.DAAL_CATEGORICAL

    return featureTable, responseTable
# Input data set
datasetFileName = os.path.join('..', 'data', 'batch', 'mse.csv')

# Solver settings
nIterations = 1000
nFeatures = 3
learningRate = 1.0
accuracyThreshold = 0.0000001
batchSize = 1

# 4x1 column vector of float64 values
startPoint = np.array([[8], [2], [1], [4]], dtype=np.float64)

if __name__ == "__main__":

    # Open the .csv file as a data source
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # One table for the nFeatures data columns, one for the single
    # dependent-variable column, combined into a merged table
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    dependentVariables = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(data, dependentVariables)

    # Load the file contents into the merged table
    dataSource.loadDataBlock(mergedData)

    # The objective function is constructed with the number of loaded rows
    nVectors = data.getNumberOfRows()

    mseObjectiveFunction = optimization_solver.mse.Batch(nVectors)
    mseObjectiveFunction.input.set(optimization_solver.mse.data, data)
    mseObjectiveFunction.input.set(
        optimization_solver.mse.dependentVariables, dependentVariables
    )

    # Create objects to compute the Adagrad result using the default method
def trainModel():
    """Load the training set and configure the multi-class classifier trainer.

    Reads ``trainDatasetFileName``, ``nFeatures``, ``nClasses``,
    ``trainingBatch`` and ``predictionBatch`` from module scope and declares
    ``trainingResult`` as a global.

    NOTE(review): the visible body ends once the algorithm parameters are
    set; it does not run the algorithm or assign ``trainingResult``.
    """
    global trainingResult

    # Open the training .csv file as a data source
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature table (nFeatures columns) and label table (one column),
    # combined into a merged table
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)

    # Load the file contents into the merged table
    trainDataSource.loadDataBlock(mergedData)

    # Multi-class training algorithm, wired to the module-level
    # trainingBatch / predictionBatch objects
    algorithm = multi_class_classifier.training.Batch(nClasses)
    algorithm.parameter.training = trainingBatch
    algorithm.parameter.prediction = predictionBatch
sys.path.insert(0, utils_folder)
from utils import printNumericTable
# Relative location of the example data sets
DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
datasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'em_gmm.csv')
nComponents = 2

if __name__ == "__main__":

    # Open the .csv file as a data source
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )
    nFeatures = dataSource.getNumberOfColumns()

    # Load the file contents
    dataSource.loadDataBlock()

    # Initialization algorithm for the EM algorithm for the GMM, built for
    # nComponents components with the default method
    initAlgorithm = em_gmm.init.Batch(nComponents)

    # Hand the loaded table to the initialization algorithm
    initAlgorithm.input.set(em_gmm.init.data, dataSource.getNumericTable())

    # Compute the initial values with the default parameters
    resultInit = initAlgorithm.compute()
def testModel():
    """Load the test set and configure the multi-class classifier predictor.

    Reads ``testDatasetFileName``, ``nFeatures``, ``nClasses``,
    ``trainingBatch`` and ``predictionBatch`` from module scope. Assigns the
    global ``testGroundTruth`` and declares ``predictionResult`` as a global.

    NOTE(review): the visible body ends once the algorithm parameters are
    set; it does not run the prediction or assign ``predictionResult``.
    """
    global predictionResult, testGroundTruth

    # Open the test .csv file as a data source
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature table (nFeatures columns) and label table (one column),
    # combined into a merged table
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Load the file contents into the merged table
    testDataSource.loadDataBlock(mergedData)

    # Multi-class prediction algorithm, wired to the module-level
    # trainingBatch / predictionBatch objects
    algorithm = multi_class_classifier.prediction.Batch(nClasses)
    algorithm.parameter.training = trainingBatch
    algorithm.parameter.prediction = predictionBatch
def trainModel():
    """Feed the training file block by block to the online regression trainer.

    Reads ``trainDatasetFileName``, ``nFeatures``, ``nDependentVariables``
    and ``nTrainVectorsInBlock`` from module scope and declares
    ``trainingResult`` as a global.

    NOTE(review): the visible loop body only sets the algorithm inputs; no
    compute call or assignment to ``trainingResult`` appears here.
    """
    global trainingResult

    # Open the training .csv file as a data source
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature table and dependent-variable table, combined into a merged table
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(
        nDependentVariables, 0, NumericTableIface.doNotAllocate
    )
    mergedData = MergedNumericTable(trainData, trainDependentVariables)

    # Online multiple linear regression training algorithm (qrDense method)
    algorithm = training.Online(method=training.qrDense)

    # Keep going for as long as loadDataBlock reports nTrainVectorsInBlock
    while trainDataSource.loadDataBlock(nTrainVectorsInBlock, mergedData) == nTrainVectorsInBlock:
        # Pass the current block's data and dependent values to the algorithm
        algorithm.input.set(training.data, trainData)
        algorithm.input.set(training.dependentVariables, trainDependentVariables)
def testModel():
    """Load the test set and configure the SVM prediction algorithm.

    Reads ``testDatasetFileName``, ``nFeatures`` and ``kernel`` from module
    scope. Assigns the global ``groundTruthLabels`` and declares
    ``predictionResult`` as a global.

    NOTE(review): the visible body ends after the kernel is set; it does not
    pass inputs to the algorithm or assign ``predictionResult``.
    """
    global predictionResult, groundTruthLabels

    # Open the test .csv file as a data source
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Feature table (nFeatures columns) and label table (one column),
    # combined into a merged table
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    groundTruthLabels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, groundTruthLabels)

    # Load the file contents into the merged table
    testDataSource.loadDataBlock(mergedData)

    # SVM prediction algorithm using the module-level kernel
    algorithm = svm.prediction.Batch()
    algorithm.parameter.kernel = kernel

    # Pass a testing data set and the trained model to the algorithm
def trainModel():
    """Load sparse training data and labels and configure the SVM trainer.

    Reads ``trainLabelsFileName``, ``trainDatasetFileName`` and ``kernel``
    from module scope and declares ``trainingResult`` as a global.

    NOTE(review): the visible body ends after the training data is passed to
    the algorithm; the labels are loaded but not yet passed, and
    ``trainingResult`` is not assigned.
    """
    global trainingResult

    # Open the labels .csv file as a data source
    trainLabelsDataSource = FileDataSource(
        trainLabelsFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext,
    )

    # Build the training table from the data set file via createSparseTable
    trainData = createSparseTable(trainDatasetFileName)

    # Load the labels
    trainLabelsDataSource.loadDataBlock()

    # SVM training algorithm using the module-level kernel
    algorithm = training.Batch()
    algorithm.parameter.kernel = kernel
    algorithm.parameter.cacheSize = 40000000

    # Pass the training data set to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
def trainModel():
global model
# Initialize FileDataSource to retrieve the input data from a .csv file
trainDataSource = FileDataSource(
trainDatasetFileName,
DataSourceIface.notAllocateNumericTable,
DataSourceIface.doDictionaryFromContext
)
# Create Numeric Tables for training data and labels
trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
mergedData = MergedNumericTable(trainData, trainGroundTruth)
# Retrieve the data from the input file
trainDataSource.loadDataBlock(mergedData)
# Initialize FileDataSource to retrieve the input data from a .csv file
pruneDataSource = FileDataSource(
pruneDatasetFileName,
DataSourceIface.notAllocateNumericTable,
DataSourceIface.doDictionaryFromContext
)