How to use daal.data_management.DataSourceIface.doDictionaryFromContext in daal

To help you get started, we've selected a few daal examples based on popular ways this flag is used in public projects. The doDictionaryFromContext flag tells a DataSource to build its data dictionary, that is, the number and types of the features, from the contents of the input file instead of requiring you to describe them up front.

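Before the project snippets, here is a minimal, self-contained sketch of the pattern they all share. The file path is hypothetical; point it at any numeric CSV:

import os

from daal.data_management import DataSourceIface, FileDataSource

# Hypothetical path; substitute any numeric .csv file
csvFileName = os.path.join('..', 'data', 'batch', 'kmeans_dense.csv')

# doAllocateNumericTable lets the data source allocate its own numeric
# table; doDictionaryFromContext builds the dictionary from the file
dataSource = FileDataSource(csvFileName,
                            DataSourceIface.doAllocateNumericTable,
                            DataSourceIface.doDictionaryFromContext)

# Load the whole file into the internally allocated table
dataSource.loadDataBlock()

table = dataSource.getNumericTable()
print(table.getNumberOfRows(), table.getNumberOfColumns())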

From intel/daal: examples/python/source/kmeans/kmeans_dense_batch.py

# Imports used by this excerpt (the full example has a longer header)
import os

from daal.algorithms import kmeans
from daal.data_management import DataSourceIface, FileDataSource

DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
datasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'kmeans_dense.csv')

# K-Means algorithm parameters
nClusters = 20
nIterations = 5

if __name__ == "__main__":

    # Initialize FileDataSource to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Retrieve the data from the input file
    dataSource.loadDataBlock()

    # Get initial clusters for the K-Means algorithm
    initAlg = kmeans.init.Batch(nClusters, method=kmeans.init.randomDense)

    initAlg.input.set(kmeans.init.data, dataSource.getNumericTable())

    res = initAlg.compute()
    centroidsResult = res.get(kmeans.init.centroids)

    # Create an algorithm object for the K-Means algorithm
    algorithm = kmeans.Batch(nClusters, nIterations, method=kmeans.lloydDense)
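    The snippet is truncated where the clustering algorithm is created. Judging from the full example in the repository, it continues roughly like this (a sketch, not the verbatim file):

    # Pass the data and the initial centroids computed above
    algorithm.input.set(kmeans.data, dataSource.getNumericTable())
    algorithm.input.set(kmeans.inputCentroids, centroidsResult)

    # Run Lloyd's iterations and fetch the final centroids
    res = algorithm.compute()
    finalCentroids = res.get(kmeans.centroids)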

From intel/daal: examples/python/source/decision_forest/df_reg_traverse_model.py

def loadData(fileName):

    # Initialize FileDataSource to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        fileName, DataSourceIface.notAllocateNumericTable, DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for training data and dependent variables
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    dependentVar = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(data, dependentVar)

    # Retrieve the data from the input file
    trainDataSource.loadDataBlock(mergedData)

    dictionary = data.getDictionary()
    for index in categoricalFeaturesIndices:
        dictionary[index].featureType = features.DAAL_CATEGORICAL

    return data, dependentVar
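
In the full file, nFeatures and categoricalFeaturesIndices are module-level constants. A typical call site, with a hypothetical path chosen only for illustration, would be:

fileName = os.path.join('..', 'data', 'batch', 'df_regression_train.csv')
trainData, trainDependentVar = loadData(fileName)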

From intel/daal: examples/python/source/optimization_solvers/adagrad_dense_batch.py

# Imports used by this excerpt (see the full example for the complete header)
import os

import numpy as np

from daal.algorithms import optimization_solver
from daal.algorithms.optimization_solver import adagrad, iterative_solver, mse
from daal.data_management import (
    DataSourceIface, FileDataSource, HomogenNumericTable,
    MergedNumericTable, NumericTableIface
)

datasetFileName = os.path.join('..', 'data', 'batch', 'mse.csv')

nIterations = 1000
nFeatures = 3
learningRate = 1.0
accuracyThreshold = 0.0000001
batchSize = 1

startPoint = np.array([[8], [2], [1], [4]], dtype=np.float64)

if __name__ == "__main__":

    # Initialize FileDataSource to retrieve the input data from a .csv file
    dataSource = FileDataSource(datasetFileName,
                                DataSourceIface.notAllocateNumericTable,
                                DataSourceIface.doDictionaryFromContext)

    # Create Numeric Tables for data and values for dependent variable
    data = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    dependentVariables = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(data, dependentVariables)

    # Retrieve the data from the input file
    dataSource.loadDataBlock(mergedData)

    nVectors = data.getNumberOfRows()

    mseObjectiveFunction = optimization_solver.mse.Batch(nVectors)
    mseObjectiveFunction.input.set(optimization_solver.mse.data, data)
    mseObjectiveFunction.input.set(optimization_solver.mse.dependentVariables, dependentVariables)

    # Create objects to compute the Adagrad result using the default method
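    The excerpt stops at the comment announcing the solver. Sketched from the full example, and hedging on the exact input-setting call, which has varied between pydaal releases, the continuation looks like:

    # Build the Adagrad solver around the MSE objective
    adagradAlgorithm = optimization_solver.adagrad.Batch(mseObjectiveFunction)
    adagradAlgorithm.parameter.learningRate = HomogenNumericTable(
        1, 1, NumericTableIface.doAllocate, learningRate)
    adagradAlgorithm.parameter.nIterations = nIterations
    adagradAlgorithm.parameter.accuracyThreshold = accuracyThreshold
    adagradAlgorithm.parameter.batchSize = batchSize

    # Start the search from startPoint and run the solver
    adagradAlgorithm.input.setInput(
        optimization_solver.iterative_solver.inputArgument,
        HomogenNumericTable(startPoint))
    res = adagradAlgorithm.compute()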

From intel/daal: examples/python/source/svm/svm_multi_class_dense_batch.py

def trainModel():
    global trainingResult

    # Initialize FileDataSource to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for training data and labels
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)

    # Retrieve the data from the input file
    trainDataSource.loadDataBlock(mergedData)

    # Create an algorithm object to train the multi-class SVM model
    algorithm = multi_class_classifier.training.Batch(nClasses)

    algorithm.parameter.training = trainingBatch
    algorithm.parameter.prediction = predictionBatch
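
    Here trainingBatch and predictionBatch are the underlying two-class SVM algorithms, configured at module level in the full file. Based on that file, the function plausibly finishes like this:

    # Pass the training data and labels, then build the model
    algorithm.input.set(classifier.training.data, trainData)
    algorithm.input.set(classifier.training.labels, trainGroundTruth)

    trainingResult = algorithm.compute()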

From intel/daal: examples/python/source/em/em_gmm_dense_batch.py

# 'utils_folder' is computed earlier in the full example and points at
# the shared examples/python/source/utils directory
sys.path.insert(0, utils_folder)
from utils import printNumericTable

DAAL_PREFIX = os.path.join('..', 'data')

# Input data set parameters
datasetFileName = os.path.join(DAAL_PREFIX, 'batch', 'em_gmm.csv')
nComponents = 2

if __name__ == "__main__":

    # Initialize FileDataSource to retrieve the input data from a .csv file
    dataSource = FileDataSource(
        datasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
    nFeatures = dataSource.getNumberOfColumns()

    # Retrieve the data from the input file
    dataSource.loadDataBlock()

    # Create algorithm objects to initialize the EM algorithm for the GMM
    # computing the number of components using the default method
    initAlgorithm = em_gmm.init.Batch(nComponents)

    # Set an input data table for the initialization algorithm
    initAlgorithm.input.set(em_gmm.init.data, dataSource.getNumericTable())

    # Compute initial values for the EM algorithm for the GMM with the default parameters
    resultInit = initAlgorithm.compute()
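
    From here the full example hands the initialization result to the main EM algorithm. A sketch, with the caveat that the input-setter names differ slightly between binding versions:

    # Feed the data and the initial values to the EM algorithm
    algorithm = em_gmm.Batch(nComponents)
    algorithm.input.setTable(em_gmm.data, dataSource.getNumericTable())
    algorithm.input.setValues(em_gmm.inputValues, resultInit)

    result = algorithm.compute()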

From intel/daal: examples/python/source/svm/svm_multi_class_dense_batch.py

def testModel():
    global predictionResult, testGroundTruth

    # Initialize FileDataSource to retrieve the test data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName,
        DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    testGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, testGroundTruth)

    # Retrieve the data from the input file
    testDataSource.loadDataBlock(mergedData)

    # Create an algorithm object to predict multi-class SVM values
    algorithm = multi_class_classifier.prediction.Batch(nClasses)

    algorithm.parameter.training = trainingBatch
    algorithm.parameter.prediction = predictionBatch
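
    As in trainModel above, the two batch parameters come from module scope. By analogy with the companion training function, the prediction step plausibly concludes:

    # Pass the test data and the trained model, then predict
    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model,
                             trainingResult.get(classifier.training.model))

    algorithm.compute()
    predictionResult = algorithm.getResult()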

From intel/daal: examples/python/source/linear_regression/lin_reg_qr_dense_online.py

def trainModel():
    global trainingResult

    # Initialize FileDataSource to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName, DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for training data and dependent variables
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    trainDependentVariables = HomogenNumericTable(
        nDependentVariables, 0, NumericTableIface.doNotAllocate
    )
    mergedData = MergedNumericTable(trainData, trainDependentVariables)

    # Create an algorithm object to train the multiple linear regression model
    algorithm = training.Online(method=training.qrDense)

    while trainDataSource.loadDataBlock(nTrainVectorsInBlock, mergedData) == nTrainVectorsInBlock:
        # Pass a training data set and dependent values to the algorithm
        algorithm.input.set(training.data, trainData)
        algorithm.input.set(training.dependentVariables, trainDependentVariables)
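        Each loop iteration feeds one block of rows to the online algorithm; after the loop the partial results are merged. Roughly:

        # Update the partial model with the current block
        algorithm.compute()

    # Merge the partial results into the final regression model
    trainingResult = algorithm.finalizeCompute()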

From intel/daal: examples/python/source/quality_metrics/svm_two_class_metrics_dense_batch.py

def testModel():
    global predictionResult, groundTruthLabels

    # Initialize FileDataSource to retrieve the input data from a .csv file
    testDataSource = FileDataSource(
        testDatasetFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for testing data and labels
    testData = HomogenNumericTable(nFeatures, 0, NumericTableIface.doNotAllocate)
    groundTruthLabels = HomogenNumericTable(1, 0, NumericTableIface.doNotAllocate)
    mergedData = MergedNumericTable(testData, groundTruthLabels)

    # Retrieve the data from the input file
    testDataSource.loadDataBlock(mergedData)

    # Create an algorithm object to predict SVM values
    algorithm = svm.prediction.Batch()

    algorithm.parameter.kernel = kernel

    # Pass a testing data set and the trained model to the algorithm
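    The truncated tail would pass the test data and the model trained earlier in the file; sketched here by analogy with the other SVM examples on this page:

    algorithm.input.setTable(classifier.prediction.data, testData)
    algorithm.input.setModel(classifier.prediction.model,
                             trainingResult.get(classifier.training.model))

    # Predict and retrieve the results
    algorithm.compute()
    predictionResult = algorithm.getResult()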

From intel/daal: examples/python/source/svm/svm_two_class_csr_batch.py

def trainModel():
    global trainingResult

    # Initialize FileDataSource to retrieve the input data from a .csv file
    trainLabelsDataSource = FileDataSource(
        trainLabelsFileName, DataSourceIface.doAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create numeric table for training data
    trainData = createSparseTable(trainDatasetFileName)

    # Retrieve the data from the input file
    trainLabelsDataSource.loadDataBlock()

    # Create an algorithm object to train the SVM model
    algorithm = training.Batch()

    algorithm.parameter.kernel = kernel
    algorithm.parameter.cacheSize = 40000000  # kernel cache size, in bytes

    # Pass a training data set and dependent values to the algorithm
    algorithm.input.set(classifier.training.data, trainData)
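
    Note that the labels come from a dense CSV while the features come from a CSR file built by the helper createSparseTable, defined elsewhere in the file. The training call plausibly finishes with:

    algorithm.input.set(classifier.training.labels,
                        trainLabelsDataSource.getNumericTable())

    # Build the two-class SVM model
    trainingResult = algorithm.compute()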

From intel/daal: examples/python/source/decision_tree/dt_cls_dense_batch.py

def trainModel():
    global model

    # Initialize FileDataSource to retrieve the input data from a .csv file
    trainDataSource = FileDataSource(
        trainDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )

    # Create Numeric Tables for training data and labels
    trainData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    trainGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    mergedData = MergedNumericTable(trainData, trainGroundTruth)

    # Retrieve the data from the input file
    trainDataSource.loadDataBlock(mergedData)

    # Initialize FileDataSource to retrieve the input data from a .csv file
    pruneDataSource = FileDataSource(
        pruneDatasetFileName,
        DataSourceIface.notAllocateNumericTable,
        DataSourceIface.doDictionaryFromContext
    )
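
    The snippet ends just after the pruning data source is created; mirroring the training block above, the full example would continue along these lines:

    # Create Numeric Tables for pruning data and labels
    pruneData = HomogenNumericTable(nFeatures, 0, NumericTableIface.notAllocate)
    pruneGroundTruth = HomogenNumericTable(1, 0, NumericTableIface.notAllocate)
    pruneMergedData = MergedNumericTable(pruneData, pruneGroundTruth)

    # Retrieve the pruning data from the input file
    pruneDataSource.loadDataBlock(pruneMergedData)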