How to use the mindsdb.libs.phases.base_module.BaseModule class in MindsDB

To help you get started, we’ve selected a few MindsDB examples based on popular ways BaseModule is used in public projects.

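Every example below follows the same pattern: subclass BaseModule, declare a phase_name constant, and implement run(). As a minimal sketch of that pattern (the class name, the choice of PHASE_* constant, and the body of run() are illustrative placeholders, not taken from any one project):

from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule


class MyPhase(BaseModule):
    """A hypothetical phase, shown only to illustrate the shared shape of the examples below."""

    # each phase declares the pipeline stage it implements via a PHASE_* constant
    phase_name = PHASE_DATA_EXTRACTOR  # placeholder; a real phase defines its own constant

    def run(self):
        # phases communicate through the shared transaction object and log via self.log
        columns = self.transaction.lmd['columns']
        self.log.info('running on {n} columns'.format(n=len(columns)))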

github mindsdb / mindsdb / mindsdb / libs / phases / data_extractor / data_extractor.py View on Github external
from mindsdb.config import CONFIG
from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule
from mindsdb.libs.data_types.mindsdb_logger import log
from mindsdb.libs.helpers.text_helpers import hashtext
from mindsdb.external_libs.stats import calculate_sample_size

import random
import traceback
import pandas
import numpy as np


class DataExtractor(BaseModule):

    phase_name = PHASE_DATA_EXTRACTOR

    def _get_data_frame_from_when_conditions(self):
        """
        :return:
        """

        columns = self.transaction.lmd['columns']
        when_conditions = self.transaction.hmd['model_when_conditions']

        when_conditions_list = []
        # build a list of tuples of the form (ValueForField1, ValueForField2, ..., ValueForFieldN), one per condition
        for when_condition in when_conditions:
            cond_list = [None] * len(columns)  # placeholder list with one slot per column
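As a standalone illustration of what this loop is assembling (plain pandas with made-up column names, not MindsDB code), each when-condition becomes one row, with None for any column the condition does not mention:

import pandas as pd

columns = ['sqft', 'location', 'rental_price']
when_conditions = [{'sqft': 700}, {'sqft': 800, 'location': 'good'}]

# one row per condition; cond.get(col) yields None for unmentioned columns
rows = [[cond.get(col) for col in columns] for cond in when_conditions]
df = pd.DataFrame(rows, columns=columns)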
github mindsdb / mindsdb / mindsdb / libs / phases / model_trainer / model_trainer.py View on Github external
from __future__ import unicode_literals, print_function, division

from mindsdb.config import CONFIG
from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule
from mindsdb.libs.workers.train import TrainWorker

from mindsdb.libs.data_types.transaction_metadata import TransactionMetadata

import _thread
import time



class ModelTrainer(BaseModule):

    phase_name = PHASE_MODEL_TRAINER



    def run(self):
        """
        Run the training process; we could perhaps iterate over all hyperparameters here and spin off model variations.
        TODO: check out the RISELab distributed ML projects for this

        :return: None
        """

        model_name = self.transaction.persistent_model_metadata.model_name
        train_meta_data = self.transaction.train_metadata # type: TransactionMetadata
github mindsdb / mindsdb / mindsdb / libs / phases / data_encoder / data_encoder.py View on Github external
from mindsdb.config import CONFIG
from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule
from collections import OrderedDict
from mindsdb.libs.workers.train import TrainWorker
from bson.objectid import ObjectId

import _thread
import time



class DataEncoder(BaseModule):

    phase_name = PHASE_DATA_ENCODER



    def run(self):
        """
        Run the training process; we could perhaps iterate over all hyperparameters here and spin off model variations.
        TODO: check out the RISELab distributed ML projects for this

        :return: None
        """


        model_name = self.transaction.model_metadata[KEY_MODEL_NAME]
        model_stats = self.session.mongo.mindsdb.model_stats
github mindsdb / mindsdb / mindsdb / libs / phases / stats_loader / stats_loader.py View on Github external
from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule


class StatsLoader(BaseModule):

    phase_name = PHASE_STATS_GENERATOR

    def run(self):

        self.transaction.persistent_model_metadata = self.transaction.persistent_model_metadata.find_one(self.transaction.persistent_model_metadata.getPkey())

        # load the most accurate model

        info = self.transaction.persistent_ml_model_info.find({'model_name':self.transaction.metadata.model_name}, order_by=[('r_squared',-1)])

        if info is not None and len(info) > 0:
            self.transaction.persistent_ml_model_info = info[0]
        else:
            self.log.error('No model found for this statement, please check if model_name {model_name} was trained'.format(model_name=self.transaction.metadata.model_name))
github mindsdb / mindsdb / mindsdb / libs / phases / data_splitter / data_splitter.py View on Github external
from mindsdb.config import CONFIG
from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule
from mindsdb.libs.data_types.mindsdb_logger import log


class DataSplitter(BaseModule):
    def run(self):
        group_by = self.transaction.lmd['model_group_by']
        if group_by is None or len(group_by) == 0:
            group_by = []
            for col in self.transaction.lmd['predict_columns']:
                if self.transaction.lmd['column_stats'][col]['data_type'] == DATA_TYPES.CATEGORICAL:
                    group_by.append(col)
            if len(group_by) > 0:
                self.transaction.input_data.data_frame = self.transaction.input_data.data_frame.sort_values(group_by)

        KEY_NO_GROUP_BY = '{PLEASE_DONT_TELL_ME_ANYONE_WOULD_CALL_A_COLUMN_THIS}##ALL_ROWS_NO_GROUP_BY##{PLEASE_DONT_TELL_ME_ANYONE_WOULD_CALL_A_COLUMN_THIS}'

        # create all indexes by group by, that is all the rows that belong to each group by
        all_indexes = {}
        train_indexes = {}
        test_indexes = {}
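For context, the loop that follows this snippet fills those dicts with per-group row indexes. A tiny standalone sketch of that idea (plain pandas, hypothetical data, independent of the MindsDB internals above):

import pandas as pd

df = pd.DataFrame({'city': ['a', 'a', 'b'], 'price': [1, 2, 3]})

# map each group-by value to the indexes of its rows, analogous to all_indexes
all_indexes = {group: idx.tolist() for group, idx in df.groupby('city').groups.items()}
# {'a': [0, 1], 'b': [2]}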
github mindsdb / mindsdb / mindsdb / libs / phases / data_devectorizer / data_devectorizer.py View on Github external
"""
 *******************************************************
 * This file is part of MindsDB Server.
 *
 * MindsDB Server can not be copied and/or distributed without the express
 * permission of MindsDB Inc
 *******************************************************
"""


import numpy
from mindsdb.config import *
from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule
from collections import OrderedDict
from mindsdb.libs.helpers.norm_denorm_helpers import denorm

class DataDevectorizer(BaseModule):

    phase_name = PHASE_DATA_DEVECTORIZATION

    def run(self):

        result = []

        # NOTE: this module is only used in PREDICT

        for group in self.transaction.model_data.predict_set:
            for column in self.transaction.model_data.predict_set[group]:
                column_results = []
                for value in self.transaction.model_data.predict_set[group][column]:
                    stats = self.transaction.model_stats[column]
                    denormed = denorm(value=value, cell_stats=stats)
                    column_results.append(denormed)
github mindsdb / mindsdb / mindsdb / libs / phases / stats_generator / stats_generator.py View on Github external
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import MiniBatchKMeans
import imagehash
from PIL import Image

from mindsdb.config import CONFIG
from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule
from mindsdb.libs.helpers.text_helpers import splitRecursive, clean_float, cast_string_to_python_type
from mindsdb.external_libs.stats import calculate_sample_size



class StatsGenerator(BaseModule):
    """
    # The stats generator phase is responsible for generating the insights we need about the data in order to vectorize it
    # Additionally, the stats generator also provides the user with some extra meaningful information about his data,
    thoguh this functionality may be moved to a different step (after vectorization) in the future
    """

    phase_name = PHASE_STATS_GENERATOR

    def _get_file_type(self, potential_path):
        could_be_fp = False
        for char in ('/', '\\', ':\\'):
            if char in potential_path:
                could_be_fp = True

        if not could_be_fp:
            return False
github mindsdb / mindsdb / mindsdb / libs / phases / model_interface / model_interface.py View on Github external
from mindsdb.libs.phases.base_module import BaseModule
from mindsdb.libs.constants.mindsdb import *

import datetime


class ModelInterface(BaseModule):
    def run(self, mode='train'):

        try:
            from mindsdb.libs.backends.ludwig import LudwigBackend
        except ImportError as e:
            self.transaction.log.warning(e)

        try:
            from mindsdb.libs.backends.lightwood import LightwoodBackend
        except ImportError as e:
            self.transaction.log.warning(e)

        if self.transaction.hmd['model_backend'] == 'ludwig':
            self.transaction.model_backend = LudwigBackend(self.transaction)
        elif self.transaction.hmd['model_backend'] == 'lightwood':
            self.transaction.model_backend = LightwoodBackend(self.transaction)
        else:
github mindsdb / mindsdb / mindsdb / libs / phases / model_analyzer / model_analyzer.py View on Github external
from mindsdb.libs.helpers.general_helpers import pickle_obj
from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule
from mindsdb.libs.helpers.probabilistic_validator import ProbabilisticValidator
from mindsdb.libs.phases.model_analyzer.helpers.column_evaluator import ColumnEvaluator

import pandas as pd
import numpy as np

class ModelAnalyzer(BaseModule):
    def run(self):
        """
        # Runs the model on the validation set in order to fit a probabilistic model that will evaluate the accuracy of future predictions
        """

        output_columns = self.transaction.lmd['predict_columns']
        input_columns = [col for col in self.transaction.lmd['columns'] if col not in output_columns and col not in self.transaction.lmd['columns_to_ignore']]
        # Test some hypotheses about our columns

        if self.transaction.lmd['disable_optional_analysis'] is False:
            column_evaluator = ColumnEvaluator(self.transaction)
            column_importances, buckets_stats, columnless_prediction_distribution, all_columns_prediction_distribution = column_evaluator.get_column_importance(model=self.transaction.model_backend, output_columns=output_columns, input_columns=input_columns, full_dataset=self.transaction.input_data.validation_df, stats=self.transaction.lmd['column_stats'])

            self.transaction.lmd['column_importances'] = column_importances
            self.transaction.lmd['columns_buckets_importances'] = buckets_stats
            self.transaction.lmd['columnless_prediction_distribution'] = columnless_prediction_distribution
github mindsdb / mindsdb / mindsdb / libs / phases / data_vectorizer / data_vectorizer.py View on Github external
import copy
import numpy as np
import itertools
import logging
import traceback

from mindsdb.libs.constants.mindsdb import *
from mindsdb.libs.phases.base_module import BaseModule
from collections import OrderedDict
from mindsdb.libs.helpers.norm_denorm_helpers import norm, norm_buckets
from mindsdb.libs.helpers.text_helpers import hashtext, cleanfloat, tryCastToNumber
from mindsdb.libs.data_types.transaction_metadata import TransactionMetadata


class DataVectorizer(BaseModule):

    phase_name = PHASE_DATA_VECTORIZATION

    def _getRowExtraVector(self, ret, column_name, col_row_index, distances):

        predict_columns = self.train_meta_data.model_predict_columns

        desired_total = self.train_meta_data.window_size
        batch_height = len(ret[column_name])
        remaining_row_count = batch_height - (col_row_index + 1)

        harvest_count = desired_total if desired_total < remaining_row_count else remaining_row_count
        empty_count = desired_total - harvest_count
        empty_vector_len = (
                len(ret[column_name][col_row_index])
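
Taken together, the excerpts above suggest how these phases are meant to be driven: each is constructed around the shared session and transaction and then executed via run(). A closing sketch of that orchestration (the driver loop and the (session, transaction) constructor signature are assumptions based on these snippets, not MindsDB's actual pipeline code):

from mindsdb.libs.phases.data_extractor.data_extractor import DataExtractor
from mindsdb.libs.phases.stats_generator.stats_generator import StatsGenerator
from mindsdb.libs.phases.data_splitter.data_splitter import DataSplitter
from mindsdb.libs.phases.model_trainer.model_trainer import ModelTrainer

# session and transaction stand in for the objects MindsDB builds internally
for PhaseClass in (DataExtractor, StatsGenerator, DataSplitter, ModelTrainer):
    phase = PhaseClass(session, transaction)  # assumed signature
    phase.run()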