How to use the deeppavlov.core.common.errors.ConfigError exception in deeppavlov

To help you get started, we’ve selected a few deeppavlov examples based on popular ways ConfigError is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github deepmipt / DeepPavlov / deeppavlov / models / sklearn / sklearn_component.py View on Github external
else:
            y_ = None

        try:
            log.info("Fitting model {}".format(self.model_name))
            self.model.fit(x_features, y_)
        except TypeError or ValueError:
            try:
                if issparse(x_features):
                    log.info("Converting input for model {} to dense array".format(self.model_name))
                    self.model.fit(x_features.todense(), y_)
                else:
                    log.info("Converting input for model {} to sparse array".format(self.model_name))
                    self.model.fit(csr_matrix(x_features), y_)
            except:
                raise ConfigError("Can not fit on the given data".format(self.model_name))

        return
github deepmipt / DeepPavlov / deeppavlov / pipeline_manager / pipeline_manager.py View on Github external
"from the rest datasets.".format(config['dataset_reader']['data_path']))

            iterator = get_iterator_from_config(config, data)

            if isinstance(iterator, DataFittingIterator):
                raise ConfigError("Instance of a class 'DataFittingIterator' is not supported.")
            else:
                if config.get('train', None):
                    if config['train']['test_best'] and len(iterator.data['test']) == 0:
                        raise ConfigError(
                            "The 'test' part of dataset is empty, but 'test_best' in train config is 'True'."
                            " Please check the dataset_iterator config.")

                    if (config['train']['validate_best'] or config['train'].get('val_every_n_epochs', False) > 0) and \
                            len(iterator.data['valid']) == 0:
                        raise ConfigError(
                            "The 'valid' part of dataset is empty, but 'valid_best' in train config is 'True'"
                            " or 'val_every_n_epochs' > 0. Please check the dataset_iterator config.")
                else:
                    if len(iterator.data['test']) == 0:
                        raise ConfigError("The 'test' part of dataset is empty as a 'train' part of config file, "
                                          "but default value of 'test_best' is 'True'. "
                                          "Please check the dataset_iterator config.")

            # get a tiny data from dataset
            if len(iterator.data['train']) <= 100:
                print("!!!!!!!!!!!!! WARNING !!!!!!!!!!!!! Length of 'train' part dataset <= 100. "
                      "Please check the dataset_iterator config")
                tiny_train = copy(iterator.data['train'])
            else:
                tiny_train = copy(iterator.data['train'][:10])
            iterator.train = tiny_train
github deepmipt / DeepPavlov / deeppavlov / core / commands / train.py View on Github external
'validate_best': True,
        'test_best': True
    }

    train_config = dict(default_train_config, **train_config)

    if train_config['metric_optimization'] == 'maximize':
        def improved(score, best):
            """Return True when ``score`` is strictly greater than ``best``."""
            is_better = score > best
            return is_better
        best = float('-inf')
    elif train_config['metric_optimization'] == 'minimize':
        def improved(score, best):
            """Return True when ``score`` is strictly less than ``best``."""
            is_better = score < best
            return is_better
        best = float('inf')
    else:
        raise ConfigError('metric_optimization has to be one of {}'.format(['maximize', 'minimize']))

    i = 0
    epochs = 0
    examples = 0
    saved = False
    patience = 0
    log_on = train_config['log_every_n_batches'] > 0 or train_config['log_every_n_epochs'] > 0
    train_y_true = []
    train_y_predicted = []
    start_time = time.time()
    break_flag = False
    try:
        while True:
            for x, y_true in iterator.batch_generator(train_config['batch_size']):
                if log_on:
                    y_predicted = list(model(list(x)))
github deepmipt / DeepPavlov / deeppavlov / pipeline_manager / pipeline_manager.py View on Github external
dataset_composition_ = dict(train=False, valid=False, test=False)
            data = read_data_by_config(config)
            if i == 0:
                for dtype in dataset_composition_.keys():
                    if len(data.get(dtype, [])) != 0:
                        dataset_composition_[dtype] = True
            else:
                for dtype in dataset_composition_.keys():
                    if len(data.get(dtype, [])) == 0 and dataset_composition_[dtype]:
                        raise ConfigError("The file structure in the {0} dataset differs "
                                          "from the rest datasets.".format(config['dataset_reader']['data_path']))

            iterator = get_iterator_from_config(config, data)

            if isinstance(iterator, DataFittingIterator):
                raise ConfigError("Instance of a class 'DataFittingIterator' is not supported.")
            else:
                if config.get('train', None):
                    if config['train']['test_best'] and len(iterator.data['test']) == 0:
                        raise ConfigError(
                            "The 'test' part of dataset is empty, but 'test_best' in train config is 'True'."
                            " Please check the dataset_iterator config.")

                    if (config['train']['validate_best'] or config['train'].get('val_every_n_epochs', False) > 0) and \
                            len(iterator.data['valid']) == 0:
                        raise ConfigError(
                            "The 'valid' part of dataset is empty, but 'valid_best' in train config is 'True'"
                            " or 'val_every_n_epochs' > 0. Please check the dataset_iterator config.")
                else:
                    if len(iterator.data['test']) == 0:
                        raise ConfigError("The 'test' part of dataset is empty as a 'train' part of config file, "
                                          "but default value of 'test_best' is 'True'. "
github deepmipt / DeepPavlov / deeppavlov / pipeline_manager / pipeline_manager.py View on Github external
self.root = expand_path(
            self.exp_config['pipeline_search'].get('root',
                '~/.deeppavlov/experiments'))
        self.plot = self.exp_config['pipeline_search'].get('plot', False)
        self.save_best = self.exp_config['pipeline_search'].get('save_best', False)
        self.do_test = self.exp_config['pipeline_search'].get('do_test', False)

        self.search_type = self.exp_config['pipeline_search'].get('search_type', 'random')
        self.sample_num = self.exp_config['pipeline_search'].get('sample_num', 10)
        self.target_metric = self.exp_config['pipeline_search'].get('target_metric')
        self.multiprocessing = self.exp_config['pipeline_search'].get('multiprocessing', True)
        self.max_num_workers = self.exp_config['pipeline_search'].get('max_num_workers')
        cpu_num = cpu_count()
        if self.max_num_workers:
            if self.max_num_workers > cpu_num:
                raise ConfigError("Parameter 'max_num_workers'={0}, "
                                  "but amounts of cpu is {1}.".format(self.max_num_workers, cpu_num))
            elif self.max_num_workers < 1:
                raise ConfigError("The number of workers must be at least equal to one. "
                                  "Please check 'max_num_workers' parameter in config.")

        self.use_gpu = self.exp_config['pipeline_search'].get('use_all_gpus', False)
        self.memory_fraction = self.exp_config['pipeline_search'].get('gpu_memory_fraction', 1.0)
        self.max_num_workers = None
        self.available_gpu = None

        # create the observer
        self.save_path = self.root / self.date / self.exp_name / 'checkpoints'
        self.observer = ExperimentObserver(self.exp_name, self.root, self.info, self.date, self.plot)
        # create the pipeline generator
        self.pipeline_generator = PipeGen(self.exp_config, self.save_path, self.search_type, self.sample_num, False)
        self.gen_len = self.pipeline_generator.length
github deepmipt / DeepPavlov / deeppavlov / models / sklearn / sklearn_component.py View on Github external
Returns:
            sparse or dense array of stacked data
        """
        x_features = []
        for i in range(len(x)):
            if ((isinstance(x[i], tuple) or isinstance(x[i], list) or isinstance(x[i], np.ndarray) and len(x[i]))
                    or (issparse(x[i]) and x[i].shape[0])):
                if issparse(x[i][0]):
                    x_features.append(vstack(list(x[i])))
                elif isinstance(x[i][0], np.ndarray) or isinstance(x[i][0], list):
                    x_features.append(np.vstack(list(x[i])))
                elif isinstance(x[i][0], str):
                    x_features.append(np.array(x[i]))
                else:
                    raise ConfigError('Not implemented this type of vectors')
            else:
                raise ConfigError("Input vectors cannot be empty")

        sparse = False
        for inp in x_features:
            if issparse(inp):
                sparse = True
        if sparse:
            x_features = hstack(list(x_features))
        else:
            x_features = np.hstack(list(x_features))

        return x_features
github deepmipt / DeepPavlov / deeppavlov / pipeline_manager / pipeline_manager.py View on Github external
raise ConfigError("Instance of a class 'DataFittingIterator' is not supported.")
            else:
                if config.get('train', None):
                    if config['train']['test_best'] and len(iterator.data['test']) == 0:
                        raise ConfigError(
                            "The 'test' part of dataset is empty, but 'test_best' in train config is 'True'."
                            " Please check the dataset_iterator config.")

                    if (config['train']['validate_best'] or config['train'].get('val_every_n_epochs', False) > 0) and \
                            len(iterator.data['valid']) == 0:
                        raise ConfigError(
                            "The 'valid' part of dataset is empty, but 'valid_best' in train config is 'True'"
                            " or 'val_every_n_epochs' > 0. Please check the dataset_iterator config.")
                else:
                    if len(iterator.data['test']) == 0:
                        raise ConfigError("The 'test' part of dataset is empty as a 'train' part of config file, "
                                          "but default value of 'test_best' is 'True'. "
                                          "Please check the dataset_iterator config.")

            # get a tiny data from dataset
            if len(iterator.data['train']) <= 100:
                print("!!!!!!!!!!!!! WARNING !!!!!!!!!!!!! Length of 'train' part dataset <= 100. "
                      "Please check the dataset_iterator config")
                tiny_train = copy(iterator.data['train'])
            else:
                tiny_train = copy(iterator.data['train'][:10])
            iterator.train = tiny_train

            if len(iterator.data['valid']) <= 20:
                tiny_valid = copy(iterator.data['valid'])
            else:
                tiny_valid = copy(iterator.data['valid'][:5])
github deepmipt / DeepPavlov / deeppavlov / pipeline_manager / pipeline_manager_parallel.py View on Github external
iterator = get_iterator_from_config(config, data)
        if isinstance(iterator, DataFittingIterator):
            raise ConfigError("Instance of a class 'DataFittingIterator' is not supported.")
        else:
            if config.get('train', None):
                if config['train']['test_best'] and len(iterator.data['test']) == 0:
                    raise ConfigError("The 'test' part of dataset is empty, but 'test_best' in train config is 'True'."
                                      " Please check the dataset_iterator config.")

                if (config['train']['validate_best'] or config['train'].get('val_every_n_epochs', False) > 0) and \
                        len(iterator.data['valid']) == 0:
                    raise ConfigError("The 'valid' part of dataset is empty, but 'valid_best' in train config is 'True'"
                                      " or 'val_every_n_epochs' > 0. Please check the dataset_iterator config.")
            else:
                if len(iterator.data['test']) == 0:
                    raise ConfigError("The 'test' part of dataset is empty as a 'train' part of config file, "
                                      "but default value of 'test_best' is 'True'. "
                                      "Please check the dataset_iterator config.")

        # get a tiny data from dataset
        if len(iterator.data['train']) <= 100:
            print("!!!!!!!!!!!!! WARNING !!!!!!!!!!!!! Length of 'train' part dataset <= 100. "
                  "Please check the dataset_iterator config")
            tiny_train = copy(iterator.data['train'])
        else:
            tiny_train = copy(iterator.data['train'][:10])
        iterator.train = tiny_train

        if len(iterator.data['valid']) <= 20:
            tiny_valid = copy(iterator.data['valid'])
        else:
            tiny_valid = copy(iterator.data['valid'][:5])