# Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def train_model(input_features, output_features, data_csv):
    """Train a small LudwigModel on a csv file, predict, and clean up.

    Helper used to avoid code repetition when running an experiment.

    :param input_features: input schema
    :param output_features: output schema
    :param data_csv: path to data
    :return: None
    """
    definition = {
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'training': {'epochs': 2},
    }
    model = LudwigModel(definition)

    # Fit on the csv, skipping every optional on-disk artifact to keep
    # the test run lightweight.
    model.train(
        data_csv=data_csv,
        skip_save_processed_input=True,
        skip_save_progress=True,
        skip_save_unprocessed_output=True,
    )
    model.predict(data_csv=data_csv)

    # Drop the experiment directory produced by the run above.
    shutil.rmtree(model.exp_dir_name, ignore_errors=True)

    # Load the same data as a dataframe for the dataframe-based run.
    data_df = read_csv(data_csv)
def run_api_experiment(input_features, output_features):
    """Build and return a small LudwigModel for the given schemas.

    Helper used to avoid code repetition when running an experiment.

    :param input_features: input schema
    :param output_features: output schema
    :return: the constructed LudwigModel
    """
    return LudwigModel({
        'input_features': input_features,
        'output_features': output_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'training': {'epochs': 2},
    })
def setup_model(self):
    """Configure and store the test LudwigModel on this instance.

    Builds a minimal model definition from ``self.input_features`` and
    ``self.output_features`` and assigns the resulting LudwigModel to
    ``self.model``.
    """
    self.model = LudwigModel({
        'input_features': self.input_features,
        'output_features': self.output_features,
        'combiner': {'type': 'concat', 'fc_size': 14},
        'training': {'epochs': 2},
    })
return ModelSklearn(
model=joblib.load(os.path.join(model_folder_path, 'model.joblib')),
predictors=metadata['predictors'],
targets=metadata['targets'],
system=metadata['system'],
model_id=model_id,
search_id=metadata['search_id'],
train_specification=metadata['train_specification'],
preprocess=preprocess,
task=metadata['task'])
if metadata['system'] == 'ludwig':
from ludwig.api import LudwigModel
return ModelLudwig(
model=LudwigModel.load(model_folder_path),
predictors=metadata['predictors'],
targets=metadata['targets'],
model_id=model_id,
search_id=metadata['search_id'],
task=metadata['task'])
if metadata['system'] == 'h2o':
import h2o
h2o.init()
return ModelH2O(
model=h2o.load_model(os.path.join(model_folder_path, metadata['model_filename'])),
model_id=model_id,
predictors=metadata['predictors'],
targets=metadata['targets'],
search_id=metadata['search_id'],
train_specification=metadata['train_specification'],
return ModelSklearn(
model=joblib.load(os.path.join(model_folder_path, 'model.joblib')),
predictors=metadata['predictors'],
targets=metadata['targets'],
system=metadata['system'],
model_id=model_id,
search_id=metadata['search_id'],
train_specification=metadata['train_specification'],
preprocess=preprocess,
task=metadata['task'])
if metadata['system'] == 'ludwig':
from ludwig.api import LudwigModel
return ModelLudwig(
model=LudwigModel.load(model_folder_path),
predictors=metadata['predictors'],
targets=metadata['targets'],
model_id=model_id,
search_id=metadata['search_id'],
task=metadata['task'])
if metadata['system'] == 'h2o':
import h2o
h2o.init()
return ModelH2O(
model=h2o.load_model(os.path.join(model_folder_path, metadata['model_filename'])),
model_id=model_id,
predictors=metadata['predictors'],
targets=metadata['targets'],
search_id=metadata['search_id'],
train_specification=metadata['train_specification'],
def test_train(
data_csv,
model_definition,
batch_size=128,
gpus=None,
gpu_fraction=1,
debug=False,
logging_level=logging.ERROR,
**kwargs
):
ludwig_model = LudwigModel(model_definition, logging_level=logging_level)
train_stats = ludwig_model.train(
data_csv=data_csv,
gpus=gpus,
gpu_fraction=gpu_fraction,
debug=debug
)
logger.critical(train_stats)
# predict
predictions = ludwig_model.predict(
data_csv=data_csv,
batch_size=batch_size,
gpus=gpus,
gpu_fraction=gpu_fraction,
# Return
:return: (LudwigModel) a LudwigModel object
# Example usage
```python
ludwig_model = LudwigModel.load(model_dir)
```
"""
model, model_definition = load_model_and_definition(model_dir)
ludwig_model = LudwigModel(model_definition)
ludwig_model.model = model
ludwig_model.train_set_metadata = load_metadata(
os.path.join(
model_dir,
TRAIN_SET_METADATA_FILE_NAME
)
)
return ludwig_model
# preprocessing.sequence.skipgrams,
# preprocessing.sequence.make_sampling_table,
# ],
# 'classes': [
# preprocessing.sequence.TimeseriesGenerator,
# ],
# 'all_module_functions': [initializers],
# 'all_module_classes': [initializers]
# }
# ]
PAGES = [
{
'page': 'api/LudwigModel.md',
'classes': [
(LudwigModel, "*")
],
},
{
'page': 'api/visualization.md',
'functions': [
learning_curves,
compare_performance,
compare_classifiers_performance_from_prob,
compare_classifiers_performance_from_pred,
compare_classifiers_performance_subset,
compare_classifiers_performance_changing_k,
compare_classifiers_multiclass_multimetric,
compare_classifiers_predictions,
confidence_thresholding_2thresholds_2d,
confidence_thresholding_2thresholds_3d,
confidence_thresholding,
batch_size=128,
gpus=None,
gpu_fraction=1,
debug=False,
logging_level=logging.ERROR,
**kwargs
):
model_definition = merge_with_defaults(model_definition)
data, train_set_metadata = build_dataset(
data_csv,
(model_definition['input_features'] +
model_definition['output_features']),
model_definition['preprocessing']
)
ludwig_model = LudwigModel(model_definition, logging_level=logging_level)
ludwig_model.initialize_model(train_set_metadata=train_set_metadata)
ludwig_model.train_online(
data_csv=data_csv,
batch_size=128,
gpus=gpus,
gpu_fraction=gpu_fraction,
)
ludwig_model.train_online(
data_csv=data_csv,
batch_size=128,
gpus=gpus,
gpu_fraction=gpu_fraction,
)
# predict