How to use the mlblocks.ml_pipeline.ml_pipeline.MLPipeline.from_ml_json function in mlblocks

To help you get started, we've selected a few MLBlocks examples based on popular ways the library is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github HDI-Project / MLBlocks / examples / pipelines / audio / audio.py View on Github external
""")

# Data loading: gather the UrbanSound .wav files for every target class.
classes = ['street_music', 'siren', 'jackhammer', 'gun_shot', 'engine_idling', 'drilling', 'dog_bark', 'children_playing', 'car_horn', 'air_conditioner']

labels = []
all_filepaths = []
for label_class in classes:
    # Each class lives in its own subdirectory; collect its files and
    # record the class label once per matched file.
    matched = glob.glob(os.path.join('data/UrbanSound/data', label_class, '*.wav'))
    all_filepaths.extend(matched)
    labels.extend([label_class] * len(matched))

# Hold out 40 of the 200 sampled clips for testing.
filepaths, filepaths_test, y, y_test = train_test_split(
    all_filepaths, labels, train_size=160, test_size=40)

# Assemble the pipeline from its primitive names.
audio_pipeline = MLPipeline.from_ml_json(['audio_featurizer', 'audio_padder', 'pca', 'random_forest_classifier'])

# Check that the hyperparameters are correct.
for tunable in audio_pipeline.get_tunable_hyperparams():
    print(tunable)

# Check that the steps are correct.
# NOTE: 'random_forest_classifier' is registered under the step name
# 'rf_classifier' inside the pipeline.
expected_steps = {'audio_featurizer', 'audio_padder', 'pca', 'rf_classifier'}
steps = set(audio_pipeline.steps_dict)
assert expected_steps == steps

# Check that we can score properly.
print("\nFitting pipeline...")
X, sample_freqs = load_and_segment(filepaths)
# Route the sample frequencies to the 'audio_featurizer' step at produce time.
produce_params = {('audio_featurizer', 'sample_freqs'): sample_freqs}
audio_pipeline.fit(X, y, produce_params=produce_params)
print("\nFit pipeline.")
github HDI-Project / MLBlocks / examples / pipelines / multitable / multitable.py View on Github external
============================================
    Testing Multi Table Pipeline
    ============================================
    """)
    # Load the retail tables.
    # NOTE(review): paths are relative to the working directory — presumably
    # the examples root; confirm before running elsewhere.
    orders = pd.read_csv("data/Retail/orders.csv")
    order_products = pd.read_csv("data/Retail/order_products.csv")
    label_times = pd.read_csv("data/Retail/label_times.csv")

    # 80/20 split of the label rows: sample 80% for training, then keep the
    # complementary rows (dropped by index) for testing.
    X = label_times.sample(frac=0.8)
    X_test = label_times.drop(X.index)
    y = X["label"]
    y_test = X_test["label"]

    # Build the entity set relating orders and order products
    # (helper defined elsewhere in this file/module).
    es = make_entity_set(orders, order_products)

    # Assemble the pipeline: DFS feature extraction + random forest.
    multitable = MLPipeline.from_ml_json(['dfs', 'random_forest_classifier'])

    # Check that the hyperparameters are correct.
    for hyperparam in multitable.get_tunable_hyperparams():
        print(hyperparam)

    # Check that the steps are correct.
    # NOTE: 'random_forest_classifier' is registered under the step name
    # 'rf_classifier' inside the pipeline.
    expected_steps = {'dfs', 'rf_classifier'}
    steps = set(multitable.steps_dict.keys())
    assert expected_steps == steps

    # Check that we can score properly.
    # Extra keyword arguments routed to the 'dfs' step at produce time.
    produce_params = {
        ('dfs', 'entityset'): es,
        ('dfs', 'cutoff_time_in_index'): True
    }
    print("\nFitting pipeline...")
github HDI-Project / MLBlocks / mlblocks / components / pipelines / text / traditional_text.py View on Github external
def __new__(cls, *args, **kwargs):
    """Build the traditional text pipeline via the MLBlocks JSON loader.

    Extra positional and keyword arguments are accepted but ignored;
    the pipeline is always assembled from the same four primitives.
    """
    primitives = [
        'count_vectorizer',
        'to_array',
        'tfidf_transformer',
        'multinomial_nb',
    ]
    return MLPipeline.from_ml_json(primitives)