""")
# Data loading.
classes = [
    'street_music', 'siren', 'jackhammer', 'gun_shot', 'engine_idling',
    'drilling', 'dog_bark', 'children_playing', 'car_horn', 'air_conditioner',
]
labels = []
all_filepaths = []
for label_class in classes:
    for filepath in glob.glob(os.path.join('data/UrbanSound/data', label_class, '*.wav')):
        all_filepaths.append(filepath)
        labels.append(label_class)
filepaths, filepaths_test, y, y_test = train_test_split(
    all_filepaths, labels, train_size=160, test_size=40)
audio_pipeline = MLPipeline.from_ml_json(['audio_featurizer', 'audio_padder', 'pca', 'random_forest_classifier'])
# Check that the hyperparameters are correct.
for hyperparam in audio_pipeline.get_tunable_hyperparams():
    print(hyperparam)
# Check that the steps are correct.
expected_steps = {'audio_featurizer', 'audio_padder', 'pca', 'rf_classifier'}
steps = set(audio_pipeline.steps_dict.keys())
assert expected_steps == steps
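
# load_and_segment is a helper defined elsewhere in the original file. Below
# is a minimal sketch of its likely shape, assuming scipy wav loading; the
# name, return layout, and absence of real segmentation logic are all
# assumptions, not the original implementation.
from scipy.io import wavfile

def load_and_segment_sketch(paths):
    signals, sample_freqs = [], []
    for path in paths:
        freq, signal = wavfile.read(path)  # (sample rate, raw samples)
        signals.append(signal)
        sample_freqs.append(freq)
    # Variable-length signals are fine here: the 'audio_padder' step pads
    # them to a common length inside the pipeline.
    return signals, sample_freqs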
# Check that we can score properly.
print("\nFitting pipeline...")
X, sample_freqs = load_and_segment(filepaths)
produce_params = {('audio_featurizer', 'sample_freqs'): sample_freqs}
audio_pipeline.fit(X, y, produce_params=produce_params)
print("\nFit pipeline.")
print("""
============================================
Testing Multi Table Pipeline
============================================
""")
orders = pd.read_csv("data/Retail/orders.csv")
order_products = pd.read_csv("data/Retail/order_products.csv")
label_times = pd.read_csv("data/Retail/label_times.csv")
X = label_times.sample(frac=0.8)
X_test = label_times.drop(X.index)
y = X["label"]
y_test = X_test["label"]
es = make_entity_set(orders, order_products)
multitable = MLPipeline.from_ml_json(['dfs', 'random_forest_classifier'])
# Check that the hyperparameters are correct.
for hyperparam in multitable.get_tunable_hyperparams():
    print(hyperparam)
# Check that the steps are correct.
expected_steps = {'dfs', 'rf_classifier'}
steps = set(multitable.steps_dict.keys())
assert expected_steps == steps
# Check that we can score properly.
produce_params = {
    ('dfs', 'entityset'): es,
    ('dfs', 'cutoff_time_in_index'): True
}
print("\nFitting pipeline...")

# The class enclosing this __new__ was cut from the excerpt; "TextClassifier"
# is a hypothetical name for it. Instantiating the class simply returns a
# ready-made text classification pipeline.
class TextClassifier:

    def __new__(cls, *args, **kwargs):
        return MLPipeline.from_ml_json([
            'count_vectorizer', 'to_array', 'tfidf_transformer',
            'multinomial_nb'
        ])
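
# Usage sketch for the factory above (hypothetical, matching the assumed
# class name):
text_pipeline = TextClassifier()
print(text_pipeline.steps_dict.keys())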