How to use the dataprep.Package.Package function in dataprep

To help you get started, we’ve selected a few dataprep examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source on GitHub: Azure-Samples / MachineLearningSamples-ChurnPrediction / CATelcoCustomerChurnModeling.py (view on GitHub)
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
import pandas as pd
import numpy as np
import csv
from sklearn.metrics import accuracy_score
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import LabelEncoder

from azureml.logging import get_azureml_logger

# Obtain the Azure ML run logger and record a marker entry for this sample.
run_logger = get_azureml_logger()
run_logger.log('amlrealworld.ChurnPrediction.CATelcoCustomerChurnModeling', 'true')

# Open the data-prep package and load its first dataflow into `df`
# (spark=False is forwarded to get_dataframe; presumably this yields a
# pandas DataFrame, since the code below uses the pandas API on it).
with Package.open_package('CATelcoCustomerChurnTrainingSample.dprep') as pkg:
    first_dataflow = pkg.dataflows[0]
    df = first_dataflow.get_dataframe(spark=False)

# One-hot encode every 'category'/'object' column of df: each such column is
# dropped and replaced by indicator columns named '<column>_<value>', which
# are joined onto the right-hand side of the frame.
columns_to_encode = list(df.select_dtypes(include=['category', 'object']))
for column_to_encode in columns_to_encode:
    # Let get_dummies build the '<column>_<value>' names via `prefix` (default
    # separator is '_'). Concatenating the names by hand raises TypeError when
    # a level is not a str (e.g. ints stored in an object column).
    dummies = pd.get_dummies(df[column_to_encode], prefix=column_to_encode)
    df = df.drop(column_to_encode, axis=1).join(dummies)

# Fixed seed so the train/test partition is the same on every run.
random_seed = 42

# Gaussian Naive Bayes classifier instance (not fitted in this part of the script).
model = GaussianNB()

# Hold out 30% of the rows of df as the test partition.
# NOTE(review): train_test_split is imported from sklearn.cross_validation in
# this file; that module was removed in scikit-learn 0.20 in favour of
# sklearn.model_selection -- confirm the targeted scikit-learn version.
train, test = train_test_split(df, test_size=0.3, random_state=random_seed)