How to use the kaggler.data_io.save_data function in Kaggler

To help you get started, we’ve selected a few examples of kaggler.data_io.save_data, based on popular ways the function is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: jeongyoonlee/kaggler-template, src/generate_j1.py (view on GitHub)
len(num_cols)))

    df = pd.concat([trn, tst], axis=0)

    logging.info('label encoding categorical variables')
    lbe = LabelEncoder(min_obs=10)
    df[cat_cols] = lbe.fit_transform(df[cat_cols])
    df[num_cols] = df[num_cols].fillna(-1)

    with open(feature_map_file, 'w') as f:
        for i, col in enumerate(df.columns):
            f.write('{}\t{}\tq\n'.format(i, col))

    logging.info('saving features')
    save_data(df.values[:n_trn,], y, train_feature_file)
    save_data(df.values[n_trn:,], None, test_feature_file)
Source: jeongyoonlee/kaggler-template, src/generate_n1.py (view on GitHub)
df[num_cols] = nm.fit_transform(df[num_cols].values)

    logging.info('label encoding categorical variables')
    ohe = OneHotEncoder(min_obs=10)
    X_ohe = ohe.fit_transform(df[cat_cols])
    ohe_cols = ['ohe{}'.format(i) for i in range(X_ohe.shape[1])]

    X = sparse.hstack((df[num_cols].values, X_ohe), format='csr')

    with open(feature_map_file, 'w') as f:
        for i, col in enumerate(num_cols + ohe_cols):
            f.write('{}\t{}\tq\n'.format(i, col))

    logging.info('saving features')
    save_data(X[:n_trn,], y, train_feature_file)
    save_data(X[n_trn:,], None, test_feature_file)
Source: jeongyoonlee/kaggler-template, src/generate_j1.py (view on GitHub)
logging.info('categorical: {}, numerical: {}'.format(len(cat_cols),
                                                         len(num_cols)))

    df = pd.concat([trn, tst], axis=0)

    logging.info('label encoding categorical variables')
    lbe = LabelEncoder(min_obs=10)
    df[cat_cols] = lbe.fit_transform(df[cat_cols])
    df[num_cols] = df[num_cols].fillna(-1)

    with open(feature_map_file, 'w') as f:
        for i, col in enumerate(df.columns):
            f.write('{}\t{}\tq\n'.format(i, col))

    logging.info('saving features')
    save_data(df.values[:n_trn,], y, train_feature_file)
    save_data(df.values[n_trn:,], None, test_feature_file)
Source: jeongyoonlee/kaggler-template, src/generate_n1.py (view on GitHub)
nm = Normalizer()
    df[num_cols] = nm.fit_transform(df[num_cols].values)

    logging.info('label encoding categorical variables')
    ohe = OneHotEncoder(min_obs=10)
    X_ohe = ohe.fit_transform(df[cat_cols])
    ohe_cols = ['ohe{}'.format(i) for i in range(X_ohe.shape[1])]

    X = sparse.hstack((df[num_cols].values, X_ohe), format='csr')

    with open(feature_map_file, 'w') as f:
        for i, col in enumerate(num_cols + ohe_cols):
            f.write('{}\t{}\tq\n'.format(i, col))

    logging.info('saving features')
    save_data(X[:n_trn,], y, train_feature_file)
    save_data(X[n_trn:,], None, test_feature_file)