How to use the zat.dataframe_to_matrix.DataFrameToMatrix class in zat

To help you get started, we’ve selected a few zat examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github SuperCowPowers / zat / zat / dataframe_to_matrix.py View on Github external
# Assert that the dataframe we passed in didn't change
    copy_test_df.equals(test_df)

    # Test that the conversion gives us the same columns on a df with different category values
    # This also tests NaN in a category column
    print('TRANSFORM2')
    matrix2 = to_matrix.transform(test_df2)
    assert matrix.shape == matrix2.shape

    # First two ROWS should be the same
    np_test_utils.assert_equal(matrix[0], matrix2[0])
    np_test_utils.assert_equal(matrix[1], matrix2[1])

    # Test normalize
    to_matrix_norm = DataFrameToMatrix()
    print('FIT-TRANSFORM')
    norm_matrix = to_matrix_norm.fit_transform(test_df)
    print(norm_matrix)
    assert(norm_matrix[:, 0].min() == 0)
    assert(norm_matrix[:, 0].max() == 1)

    # Make sure normalize 'does the right thing' when doing transform
    print('TRANSFORM')
    norm_matrix2 = to_matrix_norm.transform(test_df2)
    assert(norm_matrix2[:, 0].min() == 0)
    assert(norm_matrix2[:, 0].max() == 2)    # Normalization is based on FIT range

    # Test div by zero in normalize
    test_df3 = test_df2.copy()
    test_df3['D'] = [1, 1, 1, 1]
    print('FIT-TRANSFORM')
github SuperCowPowers / zat / zat / dataframe_to_matrix.py View on Github external
# Fit a fresh transformer on test_df and cluster the resulting matrix
# NOTE(review): test_df, test_df2 and KMeans are defined/imported before this excerpt.
to_matrix = DataFrameToMatrix()
    my_matrix = to_matrix.fit_transform(test_df)
    # Ask for two clusters; the predicted labels are attached to the dataframe below
    kmeans = KMeans(n_clusters=2).fit_predict(my_matrix)

    # Now we can put our ML results back onto our dataframe!
    test_df['cluster'] = kmeans
    cluster_groups = test_df.groupby('cluster')

    # Now print out the details for each cluster
    for key, group in cluster_groups:
        print('Rows in Cluster: {:d}'.format(len(group)))
        print(group.head(), '\n')
    # Drop the helper column again so test_df has its original columns for the next fit
    del test_df['cluster']

    # Now we're going to intentionally introduce NaNs in the categorical output just to see what happens
    # (fit on test_df, then transform test_df2 with that fitted transformer)
    to_matrix = DataFrameToMatrix()
    _ = to_matrix.fit_transform(test_df)
    my_matrix2 = to_matrix.transform(test_df2)
    kmeans = KMeans(n_clusters=2).fit_predict(my_matrix2)

    # Now we can put our ML results back onto our dataframe!
    test_df2['cluster'] = kmeans
    cluster_groups = test_df2.groupby('cluster')

    # Now print out the details for each cluster
    for key, group in cluster_groups:
        print('Rows in Cluster: {:d}'.format(len(group)))
        print(group.head(), '\n')
github SuperCowPowers / zat / zat / dataframe_to_matrix.py View on Github external
test_df2 = pd.DataFrame(
        {'A': pd.Categorical(['a', 'b', 'b', 'a'], ordered=True),
         'B': pd.Categorical(['a', 'b', 'd', 'a'], ordered=False),
         'C': pd.Categorical(['a', 'b', 'z', 'y'], categories=['a', 'b', 'z', 'd']),
         'D': [1, 2, 3, 7],
         'E': ['w', 'x', 'z', 'foo'],
         'F': [1.1, 2.2, 3.3, 4.4],
         'H': [True, False, False, False]
         }
    )

    # Copy the test_df for testing later
    copy_test_df = test_df.copy()

    # Test the transformation from dataframe to numpy ndarray and back again
    to_matrix = DataFrameToMatrix()
    print('FIT-TRANSFORM')
    matrix = to_matrix.fit_transform(test_df)
    print('TRANSFORM')
    matrix_test = to_matrix.transform(test_df)

    # These two matrices should be the same
    np_test_utils.assert_equal(matrix, matrix_test)

    # Assert that the dataframe we passed in didn't change
    copy_test_df.equals(test_df)

    # Test that the conversion gives us the same columns on a df with different category values
    # This also tests NaN in a category column
    print('TRANSFORM2')
    matrix2 = to_matrix.transform(test_df2)
    assert matrix.shape == matrix2.shape
github SuperCowPowers / zat / zat / dataframe_to_matrix.py View on Github external
os.unlink(temp.name)

    # Try 'nullable' integer arrays
    null_df = test_df2.copy()
    null_df['I'] = pd.Series([10, 11, 12, np.NaN], dtype='UInt64')
    print('FIT-TRANSFORM')
    matrix = to_matrix.fit_transform(null_df)
    print('TRANSFORM')
    matrix_test = to_matrix.transform(null_df)

    # These two matrices should be the same
    np_test_utils.assert_equal(matrix, matrix_test)

    # Now actually try the matrix with a scikit-learn algo
    from sklearn.cluster import KMeans
    to_matrix = DataFrameToMatrix()
    my_matrix = to_matrix.fit_transform(test_df)
    kmeans = KMeans(n_clusters=2).fit_predict(my_matrix)

    # Now we can put our ML results back onto our dataframe!
    test_df['cluster'] = kmeans
    cluster_groups = test_df.groupby('cluster')

    # Now print out the details for each cluster
    for key, group in cluster_groups:
        print('Rows in Cluster: {:d}'.format(len(group)))
        print(group.head(), '\n')
    del test_df['cluster']

    # Now we're going to intentionally introduce NaNs in the categorical output just to see what happens
    to_matrix = DataFrameToMatrix()
    _ = to_matrix.fit_transform(test_df)