How to use IterativeSVD from fancyimpute

To help you get started, we’ve selected a few fancyimpute examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github iskandr / fancyimpute / test / test_iterative_svd.py View on Github external
def test_iterative_svd_with_low_rank_random_matrix():
    """Complete XY_incomplete with a rank-3 IterativeSVD and bound the error.

    The completed matrix is compared against XY via reconstruction_error,
    restricted by missing_mask; the second value it returns (bound to
    ``missing_mae`` here) must stay below 0.1.
    """
    completed = IterativeSVD(rank=3).fit_transform(XY_incomplete)
    # Only the second returned value is checked; the first is discarded.
    _unused, missing_mae = reconstruction_error(
        XY, completed, missing_mask, name="IterativeSVD")
    assert missing_mae < 0.1, "Error too high!"
github crowdresearch / daemo / mturk / tasks.py View on Github external
# COLUMNS = ["worker_id", "score", "accuracy", "attempted", "correct", "boomerang"]

    data = pivoted.copy(deep=True)
    matrix = data.ix[:, 1:]  # without worker_id

    # data['accuracy'] = matrix.mean(axis=ROW_WISE) * 100
    # data['attempted'] = matrix.count(axis=ROW_WISE)
    # data['correct'] = matrix.sum(axis=ROW_WISE)

    # data = data[data["attempted"]>=MIN_TASKS]

    # turn incorrect to -1 as imputations will fill with 0
    # matrix[matrix <= 0] = -1

    try:
        mat = IterativeSVD(verbose=False, init_fill_method="mean").complete(matrix)
    except Exception:
        mat = SoftImpute(verbose=False, init_fill_method="mean").complete(matrix)

    data['score'] = mat.mean(axis=ROW_WISE)
    data = data.sort_values(by=['score'], ascending=[False])

    percentile = data['score'].quantile(settings.WORKER_SPLIT_PERCENTILE)

    # Top 25% = 3-2 and Bottom 75% = 2-1
    num_workers = len(data)
    num_workers_top_x = len(data[data['score'] >= percentile])

    top_x = data.head(num_workers_top_x)

    # add extra worker at inflexion point from top set as it will have 2.0 duplicated
    bottom_y = data.tail(num_workers - num_workers_top_x + 1)
github AlexsLemonade / refinebio / workers / data_refinery_workers / processors / create_compendia.py View on Github external
del transposed_matrix_with_zeros

    # Store the absolute/percentages of imputed values
    total = transposed_matrix.isnull().sum().sort_values(ascending=False)
    percent = (transposed_matrix.isnull().sum()/transposed_matrix.isnull().count()).sort_values(ascending=False)
    total_percent_imputed = sum(percent) / len(transposed_matrix.count())
    job_context['total_percent_imputed'] = total_percent_imputed
    logger.info("Total percentage of data to impute!", total_percent_imputed=total_percent_imputed)

    # Perform imputation of missing values with IterativeSVD (rank=10) on the transposed_matrix; imputed_matrix
    svd_algorithm = job_context['dataset'].svd_algorithm
    if svd_algorithm != 'NONE':
        svd_start = time.time()
        logger.info("IterativeSVD algorithm: %s" % svd_algorithm)
        svd_algorithm = str.lower(svd_algorithm)
        imputed_matrix = IterativeSVD(rank=10, svd_algorithm=svd_algorithm).fit_transform(transposed_matrix)
    else:
        imputed_matrix = transposed_matrix
        logger.info("Skipping IterativeSVD")
    del transposed_matrix

    # Untranspose imputed_matrix (genes are now rows, samples are now columns)
    untransposed_imputed_matrix = imputed_matrix.transpose()
    del imputed_matrix

    # Convert back to Pandas
    untransposed_imputed_matrix_df = pd.DataFrame.from_records(untransposed_imputed_matrix)
    untransposed_imputed_matrix_df.index = row_col_filtered_combined_matrix_samples_index
    untransposed_imputed_matrix_df.columns = row_col_filtered_combined_matrix_samples_columns
    del untransposed_imputed_matrix
    del row_col_filtered_combined_matrix_samples_index
    del row_col_filtered_combined_matrix_samples_columns
github ANTsX / ANTsPy / ants / utils / impute.py View on Github external
if value is None:
            value = 3
        X_filled = KNN(k=value, verbose=False).complete(X_incomplete)

    elif method == 'BiScaler':
        X_filled = BiScaler(verbose=False).fit_transform(X_incomplete)

    elif method == 'SoftImpute':
        X_filled = SoftImpute(verbose=False).complete(X_incomplete)

    elif method == 'IterativeSVD':
        if value is None:
            rank = min(10, X_incomplete.shape[0]-2)
        else:
            rank = value
        X_filled = IterativeSVD(rank=rank, verbose=False).complete(X_incomplete)

    elif method == 'mean':
        col_means = np.nanmean(X_incomplete, axis=0)
        for i in range(X_incomplete.shape[1]):
            X_incomplete[:,i][np.isnan(X_incomplete[:,i])] = col_means[i]
        X_filled = X_incomplete

    elif method == 'median':
        col_means = np.nanmean(X_incomplete, axis=0)
        for i in range(X_incomplete.shape[1]):
            X_incomplete[:,i][np.isnan(X_incomplete[:,i])] = col_means[i]
        X_filled = X_incomplete

    elif method == 'constant':
        if value is None:
            raise ValueError('Must give `value` argument if method == constant')
github iskandr / fancyimpute / experiments / complete_faces.py View on Github external
table.add_entry(
            solver=KNN(
                k=k,
                orientation="rows"),
            name="KNN_k%d" % (k,))

    for shrinkage_value in [25, 50, 100]:
        # SoftImpute without rank constraints
        table.add_entry(
            solver=SoftImpute(
                shrinkage_value=shrinkage_value),
            name="SoftImpute_lambda%d" % (shrinkage_value,))

    for rank in [10, 20, 40]:
        table.add_entry(
            solver=IterativeSVD(
                rank=rank,
                init_fill_method="zero"),
            name="IterativeSVD_rank%d" % (rank,))

    table.save_html_table()
    table.print_sorted_errors()