How to use the fancyimpute.KNN class (k-nearest-neighbor imputation)

To help you get started, we’ve selected a few fancyimpute examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github ANTsX / ANTsPy / ants / utils / impute.py View on Github external
Low-Rank SVD via Fast Alternating Least Squares.
    """
    _fancyimpute_options = {'KNN', 'BiScaler', 'NuclearNormMinimization', 'SoftImpute', 'IterativeSVD'}
    if (not has_fancyimpute) and (method in _fancyimpute_options):
        raise ValueError('You must install `fancyimpute` (pip install fancyimpute) to use this method')

    _base_options = {'mean', 'median', 'constant'}
    if (method not in _base_options) and (method not in _fancyimpute_options) and (not isinstance(method, (int,float))):
        raise ValueError('method not understood.. Use `mean`, `median`, a scalar, or an option from `fancyimpute`')

    X_incomplete = data.copy()

    if method == 'KNN':
        if value is None:
            value = 3
        X_filled = KNN(k=value, verbose=False).complete(X_incomplete)

    elif method == 'BiScaler':
        X_filled = BiScaler(verbose=False).fit_transform(X_incomplete)

    elif method == 'SoftImpute':
        X_filled = SoftImpute(verbose=False).complete(X_incomplete)

    elif method == 'IterativeSVD':
        if value is None:
            rank = min(10, X_incomplete.shape[0]-2)
        else:
            rank = value
        X_filled = IterativeSVD(rank=rank, verbose=False).complete(X_incomplete)

    elif method == 'mean':
        col_means = np.nanmean(X_incomplete, axis=0)
github usc-isi-i2 / dsbox-cleaning / dsbox / datapreprocessing / cleaner / missing_value_pred.py View on Github external
if (value == "zero"):
        inputed_value = 0
    elif (value == "mean"):
        inputed_value = np.mean(data_drop)
    elif (value == "max"):
        inputed_value = np.max(data_drop)
    elif (value == "min"):
        inputed_value = np.min(data_drop)
    elif (value == "new"):
        inputed_value = 0   # 0 is the value that never happens in our categorical map
    elif (value == "popular"):
        inputed_value = popular_value(data_drop)
    # special type of imputed, just return after imputation
    elif (value == "knn"):
        from fancyimpute import KNN
        data_clean = KNN(k=5).complete(data)
        return data_clean
    else:
        raise ValueError("no such impute strategy: {}".format(value))

    if np.isnan(inputed_value):
        inputed_value = 0

    data_imputed[index] = inputed_value

    if verbose: print("imputed missing value: {}".format(inputed_value))
    return data_imputed
github iskandr / fancyimpute / experiments / complete_faces.py View on Github external
regularization_weight = 10.0 ** -negative_log_regularization_weight
        table.add_entry(
            solver=IterativeImputer(
                n_nearest_features=80,
                max_iter=50
            ),
            name="IterativeImputer_%d" % negative_log_regularization_weight)

    for fill_method in ["mean", "median"]:
        table.add_entry(
            solver=SimpleFill(fill_method=fill_method),
            name="SimpleFill_%s" % fill_method)

    for k in [1, 3, 7]:
        table.add_entry(
            solver=KNN(
                k=k,
                orientation="rows"),
            name="KNN_k%d" % (k,))

    for shrinkage_value in [25, 50, 100]:
        # SoftImpute without rank constraints
        table.add_entry(
            solver=SoftImpute(
                shrinkage_value=shrinkage_value),
            name="SoftImpute_lambda%d" % (shrinkage_value,))

    for rank in [10, 20, 40]:
        table.add_entry(
            solver=IterativeSVD(
                rank=rank,
                init_fill_method="zero"),
github LaureBerti / Learn2Clean / python-package / learn2clean / imputation / imputer.py View on Github external
# only for numerical values
        # Nearest neighbor imputations which weights samples
        # using the mean squared difference on features for which two
        # rows both have observed data.

        from fancyimpute import KNN

        df = dataset

        if dataset.select_dtypes(['number']).isnull().sum().sum() > 0:

            X = dataset.select_dtypes(['number'])

            for i in X.columns:

                X[i] = KNN(k=k, verbose=False).fit_transform(X)

            Z = dataset.select_dtypes(include=['object'])

            df = pd.DataFrame.from_records(
                X, columns=dataset.select_dtypes(['number']).columns)

            df = df.join(Z)

        else:

            pass

        return df
github usc-isi-i2 / dsbox-cleaning / dsbox / datapreprocessing / cleaner / imputation_pipeline.py View on Github external
if (label_col_name==None or len(label_col_name)==0):
            is_eval = False
        else:
            is_eval = True

        missing_col_id = []
        data, label = self.__df2np(data, label_col_name, missing_col_id)
        # mask = np.isnan(data)
        # imputation_list = ["mean"] * len(missing_col_id)
        # data_mean = mvp.imputeData(data, missing_col_id, imputation_list, self.verbose)
        # data_mean = scale(data_mean)
        # data_mean[mask] = np.nan

        # data_clean = KNN(k=5, normalizer=BiScaler).complete(data)
        data_clean = KNN(k=5).complete(data)
        #data_clean = MICE().complete(data)

        if (is_eval): self.__evaluation(data_clean, label)

        return data_clean
github usc-isi-i2 / dsbox-cleaning / dsbox / datapreprocessing / cleaner / knn.py View on Github external
def __knn(self, test_data):
    """Impute missing values in *test_data* via fancyimpute's KNN solver.

    Parameters
    ----------
    test_data : pandas.DataFrame (presumably; converted by ``mvp.df2np`` —
        TODO confirm against caller)
        Data possibly containing missing values.

    Returns
    -------
    numpy.ndarray
        The converted array, imputed with k=``self._k`` nearest neighbors;
        returned unchanged when no column has missing values.
    """
    missing_col_id = []
    # df2np converts to a numpy array and records indices of columns that
    # contain missing values into missing_col_id (populated in place).
    test_data = mvp.df2np(test_data, missing_col_id, self._verbose)
    # Nothing missing -> nothing to impute.
    if len(missing_col_id) == 0:
        return test_data
    imputer = knn(k=self._k, verbose=(1 if self._verbose else 0))
    # fancyimpute >= 0.4 removed Solver.complete() in favor of the
    # sklearn-style fit_transform(); support both so the code works with
    # either a pinned old version or a current one.
    fill = getattr(imputer, "fit_transform", None) or imputer.complete
    return fill(test_data)
github awslabs / datawig / experiments / benchmarks.py View on Github external
def impute_knn(X, mask, hyperparams=None):
    """Run a KNN-imputation hyperparameter search over *X*.

    Parameters
    ----------
    X : array-like
        Data matrix with missing entries.
    mask : array-like
        Boolean mask of entries held out for evaluation (semantics defined
        by ``fancyimpute_hpo`` — TODO confirm).
    hyperparams : dict, optional
        Search space passed to ``fancyimpute_hpo``; defaults to
        ``{'k': [2, 4, 6]}``. A ``None`` sentinel replaces the original
        mutable default argument, which would be shared across calls.

    Returns
    -------
    Whatever ``fancyimpute_hpo`` returns for the best KNN configuration.
    """
    if hyperparams is None:
        hyperparams = {'k': [2, 4, 6]}
    return fancyimpute_hpo(KNN, hyperparams, X, mask)