import os
import numpy as np
import pandas as pd
import keras
from sklearn import preprocessing

try:
    with open(learnFile, 'r') as f:
        M = np.loadtxt(f, unpack=False)
except OSError:
    print('\033[1m' + ' Learning file not found \n' + '\033[0m')

# First row of the file is the energy axis; first column holds the class labels.
En = np.delete(np.array(M[0,:]), np.s_[0:1], 0)
M = np.delete(M, np.s_[0:1], 0)
Cl = ['{:.2f}'.format(x) for x in M[:,0]]
A = np.delete(M, np.s_[0:1], 1)
learnFileRoot = os.path.splitext(learnFile)[0]

totA = A
totCl = Cl
numTotClasses = np.unique(totCl).size
# Encode the string labels as integers, then one-hot encode them for training
le = preprocessing.LabelEncoder()
totCl2 = le.fit_transform(totCl)
Cl2 = le.transform(Cl)
totCl2 = keras.utils.to_categorical(totCl2, num_classes=np.unique(totCl).size)
Cl2 = keras.utils.to_categorical(Cl2, num_classes=np.unique(Cl).size+1)

df = pd.DataFrame()
for i in range(A.shape[0]):
    G = np.dstack([np.ones(En.shape[0]), En])
    G = np.dstack([G, A[i]])  # was A[0]: the loop variable was never used
    S = pd.Series(G.tolist())
    df[i] = S.values
S = np.asarray([df.iloc[:,1].values[0]])
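# The pattern above, a LabelEncoder followed by keras.utils.to_categorical,
# reduces to this minimal sketch (the label values are made up):
labels = ['1.00', '2.50', '1.00', '3.75']
le_demo = preprocessing.LabelEncoder()
ints = le_demo.fit_transform(labels)       # [0 1 0 2]: classes are sorted, then indexed
onehot = keras.utils.to_categorical(ints, num_classes=np.unique(labels).size)
print(onehot.shape)                        # (4, 3): one row per sample, one column per class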
@staticmethod
def encode_labels(labels):
    '''Encode a list of string labels as consecutive integers.'''
    from sklearn import preprocessing
    intent_encoder = preprocessing.LabelEncoder()
    intent_encoder.fit(labels)
    return intent_encoder.transform(labels)
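# A standalone sketch of the same round trip (the intent names are illustrative):
from sklearn import preprocessing
intents = ['greet', 'bye', 'greet', 'order']
enc = preprocessing.LabelEncoder()
print(enc.fit_transform(intents))          # [1 0 1 2]: classes_ are sorted alphabetically
print(enc.inverse_transform([0, 1, 2]))    # ['bye' 'greet' 'order']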
a = alg.split('+')
alg_list = [alg_list[alg_abbreviation.index(i)] for i in a]
num_cores = multiprocessing.cpu_count()
os.chdir(path + 'data/')

print("--- Data preprocessing ---")
df = pd.read_csv(data, sep='\t')
df = df.sort_values('fname', ascending=True).reset_index(drop=True)
df['label'] = pd.read_csv(label, sep='\t', header=None)
#for i in range(len(df.fname.tolist())):
#    df.label[i] = re.sub(r"(.*)(.*)( \([0-9]+\).xml)", r"\1", df.fname.tolist()[i])
y_unencoded = df.label

print("Label encoding")
encoder = LabelEncoder()
encoder.fit(y_unencoded)
y = encoder.transform(y_unencoded)

pred_table = pd.DataFrame()
score_table = pd.DataFrame()
best_score = 0
ff = feature.split('+')
for f in ff:
    time_start = time.time()
    if alg == 'dnn':
        best_model, best_feature, best_algorithm, best_coef, score_table, pred_table, metrics = \
            Dnn(df=df, y=y, encoder=encoder, class_wt=False, trained_w2v='umls',
                batch_size=int(df.shape[0]/100), n_epoch=25, \
def encode_data(data):
    '''Integer-encode the label column, then map both sentence columns
    through one LabelEncoder fitted on their union.'''
    data['label'] = LabelEncoder().fit_transform(data['label'])
    le = LabelEncoder()
    le.fit(pd.concat([data['sen1'], data['sen2']]))  # Series.append is deprecated
    data['tid1'] = le.transform(data['sen1'])
    data['tid2'] = le.transform(data['sen2'])
    return data
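# Minimal usage sketch for encode_data (the toy frame is illustrative):
import pandas as pd
from sklearn.preprocessing import LabelEncoder

toy = pd.DataFrame({'label': ['dup', 'new'],
                    'sen1': ['how are you', 'hi there'],
                    'sen2': ['how do you do', 'hi there']})
print(encode_data(toy)[['tid1', 'tid2']])
# Fitting on the union of sen1 and sen2 puts both columns in one id space,
# so the same sentence gets the same integer regardless of its column.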
if en[-1] != '':
    features_target = en[-1]
    features_train = en[0:len(en)-1]
    features_train = [1 if x == "+" else 0 for x in features_train]
    entries_test.append(features_train)
    targets_test.append(features_target)
if learner == "aleph":
    targets_test = ['positive' if x == target_attr_value else 'negative' for x in targets_test]

train_features = pd.DataFrame(entries)
train_targets = pd.DataFrame(targets)
test_features = pd.DataFrame(entries_test)
test_targets = pd.DataFrame(targets_test)

le = preprocessing.LabelEncoder()
le.fit(train_targets.values.ravel())  # LabelEncoder expects a 1d array, not a DataFrame
targets_train_encoded = le.transform(train_targets.values.ravel())
targets_test_encoded = le.transform(test_targets.values.ravel())

clf = tree.DecisionTreeClassifier()
clf.fit(train_features, targets_train_encoded)
preds = clf.predict(test_features)
acc = accuracy_score(targets_test_encoded, preds)  # metrics take (y_true, y_pred)
f1 = f1_score(targets_test_encoded, preds)
predictions.append(acc)
predictions_f1.append(f1)
end = timer()
times.append(end - start)
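# A self-contained sketch of the same pattern on toy data (illustrative only):
import pandas as pd
from sklearn import preprocessing, tree
from sklearn.metrics import accuracy_score, f1_score

X = pd.DataFrame([[1, 0], [0, 1], [1, 1], [0, 0]])
y = ['positive', 'negative', 'positive', 'negative']

le_demo = preprocessing.LabelEncoder()
y_enc = le_demo.fit_transform(y)                   # negative -> 0, positive -> 1
clf_demo = tree.DecisionTreeClassifier().fit(X, y_enc)
preds_demo = clf_demo.predict(X)
print(accuracy_score(y_enc, preds_demo), f1_score(y_enc, preds_demo))
print(le_demo.inverse_transform(preds_demo))       # back to the string labels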
else:
    dnntfDef.alwaysImprove = True
    model_directory = None
    print("\n Training model not saved\n")

#**********************************************
''' Initialize Estimator and training data '''
#**********************************************
print(' Initializing TensorFlow...')
tf.reset_default_graph()

# Fit the encoder on the union of training and test labels so transform()
# never sees an unknown class.
totA = np.vstack((A, A_test))
totCl = np.append(Cl, Cl_test)
numTotClasses = np.unique(totCl).size
le = preprocessing.LabelEncoder()
totCl2 = le.fit_transform(totCl)
Cl2 = le.transform(Cl)
Cl2_test = le.transform(Cl_test)

validation_monitor = skflow.monitors.ValidationMonitor(input_fn=lambda: input_fn(A_test, Cl2_test),
                                                       eval_steps=1,
                                                       every_n_steps=dnntfDef.valMonitorSecs)

feature_columns = skflow.infer_real_valued_columns_from_input(totA.astype(np.float32))

clf = skflow.DNNClassifier(feature_columns=feature_columns, hidden_units=dnntfDef.hidden_layers,
                           optimizer=dnntfDef.optimizer, n_classes=numTotClasses,
                           activation_fn=dnntfDef.activationFn, model_dir=model_directory,
                           config=skflow.RunConfig(save_checkpoints_secs=dnntfDef.timeCheckpoint),
                           dropout=dnntfDef.dropout_perc)

print("\n Number of global steps:", dnntfDef.trainingSteps)
if len(meta) < 1:
    continue

# process meta label
meta_label = None
if isinstance(labels, dict):
    # one list per column
    if meta_col in labels:
        meta_label = list(labels[meta_col])
elif isinstance(labels, list):
    # shared list
    if not isinstance(labels[0], list):
        meta_label = list(labels)

# process meta
if str(meta[0]).isalpha() or isinstance(meta[0], str):
    # string-valued meta: encode to integers, then group into at most 5 bins
    encoder = LabelEncoder()
    yi = encoder.fit_transform(meta)
    yi_bins = np.linspace(yi.min(), yi.max(), num=min(5, len(set(yi))), endpoint=True)
    meta = np.digitize(yi, yi_bins, right=True)
    meta = yi_bins[meta]
    meta_label = [list(yi_bins).index(_) for _ in sorted(set(meta))]
    #meta_label = ['Group '+str(_+1) for _ in meta_label]
    meta_bins = list(zip(yi_bins[:-1], yi_bins[1:]))
    meta_label = ['Group {} {}'.format(_+1, __) for _, __ in enumerate(meta_bins)]
elif len(set(meta)) > 9 and zscore is True:
    # TODO: how do we decide whether to use continuous vs. discrete
    # z-score the numeric column, ignoring NaNs
    yi = meta.copy()
    yi_nz = yi[~np.isnan(yi)]
    zi = stats.zscore(yi_nz)
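# What the binning in the string branch does, on toy encoded values (illustrative):
import numpy as np
yi_demo = np.array([0, 1, 2, 3, 7])
bins_demo = np.linspace(yi_demo.min(), yi_demo.max(), num=5, endpoint=True)  # [0. 1.75 3.5 5.25 7.]
idx_demo = np.digitize(yi_demo, bins_demo, right=True)                       # [0 1 2 2 4]
print(bins_demo[idx_demo])   # each value snapped to a bin edge: [0. 1.75 3.5 3.5 7.]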
# Drop the categorical columns (they will be replaced by their one-hot encoding)
num_X_train = X_train.drop(low_cardinality_cols, axis=1)
num_X_val = X_val.drop(low_cardinality_cols, axis=1)
# Add the one-hot encoded columns back to the numerical features
OH_X_train = pd.concat([num_X_train, OH_cols_train], axis=1)
OH_X_val = pd.concat([num_X_val, OH_cols_val], axis=1)  # was num_X_valid: undefined name
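# OH_cols_train / OH_cols_val are assumed to come from a step like this sketch
# (sparse_output requires scikit-learn >= 1.2; older versions use sparse=False):
from sklearn.preprocessing import OneHotEncoder
OH_encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False)
OH_cols_train = pd.DataFrame(OH_encoder.fit_transform(X_train[low_cardinality_cols]))
OH_cols_val = pd.DataFrame(OH_encoder.transform(X_val[low_cardinality_cols]))
# The encoder drops the row index; restore it so concat aligns rows correctly
OH_cols_train.index = X_train.index
OH_cols_val.index = X_val.index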
# Use pandas get_dummies for categories encoded as strings
pd.get_dummies(df, columns=['col1', 'col2'])

# Ordinal encoding for categories that have an order (example: low/medium/high)
map_dict = {'low': 0, 'medium': 1, 'high': 2}
df['var_oe'] = df['var'].apply(lambda x: map_dict[x])

# We can also do it with sklearn's LabelEncoder; note that it assigns codes
# alphabetically (high=0, low=1, medium=2), so it does NOT preserve the
# low/medium/high order; the mapping dict above is safer for true ordinals
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['var_oe'] = le.fit_transform(df['var'])

# BinaryEncoder: with many categories in one variable, OHE creates many columns.
# Binary encoding writes each category id as a binary number, using far fewer
# columns. Use it only when the categorical variable has high cardinality.
from category_encoders.binary import BinaryEncoder
be = BinaryEncoder(cols=['var'])
df = be.fit_transform(df)

# HashingEncoder
from category_encoders.hashing import HashingEncoder
he = HashingEncoder(cols=['var'])
df = he.fit_transform(df)

# Feature selection: drop attributes that provide no useful information for the task
# Univariate feature selection before training a model (chi2 scores each feature
# against the target y and requires non-negative features)
from sklearn.feature_selection import SelectKBest, chi2
bestfeatures = SelectKBest(score_func=chi2, k='all')
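# Completing the SelectKBest step, a sketch (X and y are assumed to be the
# feature matrix and target, which the excerpt does not define):
import pandas as pd
fit = bestfeatures.fit(X, y)
scores = pd.DataFrame({'feature': X.columns, 'score': fit.scores_})
print(scores.sort_values('score', ascending=False))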
# PTB has no function tags on preterminals; Negra/Tiger/Lassy do.
posfunc = any(functions(node) for tree in trees
        for node in tree.subtrees()
        if node and isinstance(node[0], int))
target = [functions(node) for tree in trees
        for node in tree.subtrees()
        if tree is not node and node
        and (posfunc or isinstance(node[0], Tree))]
# PTB may have multiple tags (or 0) per node.
# Negra/Tiger/Lassy have exactly 1 tag for every node.
multi = any(len(a) > 1 for a in target)
if multi:
    encoder = preprocessing.MultiLabelBinarizer()
else:
    encoder = preprocessing.LabelEncoder()
    target = [a[0] if a else '--' for a in target]
# binarize features (output is a sparse array)
trainfeats = vectorizer.fit_transform(functionfeatures(node, sent)
        for tree, sent in zip(trees, sents)
        for node in tree.subtrees()
        if tree is not node
        and node and (posfunc or isinstance(node[0], Tree)))
trainfuncs = encoder.fit_transform(target)
classifier = linear_model.SGDClassifier(
        loss='hinge',
        penalty='elasticnet',
        max_iter=int(10 ** 6 / len(trees)))
alphas = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6]
if multi:
    classifier = multiclass.OneVsRestClassifier(
            classifier, n_jobs=numproc or -1)
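# The unused alphas list is presumably swept with a grid search further down;
# a sketch of how that could look (not verbatim from the source; the parameter
# gains an 'estimator__' prefix once wrapped in OneVsRestClassifier):
from sklearn import model_selection
param_name = 'estimator__alpha' if multi else 'alpha'
grid = model_selection.GridSearchCV(classifier, {param_name: alphas})
grid.fit(trainfeats, trainfuncs)
classifier = grid.best_estimator_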
def restore(self, model_path):
    '''Restore a saved multiencoder from ``model_path``: rebuild each
    LabelEncoder from the classes stored in the npz file, and restore
    the X header too.'''
    path = model_path + '/encoder.npz'
    h_path = model_path + '/header.npz'
    npzfile = np.load(path)
    h_npzfile = np.load(h_path)
    self.header = h_npzfile['header']
    self.encoders = {}
    for k, v in npzfile.items():
        # a LabelEncoder is fully determined by its classes_ array
        le = LabelEncoder()
        le.classes_ = v
        self.encoders[k] = le
    self.columns = list(self.encoders.keys())
    return self
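# A matching save() for the restore() above, sketched under the same layout
# assumption (one array of classes per column, plus the header array):
def save(self, model_path):
    np.savez(model_path + '/encoder.npz',
             **{k: le.classes_ for k, le in self.encoders.items()})
    np.savez(model_path + '/header.npz', header=self.header)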