How to use the verticapy.learn.ensemble.RandomForestClassifier function in verticapy

To help you get started, we’ve selected a few verticapy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vertica / Vertica-ML-Python / verticapy / utilities.py View on Github external
parameters = info[1].split(",")
	if (model_type != "svd"):
		parameters = [item.split("=") for item in parameters]
		parameters_dict = {}
		for item in parameters:
			parameters_dict[item[0]] = item[1]
	info = info[0]
	for elem in parameters_dict:
		if type(parameters_dict[elem]) == str:
			parameters_dict[elem] = parameters_dict[elem].replace("'", "")
	if (model_type == "rf_regressor"):
		from verticapy.learn.ensemble import RandomForestRegressor
		model = RandomForestRegressor(name, cursor, int(parameters_dict['ntree']), int(parameters_dict['mtry']), int(parameters_dict['max_breadth']), float(parameters_dict['sampling_size']), int(parameters_dict['max_depth']), int(parameters_dict['min_leaf_size']), float(parameters_dict['min_info_gain']), int(parameters_dict['nbins']))
	elif (model_type == "rf_classifier"):
		from verticapy.learn.ensemble import RandomForestClassifier
		model = RandomForestClassifier(name, cursor, int(parameters_dict['ntree']), int(parameters_dict['mtry']), int(parameters_dict['max_breadth']), float(parameters_dict['sampling_size']), int(parameters_dict['max_depth']), int(parameters_dict['min_leaf_size']), float(parameters_dict['min_info_gain']), int(parameters_dict['nbins']))
	elif (model_type == "logistic_reg"):
		from verticapy.learn.linear_model import LogisticRegression
		model = LogisticRegression(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "linear_reg"):
		from verticapy.learn.linear_model import ElasticNet
		model = ElasticNet(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "naive_bayes"):
		from verticapy.learn.naive_bayes import MultinomialNB
		model = MultinomialNB(name, cursor, float(parameters_dict['alpha']))
	elif (model_type == "svm_regressor"):
		from verticapy.learn.svm import LinearSVR
		model = LinearSVR(name, cursor, float(parameters_dict['epsilon']), float(parameters_dict['C']), True, float(parameters_dict['intercept_scaling']), parameters_dict['intercept_mode'], float(parameters_dict['error_tolerance']), int(parameters_dict['max_iterations']))
	elif (model_type == "svm_classifier"):
		from verticapy.learn.svm import LinearSVC
		model = LinearSVC(name, cursor, float(parameters_dict['epsilon']), float(parameters_dict['C']), True, float(parameters_dict['intercept_scaling']), parameters_dict['intercept_mode'], [float(item) for item in parameters_dict['class_weights'].split(",")], int(parameters_dict['max_iterations']))
	elif (model_type == "kmeans"):
github vertica / Vertica-ML-Python / verticapy / vcolumn.py View on Github external
rand_int = random.randint(0, 10000000)
			temp_information = ("{}.VERTICAPY_TEMP_VIEW_{}".format(schema, rand_int), "{}.VERTICAPY_TEMP_MODEL_{}".format(schema, rand_int))
			if (bins < 2):
				raise ValueError("Parameter 'bins' must be greater or equals to 2 in case of discretization using the method 'smart'")
			columns_check([response], self.parent)
			response = vdf_columns_names([response], self.parent)[0]
			try:
				self.parent._VERTICAPY_VARIABLES_["cursor"].execute("DROP VIEW IF EXISTS {}".format(temp_information[0]))
			except:
				try:
					self.parent._VERTICAPY_VARIABLES_["cursor"].execute("DROP MODEL IF EXISTS {}".format(temp_information[1]))
				except:
					pass
			self.parent.to_db(temp_information[0])
			from verticapy.learn.ensemble import RandomForestClassifier
			model = RandomForestClassifier(temp_information[1], self.parent._VERTICAPY_VARIABLES_["cursor"], n_estimators = 20, max_depth = 3, nbins = 100, min_samples_leaf = min_bin_size)
			model.fit(temp_information[0], [self.alias], response)
			query = ["(SELECT READ_TREE(USING PARAMETERS model_name = '{}', tree_id = {}, format = 'tabular'))".format(temp_information[1], i) for i in range(20)]
			query = "SELECT split_value FROM (SELECT split_value, COUNT(*) FROM ({}) x WHERE split_value IS NOT NULL GROUP BY 1 ORDER BY 2 DESC LIMIT {}) y ORDER BY split_value::float".format(" UNION ALL ".join(query), bins - 1)
			self.parent.__executeSQL__(query = query, title = "Computes the optimized histogram bins using Random Forest.")
			result = self.parent._VERTICAPY_VARIABLES_["cursor"].fetchall()
			result = [elem[0] for elem in result]
			self.parent._VERTICAPY_VARIABLES_["cursor"].execute("DROP VIEW IF EXISTS {}".format(temp_information[0]))
			self.parent._VERTICAPY_VARIABLES_["cursor"].execute("DROP MODEL IF EXISTS {}".format(temp_information[1]))
			result = [self.min()] + result + [self.max()]
		elif (method == "topk"):
			if (k < 2):
				raise ValueError("Parameter 'k' must be greater or equals to 2 in case of discretization using the method 'topk'")
			distinct = self.topk(k).values["index"]
			trans = ("(CASE WHEN {} IN ({}) THEN {} || '' ELSE '{}' END)".format(convert_special_type(self.category(), False), ', '.join(["'{}'".format(str(elem).replace("'", "''")) for elem in distinct]), convert_special_type(self.category(), False), new_category.replace("'", "''")), "varchar", "text")
		elif (self.isnum() and method == "same_freq"):
			if (bins < 2):
github vertica / Vertica-ML-Python / verticapy / learn / tree.py View on Github external
def DummyTreeClassifier(name: str, 
						cursor = None):
	"""
---------------------------------------------------------------------------
Dummy Tree Classifier. This classifier memorizes the training data exactly:
it is a very deep RandomForestClassifier built from a single tree that uses
all of the data.

Parameters
----------
name: str
	Name of the model. The model will be stored in the DB.
cursor: DBcursor, optional
	Vertica DB cursor. 
	"""
	# Fixed hyper-parameters that make a random forest degenerate into a
	# single, fully-grown, memorizing tree.
	dummy_tree_params = {
		"n_estimators": 1,
		"max_features": "max",
		"max_leaf_nodes": 1e9,
		"sample": 1.0,
		"max_depth": 100,
		"min_samples_leaf": 1,
		"min_info_gain": 0.0,
		"nbins": 1000,
	}
	return RandomForestClassifier(name = name, cursor = cursor, **dummy_tree_params)
#---#
#---#
github vertica / Vertica-ML-Python / verticapy / learn / tree.py View on Github external
The maximum number of leaf nodes a tree in the forest can have, an integer 
	between 1 and 1e9, inclusive. 
max_depth: int, optional
	The maximum depth for growing each tree, an integer between 1 and 100, inclusive.
min_samples_leaf: int, optional
	The minimum number of samples each branch must have after splitting a node, an 
	integer between 1 and 1e6, inclusive. A split that causes fewer remaining samples 
	is discarded. 
min_info_gain: float, optional
	The minimum threshold for including a split, a float between 0.0 and 1.0, inclusive. 
	A split with information gain less than this threshold is discarded.
nbins: int, optional 
	The number of bins to use for continuous features, an integer between 2 and 1000, 
	inclusive.
	"""
	return RandomForestClassifier(name = name, 
								  cursor = cursor, 
								  n_estimators = 1, 
								  max_features = max_features, 
								  max_leaf_nodes = max_leaf_nodes,
								  sample = 1.0,
								  max_depth = max_depth,
								  min_samples_leaf = min_samples_leaf,
								  min_info_gain = min_info_gain,
								  nbins = nbins)
#---#