How to use verticapy - 10 common examples

To help you get started, we’ve selected a few verticapy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github vertica / Vertica-ML-Python / verticapy / learn / model_selection.py View on Github external
Returns
-------
int
	the KMeans K
	"""
	check_types([
		("X", X, [list], False), 
		("input_relation", input_relation, [str], False), 
		("n_cluster", n_cluster, [list, tuple], False),
		("init", init, ["kmeanspp", "random"], True),
		("max_iter", max_iter, [int, float], False),
		("tol", tol, [int, float], False),
		("elbow_score_stop", elbow_score_stop, [int, float], False)])
	if not(cursor):
		conn = read_auto_connect()
		cursor = conn.cursor()
	else:
		conn = False
		check_cursor(cursor)
	if not(type(n_cluster) == list):
		L = range(n_cluster[0], n_cluster[1])
	else:
		L = n_cluster
		L.sort()
	schema, relation = schema_relation(input_relation)
	schema = str_column(schema)
	relation_alpha = ''.join(ch for ch in relation if ch.isalnum())
	for i in L:
		cursor.execute("DROP MODEL IF EXISTS {}.__vpython_kmeans_tmp_model_{}__".format(schema, relation_alpha))
		model = KMeans("{}.__vpython_kmeans_tmp_model_{}__".format(schema, relation_alpha), cursor, i, init, max_iter, tol)
		model.fit(input_relation, X)
github vertica / Vertica-ML-Python / verticapy / connections / connect.py View on Github external
path += "/all/{}.vertica".format(name)
		file = open(path, "r")
	except:
		raise Exception("No auto connection is available. To create an auto connection, use the new_auto_connection function of the verticapy.connections.connect module.")
	dsn = file.read()
	dsn = dsn.split("\n")
	if (dsn[0] == "vertica_python"):
		import vertica_python
		conn = vertica_python.connect(** {"host": dsn[1], "port": dsn[2], "database": dsn[3], "user": dsn[4], "password": dsn[5]}, autocommit = True)
	elif (dsn[0] == "pyodbc"):
		import pyodbc
		conn = pyodbc.connect(dsn[1], autocommit = True)
	elif (dsn[0] == "jaydebeapi"):
		import jaydebeapi
		jdbc_driver_name = "com.vertica.jdbc.Driver"
		jdbc_driver_loc = os.path.dirname(verticapy.__file__) + "/connections/vertica-jdbc-9.3.1-0.jar"
		conn = jaydebeapi.connect(jdbc_driver_name, dsn[1], {'user': dsn[2], 'password': dsn[3]}, jars = jdbc_driver_loc)
	else:
		raise Exception("The auto connection format is incorrect. To create a new auto connection, use the new_auto_connection function of the verticapy.connections.connect module.")
	return(conn)
#---#
github vertica / Vertica-ML-Python / verticapy / utilities.py View on Github external
parameters = info[1].split(",")
	if (model_type != "svd"):
		parameters = [item.split("=") for item in parameters]
		parameters_dict = {}
		for item in parameters:
			parameters_dict[item[0]] = item[1]
	info = info[0]
	for elem in parameters_dict:
		if type(parameters_dict[elem]) == str:
			parameters_dict[elem] = parameters_dict[elem].replace("'", "")
	if (model_type == "rf_regressor"):
		from verticapy.learn.ensemble import RandomForestRegressor
		model = RandomForestRegressor(name, cursor, int(parameters_dict['ntree']), int(parameters_dict['mtry']), int(parameters_dict['max_breadth']), float(parameters_dict['sampling_size']), int(parameters_dict['max_depth']), int(parameters_dict['min_leaf_size']), float(parameters_dict['min_info_gain']), int(parameters_dict['nbins']))
	elif (model_type == "rf_classifier"):
		from verticapy.learn.ensemble import RandomForestClassifier
		model = RandomForestClassifier(name, cursor, int(parameters_dict['ntree']), int(parameters_dict['mtry']), int(parameters_dict['max_breadth']), float(parameters_dict['sampling_size']), int(parameters_dict['max_depth']), int(parameters_dict['min_leaf_size']), float(parameters_dict['min_info_gain']), int(parameters_dict['nbins']))
	elif (model_type == "logistic_reg"):
		from verticapy.learn.linear_model import LogisticRegression
		model = LogisticRegression(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "linear_reg"):
		from verticapy.learn.linear_model import ElasticNet
		model = ElasticNet(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "naive_bayes"):
		from verticapy.learn.naive_bayes import MultinomialNB
		model = MultinomialNB(name, cursor, float(parameters_dict['alpha']))
	elif (model_type == "svm_regressor"):
		from verticapy.learn.svm import LinearSVR
		model = LinearSVR(name, cursor, float(parameters_dict['epsilon']), float(parameters_dict['C']), True, float(parameters_dict['intercept_scaling']), parameters_dict['intercept_mode'], float(parameters_dict['error_tolerance']), int(parameters_dict['max_iterations']))
	elif (model_type == "svm_classifier"):
		from verticapy.learn.svm import LinearSVC
		model = LinearSVC(name, cursor, float(parameters_dict['epsilon']), float(parameters_dict['C']), True, float(parameters_dict['intercept_scaling']), parameters_dict['intercept_mode'], [float(item) for item in parameters_dict['class_weights'].split(",")], int(parameters_dict['max_iterations']))
	elif (model_type == "kmeans"):
github vertica / Vertica-ML-Python / verticapy / vcolumn.py View on Github external
rand_int = random.randint(0, 10000000)
			temp_information = ("{}.VERTICAPY_TEMP_VIEW_{}".format(schema, rand_int), "{}.VERTICAPY_TEMP_MODEL_{}".format(schema, rand_int))
			if (bins < 2):
				raise ValueError("Parameter 'bins' must be greater or equals to 2 in case of discretization using the method 'smart'")
			columns_check([response], self.parent)
			response = vdf_columns_names([response], self.parent)[0]
			try:
				self.parent._VERTICAPY_VARIABLES_["cursor"].execute("DROP VIEW IF EXISTS {}".format(temp_information[0]))
			except:
				try:
					self.parent._VERTICAPY_VARIABLES_["cursor"].execute("DROP MODEL IF EXISTS {}".format(temp_information[1]))
				except:
					pass
			self.parent.to_db(temp_information[0])
			from verticapy.learn.ensemble import RandomForestClassifier
			model = RandomForestClassifier(temp_information[1], self.parent._VERTICAPY_VARIABLES_["cursor"], n_estimators = 20, max_depth = 3, nbins = 100, min_samples_leaf = min_bin_size)
			model.fit(temp_information[0], [self.alias], response)
			query = ["(SELECT READ_TREE(USING PARAMETERS model_name = '{}', tree_id = {}, format = 'tabular'))".format(temp_information[1], i) for i in range(20)]
			query = "SELECT split_value FROM (SELECT split_value, COUNT(*) FROM ({}) x WHERE split_value IS NOT NULL GROUP BY 1 ORDER BY 2 DESC LIMIT {}) y ORDER BY split_value::float".format(" UNION ALL ".join(query), bins - 1)
			self.parent.__executeSQL__(query = query, title = "Computes the optimized histogram bins using Random Forest.")
			result = self.parent._VERTICAPY_VARIABLES_["cursor"].fetchall()
			result = [elem[0] for elem in result]
			self.parent._VERTICAPY_VARIABLES_["cursor"].execute("DROP VIEW IF EXISTS {}".format(temp_information[0]))
			self.parent._VERTICAPY_VARIABLES_["cursor"].execute("DROP MODEL IF EXISTS {}".format(temp_information[1]))
			result = [self.min()] + result + [self.max()]
		elif (method == "topk"):
			if (k < 2):
				raise ValueError("Parameter 'k' must be greater or equals to 2 in case of discretization using the method 'topk'")
			distinct = self.topk(k).values["index"]
			trans = ("(CASE WHEN {} IN ({}) THEN {} || '' ELSE '{}' END)".format(convert_special_type(self.category(), False), ', '.join(["'{}'".format(str(elem).replace("'", "''")) for elem in distinct]), convert_special_type(self.category(), False), new_category.replace("'", "''")), "varchar", "text")
		elif (self.isnum() and method == "same_freq"):
			if (bins < 2):
github vertica / Vertica-ML-Python / verticapy / utilities.py View on Github external
info = info[0]
	for elem in parameters_dict:
		if type(parameters_dict[elem]) == str:
			parameters_dict[elem] = parameters_dict[elem].replace("'", "")
	if (model_type == "rf_regressor"):
		from verticapy.learn.ensemble import RandomForestRegressor
		model = RandomForestRegressor(name, cursor, int(parameters_dict['ntree']), int(parameters_dict['mtry']), int(parameters_dict['max_breadth']), float(parameters_dict['sampling_size']), int(parameters_dict['max_depth']), int(parameters_dict['min_leaf_size']), float(parameters_dict['min_info_gain']), int(parameters_dict['nbins']))
	elif (model_type == "rf_classifier"):
		from verticapy.learn.ensemble import RandomForestClassifier
		model = RandomForestClassifier(name, cursor, int(parameters_dict['ntree']), int(parameters_dict['mtry']), int(parameters_dict['max_breadth']), float(parameters_dict['sampling_size']), int(parameters_dict['max_depth']), int(parameters_dict['min_leaf_size']), float(parameters_dict['min_info_gain']), int(parameters_dict['nbins']))
	elif (model_type == "logistic_reg"):
		from verticapy.learn.linear_model import LogisticRegression
		model = LogisticRegression(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "linear_reg"):
		from verticapy.learn.linear_model import ElasticNet
		model = ElasticNet(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "naive_bayes"):
		from verticapy.learn.naive_bayes import MultinomialNB
		model = MultinomialNB(name, cursor, float(parameters_dict['alpha']))
	elif (model_type == "svm_regressor"):
		from verticapy.learn.svm import LinearSVR
		model = LinearSVR(name, cursor, float(parameters_dict['epsilon']), float(parameters_dict['C']), True, float(parameters_dict['intercept_scaling']), parameters_dict['intercept_mode'], float(parameters_dict['error_tolerance']), int(parameters_dict['max_iterations']))
	elif (model_type == "svm_classifier"):
		from verticapy.learn.svm import LinearSVC
		model = LinearSVC(name, cursor, float(parameters_dict['epsilon']), float(parameters_dict['C']), True, float(parameters_dict['intercept_scaling']), parameters_dict['intercept_mode'], [float(item) for item in parameters_dict['class_weights'].split(",")], int(parameters_dict['max_iterations']))
	elif (model_type == "kmeans"):
		from verticapy.learn.cluster import KMeans
		model = KMeans(name, cursor, -1, parameters_dict['init_method'], int(parameters_dict['max_iterations']), float(parameters_dict['epsilon']))
	elif (model_type == "pca"):
		from verticapy.learn.decomposition import PCA
		model = PCA(name, cursor, 0, bool(parameters_dict['scale']))
	elif (model_type == "svd"):
github vertica / Vertica-ML-Python / verticapy / connections / connect.py View on Github external
def read_dsn(dsn: str):
	"""
---------------------------------------------------------------------------
Reads the DSN information from the file referenced by the ODBCINI 
environment variable.

The file is scanned line by line: the section whose header is '[dsn]' is 
located and every 'key = value' line up to the next section header (or the 
end of the file) is collected. Blank lines, comment lines (starting with 
';' or '#') and lines without '=' are ignored.

Parameters
----------
dsn: str
	DSN name

Returns
-------
dict
	dictionary with all the credentials. Keys are lower-cased with their 
	spaces removed; values are stripped of surrounding whitespace only.

Raises
------
KeyError
	if the ODBCINI environment variable is not set.
ValueError
	if the file contains no '[dsn]' section.
	"""
	check_types([("dsn", dsn, [str], False)])
	# The context manager guarantees the file is closed even if reading fails.
	with open(os.environ['ODBCINI'], "r") as f:
		lines = f.read().splitlines()
	header = "[{}]".format(dsn)
	credentials = {}
	found = False
	for line in lines:
		line = line.strip()
		if not found:
			# Still looking for the requested section header.
			found = (line == header)
			continue
		if line.startswith("["):
			# Next section reached: the requested one is complete.
			break
		if not line or line[0] in ";#" or "=" not in line:
			continue
		# Split on the first '=' only so that values containing '=' 
		# (passwords, connection options) are kept whole.
		key, value = line.split("=", 1)
		credentials[key.replace(' ', '').lower()] = value.strip()
	if not found:
		raise ValueError("The DSN '{}' doesn't exist".format(dsn))
	return credentials
#---#
github vertica / Vertica-ML-Python / verticapy / learn / model_selection.py View on Github external
conn = read_auto_connect()
		cursor = conn.cursor()
	else:
		conn = False
		check_cursor(cursor)
	if not(type(n_cluster) == list):
		L = range(n_cluster[0], n_cluster[1])
	else:
		L = n_cluster
		L.sort()
	schema, relation = schema_relation(input_relation)
	schema = str_column(schema)
	relation_alpha = ''.join(ch for ch in relation if ch.isalnum())
	for i in L:
		cursor.execute("DROP MODEL IF EXISTS {}.__vpython_kmeans_tmp_model_{}__".format(schema, relation_alpha))
		model = KMeans("{}.__vpython_kmeans_tmp_model_{}__".format(schema, relation_alpha), cursor, i, init, max_iter, tol)
		model.fit(input_relation, X)
		score = model.metrics.values["value"][3]
		if (score > elbow_score_stop):
			return i
		score_prev = score
	if (conn):
		conn.close()
	print("\u26A0 The K was not found. The last K (= {}) is returned with an elbow score of {}".format(i, score))
	return i
#---#
github vertica / Vertica-ML-Python / verticapy / learn / plot.py View on Github external
cursor = conn.cursor()
	else:
		conn = False
		check_cursor(cursor)
	schema, relation = schema_relation(input_relation)
	schema = str_column(schema)
	relation_alpha = ''.join(ch for ch in relation if ch.isalnum())
	all_within_cluster_SS = []
	if not(type(n_cluster) == list):
		L = [i for i in range(n_cluster[0], n_cluster[1])] 
	else:
		L = n_cluster
		L.sort()
	for i in L:
		cursor.execute("DROP MODEL IF EXISTS {}.VERTICAPY_KMEANS_TMP_{}".format(schema, relation_alpha))
		model = KMeans("{}.VERTICAPY_KMEANS_TMP_{}".format(schema, relation_alpha), cursor, i, init, max_iter, tol)
		model.fit(input_relation, X)
		all_within_cluster_SS += [float(model.metrics.values["value"][3])]
		model.drop()
	if (conn):
		conn.close()
	plt.figure(figsize = (10,8))
	plt.rcParams['axes.facecolor'] = '#F4F4F4'
	plt.grid()
	plt.plot(L, all_within_cluster_SS, marker = "s", color = "#FE5016")
	plt.title("Elbow Curve")
	plt.xlabel('Number of Clusters')
	plt.ylabel('Between-Cluster SS / Total SS')
	plt.subplots_adjust(left = 0.2)
	plt.show()
	values = {"index": L, "Within-Cluster SS": all_within_cluster_SS}
	return tablesample(values = values, table_info = False)
github vertica / Vertica-ML-Python / verticapy / utilities.py View on Github external
model = LogisticRegression(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "linear_reg"):
		from verticapy.learn.linear_model import ElasticNet
		model = ElasticNet(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "naive_bayes"):
		from verticapy.learn.naive_bayes import MultinomialNB
		model = MultinomialNB(name, cursor, float(parameters_dict['alpha']))
	elif (model_type == "svm_regressor"):
		from verticapy.learn.svm import LinearSVR
		model = LinearSVR(name, cursor, float(parameters_dict['epsilon']), float(parameters_dict['C']), True, float(parameters_dict['intercept_scaling']), parameters_dict['intercept_mode'], float(parameters_dict['error_tolerance']), int(parameters_dict['max_iterations']))
	elif (model_type == "svm_classifier"):
		from verticapy.learn.svm import LinearSVC
		model = LinearSVC(name, cursor, float(parameters_dict['epsilon']), float(parameters_dict['C']), True, float(parameters_dict['intercept_scaling']), parameters_dict['intercept_mode'], [float(item) for item in parameters_dict['class_weights'].split(",")], int(parameters_dict['max_iterations']))
	elif (model_type == "kmeans"):
		from verticapy.learn.cluster import KMeans
		model = KMeans(name, cursor, -1, parameters_dict['init_method'], int(parameters_dict['max_iterations']), float(parameters_dict['epsilon']))
	elif (model_type == "pca"):
		from verticapy.learn.decomposition import PCA
		model = PCA(name, cursor, 0, bool(parameters_dict['scale']))
	elif (model_type == "svd"):
		from verticapy.learn.decomposition import SVD
		model = SVD(name, cursor)
	elif (model_type == "one_hot_encoder_fit"):
		from verticapy.learn.preprocessing import OneHotEncoder
		model = OneHotEncoder(name, cursor)
	model.input_relation = info.split(",")[1].replace("'", '').replace('\\', '')
	model.test_relation = test_relation if (test_relation) else model.input_relation
	if (model_type not in ("kmeans", "pca", "svd", "one_hot_encoder_fit")):
		model.X = info.split(",")[3:len(info.split(","))]
		model.X = [item.replace("'", '').replace('\\', '') for item in model.X]
		model.y = info.split(",")[2].replace("'", '').replace('\\', '')
	elif (model_type in ("pca")):
github vertica / Vertica-ML-Python / verticapy / utilities.py View on Github external
del parameters[0]
		parameters += ["class_weights=" + info[1].split("class_weights=")[1].split("'")[1]]
	elif (model_type != "svd"):
		parameters = info[1].split(",")
	if (model_type != "svd"):
		parameters = [item.split("=") for item in parameters]
		parameters_dict = {}
		for item in parameters:
			parameters_dict[item[0]] = item[1]
	info = info[0]
	for elem in parameters_dict:
		if type(parameters_dict[elem]) == str:
			parameters_dict[elem] = parameters_dict[elem].replace("'", "")
	if (model_type == "rf_regressor"):
		from verticapy.learn.ensemble import RandomForestRegressor
		model = RandomForestRegressor(name, cursor, int(parameters_dict['ntree']), int(parameters_dict['mtry']), int(parameters_dict['max_breadth']), float(parameters_dict['sampling_size']), int(parameters_dict['max_depth']), int(parameters_dict['min_leaf_size']), float(parameters_dict['min_info_gain']), int(parameters_dict['nbins']))
	elif (model_type == "rf_classifier"):
		from verticapy.learn.ensemble import RandomForestClassifier
		model = RandomForestClassifier(name, cursor, int(parameters_dict['ntree']), int(parameters_dict['mtry']), int(parameters_dict['max_breadth']), float(parameters_dict['sampling_size']), int(parameters_dict['max_depth']), int(parameters_dict['min_leaf_size']), float(parameters_dict['min_info_gain']), int(parameters_dict['nbins']))
	elif (model_type == "logistic_reg"):
		from verticapy.learn.linear_model import LogisticRegression
		model = LogisticRegression(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "linear_reg"):
		from verticapy.learn.linear_model import ElasticNet
		model = ElasticNet(name, cursor, parameters_dict['regularization'], float(parameters_dict['epsilon']), float(parameters_dict['lambda']), int(parameters_dict['max_iterations']), parameters_dict['optimizer'], float(parameters_dict['alpha']))
	elif (model_type == "naive_bayes"):
		from verticapy.learn.naive_bayes import MultinomialNB
		model = MultinomialNB(name, cursor, float(parameters_dict['alpha']))
	elif (model_type == "svm_regressor"):
		from verticapy.learn.svm import LinearSVR
		model = LinearSVR(name, cursor, float(parameters_dict['epsilon']), float(parameters_dict['C']), True, float(parameters_dict['intercept_scaling']), parameters_dict['intercept_mode'], float(parameters_dict['error_tolerance']), int(parameters_dict['max_iterations']))
	elif (model_type == "svm_classifier"):