Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
Returns
-------
vDataFrame
self.parent
See Also
--------
vDataFrame.eval : Evaluates a customized expression.
"""
# Body of a vcolumn method whose header lies outside this excerpt; the history
# message at the end labels the operation "[Add Copy]". It registers a second
# vcolumn on the parent that reuses this vcolumn's transformations.
# NOTE(review): indentation was lost in this excerpt, so the nesting of the
# statements below has to be inferred from the if/elif/raise keywords.
# Validate the 'name' argument against the str type (check_types is defined elsewhere).
check_types([("name", name, [str], False)])
# Double quotes inside the requested alias are replaced by underscores before
# the alias is handed to str_column (defined elsewhere; presumably it wraps the
# alias in double quotes — confirm).
name = str_column(name.replace('"', '_'))
# Reject an alias that is empty once every double quote is removed.
if not(name.replace('"', '')):
raise ValueError("The parameter 'name' must not be empty")
# Reject an alias that column_check_ambiguous reports against the parent's current columns.
elif column_check_ambiguous(name, self.parent.get_columns()):
raise ValueError("A vcolumn has already the alias {}.\nBy changing the parameter 'name', you'll be able to solve this issue.".format(name))
# The new vcolumn receives a new list holding the same transformation items;
# the catalog object is passed through as-is rather than copied here.
new_vColumn = vColumn(name, parent = self.parent, transformations = [item for item in self.transformations], catalog = self.catalog)
# Expose the new vcolumn on the parent under the alias itself and under the
# alias stripped of its first and last characters (presumably the surrounding
# double quotes — confirm against str_column).
setattr(self.parent, name, new_vColumn)
setattr(self.parent, name[1:-1], new_vColumn)
# Record the alias in the parent's column list and log the operation.
self.parent._VERTICAPY_VARIABLES_["columns"] += [name]
self.parent.__add_to_history__("[Add Copy]: A copy of the vcolumn {} named {} was added to the vDataFrame.".format(self.alias, name))
# The parent object is returned, not the new vcolumn.
return (self.parent)
#---#
# Tail of a routine that populates the object `vdf` from `relation`; its header
# and the statements that create `vdf`, `cursor`, `name`, `relation`, `history`
# and `saving` lie outside this excerpt.
# NOTE(review): indentation was lost in this excerpt, so the exact extent of
# the try/except, for and if bodies below cannot be read from the text alone.
# Store the caller-supplied history and saving values on the new object.
vdf._VERTICAPY_VARIABLES_["history"] = history
vdf._VERTICAPY_VARIABLES_["saving"] = saving
# Best-effort removal of a leftover probe table; any failure is ignored.
# NOTE(review): the bare `except:` also swallows KeyboardInterrupt/SystemExit
# and hides real connection errors — consider narrowing it.
try:
cursor.execute("DROP TABLE IF EXISTS v_temp_schema.VERTICAPY_{}_TEST;".format(name))
except:
pass
# Materialise at most 10 rows of the relation into a local temporary table so
# that the column names and data types can be read back from the catalog.
# NOTE(review): `name` and `relation` are interpolated directly into the SQL
# text here and below; this is only safe if both are trusted — verify callers.
cursor.execute("CREATE LOCAL TEMPORARY TABLE VERTICAPY_{}_TEST ON COMMIT PRESERVE ROWS AS SELECT * FROM {} LIMIT 10;".format(name, relation))
# Fetch (column_name, data_type) pairs for the probe table, then drop it.
cursor.execute("SELECT column_name, data_type FROM columns WHERE table_name = 'VERTICAPY_{}_TEST' AND table_schema = 'v_temp_schema'".format(name))
result = cursor.fetchall()
cursor.execute("DROP TABLE IF EXISTS v_temp_schema.VERTICAPY_{}_TEST;".format(name))
# The column list stores each raw column name wrapped in double quotes.
# NOTE(review): unlike the attribute names set below, a double quote inside
# the column name is NOT replaced by an underscore here — confirm this
# mismatch is intended.
vdf._VERTICAPY_VARIABLES_["columns"] = ['"' + item[0] + '"' for item in result]
# Build one vColumn per (column name, data type) pair returned by the catalog query.
for column, ctype in result:
# Tell the user when the alias has to differ from the database column name.
if ('"' in column):
print("\u26A0 Warning: A double quote \" was found in the column {}, its alias was changed using underscores '_' to {}".format(column, column.replace('"', '_')))
# Function-local import (possibly to avoid a circular import — confirm).
from verticapy.vcolumn import vColumn
# The alias replaces double quotes with underscores, while the first
# transformation keeps the real column name, escaping double quotes by doubling
# them; it also carries the data type and the result of category_from_type(ctype).
new_vColumn = vColumn('"{}"'.format(column.replace('"', '_')), parent = vdf, transformations = [('"{}"'.format(column.replace('"', '""')), ctype, category_from_type(ctype))])
# Expose the vColumn on vdf under both the quoted and the unquoted alias.
setattr(vdf, '"{}"'.format(column.replace('"', '_')), new_vColumn)
setattr(vdf, column.replace('"', '_'), new_vColumn)
return (vdf)
#---#
# Fragment of a vcolumn method whose header lies outside this excerpt; the
# history message labels the operation "[Get Dummies]". It reads `prefix`,
# `prefix_sep`, `drop_first` and `use_numbers_as_suffix`, which are defined
# outside the visible lines (presumably method parameters — confirm).
# NOTE(review): indentation was lost in this excerpt, so which statements sit
# inside the `if` and the two `for` loops has to be inferred.
distinct_elements = self.distinct()
# Proceed unless the distinct values are exactly [0, 1] or [1, 0]; a vcolumn
# whose ctype() is "boolean" always satisfies the condition.
if (distinct_elements not in ([0, 1], [1, 0]) or self.ctype() == "boolean"):
all_new_features = []
# When `prefix` is falsy the prefix is this vcolumn's alias without double
# quotes followed by the separator; otherwise it is `prefix` followed by the
# separator. Double quotes in `prefix` and `prefix_sep` become underscores.
prefix = self.alias.replace('"', '') + prefix_sep.replace('"', '_') if not(prefix) else prefix.replace('"', '_') + prefix_sep.replace('"', '_')
# NOTE(review): with drop_first set, the loops below stop one element early,
# so the element left out is the LAST entry of distinct_elements, not the
# first — confirm against the ordering returned by distinct().
n = 1 if drop_first else 0
columns = self.parent.get_columns()
# First pass: check every candidate alias before creating anything, so that a
# clash raises without leaving a partial set of dummies on the parent.
# NOTE(review): the alias checked here has not yet gone through the character
# replacement applied in the second pass, so the checked alias and the created
# alias can differ — confirm this is acceptable.
for k in range(len(distinct_elements) - n):
name = '"{}{}"'.format(prefix, k) if (use_numbers_as_suffix) else '"{}{}"'.format(prefix, str(distinct_elements[k]).replace('"', '_'))
if (column_check_ambiguous(name, columns)):
raise ValueError("A vcolumn has already the alias of one of the dummies ({}).\nIt can be the result of using previously the method on the vcolumn or simply because of ambiguous columns naming.\nBy changing one of the parameters ('prefix', 'prefix_sep'), you'll be able to solve this issue.".format(name))
# Second pass: create one new vcolumn per retained distinct value.
for k in range(len(distinct_elements) - n):
name = '"{}{}"'.format(prefix, k) if (use_numbers_as_suffix) else '"{}{}"'.format(prefix, str(distinct_elements[k]).replace('"', '_'))
# Spaces, slashes, commas and single quotes in the alias become underscores.
name = name.replace(' ', '_').replace('/', '_').replace(',', '_').replace("'", '_')
# The first "{}" is re-inserted as a literal placeholder for later
# substitution; single quotes in the value are doubled for the SQL literal.
expr = "DECODE({}, '{}', 1, 0)".format("{}", str(distinct_elements[k]).replace("'", "''"))
# Append the DECODE step to a new list; self.transformations is not mutated.
transformations = self.transformations + [(expr, "bool", "int")]
# The catalog is pre-filled with fixed statistics for a 0/1 column.
# NOTE(review): self.parent.shape() is called on every iteration — check
# whether that is costly and could be hoisted out of the loop.
new_vColumn = vColumn(name, parent = self.parent, transformations = transformations, catalog = {"min": 0, "max": 1, "count": self.parent.shape()[0], "percent": 1.0, "unique": 2, "approx_unique": 2, "prod": 0})
# Expose the dummy on the parent under both the quoted and the unquoted alias.
setattr(self.parent, name, new_vColumn)
setattr(self.parent, name.replace('"', ''), new_vColumn)
self.parent._VERTICAPY_VARIABLES_["columns"] += [name]
all_new_features += [name]
# Log how many dummies were created and their aliases.
self.parent.__add_to_history__("[Get Dummies]: One hot encoder was applied to the vcolumn {}\n{} feature(s) was/were created: {}".format(self.alias, len(all_new_features), ", ".join(all_new_features)) + ".")
# The parent object is returned.
return (self.parent)
#---#