Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# LazyTbl (SQL) implementation of the join verb.
# NOTE(review): this excerpt has lost its indentation and stops partway
# through the function body; only the visible statements are described here.
@join.register(LazyTbl)
def _join(left, right, on = None, *args, how = "inner", sql_on = None):
# any extra positional arguments are handed to _raise_if_args
# (presumably rejected there -- confirm against its definition)
_raise_if_args(args)
# Needs to be on the table, not the select
left_sel = left.last_op.alias()
right_sel = right.last_op.alias()
# handle arguments ----
# both arguments are replaced by whatever their validators return
on = _validate_join_arg_on(on, sql_on)
how = _validate_join_arg_how(how)
# for equality join used to combine keys into single column
# (when sql_on is given, no keys are consolidated: an empty dict is used)
consolidate_keys = on if sql_on is None else {}
if how == "right":
# switch joins, since sqlalchemy doesn't have right join arg
# NOTE(review): the body of this branch is not visible in this excerpt
@ungroup.register(LazyTbl)
def _ungroup(__data):
    """Return a copy of the lazy table whose ``group_by`` is an empty tuple."""
    no_groups = ()
    return __data.copy(group_by=no_groups)
@transmute.register(LazyTbl)
def _transmute(__data, **kwargs):
    """Mutate the table, then keep only the grouping and newly defined columns.

    Args:
        __data: the lazy table to operate on.
        **kwargs: column definitions, forwarded unchanged to the mutate
            implementation registered for ``type(__data)``.

    Returns:
        The result of ``__data.append_op`` applied to the mutated select
        restricted to the kept columns.
    """
    # reuse the mutate implementation registered for this exact table type
    mutate_impl = mutate.registry[type(__data)]

    # grouping columns come first, followed by the names defined in kwargs
    keep_names = ordered_union(__data.group_by, kwargs)

    mutated_sel = mutate_impl(__data, **kwargs).last_op
    inner_cols = lift_inner_cols(mutated_sel)

    kept_cols = [inner_cols[name] for name in keep_names]
    return __data.append_op(mutated_sel.with_only_columns(kept_cols))
# LazyTbl (SQL) implementation of the summarize verb.
# NOTE(review): this excerpt has lost its indentation and ends before the
# function returns; only the visible statements are described here.
@summarize.register(LazyTbl)
def _summarize(__data, **kwargs):
# https://stackoverflow.com/questions/14754994/why-is-sqlalchemy-count-much-slower-than-the-raw-query
# what if windowed mutate or filter has been done?
# - filter is fine, since it uses a CTE
# - need to detect any window functions...
# work on a clone of the table's last operation
sel = __data.last_op._clone()
# shape every keyword expression with window = False, stored under its keyword name
new_calls = {}
for k, expr in kwargs.items():
new_calls[k] = __data.shape_call(
expr, window = False,
verb_name = "Summarize", arg_name = k
)
# one entry per shaped call: the result of col_expr_requires_cte(call, sel)
# NOTE(review): how needs_cte is consumed is not visible in this excerpt
needs_cte = [col_expr_requires_cte(call, sel) for call in new_calls.values()]
# LazyTbl (SQL) implementation of the arrange verb.
# NOTE(review): this excerpt has lost its indentation and ends inside the
# loop; what happens to res, cols and new_calls afterwards is not visible.
@arrange.register(LazyTbl)
def _arrange(__data, *args):
last_op = __data.last_op
cols = lift_inner_cols(last_op)
new_calls = []
# ii is the positional index of the argument, reported as arg_name below
for ii, expr in enumerate(args):
if callable(expr):
# callable arguments are shaped with window = False
res = __data.shape_call(
expr, window = False,
verb_name = "Arrange", arg_name = ii
)
else:
# non-callable arguments are used as given
res = expr
@show_query.register(LazyTbl)
def _show_query(tbl, simplify = False):
    """Print the compiled SQL for the table's last operation.

    The statement is compiled against ``tbl.source.dialect`` with the
    ``literal_binds`` compile option enabled.

    Args:
        tbl: the lazy table whose ``last_op`` is printed.
        simplify: when true, compile inside ``use_simple_names()`` so that
            table names and labels are stripped where unnecessary.
    """
    stmt = tbl.last_op

    def render():
        # compiled lazily so the simplified branch compiles inside its context
        return stmt.compile(
            dialect=tbl.source.dialect,
            compile_kwargs={"literal_binds": True},
        )

    if not simplify:
        # use a much more verbose query
        print(render())
        return

    # try to strip table names and labels where unnecessary
    with use_simple_names():
        print(render())
@rename.register(LazyTbl)
def _rename(__data, **kwargs):
    """Relabel columns of a lazy table.

    Args:
        __data: the lazy table to operate on.
        **kwargs: ``new_name = old_column`` pairs, where each value must be
            a simple column reference, e.g. ``_.colname`` or ``_['colname']``.

    Returns:
        The result of ``__data.append_op`` applied to a select in which the
        named columns are relabelled and all other columns are kept as is.

    Raises:
        KeyError: if ``simple_varname`` yields ``None`` for any value, i.e.
            the value is not a simple column reference.
    """
    sel = __data.last_op
    columns = lift_inner_cols(sel)

    # maps each existing column name to the new name it should receive
    old_to_new = {simple_varname(v): k for k, v in kwargs.items()}

    # a None key means simple_varname could not extract a plain column name
    if None in old_to_new:
        raise KeyError("positional arguments must be simple column, "
                       "e.g. _.colname or _['colname']"
                       )

    # relabel the renamed columns, pass every other column through untouched
    labs = [
        c.label(old_to_new[k]) if k in old_to_new else c
        for k, c in columns.items()
    ]

    new_sel = sel.with_only_columns(labs)

    # the relabelled select was previously built but never appended or
    # returned, so the verb produced None; append it like the other verbs do
    return __data.append_op(new_sel)
# LazyTbl (SQL) implementation of the distinct verb.
# NOTE(review): this excerpt has lost its indentation and ends after argument
# validation; the part that builds the query is not visible.
@distinct.register(LazyTbl)
def _distinct(__data, *args, _keep_all = False, **kwargs):
# naming columns together with _keep_all = True is not supported here
if (args or kwargs) and _keep_all:
raise NotImplementedError("Distinct with variables specified in sql requires _keep_all = False")
# keyword expressions are first added through mutate; otherwise the current select is used
inner_sel = mutate(__data, **kwargs).last_op if kwargs else __data.last_op
# TODO: this is copied from the df distinct version
# cols dict below is used as ordered set
cols = {simple_varname(x): True for x in args}
cols.update(kwargs)
# a None key means simple_varname gave None for some positional argument
if None in cols:
raise KeyError("positional arguments must be simple column, "
"e.g. _.colname or _['colname']"
)
@select.register(LazyTbl)
def _select(__data, *args, **kwargs):
# see https://stackoverflow.com/questions/25914329/rearrange-columns-in-sqlalchemy-select-object
if kwargs:
raise NotImplementedError(
"Using kwargs in select not currently supported. "
"Use _.newname == _.oldname instead"
)
last_op = __data.last_op
columns = {c.key: c for c in last_op.inner_columns}
# same as for DataFrame
colnames = Series(list(columns))
vl = VarList()
evaluated = (arg(vl) if callable(arg) else arg for arg in args)
od = var_select(colnames, *evaluated)