Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def _print_num_of_rows_with_column_equality(self, myfile):
    """Write the "Row Comparison" section of the report to ``myfile``.

    Counts the rows of the joined dataframe in which every column listed in
    ``self.columns_compared`` equals ``MatchType.MATCH.value``, then prints
    that count together with its difference from ``self.common_row_count``.

    Side effect: (re)registers the Spark temp view ``matched_df``.

    Args:
        myfile: Object handed to ``print(..., file=myfile)`` for each line
            of the section.
    """
    # The selected columns hold, per compared column, the flag indicating
    # whether that column matched between the two dataframes.
    flags_df = self._get_or_create_joined_dataframe().select(*self.columns_compared)
    flags_df.createOrReplaceTempView("matched_df")

    # One "A.<column>=<MATCH value>" predicate per compared column; a row
    # is counted only when all of them hold.
    predicates = []
    for name in self.columns_compared:
        predicates.append("A." + name + "=" + str(MatchType.MATCH.value))
    all_match_filter = " AND ".join(predicates)

    count_sql = r"""SELECT count(*) AS row_count FROM matched_df A WHERE {}""".format(
        all_match_filter
    )
    # The query returns a single row whose first field is the count.
    fully_equal = self.spark.sql(count_sql).head()[0]

    print("\n****** Row Comparison ******", file=myfile)
    partially_unequal = self.common_row_count - fully_equal
    print(
        "Number of rows with some columns unequal: {}".format(partially_unequal),
        file=myfile,
    )
    print("Number of rows with all columns equal: {}".format(fully_equal), file=myfile)
+ kd["transformation"].format(new_input, input=new_input)
+ ") = A.{name})"
)
case_string = (
"( CASE WHEN ("
+ " OR ".join(equal_comparisons)
+ ") THEN {match_success} WHEN ("
+ " OR ".join(known_diff_comparisons)
+ ") THEN {match_known_difference} ELSE {match_failure} END) "
+ "AS {name}, A.{name} AS {name}_base, B.{name} AS {name}_compare"
)
return case_string.format(
name=name,
match_success=MatchType.MATCH.value,
match_known_difference=MatchType.KNOWN_DIFFERENCE.value,
match_failure=MatchType.MISMATCH.value,
)