How to use the datacompy.sparkcompare.MatchType.MISMATCH.value enum value in datacompy

To help you get started, we’ve selected a few datacompy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Source: capitalone / datacompy / datacompy / sparkcompare.py (view on GitHub, external link)
print(format_pattern.format(*[h[0] for h in headers_columns_unequal_valid]), file=myfile)
        print(
            format_pattern.format(*["-" * len(h[0]) for h in headers_columns_unequal_valid]),
            file=myfile,
        )

        for column_name, column_values in sorted(
            self.columns_match_dict.items(), key=lambda i: i[0]
        ):
            num_matches = column_values[MatchType.MATCH.value]
            num_known_diffs = (
                None
                if self._known_differences is None
                else column_values[MatchType.KNOWN_DIFFERENCE.value]
            )
            num_mismatches = column_values[MatchType.MISMATCH.value]
            compare_column = self._base_to_compare_name(column_name)

            if num_mismatches or num_known_diffs or self.show_all_columns:
                output_row = [
                    column_name,
                    compare_column,
                    base_types.get(column_name),
                    compare_types.get(column_name),
                    str(num_matches),
                    str(num_mismatches),
                ]
                if self.match_rates:
                    match_rate = 100 * (
                        1
                        - (column_values[MatchType.MISMATCH.value] + 0.0) / self.common_row_count
                        + 0.0
Source: capitalone / datacompy / datacompy / sparkcompare.py (view on GitHub, external link)
)

        case_string = (
            "( CASE WHEN ("
            + " OR ".join(equal_comparisons)
            + ") THEN {match_success} WHEN ("
            + " OR ".join(known_diff_comparisons)
            + ") THEN {match_known_difference} ELSE {match_failure} END) "
            + "AS {name}, A.{name} AS {name}_base, B.{name} AS {name}_compare"
        )

        return case_string.format(
            name=name,
            match_success=MatchType.MATCH.value,
            match_known_difference=MatchType.KNOWN_DIFFERENCE.value,
            match_failure=MatchType.MISMATCH.value,
        )