How to use the datacompy.sparkcompare.MatchType enum in datacompy

To help you get started, we’ve selected a few datacompy examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github capitalone / datacompy / datacompy / sparkcompare.py View on Github external
def _create_select_statement(self, name):
    """Build the SELECT-list fragment for one compared column.

    The fragment always selects ``A.{name}_base`` and ``A.{name}_compare``
    and derives a boolean ``{name}_match`` column that is False only when
    ``A.{name}`` equals ``MatchType.MISMATCH.value``.

    When ``self._known_differences`` is truthy, an extra
    ``{name}_match_type`` column is added: a CASE expression mapping each
    ``MatchType`` member's value to that member's name, falling back to
    ``'UNDEFINED'``.

    Args:
        name: Column name interpolated into the SQL text as-is (no quoting
            or escaping is applied here).

    Returns:
        str: The SQL fragment, ending with a trailing space.
    """
    if not self._known_differences:
        # Simple form: only the base/compare values and the match flag.
        return "A.{name}_base, A.{name}_compare, CASE WHEN (A.{name}={match_failure})  THEN False ELSE True END AS {name}_match ".format(
            name=name, match_failure=MatchType.MISMATCH.value
        )

    # One WHEN branch per MatchType member, in enum definition order.
    when_clauses = "".join(
        " WHEN (A.{name}={match_value}) THEN '{match_name}'".format(
            name=name, match_value=str(member.value), match_name=member.name
        )
        for member in MatchType
    )
    return "A.{name}_base, A.{name}_compare, (CASE WHEN (A.{name}={match_failure}) THEN False ELSE True END) AS {name}_match, (CASE {match_type_comparison} ELSE 'UNDEFINED' END) AS {name}_match_type ".format(
        name=name,
        match_failure=MatchType.MISMATCH.value,
        match_type_comparison=when_clauses,
    )
github capitalone / datacompy / datacompy / sparkcompare.py View on Github external
def _create_select_statement(self, name):
    """Return the SQL select-list text for the column ``name``.

    Both variants emit ``A.{name}_base``, ``A.{name}_compare`` and a
    ``{name}_match`` flag that is False exactly when ``A.{name}`` equals
    ``MatchType.MISMATCH.value``. If ``self._known_differences`` is truthy
    the result also contains ``{name}_match_type``, a CASE expression that
    translates every ``MatchType`` value into its member name
    (``'UNDEFINED'`` when no member matches).

    Args:
        name: Column name; inserted into the SQL text without escaping.

    Returns:
        str: The select-list fragment (with a trailing space).
    """
    if self._known_differences:
        # Collect the per-member WHEN branches, then glue them together.
        branches = []
        for match_kind in MatchType:
            branches.append(
                " WHEN (A.{name}={match_value}) THEN '{match_name}'".format(
                    name=name,
                    match_value=str(match_kind.value),
                    match_name=match_kind.name,
                )
            )
        template = "A.{name}_base, A.{name}_compare, (CASE WHEN (A.{name}={match_failure}) THEN False ELSE True END) AS {name}_match, (CASE {match_type_comparison} ELSE 'UNDEFINED' END) AS {name}_match_type "
        return template.format(
            name=name,
            match_failure=MatchType.MISMATCH.value,
            match_type_comparison="".join(branches),
        )

    # No known-differences handling: only the match flag is derived.
    template = "A.{name}_base, A.{name}_compare, CASE WHEN (A.{name}={match_failure})  THEN False ELSE True END AS {name}_match "
    return template.format(name=name, match_failure=MatchType.MISMATCH.value)
github capitalone / datacompy / datacompy / sparkcompare.py View on Github external
def _print_row_matches_by_column(self, myfile):
        self._populate_columns_match_dict()
        columns_with_mismatches = {
            key: self.columns_match_dict[key]
            for key in self.columns_match_dict
            if self.columns_match_dict[key][MatchType.MISMATCH.value]
        }
        columns_fully_matching = {
            key: self.columns_match_dict[key]
            for key in self.columns_match_dict
            if sum(self.columns_match_dict[key])
            == self.columns_match_dict[key][MatchType.MATCH.value]
        }
        columns_with_any_diffs = {
            key: self.columns_match_dict[key]
            for key in self.columns_match_dict
            if sum(self.columns_match_dict[key])
            != self.columns_match_dict[key][MatchType.MATCH.value]
        }
        base_types = {x[0]: x[1] for x in self.base_df.dtypes}
        compare_types = {x[0]: x[1] for x in self.compare_df.dtypes}

        print("\n****** Column Comparison ******", file=myfile)

        if self._known_differences:
            print(
                "Number of columns compared with unexpected differences in some values: {}".format(
                    len(columns_with_mismatches)