How to use the piicatcher.scanner.RegexScanner class in piicatcher

To help you get started, we’ve selected a few piicatcher examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tokern / piicatcher / tests / test_dbmetadata.py View on Github external
def test_null_scan_column(self):
    """Scanning a ``None`` value must leave the column without any PII."""
    column = Column("col")
    scanners = [RegexScanner(), NERScanner()]
    column.scan(None, scanners)

    self.assertFalse(column.has_pii())
    # The dictionary form should carry the column name and no PII types.
    expected = {"pii_types": [], "name": "col"}
    self.assertEqual(expected, column.get_dict())
github tokern / piicatcher / tests / test_dbmetadata.py View on Github external
def test_positive_scan_column(self):
    """Scanning a person's name must flag the column as ``PiiTypes.PERSON``."""
    column = Column("col")
    scanners = [RegexScanner(), NERScanner()]
    column.scan("Jonathan Smith", scanners)

    self.assertTrue(column.has_pii())
    # The dictionary form should list exactly the PERSON type for this column.
    expected = {"pii_types": [PiiTypes.PERSON], "name": "col"}
    self.assertEqual(expected, column.get_dict())
github tokern / piicatcher / tests / test_dbmetadata.py View on Github external
def test_negative_scan_column(self):
    """Scanning the value ``"abc"`` must leave the column without any PII."""
    column = Column("col")
    scanners = [RegexScanner(), NERScanner()]
    column.scan("abc", scanners)

    self.assertFalse(column.has_pii())
    # The dictionary form should carry the column name and no PII types.
    expected = {"pii_types": [], "name": "col"}
    self.assertEqual(expected, column.get_dict())
github tokern / piicatcher / tests / test_scanner.py View on Github external
def setUp(self):
    """Create a fresh ``RegexScanner`` and store it on ``self.parser``."""
    scanner = RegexScanner()
    self.parser = scanner
github tokern / piicatcher / piicatcher / explorer / metadata.py View on Github external
def scan(self, generator):
    """Scan the rows produced by ``generator`` and record the PII types found.

    Every value of every row is handed to the matching child column together
    with a ``RegexScanner`` and an ``NERScanner``. Once all rows are consumed,
    the PII types reported by each child are added to ``self._pii``.

    Args:
        generator: Callable invoked with the keyword arguments
            ``column_list``, ``schema_name`` and ``table_name``; iterating
            over its result yields one row at a time.
    """
    # Pass the name as a logger argument (not pre-formatted with %) so this
    # call matches the closing debug call and formatting is deferred.
    self.logger.debug("Scanning table name %s", self.get_name())
    scanners = [RegexScanner(), NERScanner()]
    for row in generator(
        column_list=self.get_children(), schema_name=self._schema, table_name=self
    ):
        # zip pairs values with children positionally; surplus items on
        # either side are ignored.
        for col, val in zip(self.get_children(), row):
            col.scan(val, scanners)

    # Plain loops instead of a list comprehension used only for side effects:
    # no throwaway list of ``None`` results is built.
    for col in self.get_children():
        for pii_type in col.get_pii_types():
            self._pii.add(pii_type)

    self.logger.debug("%s has %s", self.get_name(), self.get_pii_types_str())
github tokern / piicatcher / piicatcher / explorer / files.py View on Github external
% (os.path.abspath(self._path), mime_type)
            )
        else:
            for root, subdirs, files in os.walk(self._path):
                for filename in files:
                    file_path = os.path.join(root, filename)
                    mime_type = magic.from_file(file_path, mime=True)

                    logging.debug(
                        "\t- full path: %s, mime_type: %s" % (file_path, mime_type)
                    )
                    self._files.append(File(file_path, mime_type))

        context = {
            "tokenizer": Tokenizer(),
            "regex": RegexScanner(),
            "ner": NERScanner(),
        }
        for f in self._files:
            f.scan(context)