How to use the piicatcher.catalog.glue.GlueStore class in piicatcher

To help you get started, we’ve selected a few piicatcher examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github tokern / piicatcher / tests / test_glue.py View on Github external
},
                'StoredAsSubDirectories': False
            },
            'PartitionKeys': [], 'TableType': 'EXTERNAL_TABLE',
            'Parameters': {
                'CrawlerSchemaDeserializerVersion': '1.0', 'CrawlerSchemaSerializerVersion': '1.0',
                'UPDATED_BY_CRAWLER': 'TaxiCrawler', 'areColumnsQuoted': 'false', 'averageRecordSize': '36',
                'classification': 'csv', 'columnsOrdered': 'true', 'compressionType': 'none', 'delimiter': ',',
                'exclusions': '["s3://nyc-tlc/misc/*foil*","s3://nyc-tlc/misc/shared*","s3://nyc-tlc/misc/uber*",'
                              '"s3://nyc-tlc/misc/*.html","s3://nyc-tlc/misc/*.zip","s3://nyc-tlc/misc/FOIL_*"]',
                'objectCount': '1', 'recordCount': '342', 'sizeKey': '12322', 'skip.header.line.count': '1',
                'typeOfData': 'file'
            }
        }

        updated_table_params = GlueStore.update_table_params(table_params, updated_columns)
        self.assertEqual(updated_table_params, expected_table_params)
github tokern / piicatcher / piicatcher / explorer / aws.py View on Github external
def output(cls, ns, explorer):
        """Dispatch the output of ``explorer`` based on ``ns.catalog["format"]``.

        When the format equals ``"glue"`` the explorer is handed to
        ``GlueStore.save_schemas``; for any other format the call is
        delegated to the parent class's ``output``.
        """
        wants_glue = ns.catalog["format"] == "glue"
        if wants_glue:
            GlueStore.save_schemas(explorer)
            return

        # Not a Glue catalog: fall back to the inherited output handling.
        super(AthenaExplorer, cls).output(ns, explorer)
github tokern / piicatcher / piicatcher / catalog / glue.py View on Github external
# For every table of every schema: fetch the table's current definition
# through `client`, recompute its column parameters, and write the table back
# only when update_column_parameters reports a change.
# NOTE(review): `schemas` and `client` are bound outside this fragment;
# `client` is presumably a boto3 Glue client (get_table / update_table) — confirm.
for schema in schemas:
            logging.debug("Processing schema {0}".format(schema.get_name()))
            for table in schema.get_tables():
                # Value computed from the table by GlueStore.get_pii_table; it is
                # only consumed by update_column_parameters below.
                field_value = GlueStore.get_pii_table(table)
                # Look the table up by schema name (as DatabaseName) and table name.
                table_info = client.get_table(
                    DatabaseName=schema.get_name(), Name=table.get_name()
                )

                logging.debug(table_info)

                # Returns the (possibly modified) column list plus a flag saying
                # whether anything changed.
                updated_columns, is_table_updated = GlueStore.update_column_parameters(
                    table_info["Table"]["StorageDescriptor"]["Columns"], field_value
                )

                # Skip the write-back entirely when no column was changed.
                if is_table_updated:
                    # Build the TableInput payload from the fetched table
                    # definition and the updated columns.
                    updated_params = GlueStore.update_table_params(
                        table_info["Table"], updated_columns
                    )
                    client.update_table(
                        DatabaseName=schema.get_name(), TableInput=updated_params
                    )