How to use the zat.dataframe_cache.DataFrameCache class in zat

To help you get started, we've selected a few zat examples based on popular ways it is used in public projects.
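
Both examples below exercise the same small API: construct a DataFrameCache with max_cache_size and/or max_cache_time limits, feed it rows with add_row() or add_rows(), and materialize the current window with dataframe(). A minimal sketch based on those calls (exact defaults and eviction details may differ):

    from zat import dataframe_cache

    # Cap the cache at 1000 rows and drop anything older than 60 seconds
    cache = dataframe_cache.DataFrameCache(max_cache_size=1000, max_cache_time=60)
    cache.add_row({'id': 1, 'foo': 'bar', 'port': 80, 'protocol': 17})
    df = cache.dataframe()  # returns the cached rows as a pandas DataFrame
    print(df.head())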


Example from SuperCowPowers/zat: zat/dataframe_cache.py (view on GitHub)
def test():
    """Test for DataFrameCache Class"""
    import copy

    df_cache = DataFrameCache(max_cache_size=10, max_cache_time=1)  # Make it small and short for testing

    # Make some fake data
    base_row = {'id': 0, 'foo': 'bar', 'port': 80, 'protocol': 17}

    # Create an array of test rows
    test_data = []
    for i in range(20):
        row = copy.deepcopy(base_row)
        row['id'] = i
        test_data.append(row)

    # Add rows
    df_cache.add_rows(test_data)

    # Make sure the cache size is working properly
    my_df = df_cache.dataframe()
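
The excerpt stops at the dataframe() call; since 20 rows were added to a cache capped at 10, a natural follow-on check (a sketch, the project's actual assertion may differ) is:

    assert len(my_df) == 10  # the 10 oldest rows should have been evicted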
Example from SuperCowPowers/zat: examples/anomaly_detection_streaming.py (view on GitHub)
        if 'dns' in args.bro_log:
            log_type = 'dns'
        else:
            print('This example only works with Zeek dns.log files...')
            sys.exit(1)

        # Create a Zeek IDS log live simulator (a plain tailing reader would be
        # bro_log_reader.BroLogReader(args.bro_log, tail=True), but it would be
        # overwritten here, so only the simulator is kept)
        print('Opening Data File: {:s}'.format(args.bro_log))
        reader = live_simulator.LiveSimulator(args.bro_log, eps=10)  # 10 events per second

        # Create a Dataframe Cache
        df_cache = dataframe_cache.DataFrameCache(max_cache_time=600)  # 10 minute cache

        # Streaming Clustering Class
        batch_kmeans = MiniBatchKMeans(n_clusters=5, verbose=True)

        # Use the zat DataFrameToMatrix class
        to_matrix = dataframe_to_matrix.DataFrameToMatrix()

        # Add each new row into the cache
        time_delta = 10
        timer = time.time() + time_delta
        FIRST_TIME = True
        for row in reader.readrows():
            df_cache.add_row(row)

            # Every 10 seconds (time_delta) grab the dataframe from the cache
            if time.time() > timer:
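                # (Continuation sketch, not verbatim from the project: reset the
                # timer, pull the cached window, encode it, and update the
                # streaming clusters. DataFrameToMatrix.fit_transform()/transform()
                # and MiniBatchKMeans.partial_fit() are standard zat/sklearn calls.)
                timer = time.time() + time_delta

                # Grab the current window of rows from the cache
                my_df = df_cache.dataframe()

                # Fit the encoder on the first batch, then reuse its encoding
                if FIRST_TIME:
                    matrix = to_matrix.fit_transform(my_df)
                    FIRST_TIME = False
                else:
                    matrix = to_matrix.transform(my_df)

                # Incrementally update the k-means clusters with this batch
                batch_kmeans.partial_fit(matrix)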