Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
def test():
    """Test for DataFrameCache Class.

    Builds 20 rows that differ only in their 'id' field, pushes them into a
    DataFrameCache created with max_cache_size=10 and max_cache_time=1, and
    then pulls the cached contents back out as a dataframe.
    """
    # Make it small and short for testing
    # (max_cache_time is presumably in seconds -- TODO confirm against DataFrameCache)
    df_cache = DataFrameCache(max_cache_size=10, max_cache_time=1)

    # Make some fake data
    base_row = {'id': 0, 'foo': 'bar', 'port': 80, 'protocol': 17}

    # Create an array of test rows; every value in base_row is immutable, so a
    # shallow copy per row is enough to keep the rows independent of each other.
    test_data = [dict(base_row, id=i) for i in range(20)]

    # Add rows (twice as many as max_cache_size)
    df_cache.add_rows(test_data)

    # Make sure the cache size is working properly
    # NOTE(review): no assertions on my_df are visible in this excerpt -- the
    # checks presumably follow; confirm against the full test.
    my_df = df_cache.dataframe()
# Sanity check: bail out unless the given log path mentions 'dns'
if 'dns' not in args.bro_log:
    print('This example only works with Zeek with dns.log files..')
    sys.exit(1)
log_type = 'dns'

# Create a Zeek log reader
# NOTE(review): 'reader' is rebound to the LiveSimulator a few lines below
# before anything is read from this BroLogReader -- confirm whether both are
# intended or whether one of the two is a leftover.
print('Opening Data File: {:s}'.format(args.bro_log))
reader = bro_log_reader.BroLogReader(args.bro_log, tail=True)

# Create a Zeek IDS log live simulator
print('Opening Data File: {:s}'.format(args.bro_log))
reader = live_simulator.LiveSimulator(args.bro_log, eps=10)  # 10 events per second

# Create a Dataframe Cache
df_cache = dataframe_cache.DataFrameCache(max_cache_time=600)  # 10 minute cache

# Streaming Clustering Class
batch_kmeans = MiniBatchKMeans(n_clusters=5, verbose=True)

# Use the BroThon DataframeToMatrix class
to_matrix = dataframe_to_matrix.DataFrameToMatrix()

# Timer state for the row loop that follows: time.time() is in seconds, so the
# first deadline is time_delta (10) seconds from now.
time_delta = 10
timer = time.time() + time_delta
FIRST_TIME = True
for row in reader.readrows():
df_cache.add_row(row)
# Every time_delta seconds (10 here) grab the dataframe from the cache
if time.time() > timer: