How to use the zat.log_to_dataframe.LogToDataFrame function in zat

To help you get started, we’ve selected a few zat examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github SuperCowPowers / zat / zat / log_to_dataframe.py View on Github external
def test():
    """Test for LogToDataFrame Class"""
    import os
    pd.set_option('display.width', 1000)
    from zat.utils import file_utils

    # Grab a test file
    data_path = file_utils.relative_dir(__file__, '../data')
    log_path = os.path.join(data_path, 'conn.log')

    # Convert it to a Pandas DataFrame
    log_to_df = LogToDataFrame()
    my_df = log_to_df.create_dataframe(log_path)

    # Print out the head
    print(my_df.head())

    # Print out the datatypes
    print(my_df.dtypes)

    # Test a bunch
    tests = ['app_stats.log', 'dns.log', 'http.log', 'notice.log', 'tor_ssl.log',
             'conn.log', 'dhcp_002.log', 'files.log',  'smtp.log', 'weird.log',
             'ftp.log',  'ssl.log', 'x509.log']
    for log_path in [os.path.join(data_path, log) for log in tests]:
        print('Testing: {:s}...'.format(log_path))
        my_df = log_to_df.create_dataframe(log_path)
        print(my_df.head())
github SuperCowPowers / zat / examples / anomaly_detection.py View on Github external
args.bro_log = os.path.expanduser(args.bro_log)

        # Sanity check either http or dns log
        if 'http' in args.bro_log:
            log_type = 'http'
            features = ['id.resp_p', 'method', 'resp_mime_types', 'request_body_len']
        elif 'dns' in args.bro_log:
            log_type = 'dns'
            features = ['Z', 'proto', 'qtype_name', 'query_length', 'answer_length', 'entropy']
        else:
            print('This example only works with Zeek with http.log or dns.log files..')
            sys.exit(1)

        # Create a Pandas dataframe from a Zeek log
        try:
            log_to_df = log_to_dataframe.LogToDataFrame()
            bro_df = log_to_df.create_dataframe(args.bro_log)
            print(bro_df.head())
        except IOError:
            print('Could not open or parse the specified logfile: %s' % args.bro_log)
            sys.exit(1)
        print('Read in {:d} Rows...'.format(len(bro_df)))

        # Using Pandas we can easily and efficiently compute additional data metrics
        # Here we use the vectorized operations of Pandas/Numpy to compute query length
        # We'll also compute entropy of the query
        if log_type == 'dns':
            bro_df['query_length'] = bro_df['query'].str.len()
            bro_df['answer_length'] = bro_df['answers'].str.len()
            bro_df['entropy'] = bro_df['query'].map(lambda x: entropy(x))

        # Use the zat DataframeToMatrix class
github SuperCowPowers / zat / examples / zeek_to_scikit.py View on Github external
# Check for unknown args
    if commands:
        print('Unrecognized args: %s' % commands)
        sys.exit(1)

    # Sanity check that this is a dns log
    if 'dns' not in args.bro_log:
        print('This example only works with Zeek dns.log files..')
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Create a Pandas dataframe from the Zeek log
        log_to_df = log_to_dataframe.LogToDataFrame()
        bro_df = log_to_df.create_dataframe(args.bro_log)

        # Add query length
        bro_df['query_length'] = bro_df['query'].str.len()

        # Normalize this field
        #ql = bro_df['query_length']
        #bro_df['query_length_norm'] = (ql - ql.min()) / (ql.max()-ql.min())

        # These are the features we want (note some of these are categorical!)
        features = ['AA', 'RA', 'RD', 'TC', 'Z', 'rejected', 'proto', 'qtype_name', 'rcode_name', 'query_length']
        feature_df = bro_df[features]

        # Use the super awesome DataframeToMatrix class (handles categorical data!)
        to_matrix = dataframe_to_matrix.DataFrameToMatrix()
        bro_matrix = to_matrix.fit_transform(feature_df)
github SuperCowPowers / zat / examples / zeek_to_pandas.py View on Github external
# Collect args from the command line
    parser = argparse.ArgumentParser()
    parser.add_argument('bro_log', type=str, help='Specify a bro log to run BroLogReader test on')
    args, commands = parser.parse_known_args()

    # Check for unknown args
    if commands:
        print('Unrecognized args: %s' % commands)
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Create a Pandas dataframe from a Zeek log
        log_to_df = LogToDataFrame()
        bro_df = log_to_df.create_dataframe(args.bro_log)

        # Print out the head of the dataframe
        print(bro_df.head())

        # Print out the types of the columns
        print(bro_df.dtypes)

        # Print out size and memory usage
        print('DF Shape: {:s}'.format(str(bro_df.shape)))
        print('DF Memory:')
        memory_usage = bro_df.memory_usage(deep=True)
        total = memory_usage.sum()
        for item in memory_usage.items():
            print('\t {:s}: \t{:.2f} MB'.format(item[0], item[1]/1e6))
        print('DF Total: {:.2f} GB'.format(total/(1e9)))