How to use the zat.utils.file_utils module in zat

To help you get started, we’ve selected a few zat examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github SuperCowPowers / zat / zat / log_to_dataframe.py View on Github external
def test():
    """Exercise LogToDataFrame by loading conn.log into a Pandas DataFrame"""
    import os
    pd.set_option('display.width', 1000)
    from zat.utils import file_utils

    # Build the path to conn.log under '../data' (resolved relative to this file)
    conn_log = os.path.join(file_utils.relative_dir(__file__, '../data'), 'conn.log')

    # Run the conversion
    converter = LogToDataFrame()
    conn_df = converter.create_dataframe(conn_log)

    # Show the first rows, then the per-column datatypes
    print(conn_df.head())
    print(conn_df.dtypes)

    # Names of further log files (how they are used is not visible in this excerpt)
    tests = [
        'app_stats.log', 'dns.log', 'http.log', 'notice.log', 'tor_ssl.log',
        'conn.log', 'dhcp_002.log', 'files.log', 'smtp.log', 'weird.log',
        'ftp.log', 'ssl.log', 'x509.log',
    ]
github SuperCowPowers / zat / zat / dataframe_stats.py View on Github external
def test():
    """Run the DataFrame Stats functions against a CSV dataset and print the results"""

    import os
    from zat.utils import file_utils

    # Load 'g_test_data.csv' from the 'test_data' directory (resolved relative to this file)
    csv_path = os.path.join(file_utils.relative_dir(__file__, 'test_data'), 'g_test_data.csv')
    dataframe = pd.read_csv(csv_path)
    print(dataframe.head())

    # Print each table under its heading, always on the 'name' and 'status' columns
    reports = (
        ('\nContingency Table', contingency_table),
        ('\nJoint Distribution Table', joint_distribution),
        ('\nExpected Counts Table', expected_counts),
    )
    for heading, build_table in reports:
        print(heading)
        print(build_table(dataframe, 'name', 'status'))
github SuperCowPowers / zat / zat / bro_multi_log_reader.py View on Github external
def test():
    """Run BroMultiLogReader over several file specs and print every row"""
    from zat.utils import file_utils

    # Resolve the '../data' directory relative to this file
    data_dir = file_utils.relative_dir(__file__, '../data')

    # One reader per file spec (some specs contain a '*' wildcard)
    for log_spec in ('http.log.gz', 'dhcp*.log', 'dhcp*.log.gz'):
        full_spec = os.path.join(data_dir, log_spec)
        print('Opening Data File: {:s}'.format(full_spec))
        multi_reader = BroMultiLogReader(full_spec)
        for row in multi_reader.readrows():
            print(row)
    print('Tests successful!')
github SuperCowPowers / zat / zat / utils / dir_watcher.py View on Github external
def test():
    """Exercise DirWatcher by creating and then removing a file in the watched directory"""
    watch_path = file_utils.relative_dir(__file__, '../../data')
    print('Watching Directory: %s' % watch_path)
    DirWatcher(watch_path, my_callback)

    # Create an empty 'test.tmp', wait one second, then delete it
    scratch_file = os.path.join(watch_path, 'test.tmp')
    with open(scratch_file, 'w'):
        pass
    time.sleep(1)
    os.remove(scratch_file)
github SuperCowPowers / zat / zat / log_to_sparkdf.py View on Github external
def test():
    """Test for LogToSparkDF Class

    Loads 'ftp.log' from the '../data' directory into a Spark DataFrame using
    a local Spark session, then prints its rows, schema, row count, and
    column names.
    """
    import os
    from zat.utils import file_utils
    from pyspark.sql import SparkSession

    # Spin up a local Spark Session ('local[4]' runs Spark locally with 4 worker threads)
    spark = SparkSession.builder.master('local[4]').appName('my_awesome').getOrCreate()

    # Grab a test file
    data_path = file_utils.relative_dir(__file__, '../data')
    log_path = os.path.join(data_path, 'ftp.log')

    # Convert it to a Spark DataFrame
    log_to_spark = LogToSparkDF(spark)
    spark_df = log_to_spark.create_dataframe(log_path)

    # Print out the head. show() writes to stdout itself and returns None,
    # so wrapping it in print() would emit a stray 'None' line.
    spark_df.show()

    # Print out the datatypes. printSchema() likewise prints directly and returns None.
    spark_df.printSchema()

    num_rows = spark_df.count()
    print("Number of Spark DataFrame rows: {:d}".format(num_rows))
    columns = spark_df.columns
    print("Columns: {:s}".format(','.join(columns)))
github SuperCowPowers / zat / zat / live_simulator.py View on Github external
def test():
    """Read rows from conn.log through a LiveSimulator built with max_rows=10"""

    # Build the path to conn.log under '../data' (resolved relative to this file)
    conn_log = os.path.join(file_utils.relative_dir(__file__, '../data'), 'conn.log')
    print('Opening Data File: {:s}'.format(conn_log))

    # Print every row the simulator yields
    simulator = LiveSimulator(conn_log, max_rows=10)
    for row in simulator.rows():
        print(row)
    print('Read with max_rows Test successful!')
github SuperCowPowers / zat / zat / utils / file_tailer.py View on Github external
def test():
    """Test for FileTailer Python Class"""

    # Grab a test file
    data_path = file_utils.relative_dir(__file__, '../../data')
    test_path = os.path.join(data_path, 'http.log')
    print('Opening Data File: {:s}'.format(test_path))

    # Create the Class
    tailer = FileTailer(test_path, tail=False)  # First with no tailing
    for line in tailer.readlines():
        print(line)
    print('Read with NoTail Test successful!')

    # Now include tailing (note: as an automated test this needs to timeout quickly)
    try:
        from interruptingcow import timeout

        # Spin up the class
        tailer = FileTailer(test_path)  # Tail = True
github SuperCowPowers / zat / zat / bro_log_reader.py View on Github external
def test():
    """Test for BroLogReader Python Class"""
    import pytest

    # Grab a test file
    data_path = file_utils.relative_dir(__file__, '../data')

    # For each file, create the Class and test the reader
    files = ['app_stats.log', 'conn.log', 'dhcp.log', 'dns.log', 'files.log', 'ftp.log',
             'http.log', 'notice.log', 'smtp.log', 'ssl.log', 'weird.log', 'x509.log']
    for bro_log in files:
        test_path = os.path.join(data_path, bro_log)
        print('Opening Data File: {:s}'.format(test_path))
        reader = BroLogReader(test_path, tail=False)  # First with no tailing
        for line in reader.readrows():
            print(line)
    print('Read with NoTail Test successful!')

    # Test an empty log (a log with header/close but no data rows)
    test_path = os.path.join(data_path, 'http_empty.log')
    reader = BroLogReader(test_path)
    for line in reader.readrows():