How to use the zat.bro_log_reader function in zat

To help you get started, we’ve selected a few zat examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github SuperCowPowers / zat / examples / file_log_vtquery.py View on Github external
sys.exit(1)

    # Sanity check that this is a file log
    if 'files' not in args.bro_log:
        print('This example only works with Zeek files.log files..')
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Create a VirusTotal Query Class
        vtq = vt_query.VTQuery()

        # Run the bro reader on a given log file
        reader = bro_log_reader.BroLogReader(args.bro_log, tail=True)
        for row in reader.readrows():
            file_sha = row.get('sha256', '-') # Zeek uses - for empty field
            if file_sha == '-':
                file_sha = row.get('sha1', '-') # Zeek uses - for empty field
                if file_sha == '-':
                    print('Should not find a sha256 or a sha1 key! Skipping...')
                    continue

            # Make the query with either sha
            results = vtq.query_file(file_sha)
            if results.get('positives', 0) > 1: # At least two hits
                pprint(results)
github SuperCowPowers / zat / examples / tor_and_port_count.py View on Github external
# Check for unknown args
    if commands:
        print('Unrecognized args: %s' % commands)
        sys.exit(1)

    # Sanity check that this is a ssl log
    if 'ssl' not in args.bro_log:
        print('This example only works with Zeek ssl.log files..')
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Run the bro reader on the ssl.log file looking for potential Tor connections
        reader = bro_log_reader.BroLogReader(args.bro_log, tail=args.t)
        # Just a counter to keep an eye on how many possible Tor connections we identify
        number = 0
        # An empty list to use for the port statistics
        ports = []

        for row in reader.readrows():
            # Add the destination port to the list of ports
            ports.append(row['id.resp_p'])
            # Pull out the Certificate Issuer
            try:
                issuer = row['issuer']
            except KeyError:
                print('Could not find the issuer field in your ssl.log. Please verify your log file.')
                sys.exit(1)
            # Check if the issuer matches the known Tor format
            if issuer_regex.match(issuer):
github SuperCowPowers / zat / examples / cert_checker.py View on Github external
if 'x509' not in args.bro_log:
        print('This example only works with Zeek x509.log files..')
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Create a VirusTotal Query Class
        vtq = vt_query.VTQuery()

        # These domains may be spoofed with a certificate issued by 'Let's Encrypt'
        spoofed_domains = set(['paypal', 'gmail', 'google', 'apple','ebay', 'amazon'])

        # Run the bro reader on the x509.log file looking for spoofed domains
        reader = bro_log_reader.BroLogReader(args.bro_log, tail=True)
        for row in reader.readrows():

            # Pull out the Certificate Issuer
            issuer = row['certificate.issuer']
            if "Let's Encrypt" in issuer:

                # Check if the certificate subject has any spoofed domains
                subject = row['certificate.subject']
                domain = subject[3:] # Just chopping off the 'CN=' part
                if any([domain in subject for domain in spoofed_domains]):
                    print('\n<<< Suspicious Certificate Found >>>')
                    pprint(row)

                    # Make a Virus Total query with the spoofed domain (just for fun)
                    results = vtq.query_url(domain)
                    if results.get('positives', 0) >= 2: # At least two hits
github SuperCowPowers / zat / examples / http_user_agents.py View on Github external
if commands:
        print('Unrecognized args: %s' % commands)
        sys.exit(1)

    # Sanity check that this is a http log
    if 'http' not in args.bro_log:
        print('This example only works with Zeek http.log files..')
        sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Run the bro reader on a given log file counting up user agents
        http_agents = Counter()
        reader = bro_log_reader.BroLogReader(args.bro_log, tail=args.t)
        for count, row in enumerate(reader.readrows()):
            # Track count
            http_agents[row['user_agent']] += 1

            # Every hundred rows report agent counts (least common)
            if not args.s:
                if count%100==0:
                    print('\n<<>>')
                    pprint(http_agents.most_common()[:-50:-1])

        # Also report at the end (if there is one)
        print('\nLeast Common User Agents:')
        pprint(http_agents.most_common()[:-50:-1])
github SuperCowPowers / zat / examples / anomaly_detection_streaming.py View on Github external
sys.exit(1)

    # File may have a tilde in it
    if args.bro_log:
        args.bro_log = os.path.expanduser(args.bro_log)

        # Sanity check dns log
        if 'dns' in args.bro_log:
            log_type = 'dns'
        else:
            print('This example only works with Zeek with dns.log files..')
            sys.exit(1)

        # Create a Zeek log reader
        print('Opening Data File: {:s}'.format(args.bro_log))
        reader = bro_log_reader.BroLogReader(args.bro_log, tail=True)

        # Create a Zeek IDS log live simulator
        print('Opening Data File: {:s}'.format(args.bro_log))
        reader = live_simulator.LiveSimulator(args.bro_log, eps=10)  # 10 events per second

        # Create a Dataframe Cache
        df_cache = dataframe_cache.DataFrameCache(max_cache_time=600)  # 10 minute cache

        # Streaming Clustering Class
        batch_kmeans = MiniBatchKMeans(n_clusters=5, verbose=True)

        # Use the BroThon DataframeToMatrix class
        to_matrix = dataframe_to_matrix.DataFrameToMatrix()

        # Add each new row into the cache
        time_delta = 10
github SuperCowPowers / zat / zat / log_to_dataframe.py View on Github external
def _get_field_info(self, log_filename):
        """Internal Method: Read the log header with the ZAT log reader.

        Args:
            log_filename (string): Path handed both to BroLogReader and to
                its _parse_bro_header() call.
        Returns:
            tuple: (field_names, field_types), i.e. the second and third
                items of the four values _parse_bro_header() returns.
        """
        header_reader = bro_log_reader.BroLogReader(log_filename)
        header = header_reader._parse_bro_header(log_filename)

        # Only the names and types are needed; the first and last items are ignored
        _, names, types, _ = header
        return names, types
github SuperCowPowers / zat / zat / log_to_sparkdf.py View on Github external
def create_dataframe(self, log_filename, fillna=True):
        """ Create a Spark dataframe from a Bro/Zeek log file
            Args:
               log_filename (string): The full path to the Zeek log
               fillna (bool): Fill in NA/NaN values (default=True)
        """

        # Create a Zeek log reader just to read in the header for names and types
        _bro_reader = bro_log_reader.BroLogReader(log_filename)
        _, field_names, field_types, _ = _bro_reader._parse_bro_header(log_filename)

        # Get the appropriate types for the Spark Dataframe
        spark_schema = self.build_spark_schema(field_names, field_types)

        # Now actually read the Zeek Log using Spark read CSV
        _df = self.spark.read.csv(log_filename, schema=spark_schema, sep='\t', comment="#", nullValue='-')

        ''' Secondary processing (cleanup)
            - Fix column names with '.' in them
            - Fill in Nulls (optional)
            - timestamp convert
            - boolean convert
        '''

        # Fix column names
github SuperCowPowers / zat / zat / bro_multi_log_reader.py View on Github external
# For each file (may be just one) create a BroLogReader and use it
        for self._filepath in self._files:

            # Check if the file is zipped
            tmp = None
            if self._filepath.endswith('.gz'):
                tmp = tempfile.NamedTemporaryFile(delete=False)
                with gzip.open(self._filepath, 'rb') as f_in, open(tmp.name, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)

                # Set the file path to the new temp file
                self._filepath = tmp.name

            # Create a BroLogReader
            reader = bro_log_reader.BroLogReader(self._filepath)
            for row in reader.readrows():
                yield row

            # Clean up any temp files
            try:
                if tmp:
                    os.remove(tmp.name)
                    print('Removed temporary file {:s}...'.format(tmp.name))
            except IOError:
                pass
github SuperCowPowers / zat / examples / risky_dns.py View on Github external
try:
            vtq = pickle.load(open('vtq.pkl', 'rb'))
            print('Opening VirusTotal Query Cache (cache_size={:d})...'.format(vtq.size))
        except IOError:
            vtq = vt_query.VTQuery(max_cache_time=60*24*7) # One week cache

        # See our 'Risky Domains' Notebook for the analysis and
        # statistical methods used to compute this risky set of TLDs
        risky_tlds = set(['info', 'tk', 'xyz', 'online', 'club', 'ru', 'website', 'in', 'ws',
                          'top', 'site', 'work', 'biz', 'name', 'tech', 'loan', 'win', 'pro'])

        # Launch long lived process with signal catcher
        with signal_utils.signal_catcher(save_vtq):

            # Run the bro reader on the dns.log file looking for risky TLDs
            reader = bro_log_reader.BroLogReader(args.bro_log)
            for row in reader.readrows():

                # Pull out the TLD
                query = row['query']
                tld = tldextract.extract(query).suffix

                # Check if the TLD is in the risky group
                if tld in risky_tlds:
                    # Make the query with the full query
                    results = vtq.query_url(query)
                    if results.get('positives', 0) > 3: # At least four hits
                        print('\nRisky Domain DNS Query Found')
                        print('From: {:s} To: {:s} QType: {:s} RCode: {:s}'.format(row['id.orig_h'],
                               row['id.resp_h'], row['qtype_name'], row['rcode_name']))
                        pprint(results)
github SuperCowPowers / zat / zat / live_simulator.py View on Github external
def __init__(self, filepath, eps=10, max_rows=None, only_once=False):
        """Set up the LiveSimulator: event timer, log reader and run limits
           Args:
               filepath (str): Log file path handed to the BroLogReader
               eps (int): Events Per Second that the simulator will emit events (default = 10)
               max_rows (int): The maximum number of rows to generate (default = None (go forever))
               only_once (bool): Stored on the instance unchanged (default = False);
                                 presumably limits the replay to a single pass -- TODO confirm
        """

        # Delay between events is sampled from a normal distribution
        # centered on 1.0/eps with a spread of 0.5/eps
        mean_gap = 1.0/float(eps)
        gap_spread = .5/float(eps)
        sampled_gaps = np.random.normal(mean_gap, gap_spread, size=1000)

        # Clamp negative samples to zero, then cycle over the 1000 precomputed delays
        self.eps_timer = itertools.cycle([max(0, gap) for gap in sampled_gaps])

        # Zeek log reader over the given file (tail disabled)
        self.log_reader = bro_log_reader.BroLogReader(filepath, tail=False)

        # Keep the run limits for later use
        self.max_rows = max_rows
        self.only_once = only_once