Improve collector script and restructure code
- Moved the netflow library out of the src directory - The UDP listener was restructured so that multiple threads can receive packets and push them into a queue. The main thread then pulls the packets off the queue one at a time and processes them. This means that the collector will never drop a packet because it was blocked on processing the previous one. - Adds a property to the ExportPacket class to expose if any new templates are contained in it. - The collector will now only retry parsing past packets when a new template is found. Also refactored the retry logic a bit to remove duplicate code (retrying just pushes the packets back into the main queue to be processed again like all the other packets). - The collector no longer continually reads and writes to/from the disk. It just caches the data in memory until it exits instead.
This commit is contained in:
parent
ce2be709d6
commit
ef151f8d28
186
main.py
Normal file → Executable file
186
main.py
Normal file → Executable file
|
@ -8,133 +8,123 @@ Copyright 2017-2019 Dominik Pataky <dev@bitkeks.eu>
|
||||||
Licensed under MIT License. See LICENSE.
|
Licensed under MIT License. See LICENSE.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
|
||||||
import argparse
|
import argparse
|
||||||
|
from collections import namedtuple
|
||||||
|
from queue import Queue
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
import sys
|
import sys
|
||||||
import socketserver
|
import socketserver
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
import json
|
|
||||||
import os.path
|
from netflow.v9 import ExportPacket, TemplateNotRecognized
|
||||||
|
|
||||||
|
|
||||||
logging.getLogger().setLevel(logging.INFO)
|
__log__ = logging.getLogger(__name__)
|
||||||
ch = logging.StreamHandler(sys.stdout)
|
|
||||||
ch.setLevel(logging.DEBUG)
|
|
||||||
formatter = logging.Formatter('%(message)s')
|
|
||||||
ch.setFormatter(formatter)
|
|
||||||
logging.getLogger().addHandler(ch)
|
|
||||||
|
|
||||||
try:
|
# Amount of time to wait before dropping an undecodable ExportPacket
|
||||||
from netflow.collector_v9 import ExportPacket, TemplateNotRecognized
|
PACKET_TIMEOUT = 60 * 60
|
||||||
except ImportError:
|
|
||||||
logging.warning("Netflow v9 not installed as package! Running from directory.")
|
|
||||||
from src.netflow.collector_v9 import ExportPacket, TemplateNotRecognized
|
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description="A sample netflow collector.")
|
RawPacket = namedtuple('RawPacket', ['ts', 'data'])
|
||||||
parser.add_argument("--host", type=str, default="",
|
|
||||||
help="collector listening address")
|
|
||||||
parser.add_argument("--port", "-p", type=int, default=2055,
|
|
||||||
help="collector listener port")
|
|
||||||
parser.add_argument("--file", "-o", type=str, dest="output_file",
|
|
||||||
default="{}.json".format(int(time.time())),
|
|
||||||
help="collector export JSON file")
|
|
||||||
parser.add_argument("--debug", "-D", action="store_true",
|
|
||||||
help="Enable debug output")
|
|
||||||
|
|
||||||
|
|
||||||
class SoftflowUDPHandler(socketserver.BaseRequestHandler):
|
|
||||||
# We need to save the templates our NetFlow device
|
|
||||||
# send over time. Templates are not resended every
|
|
||||||
# time a flow is sent to the collector.
|
|
||||||
templates = {}
|
|
||||||
buffered = {}
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def set_output_file(cls, path):
|
|
||||||
cls.output_file = path
|
|
||||||
|
|
||||||
|
class QueuingRequestHandler(socketserver.BaseRequestHandler):
|
||||||
def handle(self):
|
def handle(self):
|
||||||
if not os.path.exists(self.output_file):
|
|
||||||
with open(self.output_file, 'w') as fh:
|
|
||||||
json.dump({}, fh)
|
|
||||||
|
|
||||||
with open(self.output_file, 'r') as fh:
|
|
||||||
try:
|
|
||||||
existing_data = json.load(fh)
|
|
||||||
except json.decoder.JSONDecodeError as ex:
|
|
||||||
logging.error("Malformed JSON output file. Cannot read existing data, aborting.")
|
|
||||||
return
|
|
||||||
|
|
||||||
data = self.request[0]
|
data = self.request[0]
|
||||||
host = self.client_address[0]
|
self.server.queue.put(RawPacket(time.time(), data))
|
||||||
logging.debug("Received data from {}, length {}".format(host, len(data)))
|
__log__.debug(
|
||||||
|
"Recieved %d bytes of data from %s", len(data), self.client_address[0]
|
||||||
|
)
|
||||||
|
|
||||||
export = None
|
|
||||||
try:
|
|
||||||
export = ExportPacket(data, self.templates)
|
|
||||||
except TemplateNotRecognized:
|
|
||||||
self.buffered[time.time()] = data
|
|
||||||
logging.warning("Received data with unknown template, data stored in buffer!")
|
|
||||||
return
|
|
||||||
|
|
||||||
if not export:
|
class QueuingUDPListener(socketserver.ThreadingUDPServer):
|
||||||
logging.error("Error with exception handling while disecting export, export is None")
|
"""A threaded UDP server that adds a (time, data) tuple to a queue for
|
||||||
return
|
every request it sees
|
||||||
|
"""
|
||||||
|
def __init__(self, interface, queue):
|
||||||
|
self.queue = queue
|
||||||
|
super().__init__(interface, QueuingRequestHandler)
|
||||||
|
|
||||||
logging.debug("Processed ExportPacket with {} flows.".format(export.header.count))
|
|
||||||
logging.debug("Size of buffer: {}".format(len(self.buffered)))
|
|
||||||
|
|
||||||
# In case the export held some new templates
|
def get_export_packets(host, port):
|
||||||
self.templates.update(export.templates)
|
"""A generator that will yield ExportPacket objects until it is killed
|
||||||
|
or has a truthy value sent to it"""
|
||||||
|
|
||||||
remain_buffered = {}
|
__log__.info("Starting the NetFlow listener on {}:{}".format(host, port))
|
||||||
processed = []
|
queue = Queue()
|
||||||
for timestamp, data in self.buffered.items():
|
server = QueuingUDPListener((host, port), queue)
|
||||||
|
thread = threading.Thread(target=server.serve_forever)
|
||||||
|
thread.start()
|
||||||
|
|
||||||
|
# Process packets from the queue
|
||||||
|
templates = {}
|
||||||
|
to_retry = []
|
||||||
|
try:
|
||||||
|
while True:
|
||||||
|
pkt = queue.get()
|
||||||
try:
|
try:
|
||||||
buffered_export = ExportPacket(data, self.templates)
|
export = ExportPacket(pkt.data, templates)
|
||||||
processed.append(timestamp)
|
|
||||||
except TemplateNotRecognized:
|
except TemplateNotRecognized:
|
||||||
remain_buffered[timestamp] = data
|
if time.time() - pkt.ts > PACKET_TIMEOUT:
|
||||||
logging.debug("Template of buffered ExportPacket still not recognized")
|
__log__.warning("Dropping an old and undecodable ExportPacket")
|
||||||
|
else:
|
||||||
|
to_retry.append(pkt)
|
||||||
|
__log__.debug("Failed to decode an ExportPacket - will "
|
||||||
|
"re-attempt when a new template is dicovered")
|
||||||
continue
|
continue
|
||||||
logging.debug("Processed buffered ExportPacket with {} flows.".format(buffered_export.header.count))
|
|
||||||
existing_data[timestamp] = [flow.data for flow in buffered_export.flows]
|
|
||||||
|
|
||||||
# Delete processed items from the buffer
|
__log__.debug("Processed an ExportPacket with %d flows.",
|
||||||
for pro in processed:
|
export.header.count)
|
||||||
del self.buffered[pro]
|
|
||||||
|
|
||||||
# Update the buffer
|
# If any new templates were discovered, dump the unprocessable
|
||||||
self.buffered.update(remain_buffered)
|
# data back into the queue and try to decode them again
|
||||||
|
if export.contains_new_templates and to_retry:
|
||||||
# Append new flows
|
__log__.debug("Recieved new template(s)")
|
||||||
existing_data[time.time()] = [flow.data for flow in export.flows]
|
__log__.debug("Will re-attempt to decode %d old ExportPackets",
|
||||||
|
len(to_retry))
|
||||||
with open(self.output_file, 'w') as fh:
|
for p in to_retry:
|
||||||
json.dump(existing_data, fh)
|
queue.put(p)
|
||||||
|
to_retry.clear()
|
||||||
|
|
||||||
|
stop = yield pkt.ts, export
|
||||||
|
if stop:
|
||||||
|
break
|
||||||
|
finally:
|
||||||
|
__log__.info("Shutting down the NetFlow listener")
|
||||||
|
server.shutdown()
|
||||||
|
server.server_close()
|
||||||
|
thread.join()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
parser = argparse.ArgumentParser(description="A sample netflow collector.")
|
||||||
|
parser.add_argument("--host", type=str, default="0.0.0.0",
|
||||||
|
help="collector listening address")
|
||||||
|
parser.add_argument("--port", "-p", type=int, default=2055,
|
||||||
|
help="collector listener port")
|
||||||
|
parser.add_argument("--file", "-o", type=str, dest="output_file",
|
||||||
|
default="{}.json".format(int(time.time())),
|
||||||
|
help="collector export JSON file")
|
||||||
|
parser.add_argument("--debug", "-D", action="store_true",
|
||||||
|
help="Enable debug output")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
|
||||||
|
|
||||||
if args.debug:
|
if args.debug:
|
||||||
logging.getLogger().setLevel(logging.DEBUG)
|
__log__.setLevel(logging.DEBUG)
|
||||||
|
|
||||||
output_file = args.output_file
|
|
||||||
SoftflowUDPHandler.set_output_file(output_file)
|
|
||||||
|
|
||||||
host = args.host
|
|
||||||
port = args.port
|
|
||||||
logging.info("Listening on interface {}:{}".format(host, port))
|
|
||||||
server = socketserver.UDPServer((host, port), SoftflowUDPHandler)
|
|
||||||
|
|
||||||
|
data = {}
|
||||||
try:
|
try:
|
||||||
logging.debug("Starting the NetFlow listener")
|
# TODO: For a long-running processes, this will consume loads of memory
|
||||||
server.serve_forever(poll_interval=0.5)
|
for ts, export in get_export_packets(args.host, args.port):
|
||||||
except (IOError, SystemExit):
|
data[ts] = [flow.data for flow in export.flows]
|
||||||
raise
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
raise
|
pass
|
||||||
|
|
||||||
server.server_close()
|
if data:
|
||||||
|
__log__.info("Outputting collected data to '%s'", args.output_file)
|
||||||
|
with open(args.output_file, 'w') as f:
|
||||||
|
json.dump(data, f)
|
||||||
|
else:
|
||||||
|
__log__.info("No data collected")
|
||||||
|
|
|
@ -11,12 +11,10 @@ Copyright 2017, 2018 Dominik Pataky <dev@bitkeks.eu>
|
||||||
Licensed under MIT License. See LICENSE.
|
Licensed under MIT License. See LICENSE.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import socket
|
|
||||||
import struct
|
import struct
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
field_types = {
|
FIELD_TYPES = {
|
||||||
0: 'UNKNOWN_FIELD_TYPE', # fallback for unknown field types
|
0: 'UNKNOWN_FIELD_TYPE', # fallback for unknown field types
|
||||||
|
|
||||||
# Cisco specs for NetFlow v9
|
# Cisco specs for NetFlow v9
|
||||||
|
@ -153,10 +151,14 @@ field_types = {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class TemplateNotRecognized(KeyError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class DataRecord:
|
class DataRecord:
|
||||||
"""This is a 'flow' as we want it from our source. What it contains is
|
"""This is a 'flow' as we want it from our source. What it contains is
|
||||||
variable in NetFlow V9, so to work with the data you have to analyze the
|
variable in NetFlow V9, so to work with the data you have to analyze the
|
||||||
data dict keys (which are integers and can be mapped with the field_types
|
data dict keys (which are integers and can be mapped with the FIELD_TYPES
|
||||||
dict).
|
dict).
|
||||||
|
|
||||||
Should hold a 'data' dict with keys=field_type (integer) and value (in bytes).
|
Should hold a 'data' dict with keys=field_type (integer) and value (in bytes).
|
||||||
|
@ -195,7 +197,7 @@ class DataFlowSet:
|
||||||
|
|
||||||
for field in template.fields:
|
for field in template.fields:
|
||||||
flen = field.field_length
|
flen = field.field_length
|
||||||
fkey = field_types[field.field_type]
|
fkey = FIELD_TYPES[field.field_type]
|
||||||
fdata = None
|
fdata = None
|
||||||
|
|
||||||
# The length of the value byte slice is defined in the template
|
# The length of the value byte slice is defined in the template
|
||||||
|
@ -218,20 +220,18 @@ class DataFlowSet:
|
||||||
|
|
||||||
|
|
||||||
class TemplateField:
|
class TemplateField:
|
||||||
"""A field with type identifier and length.
|
"""A field with type identifier and length."""
|
||||||
"""
|
|
||||||
def __init__(self, field_type, field_length):
|
def __init__(self, field_type, field_length):
|
||||||
self.field_type = field_type # integer
|
self.field_type = field_type # integer
|
||||||
self.field_length = field_length # bytes
|
self.field_length = field_length # bytes
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<TemplateField type {}:{}, length {}>".format(
|
return "<TemplateField type {}:{}, length {}>".format(
|
||||||
self.field_type, field_types[self.field_type], self.field_length)
|
self.field_type, FIELD_TYPES[self.field_type], self.field_length)
|
||||||
|
|
||||||
|
|
||||||
class TemplateRecord:
|
class TemplateRecord:
|
||||||
"""A template record contained in a TemplateFlowSet.
|
"""A template record contained in a TemplateFlowSet."""
|
||||||
"""
|
|
||||||
def __init__(self, template_id, field_count, fields):
|
def __init__(self, template_id, field_count, fields):
|
||||||
self.template_id = template_id
|
self.template_id = template_id
|
||||||
self.field_count = field_count
|
self.field_count = field_count
|
||||||
|
@ -240,7 +240,7 @@ class TemplateRecord:
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<TemplateRecord {} with {} fields: {}>".format(
|
return "<TemplateRecord {} with {} fields: {}>".format(
|
||||||
self.template_id, self.field_count,
|
self.template_id, self.field_count,
|
||||||
' '.join([field_types[field.field_type] for field in self.fields]))
|
' '.join([FIELD_TYPES[field.field_type] for field in self.fields]))
|
||||||
|
|
||||||
|
|
||||||
class TemplateFlowSet:
|
class TemplateFlowSet:
|
||||||
|
@ -268,7 +268,7 @@ class TemplateFlowSet:
|
||||||
# Get all fields of this template
|
# Get all fields of this template
|
||||||
offset += 4
|
offset += 4
|
||||||
field_type, field_length = struct.unpack('!HH', data[offset:offset+4])
|
field_type, field_length = struct.unpack('!HH', data[offset:offset+4])
|
||||||
if field_type not in field_types:
|
if field_type not in FIELD_TYPES:
|
||||||
field_type = 0 # Set field_type to UNKNOWN_FIELD_TYPE as fallback
|
field_type = 0 # Set field_type to UNKNOWN_FIELD_TYPE as fallback
|
||||||
field = TemplateField(field_type, field_length)
|
field = TemplateField(field_type, field_length)
|
||||||
fields.append(field)
|
fields.append(field)
|
||||||
|
@ -288,8 +288,7 @@ class TemplateFlowSet:
|
||||||
|
|
||||||
|
|
||||||
class Header:
|
class Header:
|
||||||
"""The header of the ExportPacket.
|
"""The header of the ExportPacket."""
|
||||||
"""
|
|
||||||
def __init__(self, data):
|
def __init__(self, data):
|
||||||
pack = struct.unpack('!HHIIII', data[:20])
|
pack = struct.unpack('!HHIIII', data[:20])
|
||||||
|
|
||||||
|
@ -302,11 +301,11 @@ class Header:
|
||||||
|
|
||||||
|
|
||||||
class ExportPacket:
|
class ExportPacket:
|
||||||
"""The flow record holds the header and all template and data flowsets.
|
"""The flow record holds the header and all template and data flowsets."""
|
||||||
"""
|
|
||||||
def __init__(self, data, templates):
|
def __init__(self, data, templates):
|
||||||
self.header = Header(data)
|
self.header = Header(data)
|
||||||
self.templates = templates
|
self.templates = templates
|
||||||
|
self._new_templates = False
|
||||||
self.flows = []
|
self.flows = []
|
||||||
|
|
||||||
offset = 20
|
offset = 20
|
||||||
|
@ -314,6 +313,12 @@ class ExportPacket:
|
||||||
flowset_id = struct.unpack('!H', data[offset:offset+2])[0]
|
flowset_id = struct.unpack('!H', data[offset:offset+2])[0]
|
||||||
if flowset_id == 0: # TemplateFlowSet always have id 0
|
if flowset_id == 0: # TemplateFlowSet always have id 0
|
||||||
tfs = TemplateFlowSet(data[offset:])
|
tfs = TemplateFlowSet(data[offset:])
|
||||||
|
# Check for any new/changed templates
|
||||||
|
if not self._new_templates:
|
||||||
|
for id_, template in tfs.templates.items():
|
||||||
|
if id_ not in self.templates or self.templates[id_] != template:
|
||||||
|
self._new_templates = True
|
||||||
|
break
|
||||||
self.templates.update(tfs.templates)
|
self.templates.update(tfs.templates)
|
||||||
offset += tfs.length
|
offset += tfs.length
|
||||||
else:
|
else:
|
||||||
|
@ -321,10 +326,10 @@ class ExportPacket:
|
||||||
self.flows += dfs.flows
|
self.flows += dfs.flows
|
||||||
offset += dfs.length
|
offset += dfs.length
|
||||||
|
|
||||||
|
@property
|
||||||
|
def contains_new_templates(self):
|
||||||
|
return self._new_templates
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<ExportPacket version {} counting {} records>".format(
|
return "<ExportPacket version {} counting {} records>".format(
|
||||||
self.header.version, self.header.count)
|
self.header.version, self.header.count)
|
||||||
|
|
||||||
|
|
||||||
class TemplateNotRecognized(KeyError):
|
|
||||||
pass
|
|
Loading…
Reference in a new issue