diff --git a/collector-v9.py b/collector-v9.py index d7e54e5..13dd4eb 100644 --- a/collector-v9.py +++ b/collector-v9.py @@ -1,13 +1,14 @@ #!/usr/bin/env python3 """ -Netflow V9 collector implementation in Python 3. +Netflow V9 collector and parser implementation in Python 3. Created for learning purposes and unsatisfying alternatives. This script is specifically implemented in combination with softflowd. See https://github.com/djmdjm/softflowd (C) 2016 Dominik Pataky +Licensed under MIT License. See LICENSE. """ from collections import namedtuple @@ -105,34 +106,39 @@ field_types = { 89: 'FORWARDING STATUS', } -# We need to save the templates our NetFlow device send over time. Templates -# are not resended every time a flow is sent to the collector. -_templates = {} - -sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) -sock.bind((HOST, PORT)) -print("Listening on interface {}:{}".format(HOST, PORT)) - class DataRecord: - """Should hold a 'data' dict with keys=field_type and value (in bytes). + """This is a 'flow' as we want it from our source. What it contains is + variable in NetFlow V9, so to work with the data you have to analyze the + data dict keys (which are integers and can be mapped with the field_types + dict). + + Should hold a 'data' dict with keys=field_type (integer) and value (in bytes). """ - data = {} + def __init__(self): + self.data = {} + + def __repr__(self): + return "".format(self.data) class DataFlowSet: - """ + """Holds one or multiple DataRecord which are all defined after the same + template. This template is referenced in the field 'flowset_id' of this + DataFlowSet and must not be zero. """ def __init__(self, data, templates): pack = struct.unpack('!HH', data[:4]) - self.template_id = pack[0] # flowset_id is reference to template_id + self.template_id = pack[0] # flowset_id is reference to a template_id self.length = pack[1] self.flows = [] offset = 4 template = templates[self.template_id] - padding_size = 4 - (self.length % 4) + + # As the field lengths are variable V9 has padding to next 32 Bit + padding_size = 4 - (self.length % 4) # 4 Byte while offset <= (self.length - padding_size): new_record = DataRecord() @@ -142,23 +148,14 @@ class DataFlowSet: fkey = field_types[field.field_type] fdata = None + # The length of the value byte slice is defined in the template dataslice = data[offset:offset+flen] - if flen == 1: - fdata = struct.unpack('!B', dataslice) - elif flen == 2: - fdata = struct.unpack('!H', dataslice) - elif flen == 4: - fdata = struct.unpack('!I', dataslice) - elif flen == 8: - fdata = struct.unpack('!Q', dataslice) - elif flen == 16: - # IPv6 address - fdata = int.from_bytes(dataslice, byteorder='big') - else: - raise ValueError("Length of field was not 1/2/4/8/16") + # Better solution than struct.unpack with variable field length + fdata = int.from_bytes(dataslice, byteorder='big') new_record.data[fkey] = fdata + offset += flen self.flows.append(new_record) @@ -177,7 +174,7 @@ class TemplateField: ) -class Template: +class TemplateRecord: """A template record contained in a TemplateFlowSet. """ def __init__(self, template_id, field_count, fields): @@ -186,7 +183,7 @@ class Template: self.fields = fields def __repr__(self): - return "