Add support for v1 and v5 NetFlow packets

Thanks to @alieissa for the initial v1 and v5 code
Carey Metcalfe 2019-10-16 23:23:51 -04:00
parent 186b648c4d
commit 96817f1f8d
6 changed files with 233 additions and 19 deletions


@@ -26,6 +26,16 @@ Pair = namedtuple('Pair', ['src', 'dest'])
def resolve_hostname(ip):
    return socket.getfqdn(ip)
def fallback(d, keys):
    for k in keys:
        try:
            return d[k]
        except KeyError:
            pass
    raise KeyError(", ".join(keys))
class Connection:
    """Connection model for two flows.
    The direction of the data flow can be seen by looking at the size.
@@ -37,7 +47,10 @@ class Connection:
        if not flow1 or not flow2:
            raise Exception("A connection requires two flows")
        if flow1['IN_BYTES'] >= flow2['IN_BYTES']:
        # Assume the size that sent the most data is the source
        size1 = fallback(flow1, ['IN_BYTES', 'IN_OCTETS'])
        size2 = fallback(flow2, ['IN_BYTES', 'IN_OCTETS'])
        if size1 >= size2:
            src = flow1
            dest = flow2
        else:
@@ -47,9 +60,9 @@ class Connection:
        ips = self.get_ips(src)
        self.src = ips.src
        self.dest = ips.dest
        self.src_port = src['L4_SRC_PORT']
        self.dest_port = src['L4_DST_PORT']
        self.size = src['IN_BYTES']
        self.src_port = fallback(src, ['L4_SRC_PORT', 'SRC_PORT'])
        self.dest_port = fallback(src, ['L4_DST_PORT', 'DST_PORT'])
        self.size = fallback(src, ['IN_BYTES', 'IN_OCTETS'])
        # Duration is given in milliseconds
        self.duration = src['LAST_SWITCHED'] - src['FIRST_SWITCHED']
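
The fallback() helper is what lets the Connection model accept both naming schemes: v9 templates expose IN_BYTES and L4_SRC_PORT/L4_DST_PORT, while the fixed-format v1/v5 records added below expose IN_OCTETS and SRC_PORT/DST_PORT. A minimal sketch of the lookup behaviour, assuming the fallback() defined above is in scope (the two sample records are hypothetical, not taken from this commit):

v9_record = {'IN_BYTES': 1024, 'L4_SRC_PORT': 443, 'L4_DST_PORT': 51234}
v5_record = {'IN_OCTETS': 2048, 'SRC_PORT': 53, 'DST_PORT': 51234}

for record in (v9_record, v5_record):
    size = fallback(record, ['IN_BYTES', 'IN_OCTETS'])
    src_port = fallback(record, ['L4_SRC_PORT', 'SRC_PORT'])
    print(size, src_port)
# prints "1024 443", then "2048 53"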

main.py

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
"""
Example collector script for NetFlow v9.
Example collector script for NetFlow v1, v5, and v9.
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
Copyright 2017-2019 Dominik Pataky <dev@bitkeks.eu>
@@ -18,7 +18,7 @@ import socketserver
import threading
import time
from netflow.v9 import ExportPacket, TemplateNotRecognized
from netflow import parse_packet, TemplateNotRecognized, UnknownNetFlowVersion
__log__ = logging.getLogger(__name__)
@@ -26,6 +26,7 @@ __log__ = logging.getLogger(__name__)
# Amount of time to wait before dropping an undecodable ExportPacket
PACKET_TIMEOUT = 60 * 60
# TODO: Add source IP
RawPacket = namedtuple('RawPacket', ['ts', 'data'])
class QueuingRequestHandler(socketserver.BaseRequestHandler):
@@ -110,24 +111,27 @@ class NetFlowListener(threading.Thread):
                    continue
                try:
                    export = ExportPacket(pkt.data, templates)
                    export = parse_packet(pkt.data, templates)
                except UnknownNetFlowVersion as e:
                    __log__.error("%s, ignoring the packet", e)
                    continue
                except TemplateNotRecognized:
                    if time.time() - pkt.ts > PACKET_TIMEOUT:
                        __log__.warning("Dropping an old and undecodable ExportPacket")
                        __log__.warning("Dropping an old and undecodable v9 ExportPacket")
                    else:
                        to_retry.append(pkt)
                        __log__.debug("Failed to decode a ExportPacket - will "
                        __log__.debug("Failed to decode a v9 ExportPacket - will "
                                      "re-attempt when a new template is discovered")
                    continue
                __log__.debug("Processed an ExportPacket with %d flows.",
                __log__.debug("Processed a v%d ExportPacket with %d flows.",
                              export.header.version, export.header.count)
                # If any new templates were discovered, dump the unprocessable
                # data back into the queue and try to decode them again
                if export.contains_new_templates and to_retry:
                if (export.header.version == 9 and export.contains_new_templates and to_retry):
                    __log__.debug("Received new template(s)")
                    __log__.debug("Will re-attempt to decode %d old ExportPackets",
                    __log__.debug("Will re-attempt to decode %d old v9 ExportPackets",
                                  len(to_retry))
                    for p in to_retry:
                        self.input.put(p)
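
Only v9 needs the template cache and the retry queue: v1 and v5 records have a fixed layout, so parse_packet can never raise TemplateNotRecognized for them, and only a v9 packet can become decodable later once its template arrives. A stripped-down sketch of that decision logic, assuming parse_packet and the exceptions imported above (handle() and backlog are illustrative names, not part of this commit):

templates = {}   # shared v9 template cache, filled as template flowsets arrive
backlog = []     # v9 packets whose template has not been seen yet

def handle(raw_packet):
    try:
        return parse_packet(raw_packet.data, templates)
    except UnknownNetFlowVersion:
        return None                  # unsupported version: drop it
    except TemplateNotRecognized:
        backlog.append(raw_packet)   # retry after the next template packet
        return None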

netflow/__init__.py (new file)

@@ -0,0 +1,34 @@
#!/usr/bin/env python
import struct

from netflow.v1 import V1ExportPacket
from netflow.v5 import V5ExportPacket
from netflow.v9 import V9ExportPacket, TemplateNotRecognized

__all__ = ["TemplateNotRecognized", "UnknownNetFlowVersion", "parse_packet"]


class UnknownNetFlowVersion(Exception):
    def __init__(self, data, version):
        self.data = data
        self.version = version
        r = repr(data)
        data_str = ("{:.25}..." if len(r) >= 28 else "{}").format(r)
        super().__init__(
            "Unknown NetFlow version {} for data {}".format(version, data_str)
        )


def get_netflow_version(data):
    return struct.unpack('!H', data[:2])[0]


def parse_packet(data, templates):
    version = get_netflow_version(data)
    if version == 1:
        return V1ExportPacket(data)
    elif version == 5:
        return V5ExportPacket(data)
    elif version == 9:
        return V9ExportPacket(data, templates)
    raise UnknownNetFlowVersion(data, version)
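
A minimal usage sketch of the new dispatch function outside of main.py, using a plain UDP socket; the port (2055), the buffer size, and the lack of TemplateNotRecognized handling are simplifying assumptions, not part of this commit:

import socket

from netflow import parse_packet

templates = {}  # shared across packets so v9 templates persist
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("0.0.0.0", 2055))

while True:
    payload, source = sock.recvfrom(4096)
    export = parse_packet(payload, templates)
    print("v{} packet from {} with {} flows".format(
        export.header.version, source[0], len(export.flows)))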

netflow/v1.py (new file)

@@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""
Netflow V1 collector and parser implementation in Python 3.
Created purely for fun. Not battle tested, nor will it be.
Reference: https://www.cisco.com/c/en/us/td/docs/net_mgmt/netflow_collection_engine/3-6/user/guide/format.html
This script is specifically implemented in combination with softflowd.
See https://github.com/djmdjm/softflowd
"""
import struct


class DataFlow:
    """Holds one v1 DataRecord"""
    length = 48

    def __init__(self, data):
        self.data = {}
        self.data['IPV4_SRC_ADDR'] = struct.unpack('!I', data[:4])[0]
        self.data['IPV4_DST_ADDR'] = struct.unpack('!I', data[4:8])[0]
        self.data['NEXT_HOP'] = struct.unpack('!I', data[8:12])[0]
        self.data['INPUT'] = struct.unpack('!H', data[12:14])[0]
        self.data['OUTPUT'] = struct.unpack('!H', data[14:16])[0]
        self.data['IN_PACKETS'] = struct.unpack('!I', data[16:20])[0]
        self.data['IN_OCTETS'] = struct.unpack('!I', data[20:24])[0]
        self.data['FIRST_SWITCHED'] = struct.unpack('!I', data[24:28])[0]
        self.data['LAST_SWITCHED'] = struct.unpack('!I', data[28:32])[0]
        self.data['SRC_PORT'] = struct.unpack('!H', data[32:34])[0]
        self.data['DST_PORT'] = struct.unpack('!H', data[34:36])[0]
        # Word at 36 is used for padding
        self.data['PROTO'] = struct.unpack('!B', data[38:39])[0]
        self.data['TOS'] = struct.unpack('!B', data[39:40])[0]
        self.data['TCP_FLAGS'] = struct.unpack('!B', data[40:41])[0]
        # Data at 41-48 is padding

    def __repr__(self):
        return "<DataRecord with data {}>".format(self.data)


class Header:
    """The header of the V1ExportPacket"""
    length = 16

    def __init__(self, data):
        header = struct.unpack('!HHIII', data[:self.length])
        self.version = header[0]
        self.count = header[1]
        self.uptime = header[2]
        self.timestamp = header[3]
        self.timestamp_nano = header[4]


class V1ExportPacket:
    """The flow record holds the header and data flowsets."""

    def __init__(self, data):
        self.flows = []
        self.header = Header(data)
        offset = self.header.length
        for flow_count in range(0, self.header.count):
            flow = DataFlow(data[offset:])
            self.flows.append(flow)
            offset += flow.length

    def __repr__(self):
        return "<ExportPacket v{} with {} records>".format(
            self.header.version, self.header.count)
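
Note that the parser above keeps addresses such as IPV4_SRC_ADDR as unsigned 32-bit integers. A collector that wants printable dotted-quad strings (for example to feed resolve_hostname above) could convert them with a small helper like the following; it is not part of this commit:

import socket
import struct

def int_to_ipv4(addr):
    """Render an unpacked 32-bit address (e.g. IPV4_SRC_ADDR) as a string."""
    return socket.inet_ntoa(struct.pack('!I', addr))

# int_to_ipv4(0x7F000001) == '127.0.0.1'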

netflow/v5.py (new file)

@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""
Netflow V5 collector and parser implementation in Python 3.
Created purely for fun. Not battle tested, nor will it be.
Reference: https://www.cisco.com/c/en/us/td/docs/net_mgmt/netflow_collection_engine/3-6/user/guide/format.html
This script is specifically implemented in combination with softflowd.
See https://github.com/djmdjm/softflowd
"""
import struct


class DataFlow:
    """Holds one v5 DataRecord"""
    length = 48

    def __init__(self, data):
        self.data = {}
        self.data['IPV4_SRC_ADDR'] = struct.unpack('!I', data[:4])[0]
        self.data['IPV4_DST_ADDR'] = struct.unpack('!I', data[4:8])[0]
        self.data['NEXT_HOP'] = struct.unpack('!I', data[8:12])[0]
        self.data['INPUT'] = struct.unpack('!H', data[12:14])[0]
        self.data['OUTPUT'] = struct.unpack('!H', data[14:16])[0]
        self.data['IN_PACKETS'] = struct.unpack('!I', data[16:20])[0]
        self.data['IN_OCTETS'] = struct.unpack('!I', data[20:24])[0]
        self.data['FIRST_SWITCHED'] = struct.unpack('!I', data[24:28])[0]
        self.data['LAST_SWITCHED'] = struct.unpack('!I', data[28:32])[0]
        self.data['SRC_PORT'] = struct.unpack('!H', data[32:34])[0]
        self.data['DST_PORT'] = struct.unpack('!H', data[34:36])[0]
        # Byte 36 is used for padding
        self.data['TCP_FLAGS'] = struct.unpack('!B', data[37:38])[0]
        self.data['PROTO'] = struct.unpack('!B', data[38:39])[0]
        self.data['TOS'] = struct.unpack('!B', data[39:40])[0]
        self.data['SRC_AS'] = struct.unpack('!H', data[40:42])[0]
        self.data['DST_AS'] = struct.unpack('!H', data[42:44])[0]
        self.data['SRC_MASK'] = struct.unpack('!B', data[44:45])[0]
        self.data['DST_MASK'] = struct.unpack('!B', data[45:46])[0]
        # Word 46 is used for padding

    def __repr__(self):
        return "<DataRecord with data {}>".format(self.data)


class Header:
    """The header of the V5ExportPacket"""
    length = 24

    def __init__(self, data):
        header = struct.unpack('!HHIIIIBBH', data[:self.length])
        self.version = header[0]
        self.count = header[1]
        self.uptime = header[2]
        self.timestamp = header[3]
        self.timestamp_nano = header[4]
        self.sequence = header[5]
        self.engine_type = header[6]
        self.engine_id = header[7]
        self.sampling_interval = header[8]


class V5ExportPacket:
    """The flow record holds the header and data flowsets."""

    def __init__(self, data):
        self.flows = []
        self.header = Header(data)
        offset = self.header.length
        for flow_count in range(0, self.header.count):
            flow = DataFlow(data[offset:])
            self.flows.append(flow)
            offset += flow.length

    def __repr__(self):
        return "<ExportPacket v{} with {} records>".format(
            self.header.version, self.header.count)
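
In both the v1 and v5 records, FIRST_SWITCHED and LAST_SWITCHED are milliseconds of router uptime, while the header carries that uptime alongside an absolute UNIX timestamp. Under that standard interpretation, an absolute flow start time can be estimated with a helper like the one below; it is illustrative and not part of this commit:

def flow_start_epoch(header, record):
    """Approximate UNIX timestamp (in seconds) at which a flow started."""
    # header: a parsed v1/v5 Header; record: a DataFlow.data dict from above
    return header.timestamp - (header.uptime - record['FIRST_SWITCHED']) / 1000.0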

netflow/v9.py

@@ -4,6 +4,8 @@
Netflow V9 collector and parser implementation in Python 3.
Created for learning purposes and unsatisfying alternatives.
Reference: https://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html
This script is specifically implemented in combination with softflowd.
See https://github.com/djmdjm/softflowd
@@ -258,7 +260,7 @@ class TemplateFlowSet:
        offset = 4  # Skip header
        # Iterate through all template records in this template flowset
        while offset != self.length:
        while offset < self.length:
            pack = struct.unpack('!HH', data[offset:offset+4])
            template_id = pack[0]
            field_count = pack[1]
@@ -288,9 +290,12 @@ class TemplateFlowSet:
class Header:
    """The header of the ExportPacket."""
    """The header of the V9ExportPacket"""
    length = 20
    def __init__(self, data):
        pack = struct.unpack('!HHIIII', data[:20])
        pack = struct.unpack('!HHIIII', data[:self.length])
        self.version = pack[0]
        self.count = pack[1]  # not sure if correct. softflowd: no of flows
@@ -300,15 +305,16 @@ class Header:
        self.source_id = pack[5]
class ExportPacket:
class V9ExportPacket:
    """The flow record holds the header and all template and data flowsets."""
    def __init__(self, data, templates):
        self.header = Header(data)
        self.templates = templates
        self._new_templates = False
        self.flows = []
        offset = 20
        offset = self.header.length
        while offset != len(data):
            flowset_id = struct.unpack('!H', data[offset:offset+2])[0]
            if flowset_id == 0:  # TemplateFlowSet always have id 0
@@ -331,5 +337,6 @@ class ExportPacket:
        return self._new_templates
    def __repr__(self):
        return "<ExportPacket version {} counting {} records>".format(
            self.header.version, self.header.count)
        s = " and new template(s)" if self.contains_new_templates else ""
        return "<ExportPacket v{} with {} records{}>".format(
            self.header.version, self.header.count, s)