Merge branch 'props-master'

Merging pull request #9 by @pR0Ps
https://github.com/bitkeks/python-netflow-v9-softflowd/pull/9

Thanks for the contribution!

Resolves #9
This commit is contained in:
Dominik Pataky 2019-10-31 18:02:06 +01:00
commit 3dee135a22
10 changed files with 572 additions and 291 deletions

README.md

@@ -1,5 +1,5 @@
# Python NetFlow v9 parser and UDP collector
This script is able to parse incoming UDP NetFlow packets of **NetFlow version 9**.
This script is able to collect and parse incoming UDP NetFlow packets of **NetFlow versions 1, 5 and 9**.
Version 9 is the first NetFlow version using templates.
Templates make dynamically sized and configured NetFlow data flowsets possible,
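With this merge the package exposes a single `parse_packet` entry point that dispatches on the version field. Below is a minimal sketch of a collector loop built on it; the socket setup, bind address and buffer size are illustrative assumptions, not part of this diff (main.py below does the real, threaded version):

```python
# Minimal sketch of a collector built on the new parse_packet entry point.
# The bind address/port and buffer size are illustrative assumptions.
import socket

from netflow import parse_packet, TemplateNotRecognized, UnknownNetFlowVersion

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
sock.bind(("0.0.0.0", 2055))
templates = {}  # shared across packets; v9 data records need a previously seen template

while True:
    payload, addr = sock.recvfrom(4096)
    try:
        export = parse_packet(payload, templates)
    except UnknownNetFlowVersion:
        continue  # not v1, v5 or v9
    except TemplateNotRecognized:
        continue  # v9 data arrived before its template; main.py buffers and retries instead
    for flow in export.flows:
        print(flow.data)
```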

analyze_json.py Normal file → Executable file (138 changes)

@@ -8,33 +8,30 @@ Copyright 2017-2019 Dominik Pataky <dev@bitkeks.eu>
Licensed under MIT License. See LICENSE.
"""
import argparse
from collections import namedtuple
import contextlib
from datetime import datetime
import functools
import ipaddress
import json
import os.path
import sys
import socket
from collections import namedtuple
import sys
Pair = namedtuple('Pair', 'src dest')
def getIPs(flow):
use_ipv4 = False # optimistic default case of IPv6
Pair = namedtuple('Pair', ['src', 'dest'])
if 'IP_PROTOCOL_VERSION' in flow and flow['IP_PROTOCOL_VERSION'] == 4:
use_ipv4 = True
elif 'IPV4_SRC_ADDR' in flow or 'IPV4_DST_ADDR' in flow:
use_ipv4 = True
if use_ipv4:
return Pair(
ipaddress.ip_address(flow['IPV4_SRC_ADDR']),
ipaddress.ip_address(flow['IPV4_DST_ADDR']))
@functools.lru_cache(maxsize=128)
def resolve_hostname(ip):
return socket.getfqdn(ip)
# else: return IPv6 pair
return Pair(
ipaddress.ip_address(flow['IPV6_SRC_ADDR']),
ipaddress.ip_address(flow['IPV6_DST_ADDR']))
def fallback(d, keys):
for k in keys:
if k in d:
return d[k]
raise KeyError(", ".join(keys))
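`fallback()` is what lets the analyzer accept both the v9 field names and the v1/v5 names introduced by this merge; a quick illustration with made-up values:

```python
# Illustrative use of the fallback() helper above. v9 exports use
# 'IN_BYTES'/'L4_SRC_PORT', while the new v1/v5 parsers emit 'IN_OCTETS'/'SRC_PORT'.
v5_flow = {"IN_OCTETS": 1024, "SRC_PORT": 443}  # made-up sample values
assert fallback(v5_flow, ["IN_BYTES", "IN_OCTETS"]) == 1024
assert fallback(v5_flow, ["L4_SRC_PORT", "SRC_PORT"]) == 443
# If no candidate key exists, KeyError("IN_BYTES, IN_OCTETS") names all of them.
```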
class Connection:
@@ -42,33 +39,62 @@ class Connection:
The direction of the data flow can be seen by looking at the size.
'src' describes the peer which sends more data towards the other. This
does NOT have to mean, that 'src' was the initiator of the connection.
does NOT have to mean that 'src' was the initiator of the connection.
"""
def __init__(self, flow1, flow2):
if flow1['IN_BYTES'] >= flow2['IN_BYTES']:
if not flow1 or not flow2:
raise Exception("A connection requires two flows")
# Assume the peer that sent the most data is the source
# TODO: this might not always be right, maybe use earlier timestamp?
size1 = fallback(flow1, ['IN_BYTES', 'IN_OCTETS'])
size2 = fallback(flow2, ['IN_BYTES', 'IN_OCTETS'])
if size1 >= size2:
src = flow1
dest = flow2
else:
src = flow2
dest = flow1
ips = getIPs(src)
ips = self.get_ips(src)
self.src = ips.src
self.dest = ips.dest
self.src_port = src['L4_SRC_PORT']
self.dest_port = src['L4_DST_PORT']
self.size = src['IN_BYTES']
self.src_port = fallback(src, ['L4_SRC_PORT', 'SRC_PORT'])
self.dest_port = fallback(dest, ['L4_DST_PORT', 'DST_PORT'])
self.size = fallback(src, ['IN_BYTES', 'IN_OCTETS'])
# Duration is given in milliseconds
self.duration = src['LAST_SWITCHED'] - src['FIRST_SWITCHED']
if self.duration < 0:
# 32 bit int has its limits. Handling overflow here
# TODO: Should be handled in the collection phase
self.duration = (2**32 - src['FIRST_SWITCHED']) + src['LAST_SWITCHED']
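The wraparound correction can be checked with a worked example (values invented for illustration):

```python
# Worked example of the 32-bit SysUptime wraparound handled above.
first_switched = 2**32 - 1000  # flow started just before the counter wrapped
last_switched = 500            # ...and ended just after the wrap
assert last_switched - first_switched < 0  # naive subtraction goes negative
assert (2**32 - first_switched) + last_switched == 1500  # actual duration in ms
```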
def __repr__(self):
return "<Connection from {} to {}, size {}>".format(
self.src, self.dest, self.human_size)
@staticmethod
def get_ips(flow):
# TODO: These values should be parsed into strings in the collection phase.
# The floating point representation of an IPv6 address in JSON
# could lose precision.
# IPv4
if flow.get('IP_PROTOCOL_VERSION') == 4 \
or 'IPV4_SRC_ADDR' in flow \
or 'IPV4_DST_ADDR' in flow:
return Pair(
ipaddress.ip_address(flow['IPV4_SRC_ADDR']),
ipaddress.ip_address(flow['IPV4_DST_ADDR'])
)
# IPv6
return Pair(
ipaddress.ip_address(flow['IPV6_SRC_ADDR']),
ipaddress.ip_address(flow['IPV6_DST_ADDR'])
)
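A small sketch of the fallback path: without any IPv4 markers in the flow dict, `get_ips` returns the IPv6 pair (addresses invented for illustration):

```python
# Sketch using the get_ips() classifier above with an IPv6-only flow dict.
import ipaddress

flow = {"IPV6_SRC_ADDR": "::1", "IPV6_DST_ADDR": "::2"}  # illustrative values
pair = Connection.get_ips(flow)
assert pair.src == ipaddress.ip_address("::1")
assert pair.dest == ipaddress.ip_address("::2")
```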
@property
def human_size(self):
# Calculate a human readable size of the traffic
@@ -96,52 +122,42 @@ class Connection:
@property
def hostnames(self):
# Resolve the IPs of this flow to their hostnames
src_hostname = socket.getfqdn(self.src.compressed)
dest_hostname = socket.getfqdn(self.dest.compressed)
src_hostname = resolve_hostname(self.src.compressed)
dest_hostname = resolve_hostname(self.dest.compressed)
return Pair(src_hostname, dest_hostname)
@property
def service(self):
# Resolve ports to their services, if known
service = "unknown"
try:
# Try service of sending peer first
service = socket.getservbyport(self.src_port)
except OSError:
# Resolving the sport did not work, trying dport
try:
service = socket.getservbyport(self.dest_port)
except OSError:
pass
return service
# Try source port, fallback to dest port, otherwise "unknown"
with contextlib.suppress(OSError):
return socket.getservbyport(self.src_port)
with contextlib.suppress(OSError):
return socket.getservbyport(self.dest_port)
return "unknown"
# Handle CLI args and load the data dump
if len(sys.argv) < 2:
exit("Use {} <filename>.json".format(sys.argv[0]))
filename = sys.argv[1]
if not os.path.exists(filename):
exit("File {} does not exist!".format(filename))
with open(filename, 'r') as fh:
data = json.loads(fh.read())
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Output a basic analysis of NetFlow data")
parser.add_argument('filename', nargs='?', type=argparse.FileType('r'),
default=sys.stdin,
help="The file to analyze (defaults to stdin if not provided)")
args = parser.parse_args()
data = json.load(args.filename)
# Go through data and dissect every flow saved inside the dump
for export in sorted(data):
timestamp = datetime.fromtimestamp(float(export)).strftime("%Y-%m-%d %H:%M.%S")
# Go through data and dissect every flow saved inside the dump
for key in sorted(data):
timestamp = datetime.fromtimestamp(float(key)).strftime("%Y-%m-%d %H:%M.%S")
flows = data[export]
pending = None # Two flows normally appear together for duplex connection
for flow in flows:
if not pending:
pending = flow
else:
flows = data[key]
pending = None # Two flows normally appear together for duplex connection
for flow in flows:
if not pending:
pending = flow
continue
con = Connection(pending, flow)
print("{timestamp}: {service:7} | {size:8} | {duration:9} | {src_host} ({src}) to"\
" {dest_host} ({dest})".format(
timestamp=timestamp, service=con.service.upper(),
src_host=con.hostnames.src, src=con.src,
dest_host=con.hostnames.dest, dest=con.dest,
size=con.human_size, duration=con.human_duration))
print("{timestamp}: {service:7} | {size:8} | {duration:9} | {src_host} ({src}) to {dest_host} ({dest})" \
.format(timestamp=timestamp, service=con.service.upper(), src_host=con.hostnames.src, src=con.src,
dest_host=con.hostnames.dest, dest=con.dest, size=con.human_size, duration=con.human_duration))
pending = None

main.py Normal file → Executable file (257 changes)

@@ -1,140 +1,201 @@
#!/usr/bin/env python3
"""
Example collector script for NetFlow v9.
Example collector script for NetFlow v1, v5, and v9.
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
Copyright 2017-2019 Dominik Pataky <dev@bitkeks.eu>
Licensed under MIT License. See LICENSE.
"""
import logging
import argparse
from collections import namedtuple
import queue
import json
import logging
import sys
import socketserver
import threading
import time
import json
import os.path
from netflow import parse_packet, TemplateNotRecognized, UnknownNetFlowVersion
logging.getLogger().setLevel(logging.INFO)
ch = logging.StreamHandler(sys.stdout)
ch.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
logging.getLogger().addHandler(ch)
logger = logging.getLogger(__name__)
try:
from netflow.collector_v9 import ExportPacket, TemplateNotRecognized
except ImportError:
logging.warning("Netflow v9 not installed as package! Running from directory.")
from src.netflow.collector_v9 import ExportPacket, TemplateNotRecognized
# Amount of time to wait before dropping an undecodable ExportPacket
PACKET_TIMEOUT = 60 * 60
parser = argparse.ArgumentParser(description="A sample netflow collector.")
parser.add_argument("--host", type=str, default="",
help="collector listening address")
parser.add_argument("--port", "-p", type=int, default=2055,
help="collector listener port")
parser.add_argument("--file", "-o", type=str, dest="output_file",
default="{}.json".format(int(time.time())),
help="collector export JSON file")
parser.add_argument("--debug", "-D", action="store_true",
help="Enable debug output")
# TODO: Add source IP
RawPacket = namedtuple('RawPacket', ['ts', 'data'])
class SoftflowUDPHandler(socketserver.BaseRequestHandler):
# We need to save the templates our NetFlow device
# sends over time. Templates are not resent every
# time a flow is sent to the collector.
templates = {}
buffered = {}
@classmethod
def set_output_file(cls, path):
cls.output_file = path
class QueuingRequestHandler(socketserver.BaseRequestHandler):
def handle(self):
if not os.path.exists(self.output_file):
with open(self.output_file, 'w') as fh:
json.dump({}, fh)
with open(self.output_file, 'r') as fh:
try:
existing_data = json.load(fh)
except json.decoder.JSONDecodeError as ex:
logging.error("Malformed JSON output file. Cannot read existing data, aborting.")
return
data = self.request[0]
host = self.client_address[0]
logging.debug("Received data from {}, length {}".format(host, len(data)))
self.server.queue.put(RawPacket(time.time(), data))
logger.debug(
"Received %d bytes of data from %s", len(data), self.client_address[0]
)
export = None
class QueuingUDPListener(socketserver.ThreadingUDPServer):
"""A threaded UDP server that adds a (time, data) tuple to a queue for
every request it sees.
"""
def __init__(self, interface, queue):
self.queue = queue
super().__init__(interface, QueuingRequestHandler)
class NetFlowListener(threading.Thread):
"""A thread that listens for incoming NetFlow packets, processes them, and
makes them available to consumers.
- When initialized, will start listening for NetFlow packets on the provided
host and port and queuing them for processing.
- When started, will start processing and parsing queued packets.
- When stopped, will shut down the listener and stop processing.
- When joined, will wait for the listener to exit
For example, a simple script that outputs data until killed with CTRL+C:
>>> listener = NetFlowListener('0.0.0.0', 2055)
>>> print("Listening for NetFlow packets")
>>> listener.start() # start processing packets
>>> try:
... while True:
... ts, export = listener.get()
... print("Time: {}".format(ts))
... for f in export.flows:
... print(" - {IPV4_SRC_ADDR} sent data to {IPV4_DST_ADDR}"
... "".format(**f))
... finally:
... print("Stopping...")
... listener.stop()
... listener.join()
... print("Stopped!")
"""
def __init__(self, host, port):
logger.info("Starting the NetFlow listener on {}:{}".format(host, port))
self.output = queue.Queue()
self.input = queue.Queue()
self.server = QueuingUDPListener((host, port), self.input)
self.thread = threading.Thread(target=self.server.serve_forever)
self.thread.start()
self._shutdown = threading.Event()
super().__init__()
def get(self, block=True, timeout=None):
"""Get a processed flow.
If optional args 'block' is true and 'timeout' is None (the default),
block if necessary until a flow is available. If 'timeout' is
a non-negative number, it blocks at most 'timeout' seconds and raises
the queue.Empty exception if no flow was available within that time.
Otherwise ('block' is false), return a flow if one is immediately
available, else raise the queue.Empty exception ('timeout' is ignored
in that case).
"""
return self.output.get(block, timeout)
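Consumers typically drain the queue with a timeout so they can stop cleanly; a sketch of the pattern (tests.py below uses the same idea):

```python
# Sketch: drain all processed flows from a started NetFlowListener without
# blocking forever. 'listener' is assumed to be a running NetFlowListener.
import queue

def drain(listener):
    results = []
    while True:
        try:
            results.append(listener.get(timeout=0.5))
        except queue.Empty:
            return results
```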
def run(self):
# Process packets from the queue
try:
export = ExportPacket(data, self.templates)
except TemplateNotRecognized:
self.buffered[time.time()] = data
logging.warning("Received data with unknown template, data stored in buffer!")
return
templates = {}
to_retry = []
while not self._shutdown.is_set():
try:
# 0.5s delay to limit CPU usage while waiting for new packets
pkt = self.input.get(block=True, timeout=0.5)
except queue.Empty:
continue
if not export:
logging.error("Error with exception handling while disecting export, export is None")
return
try:
export = parse_packet(pkt.data, templates)
except UnknownNetFlowVersion as e:
logger.error("%s, ignoring the packet", e)
continue
except TemplateNotRecognized:
if time.time() - pkt.ts > PACKET_TIMEOUT:
logger.warning("Dropping an old and undecodable v9 ExportPacket")
else:
to_retry.append(pkt)
logger.debug("Failed to decode a v9 ExportPacket - will "
"re-attempt when a new template is discovered")
continue
logging.debug("Processed ExportPacket with {} flows.".format(export.header.count))
logging.debug("Size of buffer: {}".format(len(self.buffered)))
logger.debug("Processed a v%d ExportPacket with %d flows.",
export.header.version, export.header.count)
# In case the export held some new templates
self.templates.update(export.templates)
# If any new templates were discovered, dump the unprocessable
# data back into the queue and try to decode them again
if (export.header.version == 9 and export.contains_new_templates and to_retry):
logger.debug("Received new template(s)")
logger.debug("Will re-attempt to decode %d old v9 ExportPackets",
len(to_retry))
for p in to_retry:
self.input.put(p)
to_retry.clear()
remain_buffered = {}
processed = []
for timestamp, data in self.buffered.items():
try:
buffered_export = ExportPacket(data, self.templates)
processed.append(timestamp)
except TemplateNotRecognized:
remain_buffered[timestamp] = data
logging.debug("Template of buffered ExportPacket still not recognized")
continue
logging.debug("Processed buffered ExportPacket with {} flows.".format(buffered_export.header.count))
existing_data[timestamp] = [flow.data for flow in buffered_export.flows]
self.output.put((pkt.ts, export))
finally:
self.server.shutdown()
self.server.server_close()
# Delete processed items from the buffer
for pro in processed:
del self.buffered[pro]
def stop(self):
logger.info("Shutting down the NetFlow listener")
self._shutdown.set()
# Update the buffer
self.buffered.update(remain_buffered)
def join(self, timeout=None):
self.thread.join(timeout=timeout)
super().join(timeout=timeout)
# Append new flows
existing_data[time.time()] = [flow.data for flow in export.flows]
with open(self.output_file, 'w') as fh:
json.dump(existing_data, fh)
def get_export_packets(host, port):
"""A generator that will yield ExportPacket objects until it is killed"""
listener = NetFlowListener(host, port)
listener.start()
try:
while True:
yield listener.get()
finally:
listener.stop()
listener.join()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="A sample netflow collector.")
parser.add_argument("--host", type=str, default="0.0.0.0",
help="collector listening address")
parser.add_argument("--port", "-p", type=int, default=2055,
help="collector listener port")
parser.add_argument("--file", "-o", type=str, dest="output_file",
default="{}.json".format(int(time.time())),
help="collector export JSON file")
parser.add_argument("--debug", "-D", action="store_true",
help="Enable debug output")
args = parser.parse_args()
logging.basicConfig(level=logging.INFO, stream=sys.stdout, format="%(message)s")
if args.debug:
logging.getLogger().setLevel(logging.DEBUG)
output_file = args.output_file
SoftflowUDPHandler.set_output_file(output_file)
host = args.host
port = args.port
logging.info("Listening on interface {}:{}".format(host, port))
server = socketserver.UDPServer((host, port), SoftflowUDPHandler)
logger.setLevel(logging.DEBUG)
data = {}
try:
logging.debug("Starting the NetFlow listener")
server.serve_forever(poll_interval=0.5)
except (IOError, SystemExit):
raise
# TODO: For a long-running process, this will consume loads of memory
for ts, export in get_export_packets(args.host, args.port):
data[ts] = [flow.data for flow in export.flows]
except KeyboardInterrupt:
raise
logger.info("Received KeyboardInterrupt, passing through")
pass
server.server_close()
if data:
# TODO: this should be done periodically to not lose any data (only saved in memory)
logger.info("Outputting collected data to '%s'", args.output_file)
with open(args.output_file, 'w') as f:
json.dump(data, f)
else:
logger.info("No data collected")

netflow/__init__.py Normal file (35 changes)

@@ -0,0 +1,35 @@
#!/usr/bin/env python
import struct
from netflow.v1 import V1ExportPacket
from netflow.v5 import V5ExportPacket
from netflow.v9 import V9ExportPacket, TemplateNotRecognized
__all__ = ["TemplateNotRecognized", "UnknownNetFlowVersion", "parse_packet"]
class UnknownNetFlowVersion(Exception):
def __init__(self, data, version):
self.data = data
self.version = version
r = repr(data)
data_str = ("{:.25}..." if len(r) >= 28 else "{}").format(r)
super().__init__(
"Unknown NetFlow version {} for data {}".format(version, data_str)
)
def get_netflow_version(data):
return struct.unpack('!H', data[:2])[0]
def parse_packet(data, templates):
version = get_netflow_version(data)
if version == 1:
return V1ExportPacket(data)
elif version == 5:
return V5ExportPacket(data)
elif version == 9:
return V9ExportPacket(data, templates)
raise UnknownNetFlowVersion(data, version)
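The dispatch relies only on the first two bytes of the datagram; a synthetic check (the rest of the buffer is meaningless here):

```python
# The version sniffing parse_packet relies on, against a synthetic buffer.
import struct

buf = struct.pack('!H', 9) + b'\x00' * 18  # only the version field is meaningful
assert struct.unpack('!H', buf[:2])[0] == 9  # would be routed to V9ExportPacket
```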

netflow/v1.py Normal file (74 changes)

@@ -0,0 +1,74 @@
#!/usr/bin/env python3
"""
Netflow V1 collector and parser implementation in Python 3.
Created purely for fun. Not battle tested, nor will it be.
Reference: https://www.cisco.com/c/en/us/td/docs/net_mgmt/netflow_collection_engine/3-6/user/guide/format.html
This script is specifically implemented in combination with softflowd.
See https://github.com/djmdjm/softflowd
"""
import struct
class DataFlow:
"""Holds one v1 DataRecord"""
length = 48
def __init__(self, data):
self.data = {}
self.data['IPV4_SRC_ADDR'] = struct.unpack('!I', data[:4])[0]
self.data['IPV4_DST_ADDR'] = struct.unpack('!I', data[4:8])[0]
self.data['NEXT_HOP'] = struct.unpack('!I', data[8:12])[0]
self.data['INPUT'] = struct.unpack('!H', data[12:14])[0]
self.data['OUTPUT'] = struct.unpack('!H', data[14:16])[0]
self.data['IN_PACKETS'] = struct.unpack('!I', data[16:20])[0]
self.data['IN_OCTETS'] = struct.unpack('!I', data[20:24])[0]
self.data['FIRST_SWITCHED'] = struct.unpack('!I', data[24:28])[0]
self.data['LAST_SWITCHED'] = struct.unpack('!I', data[28:32])[0]
self.data['SRC_PORT'] = struct.unpack('!H', data[32:34])[0]
self.data['DST_PORT'] = struct.unpack('!H', data[34:36])[0]
# Word at 36 is used for padding
self.data['PROTO'] = struct.unpack('!B', data[38:39])[0]
self.data['TOS'] = struct.unpack('!B', data[39:40])[0]
self.data['TCP_FLAGS'] = struct.unpack('!B', data[40:41])[0]
# Data at 41-48 is padding
def __repr__(self):
return "<DataRecord with data {}>".format(self.data)
class Header:
"""The header of the V1ExportPacket"""
length = 16
def __init__(self, data):
header = struct.unpack('!HHIII', data[:self.length])
self.version = header[0]
self.count = header[1]
self.uptime = header[2]
self.timestamp = header[3]
self.timestamp_nano = header[4]
class V1ExportPacket:
"""The flow record holds the header and data flowsets."""
def __init__(self, data):
self.flows = []
self.header = Header(data)
offset = self.header.length
for flow_count in range(0, self.header.count):
flow = DataFlow(data[offset:])
self.flows.append(flow)
offset += flow.length
def __repr__(self):
return "<ExportPacket v{} with {} records>".format(
self.header.version, self.header.count)

netflow/v5.py Normal file (82 changes)

@@ -0,0 +1,82 @@
#!/usr/bin/env python3
"""
Netflow V5 collector and parser implementation in Python 3.
Created purely for fun. Not battle tested, nor will it be.
Reference: https://www.cisco.com/c/en/us/td/docs/net_mgmt/netflow_collection_engine/3-6/user/guide/format.html
This script is specifically implemented in combination with softflowd.
See https://github.com/djmdjm/softflowd
"""
import struct
class DataFlow:
"""Holds one v5 DataRecord"""
length = 48
def __init__(self, data):
self.data = {}
self.data['IPV4_SRC_ADDR'] = struct.unpack('!I', data[:4])[0]
self.data['IPV4_DST_ADDR'] = struct.unpack('!I', data[4:8])[0]
self.data['NEXT_HOP'] = struct.unpack('!I', data[8:12])[0]
self.data['INPUT'] = struct.unpack('!H', data[12:14])[0]
self.data['OUTPUT'] = struct.unpack('!H', data[14:16])[0]
self.data['IN_PACKETS'] = struct.unpack('!I', data[16:20])[0]
self.data['IN_OCTETS'] = struct.unpack('!I', data[20:24])[0]
self.data['FIRST_SWITCHED'] = struct.unpack('!I', data[24:28])[0]
self.data['LAST_SWITCHED'] = struct.unpack('!I', data[28:32])[0]
self.data['SRC_PORT'] = struct.unpack('!H', data[32:34])[0]
self.data['DST_PORT'] = struct.unpack('!H', data[34:36])[0]
# Byte 36 is used for padding
self.data['TCP_FLAGS'] = struct.unpack('!B', data[37:38])[0]
self.data['PROTO'] = struct.unpack('!B', data[38:39])[0]
self.data['TOS'] = struct.unpack('!B', data[39:40])[0]
self.data['SRC_AS'] = struct.unpack('!H', data[40:42])[0]
self.data['DST_AS'] = struct.unpack('!H', data[42:44])[0]
self.data['SRC_MASK'] = struct.unpack('!B', data[44:45])[0]
self.data['DST_MASK'] = struct.unpack('!B', data[45:46])[0]
# Word 46 is used for padding
def __repr__(self):
return "<DataRecord with data {}>".format(self.data)
class Header:
"""The header of the V5ExportPacket"""
length = 24
def __init__(self, data):
header = struct.unpack('!HHIIIIBBH', data[:self.length])
self.version = header[0]
self.count = header[1]
self.uptime = header[2]
self.timestamp = header[3]
self.timestamp_nano = header[4]
self.sequence = header[5]
self.engine_type = header[6]
self.engine_id = header[7]
self.sampling_interval = header[8]
class V5ExportPacket:
"""The flow record holds the header and data flowsets."""
def __init__(self, data):
self.flows = []
self.header = Header(data)
offset = self.header.length
for flow_count in range(0, self.header.count):
flow = DataFlow(data[offset:])
self.flows.append(flow)
offset += flow.length
def __repr__(self):
return "<ExportPacket v{} with {} records>".format(
self.header.version, self.header.count)
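The fixed-size headers can be sanity-checked against their struct format strings:

```python
# Sanity check: the struct format strings match the declared header lengths.
import struct

assert struct.calcsize('!HHIIIIBBH') == 24  # v5 Header.length
assert struct.calcsize('!HHIII') == 16      # v1 Header.length
```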

netflow/v9.py

@@ -4,6 +4,8 @@
Netflow V9 collector and parser implementation in Python 3.
Created for learning purposes and because of unsatisfying alternatives.
Reference: https://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html
This script is specifically implemented in combination with softflowd.
See https://github.com/djmdjm/softflowd
@@ -11,12 +13,10 @@ Copyright 2017, 2018 Dominik Pataky <dev@bitkeks.eu>
Licensed under MIT License. See LICENSE.
"""
import socket
import struct
import sys
field_types = {
FIELD_TYPES = {
0: 'UNKNOWN_FIELD_TYPE', # fallback for unknown field types
# Cisco specs for NetFlow v9
@@ -153,10 +153,14 @@ field_types = {
}
class TemplateNotRecognized(KeyError):
pass
class DataRecord:
"""This is a 'flow' as we want it from our source. What it contains is
variable in NetFlow V9, so to work with the data you have to analyze the
data dict keys (which are integers and can be mapped with the field_types
data dict keys (which are integers and can be mapped with the FIELD_TYPES
dict).
Should hold a 'data' dict with keys=field_type (integer) and value (in bytes).
@@ -195,7 +199,7 @@ class DataFlowSet:
for field in template.fields:
flen = field.field_length
fkey = field_types[field.field_type]
fkey = FIELD_TYPES[field.field_type]
fdata = None
# The length of the value byte slice is defined in the template
@@ -218,20 +222,18 @@ class DataFlowSet:
class TemplateField:
"""A field with type identifier and length.
"""
"""A field with type identifier and length."""
def __init__(self, field_type, field_length):
self.field_type = field_type # integer
self.field_length = field_length # bytes
def __repr__(self):
return "<TemplateField type {}:{}, length {}>".format(
self.field_type, field_types[self.field_type], self.field_length)
self.field_type, FIELD_TYPES[self.field_type], self.field_length)
class TemplateRecord:
"""A template record contained in a TemplateFlowSet.
"""
"""A template record contained in a TemplateFlowSet."""
def __init__(self, template_id, field_count, fields):
self.template_id = template_id
self.field_count = field_count
@@ -240,7 +242,7 @@ class TemplateRecord:
def __repr__(self):
return "<TemplateRecord {} with {} fields: {}>".format(
self.template_id, self.field_count,
' '.join([field_types[field.field_type] for field in self.fields]))
' '.join([FIELD_TYPES[field.field_type] for field in self.fields]))
class TemplateFlowSet:
@@ -258,7 +260,7 @@ class TemplateFlowSet:
offset = 4 # Skip header
# Iterate through all template records in this template flowset
while offset != self.length:
while offset < self.length:
pack = struct.unpack('!HH', data[offset:offset+4])
template_id = pack[0]
field_count = pack[1]
@@ -268,7 +270,7 @@
# Get all fields of this template
offset += 4
field_type, field_length = struct.unpack('!HH', data[offset:offset+4])
if field_type not in field_types:
if field_type not in FIELD_TYPES:
field_type = 0 # Set field_type to UNKNOWN_FIELD_TYPE as fallback
field = TemplateField(field_type, field_length)
fields.append(field)
@@ -288,10 +290,12 @@ class TemplateFlowSet:
class Header:
"""The header of the ExportPacket.
"""
"""The header of the V9ExportPacket"""
length = 20
def __init__(self, data):
pack = struct.unpack('!HHIIII', data[:20])
pack = struct.unpack('!HHIIII', data[:self.length])
self.version = pack[0]
self.count = pack[1] # not sure if correct. softflowd: no of flows
@@ -301,19 +305,26 @@ class Header:
self.source_id = pack[5]
class ExportPacket:
"""The flow record holds the header and all template and data flowsets.
"""
class V9ExportPacket:
"""The flow record holds the header and all template and data flowsets."""
def __init__(self, data, templates):
self.header = Header(data)
self.templates = templates
self._new_templates = False
self.flows = []
offset = 20
offset = self.header.length
while offset != len(data):
flowset_id = struct.unpack('!H', data[offset:offset+2])[0]
if flowset_id == 0: # TemplateFlowSets always have id 0
tfs = TemplateFlowSet(data[offset:])
# Check for any new/changed templates
if not self._new_templates:
for id_, template in tfs.templates.items():
if id_ not in self.templates or self.templates[id_] != template:
self._new_templates = True
break
self.templates.update(tfs.templates)
offset += tfs.length
else:
@@ -321,10 +332,11 @@ class ExportPacket:
self.flows += dfs.flows
offset += dfs.length
@property
def contains_new_templates(self):
return self._new_templates
def __repr__(self):
return "<ExportPacket version {} counting {} records>".format(
self.header.version, self.header.count)
class TemplateNotRecognized(KeyError):
pass
s = " and new template(s)" if self.contains_new_templates else ""
return "<ExportPacket v{} with {} records{}>".format(
self.header.version, self.header.count, s)

setup.py

@@ -1,17 +1,12 @@
#!/usr/bin/env python3
from setuptools import setup, find_packages
import os
from setuptools import setup
data_files = [(d, [os.path.join(d, f) for f in files])
for d, folders, files in os.walk(os.path.join('src', 'config'))]
setup(name='netflow-v9',
version='0.7.0',
description='NetFlow v9 parser and collector implemented in Python 3. Developed to be used with softflowd v0.9.9',
setup(name='netflow',
version='0.8.0',
description='NetFlow v1, v5, and v9 collector, parser and analyzer implemented in Python 3.',
author='Dominik Pataky',
author_email='dev@bitkeks.eu',
packages=find_packages('src'),
package_dir={'': 'src'},
packages=["netflow"],
license='MIT'
)

tests.py Normal file → Executable file (196 changes)

@@ -1,144 +1,150 @@
#!/usr/bin/env python3
"""
This file contains tests for the softflowd UDP collector saved in main.py
The test packets (defined below as hex streams) were extracted from a "real" softflowd export
based on a sample PCAP capture file. They consist of one export with the templates and three without.
Two tests are defined, one slow, one fast. During some runs, exceptions occurred which might hint
at race conditions during reading and writing to the JSON output file.
For now, both tests run successfully.
This file contains tests for the softflowd UDP collector saved in main.py The
test packets (defined below as hex streams) were extracted from a "real"
softflowd export based on a sample PCAP capture file. They consist of one
export with the templates and three without.
Copyright 2017-2019 Dominik Pataky <dev@bitkeks.eu>
Licensed under MIT License. See LICENSE.
"""
import ipaddress
import json
import logging
from pprint import pprint
import queue
import random
import socket
import socketserver
import subprocess
import tempfile
from time import sleep
import threading
import sys
import time
import unittest
from main import SoftflowUDPHandler
from main import NetFlowListener
logging.getLogger().setLevel(logging.DEBUG)
# TODO: add tests for v1 and v5
# TODO: tests with 500 packets fail?
# The flowset with 2 templates and 8 flows
template_packet = '0009000a000000035c9f55980000000100000000000000400400000e00080004000c000400150004001600040001000400020004000a0004000e000400070002000b00020004000100060001003c000100050001000000400800000e001b0010001c001000150004001600040001000400020004000a0004000e000400070002000b00020004000100060001003c000100050001040001447f0000017f000001fb3c1aaafb3c18fd000190100000004b00000000000000000050942c061b04007f0000017f000001fb3c1aaafb3c18fd00000f94000000360000000000000000942c0050061f04007f0000017f000001fb3c1cfcfb3c1a9b0000d3fc0000002a000000000000000000509434061b04007f0000017f000001fb3c1cfcfb3c1a9b00000a490000001e000000000000000094340050061f04007f0000017f000001fb3bb82cfb3ba48b000002960000000300000000000000000050942a061904007f0000017f000001fb3bb82cfb3ba48b00000068000000020000000000000000942a0050061104007f0000017f000001fb3c1900fb3c18fe0000004c0000000100000000000000000035b3c9110004007f0000017f000001fb3c1900fb3c18fe0000003c000000010000000000000000b3c9003511000400'
TEMPLATE_PACKET = '0009000a000000035c9f55980000000100000000000000400400000e00080004000c000400150004001600040001000400020004000a0004000e000400070002000b00020004000100060001003c000100050001000000400800000e001b0010001c001000150004001600040001000400020004000a0004000e000400070002000b00020004000100060001003c000100050001040001447f0000017f000001fb3c1aaafb3c18fd000190100000004b00000000000000000050942c061b04007f0000017f000001fb3c1aaafb3c18fd00000f94000000360000000000000000942c0050061f04007f0000017f000001fb3c1cfcfb3c1a9b0000d3fc0000002a000000000000000000509434061b04007f0000017f000001fb3c1cfcfb3c1a9b00000a490000001e000000000000000094340050061f04007f0000017f000001fb3bb82cfb3ba48b000002960000000300000000000000000050942a061904007f0000017f000001fb3bb82cfb3ba48b00000068000000020000000000000000942a0050061104007f0000017f000001fb3c1900fb3c18fe0000004c0000000100000000000000000035b3c9110004007f0000017f000001fb3c1900fb3c18fe0000003c000000010000000000000000b3c9003511000400'
# Three packets without templates, each with 12 flows, anonymized
packets = [
PACKETS = [
'0009000c000000035c9f55980000000200000000040001e47f0000017f000001fb3c1a17fb3c19fd000001480000000200000000000000000035ea82110004007f0000017f000001fb3c1a17fb3c19fd0000007a000000020000000000000000ea820035110004007f0000017f000001fb3c1a17fb3c19fd000000f80000000200000000000000000035c6e2110004007f0000017f000001fb3c1a17fb3c19fd0000007a000000020000000000000000c6e20035110004007f0000017f000001fb3c1a9efb3c1a9c0000004c0000000100000000000000000035adc1110004007f0000017f000001fb3c1a9efb3c1a9c0000003c000000010000000000000000adc10035110004007f0000017f000001fb3c1b74fb3c1b720000004c0000000100000000000000000035d0b3110004007f0000017f000001fb3c1b74fb3c1b720000003c000000010000000000000000d0b30035110004007f0000017f000001fb3c2f59fb3c1b7100001a350000000a000000000000000000509436061b04007f0000017f000001fb3c2f59fb3c1b710000038a0000000a000000000000000094360050061b04007f0000017f000001fb3c913bfb3c91380000004c0000000100000000000000000035e262110004007f0000017f000001fb3c913bfb3c91380000003c000000010000000000000000e262003511000400',
'0009000c000000035c9f55980000000300000000040001e47f0000017f000001fb3ca523fb3c913b0000030700000005000000000000000000509438061b04007f0000017f000001fb3ca523fb3c913b000002a200000005000000000000000094380050061b04007f0000017f000001fb3f7fe1fb3dbc970002d52800000097000000000000000001bb8730061b04007f0000017f000001fb3f7fe1fb3dbc970000146c000000520000000000000000873001bb061f04007f0000017f000001fb3d066ffb3d066c0000004c0000000100000000000000000035e5bd110004007f0000017f000001fb3d066ffb3d066c0000003c000000010000000000000000e5bd0035110004007f0000017f000001fb3d1a61fb3d066b000003060000000500000000000000000050943a061b04007f0000017f000001fb3d1a61fb3d066b000002a2000000050000000000000000943a0050061b04007f0000017f000001fb3fed00fb3f002c0000344000000016000000000000000001bbae50061f04007f0000017f000001fb3fed00fb3f002c00000a47000000120000000000000000ae5001bb061b04007f0000017f000001fb402f17fb402a750003524c000000a5000000000000000001bbc48c061b04007f0000017f000001fb402f17fb402a75000020a60000007e0000000000000000c48c01bb061f0400',
'0009000c000000035c9f55980000000400000000040001e47f0000017f000001fb3d7ba2fb3d7ba00000004c0000000100000000000000000035a399110004007f0000017f000001fb3d7ba2fb3d7ba00000003c000000010000000000000000a3990035110004007f0000017f000001fb3d8f85fb3d7b9f000003070000000500000000000000000050943c061b04007f0000017f000001fb3d8f85fb3d7b9f000002a2000000050000000000000000943c0050061b04007f0000017f000001fb3d9165fb3d7f6d0000c97b0000002a000000000000000001bbae48061b04007f0000017f000001fb3d9165fb3d7f6d000007f40000001a0000000000000000ae4801bb061b04007f0000017f000001fb3dbc96fb3dbc7e0000011e0000000200000000000000000035bd4f110004007f0000017f000001fb3dbc96fb3dbc7e0000008e000000020000000000000000bd4f0035110004007f0000017f000001fb3ddbb3fb3c1a180000bfee0000002f00000000000000000050ae56061b04007f0000017f000001fb3ddbb3fb3c1a1800000982000000270000000000000000ae560050061b04007f0000017f000001fb3ddbb3fb3c1a180000130e0000001200000000000000000050e820061b04007f0000017f000001fb3ddbb3fb3c1a180000059c000000140000000000000000e8200050061b0400'
]
INVALID_PACKET = "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
class ThreadedUDPServer(socketserver.ThreadingMixIn, socketserver.UDPServer):
pass
CONNECTION = ('127.0.0.1', 1337)
NUM_PACKETS = 50
class TestSoftflowExport(unittest.TestCase):
CONNECTION = ('127.0.0.1', 1337)
COUNT_PACKETS_TO_TEST = 5
SLEEP_TIME = 0.3
RUN_ANALYZER = False
def setUp(self):
logging.debug("Creating temporary JSON output file.")
self.temp_output_file = tempfile.NamedTemporaryFile(prefix="softflowd_")
def emit_packets(packets, delay=0):
"""Send the provided packets to the listener"""
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
for p in packets:
sock.sendto(bytes.fromhex(p), CONNECTION)
time.sleep(delay)
sock.close()
# FIXME: templates are saved between test runs, because they are stored with the class
# Maybe the templates should be stored with an instance?
logging.debug("Resetting SoftflowUDPHandler templates.")
SoftflowUDPHandler.templates = {}
logging.debug("Setting temporary file {} as output for SoftflowUDPHandler".format(self.temp_output_file.name))
SoftflowUDPHandler.set_output_file(self.temp_output_file.name)
def send_recv_packets(packets, delay=0):
"""Starts a listener, send packets, receives packets
logging.debug("Writing empty dict to output file.")
with open(self.temp_output_file.name, "w") as fh:
json.dump({}, fh)
returns a tuple: ([(ts, export), ...], time_started_sending, time_stopped_sending)
"""
l = NetFlowListener(*CONNECTION)
tstart = time.time()
emit_packets(packets, delay=delay)
time.sleep(0.5) # Allow packets to be sent and received
tend = time.time()
l.start()
logging.debug("Creating and running the Softflow collector in another thread.")
self.server = ThreadedUDPServer(self.CONNECTION, SoftflowUDPHandler)
self.server_thread = threading.Thread(target=self.server.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()
pkts = []
while True:
try:
pkts.append(l.get(timeout=0.5))
except queue.Empty:
break
l.stop()
l.join()
return pkts, tstart, tend
logging.debug("Creating UDP socket for client packets.")
self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
def tearDown(self):
logging.debug("Running tear down procedure.")
self.server.shutdown()
self.server.server_close()
self.server_thread.join()
self.sock.close()
self.temp_output_file.close()
class TestSoftFlowExport(unittest.TestCase):
def _test_export(self):
logging.info("Running UDP client sending raw hex packets with flows.")
def _test_recv_all_packets(self, num, template_idx, delay=0):
"""Fling packets at the server and test that it receives them all"""
def gen_pkts(n, idx):
for x in range(n):
if x == idx:
yield TEMPLATE_PACKET
else:
yield random.choice(PACKETS)
# Get a random index on which the template is sent
template_idx = random.randint(1, self.COUNT_PACKETS_TO_TEST - 1) # 1 for enhanced testing, -1 because randint
pkts, tstart, tend = send_recv_packets(gen_pkts(num, template_idx), delay=delay)
# Save the order of lengths for later check
lens = []
# check number of packets
self.assertEqual(len(pkts), num)
for idx in range(self.COUNT_PACKETS_TO_TEST):
# Choose a random packet payload
p = random.choice(packets)
# check timestamps are when packets were sent, not processed
self.assertTrue(all(tstart < p[0] < tend for p in pkts))
logging.info("Sending packet {}.".format(idx))
self.sock.sendto(bytes.fromhex(p), self.CONNECTION)
lens.append(12)
sleep(self.SLEEP_TIME)
# check number of "things" in the packets (flows + templates)
# template packet = 10 things
# other packets = 12 things
self.assertEqual(sum(p[1].header.count for p in pkts), (num-1)*12 + 10)
# Randomly inject the template packet
if idx == template_idx:
logging.info("Sending template packet.")
self.sock.sendto(bytes.fromhex(template_packet), self.CONNECTION)
lens.append(8)
sleep(self.SLEEP_TIME)
# check number of flows in the packets
# template packet = 8 flows (2 templates)
# other packets = 12 flows
self.assertEqual(sum(len(p[1].flows) for p in pkts), (num-1)*12 + 8)
with open(self.temp_output_file.name, "r") as fh:
exported = json.load(fh)
def test_recv_all_packets_template_first(self):
"""Test all packets are received when the template is sent first"""
self._test_recv_all_packets(NUM_PACKETS, 0)
# We got four exports
logging.info("Testing the existence of all exports, including the ones with formerly unknown templates: {} of {}".format(
len(exported.keys()), self.COUNT_PACKETS_TO_TEST + 1))
self.assertEqual(len(exported.keys()), self.COUNT_PACKETS_TO_TEST + 1) # +1 including the template packet
def test_recv_all_packets_template_middle(self):
"""Test all packets are received when the template is sent in the middle"""
self._test_recv_all_packets(NUM_PACKETS, NUM_PACKETS//2)
# Test lengths of exports
logging.info("Testing the correct lengths of all exports.")
for idx, val in enumerate(exported.values()):
self.assertEqual(len(val), lens[idx])
def test_recv_all_packets_template_last(self):
"""Test all packets are received when the template is sent last"""
self._test_recv_all_packets(NUM_PACKETS, NUM_PACKETS-1)
if self.RUN_ANALYZER:
logging.info("Running analyze_json.py")
analyzer = subprocess.run(['python3', 'analyze_json.py', self.temp_output_file.name], stdout=subprocess.PIPE)
for line in analyzer.stdout.split(b"\n"):
print(line.decode())
def test_recv_all_packets_slowly(self):
"""Test all packets are received when things are sent slooooowwwwwwwwlllllllyyyyyy"""
self._test_recv_all_packets(3, 0, delay=1)
def test_slow(self):
logging.info("Running slow test")
self.SLEEP_TIME = 0.5
self.COUNT_PACKETS_TO_TEST = 3
self._test_export()
def test_ignore_invalid_packets(self):
"""Test that invlalid packets log a warning but are otherwise ignored"""
with self.assertLogs(level='WARNING'):
pkts, _, _ = send_recv_packets([
INVALID_PACKET, TEMPLATE_PACKET, random.choice(PACKETS), INVALID_PACKET,
random.choice(PACKETS), INVALID_PACKET
])
self.assertEqual(len(pkts), 3)
def test_analyzer(self):
"""Test thar the analyzer doesn't break and outputs the correct number of lines"""
pkts, _, _ = send_recv_packets([TEMPLATE_PACKET, *PACKETS])
data = {p[0]: [f.data for f in p[1].flows] for p in pkts}
analyzer = subprocess.run(
[sys.executable, 'analyze_json.py'],
input=json.dumps(data),
encoding='utf-8',
capture_output=True
)
# every 2 flows are written as a single line (any extras are dropped)
num_flows = sum(len(f) for f in data.values())
self.assertEqual(len(analyzer.stdout.splitlines()), num_flows//2)
# make sure there are no errors
self.assertEqual(analyzer.stderr, "")
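The expected line count follows from the packet definitions at the top of the file:

```python
# Expected analyzer output for this test: the template packet carries 8 flows
# and each of the three data packets carries 12, paired two flows per line.
assert (8 + 3 * 12) // 2 == 22
```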
def test_fast(self):
logging.info("Running fast test")
self.SLEEP_TIME = 0.1
self.COUNT_PACKETS_TO_TEST = 30
self._test_export()
if __name__ == '__main__':
unittest.main()