From 1646a52f17c5a0eb3d32b609b215eb76d3947e47 Mon Sep 17 00:00:00 2001 From: Dominik Pataky Date: Sun, 3 Nov 2019 13:30:50 +0100 Subject: [PATCH] Store IP addresses (v4 + v6) as strings rather than ints As mentioned by @pR0Ps in https://github.com/bitkeks/python-netflow-v9-softflowd/blame/6b9d20c8a6e2145aa2bd3c094b1f18fe31794555/analyze_json.py#L83 IP addresses, especially in IPv6, are better stored as parsed strings than as raw integer values. Implemented. --- analyze_json.py | 9 ++------- netflow/v9.py | 13 +++++++++++-- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/analyze_json.py b/analyze_json.py index 992506e..ee93d07 100755 --- a/analyze_json.py +++ b/analyze_json.py @@ -80,14 +80,9 @@ class Connection: @staticmethod def get_ips(flow): - # TODO: These values should be parsed into strings in the collection phase. - # The floating point representation of an IPv6 address in JSON - # could lose precision. - # IPv4 - if flow.get('IP_PROTOCOL_VERSION') == 4 \ - or 'IPV4_SRC_ADDR' in flow \ - or 'IPV4_DST_ADDR' in flow: + if flow.get('IP_PROTOCOL_VERSION') == 4 or \ + 'IPV4_SRC_ADDR' in flow or 'IPV4_DST_ADDR' in flow: return Pair( ipaddress.ip_address(flow['IPV4_SRC_ADDR']), ipaddress.ip_address(flow['IPV4_DST_ADDR']) diff --git a/netflow/v9.py b/netflow/v9.py index 4ea8301..07754d3 100644 --- a/netflow/v9.py +++ b/netflow/v9.py @@ -13,6 +13,7 @@ Copyright 2017, 2018 Dominik Pataky Licensed under MIT License. See LICENSE. 
""" +import ipaddress import struct @@ -200,7 +201,6 @@ class DataFlowSet: for field in template.fields: flen = field.field_length fkey = FIELD_TYPES[field.field_type] - fdata = None # The length of the value byte slice is defined in the template dataslice = data[offset:offset+flen] @@ -210,7 +210,16 @@ class DataFlowSet: for idx, byte in enumerate(reversed(bytearray(dataslice))): fdata += byte << (idx * 8) - new_record.data[fkey] = fdata + # Special handling of IP addresses to convert integers to strings to not lose precision in dump + if fkey in ["IPV4_SRC_ADDR", "IPV4_DST_ADDR", "IPV6_SRC_ADDR", "IPV6_DST_ADDR"]: + try: + ip = ipaddress.ip_address(fdata) + except ValueError: + print("IP address could not be parsed: {}".format(fdata)) + continue + new_record.data[fkey] = ip.compressed + else: + new_record.data[fkey] = fdata offset += flen