Store IP addresses (v4 + v6) as strings rather than ints

As pointed out by @pR0Ps in 6b9d20c8a6/analyze_json.py (L83),
IP addresses, especially IPv6 ones, are better stored as parsed
strings than as raw integer values, since large integers can lose
precision when dumped to JSON. This commit implements that change.
This commit is contained in:
Dominik Pataky 2019-11-03 13:30:50 +01:00
parent 6b9d20c8a6
commit 1646a52f17
2 changed files with 13 additions and 9 deletions

View file

@ -80,14 +80,9 @@ class Connection:
@staticmethod
def get_ips(flow):
# TODO: These values should be parsed into strings in the collection phase.
# The floating point representation of an IPv6 address in JSON
# could lose precision.
# IPv4
if flow.get('IP_PROTOCOL_VERSION') == 4 \
or 'IPV4_SRC_ADDR' in flow \
or 'IPV4_DST_ADDR' in flow:
if flow.get('IP_PROTOCOL_VERSION') == 4 or \
'IPV4_SRC_ADDR' in flow or 'IPV4_DST_ADDR' in flow:
return Pair(
ipaddress.ip_address(flow['IPV4_SRC_ADDR']),
ipaddress.ip_address(flow['IPV4_DST_ADDR'])

View file

@ -13,6 +13,7 @@ Copyright 2017, 2018 Dominik Pataky <dev@bitkeks.eu>
Licensed under MIT License. See LICENSE.
"""
import ipaddress
import struct
@ -200,7 +201,6 @@ class DataFlowSet:
for field in template.fields:
flen = field.field_length
fkey = FIELD_TYPES[field.field_type]
fdata = None
# The length of the value byte slice is defined in the template
dataslice = data[offset:offset+flen]
@ -210,7 +210,16 @@ class DataFlowSet:
for idx, byte in enumerate(reversed(bytearray(dataslice))):
fdata += byte << (idx * 8)
new_record.data[fkey] = fdata
# Special handling of IP addresses to convert integers to strings to not lose precision in dump
if fkey in ["IPV4_SRC_ADDR", "IPV4_DST_ADDR", "IPV6_SRC_ADDR", "IPV6_DST_ADDR"]:
try:
ip = ipaddress.ip_address(fdata)
except ValueError:
print("IP address could not be parsed: {}".format(fdata))
continue
new_record.data[fkey] = ip.compressed
else:
new_record.data[fkey] = fdata
offset += flen