Performance improvement: rearrange NetFlow v9 packet parsing (use struct.unpack to extract all of a record's values at once)
This commit is contained in:
parent
c12507343b
commit
1bffe3a2a3
|
@@ -208,37 +208,47 @@ class V9DataFlowSet:
|
||||||
# As the field lengths are variable V9 has padding to next 32 Bit
|
# As the field lengths are variable V9 has padding to next 32 Bit
|
||||||
padding_size = 4 - (self.length % 4) # 4 Byte
|
padding_size = 4 - (self.length % 4) # 4 Byte
|
||||||
|
|
||||||
while offset <= (self.length - padding_size):
|
# For performance reasons, we use struct.unpack to get individual values. Here
|
||||||
new_record = V9DataRecord()
|
# we prepare the format string for parsing it:
|
||||||
|
struct_format = '!'
|
||||||
|
struct_len = 0
|
||||||
for field in template.fields:
|
for field in template.fields:
|
||||||
# The length of the value byte slice is defined in the template
|
# The length of the value byte slice is defined in the template
|
||||||
|
flen = field.field_length
|
||||||
|
if flen == 4:
|
||||||
|
struct_format += 'L'
|
||||||
|
elif flen == 2:
|
||||||
|
struct_format += 'H'
|
||||||
|
elif flen == 1:
|
||||||
|
struct_format += 'B'
|
||||||
|
else:
|
||||||
|
struct_format += f'{flen}s'
|
||||||
|
struct_len += flen
|
||||||
|
|
||||||
|
while offset <= (self.length - padding_size):
|
||||||
|
unpacked_values = struct.unpack(struct_format, data[offset:offset + struct_len])
|
||||||
|
|
||||||
|
new_record = V9DataRecord()
|
||||||
|
for field, value in zip(template.fields, unpacked_values):
|
||||||
flen = field.field_length
|
flen = field.field_length
|
||||||
fkey = V9_FIELD_TYPES[field.field_type]
|
fkey = V9_FIELD_TYPES[field.field_type]
|
||||||
|
|
||||||
# Special handling of IP addresses to convert integers to strings to not lose precision in dump
|
# Special handling of IP addresses to convert integers to strings to not lose precision in dump
|
||||||
# TODO: might only be needed for IPv6
|
# TODO: might only be needed for IPv6
|
||||||
if field.field_type in FIELD_TYPES_CONTAINING_IP:
|
if field.field_type in FIELD_TYPES_CONTAINING_IP:
|
||||||
dataslice = data[offset:offset+flen]
|
|
||||||
try:
|
try:
|
||||||
ip = ipaddress.ip_address(dataslice)
|
ip = ipaddress.ip_address(value)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
print("IP address could not be parsed: {}".format(fdata))
|
print("IP address could not be parsed: {}".format(repr(value)))
|
||||||
continue
|
continue
|
||||||
new_record.data[fkey] = ip.compressed
|
new_record.data[fkey] = ip.compressed
|
||||||
|
elif flen in (1, 2, 4):
|
||||||
|
# These values are already converted to numbers by struct.unpack:
|
||||||
|
new_record.data[fkey] = value
|
||||||
else:
|
else:
|
||||||
# For performance reasons, we use struct.unpack for known lengths:
|
|
||||||
if flen == 4:
|
|
||||||
new_record.data[fkey], = struct.unpack_from('!L', data, offset)
|
|
||||||
elif flen == 2:
|
|
||||||
new_record.data[fkey], = struct.unpack_from('!H', data, offset)
|
|
||||||
elif flen == 1:
|
|
||||||
new_record.data[fkey], = struct.unpack_from('!B', data, offset)
|
|
||||||
else:
|
|
||||||
dataslice = data[offset:offset+flen]
|
|
||||||
# Caveat: this code assumes little-endian system (like x86)
|
# Caveat: this code assumes little-endian system (like x86)
|
||||||
fdata = 0
|
fdata = 0
|
||||||
for idx, byte in enumerate(reversed(bytearray(dataslice))):
|
for idx, byte in enumerate(reversed(bytearray(value))):
|
||||||
fdata += byte << (idx * 8)
|
fdata += byte << (idx * 8)
|
||||||
new_record.data[fkey] = fdata
|
new_record.data[fkey] = fdata
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue