#!/usr/bin/env python3
"""
Netflow V9 collector and parser implementation in Python 3.
Created for learning purposes and unsatisfying alternatives.

Reference: https://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html

This script is specifically implemented in combination with softflowd.
See https://github.com/djmdjm/softflowd

Copyright 2017, 2018 Dominik Pataky <dev@bitkeks.eu>

Licensed under MIT License. See LICENSE.
"""
2019-11-03 13:30:50 +01:00
import ipaddress
2016-08-10 16:28:29 +02:00
import struct
2019-10-05 09:07:02 +02:00
# NetFlow v9 field type identifiers: maps the numeric field type found in a
# template record to a human-readable name used as key in DataRecord.data.
FIELD_TYPES = {
    0: 'UNKNOWN_FIELD_TYPE',  # fallback for unknown field types

    # Cisco specs for NetFlow v9
    # https://tools.ietf.org/html/rfc3954
    # https://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html
    1: 'IN_BYTES',
    2: 'IN_PKTS',
    3: 'FLOWS',
    4: 'PROTOCOL',
    5: 'SRC_TOS',
    6: 'TCP_FLAGS',
    7: 'L4_SRC_PORT',
    8: 'IPV4_SRC_ADDR',
    9: 'SRC_MASK',
    10: 'INPUT_SNMP',
    11: 'L4_DST_PORT',
    12: 'IPV4_DST_ADDR',
    13: 'DST_MASK',
    14: 'OUTPUT_SNMP',
    15: 'IPV4_NEXT_HOP',
    16: 'SRC_AS',
    17: 'DST_AS',
    18: 'BGP_IPV4_NEXT_HOP',
    19: 'MUL_DST_PKTS',
    20: 'MUL_DST_BYTES',
    21: 'LAST_SWITCHED',
    22: 'FIRST_SWITCHED',
    23: 'OUT_BYTES',
    24: 'OUT_PKTS',
    25: 'MIN_PKT_LNGTH',
    26: 'MAX_PKT_LNGTH',
    27: 'IPV6_SRC_ADDR',
    28: 'IPV6_DST_ADDR',
    29: 'IPV6_SRC_MASK',
    30: 'IPV6_DST_MASK',
    31: 'IPV6_FLOW_LABEL',
    32: 'ICMP_TYPE',
    33: 'MUL_IGMP_TYPE',
    34: 'SAMPLING_INTERVAL',
    35: 'SAMPLING_ALGORITHM',
    36: 'FLOW_ACTIVE_TIMEOUT',
    37: 'FLOW_INACTIVE_TIMEOUT',
    38: 'ENGINE_TYPE',
    39: 'ENGINE_ID',
    40: 'TOTAL_BYTES_EXP',
    41: 'TOTAL_PKTS_EXP',
    42: 'TOTAL_FLOWS_EXP',
    # 43 vendor proprietary
    44: 'IPV4_SRC_PREFIX',
    45: 'IPV4_DST_PREFIX',
    46: 'MPLS_TOP_LABEL_TYPE',
    47: 'MPLS_TOP_LABEL_IP_ADDR',
    48: 'FLOW_SAMPLER_ID',
    49: 'FLOW_SAMPLER_MODE',
    50: 'FLOW_SAMPLER_RANDOM_INTERVAL',  # fixed: was truncated to 'NTERVAL'
    # 51 vendor proprietary
    52: 'MIN_TTL',
    53: 'MAX_TTL',
    54: 'IPV4_IDENT',
    55: 'DST_TOS',
    56: 'IN_SRC_MAC',
    57: 'OUT_DST_MAC',
    58: 'SRC_VLAN',
    59: 'DST_VLAN',
    60: 'IP_PROTOCOL_VERSION',
    61: 'DIRECTION',
    62: 'IPV6_NEXT_HOP',
    63: 'BGP_IPV6_NEXT_HOP',  # fixed: was misspelled 'BPG_IPV6_NEXT_HOP'
    64: 'IPV6_OPTION_HEADERS',
    # 65-69 vendor proprietary
    70: 'MPLS_LABEL_1',
    71: 'MPLS_LABEL_2',
    72: 'MPLS_LABEL_3',
    73: 'MPLS_LABEL_4',
    74: 'MPLS_LABEL_5',
    75: 'MPLS_LABEL_6',
    76: 'MPLS_LABEL_7',
    77: 'MPLS_LABEL_8',
    78: 'MPLS_LABEL_9',
    79: 'MPLS_LABEL_10',
    80: 'IN_DST_MAC',
    81: 'OUT_SRC_MAC',
    82: 'IF_NAME',
    83: 'IF_DESC',
    84: 'SAMPLER_NAME',
    85: 'IN_PERMANENT_BYTES',
    86: 'IN_PERMANENT_PKTS',
    # 87 vendor proprietary
    88: 'FRAGMENT_OFFSET',
    89: 'FORWARDING_STATUS',
    90: 'MPLS_PAL_RD',
    91: 'MPLS_PREFIX_LEN',  # Number of consecutive bits in the MPLS prefix length.
    92: 'SRC_TRAFFIC_INDEX',  # BGP Policy Accounting Source Traffic Index
    93: 'DST_TRAFFIC_INDEX',  # BGP Policy Accounting Destination Traffic Index
    94: 'APPLICATION_DESCRIPTION',  # Application description
    95: 'APPLICATION_TAG',  # 8 bits of engine ID, followed by n bits of classification
    96: 'APPLICATION_NAME',  # Name associated with a classification
    98: 'postipDiffServCodePoint',  # DSCP value in the Differentiated Services Field, after modification
    99: 'replication_factor',  # Multicast replication factor
    100: 'DEPRECATED',  # DEPRECATED
    102: 'layer2packetSectionOffset',  # Layer 2 packet section offset. Potentially a generic offset
    103: 'layer2packetSectionSize',  # Layer 2 packet section size. Potentially a generic size
    104: 'layer2packetSectionData',  # Layer 2 packet section data
    # 105-127 reserved for future use by Cisco

    # ASA extensions (sorted by key for readability)
    # https://www.cisco.com/c/en/us/td/docs/security/asa/special/netflow/guide/asa_netflow.html
    148: 'NF_F_CONN_ID',  # An identifier of a unique flow for the device
    152: 'NF_F_FLOW_CREATE_TIME_MSEC',
    176: 'NF_F_ICMP_TYPE',  # ICMP type value
    177: 'NF_F_ICMP_CODE',  # ICMP code value
    178: 'NF_F_ICMP_TYPE_IPV6',  # ICMP IPv6 type value
    179: 'NF_F_ICMP_CODE_IPV6',  # ICMP IPv6 code value
    225: 'NF_F_XLATE_SRC_ADDR_IPV4',  # Post NAT Source IPv4 Address
    226: 'NF_F_XLATE_DST_ADDR_IPV4',  # Post NAT Destination IPv4 Address
    227: 'NF_F_XLATE_SRC_PORT',  # Post NAT Source Transport Port
    228: 'NF_F_XLATE_DST_PORT',  # Post NAT Destination Transport Port
    231: 'NF_F_FWD_FLOW_DELTA_BYTES',  # The delta number of bytes from source to destination
    232: 'NF_F_REV_FLOW_DELTA_BYTES',  # The delta number of bytes from destination to source
    233: 'NF_F_FW_EVENT',  # High-level event code
    281: 'NF_F_XLATE_SRC_ADDR_IPV6',  # Post NAT Source IPv6 Address
    282: 'NF_F_XLATE_DST_ADDR_IPV6',  # Post NAT Destination IPv6 Address
    323: 'NF_F_EVENT_TIME_MSEC',  # The time that the event occurred, which comes from IPFIX
    33000: 'NF_F_INGRESS_ACL_ID',  # The input ACL that permitted or denied the flow
    33001: 'NF_F_EGRESS_ACL_ID',  # The output ACL that permitted or denied a flow
    33002: 'NF_F_FW_EXT_EVENT',  # Extended event code
    40000: 'NF_F_USERNAME',  # AAA username

    # PaloAlto PAN-OS 8.0
    # https://www.paloaltonetworks.com/documentation/80/pan-os/pan-os/monitoring/netflow-monitoring/netflow-templates
    346: 'PANOS_privateEnterpriseNumber',
    56701: 'PANOS_APPID',
    56702: 'PANOS_USERID'
}
2016-08-10 20:38:07 +02:00
2019-10-05 09:07:02 +02:00
class TemplateNotRecognized(KeyError):
    """Raised when a DataFlowSet references a template id that has not
    been received (yet)."""
2016-08-10 18:55:38 +02:00
class DataRecord:
    """A single 'flow' as delivered by the exporter.

    Its contents are variable in NetFlow V9, so to work with the data you
    have to inspect the keys of the ``data`` dict (field names resolved via
    the FIELD_TYPES mapping), whose values are the decoded field values.
    """

    def __init__(self):
        # field name -> decoded value, filled in by DataFlowSet
        self.data = {}

    def __repr__(self):
        return "<DataRecord with data: {}>".format(self.data)
2016-08-10 18:55:38 +02:00
2016-08-10 20:38:07 +02:00
2016-08-10 18:55:38 +02:00
class DataFlowSet:
    """Holds one or multiple DataRecord which are all defined after the same
    template. This template is referenced in the field 'flowset_id' of this
    DataFlowSet and must not be zero.

    Raises TemplateNotRecognized if the referenced template id is unknown.
    """

    def __init__(self, data, templates):
        pack = struct.unpack('!HH', data[:4])

        self.template_id = pack[0]  # flowset_id is a reference to a template_id
        self.length = pack[1]
        self.flows = []

        offset = 4

        if self.template_id not in templates:
            raise TemplateNotRecognized

        template = templates[self.template_id]

        # As the field lengths are variable, V9 pads to the next 32 bit boundary
        padding_size = 4 - (self.length % 4)  # 4 byte

        while offset <= (self.length - padding_size):
            new_record = DataRecord()
            for field in template.fields:
                flen = field.field_length
                fkey = FIELD_TYPES[field.field_type]

                # The length of the value byte slice is defined in the template
                dataslice = data[offset:offset + flen]
                # BUGFIX: advance the offset *before* any `continue` below.
                # Previously a failed IP parse skipped the increment, which
                # misaligned every following field of the record.
                offset += flen

                # Decode as a big-endian unsigned integer of variable length
                fdata = int.from_bytes(dataslice, 'big')

                # Special handling of IP addresses: convert integers to strings
                # to not lose precision in dumps.
                # NOTE(review): ipaddress.ip_address(int) treats values < 2**32
                # as IPv4, so IPv6 addresses with many leading zero bytes may be
                # misclassified — confirm against the exporter's data.
                if fkey in ["IPV4_SRC_ADDR", "IPV4_DST_ADDR", "IPV6_SRC_ADDR", "IPV6_DST_ADDR"]:
                    try:
                        ip = ipaddress.ip_address(fdata)
                    except ValueError:
                        print("IP address could not be parsed: {}".format(fdata))
                        continue
                    new_record.data[fkey] = ip.compressed
                else:
                    new_record.data[fkey] = fdata

            self.flows.append(new_record)

    def __repr__(self):
        return "<DataFlowSet with template {} of length {} holding {} flows>" \
            .format(self.template_id, self.length, len(self.flows))
2016-08-10 18:55:38 +02:00
2016-08-10 16:28:29 +02:00
class TemplateField:
    """A single (type, length) field entry of a template record."""

    def __init__(self, field_type, field_length):
        self.field_type = field_type      # numeric field type identifier
        self.field_length = field_length  # value length in bytes

    def __repr__(self):
        return "<TemplateField type {}:{}, length {}>".format(
            self.field_type, FIELD_TYPES[self.field_type], self.field_length)
2016-08-10 20:38:07 +02:00
2016-08-10 16:28:29 +02:00
2016-08-10 22:33:57 +02:00
class TemplateRecord:
    """A template record contained in a TemplateFlowSet: an id plus the
    ordered list of TemplateField entries that define a data record."""

    def __init__(self, template_id, field_count, fields):
        self.template_id = template_id
        self.field_count = field_count
        self.fields = fields

    def __repr__(self):
        field_names = ' '.join(
            FIELD_TYPES[f.field_type] for f in self.fields)
        return "<TemplateRecord {} with {} fields: {}>".format(
            self.template_id, self.field_count, field_names)
2016-08-10 20:38:07 +02:00
2016-08-10 16:28:29 +02:00
class TemplateFlowSet:
    """A template flowset, which holds an id that is used by data flowsets to
    reference back to the template. The template then has fields which hold
    identifiers of data types (eg "IP_SRC_ADDR", "PKTS"..). This way the flow
    sender can dynamically put together data flowsets.
    """

    def __init__(self, data):
        # Header: flowset id (always 0 for templates) and total length
        # including this 4 byte header.
        self.flowset_id, self.length = struct.unpack('!HH', data[:4])
        self.templates = {}

        pos = 4  # skip the flowset header
        # Iterate through all template records in this template flowset
        while pos < self.length:
            template_id, field_count = struct.unpack('!HH', data[pos:pos + 4])

            fields = []
            for _ in range(field_count):
                # Each field entry is a (type, length) pair of 2 bytes each
                pos += 4
                field_type, field_length = struct.unpack('!HH', data[pos:pos + 4])
                if field_type not in FIELD_TYPES:
                    field_type = 0  # fall back to UNKNOWN_FIELD_TYPE
                fields.append(TemplateField(field_type, field_length))

            # Register the finished template under its id
            self.templates[template_id] = TemplateRecord(
                template_id, field_count, fields)

            # Advance to the next template_id field
            pos += 4

    def __repr__(self):
        return "<TemplateFlowSet with id {} of length {} containing templates: {}>" \
            .format(self.flowset_id, self.length, self.templates.keys())
2016-08-10 16:28:29 +02:00
class Header:
    """The 20 byte header of a V9ExportPacket."""

    length = 20  # header size in bytes

    def __init__(self, data):
        # version, count, uptime, timestamp, sequence, source_id
        (self.version,
         self.count,  # not sure if correct. softflowd: number of flows
         self.uptime,
         self.timestamp,
         self.sequence,
         self.source_id) = struct.unpack('!HHIIII', data[:self.length])
2016-08-10 16:28:29 +02:00
2016-08-10 18:55:38 +02:00
2019-10-17 05:23:51 +02:00
class V9ExportPacket:
    """One received export packet: the header plus all template and data
    flowsets it contains. Newly seen templates are merged into the passed-in
    `templates` dict."""

    def __init__(self, data, templates):
        self.header = Header(data)
        self.templates = templates
        self._new_templates = False
        self.flows = []

        offset = self.header.length
        while offset != len(data):
            flowset_id = struct.unpack('!H', data[offset:offset + 2])[0]

            if flowset_id == 0:  # a TemplateFlowSet always has id 0
                tfs = TemplateFlowSet(data[offset:])

                # Flag the packet if it carries any template we did not know
                # yet, or one that differs from the stored instance.
                if not self._new_templates:
                    self._new_templates = any(
                        id_ not in self.templates or self.templates[id_] != template
                        for id_, template in tfs.templates.items())

                self.templates.update(tfs.templates)
                offset += tfs.length
            else:
                dfs = DataFlowSet(data[offset:], self.templates)
                self.flows += dfs.flows
                offset += dfs.length

    @property
    def contains_new_templates(self):
        return self._new_templates

    def __repr__(self):
        s = " and new template(s)" if self.contains_new_templates else ""
        return "<ExportPacket v{} with {} records{}>".format(
            self.header.version, self.header.count, s)