Ensure compatibility with Python 3.5.3

This commit replaces multiple occurrences of newer language features that
are not available in Python 3.5.3, the reference backwards-compatibility
version for this package. This version matches the Python shipped in
Debian Stretch (oldstable); according to pkgs.org, all other major
distros ship 3.6+, so 3.5.3 is the lower bound.

Changes (each illustrated with a short sketch after the list):
  * Add maxsize argument to functools.lru_cache decorator
  * Replace f"" with .format()
  * Replace variable type hints "var: type = val" with "# type:" comments
  * Replace pstats.SortKey enum with strings in performance tests
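
The bare decorator form "@functools.lru_cache" (without parentheses) is
only accepted from Python 3.8 onwards; on 3.5.3 the decorator must be
called. A minimal sketch of the pattern, with a stand-in function instead
of the cached classmethods on IPFIXFieldTypes/IPFIXDataTypes in
netflow.ipfix:

    import functools

    # 3.8+ also accepts the bare form:
    #   @functools.lru_cache
    # 3.5.3 requires the call, here with an explicit maxsize:
    @functools.lru_cache(maxsize=128)
    def by_id(id_):
        # stand-in lookup; the real methods scan iana_field_types
        return {1: "octetDeltaCount", 2: "packetDeltaCount"}.get(id_)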
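
f-string literals (PEP 498) require Python 3.6, so all f"" strings are
rewritten with str.format(). Sketch with made-up values:

    skipped, skipped_threshold = 3, 10

    # 3.6+ only:
    #   print(f"{skipped} connections skipped (threshold {skipped_threshold})")
    # 3.5-compatible equivalent:
    print("{skipped} connections skipped (threshold {skipped_threshold})"
          .format(skipped=skipped, skipped_threshold=skipped_threshold))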
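
Variable annotations (PEP 526) also require Python 3.6; PEP 484 type
comments carry the same information on 3.5.3 and are understood by type
checkers:

    from typing import List

    # 3.6+ only:
    #   fields: List[str] = []
    # 3.5-compatible:
    offset = 0  # type: int
    fields = []  # type: List[str]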
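
pstats.SortKey only exists since Python 3.7; sort_stats() accepts the
documented string keys on every version. Sketch with a toy workload
instead of the real packet-parsing profile:

    import cProfile
    import io
    import pstats

    profile = cProfile.Profile()
    profile.enable()
    sum(i * i for i in range(10000))  # toy workload
    profile.disable()

    # was: for sort_by in [SortKey.CUMULATIVE, SortKey.CALLS]:
    for sort_by in ['cumulative', 'calls']:
        s = io.StringIO()
        pstats.Stats(profile, stream=s).sort_stats(sort_by).print_stats(5)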

Additionally, various styling fixes were applied.
Version compatibility was tested with tox, pyenv and Python 3.5.3, but
there is no tox.ini yet that automates this test; a possible sketch
follows.
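
A hypothetical tox.ini sketch that could automate it (assumes pyenv
exposes a python3.5 interpreter on PATH and that the suite runs via
unittest discovery; not part of this commit):

    [tox]
    envlist = py35

    [testenv]
    commands = python -m unittest discover -s tests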

Bump patch version number to 0.10.3
Update author's email address.

Resolves #27
commit 5cdb514ffc (parent 5d1c5b8710)
Author: Dominik Pataky
Date:   2020-04-24 16:34:37 +02:00
15 changed files with 77 additions and 65 deletions


@@ -3,7 +3,7 @@ This package contains libraries and tools for **NetFlow versions 1, 5 and 9, and
Version 9 is the first NetFlow version using templates. Templates make dynamically sized and configured NetFlow data flowsets possible, which makes the collector's job harder. The library provides the `netflow.parse_packet()` function as the main API point (see below). By importing `netflow.v1`, `netflow.v5` or `netflow.v9` you have direct access to the respective parsing objects, but at the beginning you probably will have more success by running the reference collector (example below) and look into its code. IPFIX (IP Flow Information Export) is based on NetFlow v9 and standardized by the IETF. All related classes are contained in `netflow.ipfix`.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.


@@ -3,7 +3,7 @@
"""
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""


@@ -4,14 +4,12 @@
Reference analyzer script for NetFlow Python package.
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""
import argparse
-from collections import namedtuple
import contextlib
-from datetime import datetime
import functools
import gzip
import ipaddress
@@ -20,7 +18,8 @@ import logging
import os.path
import socket
import sys
+from collections import namedtuple
+from datetime import datetime
IP_PROTOCOLS = {
1: "ICMP",
@@ -65,10 +64,10 @@ def human_size(size_bytes):
return "%dB" % size_bytes
elif size_bytes / 1024. < 1024:
return "%.2fK" % (size_bytes / 1024.)
-elif size_bytes / 1024.**2 < 1024:
-return "%.2fM" % (size_bytes / 1024.**2)
+elif size_bytes / 1024. ** 2 < 1024:
+return "%.2fM" % (size_bytes / 1024. ** 2)
else:
return "%.2fG" % (size_bytes / 1024.**3)
return "%.2fG" % (size_bytes / 1024. ** 3)
def human_duration(seconds):
@@ -78,7 +77,7 @@ def human_duration(seconds):
return "%d sec" % seconds
if seconds / 60 > 60:
# hours
return "%d:%02d.%02d hours" % (seconds / 60**2, seconds % 60**2 / 60, seconds % 60)
return "%d:%02d.%02d hours" % (seconds / 60 ** 2, seconds % 60 ** 2 / 60, seconds % 60)
# minutes
return "%02d:%02d min" % (seconds / 60, seconds % 60)
@@ -90,6 +89,7 @@ class Connection:
'src' describes the peer which sends more data towards the other. This
does NOT have to mean that 'src' was the initiator of the connection.
"""
def __init__(self, flow1, flow2):
if not flow1 or not flow2:
raise Exception("A connection requires two flows")
@@ -129,7 +129,7 @@ class Connection:
if self.duration < 0:
# 32 bit int has its limits. Handling overflow here
# TODO: Should be handled in the collection phase
-self.duration = (2**32 - src['FIRST_SWITCHED']) + src['LAST_SWITCHED']
+self.duration = (2 ** 32 - src['FIRST_SWITCHED']) + src['LAST_SWITCHED']
def __repr__(self):
return "<Connection from {} to {}, size {}>".format(
@@ -298,25 +298,27 @@ if __name__ == "__main__":
continue
if first_line:
print("{:19} | {:14} | {:8} | {:9} | {:7} | Involved hosts".format("Timestamp", "Service", "Size", "Duration", "Packets"))
print("{:19} | {:14} | {:8} | {:9} | {:7} | Involved hosts".format("Timestamp", "Service", "Size",
"Duration", "Packets"))
print("-" * 100)
first_line = False
print("{timestamp} | {service:<14} | {size:8} | {duration:9} | {packets:7} | "
"Between {src_host} ({src}) and {dest_host} ({dest})" \
"Between {src_host} ({src}) and {dest_host} ({dest})"
.format(timestamp=timestamp, service=con.service.upper(), src_host=con.hostnames.src, src=con.src,
dest_host=con.hostnames.dest, dest=con.dest, size=con.human_size, duration=con.human_duration,
packets=con.total_packets))
if skipped > 0:
print(f"{skipped} connections skipped, because they had less than {skipped_threshold} packets (this value can be set with the -p flag).")
print("{skipped} connections skipped, because they had less than {skipped_threshold} packets "
"(this value can be set with the -p flag).".format(skipped=skipped, skipped_threshold=skipped_threshold))
if not args.verbose:
# Exit here if no debugging session was wanted
exit(0)
if len(pending) > 0:
print(f"\nThere are {len(pending)} first_switched entries left in the pending dict!")
print("\nThere are {pending} first_switched entries left in the pending dict!".format(pending=len(pending)))
all_noise = True
for first_switched, flows in sorted(pending.items(), key=lambda x: x[0]):
for peer, flow in flows.items():
@@ -327,19 +329,21 @@ if __name__ == "__main__":
src = flow.get("IPV4_SRC_ADDR") or flow.get("IPV6_SRC_ADDR")
src_host = resolve_hostname(src)
src_text = f"{src}" if src == src_host else f"{src_host} ({src})"
src_text = "{}".format(src) if src == src_host else "{} ({})".format(src_host, src)
dst = flow.get("IPV4_DST_ADDR") or flow.get("IPV6_DST_ADDR")
dst_host = resolve_hostname(dst)
dst_text = f"{dst}" if dst == dst_host else f"{dst_host} ({dst})"
dst_text = "{}".format(dst) if dst == dst_host else "{} ({})".format(dst_host, dst)
proto = flow["PROTOCOL"]
size = flow["IN_BYTES"]
packets = flow["IN_PKTS"]
src_port = flow.get("L4_SRC_PORT", 0)
dst_port = flow.get("L4_DST_PORT", 0)
print(f"From {src_text}:{src_port} to {dst_text}:{dst_port} with "
f"proto {IP_PROTOCOLS.get(proto, 'UNKNOWN')} and size {human_size(size)}"
f" ({packets} packets)")
print("From {src_text}:{src_port} to {dst_text}:{dst_port} with "
"proto {proto} and size {size}"
" ({packets} packets)".format(src_text=src_text, src_port=src_port, dst_text=dst_text,
dst_port=dst_port, proto=IP_PROTOCOLS.get(proto, 'UNKNOWN'),
size=human_size(size), packets=packets))
if all_noise:
print("They were all noise!")


@@ -4,23 +4,23 @@
Reference collector script for NetFlow v1, v5, and v9 Python package.
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""
import argparse
import gzip
import json
-from collections import namedtuple
-import queue
import logging
+import queue
import socket
import socketserver
import threading
import time
+from collections import namedtuple
+from .ipfix import IPFIXTemplateNotRecognized
from .utils import UnknownExportVersion, parse_packet
from .v9 import V9TemplateNotRecognized
-from .ipfix import IPFIXTemplateNotRecognized
RawPacket = namedtuple('RawPacket', ['ts', 'client', 'data'])
ParsedPacket = namedtuple('ParsedPacket', ['ts', 'client', 'export'])
@@ -118,7 +118,7 @@ class ThreadedNetFlowListener(threading.Thread):
while not self._shutdown.is_set():
try:
# 0.5s delay to limit CPU usage while waiting for new packets
-pkt: RawPacket = self.input.get(block=True, timeout=0.5)
+pkt = self.input.get(block=True, timeout=0.5)  # type: RawPacket
except queue.Empty:
continue


@@ -4,12 +4,12 @@
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
Reference: https://tools.ietf.org/html/rfc7011
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""
-from collections import namedtuple
import functools
import struct
+from collections import namedtuple
from typing import Optional, Union, List, Dict
FieldType = namedtuple("FieldType", ["id", "name", "type"])
@@ -488,7 +488,7 @@ class IPFIXFieldTypes:
]
@classmethod
-@functools.lru_cache
+@functools.lru_cache(maxsize=128)
def by_id(cls, id_: int) -> Optional[FieldType]:
for item in cls.iana_field_types:
if item[0] == id_:
@@ -496,7 +496,7 @@
return None
@classmethod
-@functools.lru_cache
+@functools.lru_cache(maxsize=128)
def by_name(cls, key: str) -> Optional[FieldType]:
for item in cls.iana_field_types:
if item[1] == key:
@@ -504,7 +504,7 @@
return None
@classmethod
-@functools.lru_cache
+@functools.lru_cache(maxsize=128)
def get_type_unpack(cls, key: Union[int, str]) -> Optional[DataType]:
"""
This method covers the mapping from a field type to a struct.unpack format string.
@@ -555,7 +555,7 @@ class IPFIXDataTypes:
]
@classmethod
-@functools.lru_cache
+@functools.lru_cache(maxsize=128)
def by_name(cls, key: str) -> Optional[DataType]:
"""
Get DataType by name if found, else None.
@@ -732,13 +732,13 @@ class IPFIXDataRecord:
# Here, reduced-size encoding of fields blocks the usage of IPFIXFieldTypes.get_type_unpack.
# See comment in IPFIXFieldTypes.get_type_unpack for more information.
-field_type: FieldType = IPFIXFieldTypes.by_id(field_type_id)
+field_type = IPFIXFieldTypes.by_id(field_type_id)  # type: Optional[FieldType]
if not field_type and type(field) is not TemplateFieldEnterprise:
# This should break, since the exporter seems to use a field identifier
# which is not standardized by IANA.
raise NotImplementedError("Field type with ID {} is not implemented".format(field_type_id))
-datatype: str = field_type.type
+datatype = field_type.type  # type: str
discovered_fields.append((field_type.name, field_type_id))
# Catch fields which are meant to be raw bytes and skip the rest
@@ -749,7 +749,7 @@
# Go into int, uint, float types
issigned = IPFIXDataTypes.is_signed(datatype)
isfloat = IPFIXDataTypes.is_float(datatype)
-assert not(all([issigned, isfloat])) # signed int and float are exclusive
+assert not (all([issigned, isfloat])) # signed int and float are exclusive
if field_length == 1:
unpacker += "b" if issigned else "B"
@@ -833,7 +833,8 @@ class IPFIXSet:
elif self.header.set_id >= 256: # data set, set_id is template id
while offset < self.header.length:
-template: List[Union[TemplateField, TemplateFieldEnterprise]] = templates.get(self.header.set_id)
+template = templates.get(
+self.header.set_id)  # type: List[Union[TemplateField, TemplateFieldEnterprise]]
if not template:
raise IPFIXTemplateNotRecognized
data_record = IPFIXDataRecord(data[offset:], template)
@@ -889,6 +890,7 @@ class IPFIXSetHeader:
class IPFIXExportPacket:
"""IPFIX export packet with header, templates, options and data flowsets
"""
def __init__(self, data: bytes, templates: Dict[int, list]):
self.header = IPFIXHeader(data[:IPFIXHeader.size])
self.sets = []
@@ -945,8 +947,8 @@ def parse_fields(data: bytes, count: int) -> (list, int):
:param count:
:return: List of fields and the new offset.
"""
-offset: int = 0
-fields: List[Union[TemplateField, TemplateFieldEnterprise]] = []
+offset = 0
+fields = []  # type: List[Union[TemplateField, TemplateFieldEnterprise]]
for ctr in range(count):
if data[offset] & 1 << 7 != 0: # enterprise flag set
pack = struct.unpack("!HHI", data[offset:offset + 8])


@@ -3,17 +3,17 @@
"""
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""
import struct
from typing import Union
+from .ipfix import IPFIXExportPacket
from .v1 import V1ExportPacket
from .v5 import V5ExportPacket
from .v9 import V9ExportPacket
-from .ipfix import IPFIXExportPacket
class UnknownExportVersion(Exception):


@@ -83,4 +83,4 @@ class V1ExportPacket:
def __repr__(self):
return "<ExportPacket v{} with {} records>".format(
-self.header.version, self.header.count)
\ No newline at end of file
+self.header.version, self.header.count)


@@ -77,6 +77,7 @@ class V5Header:
class V5ExportPacket:
"""The flow record holds the header and data flowsets.
"""
def __init__(self, data):
self.flows = []
self.header = V5Header(data)
@@ -90,4 +91,4 @@
def __repr__(self):
return "<ExportPacket v{} with {} records>".format(
-self.header.version, self.header.count)
\ No newline at end of file
+self.header.version, self.header.count)


@@ -8,7 +8,7 @@ Created for learning purposes and unsatisfying alternatives.
Reference: https://www.cisco.com/en/US/technologies/tk648/tk362/technologies_white_paper09186a00800a3db9.html
This script is specifically implemented in combination with softflowd. See https://github.com/djmdjm/softflowd
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""
@@ -116,7 +116,8 @@ V9_FIELD_TYPES = {
94: 'APPLICATION_DESCRIPTION', # Application description
95: 'APPLICATION_TAG', # 8 bits of engine ID, followed by n bits of classification
96: 'APPLICATION_NAME', # Name associated with a classification
-98: 'postipDiffServCodePoint', # The value of a Differentiated Services Code Point (DSCP) encoded in the Differentiated Services Field, after modification
+98: 'postipDiffServCodePoint', # The value of a Differentiated Services Code Point (DSCP)
+# encoded in the Differentiated Services Field, after modification
99: 'replication_factor', # Multicast replication factor
100: 'DEPRECATED', # DEPRECATED
102: 'layer2packetSectionOffset', # Layer 2 packet section offset. Potentially a generic offset
@@ -144,7 +145,7 @@
231: 'NF_F_FWD_FLOW_DELTA_BYTES', # The delta number of bytes from source to destination
232: 'NF_F_REV_FLOW_DELTA_BYTES', # The delta number of bytes from destination to source
33000: 'NF_F_INGRESS_ACL_ID', # The input ACL that permitted or denied the flow
-33001: 'NF_F_EGRESS_ACL_ID', # The output ACL that permitted or denied a flow
+33001: 'NF_F_EGRESS_ACL_ID',  # The output ACL that permitted or denied a flow
40000: 'NF_F_USERNAME', # AAA username
# PaloAlto PAN-OS 8.0
@@ -166,6 +167,7 @@ class V9DataRecord:
dict).
Should hold a 'data' dict with keys=field_type (integer) and value (in bytes).
"""
def __init__(self):
self.data = {}
@@ -178,6 +180,7 @@ class V9DataFlowSet:
template. This template is referenced in the field 'flowset_id' of this
DataFlowSet and must not be zero.
"""
def __init__(self, data, templates):
pack = struct.unpack('!HH', data[:4])
@@ -203,7 +206,7 @@
fkey = V9_FIELD_TYPES[field.field_type]
# The length of the value byte slice is defined in the template
-dataslice = data[offset:offset+flen]
+dataslice = data[offset:offset + flen]
# Better solution than struct.unpack with variable field length
fdata = 0
@@ -228,13 +231,14 @@
self.flows.append(new_record)
def __repr__(self):
return "<DataFlowSet with template {} of length {} holding {} flows>"\
return "<DataFlowSet with template {} of length {} holding {} flows>" \
.format(self.template_id, self.length, len(self.flows))
class V9TemplateField:
"""A field with type identifier and length.
"""
def __init__(self, field_type, field_length):
self.field_type = field_type # integer
self.field_length = field_length # bytes
@@ -247,6 +251,7 @@ class V9TemplateField:
class V9TemplateRecord:
"""A template record contained in a TemplateFlowSet.
"""
def __init__(self, template_id, field_count, fields):
self.template_id = template_id
self.field_count = field_count
@@ -264,6 +269,7 @@ class V9TemplateFlowSet:
identifiers of data types (eg "IP_SRC_ADDR", "PKTS"..). This way the flow
sender can dynamically put together data flowsets.
"""
def __init__(self, data):
pack = struct.unpack('!HH', data[:4])
self.flowset_id = pack[0]
@@ -274,7 +280,7 @@
# Iterate through all template records in this template flowset
while offset < self.length:
-pack = struct.unpack('!HH', data[offset:offset+4])
+pack = struct.unpack('!HH', data[offset:offset + 4])
template_id = pack[0]
field_count = pack[1]
@@ -282,7 +288,7 @@
for field in range(field_count):
# Get all fields of this template
offset += 4
-field_type, field_length = struct.unpack('!HH', data[offset:offset+4])
+field_type, field_length = struct.unpack('!HH', data[offset:offset + 4])
if field_type not in V9_FIELD_TYPES:
field_type = 0 # Set field_type to UNKNOWN_FIELD_TYPE as fallback
field = V9TemplateField(field_type, field_length)
@@ -298,7 +304,7 @@
offset += 4
def __repr__(self):
return "<TemplateFlowSet with id {} of length {} containing templates: {}>"\
return "<TemplateFlowSet with id {} of length {} containing templates: {}>" \
.format(self.flowset_id, self.length, self.templates.keys())
@@ -323,6 +329,7 @@ class V9Header:
class V9ExportPacket:
"""The flow record holds the header and all template and data flowsets.
"""
def __init__(self, data, templates):
self.header = V9Header(data)
self._templates = templates
@@ -332,7 +339,7 @@ class V9ExportPacket:
offset = self.header.length
skipped_flowsets_offsets = []
while offset != len(data):
-flowset_id = struct.unpack('!H', data[offset:offset+2])[0]
+flowset_id = struct.unpack('!H', data[offset:offset + 2])[0]
if flowset_id == 0: # TemplateFlowSet always have id 0
tfs = V9TemplateFlowSet(data[offset:])
@@ -353,7 +360,7 @@
offset += dfs.length
except V9TemplateNotRecognized:
# Could not be parsed, continue to check for templates
-length = struct.unpack("!H", data[offset+2:offset+4])[0]
+length = struct.unpack("!H", data[offset + 2:offset + 4])[0]
skipped_flowsets_offsets.append(offset)
offset += length


@@ -7,16 +7,16 @@ with open("README.md", "r") as fh:
setup(
name='netflow',
-version='0.10.2',
+version='0.10.3',
description='NetFlow v1, v5, v9 and IPFIX tool suite implemented in Python 3',
long_description=long_description,
long_description_content_type='text/markdown',
author='Dominik Pataky',
-author_email='dev@bitkeks.eu',
+author_email='software+pynetflow@dpataky.eu',
url='https://github.com/bitkeks/python-netflow-v9-softflowd',
packages=["netflow"],
license='MIT',
-python_requires='>=3.5',
+python_requires='>=3.5.3',
keywords='netflow ipfix collector parser',
classifiers=[
"Programming Language :: Python :: 3",


@@ -4,7 +4,7 @@ This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
The test packets (defined below as hex streams) were extracted from "real"
softflowd exports based on a sample PCAP capture file.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""


@@ -3,7 +3,7 @@
"""
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""
import gzip


@@ -3,7 +3,7 @@
"""
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""
# TODO: tests with 500 packets fail with delay=0. Probably a problem with UDP sockets buffer


@@ -3,7 +3,7 @@
"""
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""
# TODO: tests with 500 packets fail with delay=0. Probably a problem with UDP sockets buffer


@@ -3,24 +3,22 @@
"""
This file belongs to https://github.com/bitkeks/python-netflow-v9-softflowd.
-Copyright 2016-2020 Dominik Pataky <dev@bitkeks.eu>
+Copyright 2016-2020 Dominik Pataky <software+pynetflow@dpataky.eu>
Licensed under MIT License. See LICENSE.
"""
+import cProfile
import io
import linecache
-import cProfile
import pathlib
import pstats
import resource
import tracemalloc
import unittest
-from pstats import SortKey
from tests.lib import send_recv_packets, generate_packets
NUM_PACKETS_PERFORMANCE = 2000
@unittest.skip("Not necessary in functional tests, used as analysis tool")
class TestNetflowIPFIXPerformance(unittest.TestCase):
def setUp(self) -> None:
"""
@@ -69,7 +67,7 @@ class TestNetflowIPFIXPerformance(unittest.TestCase):
for idx, stat in enumerate(stats[:topx]):
frame = stat.traceback[0]
print("\n{idx:02d}: {filename}:{lineno} {size:.1f} KiB, count {count}".format(
-idx=idx+1, filename=frame.filename, lineno=frame.lineno, size=stat.size / 1024, count=stat.count
+idx=idx + 1, filename=frame.filename, lineno=frame.lineno, size=stat.size / 1024, count=stat.count
))
lines = []
@@ -182,7 +180,7 @@ class TestNetflowIPFIXPerformance(unittest.TestCase):
self.assertEqual(len(pkts), NUM_PACKETS_PERFORMANCE)
profile.disable()
-for sort_by in [SortKey.CUMULATIVE, SortKey.CALLS]:
+for sort_by in ['cumulative', 'calls']:
s = io.StringIO()
ps = pstats.Stats(profile, stream=s)
ps.sort_stats(sort_by).print_stats("netflow")