nagios-nrpe: Amélioration du check phpfpm_status et phpfpm_multi
Some checks reported errors
continuous-integration/drone/push Build was killed

Pour phpfpm_status > Ajout de la possibilité d'avoir un seuil de max procs actifs
Pour phpfpm_multi > Utilisation des seuils max (calculés sur le pm.max_children) + timeout
This commit is contained in:
Ludovic Poujol 2022-01-14 17:06:48 +01:00
parent ea382a1686
commit c8a862c5e7
2 changed files with 33 additions and 12 deletions

View file

@ -33,7 +33,11 @@ for pool_file in $POOL_FILES; do
pool_name=$(grep "^\[" "$pool_file" | sed -E 's/^\[(.*)\].*$/\1/g') pool_name=$(grep "^\[" "$pool_file" | sed -E 's/^\[(.*)\].*$/\1/g')
pool_status_path=$(grep -E "^pm.status_path\s?=" "$pool_file" | sed -E "s/.*=\s?'?([^']*)'?\s?$/\1/g") pool_status_path=$(grep -E "^pm.status_path\s?=" "$pool_file" | sed -E "s/.*=\s?'?([^']*)'?\s?$/\1/g")
pool_listen=$(grep -E "^listen\s?=" "$pool_file" | sed -E 's/.*=\s?(.*)\s?$/\1/g') pool_listen=$(grep -E "^listen\s?=" "$pool_file" | sed -E 's/.*=\s?(.*)\s?$/\1/g')
pool_max_children=$(grep -E "^pm.max_children" "$pool_file" | sed -E 's/.*=\s?(.*)\s?$/\1/g' )
pool_crit_procs=$(expr $pool_max_children \* 85 / 100)
pool_warn_procs=$(expr $pool_max_children \* 55 / 100)
if [[ "$pool_status_path" == '' ]]; then if [[ "$pool_status_path" == '' ]]; then
nb_unchk=$((nb_unchk + 1)) nb_unchk=$((nb_unchk + 1))
output="${output}UNCHK - ${pool_name} (missing pm.status_path definition)\n" output="${output}UNCHK - ${pool_name} (missing pm.status_path definition)\n"
@ -47,7 +51,7 @@ for pool_file in $POOL_FILES; do
target=(-H "$(echo "$pool_listen" | cut -d':' -f1)" -p "$(echo "$pool_listen" | cut -d':' -f2 )") target=(-H "$(echo "$pool_listen" | cut -d':' -f1)" -p "$(echo "$pool_listen" | cut -d':' -f2 )")
fi fi
result=$(perl /usr/local/lib/nagios/plugins/check_phpfpm_status.pl "${target[@]}" -u "$pool_status_path") result=$(perl /usr/local/lib/nagios/plugins/check_phpfpm_status.pl -t 5 "${target[@]}" -u "$pool_status_path" -c "$pool_crit_procs" -w "$pool_warn_procs" )
ret="${?}" ret="${?}"
if [ "${ret}" -ge 2 ]; then if [ "${ret}" -ge 2 ]; then
@ -80,3 +84,4 @@ printf "%b" "${output}" | grep -E "OK"
printf "%b" "${output}" | grep -E "^UNCHK" printf "%b" "${output}" | grep -E "^UNCHK"
exit "${return}" exit "${return}"

View file

@ -38,6 +38,8 @@ my $o_user= undef; # user for auth
my $o_pass= ''; # password for auth my $o_pass= ''; # password for auth
my $o_realm= ''; # password for auth my $o_realm= ''; # password for auth
my $o_version= undef; # print version my $o_version= undef; # print version
my $o_warn_a_level= -1; # Max number of active workers that will cause a warning
my $o_crit_a_level= -1; # Max number of active workers that will cause an error
my $o_warn_p_level= -1; # Min number of idle workers that will cause a warning my $o_warn_p_level= -1; # Min number of idle workers that will cause a warning
my $o_crit_p_level= -1; # Min number of idle workersthat will cause an error my $o_crit_p_level= -1; # Min number of idle workersthat will cause an error
my $o_warn_q_level= -1; # Number of Max Queue Reached that will cause a warning my $o_warn_q_level= -1; # Number of Max Queue Reached that will cause a warning
@ -130,17 +132,19 @@ sub help {
-X, --cacert -X, --cacert
Full path to the cacert.pem certificate authority used to verify ssl certificates (use with --verifyssl). Full path to the cacert.pem certificate authority used to verify ssl certificates (use with --verifyssl).
if not given the cacert from Mozilla::CA cpan plugin will be used. if not given the cacert from Mozilla::CA cpan plugin will be used.
-w, --warn=MIN_AVAILABLE_PROCESSES,PROC_MAX_REACHED,QUEUE_MAX_REACHED -w, --warn=MAX_ACTIVE_PROCESSES,MIN_AVAILABLE_PROCESSES,PROC_MAX_REACHED,QUEUE_MAX_REACHED
number of available workers, or max states reached that will cause a warning number of available workers, or max states reached that will cause a warning
-1 for no warning -1 for no warning
-c, --critical=MIN_AVAILABLE_PROCESSES,PROC_MAX_REACHED,QUEUE_MAX_REACHED -c, --critical=MAX_ACTIVE_PROCESSES,MIN_AVAILABLE_PROCESSES,PROC_MAX_REACHED,QUEUE_MAX_REACHED
number of available workers, or max states reached that will cause an error number of available workers, or max states reached that will cause an error
-1 for no CRITICAL -1 for no CRITICAL
-V, --version -V, --version
prints version number prints version number
Note : Note :
3 items can be managed on this check, this is why -w and -c parameters are using 3 values thresholds 4 items can be managed on this check, this is why -w and -c parameters are using 4 values thresholds
- MAX_ACTIVE_PROCESSES: Working with the number of available (Idle) and working processes (Busy).
Generating WARNING and CRITICAL if you do have too many Active processes.
- MIN_AVAILABLE_PROCESSES: Working with the number of available (Idle) and working processes (Busy). - MIN_AVAILABLE_PROCESSES: Working with the number of available (Idle) and working processes (Busy).
Generating WARNING and CRITICAL if you do not have enough Idle processes. Generating WARNING and CRITICAL if you do not have enough Idle processes.
- PROC_MAX_REACHED: the fpm-status report will show us how many times the max processes were reached since start, - PROC_MAX_REACHED: the fpm-status report will show us how many times the max processes were reached since start,
@ -231,12 +235,12 @@ sub check_options {
}; };
if (defined($o_warn_threshold)) { if (defined($o_warn_threshold)) {
($o_warn_p_level,$o_warn_m_level,$o_warn_q_level) = split(',', $o_warn_threshold); ($o_warn_a_level,$o_warn_p_level,$o_warn_m_level,$o_warn_q_level) = split(',', $o_warn_threshold);
} else { } else {
$o_warn_threshold = 'undefined' $o_warn_threshold = 'undefined'
} }
if (defined($o_crit_threshold)) { if (defined($o_crit_threshold)) {
($o_crit_p_level,$o_crit_m_level,$o_crit_q_level) = split(',', $o_crit_threshold); ($o_crit_a_level,$o_crit_p_level,$o_crit_m_level,$o_crit_q_level) = split(',', $o_crit_threshold);
} else { } else {
$o_crit_threshold = 'undefined' $o_crit_threshold = 'undefined'
} }
@ -247,20 +251,24 @@ sub check_options {
$o_fastcgi = 1; $o_fastcgi = 1;
} }
if (defined($o_debug)) { if (defined($o_debug)) {
print("\nDEBUG thresholds: \nWarning: ($o_warn_threshold) => Min Idle: $o_warn_p_level Max Reached :$o_warn_m_level MaxQueue: $o_warn_q_level"); print("\nDEBUG thresholds: \nWarning: ($o_warn_threshold) => Max Active: $o_warn_a_level Min Idle: $o_warn_p_level Max Reached :$o_warn_m_level MaxQueue: $o_warn_q_level");
print("\nCritical ($o_crit_threshold) => : Min Idle: $o_crit_p_level Max Reached: $o_crit_m_level MaxQueue : $o_crit_q_level\n"); print("\nCritical ($o_crit_threshold) => Max Active: $o_crit_a_level Min Idle: $o_crit_p_level Max Reached: $o_crit_m_level MaxQueue : $o_crit_q_level\n");
}
# Sanity-check the "max active workers" pair (1st field of -w / -c).
# Active-worker thresholds fire when the measured value is >= the level,
# so a higher value is worse and the warning level must sit strictly
# below the critical level -- the same direction as the MaxProcesses and
# MaxQueue checks, and the opposite of the IdleProcesses check.
# The comparison must be made against $o_crit_a_level; comparing with the
# idle-worker critical level ($o_crit_p_level) mixes two unrelated fields.
# A level of -1 means "disabled", in which case nothing is validated.
if ((defined($o_warn_a_level) && defined($o_crit_a_level)) &&
    (($o_warn_a_level != -1) && ($o_crit_a_level != -1) && ($o_warn_a_level >= $o_crit_a_level)) ) {
    nagios_exit($phpfpm,"UNKNOWN","Check warning and critical values for ActiveProcess (1st part of threshold), warning level must be < crit level!");
}
if ((defined($o_warn_p_level) && defined($o_crit_p_level)) && if ((defined($o_warn_p_level) && defined($o_crit_p_level)) &&
(($o_warn_p_level != -1) && ($o_crit_p_level != -1) && ($o_warn_p_level <= $o_crit_p_level)) ) { (($o_warn_p_level != -1) && ($o_crit_p_level != -1) && ($o_warn_p_level <= $o_crit_p_level)) ) {
nagios_exit($phpfpm,"UNKNOWN","Check warning and critical values for IdleProcesses (1st part of threshold), warning level must be > crit level!"); nagios_exit($phpfpm,"UNKNOWN","Check warning and critical values for IdleProcesses (2nd part of threshold), warning level must be > crit level!");
} }
if ((defined($o_warn_m_level) && defined($o_crit_m_level)) && if ((defined($o_warn_m_level) && defined($o_crit_m_level)) &&
(($o_warn_m_level != -1) && ($o_crit_m_level != -1) && ($o_warn_m_level >= $o_crit_m_level)) ) { (($o_warn_m_level != -1) && ($o_crit_m_level != -1) && ($o_warn_m_level >= $o_crit_m_level)) ) {
nagios_exit($phpfpm,"UNKNOWN","Check warning and critical values for MaxProcesses (2nd part of threshold), warning level must be < crit level!"); nagios_exit($phpfpm,"UNKNOWN","Check warning and critical values for MaxProcesses (3rd part of threshold), warning level must be < crit level!");
} }
if ((defined($o_warn_q_level) && defined($o_crit_q_level)) && if ((defined($o_warn_q_level) && defined($o_crit_q_level)) &&
(($o_warn_q_level != -1) && ($o_crit_q_level != -1) && ($o_warn_q_level >= $o_crit_q_level)) ) { (($o_warn_q_level != -1) && ($o_crit_q_level != -1) && ($o_warn_q_level >= $o_crit_q_level)) ) {
nagios_exit($phpfpm,"UNKNOWN","Check warning and critical values for MaxQueue (3rd part of threshold), warning level must be < crit level!"); nagios_exit($phpfpm,"UNKNOWN","Check warning and critical values for MaxQueue (4th part of threshold), warning level must be < crit level!");
} }
# Check compulsory attributes # Check compulsory attributes
if (!defined($o_host)) { if (!defined($o_host)) {
@ -654,6 +662,9 @@ if ($response->is_success) {
if (defined($o_crit_p_level) && (-1!=$o_crit_p_level) && ($IdleProcesses <= $o_crit_p_level)) { if (defined($o_crit_p_level) && (-1!=$o_crit_p_level) && ($IdleProcesses <= $o_crit_p_level)) {
nagios_exit($phpfpm,"CRITICAL", "Idle workers are critically low " . $InfoData,$PerfData); nagios_exit($phpfpm,"CRITICAL", "Idle workers are critically low " . $InfoData,$PerfData);
} }
if (defined($o_crit_a_level) && (-1!=$o_crit_a_level) && ($ActiveProcesses >= $o_crit_a_level)) {
nagios_exit($phpfpm,"CRITICAL", "Active workers are critically high " . $InfoData,$PerfData);
}
# Then WARNING exits by priority # Then WARNING exits by priority
if (defined($o_warn_q_level) && (-1!=$o_warn_q_level) && ($MaxListenQueueNew >= $o_warn_q_level)) { if (defined($o_warn_q_level) && (-1!=$o_warn_q_level) && ($MaxListenQueueNew >= $o_warn_q_level)) {
nagios_exit($phpfpm,"WARNING", "Max queue reached is high " . $InfoData,$PerfData); nagios_exit($phpfpm,"WARNING", "Max queue reached is high " . $InfoData,$PerfData);
@ -664,6 +675,9 @@ if ($response->is_success) {
if (defined($o_warn_p_level) && (-1!=$o_warn_p_level) && ($IdleProcesses <= $o_warn_p_level)) { if (defined($o_warn_p_level) && (-1!=$o_warn_p_level) && ($IdleProcesses <= $o_warn_p_level)) {
nagios_exit($phpfpm,"WARNING", "Idle workers are low " . $InfoData,$PerfData); nagios_exit($phpfpm,"WARNING", "Idle workers are low " . $InfoData,$PerfData);
} }
if (defined($o_warn_a_level) && (-1!=$o_warn_a_level) && ($ActiveProcesses >= $o_warn_a_level)) {
nagios_exit($phpfpm,"WARNING", "Active workers are high " . $InfoData,$PerfData);
}
nagios_exit($phpfpm,"OK",$InfoData,$PerfData); nagios_exit($phpfpm,"OK",$InfoData,$PerfData);
@ -733,3 +747,5 @@ sub header() {
} }
return 0; return 0;
} }