autosysadmin-agent: release 24.06
All checks were successful
Ansible Lint |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |3127|0|3127|0|:zzz:
gitea/ansible-roles/pipeline/head This commit looks good

This commit is contained in:
Jérémy Lecour 2024-06-17 18:20:49 +02:00 committed by Jérémy Lecour
parent 9e63ae90c8
commit ab452d9800
Signed by: jlecour
SSH key fingerprint: SHA256:h+5LgHRKwN9lS0SsdVR5yZPeFlJE4Mt+8UtL4CcP8dY
11 changed files with 862 additions and 67 deletions

View file

@ -24,6 +24,7 @@ The **patch** part is incremented if multiple releases happen the same month
### Changed
* autosysadmin-agent: release 24.06
* Elastic Stack : default to version 8.x
* evolinux-base: Customize logcheck recipient when serveur-base is installed
* log2mail: task log2mail.yml of evolinux-base converted to a role

View file

@ -1,6 +1,6 @@
#!/bin/bash
VERSION="24.03.2"
VERSION="24.06"
# Common functions for "repair" and "restart" scripts
@ -420,7 +420,7 @@ ensure_not_too_soon_or_exit() {
if [ -f "${lastrun_file}" ]; then
lastrun_age="$(($(date +%s)-$(stat -c "%Y" "${lastrun_file}")))"
log_run "Last run was ${lastrun_age} seconds ago."
if [ "${lastrun_age}" -lt 1800 ]; then
if [ "${lastrun_age}" -lt 900 ]; then
if is_interactive; then
echo "${PROGNAME} was run ${lastrun_age} seconds ago."
answer=""

View file

@ -13,37 +13,41 @@ service_name="apache2"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# For Apache, it's acceptable to restart even if systemd status is OK
log_all "${service} is active. Try to stop/start anyway."
fi
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# check syntax
if apache2ctl -t > /dev/null 2>&1; then
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
# Save error logs
date=$(LANG=en_US.UTF-8 date '+%b %d')
grep "${date}" /home/*/log/error.log /var/log/apache2/*error.log \
| grep -v \
-e "Got error 'PHP message:" \
-e "No matching DirectoryIndex" \
-e "client denied by server configuration" \
-e "server certificate does NOT include an ID which matches the server name" \
| save_in_log_dir "apache-errors.log"
# check syntax
if apache2ctl -t > /dev/null 2>&1; then
# Try to stop then start
### NOTE: `systemctl restart apache2` also does a stop then a start, but without any delay in between.
### TODO: consider verifying that the process is really stopped, otherwise kill it.
timeout 20 systemctl stop "${service}" > /dev/null
sleep 5
timeout 20 systemctl start "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: skip (invalid configuration)"
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
# Save error logs
date=$(LANG=en_US.UTF-8 date '+%b %d')
grep --no-messages "${date}" /home/*/log/error.log /var/log/apache2/*error.log \
| grep --invert-match \
--regexp "Got error 'PHP message:" \
--regexp "No matching DirectoryIndex" \
--regexp "client denied by server configuration" \
--regexp "server certificate does NOT include an ID which matches the server name" \
| save_in_log_dir "apache-errors.log"
else
log_action "Restart ${service_name}: skip (invalid configuration)"
fi
else
log_all "${service} is disabled (or missing). Skip."
@ -56,30 +60,34 @@ service_name="nginx"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# For Nginx, it's acceptable to restart even if systemd status is OK
# Same wording as for Apache and PHP-FPM ("anyway" belongs inside the quoted message)
log_all "${service} is active. Try to stop/start anyway."
fi
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# check syntax
if nginx -t > /dev/null 2>&1; then
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
# Save error logs
### Consider doing for Nginx the same as Apache
# check syntax
if nginx -t > /dev/null 2>&1; then
# Try to stop/start
### NOTE: `systemctl restart nginx` also does a stop then a start, but without any delay in between.
### TODO: consider verifying that the process is really stopped, otherwise kill it.
timeout 20 systemctl stop "${service}" > /dev/null
sleep 5
timeout 20 systemctl start "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: skip (invalid configuration)"
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
# Save error logs
### Consider doing for Nginx the same as Apache
else
log_action "Restart ${service_name}: skip (invalid configuration)"
fi
else
log_all "${service} is disabled (or missing). Skip."
@ -103,23 +111,24 @@ if [ -n "${fpm_services}" ]; then
service_name="${service//.service/}"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
log_all "${service} is active. Try to stop/start anyway."
fi
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to stop/start
timeout 20 systemctl stop "${service}" > /dev/null
sleep 5
timeout 20 systemctl start "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
else
log_all "${service} is disabled (or missing). Skip."
fi

View file

@ -0,0 +1,65 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
# shellcheck disable=SC2034
RUNNING="nwh-fr"
## List the heap dumps (*.hprof) found under a directory, oldest first.
##
## Arguments: $1 - directory to scan (optional, defaults to /srv/oomdumps)
## Outputs:   one path per line on stdout, sorted by ascending modification time
list_hprof_files() {
    local dumps_dir="${1:-/srv/oomdumps}"

    # "%T+" prints the modification time as YYYY-MM-DD+HH:MM:SS.fraction,
    # which a plain bytewise sort orders chronologically.
    # `cut -f 2-` keeps the whole path, even when it contains spaces
    # (printing only the 2nd whitespace-separated field would truncate it).
    find "${dumps_dir}" -name '*.hprof' -printf '%T+ %p\n' \
        | LC_ALL=C sort \
        | cut -d ' ' -f 2-
}
## Run the Nagios disk check on /srv/oomdumps and print its report on stdout.
## The thresholds are "critical" ones only (-c for free space, -K for free inodes).
## Returns the exit code of the plugin.
check_disk_report() {
    /usr/lib/nagios/plugins/check_disk -c 20% -K 20% -p /srv/oomdumps
}
## Same check, but silent: only the exit code matters.
check_disk() {
    check_disk_report > /dev/null
}
is_status_ok() {
    ## this must return 0 if the disk status is fine, otherwise return 1
    check_disk
    rc=$?
    # Nagios plugin convention: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN
    test $rc -lt 2
}
## Delete heap dumps, oldest first, until the disk check is no longer critical.
## Gives up when there is no dump left or after `max_run` deletions.
oomdumps_action() {
    local nb_run=0
    local max_run=100
    local oldest_hprof_file

    # Save disk status before cleanup
    # (the silent `check_disk` would only ever save an empty file)
    check_disk_report | save_in_log_dir "du_srv_oomdumps.before.status"

    while ! is_status_ok; do
        if [ "${nb_run}" -ge "${max_run}" ]; then
            log_action "Status is still critical after ${max_run} files deleted. Abort."
            break
        fi

        oldest_hprof_file="$(list_hprof_files | head -n 1)"
        if [ -z "${oldest_hprof_file}" ]; then
            log_action "No hprof file found. Abort."
            break
        fi

        rm -f -- "${oldest_hprof_file}"
        log_action "File ${oldest_hprof_file} deleted."
        nb_run=$((nb_run + 1))
    done

    # Save disk status after cleanup
    check_disk_report | save_in_log_dir "du_srv_oomdumps.after.status"
}
# Should we run?
# Leave quietly when this script is not supposed to run right now (see RUNNING)
# or when the disk status is already fine.
if ! is_supposed_to_run || is_status_ok; then
    exit 0
fi

# Yes we do, let's do the action
pre_restart
oomdumps_action
post_restart

View file

@ -0,0 +1,118 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
# shellcheck disable=SC2034
RUNNING="always"
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
## The name of the service, mainly for logging
service_name="hapee-extras-vrrp"
## The systemd service name
systemd_service="${service_name}.service"
## Tell whether a host answers to ping, according to the Nagios check_ping plugin.
##
## Arguments: $1 - host name or IP address to ping
## Returns:   0 if the plugin reports OK or WARNING,
##            non-zero if it reports CRITICAL/UNKNOWN or could not be run at all
is_ping_ok() {
    local ping_rc

    /usr/lib/nagios/plugins/check_ping -H "$1" -w 2,20% -c 5,50% > /dev/null
    ping_rc=$?
    # Nagios plugin convention: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN
    test "${ping_rc}" -lt 2
}
## Action for systemd system: restart the unit and log the outcome
systemd_action() {
    # Keep a copy of the unit status as it was before the restart
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"

    # `systemctl restart` (only for NRPE ?) sometimes returns 0 even if the
    # service has failed to start, so a successful restart is followed,
    # after a short pause, by an explicit status check.
    if timeout 20 systemctl restart "${systemd_service}" > /dev/null; then
        sleep 1 && systemctl status "${systemd_service}" > /dev/null
        rc=$?
    else
        rc=$?
    fi

    case "${rc}" in
        0) log_action "Restart ${service_name}: OK" ;;
        *) log_action "Restart ${service_name}: failed" ;;
    esac

    # Keep a copy of the unit status after the restart attempt
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
}
# Should we run?
# Leave quietly when this script is not supposed to run right now (see RUNNING)
# or when the service is reported as alive.
if ! is_supposed_to_run || is_service_alive; then
    exit 0
fi

# Nothing left to do either when the unit is disabled (or missing).
is_systemd_enabled "${systemd_service}" || exit 0

# if is_systemd_active "${systemd_service}"; then
#     # log_global "${service_name} is active, nothing left to do."
#     exit 0
# fi

## Restart the service on one peer over SSH, but only when the VIP does not
## pass the ping check while the WAN address of that peer does.
## $1: peer name (for logging), $2: peer WAN IP, $3: peer LAN IP (SSH target)
restart_on_peer_if_vip_down() {
    is_ping_ok "${vip}" && return
    is_ping_ok "${2}" || return

    ssh "${3}" -t "systemctl restart ${systemd_service}"
    rc=$?
    case "${rc}" in
        0) log_action "Restart ${service_name} on ${1}: OK" ;;
        *) log_action "Restart ${service_name} on ${1}: failed" ;;
    esac
}

# Let's finally do the action
pre_restart

vip="54.37.170.194"

vip_peer1="delubac-delupay-pp-lb00"
wan_ip1="54.37.170.195"
lan_ip1="172.19.1.20"

vip_peer2="delubac-delupay-pp-lb01"
wan_ip2="54.37.170.196"
lan_ip2="172.19.1.21"

# NOTE(review): ping_vip_rc is not read anywhere in this script — confirm whether it is still needed
/usr/lib/nagios/plugins/check_ping -H "${vip}" -w 2,20% -c 5,50% > /dev/null
ping_vip_rc=$?

# First peer, then (after a short pause) second peer if the VIP is still down
restart_on_peer_if_vip_down "${vip_peer1}" "${wan_ip1}" "${lan_ip1}"
sleep 2
restart_on_peer_if_vip_down "${vip_peer2}" "${wan_ip2}" "${lan_ip2}"

post_restart

View file

@ -0,0 +1,112 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
# shellcheck disable=SC2034
RUNNING="always"
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
## The name of the service, mainly for logging
service_name="redis"
## The SysVinit script name
sysvinit_script="${service_name}"
## The systemd service name
systemd_service="${service_name}.service"
## Tell whether Redis is alive by writing, reading and deleting a throwaway key.
## Returns 0 if the three commands succeed, otherwise the exit code of the
## last command that failed.
is_service_alive() {
    local uuid
    local rc=0

    uuid="autosysadmin-restart-redis-$(dbus-uuidgen)"

    # Redirection order matters: "> /dev/null 2>&1" silences both streams,
    # while "2>&1 > /dev/null" would let error messages through on stdout.
    timeout 10 redis-cli set "${uuid}" "${uuid}" > /dev/null 2>&1 || rc=$?
    timeout 10 redis-cli get "${uuid}" > /dev/null 2>&1 || rc=$?
    timeout 10 redis-cli del "${uuid}" > /dev/null 2>&1 || rc=$?

    return "${rc}"
}
## Action for SysVinit system: restart through the init script and log the outcome
sysvinit_action() {
    local init_script="/etc/init.d/${sysvinit_script}"

    # Keep a copy of the service status as it was before the restart
    timeout 2 "${init_script}" status | save_in_log_dir "${service_name}.before.status"

    # Try to restart
    timeout 20 "${init_script}" restart > /dev/null
    rc=$?
    case "${rc}" in
        0) log_action "Restart ${service_name}: OK" ;;
        *) log_action "Restart ${service_name}: failed" ;;
    esac

    # Keep a copy of the service status after the restart attempt
    timeout 2 "${init_script}" status | save_in_log_dir "${service_name}.after.status"
}
## Action for systemd system: restart the unit and log the outcome
systemd_action() {
    # Keep a copy of the unit status as it was before the restart
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"

    # `systemctl restart` (only for NRPE ?) sometimes returns 0 even if the
    # service has failed to start, so a successful restart is followed,
    # after a short pause, by an explicit status check.
    if timeout 20 systemctl restart "${systemd_service}" > /dev/null; then
        sleep 1 && systemctl status "${systemd_service}" > /dev/null
        rc=$?
    else
        rc=$?
    fi

    case "${rc}" in
        0) log_action "Restart ${service_name}: OK" ;;
        *) log_action "Restart ${service_name}: failed" ;;
    esac

    # Keep a copy of the unit status after the restart attempt
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
}
# Should we run?
# Leave quietly when this script is not supposed to run right now (see RUNNING)
# or when the service is alive.
if ! is_supposed_to_run || is_service_alive; then
    exit 0
fi

# Yes we do, so pick the restart method (sysvinit or systemd)
if is_debian_version "8" "<="; then
    # SysVinit: nothing to do when the service is not enabled
    is_sysvinit_enabled "*${sysvinit_script}*" || exit 0
    restart_action="sysvinit_action"
else
    # systemd: nothing left to do when the unit is disabled (or missing)...
    is_systemd_enabled "${systemd_service}" || exit 0
    # ... or when systemd reports it as active
    if is_systemd_active "${systemd_service}"; then
        exit 0
    fi
    restart_action="systemd_action"
fi

# Let's finally do the action
pre_restart
"${restart_action}"
post_restart

View file

@ -0,0 +1,89 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
# shellcheck disable=SC2034
RUNNING="nwh-fr"
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
## The name of the service, mainly for logging
service_name="tomcat"
## Tell whether the Tomcat instances are fine, according to the
## check_tomcat_instance.sh Nagios plugin (its report is left on stdout).
is_service_alive() {
    ## this must return 0 if the service is alive, otherwise return 1
    local check_rc

    /usr/local/lib/nagios/plugins/check_tomcat_instance.sh
    check_rc=$?
    # Nagios plugin convention: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.
    # Only OK and WARNING count as "alive"; a plugin that cannot be run does not.
    test "${check_rc}" -lt 2
}
## Print the names of the failed Tomcat instances, one per line
failed_instances() {
    # From the multi-instances check result:
    # … ignore the first line
    # … keep only the lines mentioning CRITICAL
    # … and print their 3rd field (the instance name)
    /usr/local/lib/nagios/plugins/check_tomcat_instance.sh \
        | awk 'NR > 1 && /CRITICAL/ { print $3 }'
}
## Action for tomcat instance: stop (or kill) then start the "tomcat" user unit
## $1: instance name, also used as the Unix account passed to `sudo -u`
tomcat_action() {
    instance_name=${1}
    timeout=60
    # Every systemctl call but "kill" is run as the instance user, under a time limit
    local -a as_instance=(timeout "${timeout}" sudo -i -u "${instance_name}" systemctl)
    local label="${service_name}-${instance_name}"

    # Keep a copy of the unit status as it was before the restart
    "${as_instance[@]}" status --user tomcat | save_in_log_dir "${label}.before.status"

    # Try to stop; kill the unit when the stop fails
    "${as_instance[@]}" stop --user tomcat
    rc=$?
    if [ "${rc}" -ne 0 ]; then
        log_action "Stop ${label}: failed."
        sudo -i -u "${instance_name}" systemctl kill --user tomcat
        log_action "Kill ${label}."
        sleep 5
    else
        log_action "Stop ${label}: OK"
    fi

    # Try to start
    "${as_instance[@]}" start --user tomcat
    rc=$?
    case "${rc}" in
        0) log_action "Start ${label}: OK" ;;
        *) log_action "Start ${label}: failed" ;;
    esac

    # Keep a copy of the unit status after the restart attempt
    "${as_instance[@]}" status --user tomcat | save_in_log_dir "${label}.after.status"
}
# Should we run?
# Leave quietly when this script is not supposed to run right now (see RUNNING)
# or when the service is alive.
if ! is_supposed_to_run || is_service_alive; then
    exit 0
fi

# Let's finally do the action, one failed instance at a time
instances_to_restart="$(failed_instances)"
# shellcheck disable=SC2086 # word splitting wanted: one instance name per word
for instance in ${instances_to_restart}; do
    pre_restart
    tomcat_action "${instance}"
    post_restart
done

View file

@ -0,0 +1,92 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
# shellcheck disable=SC2034
RUNNING="nwh-fr"
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
## Uncomment and customize this method if you want to have a special logic :
##
## return 1 if we should not run
## return 0 if we should run
##
## Some available functions :
## is_weekend() : Saturday or Sunday
## is_holiday() : holiday in France (based on `gcal(1)`)
## is_workday() : not weekend and not holiday
## is_worktime() : work day between 9-12h and 14-18h
#
# running_custom() {
# # implement your own custom method to decide if we should run or not
# }
## The name of the service, mainly for logging
service_name="tomcat"
## Run the local "http" check; its output and exit code are passed through
check_http() {
    local -a probe=(/usr/local/bin/check-local http)
    "${probe[@]}"
}
is_service_alive() {
    ## this must return 0 if the service is alive, otherwise return non-zero
    ## (the exit code of the HTTP check is returned as is)
    local http_rc

    check_http
    http_rc=$?
    return "${http_rc}"
}
## Action for Tomcat: shut it down (or kill it) then start it again,
## using the scripts of the "click2sell" account
tomcat_action() {
    # Keep the result of the HTTP check as it was before the restart
    check_http | save_in_log_dir "${service_name}.before.status"

    # Try to stop; when the shutdown fails, kill the process from the PID file (if any)
    timeout 120 sudo -i -u click2sell /home/click2sell/tomcat/bin/shutdown.sh
    rc=$?
    if [ "${rc}" -ne 0 ]; then
        log_action "Stop ${service_name}: failed."
        tomcat_pidfile="/home/click2sell/tomcat/pid/tomcat.pid"
        if [ -f "${tomcat_pidfile}" ]; then
            tomcat_pid=$(cat "${tomcat_pidfile}")
            kill -9 "${tomcat_pid}"
            log_action "Kill ${service_name}."
            sleep 5
        fi
    else
        log_action "Stop ${service_name}: OK"
    fi

    # Try to start
    timeout 120 sudo -i -u click2sell /home/click2sell/tomcat/bin/startup.sh
    rc=$?
    case "${rc}" in
        0) log_action "Start ${service_name}: OK" ;;
        *) log_action "Start ${service_name}: failed" ;;
    esac

    # Keep the result of the HTTP check after the restart attempt
    check_http | save_in_log_dir "${service_name}.after.status"
}
# Should we run?
# Leave quietly when this script is not supposed to run right now (see RUNNING)
# or when the service is alive.
if ! is_supposed_to_run || is_service_alive; then
    exit 0
fi

# Yes we do, let's do the action
pre_restart
tomcat_action
post_restart

View file

@ -0,0 +1,120 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
# shellcheck disable=SC2034
RUNNING="nwh-fr"
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
## Uncomment and customize this method if you want to have a special logic :
##
## return 1 if we should not run
## return 0 if we should run
##
## Some available functions :
## is_weekend() : Saturday or Sunday
## is_holiday() : holiday in France (based on `gcal(1)`)
## is_workday() : not weekend and not holiday
## is_worktime() : work day between 9-12h and 14-18h
#
# running_custom() {
# # implement your own custom method to decide if we should run or not
# }
## The name of the service, mainly for logging
service_name="example"
## The SysVinit script name
sysvinit_script="${service_name}"
## The systemd service name
systemd_service="${service_name}.service"
is_service_alive() {
    ## this must return 0 if the service is alive, otherwise return 1
    ## Example (USER and PROCESS_NAME are placeholders to customize):
    local -a probe=(pgrep -u USER PROCESS_NAME)
    "${probe[@]}" > /dev/null
}
## Action for SysVinit system: restart through the init script and log the outcome
sysvinit_action() {
    local init_script="/etc/init.d/${sysvinit_script}"

    # Keep a copy of the service status as it was before the restart
    timeout 2 "${init_script}" status | save_in_log_dir "${service_name}.before.status"

    # Try to restart
    timeout 20 "${init_script}" restart > /dev/null
    rc=$?
    case "${rc}" in
        0) log_action "Restart ${service_name}: OK" ;;
        *) log_action "Restart ${service_name}: failed" ;;
    esac

    # Keep a copy of the service status after the restart attempt
    timeout 2 "${init_script}" status | save_in_log_dir "${service_name}.after.status"
}
## Action for systemd system: restart the unit and log the outcome
systemd_action() {
    # Keep a copy of the unit status as it was before the restart
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"

    # `systemctl restart` (only for NRPE ?) sometimes returns 0 even if the
    # service has failed to start, so a successful restart is followed,
    # after a short pause, by an explicit status check.
    if timeout 20 systemctl restart "${systemd_service}" > /dev/null; then
        sleep 1 && systemctl status "${systemd_service}" > /dev/null
        rc=$?
    else
        rc=$?
    fi

    case "${rc}" in
        0) log_action "Restart ${service_name}: OK" ;;
        *) log_action "Restart ${service_name}: failed" ;;
    esac

    # Keep a copy of the unit status after the restart attempt
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
}
# Should we run?
# Leave quietly when this script is not supposed to run right now (see RUNNING)
# or when the service is alive.
if ! is_supposed_to_run || is_service_alive; then
    exit 0
fi

# Yes we do, so pick the restart method (sysvinit or systemd)
if is_debian_version "8" "<="; then
    # SysVinit: nothing to do when the service is not enabled
    is_sysvinit_enabled "*${sysvinit_script}*" || exit 0
    restart_action="sysvinit_action"
else
    # systemd: nothing left to do when the unit is disabled (or missing)...
    is_systemd_enabled "${systemd_service}" || exit 0
    # ... or when systemd reports it as active
    if is_systemd_active "${systemd_service}"; then
        exit 0
    fi
    restart_action="systemd_action"
fi

# Let's finally do the action
pre_restart
"${restart_action}"
post_restart

View file

@ -0,0 +1,105 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
# shellcheck disable=SC2034
RUNNING="nwh-fr"
## The name of the service, mainly for logging
service_name="nagios-nrpe-server"
## The SysVinit script name
sysvinit_script="${service_name}"
## The systemd service name
systemd_service="${service_name}.service"
is_service_alive() {
    ## this must return 0 if the service is alive, otherwise return 1
    ## Here: is there at least one "nrpe" process owned by the "nagios" user?
    local -a probe=(pgrep -u nagios nrpe)
    "${probe[@]}" > /dev/null
}
## Action for SysVinit system: restart through the init script and log the outcome
sysvinit_action() {
    local init_script="/etc/init.d/${sysvinit_script}"

    # Keep a copy of the service status as it was before the restart
    timeout 2 "${init_script}" status | save_in_log_dir "${service_name}.before.status"

    # Try to restart
    timeout 20 "${init_script}" restart > /dev/null
    rc=$?
    case "${rc}" in
        0) log_action "Restart ${service_name}: OK" ;;
        *) log_action "Restart ${service_name}: failed" ;;
    esac

    # Keep a copy of the service status after the restart attempt
    timeout 2 "${init_script}" status | save_in_log_dir "${service_name}.after.status"
}
## Action for systemd system: restart the unit and log the outcome
systemd_action() {
    # Keep a copy of the unit status as it was before the restart
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"

    # `systemctl restart` (only for NRPE ?) sometimes returns 0 even if the
    # service has failed to start, so a successful restart is followed,
    # after a short pause, by an explicit status check.
    if timeout 20 systemctl restart "${systemd_service}" > /dev/null; then
        sleep 1 && systemctl status "${systemd_service}" > /dev/null
        rc=$?
    else
        rc=$?
    fi

    case "${rc}" in
        0) log_action "Restart ${service_name}: OK" ;;
        *) log_action "Restart ${service_name}: failed" ;;
    esac

    # Keep a copy of the unit status after the restart attempt
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
}
# Should we run?
# Leave quietly when this script is not supposed to run right now (see RUNNING)
# or when the service is alive.
if ! is_supposed_to_run || is_service_alive; then
    exit 0
fi

# Yes we do, so pick the restart method (sysvinit or systemd)
if is_debian_version "8" "<="; then
    # SysVinit: nothing to do when the service is not enabled
    is_sysvinit_enabled "*${sysvinit_script}*" || exit 0
    restart_action="sysvinit_action"
else
    # systemd: nothing left to do when the unit is disabled (or missing)...
    is_systemd_enabled "${systemd_service}" || exit 0
    # ... or when systemd reports it as active
    if is_systemd_active "${systemd_service}"; then
        exit 0
    fi
    restart_action="systemd_action"
fi

# Let's finally do the action
pre_restart
"${restart_action}"
post_restart

View file

@ -0,0 +1,84 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
# shellcheck disable=SC2034
RUNNING="nwh-fr"
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
## The name of the service, mainly for logging
service_name="tomcat"
## The SysVinit script name
#sysvinit_script="${service_name}"
## The systemd service name
#systemd_service="${service_name}.service"
is_service_alive() {
    ## this must return 0 if the service is alive, otherwise return 1
    # Names (3rd field) found on the CRITICAL lines of the check,
    # minus those starting with a digit
    critical_instances="$(
        /usr/local/lib/nagios/plugins/check_tomcat_instance.sh \
            | awk '/CRITICAL/ { print $3 }' \
            | grep -v '^[0-9]'
    )"
    # Alive means: no critical instance at all
    [ -z "${critical_instances}" ]
}
## Action for tomcat instance: run shutdown.sh then startup.sh of the instance,
## as the instance user, and log the outcome of both steps.
##
## Arguments: $1 - tomcat instance name (mandatory); it is also the Unix
##                 account used for `sudo -u` and for the home directory lookup
tomcat_action() {
    local instance="${1:?}"
    local instance_homedir

    # The scripts live in the home directory of the instance account
    instance_homedir="$(getent passwd "${instance}" | cut -d ':' -f 6)"
    # Attempt at a sanity check, at least to make sure that this isn't empty.
    instance_homedir="${instance_homedir:-/srv/tomcat/${instance}}"

    # Save service status before restart
    timeout 40 sudo -u "${instance}" -- /bin/bash "${instance_homedir}/bin/status.sh" | save_in_log_dir "${service_name}.before.status"

    # Try to stop
    timeout 40 sudo -u "${instance}" -- /bin/bash "${instance_homedir}/bin/shutdown.sh"
    rc=$?
    if [ "${rc}" -eq "0" ]; then
        log_action "Stop ${service_name}: OK"
    else
        log_action "Stop ${service_name}: failed"
    fi

    # Try to start
    timeout 40 sudo -u "${instance}" -- /bin/bash "${instance_homedir}/bin/startup.sh"
    rc=$?
    if [ "${rc}" -eq "0" ]; then
        log_action "Start ${service_name}: OK"
    else
        log_action "Start ${service_name}: failed"
    fi

    # Save service status after restart
    timeout 40 sudo -u "${instance}" -- /bin/bash "${instance_homedir}/bin/status.sh" | save_in_log_dir "${service_name}.after.status"
}
# Should we run?
# Leave quietly when this script is not supposed to run right now (see RUNNING)
# or when the service is alive.
if ! is_supposed_to_run || is_service_alive; then
    exit 0
fi

# Let's finally do the action, for each instance reported as CRITICAL
# (the check is run again here; names starting with a digit are left out)
instances_to_restart="$(
    /usr/local/lib/nagios/plugins/check_tomcat_instance.sh \
        | awk '/CRITICAL/ { print $3 }' \
        | grep -v '^[0-9]'
)"
# shellcheck disable=SC2086 # word splitting wanted: one instance name per word
for instance in ${instances_to_restart}; do
    pre_restart
    tomcat_action "${instance}"
    post_restart
done