autosysadmin-agent: release 24.06
This commit is contained in:
parent
9e63ae90c8
commit
ab452d9800
11 changed files with 862 additions and 67 deletions
|
@ -24,6 +24,7 @@ The **patch** part is incremented if multiple releases happen the same month
|
|||
|
||||
### Changed
|
||||
|
||||
* autosysadmin-agent: release 24.06
|
||||
* Elastic Stack : default to version 8.x
|
||||
* evolinux-base: Customize logcheck recipient when serveur-base is installed
|
||||
* log2mail: task log2mail.yml of evolinux-base converted to a role
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#!/bin/bash
|
||||
|
||||
VERSION="24.03.2"
|
||||
VERSION="24.06"
|
||||
|
||||
# Common functions for "repair" and "restart" scripts
|
||||
|
||||
|
@ -420,7 +420,7 @@ ensure_not_too_soon_or_exit() {
|
|||
if [ -f "${lastrun_file}" ]; then
|
||||
lastrun_age="$(($(date +%s)-$(stat -c "%Y" "${lastrun_file}")))"
|
||||
log_run "Last run was ${lastrun_age} seconds ago."
|
||||
if [ "${lastrun_age}" -lt 1800 ]; then
|
||||
if [ "${lastrun_age}" -lt 900 ]; then
|
||||
if is_interactive; then
|
||||
echo "${PROGNAME} was run ${lastrun_age} seconds ago."
|
||||
answer=""
|
||||
|
|
|
@ -13,37 +13,41 @@ service_name="apache2"
|
|||
|
||||
if is_systemd_enabled "${service}"; then
|
||||
if is_systemd_active "${service}"; then
|
||||
log_all "${service} is active. Skip."
|
||||
else
|
||||
# Save service status before restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
|
||||
# For Apache, it's acceptable to restart even if systemd status is OK
|
||||
log_all "${service} is active. Try to stop/start anyway."
|
||||
fi
|
||||
# Save service status before restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# check syntax
|
||||
if apache2ctl -t > /dev/null 2>&1; then
|
||||
# Try to restart
|
||||
timeout 20 systemctl restart "${service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
|
||||
|
||||
# Save error logs
|
||||
date=$(LANG=en_US.UTF-8 date '+%b %d')
|
||||
grep "${date}" /home/*/log/error.log /var/log/apache2/*error.log \
|
||||
| grep -v \
|
||||
-e "Got error 'PHP message:" \
|
||||
-e "No matching DirectoryIndex" \
|
||||
-e "client denied by server configuration" \
|
||||
-e "server certificate does NOT include an ID which matches the server name" \
|
||||
| save_in_log_dir "apache-errors.log"
|
||||
# check syntax
|
||||
if apache2ctl -t > /dev/null 2>&1; then
|
||||
# Try to stop then start
|
||||
### NOTE: `systemctl restart apache2` is also doing stop then start, but without delay.
|
||||
### TODO: consider verifying that the process is really stopped, otherwise kill it.
|
||||
timeout 20 systemctl stop "${service}" > /dev/null
|
||||
sleep 5
|
||||
timeout 20 systemctl start "${service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: skip (invalid configuration)"
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
|
||||
|
||||
# Save error logs
|
||||
date=$(LANG=en_US.UTF-8 date '+%b %d')
|
||||
grep --no-messages "${date}" /home/*/log/error.log /var/log/apache2/*error.log \
|
||||
| grep --invert-match \
|
||||
--regexp "Got error 'PHP message:" \
|
||||
--regexp "No matching DirectoryIndex" \
|
||||
--regexp "client denied by server configuration" \
|
||||
--regexp "server certificate does NOT include an ID which matches the server name" \
|
||||
| save_in_log_dir "apache-errors.log"
|
||||
else
|
||||
log_action "Restart ${service_name}: skip (invalid configuration)"
|
||||
fi
|
||||
else
|
||||
log_all "${service} is disabled (or missing). Skip."
|
||||
|
@ -56,30 +60,34 @@ service_name="nginx"
|
|||
|
||||
if is_systemd_enabled "${service}"; then
|
||||
if is_systemd_active "${service}"; then
|
||||
log_all "${service} is active. Skip."
|
||||
else
|
||||
# Save service status before restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
|
||||
# For Nginx, it's acceptable to restart even if systemd status is OK
|
||||
log_all "${service} is active. Try to stop/start anyway."
|
||||
fi
|
||||
# Save service status before restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# check syntax
|
||||
if nginx -t > /dev/null 2>&1; then
|
||||
# Try to restart
|
||||
timeout 20 systemctl restart "${service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
|
||||
|
||||
# Save error logs
|
||||
### Consider doing for Nginx the same as Apache
|
||||
# check syntax
|
||||
if nginx -t > /dev/null 2>&1; then
|
||||
# Try to stop/start
|
||||
### NOTE: `systemctl restart nginx` is also doing stop then start, but without delay.
|
||||
### TODO: consider verifying that the process is really stopped, otherwise kill it.
|
||||
timeout 20 systemctl stop "${service}" > /dev/null
|
||||
sleep 5
|
||||
timeout 20 systemctl start "${service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: skip (invalid configuration)"
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
|
||||
|
||||
# Save error logs
|
||||
### Consider doing for Nginx the same as Apache
|
||||
else
|
||||
log_action "Restart ${service_name}: skip (invalid configuration)"
|
||||
fi
|
||||
else
|
||||
log_all "${service} is disabled (or missing). Skip."
|
||||
|
@ -103,23 +111,24 @@ if [ -n "${fpm_services}" ]; then
|
|||
service_name="${service//.service/}"
|
||||
if is_systemd_enabled "${service}"; then
|
||||
if is_systemd_active "${service}"; then
|
||||
log_all "${service} is active. Skip."
|
||||
else
|
||||
# Save service status before restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to restart
|
||||
timeout 20 systemctl restart "${service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
|
||||
log_all "${service} is active. Try to stop/start anyway."
|
||||
fi
|
||||
# Save service status before restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to stop/start
|
||||
timeout 20 systemctl stop "${service}" > /dev/null
|
||||
sleep 5
|
||||
timeout 20 systemctl start "${service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
|
||||
else
|
||||
log_all "${service} is disabled (or missing). Skip."
|
||||
fi
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
#!/bin/bash
|
||||
|
||||
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
|
||||
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
|
||||
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
|
||||
|
||||
# shellcheck disable=SC2034
|
||||
RUNNING="nwh-fr"
|
||||
|
||||
# List the heap dumps (*.hprof) found under /srv/oomdumps, oldest first.
# Outputs: one path per line, sorted by modification time (oldest first)
list_hprof_files() {
    # "%T+" prints the modification time as YYYY-MM-DD+HH:MM:SS.fraction, which
    # sorts chronologically as plain text. The timestamp is then stripped and
    # the whole path is kept, even if it contains spaces.
    find /srv/oomdumps -name '*.hprof' -printf "%T+ %p\n" | LC_ALL=C sort | cut -d ' ' -f 2-
}
|
||||
check_disk() {
|
||||
/usr/lib/nagios/plugins/check_disk -c 20% -K 20% -p /srv/oomdumps > /dev/null
|
||||
}
|
||||
# Tell if the status reported by check_disk is acceptable.
# Returns 0 when check_disk exits with a status below 2, and 1 otherwise.
# The exit status of check_disk is kept in ${rc}.
is_status_ok() {
    check_disk
    rc=$?

    if [ "${rc}" -ge 2 ]; then
        return 1
    fi
    return 0
}
|
||||
|
||||
# Delete the oldest hprof files, one at a time, until the status is OK again,
# no hprof file is left, or the maximum number of deletions is reached.
oomdumps_action() {
    # Record the disk check output before the cleanup
    check_disk | save_in_log_dir "du_srv_oomdumps.before.status"

    nb_run=0
    max_run=100

    until is_status_ok; do
        # Safety net: never delete more than ${max_run} files in a single run
        if [ "${nb_run}" -ge "${max_run}" ]; then
            log_action "Status is still critical after ${max_run} files deleted. Abort."
            break
        fi

        oldest_hprof_file=$(list_hprof_files | head -n 1)
        if [ -z "${oldest_hprof_file}" ]; then
            log_action "No hprof file found. Abort."
            break
        fi

        rm -f "${oldest_hprof_file}"
        log_action "File ${oldest_hprof_file} deleted."
        nb_run=$((nb_run + 1))
    done

    # Record the disk check output after the cleanup
    check_disk | save_in_log_dir "du_srv_oomdumps.after.status"
}
|
||||
|
||||
# Should we run?
|
||||
if ! is_supposed_to_run; then
|
||||
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
|
||||
exit 0
|
||||
fi
|
||||
if is_status_ok; then
|
||||
# log_global "${service_name} process alive. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Yes we do, let's do the action
|
||||
pre_restart
|
||||
oomdumps_action
|
||||
post_restart
|
|
@ -0,0 +1,118 @@
|
|||
#!/bin/bash
|
||||
|
||||
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
|
||||
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
|
||||
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
|
||||
|
||||
# shellcheck disable=SC2034
|
||||
RUNNING="always"
|
||||
|
||||
## Possible values for RUNNING :
|
||||
## never => disabled
|
||||
## always => enabled
|
||||
## nwh-fr => enabled during non-working-hours in France
|
||||
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
|
||||
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
|
||||
|
||||
## The name of the service, mainly for logging
|
||||
service_name="hapee-extras-vrrp"
|
||||
## The systemd service name
|
||||
systemd_service="${service_name}.service"
|
||||
|
||||
# Check that a host answers to ping, using the Nagios check_ping plugin.
# Arguments: $1 - host name or IP address to ping
# Returns:   0 if the plugin exits with a status below 2 (OK or WARNING),
#            1 otherwise (CRITICAL, UNKNOWN, or the plugin could not be run)
is_ping_ok() {
    /usr/lib/nagios/plugins/check_ping -H "$1" -w 2,20% -c 5,50% > /dev/null

    # Nagios plugins exit with 2 or more when the check does not pass,
    # so only statuses below 2 mean that the host is reachable.
    test $? -lt 2
}
|
||||
|
||||
## Action for systemd system
|
||||
systemd_action() {
|
||||
# Save service status before restart
|
||||
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to restart
|
||||
# systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start
|
||||
# so we check the status explicitly
|
||||
timeout 20 systemctl restart "${systemd_service}" > /dev/null \
|
||||
&& sleep 1 \
|
||||
&& systemctl status "${systemd_service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
|
||||
}
|
||||
|
||||
# Should we run?
|
||||
if ! is_supposed_to_run; then
|
||||
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
|
||||
exit 0
|
||||
fi
|
||||
if is_service_alive; then
|
||||
# log_global "${service_name} process alive. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
|
||||
if ! is_systemd_enabled "${systemd_service}"; then
|
||||
# log_global "${service_name} is disabled (or missing), nothing left to do."
|
||||
exit 0
|
||||
fi
|
||||
# if is_systemd_active "${systemd_service}"; then
|
||||
# # log_global "${service_name} is active, nothing left to do."
|
||||
# exit 0
|
||||
# fi
|
||||
|
||||
# Let's finally do the action
|
||||
pre_restart
|
||||
|
||||
|
||||
|
||||
vip="54.37.170.194"
|
||||
|
||||
vip_peer1="delubac-delupay-pp-lb00"
|
||||
wan_ip1="54.37.170.195"
|
||||
lan_ip1="172.19.1.20"
|
||||
|
||||
vip_peer2="delubac-delupay-pp-lb01"
|
||||
wan_ip2="54.37.170.196"
|
||||
lan_ip2="172.19.1.21"
|
||||
|
||||
/usr/lib/nagios/plugins/check_ping -H "${vip}" -w 2,20% -c 5,50% > /dev/null
|
||||
ping_vip_rc=$?
|
||||
|
||||
if ! is_ping_ok "${vip}"; then
|
||||
|
||||
if is_ping_ok "${wan_ip1}"; then
|
||||
ssh "${lan_ip1}" -t "systemctl restart ${systemd_service}"
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name} on ${vip_peer1}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name} on ${vip_peer1}: failed"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
sleep 2
|
||||
|
||||
if ! is_ping_ok "${vip}"; then
|
||||
|
||||
if is_ping_ok "${wan_ip2}"; then
|
||||
ssh "${lan_ip2}" -t "systemctl restart ${systemd_service}"
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name} on ${vip_peer2}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name} on ${vip_peer2}: failed"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
|
||||
|
||||
post_restart
|
112
autosysadmin-agent/files/upstream/restart/examples/restart_redis
Normal file
112
autosysadmin-agent/files/upstream/restart/examples/restart_redis
Normal file
|
@ -0,0 +1,112 @@
|
|||
#!/bin/bash
|
||||
|
||||
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
|
||||
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
|
||||
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
|
||||
|
||||
# shellcheck disable=SC2034
|
||||
RUNNING="always"
|
||||
|
||||
## Possible values for RUNNING :
|
||||
## never => disabled
|
||||
## always => enabled
|
||||
## nwh-fr => enabled during non-working-hours in France
|
||||
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
|
||||
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
|
||||
|
||||
## The name of the service, mainly for logging
|
||||
service_name="redis"
|
||||
## The SysVinit script name
|
||||
sysvinit_script="${service_name}"
|
||||
## The systemd service name
|
||||
systemd_service="${service_name}.service"
|
||||
|
||||
# Check that Redis is answering, by writing, reading and deleting a
# throw-away key with redis-cli.
# Returns: 0 if the three redis-cli commands succeed, otherwise the exit
#          status of the last command that failed.
is_service_alive() {
    local uuid rc=0
    uuid="autosysadmin-restart-redis-$(dbus-uuidgen)"

    # Redirect stdout first, then stderr: with "2>&1 > /dev/null" the error
    # messages would still be written to the caller's stdout.
    timeout 10 redis-cli set "${uuid}" "${uuid}" > /dev/null 2>&1 || rc=$?
    timeout 10 redis-cli get "${uuid}" > /dev/null 2>&1 || rc=$?
    timeout 10 redis-cli del "${uuid}" > /dev/null 2>&1 || rc=$?

    return "${rc}"
}
|
||||
|
||||
## Action for SysVinit system
|
||||
sysvinit_action() {
|
||||
# Save service status before restart
|
||||
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to restart
|
||||
timeout 20 "/etc/init.d/${sysvinit_script}" restart > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.after.status"
|
||||
}
|
||||
|
||||
## Action for systemd system
|
||||
systemd_action() {
|
||||
# Save service status before restart
|
||||
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to restart
|
||||
# systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start
|
||||
# so we check the status explicitly
|
||||
timeout 20 systemctl restart "${systemd_service}" > /dev/null \
|
||||
&& sleep 1 \
|
||||
&& systemctl status "${systemd_service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
|
||||
}
|
||||
|
||||
# Should we run?
|
||||
if ! is_supposed_to_run; then
|
||||
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
|
||||
exit 0
|
||||
fi
|
||||
if is_service_alive; then
|
||||
# log_global "${service_name} process alive. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Yes we do, so check for sysvinit or systemd
|
||||
if is_debian_version "8" "<="; then
|
||||
if ! is_sysvinit_enabled "*${sysvinit_script}*"; then
|
||||
# log_global "${service_name} not enabled. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Let's finally do the action
|
||||
pre_restart
|
||||
sysvinit_action
|
||||
post_restart
|
||||
else
|
||||
if ! is_systemd_enabled "${systemd_service}"; then
|
||||
# log_global "${service_name} is disabled (or missing), nothing left to do."
|
||||
exit 0
|
||||
fi
|
||||
if is_systemd_active "${systemd_service}"; then
|
||||
# log_global "${service_name} is active, nothing left to do."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Let's finally do the action
|
||||
pre_restart
|
||||
systemd_action
|
||||
post_restart
|
||||
fi
|
|
@ -0,0 +1,89 @@
|
|||
#!/bin/bash
|
||||
|
||||
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
|
||||
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
|
||||
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
|
||||
|
||||
# shellcheck disable=SC2034
|
||||
RUNNING="nwh-fr"
|
||||
|
||||
## Possible values for RUNNING :
|
||||
## never => disabled
|
||||
## always => enabled
|
||||
## nwh-fr => enabled during non-working-hours in France
|
||||
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
|
||||
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
|
||||
|
||||
## The name of the service, mainly for logging
|
||||
service_name="tomcat"
|
||||
|
||||
# Tell if Tomcat is alive, according to the check_tomcat_instance.sh
# Nagios plugin.
# Returns: 0 if the plugin exits with a status below 2 (OK or WARNING),
#          1 otherwise (CRITICAL, UNKNOWN, or the plugin could not be run)
is_service_alive() {
    ## this must return 0 if the service is alive, otherwise return 1
    local rc
    /usr/local/lib/nagios/plugins/check_tomcat_instance.sh
    rc=$?
    # Nagios plugins exit with 2 or more when the check does not pass
    test "${rc}" -lt 2
}
|
||||
# Print the third field (the instance name) of every CRITICAL line
# of the multi-instances check result.
failed_instances() {
    # A single awk pass ignores the first line of the check output,
    # keeps only the lines containing CRITICAL and prints the instance name.
    /usr/local/lib/nagios/plugins/check_tomcat_instance.sh \
        | awk 'NR > 1 && /CRITICAL/ { print $3 }'
}
|
||||
|
||||
## Action for tomcat instance
|
||||
tomcat_action() {
|
||||
instance_name=${1}
|
||||
timeout=60
|
||||
|
||||
# Save service status before restart
|
||||
timeout ${timeout} sudo -i -u "${instance_name}" systemctl status --user tomcat | save_in_log_dir "${service_name}-${instance_name}.before.status"
|
||||
|
||||
# Try to stop
|
||||
timeout ${timeout} sudo -i -u "${instance_name}" systemctl stop --user tomcat
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Stop ${service_name}-${instance_name}: OK"
|
||||
else
|
||||
log_action "Stop ${service_name}-${instance_name}: failed."
|
||||
|
||||
sudo -i -u "${instance_name}" systemctl kill --user tomcat
|
||||
log_action "Kill ${service_name}-${instance_name}."
|
||||
sleep 5
|
||||
fi
|
||||
|
||||
# Try to start
|
||||
timeout ${timeout} sudo -i -u "${instance_name}" systemctl start --user tomcat
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Start ${service_name}-${instance_name}: OK"
|
||||
else
|
||||
log_action "Start ${service_name}-${instance_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
timeout ${timeout} sudo -i -u "${instance_name}" systemctl status --user tomcat | save_in_log_dir "${service_name}-${instance_name}.after.status"
|
||||
}
|
||||
|
||||
# Should we run?
|
||||
if ! is_supposed_to_run; then
|
||||
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
|
||||
exit 0
|
||||
fi
|
||||
if is_service_alive; then
|
||||
# log_global "${service_name} process alive. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Let's finally do the action
|
||||
for instance in $(failed_instances);
|
||||
do
|
||||
pre_restart
|
||||
tomcat_action "${instance}"
|
||||
post_restart
|
||||
done
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
#!/bin/bash
|
||||
|
||||
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
|
||||
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
|
||||
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
|
||||
|
||||
# shellcheck disable=SC2034
|
||||
RUNNING="nwh-fr"
|
||||
|
||||
## Possible values for RUNNING :
|
||||
## never => disabled
|
||||
## always => enabled
|
||||
## nwh-fr => enabled during non-working-hours in France
|
||||
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
|
||||
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
|
||||
|
||||
## Uncomment and customize this method if you want to have a special logic :
|
||||
##
|
||||
## return 1 if we should not run
|
||||
## return 0 if we should run
|
||||
##
|
||||
## Some available functions :
|
||||
## is_weekend() : Saturday or Sunday
|
||||
## is_holiday() : holiday in France (based on `gcal(1)`)
|
||||
## is_workday() : not weekend and not holiday
|
||||
## is_worktime() : work day between 9-12h and 14-18h
|
||||
#
|
||||
# running_custom() {
|
||||
# # implement your own custom method to decide if we should run or not
|
||||
# }
|
||||
|
||||
## The name of the service, mainly for logging
|
||||
service_name="tomcat"
|
||||
|
||||
check_http() {
|
||||
/usr/local/bin/check-local http
|
||||
}
|
||||
|
||||
is_service_alive() {
|
||||
## this must return 0 if the service is alive, otherwise return 1
|
||||
check_http
|
||||
}
|
||||
|
||||
## Action for Tomcat
|
||||
tomcat_action() {
|
||||
# Save service status before restart
|
||||
check_http | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to stop
|
||||
timeout 120 sudo -i -u click2sell /home/click2sell/tomcat/bin/shutdown.sh
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Stop ${service_name}: OK"
|
||||
else
|
||||
log_action "Stop ${service_name}: failed."
|
||||
|
||||
tomcat_pidfile="/home/click2sell/tomcat/pid/tomcat.pid"
|
||||
if [ -f ${tomcat_pidfile} ]; then
|
||||
tomcat_pid=$(cat "${tomcat_pidfile}")
|
||||
kill -9 "${tomcat_pid}"
|
||||
log_action "Kill ${service_name}."
|
||||
sleep 5
|
||||
fi
|
||||
fi
|
||||
|
||||
# Try to start
|
||||
timeout 120 sudo -i -u click2sell /home/click2sell/tomcat/bin/startup.sh
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Start ${service_name}: OK"
|
||||
else
|
||||
log_action "Start ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
check_http | save_in_log_dir "${service_name}.after.status"
|
||||
}
|
||||
|
||||
# Should we run?
|
||||
if ! is_supposed_to_run; then
|
||||
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
|
||||
exit 0
|
||||
fi
|
||||
if is_service_alive; then
|
||||
# log_global "${service_name} process alive. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Yes we do, let's do the action
|
||||
pre_restart
|
||||
tomcat_action
|
||||
post_restart
|
|
@ -0,0 +1,120 @@
|
|||
#!/bin/bash
|
||||
|
||||
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
|
||||
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
|
||||
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
|
||||
|
||||
# shellcheck disable=SC2034
|
||||
RUNNING="nwh-fr"
|
||||
|
||||
## Possible values for RUNNING :
|
||||
## never => disabled
|
||||
## always => enabled
|
||||
## nwh-fr => enabled during non-working-hours in France
|
||||
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
|
||||
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
|
||||
|
||||
## Uncomment and customize this method if you want to have a special logic :
|
||||
##
|
||||
## return 1 if we should not run
|
||||
## return 0 if we should run
|
||||
##
|
||||
## Some available functions :
|
||||
## is_weekend() : Saturday or Sunday
|
||||
## is_holiday() : holiday in France (based on `gcal(1)`)
|
||||
## is_workday() : not weekend and not holiday
|
||||
## is_worktime() : work day between 9-12h and 14-18h
|
||||
#
|
||||
# running_custom() {
|
||||
# # implement your own custom method to decide if we should run or not
|
||||
# }
|
||||
|
||||
## The name of the service, mainly for logging
|
||||
service_name="example"
|
||||
## The SysVinit script name
|
||||
sysvinit_script="${service_name}"
|
||||
## The systemd service name
|
||||
systemd_service="${service_name}.service"
|
||||
|
||||
is_service_alive() {
|
||||
## this must return 0 if the service is alive, otherwise return 1
|
||||
## Example:
|
||||
pgrep -u USER PROCESS_NAME > /dev/null
|
||||
}
|
||||
|
||||
## Action for SysVinit system
|
||||
sysvinit_action() {
|
||||
# Save service status before restart
|
||||
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to restart
|
||||
timeout 20 "/etc/init.d/${sysvinit_script}" restart > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.after.status"
|
||||
}
|
||||
|
||||
## Action for systemd system
|
||||
systemd_action() {
|
||||
# Save service status before restart
|
||||
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to restart
|
||||
# systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start
|
||||
# so we check the status explicitly
|
||||
timeout 20 systemctl restart "${systemd_service}" > /dev/null \
|
||||
&& sleep 1 \
|
||||
&& systemctl status "${systemd_service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
|
||||
}
|
||||
|
||||
# Should we run?
|
||||
if ! is_supposed_to_run; then
|
||||
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
|
||||
exit 0
|
||||
fi
|
||||
if is_service_alive; then
|
||||
# log_global "${service_name} process alive. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Yes we do, so check for sysvinit or systemd
|
||||
if is_debian_version "8" "<="; then
|
||||
if ! is_sysvinit_enabled "*${sysvinit_script}*"; then
|
||||
# log_global "${service_name} not enabled. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Let's finally do the action
|
||||
pre_restart
|
||||
sysvinit_action
|
||||
post_restart
|
||||
else
|
||||
if ! is_systemd_enabled "${systemd_service}"; then
|
||||
# log_global "${service_name} is disabled (or missing), nothing left to do."
|
||||
exit 0
|
||||
fi
|
||||
if is_systemd_active "${systemd_service}"; then
|
||||
# log_global "${service_name} is active, nothing left to do."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Let's finally do the action
|
||||
pre_restart
|
||||
systemd_action
|
||||
post_restart
|
||||
fi
|
105
autosysadmin-agent/files/upstream/restart/restart_nrpe
Executable file
105
autosysadmin-agent/files/upstream/restart/restart_nrpe
Executable file
|
@ -0,0 +1,105 @@
|
|||
#!/bin/bash
|
||||
|
||||
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
|
||||
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
|
||||
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
|
||||
|
||||
## Possible values for RUNNING :
|
||||
## never => disabled
|
||||
## always => enabled
|
||||
## nwh-fr => enabled during non-working-hours in France
|
||||
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
|
||||
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
|
||||
|
||||
# shellcheck disable=SC2034
|
||||
RUNNING="nwh-fr"
|
||||
|
||||
## The name of the service, mainly for logging
|
||||
service_name="nagios-nrpe-server"
|
||||
## The SysVinit script name
|
||||
sysvinit_script="${service_name}"
|
||||
## The systemd service name
|
||||
systemd_service="${service_name}.service"
|
||||
|
||||
is_service_alive() {
|
||||
## this must return 0 if the service is alive, otherwise return 1
|
||||
## Example:
|
||||
pgrep -u nagios nrpe > /dev/null
|
||||
}
|
||||
|
||||
## Action for SysVinit system
|
||||
sysvinit_action() {
|
||||
# Save service status before restart
|
||||
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to restart
|
||||
timeout 20 "/etc/init.d/${sysvinit_script}" restart > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.after.status"
|
||||
}
|
||||
|
||||
## Action for systemd system
|
||||
systemd_action() {
|
||||
# Save service status before restart
|
||||
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"
|
||||
|
||||
# Try to restart
|
||||
# systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start
|
||||
# so we check the status explicitly
|
||||
timeout 20 systemctl restart "${systemd_service}" > /dev/null \
|
||||
&& sleep 1 \
|
||||
&& systemctl status "${systemd_service}" > /dev/null
|
||||
rc=$?
|
||||
if [ "${rc}" -eq "0" ]; then
|
||||
log_action "Restart ${service_name}: OK"
|
||||
else
|
||||
log_action "Restart ${service_name}: failed"
|
||||
fi
|
||||
|
||||
# Save service status after restart
|
||||
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
|
||||
}
|
||||
|
||||
# Should we run?
|
||||
if ! is_supposed_to_run; then
|
||||
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
|
||||
exit 0
|
||||
fi
|
||||
if is_service_alive; then
|
||||
# log_global "${service_name} process alive. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Yes we do, so check for sysvinit or systemd
|
||||
if is_debian_version "8" "<="; then
|
||||
if ! is_sysvinit_enabled "*${sysvinit_script}*"; then
|
||||
# log_global "${service_name} not enabled. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Let's finally do the action
|
||||
pre_restart
|
||||
sysvinit_action
|
||||
post_restart
|
||||
else
|
||||
if ! is_systemd_enabled "${systemd_service}"; then
|
||||
# log_global "${service_name} is disabled (or missing), nothing left to do."
|
||||
exit 0
|
||||
fi
|
||||
if is_systemd_active "${systemd_service}"; then
|
||||
# log_global "${service_name} is active, nothing left to do."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Let's finally do the action
|
||||
pre_restart
|
||||
systemd_action
|
||||
post_restart
|
||||
fi
|
|
@ -0,0 +1,84 @@
|
|||
#!/bin/bash
|
||||
|
||||
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
|
||||
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
|
||||
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
|
||||
|
||||
# shellcheck disable=SC2034
|
||||
RUNNING="nwh-fr"
|
||||
|
||||
## Possible values for RUNNING :
|
||||
## never => disabled
|
||||
## always => enabled
|
||||
## nwh-fr => enabled during non-working-hours in France
|
||||
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
|
||||
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
|
||||
|
||||
## The name of the service, mainly for logging
|
||||
service_name="tomcat"
|
||||
## The SysVinit script name
|
||||
#sysvinit_script="${service_name}"
|
||||
## The systemd service name
|
||||
#systemd_service="${service_name}.service"
|
||||
|
||||
# Tell if Tomcat is alive: it is when the check plugin reports no CRITICAL
# instance (entries starting with a digit are ignored).
# The list of critical instances is kept in ${critical_instances}.
is_service_alive() {
    ## this must return 0 if the service is alive, otherwise return 1
    critical_instances=$(
        /usr/local/lib/nagios/plugins/check_tomcat_instance.sh \
            | grep CRITICAL \
            | awk '{print $3}' \
            | grep -v '^[0-9]'
    )

    # Alive means that the list of critical instances is empty
    [ -z "${critical_instances}" ]
}
|
||||
|
||||
## Action for tomcat instance
|
||||
# Stop then start one Tomcat instance with the scripts found in the "bin"
# directory of its home, saving the output of status.sh before and after.
# Arguments: $1 - tomcat instance name (also the user the scripts run as)
tomcat_action() {
    # $1: tomcat instance name
    local instance="${1:?}"
    local instance_homedir rc

    # The scripts live in the home directory of the user named after the instance
    instance_homedir="$(getent passwd "${instance}" | cut -d ':' -f 6)"
    # Attempt at a sanity check, at least to make sure that this isn't empty.
    instance_homedir="${instance_homedir:-/srv/tomcat/${instance}}"

    # Save service status before restart
    timeout 40 sudo -u "${instance}" -- /bin/bash "${instance_homedir}/bin/status.sh" | save_in_log_dir "${service_name}.before.status"

    # Try to stop
    timeout 40 sudo -u "${instance}" -- /bin/bash "${instance_homedir}/bin/shutdown.sh"
    rc=$?
    if [ "${rc}" -eq "0" ]; then
        log_action "Stop ${service_name}: OK"
    else
        log_action "Stop ${service_name}: failed"
    fi

    # Try to start
    timeout 40 sudo -u "${instance}" -- /bin/bash "${instance_homedir}/bin/startup.sh"
    rc=$?
    if [ "${rc}" -eq "0" ]; then
        log_action "Start ${service_name}: OK"
    else
        log_action "Start ${service_name}: failed"
    fi

    # Save service status after restart
    timeout 40 sudo -u "${instance}" -- /bin/bash "${instance_homedir}/bin/status.sh" | save_in_log_dir "${service_name}.after.status"
}
|
||||
|
||||
# Should we run?
|
||||
if ! is_supposed_to_run; then
|
||||
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
|
||||
exit 0
|
||||
fi
|
||||
if is_service_alive; then
|
||||
# log_global "${service_name} process alive. Aborting"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Let's finally do the action
|
||||
for instance in $( /usr/local/lib/nagios/plugins/check_tomcat_instance.sh | grep CRITICAL | awk '{print $3}' | grep -v '^[0-9]' ) ;
|
||||
do
|
||||
pre_restart
|
||||
tomcat_action "${instance}"
|
||||
post_restart
|
||||
done
|
||||
|
Loading…
Reference in a new issue