ansible-roles/autosysadmin/files/scripts/functions.sh

479 lines
12 KiB
Bash
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
get_system() {
uname -s
}
get_fqdn() {
if [ "$(get_system)" = "Linux" ]; then
hostname --fqdn
elif [ "$(get_system)" = "OpenBSD" ]; then
hostname
else
log_error_exit "OS not detected!"
fi
}
get_complete_hostname() {
REAL_HOSTNAME="$(get_fqdn)"
if [ "${HOSTNAME}" = "${REAL_HOSTNAME}" ]; then
echo "${HOSTNAME}"
else
echo "${HOSTNAME} (${REAL_HOSTNAME})"
fi
}
get_evomaintenance_mail() {
email="$(grep "EVOMAINTMAIL=" /etc/evomaintenance.cf | cut -d '=' -f2)"
if [[ -z "$email" ]]; then
email='alert5@evolix.fr'
fi
echo "${email}"
}
arguments="${*}"
get_argument() {
no_found=1
for argument in ${arguments} ; do
if [ "${argument}" = "${1}" ] ;
then
no_found=0
fi
done
return ${no_found}
}
internal_info() {
INTERNAL_INFO="$(printf '%b\n%s' "${INTERNAL_INFO}" "$*")"
}
log_action() {
log "Action : $*"
ACTIONS="$(printf '%s\n%s' "${ACTIONS}" "$*")"
}
log() {
INTERNAL_LOG="$(printf '%s\n%s %s %s %s' "${INTERNAL_LOG}" "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*")"
printf '%s %s %s %s\n' "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*" | tee -a "${LOG_DIR}/autosysadmin.log"
echo "$*" | /usr/bin/logger -p local0.notice -t autosysadmin."$0"
}
log_error_exit() {
log "ERROR : $*"
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: $*" --no-commit --no-mail
exit 1
}
log_check_php_fpm() {
# Extraire seulement les chiffres du nom du script exécuté
# ./repair_php_fpm81.sh ==> 81
PHP_VERSION="${0//[^0-9]/}"
PHP_PATH_POOL=$(find /var/lib/lxc/php"${PHP_VERSION}"/ -type d -name "pool.d")
/usr/local/lib/nagios/plugins/check_phpfpm_multi "${PHP_PATH_POOL}" > "${LOG_DIR}/nrpe.txt"
}
log_system_status() {
DUMP_SERVER_STATE_BIN="$(command -v dump-server-state || command -v backup-server-state)"
if [ -z "${DUMP_SERVER_STATE_BIN}" ]; then
log "Warning: dump-server-state is not present. No server state recorded...."
fi
if [ -x "${DUMP_SERVER_STATE_BIN}" ]; then
# NOTE We don't want the logging to take too much time, so we kill it
# if it take more than 20 seconds.
timeout --signal 9 20 \
"${DUMP_SERVER_STATE_BIN}" \
--dump-dir="$LOG_DIR" \
--df \
--dmesg \
--iptables \
--lxc \
--netcfg \
--netstat \
--uname \
--processes \
--systemctl \
--uptime \
--virsh \
--disks \
--mysql-processes \
--no-apt-states \
--no-apt-config \
--no-dpkg-full \
--no-dpkg-status \
--no-mount \
--no-packages \
--no-sysctl \
--no-etc
log "System status logged in ${LOG_DIR}"
fi
}
read_log_system_status(){
files="df.txt dmesg.txt lxc-list.txt netstat-legacy.txt netstat-ss.txt pstree.txt ps.txt systemctl-failed-services.txt"
echo -e "\n\n#### Détails de dump-server-state"
for file in ${files} ; do
echo -e "\n### cat ${LOG_DIR}/${file} :"
tail -n 1000 "${LOG_DIR}"/"${file}"
done
}
ensure_no_active_users_or_exit() {
if is_debug; then return; fi
# Is there any active user ?
for user in $(LC_ALL=C who --users|awk '{print $1}'); do
idle_time="$(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}')"
for sameusertime in $(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}'); do
if is_active_user "$sameusertime"; then
hook_mail abort_active_users
log_error_exit 'At least one user was recently active. That requires human intervention. Nothing to do here!'
fi
done
done
}
is_active_user() {
# Check if a user was active in the last 30 minutes
idle_time="$1"
if [ "${idle_time}" = "old" ];
then
return 1
elif [ "${idle_time}" = "." ];
then
return 0
else
hh="$(echo "${idle_time}" | awk -F':' '{print $1}')"
mm="$(echo "${idle_time}" | awk -F':' '{print $2}')"
idle_minutes="$(( 60 * "${hh}" + "${mm}" ))"
if [ "${idle_minutes}" -ge 30 ];
then
return 1
else
return 0
fi
fi
}
is_debug() {
debug_file="/etc/evolinux/autosysadmin.debug"
if [ -e "${debug_file}" ]; then
last_change=$(stat -c %Z "${debug_file}")
limit_date=$(date --date "14400 seconds ago" +"%s")
if [ $(( last_change - limit_date )) -le "0" ]; then
rm "${debug_file}"
else
return 0
fi
fi
return 1
}
check_nrpe() {
check="$1"
list_command_nrpe=$( grep --exclude=*~ -E "\[${check}\]" -r /etc/nagios/ | grep -v '#command' )
command_nrpe_primary=$( echo "${list_command_nrpe}" | grep "/etc/nagios/nrpe.d/evolix.cfg" | cut -d'=' -f2- )
command_nrpe_secondary=$( echo "${list_command_nrpe}" | head -n1 | cut -d'=' -f2- )
if [ -z "${command_nrpe_primary}" ] && [ -z "${command_nrpe_secondary}" ]
then
return 1
else
if [ -n "${command_nrpe_primary}" ]
then
${command_nrpe_primary}
else
${command_nrpe_secondary}
fi
fi
}
acquire_lock_or_exit() {
lockfile="$1"
waittime="$2"
# si le temps dattente nest pas compréhensible par sleep(1), il vaut 0
if ! echo "${waittime}" | grep -Eq '^[0-9]+[smhd]?$'
then
waittime=0
fi
# si le temps dattente est supérieur à 0 et si le lock existe, on attend
if test "${waittime}" -gt 0 && test -f "${lockfile}"
then
sleep "${waittime}"
fi
# si le lock existe, on sarrête
if test -f "${lockfile}"
then
log_error_exit "lock file ${lockfile} exists"
fi
touch "${lockfile}"
}
is_too_soon() {
if is_debug; then return; fi
witness="/tmp/autosysadmin_witness_$(basename "$0")"
if test -f "${witness}"
then
compare="$(($(date +%s)-$(stat -c "%Y" "${witness}")))"
if [ "${compare}" -lt 1800 ];
then
log_error_exit 'already executed less than 30 minutes ago'
fi
rm "${witness}"
fi
touch "${witness}"
}
init_autosysadmin() {
PATH="${PATH}":/usr/sbin:/sbin↩
unset ACTIONS
SCRIPTNAME=$(basename "$0")
PROGNAME=${SCRIPTNAME%.sh}
RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${SCRIPTNAME}_$(openssl rand -hex 6)"
LOG_DIR="/var/log/autosysadmin/${RUN_ID}"
mkdir -p "${LOG_DIR}"
log "Autosysadmin : Script ${SCRIPTNAME} triggered"
# Detect operating system name, version and release↩
detect_os
}
load_conf() {
# Load conf and enable script by default.
# To disable script locally, set "$PROGNAME"=off in /etc/evolinux/autosysadmin.
# To disable script globally, set "$PROGNAME"=off in the script, after load_conf() call.
declare -g "$PROGNAME"=on # dynamic variable assignment ($PROGNAME == repair_*)
# Source configuration file
# shellcheck source=../roles/deploy_autosysadmin/templates/autosysadmin.cfg.j2
test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin
}
detect_os() {
# OS detection
DEBIAN_RELEASE=""
LSB_RELEASE_BIN="$(command -v lsb_release)"
if [ -e /etc/debian_version ]; then
DEBIAN_VERSION="$(cut -d "." -f 1 < /etc/debian_version)"
if [ -x "${LSB_RELEASE_BIN}" ]; then
DEBIAN_RELEASE="$("${LSB_RELEASE_BIN}" --codename --short)"
else
case "${DEBIAN_VERSION}" in
8) DEBIAN_RELEASE="jessie";;
9) DEBIAN_RELEASE="stretch";;
10) DEBIAN_RELEASE="buster";;
11) DEBIAN_RELEASE="bullseye";;
esac
fi
fi
}
is_debian_jessie() {
test "${DEBIAN_RELEASE}" = "jessie"
}
is_debian_stretch() {
test "${DEBIAN_RELEASE}" = "stretch"
}
is_debian_buster() {
test "${DEBIAN_RELEASE}" = "buster"
}
is_debian_bullseye() {
test "${DEBIAN_RELEASE}" = "bullseye"
}
systemd_list_service_failed() {
systemctl list-units --failed --no-legend --full --type=service "$1" |
awk '{print $1}'
}
systemd_list_units_enabled() {
list_units_enabled=$(systemctl list-unit-files --state=enabled --no-legend | awk "/$1/{print \$1}")
if [ -z "${list_units_enabled}" ]
then
return 1
else
echo "${list_units_enabled}"
fi
}
format_mail_success() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <equipe+autosysadmin@evolix.net>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: $(basename "$0")
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: autosysadmin@evolix.fr
Subject: [autosysadmin] Intervention sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
### Renseignements sur l'intervention
${ACTIONS}
### Réagir à cette intervention
Vous pouvez répondre à ce message (sur l'adresse mail equipe@evolix.net).
En cas d'urgence, utilisez l'adresse maintenance@evolix.fr ou
notre téléphone portable d'astreinte (04.26.99.99.26)
--
Votre AutoSysadmin
EOTEMPLATE
}
format_mail_abort_active_users() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <equipe+autosysadmin@evolix.net>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: $(basename "$0")
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: autosysadmin@evolix.fr
Subject: [autosysadmin] Intervention interrompue sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique a été interrompue en raison
d'un utilisateur actuellement actif sur le serveur.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
### Utilisateur(s) connecté(s)
$(w)
--
Votre AutoSysadmin
EOTEMPLATE
}
format_mail_internal_info() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <equipe+autosysadmin@evolix.net>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: $(basename "$0")
X-RunId: ${RUN_ID}
To: autosysadmin@evolix.fr
Subject: [autosysadmin] Complements (interne) - Intervention sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
Script déclenché : $(basename "$0")
### Actions effectuées
${ACTIONS}
### Logs autosysadmin
${INTERNAL_LOG}
### Utilisateur(s) connecté(s)
$(w)
### Informations additionnelles données par le script $(basename "$0")
${INTERNAL_INFO}
--
Votre AutoSysadmin
EOTEMPLATE
}
hook_mail() {
if is_debug; then return; fi
HOSTNAME="${HOSTNAME:-"$(get_fqdn)"}"
HOSTNAME_TEXT="$(get_complete_hostname)"
EMAIL_CLIENT="$(get_evomaintenance_mail)"
MAIL_CONTENT="$(format_mail_"$1")"
SENDMAIL_BIN="$(command -v sendmail)"
if [ -z "${SENDMAIL_BIN}" ]; then
log "No \`sendmail' command has been found, can't send mail."
fi
if [ -x "${SENDMAIL_BIN}" ]; then
echo "${MAIL_CONTENT}" | "${SENDMAIL_BIN}" -oi -t -f "equipe@evolix.net"
fi
}
# We need stable output for gcal, so we force some language environment variables
export TZ=Europe/Paris
export LANGUAGE=fr_FR.UTF-8
is_holiday() {
# gcal mark today as a holiday by surrounding with < and > the day
# of the month of that holiday line. For exemple if today is 2022-05-01 we'll
# get among other lines:
# Fête du Travail (FR) + Di, < 1>Mai 2022
# Jour de la Victoire (FR) + Di, : 8:Mai 2022 = +7 jours
gcal --cc-holidays=fr --holiday-list=short | grep -E '<[0-9 ]{2}>' --quiet
}
is_weekend() {
day_of_week=$(date +%u)
if [ "$day_of_week" != 6 ] && [ "$day_of_week" != 7 ]; then
return 1
fi
}
is_workday() {
if is_holiday || is_weekend; then
return 1
fi
}
is_worktime() {
if ! is_workday; then
return 1
fi
hour=$(date +%H)
if [ "${hour}" -lt 9 ] || { [ "${hour}" -ge 12 ] && [ "${hour}" -lt 14 ] ; } || [ "${hour}" -ge 18 ]; then
return 1
fi
}