autosysadmin-agent: upstream release 24.02.3
All checks were successful
Ansible Lint |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |2706|22|2684|5|:-1: Reference build: <a href="https://jenkins.evolix.org/job/gitea/job/ansible-roles/job/unstable/11//ansiblelint">Evolix » ansible-roles » unstable #11</a>
gitea/ansible-roles/pipeline/head This commit looks good

This commit is contained in:
Jérémy Lecour 2024-02-28 15:40:39 +01:00 committed by Jérémy Lecour
parent bec868009c
commit b2e22413bc
Signed by: jlecour
SSH key fingerprint: SHA256:h+5LgHRKwN9lS0SsdVR5yZPeFlJE4Mt+8UtL4CcP8dY
67 changed files with 2133 additions and 1895 deletions

View file

@ -13,6 +13,7 @@ The **patch** part changes is incremented if multiple releases happen the same m
### Added
* autosysadmin-agent: upstream release 24.02.3
* certbot: Renewal hook for NRPE
* kvm-host: add minifirewall rules if DRBD interface is configured
@ -27,6 +28,8 @@ The **patch** part changes is incremented if multiple releases happen the same m
### Removed
* autosysadmin: replaced by autosysadmin-agent
### Security
## [24.02] 2024-02-08

View file

@ -0,0 +1,17 @@
---
general_scripts_dir: "/usr/share/scripts"
autosysadmin_agent_bin_dir: "/usr/local/bin/autosysadmin"
autosysadmin_agent_lib_dir: "/usr/local/lib/autosysadmin"
autosysadmin_agent_auto_dir: "{{ general_scripts_dir }}/autosysadmin/auto"
autosysadmin_agent_crontab_enabled: true
autosysadmin_agent_log_retention_days: 365
autosysadmin_config: []
### All repair are disabled if set to 'off'
### even if a specific repair value is 'on'
# repair_all: 'on'
### Default values for checks
# repair_foo: 'off'

View file

@ -0,0 +1,25 @@
#!/bin/bash
days=${1:-365}
log_dir="/var/log/autosysadmin/"
if [ -d "${log_dir}" ]; then
find_run_dirs() {
find "${log_dir}" \
-mindepth 1 \
-maxdepth 1 \
-type d \
-ctime "+${days}" \
-print0
}
log() {
/usr/bin/logger -p local0.notice -t autosysadmin "${1}"
}
while IFS= read -r -d '' run_dir; do
rm --recursive --force "${run_dir}"
log "Delete ${run_dir} (older than ${days} days)"
done < <(find_run_dirs)
fi
exit 0

View file

@ -0,0 +1,898 @@
#!/bin/bash
VERSION="24.02.3"
# Common functions for "repair" and "restart" scripts
set -u
# Initializes the program, context, configuration…
initialize() {
PATH="${PATH}":/usr/sbin:/sbin
# Used in many places to refer to the program name.
# Examples: repair_mysql, restart_nrpe…
PROGNAME=$(basename "${0}")
# find out if running in interactive mode, or not
if [ -t 0 ]; then
INTERACTIVE=1
else
INTERACTIVE=0
fi
readonly INTERACTIVE
# Default empty value for Debug mode
DEBUG="${DEBUG:-""}"
# Repair scripts obey to the value of a variable named after the script
# You can set the value ("on" or "off") in /etc/evolinux/autosysadmin
# Here we set the default value to "on".
declare -g "${PROGNAME}"=on # dynamic variable assignment ($PROGNAME == repair_*)
PID=$$
readonly PID
# Each execution (run) gets a unique ID
RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${PROGNAME}_${PID}"
readonly RUN_ID
# Each execution store some information
# in a unique directory based on the RUN_ID
LOG_DIR="/var/log/autosysadmin/${RUN_ID}"
readonly LOG_DIR
mkdir -p "${LOG_DIR}"
# This log file contains all events
LOG_FILE="${LOG_DIR}/autosysadmin.log"
readonly LOG_FILE
# This log file contains notable actions
ACTIONS_FILE="${LOG_DIR}/actions.log"
readonly ACTIONS_FILE
touch "${ACTIONS_FILE}"
# This log file contains abort reasons (if any)
ABORT_FILE="${LOG_DIR}/abort.log"
readonly ABORT_FILE
# touch "${ABORT_FILE}"
# Date format for log messages
DATE_FORMAT="%Y-%m-%d %H:%M:%S"
# This will contain lock, last-run markers…
# It's ok to lose the content after a reboot
RUN_DIR="/run/autosysadmin"
readonly RUN_DIR
mkdir -p "${RUN_DIR}"
# Only a singe instace of each script can run simultaneously
# We use a customizable lock name for this.
# By default it's the script's name
LOCK_NAME=${LOCK_NAME:-${PROGNAME}}
# If a lock is found, we can wait for it to disappear.
# The value must be understood by sleep(1)
LOCK_WAIT="0"
# Default values for email headers
EMAIL_FROM="equipe+autosysadmin@evolix.fr"
EMAIL_INTERNAL="autosysadmin@evolix.fr"
LOCK_FILE="${RUN_DIR}/${LOCK_NAME}.lock"
readonly LOCK_FILE
# Remove lock file at exit
cleanup() {
# shellcheck disable=SC2317
rm -f "${LOCK_FILE}"
}
trap 'cleanup' 0
# Load configuration
# shellcheck disable=SC1091
test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin
log_all "Begin ${PROGNAME} RUN_ID: ${RUN_ID}"
log_all "Log directory is ${LOG_DIR}"
}
# Executes a list of tasks before exiting:
# * prepare a summary of actions and possible abort reasons
# * send emails
# * do some cleanup
quit() {
log_all "End ${PROGNAME} RUN_ID: ${RUN_ID}"
summary="RUN_ID: ${RUN_ID}"
if [ -s "${ABORT_FILE}" ]; then
# Add abort reasons to summary
summary="${summary}\n$(print_abort_reasons)"
hook_mail "abort"
return_code=1
else
if [ -s "${ACTIONS_FILE}" ]; then
# Add notable actions to summary
summary="${summary}\n$(print_actions "Aucune action")"
hook_mail "success"
fi
return_code=0
fi
hook_mail "internal"
if is_interactive; then
# shellcheck disable=SC2001
echo "${summary}" | sed -e 's/\\n/\n/g'
else
/usr/share/scripts/evomaintenance.sh --auto --user autosysadmin --message "${summary}" --no-commit --no-mail
fi
teardown
# shellcheck disable=SC2086
exit ${return_code}
}
teardown() {
:
}
# Return true/false
is_interactive() {
test "${INTERACTIVE}" -eq "1"
}
save_server_state() {
DUMP_SERVER_STATE_BIN="$(command -v dump-server-state || command -v backup-server-state)"
if [ -z "${DUMP_SERVER_STATE_BIN}" ]; then
log_all "Warning: dump-server-state is not present. No server state recorded."
fi
if [ -x "${DUMP_SERVER_STATE_BIN}" ]; then
DUMP_DIR=$(file_path_in_log_dir "server-state")
# We don't want the logging to take too much time,
# so we kill it if it takes more than 20 seconds.
timeout --signal 9 20 \
"${DUMP_SERVER_STATE_BIN}" \
--dump-dir="${DUMP_DIR}" \
--df \
--dmesg \
--iptables \
--lxc \
--netcfg \
--netstat \
--uname \
--processes \
--systemctl \
--uptime \
--virsh \
--disks \
--mysql-processes \
--no-apt-states \
--no-apt-config \
--no-dpkg-full \
--no-dpkg-status \
--no-mount \
--no-packages \
--no-sysctl \
--no-etc
log_run "Server state saved in \`server-state' directory."
fi
}
is_debug() {
# first time: do the check…
# other times: pass
if [ -z "${DEBUG:-""}" ]; then
debug_file="/etc/evolinux/autosysadmin.debug"
if [ -e "${debug_file}" ]; then
last_change=$(stat -c %Z "${debug_file}")
limit_date=$(date --date "14400 seconds ago" +"%s")
if [ $(( last_change - limit_date )) -le "0" ]; then
log_run "Debug mode disabled; file is too old (%{last_change} seconds)."
rm "${debug_file}"
# Debug mode disabled
DEBUG="0"
else
log_run "Debug mode enabled."
# Debug mode enabled
DEBUG="1"
fi
else
# log_run "Debug mode disabled; file is absent."
# Debug mode disabled
DEBUG="0"
fi
fi
# return the value
test "${DEBUG}" -eq "1"
}
# Uses the who(1) definition of "active"
currently_active_users() {
LC_ALL=C who --users | grep --extended-regexp "\s+\.\s+" | awk '{print $1}' | sort --human-numeric-sort | uniq
}
# Users active in the last 29 minutes
recently_active_users() {
LC_ALL=C who --users | grep --extended-regexp "\s+00:(0|1|2)[0-9]\s+" | awk --field-separator ' ' '{print $1,$6}'
}
# Save the list of users to a file in the log directory
save_active_users() {
LC_ALL=C who --users | save_in_log_dir "who-users"
}
# An autosysadmin must not perform actions if a user is active or was active recently.
#
# This can by bypassed in interactive mode.
# It's OK to lose this data after a reboot.
ensure_no_active_users_or_exit() {
# Save all active users
save_active_users
if is_debug; then
log_run "Debug mode enabled: continue without checking active users."
return 0;
fi
# Is there any currently active user?
currently_active_users=$(currently_active_users)
if [ -n "${currently_active_users}" ]; then
# shellcheck disable=SC2001
users_oneliner=$(echo "${currently_active_users}" | sed -e 's/\n/ /')
log_run "Currently active users: ${users_oneliner}"
if is_interactive; then
echo "Some users are currently active:"
# shellcheck disable=SC2001
echo "${currently_active_users}" | sed -e 's/\(.\+\)/* \1/'
answer=""
while :; do
printf "> Continue? [Y,n,?] "
read -r answer
case ${answer} in
[Yy]|"" )
log_run "Active users check bypassed manually in interactive mode."
return
;;
[Nn] )
log_run "Active users check confirmed manually in interactive mode."
log_abort_and_quit "Active users detected: ${users_oneliner}"
;;
* )
printf "y - yes, continue\n"
printf "n - no, exit\n"
printf "? - print this help\n"
;;
esac
done
else
log_abort_and_quit "Currently active users detected: ${users_oneliner}."
fi
else
# or recently (the last 30 minutes) active user?
recently_active_users=$(recently_active_users)
if [ -n "${recently_active_users}" ]; then
# shellcheck disable=SC2001
users_oneliner=$(echo "${recently_active_users}" | sed -e 's/\n/ /')
log_run "Recently active users: ${users_oneliner}"
if is_interactive; then
echo "Some users were recently active:"
# shellcheck disable=SC2001
echo "${recently_active_users}" | sed -e 's/\(.\+\)/* \1/'
answer=""
while :; do
printf "> Continue? [Y,n,?] "
read -r answer
case ${answer} in
[Yy]|"" )
log_run "Active users check bypassed manually in interactive mode."
return
;;
[Nn] )
log_run "Active users check confirmed manually in interactive mode."
log_abort_and_quit "Recently active users detected: ${users_oneliner}."
;;
* )
printf "y - yes, continue\n"
printf "n - no, exit\n"
printf "? - print this help\n"
;;
esac
done
else
log_abort_and_quit "Recently active users detected: ${users_oneliner}."
fi
fi
fi
}
# Takes an NRPE command name as 1st parameter,
# and executes the full command if found in the configuration.
# Return the result and the return code of the command.
check_nrpe() {
check="$1"
nrpe_files=""
# Check if NRPE config is found
if [ -f "/etc/nagios/nrpe.cfg" ]; then
nrpe_files="${nrpe_files} /etc/nagios/nrpe.cfg"
else
msg="NRPE configuration not found: /etc/nagios/nrpe.cfg"
log_run "${msg}"
echo "${msg}"
return 3
fi
# Search for included files
# shellcheck disable=SC2086
while IFS= read -r include_file; do
nrpe_files="${nrpe_files} ${include_file}"
done < <(grep --extended-regexp '^\s*include=.+' ${nrpe_files} | cut -d = -f 2)
# Search for files in included directories
# shellcheck disable=SC2086
while IFS= read -r include_dir; do
nrpe_files="${nrpe_files} ${include_dir}/*.cfg"
done < <(grep --extended-regexp '^\s*include_dir=.+' ${nrpe_files} | cut -d = -f 2)
# Fetch uncommented commands in (sorted) config files
# shellcheck disable=SC2086
nrpe_commands=$(grep --no-filename --exclude=*~ --fixed-strings "[${check}]" ${nrpe_files} | grep --invert-match --extended-regexp '^\s*#\s*command' | cut -d = -f 2)
nrpe_commands_count=$(echo "${nrpe_commands}" | wc -l)
if is_debian_version "9" "<=" && [ "${nrpe_commands_count}" -gt "1" ]; then
# On Debian <= 9, NRPE loading was not sorted
# we need to raise an error if we have multiple defined commands
msg="Unable to determine which NRPE command to run"
log_run "${msg}"
echo "${msg}"
return 3
else
# On Debian > 9, use the last command
nrpe_command=$(echo "${nrpe_commands}" | tail -n 1)
nrpe_result=$(${nrpe_command})
nrpe_rc=$?
log_run "NRPE command (exited with ${nrpe_rc}): ${nrpe_command}"
log_run "${nrpe_result}"
echo "${nrpe_result}"
return "${nrpe_rc}"
fi
}
# An autosysadmin script must not run twice (or more) simultaneously.
# We use a customizable (with LOCK_NAME) lock file to keep track of this.
# A wait time can be configured.
#
# This can by bypassed in interactive mode.
# It's OK to lose this data after a reboot.
acquire_lock_or_exit() {
lock_file="${1:-${LOCK_FILE}}"
lock_wait="${2:-${LOCK_WAIT}}"
# lock_wait must be compatible with sleep(1), otherwise fallback to 0
if ! echo "${lock_wait}" | grep -Eq '^[0-9]+[smhd]?$'; then
log_run "Lock wait: incorrect value '${lock_wait}', fallback to 0."
lock_wait=0
fi
if [ "${lock_wait}" != "0" ] && [ -f "${lock_file}" ]; then
log_run "Lock file present. Let's wait ${lock_wait} and check again."
sleep "${lock_wait}"
fi
if [ -f "${lock_file}" ]; then
log_abort_and_quit "Lock file still present."
else
log_run "Lock file absent. Let's put one."
touch "${lock_file}"
fi
}
# If a script has been run in the ast 30 minutes, running it again won't fix the issue.
# We use a /run/ausosysadmin/${PROGNAME}_lastrun file to keep track of this.
#
# This can by bypassed in interactive mode.
# This is bypassed in debug mode.
# It's OK to lose this data after a reboot.
ensure_not_too_soon_or_exit() {
if is_debug; then
log_run "Debug mode enabled: continue without checking when was the last run."
return 0;
fi
lastrun_file="${RUN_DIR}/${PROGNAME}_lastrun"
if [ -f "${lastrun_file}" ]; then
lastrun_age="$(($(date +%s)-$(stat -c "%Y" "${lastrun_file}")))"
log_run "Last run was ${lastrun_age} seconds ago."
if [ "${lastrun_age}" -lt 1800 ]; then
if is_interactive; then
echo "${PROGNAME} was run ${lastrun_age} seconds ago."
answer=""
while :; do
printf "> Continue? [Y,n,?] "
read -r answer
case ${answer} in
[Yy]|"" )
log_run "Last run check bypassed manually in interactive mode."
break
;;
[Nn] )
log_run "Last run check confirmed manually in interactive mode."
log_abort_and_quit 'Last run too recent.'
;;
* )
printf "y - yes, continue\n"
printf "n - no, exit\n"
printf "? - print this help\n"
;;
esac
done
else
log_abort_and_quit "Last run too recent."
fi
fi
fi
touch "${lastrun_file}"
}
# Populate DEBIAN_VERSION and DEBIAN_RELEASE variables
# based on gathered information about the operating system
detect_os() {
DEBIAN_RELEASE="unknown"
DEBIAN_VERSION="unknown"
LSB_RELEASE_BIN="$(command -v lsb_release)"
if [ -e /etc/debian_version ]; then
DEBIAN_VERSION="$(cut -d "." -f 1 < /etc/debian_version)"
if [ -x "${LSB_RELEASE_BIN}" ]; then
DEBIAN_RELEASE="$("${LSB_RELEASE_BIN}" --codename --short)"
else
case "${DEBIAN_VERSION}" in
7) DEBIAN_RELEASE="wheezy" ;;
8) DEBIAN_RELEASE="jessie" ;;
9) DEBIAN_RELEASE="stretch" ;;
10) DEBIAN_RELEASE="buster" ;;
11) DEBIAN_RELEASE="bullseye" ;;
12) DEBIAN_RELEASE="bookworm" ;;
13) DEBIAN_RELEASE="trixie" ;;
esac
fi
# log_run "Detected OS: Debian version=${DEBIAN_VERSION} release=${DEBIAN_RELEASE}"
# else
# log_run "Detected OS: unknown (missing /etc/debian_version)"
fi
}
is_debian_wheezy() {
test "${DEBIAN_RELEASE}" = "wheezy"
}
is_debian_jessie() {
test "${DEBIAN_RELEASE}" = "jessie"
}
is_debian_stretch() {
test "${DEBIAN_RELEASE}" = "stretch"
}
is_debian_buster() {
test "${DEBIAN_RELEASE}" = "buster"
}
is_debian_bullseye() {
test "${DEBIAN_RELEASE}" = "bullseye"
}
is_debian_bookworm() {
test "${DEBIAN_RELEASE}" = "bookworm"
}
is_debian_trixie() {
test "${DEBIAN_RELEASE}" = "trixie"
}
is_debian_version() {
local version=$1
local relation=${2:-"eq"}
if [ -z "${DEBIAN_VERSION:-""}" ]; then
detect_os
fi
dpkg --compare-versions "${DEBIAN_VERSION}" "${relation}" "${version}"
}
# List systemd services (only names), even if stopped
systemd_list_services() {
pattern=$1
systemctl list-units --all --no-legend --type=service "${pattern}" | grep --only-matching --extended-regexp '\S+\.service'
}
is_systemd_enabled() {
systemctl --quiet is-enabled "$1" 2> /dev/null
}
is_systemd_active() {
systemctl --quiet is-active "$1" 2> /dev/null
}
is_sysvinit_enabled() {
find /etc/rc2.d/ -name "$1" > /dev/null
}
get_fqdn() {
# shellcheck disable=SC2155
local system=$(uname -s)
if [ "${system}" = "Linux" ]; then
hostname --fqdn
elif [ "${system}" = "OpenBSD" ]; then
hostname
else
log_abort_and_quit "System '${system}' not recognized."
fi
}
get_complete_hostname() {
REAL_HOSTNAME="$(get_fqdn)"
if [ "${HOSTNAME}" = "${REAL_HOSTNAME}" ]; then
echo "${HOSTNAME}"
else
echo "${HOSTNAME} (${REAL_HOSTNAME})"
fi
}
# Fetch values from evomaintenance configuration
get_evomaintenance_mail() {
grep "EVOMAINTMAIL=" /etc/evomaintenance.cf | cut -d '=' -f2
}
get_evomaintenance_emergency_mail() {
grep "URGENCYFROM=" /etc/evomaintenance.cf | cut -d '=' -f2
}
get_evomaintenance_emergency_tel() {
grep "URGENCYTEL=" /etc/evomaintenance.cf | cut -d '=' -f2
}
# Log a message to the log file in the log directory
log_run() {
local msg="${1:-$(cat /dev/stdin)}"
# shellcheck disable=SC2155
local date=$(/bin/date +"${DATE_FORMAT}")
printf "[%s] %s[%s]: %s\\n" \
"${date}" "${PROGNAME}" "${PID}" "${msg}" \
>> "${LOG_FILE}"
}
# Log a message in the system log file (syslog or journald)
log_global() {
local msg="${1:-$(cat /dev/stdin)}"
echo "${msg}" \
| /usr/bin/logger -p local0.notice -t autosysadmin
}
# Log a message in both places
log_all() {
local msg="${1:-$(cat /dev/stdin)}"
log_global "${msg}"
log_run "${msg}"
}
# Log a notable action in regular places
# and append it to the dedicated list
log_action() {
log_all "$*"
append_action "$*"
}
# Append a line in the actions.log file in the log directory
append_action() {
echo "$*" >> "${ACTIONS_FILE}"
}
# Print the content of the actions.log file
# or a fallback content (1st parameter) if empty
# shellcheck disable=SC2120
print_actions() {
local fallback=${1:-""}
if [ -s "${ACTIONS_FILE}" ]; then
cat "${ACTIONS_FILE}"
elif [ -n "${fallback}" ]; then
echo "${fallback}"
fi
}
# Log a an abort reason in regular places
# and append it to the dedicated list
log_abort() {
log_all "$*"
append_abort_reason "$*"
}
# Append a line in the abort.log file in the log directory
append_abort_reason() {
echo "$*" >> "${ABORT_FILE}"
}
# Print the content of the abort.log file
# or a fallback content (1st parameter) if empty
# shellcheck disable=SC2120
print_abort_reasons() {
local fallback=${1:-""}
if [ -s "${ABORT_FILE}" ]; then
cat "${ABORT_FILE}"
elif [ -n "${fallback}" ]; then
echo "${fallback}"
fi
}
# Print the content of the main log from the log directory
print_main_log() {
cat "${LOG_FILE}"
}
# Log an abort reason and quit the script
log_abort_and_quit() {
log_abort "$*"
quit
}
# Store the content from standard inpu
# into a file in the log directory named after the 1st parameter
save_in_log_dir() {
local file_name=$1
local file_path="${LOG_DIR}/${file_name}"
cat /dev/stdin > "${file_path}"
log_run "Saved \`${file_name}' file."
}
# Return the full path of the file in log directory
# based on the name in the 1st parameter
file_path_in_log_dir() {
echo "${LOG_DIR}/${1}"
}
format_mail_success() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <${EMAIL_FROM}>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: ${PROGNAME}
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: ${EMAIL_INTERNAL}
Subject: [autosysadmin] Intervention automatisée sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatisée vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
Script déclenché : ${PROGNAME}
### Actions réalisées
$(print_actions "Aucune")
### Réagir à cette intervention
Vous pouvez répondre à ce message (${EMAIL_FROM}).
En cas d'urgence, utilisez l'adresse ${EMERGENCY_MAIL}
ou notre ligne d'astreinte (${EMERGENCY_TEL})
--
Votre AutoSysadmin
EOTEMPLATE
}
format_mail_abort() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <${EMAIL_FROM}>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: ${PROGNAME}
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: ${EMAIL_INTERNAL}
Subject: [autosysadmin] Intervention automatisée interrompue sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatisée a été déclenchée mais s'est interrompue.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
Script déclenché : ${PROGNAME}
### Actions réalisées
$(print_actions "Aucune")
### Raison(s) de l'interruption
$(print_abort_reasons "Inconnue")
### Réagir à cette intervention
Vous pouvez répondre à ce message (${EMAIL_FROM}).
En cas d'urgence, utilisez l'adresse ${EMERGENCY_MAIL}
ou notre ligne d'astreinte (${EMERGENCY_TEL})
--
Votre AutoSysadmin
EOTEMPLATE
}
# shellcheck disable=SC2028
print_report_information() {
echo "**Uptime**"
echo ""
uptime
echo ""
echo "**Utilisateurs récents**"
echo ""
who_file=$(file_path_in_log_dir "server-state/df.txt")
if [ -s "${who_file}" ]; then
cat "${who_file}"
else
who --users
fi
echo ""
echo "**Espace disque**"
echo ""
df_file=$(file_path_in_log_dir "server-state/df.txt")
if [ -s "${df_file}" ]; then
cat "${df_file}"
else
df -h
fi
echo ""
echo "**Dmesg**"
echo ""
dmesg_file=$(file_path_in_log_dir "server-state/dmesg.txt")
if [ -s "${dmesg_file}" ]; then
tail -n 5 "${dmesg_file}"
else
dmesg | tail -n 5
fi
echo ""
echo "**systemd failed services**"
echo ""
failed_services_file=$(file_path_in_log_dir "server-state/systemctl-failed-services.txt")
if [ -s "${failed_services_file}" ]; then
cat "${failed_services_file}"
else
systemctl --no-legend --state=failed --type=service
fi
if command -v lxc-ls > /dev/null 2>&1; then
echo ""
echo "**LXC containers**"
echo ""
lxc_ls_file=$(file_path_in_log_dir "server-state/lxc-list.txt")
if [ -s "${lxc_ls_file}" ]; then
cat "${lxc_ls_file}"
else
lxc-ls --fancy
fi
fi
apache_errors_file=$(file_path_in_log_dir "apache-errors.log")
if [ -f "${apache_errors_file}" ]; then
echo ""
echo "**Apache errors**"
echo ""
cat "${apache_errors_file}"
fi
nginx_errors_file=$(file_path_in_log_dir "nginx-errors.log")
if [ -f "${nginx_errors_file}" ]; then
echo ""
echo "**Nginx errors**"
echo ""
cat "${nginx_errors_file}"
fi
}
format_mail_internal() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <${EMAIL_FROM}>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: ${PROGNAME}
X-RunId: ${RUN_ID}
To: ${EMAIL_INTERNAL}
Subject: [autosysadmin] Rapport interne d'intervention sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
Script déclenché : ${PROGNAME}
### Actions réalisées
$(print_actions "Aucune")
### Raison(s) de l'interruption
$(print_abort_reasons "Aucune")
### Log autosysadmin
$(print_main_log)
### Informations additionnelles
$(print_report_information)
--
Votre AutoSysadmin
EOTEMPLATE
}
# Generic function to send emails at the end of the script.
# Takes a template as 1st parameter
hook_mail() {
if is_debug; then
log_run "Debug mode enabled: continue without sending mail."
return 0;
fi
HOSTNAME="${HOSTNAME:-"$(get_fqdn)"}"
HOSTNAME_TEXT="$(get_complete_hostname)"
EMAIL_CLIENT="$(get_evomaintenance_mail)"
EMERGENCY_MAIL="$(get_evomaintenance_emergency_mail)"
EMERGENCY_TEL="$(get_evomaintenance_emergency_tel)"
MAIL_CONTENT="$(format_mail_"$1")"
SENDMAIL_BIN="$(command -v sendmail)"
if [ -z "${SENDMAIL_BIN}" ]; then
log_global "ERROR: No \`sendmail' command has been found, can't send mail."
fi
if [ -x "${SENDMAIL_BIN}" ]; then
echo "${MAIL_CONTENT}" | "${SENDMAIL_BIN}" -oi -t -f "equipe@evolix.fr"
log_global "Sent '$1' mail for RUN_ID: ${RUN_ID}"
fi
}
is_holiday() {
# gcal mark today as a holiday by surrounding with < and > the day
# of the month of that holiday line. For example if today is 2022-05-01 we'll
# get among other lines:
# Fête du Travail (FR) + Di, < 1>Mai 2022
# Jour de la Victoire (FR) + Di, : 8:Mai 2022 = +7 jours
LANGUAGE=fr_FR.UTF-8 TZ=Europe/Paris gcal --cc-holidays=fr --holiday-list=short | grep -E '<[0-9 ]{2}>' --quiet
}
is_weekend() {
day_of_week=$(date +%u)
if [ "${day_of_week}" != 6 ] && [ "${day_of_week}" != 7 ]; then
return 1
fi
}
is_workday() {
if is_holiday || is_weekend; then
return 1
fi
}
is_worktime() {
if ! is_workday; then
return 1
fi
hour=$(date +%H)
if [ "${hour}" -lt 9 ] || { [ "${hour}" -ge 12 ] && [ "${hour}" -lt 14 ] ; } || [ "${hour}" -ge 18 ]; then
return 1
fi
}

View file

@ -0,0 +1,112 @@
#!/bin/bash
# Specific functions for "repair" scripts
is_all_repair_disabled() {
# Fetch values from the config
# and if it is not defined or has no value, then assign "on"
local status=${repair_all:=on}
test "${status}" = "off" || test "${status}" = "0"
}
is_current_repair_disabled() {
# Fetch values from the config
# and if it is not defined or has no value, then assign "on"
local status=${!PROGNAME:=on}
test "${status}" = "off" || test "${status}" = "0"
}
ensure_not_disabled_or_exit() {
if is_all_repair_disabled; then
log_global 'All repair scripts are disabled.'
exit 0
fi
if is_current_repair_disabled; then
log_global "Current repair script (${PROGNAME}) is disabled."
exit 0
fi
}
# Set of actions to do at the begining of a "repair" script
pre_repair() {
initialize
# Are we supposed to run?
ensure_not_disabled_or_exit
# Has it recently been run?
ensure_not_too_soon_or_exit
# Can we acquire a lock?
acquire_lock_or_exit
# Is there any active user?
ensure_no_active_users_or_exit
# Save important information
save_server_state
}
# Set of actions to do at the end of a "repair" script
post_repair() {
quit
}
repair_lxc_php() {
container_name=$1
if is_systemd_enabled 'lxc.service'; then
lxc_path=$(lxc-config lxc.lxcpath)
if lxc-info --name "${container_name}" > /dev/null; then
rootfs="${lxc_path}/${container_name}/rootfs"
case "${container_name}" in
php56) fpm_log_file="${rootfs}/var/log/php5-fpm.log" ;;
php70) fpm_log_file="${rootfs}/var/log/php7.0-fpm.log" ;;
php73) fpm_log_file="${rootfs}/var/log/php7.3-fpm.log" ;;
php74) fpm_log_file="${rootfs}/var/log/php7.4-fpm.log" ;;
php80) fpm_log_file="${rootfs}/var/log/php8.0-fpm.log" ;;
php81) fpm_log_file="${rootfs}/var/log/php8.1-fpm.log" ;;
php82) fpm_log_file="${rootfs}/var/log/php8.2-fpm.log" ;;
php83) fpm_log_file="${rootfs}/var/log/php8.3-fpm.log" ;;
*)
log_abort_and_quit "Unknown container '${container_name}'"
;;
esac
# Determine FPM Pool path
php_path_pool=$(find "${lxc_path}/${container_name}/" -type d -name "pool.d")
# Save LXC info (before restart)
lxc-info --name "${container_name}" | save_in_log_dir "lxc-${container_name}.before.status"
# Save last lines of FPM log (before restart)
tail "${fpm_log_file}" | save_in_log_dir "$(basename "${fpm_log_file}" | sed -e 's/.log/.before.log/')"
# Save NRPE check (before restart)
/usr/local/lib/nagios/plugins/check_phpfpm_multi "${php_path_pool}" | save_in_log_dir "check_fpm_${container_name}.before.out"
lxc-stop --timeout 20 --name "${container_name}"
lxc-start --daemon --name "${container_name}"
rc=$?
if [ "${rc}" -eq "0" ]; then
log_all "Restart LXC container '${container_name}: OK"
else
log_all "Restart LXC container '${container_name}: failed"
fi
# Save LXC info (after restart)
lxc-info --name "${container_name}" | save_in_log_dir "lxc-${container_name}.after.status"
# Save last lines of FPM log (after restart)
tail "${fpm_log_file}" | save_in_log_dir "$(basename "${fpm_log_file}" | sed -e 's/.log/.after.log/')"
# Save NRPE check (after restart)
/usr/local/lib/nagios/plugins/check_phpfpm_multi "${php_path_pool}" | save_in_log_dir "check_fpm_${container_name}.after.out"
else
log_abort_and_quit "LXC container '${container_name}' doesn't exist."
fi
else
log_abort_and_quit 'LXC not found.'
fi
}

View file

@ -0,0 +1,76 @@
#!/bin/bash
# Specific functions for "restart" scripts
running_custom() {
# Placeholder that returns 1, to prevent running if not redefined
log_global "running_custom() function has not been redefined! Let's quit."
return 1
}
# Examine RUNNING variable and decide if the script should run or not
is_supposed_to_run() {
if is_debug; then return 0; fi
case ${RUNNING} in
never)
# log_global "is_supposed_to_run: no (never)"
return 1
;;
always)
# log_global "is_supposed_to_run: yes (always)"
return 0
;;
nwh-fr)
! is_worktime
rc=$?
# if [ ${rc} -eq 0 ]; then
# log_global "is_supposed_to_run: yes (nwh-fr returned ${rc})"
# else
# log_global "is_supposed_to_run: no (nwh-fr returned ${rc})"
# fi
return ${rc}
;;
nwh-ca)
# Not implemented yet
return 0
;;
custom)
running_custom
rc=$?
# if [ ${rc} -eq 0 ]; then
# log_global "is_supposed_to_run: yes (custom returned ${rc})"
# else
# log_global "is_supposed_to_run: no (custom returned ${rc})"
# fi
return ${rc}
;;
esac
}
ensure_supposed_to_run_or_exit() {
if ! is_supposed_to_run; then
# simply quit (no logging, no notifications…)
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
exit 0
fi
}
# Set of actions to do at the begining of a "restart" script
pre_restart() {
initialize
# Has it recently been run?
ensure_not_too_soon_or_exit
# Can we acquire a lock?
acquire_lock_or_exit
# Save important information
save_server_state
}
# Set of actions to do at the end of a "restart" script
post_restart() {
quit
}

View file

@ -0,0 +1,16 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
# shellcheck source=./restart_amavis.sh
source /usr/share/scripts/autosysadmin/auto/restart_amavis.sh
restart_amavis
sendmail success
post_repair

View file

@ -0,0 +1,157 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
# We always keep some reserved blocks to avoid missing some logs
# https://gitea.evolix.org/evolix/autosysadmin/issues/22
RESERVED_BLOCKS_MIN=1
get_mountpoints() {
# the $(...) get the check_disk1 command
# the cut command selects the critical part of the check_disk1 output
# the grep command extracts the mountpoints and available disk space
# the last cut command selects the mountpoints
check_disk1_command=$(grep check_disk1 /etc/nagios/nrpe.d/evolix.cfg | cut -d'=' -f2-)
${check_disk1_command} -e | cut -d'|' -f1 | grep --extended-regexp --only-matching '/[[:graph:]]* [0-9]+ [A-Z][A-Z]' | cut -d' ' -f1
}
is_reserved_blocks_nominal() {
partition=${1}
fs_type="$(findmnt -n --output=fstype "${partition}")"
if [ "${fs_type}" = "ext4" ]; then
device="$(findmnt -n --output=source "${partition}")"
reserved_block_count="$(tune2fs -l "${device}" | grep 'Reserved block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')"
block_count="$(tune2fs -l "${device}" | grep 'Block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')"
percentage=$(awk "BEGIN { pc=100*${reserved_block_count}/${block_count}; i=int(pc); print (pc-i<0.5)?i:i+1 }")
log_run "Reserved blocks for ${partition} is currently at ${percentage}%"
if [ "${percentage}" -gt "${RESERVED_BLOCKS_MIN}" ]; then
log_run "Allowing tune2fs action to reduce the number of reserved blocks"
return 0
else
log_run "Reserved blocks already at or bellow ${RESERVED_BLOCKS_MIN}%, no automatic action possible"
return 1
fi
else
log_run "Filesystem for ${partition} (${fs_type}) is incompatible with reserved block reduction."
return 1
fi
}
reduce_reserved_blocks() {
partition=${1}
device=$(findmnt -n --output=source "${partition}")
tune2fs -m "${RESERVED_BLOCKS_MIN}" "${device}"
log_action "Reserved blocks for ${partition} changed to ${RESERVED_BLOCKS_MIN} percent"
}
is_tmp_to_delete() {
size="$(find /var/log/ -type f -ctime +1 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
if [ -n "${size}" ]; then
return 0
else
return 1
fi
}
is_log_to_delete() {
size="$(find /var/log/ -type f -mtime +365 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
if [ -n "${size}" ]; then
return 0
else
return 1
fi
}
clean_apt_cache() {
for container in $(lxc-ls -1); do
if [ -e "$(lxc-config lxc.lxcpath)/${container}/rootfs/var/cache" ]; then
lxc-attach --name "${container}" -- apt-get clean
log_action "Clean apt cache in LXC container ${container}";
fi
done
# NOTE: "head -n 1" might be superfluous, but let's be sure to have only the first returned value
biggest_subdir=$(du --summarize --one-file-system "/var/*" | sort --numeric-sort --reverse | sed 's/^[0-9]\+[[:space:]]\+//;q' | head -n 1)
case "${biggest_subdir}" in
'/var/cache')
apt-get clean
log_action 'Clean apt cache'
;;
esac
}
clean_amavis_virusmails() {
if du --inodes /var/lib/* | sort --numeric-sort | tail -n 3 | grep --quiet 'virusmails$'; then
find /var/lib/amavis/virusmails/ -type f -atime +30 -delete
log_action 'Clean amavis infected mails'
fi
}
critical_mountpoints=$(get_mountpoints)
if [ -z "${critical_mountpoints}" ]; then
log_abort_and_quit "No partition is in critical state, nothing left to do."
else
for mountpoint in ${critical_mountpoints}; do
case "${mountpoint}" in
/var)
#if is_log_to_delete
#then
# find /var/log/ -type f -mtime +365 -delete
# log_action "$size Mo of disk space freed in /var"
#fi
if is_reserved_blocks_nominal /var; then
reduce_reserved_blocks /var
clean_apt_cache
clean_amavis_virusmails
fi
;;
/tmp)
#if is_tmp_to_delete
#then
# find /tmp/ -type f -ctime +1 -delete
# log_action "$size Mo of disk space freed in /tmp"
#fi
if is_reserved_blocks_nominal /tmp; then
reduce_reserved_blocks /tmp
fi
;;
/home)
if is_reserved_blocks_nominal /home; then
reduce_reserved_blocks /home
fi
;;
/srv)
if is_reserved_blocks_nominal /srv; then
reduce_reserved_blocks /srv
fi
;;
/filer)
if is_reserved_blocks_nominal /filer; then
reduce_reserved_blocks /filer
fi
;;
/)
if is_reserved_blocks_nominal /; then
reduce_reserved_blocks /
# Suggest remove old kernel ?
fi
;;
*)
# unknown
log_run 'Unknown partition (or weird case) or nothing to do'
;;
esac
done
fi
post_repair

View file

@ -0,0 +1,35 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
service="elasticsearch.service"
service_name="elasticsearch"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_abort_and_quit "${service} is active, nothing left to do."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_abort_and_quit "${service} is disabled (or missing), nothing left to do."
fi
post_repair

View file

@ -0,0 +1,131 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
## Apache
service="apache2.service"
service_name="apache2"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# check syntax
if apache2ctl -t > /dev/null 2>&1; then
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
# Save error logs
date=$(LANG=en_US.UTF-8 date '+%b %d')
grep "${date}" /home/*/log/error.log /var/log/apache2/*error.log \
| grep -v \
-e "Got error 'PHP message:" \
-e "No matching DirectoryIndex" \
-e "client denied by server configuration" \
-e "server certificate does NOT include an ID which matches the server name" \
| save_in_log_dir "apache-errors.log"
else
log_action "Restart ${service_name}: skip (invalid configuration)"
fi
fi
else
log_all "${service} is disabled (or missing). Skip."
fi
## Nginx
service="nginx.service"
service_name="nginx"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# check syntax
if nginx -t > /dev/null 2>&1; then
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
# Save error logs
### Consider doing for Nginx the same as Apache
else
log_action "Restart ${service_name}: skip (invalid configuration)"
fi
fi
else
log_all "${service} is disabled (or missing). Skip."
fi
## LXC
if is_systemd_enabled 'lxc.service'; then
for container in $(lxc-ls -1 | grep --fixed-strings 'php' | grep --extended-regexp --invert-match --regexp '\bold\b' --regexp '\bdisabled\b'); do
repair_lxc_php "${container}"
done
else
log_all "LXC is disabled (or missing). Skip."
fi
## FPM
fpm_services=$(systemd_list_services 'php*-fpm*')
if [ -n "${fpm_services}" ]; then
for service in ${fpm_services}; do
service_name="${service//.service/}"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_all "${service} is disabled (or missing). Skip."
fi
done
else
log_all "PHP FPM not found. Skip."
fi
post_repair

View file

@ -0,0 +1,69 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
if is_debian_version "8" "<="; then
if is_sysvinit_enabled '*mysql*'; then
if ! pgrep -u mysql mysqld > /dev/null; then
# Save service status before restart
timeout 2 mysqladmin status 2>&1 | save_in_log_dir "mysql.before.status"
timeout 20 /etc/init.d/mysql restart > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart mysql: OK"
else
log_action "Restart mysql: failed"
fi
# Save service status after restart
timeout 2 mysqladmin status 2>&1 | save_in_log_dir "mysql.after.status"
else
log_abort_and_quit "mysqld process alive. Aborting"
fi
else
log_abort_and_quit "MySQL not enabled. Aborting"
fi
else
if is_debian_version "12" ">="; then
service="mariadb.service"
service_name="mariadb"
else
service="mysql.service"
service_name="mysql"
fi
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_abort_and_quit "${service} is active, nothing left to do."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_abort_and_quit "${service} is disabled (or missing), nothing left to do."
fi
fi
post_repair

View file

@ -0,0 +1,35 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
service="opendkim.service"
service_name="opendkim"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_abort_and_quit "${service} is active, nothing left to do."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_abort_and_quit "${service} is disabled (or missing). Abort."
fi
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php56
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php70
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php73
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php74
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php80
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php81
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php82
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php83
post_repair

View file

@ -0,0 +1,32 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
for service in $(systemd_list_services 'redis-server*'); do
service_name="${service//.service/}"
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK."
else
log_action "Restart ${service_name}: failed."
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
done
post_repair

View file

@ -1,43 +1,24 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
init_autosysadmin
load_conf
test "${repair_tomcat_instance:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_tomcat_instance"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
pre_repair
repair_tomcat_instance_handle_tomcat() {
if /bin/su - "${1}" -c "/bin/systemctl --quiet --user is-active tomcat.service" ; then
if ! /bin/su - "${1}" -c "/usr/bin/timeout 20 /bin/systemctl --quiet --user restart tomcat.service"
then
log_error_exit "Echec de redémarrage instance tomcat utilisateur ${1}"
log_abort_and_quit "Echec de redémarrage instance tomcat utilisateur ${1}"
else
log_action "Redémarrage instance tomcat utilisateur ${1}"
fi
elif /bin/systemctl --quiet is-active "${1}".service ; then
if ! /usr/bin/timeout 20 systemctl --quiet restart "${1}".service
then
log_error_exit "Echec de redémarrage instance tomcat ${1}"
log_abort_and_quit "Echec de redémarrage instance tomcat ${1}"
else
log_action "Redémarrage instance tomcat ${1}"
fi
@ -50,4 +31,4 @@ do
repair_tomcat_instance_handle_tomcat "${instance}"
done
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail
post_repair

View file

@ -0,0 +1,41 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
## Custom lock wait and/or lock name
# LOCK_WAIT="15s"
# LOCK_NAME="repair_http"
pre_repair
## The name of the service, mainly for logging
service_name="example"
## The systemd service name
systemd_service="${service_name}.service"
if is_systemd_enabled "${systemd_service}"; then
if is_systemd_active "${systemd_service}"; then
log_abort_and_quit "${systemd_service} is active, nothing left to do."
else
# Save service status before restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${systemd_service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_abort_and_quit "${service_name} is disabled (or missing), nothing left to do."
fi
post_repair

View file

@ -0,0 +1,19 @@
Autosysadmin "restart auto" scripts
===================================
In this directory you can place scripts that will be executed automatically by a cron job (stored in `/etc/cron.d/autosysadmin`).
They must satisfy the default `run-parts(8)` constaints :
* be "executable"
* belong to the Debian cron script namespace (`^[a-zA-Z0-9_-]+$`), example: `restart_amavis`
Warning: scripts that do not satisfy those criteria will NOT be run (silently)!
You can print the names of the scripts which would be run, without actually running them, with this command :
```
$ run-parts --test /usr/share/scripts/autosysadmin/auto
```
You can use `zzz-restart_example.template` as boilerplate code to make your own "restart" script.

View file

@ -0,0 +1,120 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
# shellcheck disable=SC2034
RUNNING="nwh-fr"
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
## Uncomment and customize this method if you want to have a special logic :
##
## return 1 if we should not run
## return 0 if we should run
##
## Some available functions :
## is_weekend() : Saturday or Sunday
## is_holiday() : holiday in France (based on `gcal(1)`)
## is_workday() : not weekend and not holiday
## is_worktime() : work day between 9-12h and 14-18h
#
# running_custom() {
# # implement your own custom method to decide if we should run or not
# }
## The name of the service, mainly for logging
service_name="example"
## The SysVinit script name
sysvinit_script="${service_name}"
## The systemd service name
systemd_service="${service_name}.service"
is_service_alive() {
## this must return 0 if the service is alive, otherwise return 1
## Example:
pgrep -u USER PROCESS_NAME > /dev/null
}
## Action for SysVinit system
sysvinit_action() {
# Save service status before restart
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 "/etc/init.d/${sysvinit_script}" restart > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.after.status"
}
## Action for systemd system
systemd_action() {
# Save service status before restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
# systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start
# so we check the status explicitly
timeout 20 systemctl restart "${systemd_service}" > /dev/null \
&& sleep 1 \
&& systemctl status "${systemd_service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
}
# Should we run?
if ! is_supposed_to_run; then
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
exit 0
fi
if is_service_alive; then
# log_global "${service_name} process alive. Aborting"
exit 0
fi
# Yes we do, so check for sysvinit or systemd
if is_debian_version "8" "<="; then
if ! is_sysvinit_enabled "*${sysvinit_script}*"; then
# log_global "${service_name} not enabled. Aborting"
exit 0
fi
# Let's finally do the action
pre_restart
sysvinit_action
post_restart
else
if ! is_systemd_enabled "${systemd_service}"; then
# log_global "${service_name} is disabled (or missing), nothing left to do."
exit 0
fi
if is_systemd_active "${systemd_service}"; then
# log_global "${service_name} is active, nothing left to do."
exit 0
fi
# Let's finally do the action
pre_restart
systemd_action
post_restart
fi

View file

@ -0,0 +1,16 @@
---
- name: restart nagios-nrpe-server
service:
name: nagios-nrpe-server
state: restarted
- name: restart nrpe
service:
name: nrpe
state: restarted
- name: restart rsyslog
service:
name: rsyslog
state: restarted

View file

@ -0,0 +1,25 @@
---
- name: "Add begin marker if missing"
ansible.builtin.lineinfile:
path: "/etc/cron.d/autosysadmin"
line: "# BEGIN ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN"
insertbefore: BOF
create: yes
- name: "Add end marker if missing"
ansible.builtin.lineinfile:
path: "/etc/cron.d/autosysadmin"
line: "# END ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN"
insertbefore: "EOF"
create: yes
- name: "Create config if missing"
ansible.builtin.blockinfile:
path: "/etc/cron.d/autosysadmin"
marker: "# {mark} ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN"
block: "{{ lookup('ansible.builtin.template', '../templates/autosysadmin.cron.j2') }}"
owner: root
group: root
mode: "0750"
create: yes

View file

@ -0,0 +1,108 @@
---
- name: "Remount /usr if needed"
ansible.builtin.include_role:
name: remount-usr
- name: Create autosysadmin directories
ansible.builtin.file:
path: "{{ item }}"
state: directory
owner: "root"
group: "root"
mode: "0750"
loop:
- "{{ autosysadmin_agent_bin_dir }}"
- "{{ autosysadmin_agent_lib_dir }}"
- "{{ autosysadmin_agent_auto_dir }}"
- name: Copy libraries
ansible.builtin.copy:
src: "upstream/lib/"
dest: "{{ autosysadmin_agent_lib_dir }}/"
owner: root
group: root
mode: "0750"
- name: Copy repair scripts
ansible.builtin.copy:
src: "upstream/repair/"
dest: "{{ autosysadmin_agent_bin_dir }}/"
owner: root
group: root
mode: "0750"
- name: Copy other utilities
ansible.builtin.copy:
src: "upstream/bin/"
dest: "{{ autosysadmin_agent_bin_dir }}/"
owner: root
group: root
mode: "0750"
### WARNING: thos files are explicitly marked as non-executable
### to prevent them from being run automatically by run-parts
- name: Copy restart scripts
ansible.builtin.copy:
src: "upstream/restart/"
dest: "{{ autosysadmin_agent_auto_dir }}/"
owner: root
group: root
mode: "0640"
- name: Ensure /etc/evolinux folder exists
ansible.builtin.file:
path: "/etc/evolinux"
state: directory
owner: "root"
group: "root"
mode: "0700"
- name: Copy the configuration file if missing
ansible.builtin.template:
src: "autosysadmin.cf.j2"
dest: "/etc/evolinux/autosysadmin"
owner: root
group: root
mode: "0640"
force: no
# Repair scripts are supposed to be 'on' by default
# A line "repair_XXX=off" is added to the file only if the script is to be disabled.
# That's why all the ternary logic for the state is reversed.
- name: Update value per variable
ansible.builtin.lineinfile:
dest: "/etc/evolinux/autosysadmin"
line: "{{ item }}={{ autosysadmin_config[item] | default(true) | bool | ternary('on', 'off') }}"
regexp: '^(#\s*)?{{ item }}=.*'
state: "{{ autosysadmin_config[item] | default(true) | bool | ternary('absent', 'present') }}"
register: _line
loop: "{{ autosysadmin_repair_scripts | union(['repair_all']) }}"
- name: Ensure restart folder exists
ansible.builtin.file:
path: "auto"
state: directory
owner: "root"
group: "root"
mode: "0700"
- name: Legacy scripts are removed
ansible.builtin.file:
path: "{{ general_scripts_dir }}/autosysadmin/{{ item }}"
state: absent
loop:
- repair_amavis.sh
- repair_disk.sh
- repair_elasticsearch.sh
- repair_http.sh
- repair_mysql.sh
- repair_opendkim.sh
- repair_php_fpm56.sh
- repair_php_fpm70.sh
- repair_php_fpm73.sh
- repair_php_fpm74.sh
- repair_php_fpm80.sh
- repair_php_fpm81.sh
- repair_redis.sh
- repair_tomcat_instance.sh

View file

@ -1,10 +1,8 @@
---
- name: Copy logrotate configuration for autosysadmin
ansible.builtin.copy:
src: "files/logrotate_autosysadmin.conf"
src: "files/autosysadmin.logrotate.conf"
dest: "/etc/logrotate.d/autosysadmin"
owner: root
group: root
mode: "0644"
tags:
- autosysadmin

View file

@ -0,0 +1,31 @@
---
- name: The list of all repair scripts is composed.
set_fact:
autosysadmin_repair_scripts: "{{ lookup('ansible.builtin.fileglob', '../../../autosysadmin/agent/repair/repair_*', wantlist=True) | map('basename') | sort }}"
- name: Install dependencies
ansible.builtin.include_tasks: dependencies.yml
- name: Install autosysadmin
ansible.builtin.include_tasks: install.yml
- name: Crontab configuration
ansible.builtin.include_tasks: crontab.yml
- name: NRPE configuration
ansible.builtin.include_tasks: nrpe.yml
- name: sudo configuration
ansible.builtin.include_tasks: sudo.yml
- name: rsyslog configuration
ansible.builtin.include_tasks: rsyslog.yml
- name: logrotate configuration
ansible.builtin.include_tasks: logrotate.yml
- name: Install latest version of dump-server-state
ansible.builtin.include_role:
name: evolinux-base
tasks_from: dump-server-state.yml

View file

@ -0,0 +1,9 @@
---
- name: custom configuration is present
ansible.builtin.template:
src: autosysadmin.nrpe.cfg.j2
dest: /etc/nagios/nrpe.d/autosysadmin.cfg
group: nagios
mode: "0640"
force: yes
notify: restart nagios-nrpe-server

View file

@ -1,11 +1,9 @@
---
- name: Copy rsyslog configuration for autosysadmin
ansible.builtin.copy:
src: "files/rsyslog_autosysadmin.conf"
src: "files/autosysadmin.rsyslog.conf"
dest: "/etc/rsyslog.d/autosysadmin.conf"
owner: root
group: root
mode: "0644"
notify: Restart rsyslog
tags:
- autosysadmin
notify: restart rsyslog

View file

@ -1,9 +1,7 @@
---
- name: Add autosysadmin sudoers file
ansible.builtin.template:
src: sudoers.j2
src: autosysadmin.sudoers.j2
dest: /etc/sudoers.d/autosysadmin
mode: "0600"
validate: "visudo -cf %s"
tags:
- autosysadmin

View file

@ -0,0 +1,12 @@
# This configuration is partially managed by Ansible
# You can change specific values manually, but they may be overridden by Ansible
#
# To be safe, update the hosts_vars/group_vars in the autosysadmin project
# https://gitea.evolix.org/evolix/autosysadmin/src/branch/master
# then use the "agent" playbook to deploy.
#
# Configuration for autosysadmin
# Use this file to change configuration values defined in repair scripts
# To disable all repair scripts : repair_all=off
# To disable "repair_http" : repair_http=off
#

View file

@ -0,0 +1,7 @@
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Run each enabled script
*/5 * * * * root run-parts /usr/share/scripts/autosysadmin/auto
# Clean run log files
@daily root {{ autosysadmin_agent_bin_dir | mandatory }}/delete_old_logs.sh {{ autosysadmin_agent_log_retention_days | default('365') }}

View file

@ -0,0 +1,8 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be overwritten !
#
# Autosysadmin repair commands
{% for script in lookup('ansible.builtin.fileglob', '../../../autosysadmin/agent/repair/repair_*', wantlist=True) | map("basename") | sort %}
command[{{ script }}]=sudo {{ autosysadmin_agent_bin_dir }}/{{ script }}
{% endfor %}

View file

@ -0,0 +1,7 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be overwritten !
#
{% for script in lookup('ansible.builtin.fileglob', '../../../autosysadmin/agent/repair/repair_*', wantlist=True) | map("basename") | sort %}
nagios ALL = NOPASSWD: {{ autosysadmin_agent_bin_dir }}/{{ script }}
{% endfor %}

View file

@ -1,22 +0,0 @@
---
general_scripts_dir: "/usr/share/scripts"
autosysadmin_dir: "{{ general_scripts_dir }}/autosysadmin"
# Default values for enabled checks
repair_amavis: 'on'
repair_disk: 'on'
repair_elasticsearch: 'on'
repair_http: 'on'
repair_mysql: 'on'
repair_opendkim: 'off'
repair_php_fpm56: 'off'
repair_php_fpm70: 'off'
repair_php_fpm73: 'off'
repair_php_fpm74: 'off'
repair_php_fpm80: 'off'
repair_php_fpm81: 'off'
repair_php_fpm82: 'off'
repair_php_fpm83: 'off'
repair_redis: 'off'
repair_tomcat_instance: 'off'

View file

@ -1,478 +0,0 @@
#!/bin/bash
get_system() {
uname -s
}
get_fqdn() {
if [ "$(get_system)" = "Linux" ]; then
hostname --fqdn
elif [ "$(get_system)" = "OpenBSD" ]; then
hostname
else
log_error_exit "OS not detected!"
fi
}
get_complete_hostname() {
REAL_HOSTNAME="$(get_fqdn)"
if [ "${HOSTNAME}" = "${REAL_HOSTNAME}" ]; then
echo "${HOSTNAME}"
else
echo "${HOSTNAME} (${REAL_HOSTNAME})"
fi
}
get_evomaintenance_mail() {
email="$(grep "EVOMAINTMAIL=" /etc/evomaintenance.cf | cut -d '=' -f2)"
if [[ -z "$email" ]]; then
email='alert5@evolix.fr'
fi
echo "${email}"
}
arguments="${*}"
get_argument() {
no_found=1
for argument in ${arguments} ; do
if [ "${argument}" = "${1}" ] ;
then
no_found=0
fi
done
return ${no_found}
}
internal_info() {
INTERNAL_INFO="$(printf '%b\n%s' "${INTERNAL_INFO}" "$*")"
}
log_action() {
log "Action : $*"
ACTIONS="$(printf '%s\n%s' "${ACTIONS}" "$*")"
}
log() {
INTERNAL_LOG="$(printf '%s\n%s %s %s %s' "${INTERNAL_LOG}" "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*")"
printf '%s %s %s %s\n' "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*" | tee -a "${LOG_DIR}/autosysadmin.log"
echo "$*" | /usr/bin/logger -p local0.notice -t autosysadmin."$0"
}
log_error_exit() {
log "ERROR : $*"
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: $*" --no-commit --no-mail
exit 1
}
log_check_php_fpm() {
# Extraire seulement les chiffres du nom du script exécuté
# ./repair_php_fpm81.sh ==> 81
PHP_VERSION="${0//[^0-9]/}"
PHP_PATH_POOL=$(find /var/lib/lxc/php"${PHP_VERSION}"/ -type d -name "pool.d")
/usr/local/lib/nagios/plugins/check_phpfpm_multi "${PHP_PATH_POOL}" > "${LOG_DIR}/nrpe.txt"
}
log_system_status() {
DUMP_SERVER_STATE_BIN="$(command -v dump-server-state || command -v backup-server-state)"
if [ -z "${DUMP_SERVER_STATE_BIN}" ]; then
log "Warning: dump-server-state is not present. No server state recorded...."
fi
if [ -x "${DUMP_SERVER_STATE_BIN}" ]; then
# NOTE We don't want the logging to take too much time, so we kill it
# if it take more than 20 seconds.
timeout --signal 9 20 \
"${DUMP_SERVER_STATE_BIN}" \
--dump-dir="$LOG_DIR" \
--df \
--dmesg \
--iptables \
--lxc \
--netcfg \
--netstat \
--uname \
--processes \
--systemctl \
--uptime \
--virsh \
--disks \
--mysql-processes \
--no-apt-states \
--no-apt-config \
--no-dpkg-full \
--no-dpkg-status \
--no-mount \
--no-packages \
--no-sysctl \
--no-etc
log "System status logged in ${LOG_DIR}"
fi
}
read_log_system_status(){
files="df.txt dmesg.txt lxc-list.txt netstat-legacy.txt netstat-ss.txt pstree.txt ps.txt systemctl-failed-services.txt"
echo -e "\n\n#### Détails de dump-server-state"
for file in ${files} ; do
echo -e "\n### cat ${LOG_DIR}/${file} :"
tail -n 1000 "${LOG_DIR}"/"${file}"
done
}
ensure_no_active_users_or_exit() {
if is_debug; then return; fi
# Is there any active user ?
for user in $(LC_ALL=C who --users|awk '{print $1}'); do
idle_time="$(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}')"
for sameusertime in $(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}'); do
if is_active_user "$sameusertime"; then
hook_mail abort_active_users
log_error_exit 'At least one user was recently active. That requires human intervention. Nothing to do here!'
fi
done
done
}
is_active_user() {
# Check if a user was active in the last 30 minutes
idle_time="$1"
if [ "${idle_time}" = "old" ];
then
return 1
elif [ "${idle_time}" = "." ];
then
return 0
else
hh="$(echo "${idle_time}" | awk -F':' '{print $1}')"
mm="$(echo "${idle_time}" | awk -F':' '{print $2}')"
idle_minutes="$(( 60 * "${hh}" + "${mm}" ))"
if [ "${idle_minutes}" -ge 30 ];
then
return 1
else
return 0
fi
fi
}
is_debug() {
debug_file="/etc/evolinux/autosysadmin.debug"
if [ -e "${debug_file}" ]; then
last_change=$(stat -c %Z "${debug_file}")
limit_date=$(date --date "14400 seconds ago" +"%s")
if [ $(( last_change - limit_date )) -le "0" ]; then
rm "${debug_file}"
else
return 0
fi
fi
return 1
}
check_nrpe() {
check="$1"
list_command_nrpe=$( grep --exclude=*~ -E "\[${check}\]" -r /etc/nagios/ | grep -v '#command' )
command_nrpe_primary=$( echo "${list_command_nrpe}" | grep "/etc/nagios/nrpe.d/evolix.cfg" | cut -d'=' -f2- )
command_nrpe_secondary=$( echo "${list_command_nrpe}" | head -n1 | cut -d'=' -f2- )
if [ -z "${command_nrpe_primary}" ] && [ -z "${command_nrpe_secondary}" ]
then
return 1
else
if [ -n "${command_nrpe_primary}" ]
then
${command_nrpe_primary}
else
${command_nrpe_secondary}
fi
fi
}
acquire_lock_or_exit() {
lockfile="$1"
waittime="$2"
# si le temps dattente nest pas compréhensible par sleep(1), il vaut 0
if ! echo "${waittime}" | grep -Eq '^[0-9]+[smhd]?$'
then
waittime=0
fi
# si le temps dattente est supérieur à 0 et si le lock existe, on attend
if test "${waittime}" -gt 0 && test -f "${lockfile}"
then
sleep "${waittime}"
fi
# si le lock existe, on sarrête
if test -f "${lockfile}"
then
log_error_exit "lock file ${lockfile} exists"
fi
touch "${lockfile}"
}
is_too_soon() {
if is_debug; then return; fi
witness="/tmp/autosysadmin_witness_$(basename "$0")"
if test -f "${witness}"
then
compare="$(($(date +%s)-$(stat -c "%Y" "${witness}")))"
if [ "${compare}" -lt 1800 ];
then
log_error_exit 'already executed less than 30 minutes ago'
fi
rm "${witness}"
fi
touch "${witness}"
}
init_autosysadmin() {
PATH="${PATH}":/usr/sbin:/sbin↩
unset ACTIONS
SCRIPTNAME=$(basename "$0")
PROGNAME=${SCRIPTNAME%.sh}
RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${SCRIPTNAME}_$(openssl rand -hex 6)"
LOG_DIR="/var/log/autosysadmin/${RUN_ID}"
mkdir -p "${LOG_DIR}"
log "Autosysadmin : Script ${SCRIPTNAME} triggered"
# Detect operating system name, version and release↩
detect_os
}
load_conf() {
# Load conf and enable script by default.
# To disable script locally, set "$PROGNAME"=off in /etc/evolinux/autosysadmin.
# To disable script globally, set "$PROGNAME"=off in the script, after load_conf() call.
declare -g "$PROGNAME"=on # dynamic variable assignment ($PROGNAME == repair_*)
# Source configuration file
# shellcheck source=../roles/deploy_autosysadmin/templates/autosysadmin.cfg.j2
test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin
}
detect_os() {
# OS detection
DEBIAN_RELEASE=""
LSB_RELEASE_BIN="$(command -v lsb_release)"
if [ -e /etc/debian_version ]; then
DEBIAN_VERSION="$(cut -d "." -f 1 < /etc/debian_version)"
if [ -x "${LSB_RELEASE_BIN}" ]; then
DEBIAN_RELEASE="$("${LSB_RELEASE_BIN}" --codename --short)"
else
case "${DEBIAN_VERSION}" in
8) DEBIAN_RELEASE="jessie";;
9) DEBIAN_RELEASE="stretch";;
10) DEBIAN_RELEASE="buster";;
11) DEBIAN_RELEASE="bullseye";;
esac
fi
fi
}
is_debian_jessie() {
test "${DEBIAN_RELEASE}" = "jessie"
}
is_debian_stretch() {
test "${DEBIAN_RELEASE}" = "stretch"
}
is_debian_buster() {
test "${DEBIAN_RELEASE}" = "buster"
}
is_debian_bullseye() {
test "${DEBIAN_RELEASE}" = "bullseye"
}
systemd_list_service_failed() {
systemctl list-units --failed --no-legend --full --type=service "$1" |
awk '{print $1}'
}
systemd_list_units_enabled() {
list_units_enabled=$(systemctl list-unit-files --state=enabled --no-legend | awk "/$1/{print \$1}")
if [ -z "${list_units_enabled}" ]
then
return 1
else
echo "${list_units_enabled}"
fi
}
format_mail_success() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <equipe+autosysadmin@evolix.net>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: $(basename "$0")
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: autosysadmin@evolix.fr
Subject: [autosysadmin] Intervention sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
### Renseignements sur l'intervention
${ACTIONS}
### Réagir à cette intervention
Vous pouvez répondre à ce message (sur l'adresse mail equipe@evolix.net).
En cas d'urgence, utilisez l'adresse maintenance@evolix.fr ou
notre téléphone portable d'astreinte (04.26.99.99.26)
--
Votre AutoSysadmin
EOTEMPLATE
}
format_mail_abort_active_users() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <equipe+autosysadmin@evolix.net>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: $(basename "$0")
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: autosysadmin@evolix.fr
Subject: [autosysadmin] Intervention interrompue sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique a été interrompue en raison
d'un utilisateur actuellement actif sur le serveur.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
### Utilisateur(s) connecté(s)
$(w)
--
Votre AutoSysadmin
EOTEMPLATE
}
format_mail_internal_info() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <equipe+autosysadmin@evolix.net>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: $(basename "$0")
X-RunId: ${RUN_ID}
To: autosysadmin@evolix.fr
Subject: [autosysadmin] Complements (interne) - Intervention sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
Script déclenché : $(basename "$0")
### Actions effectuées
${ACTIONS}
### Logs autosysadmin
${INTERNAL_LOG}
### Utilisateur(s) connecté(s)
$(w)
### Informations additionnelles données par le script $(basename "$0")
${INTERNAL_INFO}
--
Votre AutoSysadmin
EOTEMPLATE
}
hook_mail() {
if is_debug; then return; fi
HOSTNAME="${HOSTNAME:-"$(get_fqdn)"}"
HOSTNAME_TEXT="$(get_complete_hostname)"
EMAIL_CLIENT="$(get_evomaintenance_mail)"
MAIL_CONTENT="$(format_mail_"$1")"
SENDMAIL_BIN="$(command -v sendmail)"
if [ -z "${SENDMAIL_BIN}" ]; then
log "No \`sendmail' command has been found, can't send mail."
fi
if [ -x "${SENDMAIL_BIN}" ]; then
echo "${MAIL_CONTENT}" | "${SENDMAIL_BIN}" -oi -t -f "equipe@evolix.net"
fi
}
# We need stable output for gcal, so we force some language environment variables
export TZ=Europe/Paris
export LANGUAGE=fr_FR.UTF-8
is_holiday() {
# gcal mark today as a holiday by surrounding with < and > the day
# of the month of that holiday line. For exemple if today is 2022-05-01 we'll
# get among other lines:
# Fête du Travail (FR) + Di, < 1>Mai 2022
# Jour de la Victoire (FR) + Di, : 8:Mai 2022 = +7 jours
gcal --cc-holidays=fr --holiday-list=short | grep -E '<[0-9 ]{2}>' --quiet
}
is_weekend() {
day_of_week=$(date +%u)
if [ "$day_of_week" != 6 ] && [ "$day_of_week" != 7 ]; then
return 1
fi
}
is_workday() {
if is_holiday || is_weekend; then
return 1
fi
}
is_worktime() {
if ! is_workday; then
return 1
fi
hour=$(date +%H)
if [ "${hour}" -lt 9 ] || { [ "${hour}" -ge 12 ] && [ "${hour}" -lt 14 ] ; } || [ "${hour}" -ge 18 ]; then
return 1
fi
}

View file

@ -1,33 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
# shellcheck source=./restart_amavis.sh
source /usr/share/scripts/autosysadmin/restart_amavis.sh
init_autosysadmin
load_conf
test "${repair_amavis:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Verify if check_nrpe are not OK
check_nrpe "check_amavis" && log_error_exit 'check_amavis is OK, nothing to do here!'
# Has it recently been run?
get_argument "--no-delay" || is_too_soon
lockfile="/run/lock/repair_amavis"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
restart_amavis
hook_mail success
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,173 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_disk:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_disk"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
get_mountpoints() {
# the $(...) get the check_disk1 command
# the cut command selects the critical part of the check_disk1 output
# the grep command extracts the mountpoints and available disk space
# the last cut command selects the mountpoints
$(grep check_disk1 /etc/nagios/nrpe.d/evolix.cfg | cut -d'=' -f2-) -e | cut -d'|' -f1 | grep -Eo '/[[:graph:]]* [0-9]+ [A-Z][A-Z]' | cut -f1 -d' '
}
is_reserved-blocks() {
fs_type="$(findmnt -n --output=fstype "$1")"
if [ "${fs_type}" = "ext4" ];
then
device="$(findmnt -n --output=source "$1")"
reserved_block_count="$(tune2fs -l "${device}" | grep 'Reserved block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')"
block_count="$(tune2fs -l "${device}" | grep 'Block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')"
percentage=$(awk "BEGIN { pc=100*${reserved_block_count}/${block_count}; i=int(pc); print (pc-i<0.5)?i:i+1 }")
log "Reserved blocks for $1 is curently at $percentage%"
if [ "${percentage}" -gt "1" ]
then
log "Allowing tune2fs action to reduce the number of reserved blocks"
return 0
else
log "Reserved blocks already at or bellow 1%, no automatic action possible"
return 1
fi
else
log "Filesystem for $1 partition is not ext4"
return 1
fi
}
change_reserved-blocks() {
# We alwasy keep some reserved blocks to avoid missing some logs
# https://gitea.evolix.org/evolix/autosysadmin/issues/22
tune2fs -m 1 "$(findmnt -n --output=source "$1")"
log_action "Reserved blocks for $1 changed to 1 percent"
}
is_tmp_to_delete() {
size="$(find /var/log/ -type f -ctime +1 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
if [ -n "${size}" ]
then
return 0
else
return 1
fi
}
is_log_to_delete() {
size="$(find /var/log/ -type f -mtime +365 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
if [ -n "${size}" ]
then
return 0
else
return 1
fi
}
clean_apt_cache() {
for lxc in $(du -ax /var | sort -nr | head -n10 | grep -E '/var/lib/lxc/php[0-9]+/rootfs/var/cache$' | grep -Eo 'php[0-9]+')
do
lxc-attach --name "${lxc}" -- apt-get clean
log_action '[lxc/'"${lxc}"'] Clean apt cache'
done
case "$(du -sx /var/* | sort -rn | sed 's/^[0-9]\+[[:space:]]\+//;q')" in
'/var/cache')
apt-get clean
log_action 'Clean apt cache'
;;
esac
}
clean_amavis_virusmails() {
if du --inodes /var/lib/* | sort -n | tail -n3 | grep -q 'virusmails$'
then
find /var/lib/amavis/virusmails/ -type f -atime +30 -delete
log_action 'Clean /var/lib/amavis/virusmails'
fi
}
for mountpoint in $(get_mountpoints)
do
case "${mountpoint}" in
/var)
#if is_log_to_delete
#then
# find /var/log/ -type f -mtime +365 -delete
# log_action "$size Mo of disk space freed in /var"
#fi
if is_reserved-blocks /var
then
change_reserved-blocks /var
clean_apt_cache
clean_amavis_virusmails
hook_mail success
fi
;;
/tmp)
#if is_tmp_to_delete
#then
# find /tmp/ -type f -ctime +1 -delete
# log_action "$size Mo of disk space freed in /tmp"
#fi
if is_reserved-blocks /tmp
then
change_reserved-blocks /tmp
hook_mail success
fi
;;
/home)
if is_reserved-blocks /home
then
change_reserved-blocks /home
hook_mail success
fi
;;
/srv)
if is_reserved-blocks /srv
then
change_reserved-blocks /srv
hook_mail success
fi
;;
/filer)
if is_reserved-blocks /filer
then
change_reserved-blocks /filer
hook_mail success
fi
;;
/)
if is_reserved-blocks /
then
change_reserved-blocks /
hook_mail success
# Suggest remove old kernel ?
fi
;;
*)
# unknown
log 'Unknown partition (or weird case) or nothing to do'
;;
esac
done
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,57 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_elasticsearch:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_elasticsearch"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
elasticsearch_is_enabled() {
systemd_list_units_enabled "elasticsearch.service"
}
elasticsearch_restart() {
if ! timeout 60 systemctl restart elasticsearch.service > /dev/null
then
log_error_exit 'failed to restart elasticsearch'
fi
}
# Test functions
test_elasticsearch_process_present() {
pgrep -u elasticsearch > /dev/null
}
if elasticsearch_is_enabled
then
if ! test_elasticsearch_process_present
then
log_action "Redémarrage de elasticsearch"
elasticsearch_restart
hook_mail success
else
log_error_exit "Elasticsearch process alive. Aborting"
fi
else
log_error_exit "Elasticsearch is not enabled. Aborting"
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,141 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_http:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
http_detect_service() {
# check whether nginx, apache or both are supposed to be running
if is_debian_jessie; then
find /etc/rc2.d/
else
systemctl list-unit-files --state=enabled
fi | awk '/nginx/ { nginx = 1 } /apache2/ { apache2 = 1 } END { if (nginx && apache2) { print "both" } else if (nginx) { print "nginx" } else if (apache2) { print "apache2" } }'
# The previous awk command looks for two patterns: "nginx"
# and "apache2". If a line matches the patterns, a variable
# "nginx" or "apache2" is set to 1 (true). The "END" checks
# if one or both patterns has been found.
}
http_handle_apache() {
# check syntax
if ! apache2ctl -t > /dev/null 2> /dev/null
then
log_error_exit 'apache2 configuration syntax is not valid'
fi
# try restart
if ! timeout 20 systemctl restart apache2.service > /dev/null 2> /dev/null
then
log_error_exit 'failed to restart apache2'
fi
log_action "Redémarrage de Apache"
internal_info "#### grep $(LANG=en_US.UTF-8 date '+%b %d') /home/*/log/error.log /var/log/apache2/*error.log (avec filtrage)"
ERROR_LOG=$(grep "$(LANG=en_US.UTF-8 date '+%b %d')" /home/*/log/error.log /var/log/apache2/*error.log | grep -v -e "Got error 'PHP message:" -e "No matching DirectoryIndex" -e "client denied by server configuration" -e "server certificate does NOT include an ID which matches the server name" )
internal_info "$ERROR_LOG"
}
http_handle_nginx() {
# check syntax
if ! nginx -t > /dev/null 2> /dev/null
then
log_error_exit 'nginx configuration syntax is not valid'
fi
# try restart
if ! timeout 20 systemctl restart nginx.service > /dev/null 2> /dev/null
then
log_error_exit 'failed to restart nginx'
fi
log_action "Redémarrage de Nginx"
}
http_handle_lxc_php() {
# check whether containers are used for PHP and reboot them if so
if systemd_list_units_enabled 'lxc'
then
for php in $(lxc-ls | grep 'php'); do
lxc-stop -n "$php"
lxc-start --daemon -n "$php"
log_action "lxc-fpm - Redémarrage container ${php}"
done
fi
}
http_handle_fpm_php() {
# check whether php-fpm is installed and restart it if so
if enabled_units="$(systemd_list_units_enabled "php.*-fpm")"
then
systemctl restart "${enabled_units}"
log_action 'php-fpm - Redémarrage de php-fpm'
fi
}
case "$(http_detect_service)" in
nginx)
http_handle_nginx
http_handle_lxc_php
http_handle_fpm_php
hook_mail success
hook_mail internal_info
;;
apache2)
http_handle_apache
http_handle_lxc_php
http_handle_fpm_php
hook_mail success
hook_mail internal_info
;;
both)
http_handle_nginx
http_handle_apache
http_handle_lxc_php
http_handle_fpm_php
hook_mail success
hook_mail internal_info
;;
*)
# unknown
log 'nothing to do'
;;
esac
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,71 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_mysql:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_mysql"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
mysql_is_enabled() {
if is_debian_jessie
then
find /etc/rc2.d/ -name '*mysql*' > /dev/null
else
systemd_list_units_enabled "mysql.service"
fi
}
mysql_restart() {
if is_debian_jessie
then
if ! timeout 60 /etc/init.d/mysql restart > /dev/null
then
log_error_exit 'failed to restart mysql'
fi
else
if ! timeout 60 systemctl restart mysql.service > /dev/null
then
log_error_exit 'failed to restart mysql'
fi
fi
}
# Test functions
test_mysql_process_present() {
pgrep -u mysql mysqld > /dev/null
}
if mysql_is_enabled
then
if ! test_mysql_process_present
then
log_action "Redémarrage de MySQL"
mysql_restart
hook_mail success
else
log_error_exit "mysqld process alive. Aborting"
fi
else
log_error_exit "MySQL/MariaDB not enabled. Aborting"
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,61 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_opendkim:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_opendkim"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
log_system_status
# Functions dedicated to this repair script
opendkim_is_enabled() {
systemd_list_units_enabled "opendkim.service"
}
opendkim_restart() {
if ! timeout 60 systemctl restart opendkim.service > /dev/null
then
log_error_exit 'failed to restart opendkim'
fi
}
opendkim_test_process_present() {
pgrep -u opendkim > /dev/null
}
# Main logic
if opendkim_is_enabled
then
if ! opendkim_test_process_present
then
log_action "Redémarrage de opendkim"
opendkim_restart
hook_mail success
else
log_error_exit "opendkim process alive. Aborting"
fi
else
log_error_exit "opendkim is not enabled. Aborting"
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm56:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php56
then
lxc-stop -n php56
lxc-start --daemon -n php56
log_action "lxc-fpm - Redémarrage container php56"
internal_info "#### tail /var/lib/lxc/php56/rootfs/var/log/php5-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php56/rootfs/var/log/php5-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm70:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php70
then
lxc-stop -n php70
lxc-start --daemon -n php70
log_action "lxc-fpm - Redémarrage container php70"
internal_info "#### tail /var/lib/lxc/php70/rootfs/var/log/php7.0-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php70/rootfs/var/log/php7.0-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm73:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php73
then
lxc-stop -n php73
lxc-start --daemon -n php73
log_action "lxc-fpm - Redémarrage container php73"
internal_info "#### tail /var/lib/lxc/php73/rootfs/var/log/php7.3-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php73/rootfs/var/log/php7.3-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm74:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php74
then
lxc-stop -n php74
lxc-start --daemon -n php74
log_action "lxc-fpm - Redémarrage container php74"
internal_info "#### tail /var/lib/lxc/php74/rootfs/var/log/php7.4-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php74/rootfs/var/log/php7.4-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm80:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php80
then
lxc-stop -n php80
lxc-start --daemon -n php80
log_action "lxc-fpm - Redémarrage container php80"
internal_info "#### tail /var/lib/lxc/php80/rootfs/var/log/php8.0-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php80/rootfs/var/log/php8.0-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm81:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php81
then
lxc-stop -n php81
lxc-start --daemon -n php81
log_action "lxc-fpm - Redémarrage container php81"
internal_info "#### tail /var/lib/lxc/php81/rootfs/var/log/php8.1-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php81/rootfs/var/log/php8.1-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm82:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php82
then
lxc-stop -n php82
lxc-start --daemon -n php82
log_action "lxc-fpm - Redémarrage container php82"
internal_info "#### tail /var/lib/lxc/php82/rootfs/var/log/php8.2-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php82/rootfs/var/log/php8.2-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm83:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php83
then
lxc-stop -n php83
lxc-start --daemon -n php83
log_action "lxc-fpm - Redémarrage container php83"
internal_info "#### tail /var/lib/lxc/php83/rootfs/var/log/php8.3-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php83/rootfs/var/log/php8.3-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,58 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_redis:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_redis"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
handle_redis() {
for service in $(systemd_list_service_failed redis*)
do
# ne rien faire si le service est désactivé
if ! systemctl is-enabled --quiet "${service}"
then
continue
fi
# ne rien faire si le service est actif
if systemctl is-active --quiet "${service}"
then
continue
fi
if ! timeout 20 systemctl restart redis.service > /dev/null 2> /dev/null
then
log_error_exit "failed to restart redis ${service}"
fi
log_action "Redémarrer service ${service}"
done
}
if ( systemd_list_units_enabled 'redis.*\.service$' ) > /dev/null
then
handle_redis
hook_mail success
else
log 'Error: redis service is not enabled'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,63 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
# Comment this line to enable
repair_template=off
test "${repair_template:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_template"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
log_system_status
# Functions dedicated to this repair script
template_is_enabled() {
systemd_list_units_enabled "template.service"
}
template_restart() {
if ! timeout 60 systemctl restart template.service > /dev/null
then
log_error_exit 'failed to restart template'
fi
}
template_test_process_present() {
pgrep -u template > /dev/null
}
# Main logic
if template_is_enabled
then
if ! template_test_process_present
then
log_action "Redémarrage de template"
template_restart
hook_mail success
else
log_error_exit "template process alive. Aborting"
fi
else
log_error_exit "template is not enabled. Aborting"
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,35 +0,0 @@
#!/bin/bash
restart_amavis() {
/etc/init.d/amavis stop 2>/dev/null
/etc/init.d/clamav-freshclam stop 2>/dev/null
/etc/init.d/clamav-daemon stop 2>/dev/null
if systemctl is-enabled --quiet 'clamav-freshclam.service'
then
freshclam
log_action "Mise à jour des définitions antivirus"
fi
if systemctl is-enabled --quiet 'clamav-daemon.service'
then
/etc/init.d/clamav-daemon start
log_action "Redémarrage de clamav-daemon"
else
log 'Error, clamav not installed'
fi
if systemctl is-enabled --quiet 'clamav-freshclam.service'
then
/etc/init.d/clamav-freshclam start
log_action "Redémarrage de clamav-freshclam"
fi
if systemctl is-enabled --quiet 'amavis.service'
then
/etc/init.d/amavis start
log_action "Redémarrage de amavis"
else
log 'Error, amavis not installed'
fi
}

View file

@ -1,16 +0,0 @@
---
- name: Restart nagios-nrpe-server
ansible.builtin.service:
name: nagios-nrpe-server
state: restarted
- name: Restart nrpe
ansible.builtin.service:
name: nrpe
state: restarted
- name: Restart rsyslog
ansible.builtin.service:
name: rsyslog
state: restarted

View file

@ -1,61 +0,0 @@
---
- name: "Remount /usr if needed"
ansible.builtin.import_role:
name: remount-usr
- name: Create autosysadmin directory
ansible.builtin.file:
path: "{{ autosysadmin_dir }}"
state: directory
owner: "root"
group: "root"
mode: "0750"
tags:
- autosysadmin
- name: Copy scripts
ansible.builtin.copy:
src: "files/scripts/{{ item }}"
dest: "{{ autosysadmin_dir }}/{{ item }}"
owner: root
group: root
mode: "0750"
loop:
- "functions.sh"
- "restart_amavis.sh"
- "repair_amavis.sh"
- "repair_disk.sh"
- "repair_elasticsearch.sh"
- "repair_http.sh"
- "repair_mysql.sh"
- "repair_php_fpm56.sh"
- "repair_php_fpm70.sh"
- "repair_php_fpm73.sh"
- "repair_php_fpm74.sh"
- "repair_php_fpm80.sh"
- "repair_php_fpm81.sh"
- "repair_php_fpm82.sh"
- "repair_php_fpm83.sh"
- "repair_tomcat_instance.sh"
tags:
- autosysadmin
- name: Ensure /etc/evolinux folder exists
ansible.builtin.file:
path: "/etc/evolinux"
state: directory
owner: "root"
group: "root"
mode: "0700"
tags:
- autosysadmin
- name: Copy the configuration file
ansible.builtin.template:
src: "autosysadmin.cf.j2"
dest: "/etc/evolinux/autosysadmin"
owner: root
group: root
mode: "0640"
tags:
- autosysadmin

View file

@ -1,37 +0,0 @@
---
- name: Install dependencies
ansible.builtin.import_tasks: dependencies.yml
tags:
- autosysadmin
- name: Install autosysadmin scripts
ansible.builtin.import_tasks: autosysadmin_scripts.yml
tags:
- autosysadmin
- name: Amend NRPE configuration
ansible.builtin.import_tasks: nrpe.yml
tags:
- autosysadmin
- name: Amend sudo configuration
ansible.builtin.import_tasks: sudo.yml
tags:
- autosysadmin
- name: Amend rsyslog configuration
ansible.builtin.import_tasks: rsyslog.yml
tags:
- autosysadmin
- name: Amend logrotate configuration
ansible.builtin.import_tasks: logrotate.yml
tags:
- autosysadmin
- name: Install last version of dump-server-state
ansible.builtin.import_role:
name: evolinux-base
tasks_from: dump-server-state.yml
tags:
- autosysadmin

View file

@ -1,11 +0,0 @@
---
- name: Custom configuration is present
ansible.builtin.template:
src: autosysadmin.cfg.j2
dest: /etc/nagios/nrpe.d/autosysadmin.cfg
group: nagios
mode: "0640"
force: true
notify: Restart nagios-nrpe-server
tags:
- autosysadmin

View file

@ -1,74 +0,0 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be **overwritten** !
#
# Update the hosts_vars/group_vars on the autosysadmin project
# https://gitea.evolix.org/evolix/autosysadmin/src/branch/master
#
# Configuration for autosysadmin
# Use this file to change configuration values defined in repair scripts
# Ex : repair_http=off
{% if repair_amavis == "off" %}
repair_amavis=off
{% endif %}
{% if repair_disk == "off" %}
repair_disk=off
{% endif %}
{% if repair_elasticsearch == "off" %}
repair_elasticsearch=off
{% endif %}
{% if repair_http == "off" %}
repair_http=off
{% endif %}
{% if repair_mysql == "off" %}
repair_mysql=off
{% endif %}
{% if repair_opendkim == "off" %}
repair_opendkim=off
{% endif %}
{% if repair_php_fpm56 == "off" %}
repair_php_fpm56=off
{% endif %}
{% if repair_php_fpm70 == "off" %}
repair_php_fpm70=off
{% endif %}
{% if repair_php_fpm73 == "off" %}
repair_php_fpm73=off
{% endif %}
{% if repair_php_fpm74 == "off" %}
repair_php_fpm74=off
{% endif %}
{% if repair_php_fpm80 == "off" %}
repair_php_fpm80=off
{% endif %}
{% if repair_php_fpm81 == "off" %}
repair_php_fpm81=off
{% endif %}
{% if repair_php_fpm82 == "off" %}
repair_php_fpm82=off
{% endif %}
{% if repair_php_fpm83 == "off" %}
repair_php_fpm83=off
{% endif %}
{% if repair_redis == "off" %}
repair_redis=off
{% endif %}
{% if repair_tomcat_instance == "off" %}
repair_tomcat_instance=off
{% endif %}

View file

@ -1,22 +0,0 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be overwritten !
#
# Autosysadmin repair commands
command[repair_amavis]=sudo {{ autosysadmin_dir }}/repair_amavis.sh
command[repair_disk]=sudo {{ autosysadmin_dir }}/repair_disk.sh
command[repair_elasticsearch]=sudo {{ autosysadmin_dir }}/repair_elasticsearch.sh
command[repair_http]=sudo {{ autosysadmin_dir }}/repair_http.sh
command[repair_mysql]=sudo {{ autosysadmin_dir }}/repair_mysql.sh
command[repair_opendkim]=sudo {{ autosysadmin_dir }}/repair_opendkim.sh
command[repair_php_fpm56]=sudo {{ autosysadmin_dir }}/repair_php_fpm56.sh
command[repair_php_fpm70]=sudo {{ autosysadmin_dir }}/repair_php_fpm70.sh
command[repair_php_fpm73]=sudo {{ autosysadmin_dir }}/repair_php_fpm73.sh
command[repair_php_fpm74]=sudo {{ autosysadmin_dir }}/repair_php_fpm74.sh
command[repair_php_fpm80]=sudo {{ autosysadmin_dir }}/repair_php_fpm80.sh
command[repair_php_fpm81]=sudo {{ autosysadmin_dir }}/repair_php_fpm81.sh
command[repair_php_fpm82]=sudo {{ autosysadmin_dir }}/repair_php_fpm82.sh
command[repair_php_fpm83]=sudo {{ autosysadmin_dir }}/repair_php_fpm83.sh
command[repair_redis]=sudo {{ autosysadmin_dir }}/repair_redis.sh
command[repair_tomcat_instance]=sudo {{ autosysadmin_dir }}/repair_tomcat_instance.sh

View file

@ -1,21 +0,0 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be overwritten !
#
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_amavis.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_disk.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_elasticsearch.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_http.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_mysql.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_opendkim.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm56.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm70.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm73.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm74.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm80.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm81.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm82.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm83.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_redis.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_tomcat_instance.sh

View file

@ -149,7 +149,7 @@
- name: Autosysadmin
ansible.builtin.include_role:
name: 'evolix/autosysadmin'
name: 'evolix/autosysadmin-agent'
when: evolinux_autosysadmin_include | bool
- name: fail2ban