Merge branch 'unstable' into stable
All checks were successful
gitea/ansible-roles/pipeline/head This commit looks good
Ansible Lint |Total|New|Outstanding|Fixed|Trend |:-:|:-:|:-:|:-:|:-: |2712|0|2712|0|:zzz:
gitea/ansible-roles/pipeline/tag This commit looks good

This commit is contained in:
Jérémy Lecour 2024-03-01 09:06:08 +01:00 committed by Jérémy Lecour
commit 2a856d579e
Signed by: jlecour
SSH key fingerprint: SHA256:h+5LgHRKwN9lS0SsdVR5yZPeFlJE4Mt+8UtL4CcP8dY
83 changed files with 2789 additions and 2113 deletions

View file

@ -21,6 +21,30 @@ The **patch** part changes is incremented if multiple releases happen the same m
### Security
## [24.03] 2024-03-01
### Added
* autosysadmin-agent: upstream release 24.03
* autosysadmin-restart_nrpe: add role
* certbot: Renewal hook for NRPE
* kvm-host: add minifirewall rules if DRBD interface is configured
### Changed
* apt: add ftp.evolix.org as recognized system source
* autosysadmin-agent: logs clearing is done weekly
* autosysadmin-agent: rename /usr/share/scripts/autosysadmin/{auto,restart}
* certbot: use pkey to test the key
* evolinux-base: execute autosysadmin-agent and autosysadmin-restart_nrpe roles
* lxc-php, php: Update sury PGP key
* openvpn: earlier alert for CA expiration
* redis: create sysfs config file if missing
### Removed
* autosysadmin: replaced by autosysadmin-agent
## [24.02.1] 2024-02-08
### Fixed
@ -77,6 +101,7 @@ The **patch** part changes is incremented if multiple releases happen the same m
* nagios: add dockerd check in nrpe check template
* nagios: cleaning nrpe check template
* nagios: rename var `nagios_nrpe_process_processes` into `nagios_nrpe_processes` and check systemd-timesyncd instead of ntpd in Debian 12
* nagios: add option --full to check pressure IO and mem to avoid flaps
* proftpd: in SFTP vhost, enable SSH keys login, enable ed25549 host key for Debian >= 11
* redis: manage config template inside a block, to allow custom modifications outside
* spamassassin: Use spamd starting with Bookworm

View file

@ -1,5 +1,11 @@
#!/usr/bin/env python3
##########
# This script takes a multi-lines input of "oneliner-style" APT sources definitions.
# It converts them into "deb822-style" sources.
# Each generated file will have only one stanza, possibly with multiple Types/Suites/Components
##########
import re
import sys
import os
@ -10,11 +16,16 @@ import apt_pkg
# Order matters !
destinations = {
"debian-security": "security.sources",
".*-backports": "backports.sources",
".debian.org": "system.sources",
"mirror.evolix.org": "system.sources",
"ftp.evolix.org": "system.sources",
"pub.evolix.net": "evolix_public_old.sources.bak",
"pub.evolix.org": "evolix_public.sources",
"artifacts.elastic.co": "elastic.sources",
"download.docker.com": "docker.sources",
"downloads.linux.hpe.com": "hp.sources",
@ -76,6 +87,11 @@ def prepare_sources(lines):
key, value = option.split("=")
options[key] = value
### WARNING ###
# if there are multiple lines with different URIS for a given destination (eg. "system")
# each one will overwrite the previous one
# and the last evaluated will be what remains.
if dest in sources:
sources[dest]["Types"].add(matches["type"])
sources[dest]["URIs"] = matches["uri"]

View file

@ -1,5 +1,11 @@
#!/bin/sh
##########
# This script changes all "one-line" APT sources into "deb822" sources.
# It is responsible for searching and processing the files.
# The actual format migration is done by a python script.
##########
deb822_migrate_script=$(command -v deb822-migration.py)
if [ -z "${deb822_migrate_script}" ]; then
@ -46,4 +52,4 @@ for file in $(find /etc/apt/sources.list.d -mindepth 1 -maxdepth 1 -type f -name
done
echo "${count} file(s) migrated"
exit ${rc}
exit ${rc}

View file

@ -0,0 +1,17 @@
---
general_scripts_dir: "/usr/share/scripts"
autosysadmin_agent_bin_dir: "/usr/local/bin/autosysadmin"
autosysadmin_agent_lib_dir: "/usr/local/lib/autosysadmin"
autosysadmin_agent_auto_dir: "{{ general_scripts_dir }}/autosysadmin/restart"
autosysadmin_agent_crontab_enabled: true
autosysadmin_agent_log_retention_days: 365
autosysadmin_config: []
### All repair are disabled if set to 'off'
### even if a specific repair value is 'on'
# repair_all: 'on'
### Default values for checks
# repair_foo: 'off'

View file

@ -0,0 +1,25 @@
#!/bin/bash
days=${1:-365}
log_dir="/var/log/autosysadmin/"
if [ -d "${log_dir}" ]; then
find_run_dirs() {
find "${log_dir}" \
-mindepth 1 \
-maxdepth 1 \
-type d \
-ctime "+${days}" \
-print0
}
log() {
/usr/bin/logger -p local0.notice -t autosysadmin "${1}"
}
while IFS= read -r -d '' run_dir; do
rm --recursive --force "${run_dir}"
log "Delete ${run_dir} (older than ${days} days)"
done < <(find_run_dirs)
fi
exit 0

View file

@ -0,0 +1,907 @@
#!/bin/bash
VERSION="24.03"
# Common functions for "repair" and "restart" scripts
set -u
# Initializes the program, context, configuration…
initialize() {
PATH="${PATH}":/usr/sbin:/sbin
# Used in many places to refer to the program name.
# Examples: repair_mysql, restart_nrpe…
PROGNAME=$(basename "${0}")
# find out if running in interactive mode, or not
if [ -t 0 ]; then
INTERACTIVE=1
else
INTERACTIVE=0
fi
readonly INTERACTIVE
# Default empty value for Debug mode
DEBUG="${DEBUG:-""}"
# Repair scripts obey to the value of a variable named after the script
# You can set the value ("on" or "off") in /etc/evolinux/autosysadmin
# Here we set the default value to "on".
declare -g "${PROGNAME}"=on # dynamic variable assignment ($PROGNAME == repair_*)
PID=$$
readonly PID
# Each execution (run) gets a unique ID
RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${PROGNAME}_${PID}"
readonly RUN_ID
# Main log directory
MAIN_LOG_DIR="/var/log/autosysadmin"
readonly MAIN_LOG_DIR
# shellcheck disable=SC2174
mkdir --mode=750 --parents "${MAIN_LOG_DIR}"
chgrp adm "${MAIN_LOG_DIR}"
# Each execution store some information
# in a unique directory based on the RUN_ID
RUN_LOG_DIR="${MAIN_LOG_DIR}/${RUN_ID}"
readonly RUN_LOG_DIR
# shellcheck disable=SC2174
mkdir --mode=750 --parents "${RUN_LOG_DIR}"
chgrp adm "${RUN_LOG_DIR}"
# This log file contains all events
RUN_LOG_FILE="${RUN_LOG_DIR}/autosysadmin.log"
readonly RUN_LOG_FILE
# This log file contains notable actions
ACTIONS_FILE="${RUN_LOG_DIR}/actions.log"
readonly ACTIONS_FILE
touch "${ACTIONS_FILE}"
# This log file contains abort reasons (if any)
ABORT_FILE="${RUN_LOG_DIR}/abort.log"
readonly ABORT_FILE
# touch "${ABORT_FILE}"
# Date format for log messages
DATE_FORMAT="%Y-%m-%d %H:%M:%S"
# This will contain lock, last-run markers…
# It's ok to lose the content after a reboot
RUN_DIR="/run/autosysadmin"
readonly RUN_DIR
mkdir -p "${RUN_DIR}"
# Only a singe instace of each script can run simultaneously
# We use a customizable lock name for this.
# By default it's the script's name
LOCK_NAME=${LOCK_NAME:-${PROGNAME}}
# If a lock is found, we can wait for it to disappear.
# The value must be understood by sleep(1)
LOCK_WAIT="0"
# Default values for email headers
EMAIL_FROM="equipe+autosysadmin@evolix.fr"
EMAIL_INTERNAL="autosysadmin@evolix.fr"
LOCK_FILE="${RUN_DIR}/${LOCK_NAME}.lock"
readonly LOCK_FILE
# Remove lock file at exit
cleanup() {
# shellcheck disable=SC2317
rm -f "${LOCK_FILE}"
}
trap 'cleanup' 0
# Load configuration
# shellcheck disable=SC1091
test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin
log_all "Begin ${PROGNAME} RUN_ID: ${RUN_ID}"
log_all "Log directory is ${RUN_LOG_DIR}"
}
# Executes a list of tasks before exiting:
# * prepare a summary of actions and possible abort reasons
# * send emails
# * do some cleanup
quit() {
log_all "End ${PROGNAME} RUN_ID: ${RUN_ID}"
summary="RUN_ID: ${RUN_ID}"
if [ -s "${ABORT_FILE}" ]; then
# Add abort reasons to summary
summary="${summary}\n$(print_abort_reasons)"
hook_mail "abort"
return_code=1
else
if [ -s "${ACTIONS_FILE}" ]; then
# Add notable actions to summary
summary="${summary}\n$(print_actions "Aucune action")"
hook_mail "success"
fi
return_code=0
fi
hook_mail "internal"
if is_interactive; then
# shellcheck disable=SC2001
echo "${summary}" | sed -e 's/\\n/\n/g'
else
/usr/share/scripts/evomaintenance.sh --auto --user autosysadmin --message "${summary}" --no-commit --no-mail
fi
teardown
# shellcheck disable=SC2086
exit ${return_code}
}
teardown() {
:
}
# Return true/false
is_interactive() {
test "${INTERACTIVE}" -eq "1"
}
save_server_state() {
DUMP_SERVER_STATE_BIN="$(command -v dump-server-state || command -v backup-server-state)"
if [ -z "${DUMP_SERVER_STATE_BIN}" ]; then
log_all "Warning: dump-server-state is not present. No server state recorded."
fi
if [ -x "${DUMP_SERVER_STATE_BIN}" ]; then
DUMP_DIR=$(file_path_in_log_dir "server-state")
# We don't want the logging to take too much time,
# so we kill it if it takes more than 20 seconds.
timeout --signal 9 20 \
"${DUMP_SERVER_STATE_BIN}" \
--dump-dir="${DUMP_DIR}" \
--df \
--dmesg \
--iptables \
--lxc \
--netcfg \
--netstat \
--uname \
--processes \
--systemctl \
--uptime \
--virsh \
--disks \
--mysql-processes \
--no-apt-states \
--no-apt-config \
--no-dpkg-full \
--no-dpkg-status \
--no-mount \
--no-packages \
--no-sysctl \
--no-etc
log_run "Server state saved in \`server-state' directory."
fi
}
is_debug() {
# first time: do the check…
# other times: pass
if [ -z "${DEBUG:-""}" ]; then
debug_file="/etc/evolinux/autosysadmin.debug"
if [ -e "${debug_file}" ]; then
last_change=$(stat -c %Z "${debug_file}")
limit_date=$(date --date "14400 seconds ago" +"%s")
if [ $(( last_change - limit_date )) -le "0" ]; then
log_run "Debug mode disabled; file is too old (%{last_change} seconds)."
rm "${debug_file}"
# Debug mode disabled
DEBUG="0"
else
log_run "Debug mode enabled."
# Debug mode enabled
DEBUG="1"
fi
else
# log_run "Debug mode disabled; file is absent."
# Debug mode disabled
DEBUG="0"
fi
fi
# return the value
test "${DEBUG}" -eq "1"
}
# Uses the who(1) definition of "active"
currently_active_users() {
LC_ALL=C who --users | grep --extended-regexp "\s+\.\s+" | awk '{print $1}' | sort --human-numeric-sort | uniq
}
# Users active in the last 29 minutes
recently_active_users() {
LC_ALL=C who --users | grep --extended-regexp "\s+00:(0|1|2)[0-9]\s+" | awk --field-separator ' ' '{print $1,$6}'
}
# Save the list of users to a file in the log directory
save_active_users() {
LC_ALL=C who --users | save_in_log_dir "who-users"
}
# An autosysadmin must not perform actions if a user is active or was active recently.
#
# This can by bypassed in interactive mode.
# It's OK to lose this data after a reboot.
ensure_no_active_users_or_exit() {
# Save all active users
save_active_users
if is_debug; then
log_run "Debug mode enabled: continue without checking active users."
return 0;
fi
# Is there any currently active user?
currently_active_users=$(currently_active_users)
if [ -n "${currently_active_users}" ]; then
# shellcheck disable=SC2001
users_oneliner=$(echo "${currently_active_users}" | sed -e 's/\n/ /')
log_run "Currently active users: ${users_oneliner}"
if is_interactive; then
echo "Some users are currently active:"
# shellcheck disable=SC2001
echo "${currently_active_users}" | sed -e 's/\(.\+\)/* \1/'
answer=""
while :; do
printf "> Continue? [Y,n,?] "
read -r answer
case ${answer} in
[Yy]|"" )
log_run "Active users check bypassed manually in interactive mode."
return
;;
[Nn] )
log_run "Active users check confirmed manually in interactive mode."
log_abort_and_quit "Active users detected: ${users_oneliner}"
;;
* )
printf "y - yes, continue\n"
printf "n - no, exit\n"
printf "? - print this help\n"
;;
esac
done
else
log_abort_and_quit "Currently active users detected: ${users_oneliner}."
fi
else
# or recently (the last 30 minutes) active user?
recently_active_users=$(recently_active_users)
if [ -n "${recently_active_users}" ]; then
# shellcheck disable=SC2001
users_oneliner=$(echo "${recently_active_users}" | sed -e 's/\n/ /')
log_run "Recently active users: ${users_oneliner}"
if is_interactive; then
echo "Some users were recently active:"
# shellcheck disable=SC2001
echo "${recently_active_users}" | sed -e 's/\(.\+\)/* \1/'
answer=""
while :; do
printf "> Continue? [Y,n,?] "
read -r answer
case ${answer} in
[Yy]|"" )
log_run "Active users check bypassed manually in interactive mode."
return
;;
[Nn] )
log_run "Active users check confirmed manually in interactive mode."
log_abort_and_quit "Recently active users detected: ${users_oneliner}."
;;
* )
printf "y - yes, continue\n"
printf "n - no, exit\n"
printf "? - print this help\n"
;;
esac
done
else
log_abort_and_quit "Recently active users detected: ${users_oneliner}."
fi
fi
fi
}
# Takes an NRPE command name as 1st parameter,
# and executes the full command if found in the configuration.
# Return the result and the return code of the command.
check_nrpe() {
check="$1"
nrpe_files=""
# Check if NRPE config is found
if [ -f "/etc/nagios/nrpe.cfg" ]; then
nrpe_files="${nrpe_files} /etc/nagios/nrpe.cfg"
else
msg="NRPE configuration not found: /etc/nagios/nrpe.cfg"
log_run "${msg}"
echo "${msg}"
return 3
fi
# Search for included files
# shellcheck disable=SC2086
while IFS= read -r include_file; do
nrpe_files="${nrpe_files} ${include_file}"
done < <(grep --extended-regexp '^\s*include=.+' ${nrpe_files} | cut -d = -f 2)
# Search for files in included directories
# shellcheck disable=SC2086
while IFS= read -r include_dir; do
nrpe_files="${nrpe_files} ${include_dir}/*.cfg"
done < <(grep --extended-regexp '^\s*include_dir=.+' ${nrpe_files} | cut -d = -f 2)
# Fetch uncommented commands in (sorted) config files
# shellcheck disable=SC2086
nrpe_commands=$(grep --no-filename --exclude=*~ --fixed-strings "[${check}]" ${nrpe_files} | grep --invert-match --extended-regexp '^\s*#\s*command' | cut -d = -f 2)
nrpe_commands_count=$(echo "${nrpe_commands}" | wc -l)
if is_debian_version "9" "<=" && [ "${nrpe_commands_count}" -gt "1" ]; then
# On Debian <= 9, NRPE loading was not sorted
# we need to raise an error if we have multiple defined commands
msg="Unable to determine which NRPE command to run"
log_run "${msg}"
echo "${msg}"
return 3
else
# On Debian > 9, use the last command
nrpe_command=$(echo "${nrpe_commands}" | tail -n 1)
nrpe_result=$(${nrpe_command})
nrpe_rc=$?
log_run "NRPE command (exited with ${nrpe_rc}): ${nrpe_command}"
log_run "${nrpe_result}"
echo "${nrpe_result}"
return "${nrpe_rc}"
fi
}
# An autosysadmin script must not run twice (or more) simultaneously.
# We use a customizable (with LOCK_NAME) lock file to keep track of this.
# A wait time can be configured.
#
# This can by bypassed in interactive mode.
# It's OK to lose this data after a reboot.
acquire_lock_or_exit() {
lock_file="${1:-${LOCK_FILE}}"
lock_wait="${2:-${LOCK_WAIT}}"
# lock_wait must be compatible with sleep(1), otherwise fallback to 0
if ! echo "${lock_wait}" | grep -Eq '^[0-9]+[smhd]?$'; then
log_run "Lock wait: incorrect value '${lock_wait}', fallback to 0."
lock_wait=0
fi
if [ "${lock_wait}" != "0" ] && [ -f "${lock_file}" ]; then
log_run "Lock file present. Let's wait ${lock_wait} and check again."
sleep "${lock_wait}"
fi
if [ -f "${lock_file}" ]; then
log_abort_and_quit "Lock file still present."
else
log_run "Lock file absent. Let's put one."
touch "${lock_file}"
fi
}
# If a script has been run in the ast 30 minutes, running it again won't fix the issue.
# We use a /run/ausosysadmin/${PROGNAME}_lastrun file to keep track of this.
#
# This can by bypassed in interactive mode.
# This is bypassed in debug mode.
# It's OK to lose this data after a reboot.
ensure_not_too_soon_or_exit() {
if is_debug; then
log_run "Debug mode enabled: continue without checking when was the last run."
return 0;
fi
lastrun_file="${RUN_DIR}/${PROGNAME}_lastrun"
if [ -f "${lastrun_file}" ]; then
lastrun_age="$(($(date +%s)-$(stat -c "%Y" "${lastrun_file}")))"
log_run "Last run was ${lastrun_age} seconds ago."
if [ "${lastrun_age}" -lt 1800 ]; then
if is_interactive; then
echo "${PROGNAME} was run ${lastrun_age} seconds ago."
answer=""
while :; do
printf "> Continue? [Y,n,?] "
read -r answer
case ${answer} in
[Yy]|"" )
log_run "Last run check bypassed manually in interactive mode."
break
;;
[Nn] )
log_run "Last run check confirmed manually in interactive mode."
log_abort_and_quit 'Last run too recent.'
;;
* )
printf "y - yes, continue\n"
printf "n - no, exit\n"
printf "? - print this help\n"
;;
esac
done
else
log_abort_and_quit "Last run too recent."
fi
fi
fi
touch "${lastrun_file}"
}
# Populate DEBIAN_VERSION and DEBIAN_RELEASE variables
# based on gathered information about the operating system
detect_os() {
DEBIAN_RELEASE="unknown"
DEBIAN_VERSION="unknown"
LSB_RELEASE_BIN="$(command -v lsb_release)"
if [ -e /etc/debian_version ]; then
DEBIAN_VERSION="$(cut -d "." -f 1 < /etc/debian_version)"
if [ -x "${LSB_RELEASE_BIN}" ]; then
DEBIAN_RELEASE="$("${LSB_RELEASE_BIN}" --codename --short)"
else
case "${DEBIAN_VERSION}" in
7) DEBIAN_RELEASE="wheezy" ;;
8) DEBIAN_RELEASE="jessie" ;;
9) DEBIAN_RELEASE="stretch" ;;
10) DEBIAN_RELEASE="buster" ;;
11) DEBIAN_RELEASE="bullseye" ;;
12) DEBIAN_RELEASE="bookworm" ;;
13) DEBIAN_RELEASE="trixie" ;;
esac
fi
# log_run "Detected OS: Debian version=${DEBIAN_VERSION} release=${DEBIAN_RELEASE}"
# else
# log_run "Detected OS: unknown (missing /etc/debian_version)"
fi
}
is_debian_wheezy() {
test "${DEBIAN_RELEASE}" = "wheezy"
}
is_debian_jessie() {
test "${DEBIAN_RELEASE}" = "jessie"
}
is_debian_stretch() {
test "${DEBIAN_RELEASE}" = "stretch"
}
is_debian_buster() {
test "${DEBIAN_RELEASE}" = "buster"
}
is_debian_bullseye() {
test "${DEBIAN_RELEASE}" = "bullseye"
}
is_debian_bookworm() {
test "${DEBIAN_RELEASE}" = "bookworm"
}
is_debian_trixie() {
test "${DEBIAN_RELEASE}" = "trixie"
}
is_debian_version() {
local version=$1
local relation=${2:-"eq"}
if [ -z "${DEBIAN_VERSION:-""}" ]; then
detect_os
fi
dpkg --compare-versions "${DEBIAN_VERSION}" "${relation}" "${version}"
}
# List systemd services (only names), even if stopped
systemd_list_services() {
pattern=$1
systemctl list-units --all --no-legend --type=service "${pattern}" | grep --only-matching --extended-regexp '\S+\.service'
}
is_systemd_enabled() {
systemctl --quiet is-enabled "$1" 2> /dev/null
}
is_systemd_active() {
systemctl --quiet is-active "$1" 2> /dev/null
}
is_sysvinit_enabled() {
find /etc/rc2.d/ -name "$1" > /dev/null
}
get_fqdn() {
# shellcheck disable=SC2155
local system=$(uname -s)
if [ "${system}" = "Linux" ]; then
hostname --fqdn
elif [ "${system}" = "OpenBSD" ]; then
hostname
else
log_abort_and_quit "System '${system}' not recognized."
fi
}
get_complete_hostname() {
REAL_HOSTNAME="$(get_fqdn)"
if [ "${HOSTNAME}" = "${REAL_HOSTNAME}" ]; then
echo "${HOSTNAME}"
else
echo "${HOSTNAME} (${REAL_HOSTNAME})"
fi
}
# Fetch values from evomaintenance configuration
get_evomaintenance_mail() {
grep "EVOMAINTMAIL=" /etc/evomaintenance.cf | cut -d '=' -f2
}
get_evomaintenance_emergency_mail() {
grep "URGENCYFROM=" /etc/evomaintenance.cf | cut -d '=' -f2
}
get_evomaintenance_emergency_tel() {
grep "URGENCYTEL=" /etc/evomaintenance.cf | cut -d '=' -f2
}
# Log a message to the log file in the log directory
log_run() {
local msg="${1:-$(cat /dev/stdin)}"
# shellcheck disable=SC2155
local date=$(/bin/date +"${DATE_FORMAT}")
printf "[%s] %s[%s]: %s\\n" \
"${date}" "${PROGNAME}" "${PID}" "${msg}" \
>> "${RUN_LOG_FILE}"
}
# Log a message in the system log file (syslog or journald)
log_global() {
local msg="${1:-$(cat /dev/stdin)}"
echo "${msg}" \
| /usr/bin/logger -p local0.notice -t autosysadmin
}
# Log a message in both places
log_all() {
local msg="${1:-$(cat /dev/stdin)}"
log_global "${msg}"
log_run "${msg}"
}
# Log a notable action in regular places
# and append it to the dedicated list
log_action() {
log_all "$*"
append_action "$*"
}
# Append a line in the actions.log file in the log directory
append_action() {
echo "$*" >> "${ACTIONS_FILE}"
}
# Print the content of the actions.log file
# or a fallback content (1st parameter) if empty
# shellcheck disable=SC2120
print_actions() {
local fallback=${1:-""}
if [ -s "${ACTIONS_FILE}" ]; then
cat "${ACTIONS_FILE}"
elif [ -n "${fallback}" ]; then
echo "${fallback}"
fi
}
# Log a an abort reason in regular places
# and append it to the dedicated list
log_abort() {
log_all "$*"
append_abort_reason "$*"
}
# Append a line in the abort.log file in the log directory
append_abort_reason() {
echo "$*" >> "${ABORT_FILE}"
}
# Print the content of the abort.log file
# or a fallback content (1st parameter) if empty
# shellcheck disable=SC2120
print_abort_reasons() {
local fallback=${1:-""}
if [ -s "${ABORT_FILE}" ]; then
cat "${ABORT_FILE}"
elif [ -n "${fallback}" ]; then
echo "${fallback}"
fi
}
# Print the content of the main log from the log directory
print_main_log() {
cat "${RUN_LOG_FILE}"
}
# Log an abort reason and quit the script
log_abort_and_quit() {
log_abort "$*"
quit
}
# Store the content from standard inpu
# into a file in the log directory named after the 1st parameter
save_in_log_dir() {
local file_name=$1
local file_path="${RUN_LOG_DIR}/${file_name}"
cat /dev/stdin > "${file_path}"
log_run "Saved \`${file_name}' file."
}
# Return the full path of the file in log directory
# based on the name in the 1st parameter
file_path_in_log_dir() {
echo "${RUN_LOG_DIR}/${1}"
}
format_mail_success() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <${EMAIL_FROM}>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: ${PROGNAME}
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: ${EMAIL_INTERNAL}
Subject: [autosysadmin] Intervention automatisée sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatisée vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
Script déclenché : ${PROGNAME}
### Actions réalisées
$(print_actions "Aucune")
### Réagir à cette intervention
Vous pouvez répondre à ce message (${EMAIL_FROM}).
En cas d'urgence, utilisez l'adresse ${EMERGENCY_MAIL}
ou notre ligne d'astreinte (${EMERGENCY_TEL})
--
Votre AutoSysadmin
EOTEMPLATE
}
format_mail_abort() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <${EMAIL_FROM}>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: ${PROGNAME}
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: ${EMAIL_INTERNAL}
Subject: [autosysadmin] Intervention automatisée interrompue sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatisée a été déclenchée mais s'est interrompue.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
Script déclenché : ${PROGNAME}
### Actions réalisées
$(print_actions "Aucune")
### Raison(s) de l'interruption
$(print_abort_reasons "Inconnue")
### Réagir à cette intervention
Vous pouvez répondre à ce message (${EMAIL_FROM}).
En cas d'urgence, utilisez l'adresse ${EMERGENCY_MAIL}
ou notre ligne d'astreinte (${EMERGENCY_TEL})
--
Votre AutoSysadmin
EOTEMPLATE
}
# shellcheck disable=SC2028
print_report_information() {
echo "**Uptime**"
echo ""
uptime
echo ""
echo "**Utilisateurs récents**"
echo ""
who_file=$(file_path_in_log_dir "who-users")
if [ -s "${who_file}" ]; then
cat "${who_file}"
else
who --users
fi
echo ""
echo "**Espace disque**"
echo ""
df_file=$(file_path_in_log_dir "server-state/df.txt")
if [ -s "${df_file}" ]; then
cat "${df_file}"
else
df -h
fi
echo ""
echo "**Dmesg**"
echo ""
dmesg_file=$(file_path_in_log_dir "server-state/dmesg.txt")
if [ -s "${dmesg_file}" ]; then
tail -n 5 "${dmesg_file}"
else
dmesg | tail -n 5
fi
echo ""
echo "**systemd failed services**"
echo ""
failed_services_file=$(file_path_in_log_dir "server-state/systemctl-failed-services.txt")
if [ -s "${failed_services_file}" ]; then
cat "${failed_services_file}"
else
systemctl --no-legend --state=failed --type=service
fi
if command -v lxc-ls > /dev/null 2>&1; then
echo ""
echo "**LXC containers**"
echo ""
lxc_ls_file=$(file_path_in_log_dir "server-state/lxc-list.txt")
if [ -s "${lxc_ls_file}" ]; then
cat "${lxc_ls_file}"
else
lxc-ls --fancy
fi
fi
apache_errors_file=$(file_path_in_log_dir "apache-errors.log")
if [ -f "${apache_errors_file}" ]; then
echo ""
echo "**Apache errors**"
echo ""
cat "${apache_errors_file}"
fi
nginx_errors_file=$(file_path_in_log_dir "nginx-errors.log")
if [ -f "${nginx_errors_file}" ]; then
echo ""
echo "**Nginx errors**"
echo ""
cat "${nginx_errors_file}"
fi
}
format_mail_internal() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <${EMAIL_FROM}>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: ${PROGNAME}
X-RunId: ${RUN_ID}
To: ${EMAIL_INTERNAL}
Subject: [autosysadmin] Rapport interne d'intervention sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
Script déclenché : ${PROGNAME}
### Actions réalisées
$(print_actions "Aucune")
### Raison(s) de l'interruption
$(print_abort_reasons "Aucune")
### Log autosysadmin
$(print_main_log)
### Informations additionnelles
$(print_report_information)
--
Votre AutoSysadmin
EOTEMPLATE
}
# Generic function to send emails at the end of the script.
# Takes a template as 1st parameter
hook_mail() {
if is_debug; then
log_run "Debug mode enabled: continue without sending mail."
return 0;
fi
HOSTNAME="${HOSTNAME:-"$(get_fqdn)"}"
HOSTNAME_TEXT="$(get_complete_hostname)"
EMAIL_CLIENT="$(get_evomaintenance_mail)"
EMERGENCY_MAIL="$(get_evomaintenance_emergency_mail)"
EMERGENCY_TEL="$(get_evomaintenance_emergency_tel)"
MAIL_CONTENT="$(format_mail_"$1")"
SENDMAIL_BIN="$(command -v sendmail)"
if [ -z "${SENDMAIL_BIN}" ]; then
log_global "ERROR: No \`sendmail' command has been found, can't send mail."
fi
if [ -x "${SENDMAIL_BIN}" ]; then
echo "${MAIL_CONTENT}" | "${SENDMAIL_BIN}" -oi -t -f "equipe@evolix.fr"
log_global "Sent '$1' mail for RUN_ID: ${RUN_ID}"
fi
}
is_holiday() {
# gcal mark today as a holiday by surrounding with < and > the day
# of the month of that holiday line. For example if today is 2022-05-01 we'll
# get among other lines:
# Fête du Travail (FR) + Di, < 1>Mai 2022
# Jour de la Victoire (FR) + Di, : 8:Mai 2022 = +7 jours
LANGUAGE=fr_FR.UTF-8 TZ=Europe/Paris gcal --cc-holidays=fr --holiday-list=short | grep -E '<[0-9 ]{2}>' --quiet
}
is_weekend() {
day_of_week=$(date +%u)
if [ "${day_of_week}" != 6 ] && [ "${day_of_week}" != 7 ]; then
return 1
fi
}
is_workday() {
if is_holiday || is_weekend; then
return 1
fi
}
is_worktime() {
if ! is_workday; then
return 1
fi
hour=$(date +%H)
if [ "${hour}" -lt 9 ] || { [ "${hour}" -ge 12 ] && [ "${hour}" -lt 14 ] ; } || [ "${hour}" -ge 18 ]; then
return 1
fi
}

View file

@ -0,0 +1,112 @@
#!/bin/bash
# Specific functions for "repair" scripts
is_all_repair_disabled() {
# Fetch values from the config
# and if it is not defined or has no value, then assign "on"
local status=${repair_all:=on}
test "${status}" = "off" || test "${status}" = "0"
}
is_current_repair_disabled() {
# Fetch values from the config
# and if it is not defined or has no value, then assign "on"
local status=${!PROGNAME:=on}
test "${status}" = "off" || test "${status}" = "0"
}
ensure_not_disabled_or_exit() {
if is_all_repair_disabled; then
log_global 'All repair scripts are disabled.'
exit 0
fi
if is_current_repair_disabled; then
log_global "Current repair script (${PROGNAME}) is disabled."
exit 0
fi
}
# Set of actions to do at the begining of a "repair" script
pre_repair() {
initialize
# Are we supposed to run?
ensure_not_disabled_or_exit
# Has it recently been run?
ensure_not_too_soon_or_exit
# Can we acquire a lock?
acquire_lock_or_exit
# Is there any active user?
ensure_no_active_users_or_exit
# Save important information
save_server_state
}
# Set of actions to do at the end of a "repair" script
post_repair() {
quit
}
repair_lxc_php() {
container_name=$1
if is_systemd_enabled 'lxc.service'; then
lxc_path=$(lxc-config lxc.lxcpath)
if lxc-info --name "${container_name}" > /dev/null; then
rootfs="${lxc_path}/${container_name}/rootfs"
case "${container_name}" in
php56) fpm_log_file="${rootfs}/var/log/php5-fpm.log" ;;
php70) fpm_log_file="${rootfs}/var/log/php7.0-fpm.log" ;;
php73) fpm_log_file="${rootfs}/var/log/php7.3-fpm.log" ;;
php74) fpm_log_file="${rootfs}/var/log/php7.4-fpm.log" ;;
php80) fpm_log_file="${rootfs}/var/log/php8.0-fpm.log" ;;
php81) fpm_log_file="${rootfs}/var/log/php8.1-fpm.log" ;;
php82) fpm_log_file="${rootfs}/var/log/php8.2-fpm.log" ;;
php83) fpm_log_file="${rootfs}/var/log/php8.3-fpm.log" ;;
*)
log_abort_and_quit "Unknown container '${container_name}'"
;;
esac
# Determine FPM Pool path
php_path_pool=$(find "${lxc_path}/${container_name}/" -type d -name "pool.d")
# Save LXC info (before restart)
lxc-info --name "${container_name}" | save_in_log_dir "lxc-${container_name}.before.status"
# Save last lines of FPM log (before restart)
tail "${fpm_log_file}" | save_in_log_dir "$(basename "${fpm_log_file}" | sed -e 's/.log/.before.log/')"
# Save NRPE check (before restart)
/usr/local/lib/nagios/plugins/check_phpfpm_multi "${php_path_pool}" | save_in_log_dir "check_fpm_${container_name}.before.out"
lxc-stop --timeout 20 --name "${container_name}"
lxc-start --daemon --name "${container_name}"
rc=$?
if [ "${rc}" -eq "0" ]; then
log_all "Restart LXC container '${container_name}: OK"
else
log_all "Restart LXC container '${container_name}: failed"
fi
# Save LXC info (after restart)
lxc-info --name "${container_name}" | save_in_log_dir "lxc-${container_name}.after.status"
# Save last lines of FPM log (after restart)
tail "${fpm_log_file}" | save_in_log_dir "$(basename "${fpm_log_file}" | sed -e 's/.log/.after.log/')"
# Save NRPE check (after restart)
/usr/local/lib/nagios/plugins/check_phpfpm_multi "${php_path_pool}" | save_in_log_dir "check_fpm_${container_name}.after.out"
else
log_abort_and_quit "LXC container '${container_name}' doesn't exist."
fi
else
log_abort_and_quit 'LXC not found.'
fi
}

View file

@ -0,0 +1,76 @@
#!/bin/bash
# Specific functions for "restart" scripts
running_custom() {
# Placeholder that returns 1, to prevent running if not redefined
log_global "running_custom() function has not been redefined! Let's quit."
return 1
}
# Examine RUNNING variable and decide if the script should run or not
is_supposed_to_run() {
if is_debug; then return 0; fi
case ${RUNNING} in
never)
# log_global "is_supposed_to_run: no (never)"
return 1
;;
always)
# log_global "is_supposed_to_run: yes (always)"
return 0
;;
nwh-fr)
! is_worktime
rc=$?
# if [ ${rc} -eq 0 ]; then
# log_global "is_supposed_to_run: yes (nwh-fr returned ${rc})"
# else
# log_global "is_supposed_to_run: no (nwh-fr returned ${rc})"
# fi
return ${rc}
;;
nwh-ca)
# Not implemented yet
return 0
;;
custom)
running_custom
rc=$?
# if [ ${rc} -eq 0 ]; then
# log_global "is_supposed_to_run: yes (custom returned ${rc})"
# else
# log_global "is_supposed_to_run: no (custom returned ${rc})"
# fi
return ${rc}
;;
esac
}
ensure_supposed_to_run_or_exit() {
if ! is_supposed_to_run; then
# simply quit (no logging, no notifications…)
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
exit 0
fi
}
# Set of actions to do at the begining of a "restart" script
pre_restart() {
initialize
# Has it recently been run?
ensure_not_too_soon_or_exit
# Can we acquire a lock?
acquire_lock_or_exit
# Save important information
save_server_state
}
# Set of actions to do at the end of a "restart" script
post_restart() {
quit
}

View file

@ -0,0 +1,157 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
# We always keep some reserved blocks to avoid missing some logs
# https://gitea.evolix.org/evolix/autosysadmin/issues/22
RESERVED_BLOCKS_MIN=1
get_mountpoints() {
# the $(...) get the check_disk1 command
# the cut command selects the critical part of the check_disk1 output
# the grep command extracts the mountpoints and available disk space
# the last cut command selects the mountpoints
check_disk1_command=$(grep check_disk1 /etc/nagios/nrpe.d/evolix.cfg | cut -d'=' -f2-)
${check_disk1_command} -e | cut -d'|' -f1 | grep --extended-regexp --only-matching '/[[:graph:]]* [0-9]+ [A-Z][A-Z]' | cut -d' ' -f1
}
is_reserved_blocks_nominal() {
partition=${1}
fs_type="$(findmnt -n --output=fstype "${partition}")"
if [ "${fs_type}" = "ext4" ]; then
device="$(findmnt -n --output=source "${partition}")"
reserved_block_count="$(tune2fs -l "${device}" | grep 'Reserved block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')"
block_count="$(tune2fs -l "${device}" | grep 'Block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')"
percentage=$(awk "BEGIN { pc=100*${reserved_block_count}/${block_count}; i=int(pc); print (pc-i<0.5)?i:i+1 }")
log_run "Reserved blocks for ${partition} is currently at ${percentage}%"
if [ "${percentage}" -gt "${RESERVED_BLOCKS_MIN}" ]; then
log_run "Allowing tune2fs action to reduce the number of reserved blocks"
return 0
else
log_run "Reserved blocks already at or bellow ${RESERVED_BLOCKS_MIN}%, no automatic action possible"
return 1
fi
else
log_run "Filesystem for ${partition} (${fs_type}) is incompatible with reserved block reduction."
return 1
fi
}
reduce_reserved_blocks() {
partition=${1}
device=$(findmnt -n --output=source "${partition}")
tune2fs -m "${RESERVED_BLOCKS_MIN}" "${device}"
log_action "Reserved blocks for ${partition} changed to ${RESERVED_BLOCKS_MIN} percent"
}
is_tmp_to_delete() {
size="$(find /var/log/ -type f -ctime +1 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
if [ -n "${size}" ]; then
return 0
else
return 1
fi
}
is_log_to_delete() {
size="$(find /var/log/ -type f -mtime +365 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
if [ -n "${size}" ]; then
return 0
else
return 1
fi
}
clean_apt_cache() {
for container in $(lxc-ls -1); do
if [ -e "$(lxc-config lxc.lxcpath)/${container}/rootfs/var/cache" ]; then
lxc-attach --name "${container}" -- apt-get clean
log_action "Clean apt cache in LXC container ${container}";
fi
done
# NOTE: "head -n 1" might be superfluous, but let's be sure to have only the first returned value
biggest_subdir=$(du --summarize --one-file-system "/var/*" | sort --numeric-sort --reverse | sed 's/^[0-9]\+[[:space:]]\+//;q' | head -n 1)
case "${biggest_subdir}" in
'/var/cache')
apt-get clean
log_action 'Clean apt cache'
;;
esac
}
clean_amavis_virusmails() {
if du --inodes /var/lib/* | sort --numeric-sort | tail -n 3 | grep --quiet 'virusmails$'; then
find /var/lib/amavis/virusmails/ -type f -atime +30 -delete
log_action 'Clean amavis infected mails'
fi
}
critical_mountpoints=$(get_mountpoints)
if [ -z "${critical_mountpoints}" ]; then
log_abort_and_quit "No partition is in critical state, nothing left to do."
else
for mountpoint in ${critical_mountpoints}; do
case "${mountpoint}" in
/var)
#if is_log_to_delete
#then
# find /var/log/ -type f -mtime +365 -delete
# log_action "$size Mo of disk space freed in /var"
#fi
if is_reserved_blocks_nominal /var; then
reduce_reserved_blocks /var
clean_apt_cache
clean_amavis_virusmails
fi
;;
/tmp)
#if is_tmp_to_delete
#then
# find /tmp/ -type f -ctime +1 -delete
# log_action "$size Mo of disk space freed in /tmp"
#fi
if is_reserved_blocks_nominal /tmp; then
reduce_reserved_blocks /tmp
fi
;;
/home)
if is_reserved_blocks_nominal /home; then
reduce_reserved_blocks /home
fi
;;
/srv)
if is_reserved_blocks_nominal /srv; then
reduce_reserved_blocks /srv
fi
;;
/filer)
if is_reserved_blocks_nominal /filer; then
reduce_reserved_blocks /filer
fi
;;
/)
if is_reserved_blocks_nominal /; then
reduce_reserved_blocks /
# Suggest remove old kernel ?
fi
;;
*)
# unknown
log_run 'Unknown partition (or weird case) or nothing to do'
;;
esac
done
fi
post_repair

View file

@ -0,0 +1,35 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
service="elasticsearch.service"
service_name="elasticsearch"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_abort_and_quit "${service} is active, nothing left to do."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_abort_and_quit "${service} is disabled (or missing), nothing left to do."
fi
post_repair

View file

@ -0,0 +1,131 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
## Apache
service="apache2.service"
service_name="apache2"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# check syntax
if apache2ctl -t > /dev/null 2>&1; then
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
# Save error logs
date=$(LANG=en_US.UTF-8 date '+%b %d')
grep "${date}" /home/*/log/error.log /var/log/apache2/*error.log \
| grep -v \
-e "Got error 'PHP message:" \
-e "No matching DirectoryIndex" \
-e "client denied by server configuration" \
-e "server certificate does NOT include an ID which matches the server name" \
| save_in_log_dir "apache-errors.log"
else
log_action "Restart ${service_name}: skip (invalid configuration)"
fi
fi
else
log_all "${service} is disabled (or missing). Skip."
fi
## Nginx
service="nginx.service"
service_name="nginx"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# check syntax
if nginx -t > /dev/null 2>&1; then
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
# Save error logs
### Consider doing for Nginx the same as Apache
else
log_action "Restart ${service_name}: skip (invalid configuration)"
fi
fi
else
log_all "${service} is disabled (or missing). Skip."
fi
## LXC
if is_systemd_enabled 'lxc.service'; then
for container in $(lxc-ls -1 | grep --fixed-strings 'php' | grep --extended-regexp --invert-match --regexp '\bold\b' --regexp '\bdisabled\b'); do
repair_lxc_php "${container}"
done
else
log_all "LXC is disabled (or missing). Skip."
fi
## FPM
fpm_services=$(systemd_list_services 'php*-fpm*')
if [ -n "${fpm_services}" ]; then
for service in ${fpm_services}; do
service_name="${service//.service/}"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_all "${service} is disabled (or missing). Skip."
fi
done
else
log_all "PHP FPM not found. Skip."
fi
post_repair

View file

@ -0,0 +1,69 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
if is_debian_version "8" "<="; then
if is_sysvinit_enabled '*mysql*'; then
if ! pgrep -u mysql mysqld > /dev/null; then
# Save service status before restart
timeout 2 mysqladmin status 2>&1 | save_in_log_dir "mysql.before.status"
timeout 20 /etc/init.d/mysql restart > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart mysql: OK"
else
log_action "Restart mysql: failed"
fi
# Save service status after restart
timeout 2 mysqladmin status 2>&1 | save_in_log_dir "mysql.after.status"
else
log_abort_and_quit "mysqld process alive. Aborting"
fi
else
log_abort_and_quit "MySQL not enabled. Aborting"
fi
else
if is_debian_version "12" ">="; then
service="mariadb.service"
service_name="mariadb"
else
service="mysql.service"
service_name="mysql"
fi
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_abort_and_quit "${service} is active, nothing left to do."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_abort_and_quit "${service} is disabled (or missing), nothing left to do."
fi
fi
post_repair

View file

@ -0,0 +1,35 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
service="opendkim.service"
service_name="opendkim"
if is_systemd_enabled "${service}"; then
if is_systemd_active "${service}"; then
log_abort_and_quit "${service} is active, nothing left to do."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_abort_and_quit "${service} is disabled (or missing). Abort."
fi
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php56
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php70
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php73
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php74
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php80
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php81
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php82
post_repair

View file

@ -0,0 +1,14 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
LOCK_WAIT="15s"
LOCK_NAME="repair_http"
pre_repair
repair_lxc_php php83
post_repair

View file

@ -0,0 +1,32 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
pre_repair
for service in $(systemd_list_services 'redis-server*'); do
service_name="${service//.service/}"
if is_systemd_active "${service}"; then
log_all "${service} is active. Skip."
else
# Save service status before restart
systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK."
else
log_action "Restart ${service_name}: failed."
fi
# Save service status after restart
systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi
done
post_repair

View file

@ -1,43 +1,24 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
init_autosysadmin
load_conf
test "${repair_tomcat_instance:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_tomcat_instance"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
pre_repair
repair_tomcat_instance_handle_tomcat() {
if /bin/su - "${1}" -c "/bin/systemctl --quiet --user is-active tomcat.service" ; then
if ! /bin/su - "${1}" -c "/usr/bin/timeout 20 /bin/systemctl --quiet --user restart tomcat.service"
then
log_error_exit "Echec de redémarrage instance tomcat utilisateur ${1}"
log_abort_and_quit "Echec de redémarrage instance tomcat utilisateur ${1}"
else
log_action "Redémarrage instance tomcat utilisateur ${1}"
fi
elif /bin/systemctl --quiet is-active "${1}".service ; then
if ! /usr/bin/timeout 20 systemctl --quiet restart "${1}".service
then
log_error_exit "Echec de redémarrage instance tomcat ${1}"
log_abort_and_quit "Echec de redémarrage instance tomcat ${1}"
else
log_action "Redémarrage instance tomcat ${1}"
fi
@ -50,4 +31,4 @@ do
repair_tomcat_instance_handle_tomcat "${instance}"
done
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail
post_repair

View file

@ -0,0 +1,41 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1
## Custom lock wait and/or lock name
# LOCK_WAIT="15s"
# LOCK_NAME="repair_http"
pre_repair
## The name of the service, mainly for logging
service_name="example"
## The systemd service name
systemd_service="${service_name}.service"
if is_systemd_enabled "${systemd_service}"; then
if is_systemd_active "${systemd_service}"; then
log_abort_and_quit "${systemd_service} is active, nothing left to do."
else
# Save service status before restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 systemctl restart "${systemd_service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
fi
else
log_abort_and_quit "${service_name} is disabled (or missing), nothing left to do."
fi
post_repair

View file

@ -0,0 +1,19 @@
Autosysadmin "restart auto" scripts
===================================
In this directory you can place scripts that will be executed automatically by a cron job (stored in `/etc/cron.d/autosysadmin`).
They must satisfy the default `run-parts(8)` constraints :
* be "executable"
* belong to the Debian cron script namespace (`^[a-zA-Z0-9_-]+$`), example: `restart_amavis`
Warning: scripts that do not satisfy those criteria will NOT be run (silently)!
You can print the names of the scripts which would be run, without actually running them, with this command :
```
$ run-parts --test /usr/share/scripts/autosysadmin/restart
```
You can use `zzz-restart_example.template` as boilerplate code to make your own "restart" script.

View file

@ -0,0 +1,120 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
# shellcheck disable=SC2034
RUNNING="nwh-fr"
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function returns 0, otherwise disabled.
## Uncomment and customize this method if you want to have a special logic :
##
## return 1 if we should not run
## return 0 if we should run
##
## Some available functions :
## is_weekend() : Saturday or Sunday
## is_holiday() : holiday in France (based on `gcal(1)`)
## is_workday() : not weekend and not holiday
## is_worktime() : work day between 9-12h and 14-18h
#
# running_custom() {
# # implement your own custom method to decide if we should run or not
# }
## The name of the service, mainly for logging
service_name="example"
## The SysVinit script name
sysvinit_script="${service_name}"
## The systemd service name
systemd_service="${service_name}.service"
is_service_alive() {
## this must return 0 if the service is alive, otherwise return 1
## Example:
pgrep -u USER PROCESS_NAME > /dev/null
}
## Action for SysVinit system
sysvinit_action() {
# Save service status before restart
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 "/etc/init.d/${sysvinit_script}" restart > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.after.status"
}
## Action for systemd system
systemd_action() {
# Save service status before restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
# systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start
# so we check the status explicitly
timeout 20 systemctl restart "${systemd_service}" > /dev/null \
&& sleep 1 \
&& systemctl status "${systemd_service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
}
# Should we run?
if ! is_supposed_to_run; then
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
exit 0
fi
if is_service_alive; then
# log_global "${service_name} process alive. Aborting"
exit 0
fi
# Yes we do, so check for sysvinit or systemd
if is_debian_version "8" "<="; then
if ! is_sysvinit_enabled "*${sysvinit_script}*"; then
# log_global "${service_name} not enabled. Aborting"
exit 0
fi
# Let's finally do the action
pre_restart
sysvinit_action
post_restart
else
if ! is_systemd_enabled "${systemd_service}"; then
# log_global "${service_name} is disabled (or missing), nothing left to do."
exit 0
fi
if is_systemd_active "${systemd_service}"; then
# log_global "${service_name} is active, nothing left to do."
exit 0
fi
# Let's finally do the action
pre_restart
systemd_action
post_restart
fi

View file

@ -0,0 +1,16 @@
---
- name: restart nagios-nrpe-server
service:
name: nagios-nrpe-server
state: restarted
- name: restart nrpe
service:
name: nrpe
state: restarted
- name: restart rsyslog
service:
name: rsyslog
state: restarted

View file

@ -0,0 +1,25 @@
---
- name: "Add begin marker if missing"
ansible.builtin.lineinfile:
path: "/etc/cron.d/autosysadmin"
line: "# BEGIN ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN"
insertbefore: BOF
create: yes
- name: "Add end marker if missing"
ansible.builtin.lineinfile:
path: "/etc/cron.d/autosysadmin"
line: "# END ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN"
insertbefore: "EOF"
create: yes
- name: "Create config if missing"
ansible.builtin.blockinfile:
path: "/etc/cron.d/autosysadmin"
marker: "# {mark} ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN"
block: "{{ lookup('ansible.builtin.template', '../templates/autosysadmin.cron.j2') }}"
owner: root
group: root
mode: "0750"
create: yes

View file

@ -0,0 +1,114 @@
---
- name: "Remount /usr if needed"
ansible.builtin.include_role:
name: remount-usr
- name: Previous autosysadmin restart directory is renamed
command:
cmd: mv "/usr/share/scripts/autosysadmin/auto" "{{ autosysadmin_agent_auto_dir }}"
removes: "/usr/share/scripts/autosysadmin/auto"
creates: "{{ autosysadmin_agent_auto_dir }}"
- name: Create autosysadmin directories
ansible.builtin.file:
path: "{{ item }}"
state: directory
owner: "root"
group: "root"
mode: "0750"
loop:
- "{{ autosysadmin_agent_bin_dir }}"
- "{{ autosysadmin_agent_lib_dir }}"
- "{{ autosysadmin_agent_auto_dir }}"
- name: Copy libraries
ansible.builtin.copy:
src: "upstream/lib/"
dest: "{{ autosysadmin_agent_lib_dir }}/"
owner: root
group: root
mode: "0750"
- name: Copy repair scripts
ansible.builtin.copy:
src: "upstream/repair/"
dest: "{{ autosysadmin_agent_bin_dir }}/"
owner: root
group: root
mode: "0750"
- name: Copy other utilities
ansible.builtin.copy:
src: "upstream/bin/"
dest: "{{ autosysadmin_agent_bin_dir }}/"
owner: root
group: root
mode: "0750"
### WARNING: thos files are explicitly marked as non-executable
### to prevent them from being run automatically by run-parts
- name: Copy restart scripts
ansible.builtin.copy:
src: "upstream/restart/"
dest: "{{ autosysadmin_agent_auto_dir }}/"
owner: root
group: root
mode: "0640"
- name: Ensure /etc/evolinux folder exists
ansible.builtin.file:
path: "/etc/evolinux"
state: directory
owner: "root"
group: "root"
mode: "0700"
- name: Copy the configuration file if missing
ansible.builtin.template:
src: "autosysadmin.cf.j2"
dest: "/etc/evolinux/autosysadmin"
owner: root
group: root
mode: "0640"
force: no
# Repair scripts are supposed to be 'on' by default
# A line "repair_XXX=off" is added to the file only if the script is to be disabled.
# That's why all the ternary logic for the state is reversed.
- name: Update value per variable
ansible.builtin.lineinfile:
dest: "/etc/evolinux/autosysadmin"
line: "{{ item }}={{ autosysadmin_config[item] | default(true) | bool | ternary('on', 'off') }}"
regexp: '^(#\s*)?{{ item }}=.*'
state: "{{ autosysadmin_config[item] | default(true) | bool | ternary('absent', 'present') }}"
register: _line
loop: "{{ autosysadmin_repair_scripts | union(['repair_all']) }}"
- name: Ensure restart folder exists
ansible.builtin.file:
path: "auto"
state: directory
owner: "root"
group: "root"
mode: "0700"
- name: Legacy scripts are removed
ansible.builtin.file:
path: "{{ general_scripts_dir }}/autosysadmin/{{ item }}"
state: absent
loop:
- repair_amavis.sh
- repair_disk.sh
- repair_elasticsearch.sh
- repair_http.sh
- repair_mysql.sh
- repair_opendkim.sh
- repair_php_fpm56.sh
- repair_php_fpm70.sh
- repair_php_fpm73.sh
- repair_php_fpm74.sh
- repair_php_fpm80.sh
- repair_php_fpm81.sh
- repair_redis.sh
- repair_tomcat_instance.sh

View file

@ -1,10 +1,8 @@
---
- name: Copy logrotate configuration for autosysadmin
ansible.builtin.copy:
src: "files/logrotate_autosysadmin.conf"
src: "files/autosysadmin.logrotate.conf"
dest: "/etc/logrotate.d/autosysadmin"
owner: root
group: root
mode: "0644"
tags:
- autosysadmin

View file

@ -0,0 +1,31 @@
---
- name: The list of all repair scripts is composed.
set_fact:
autosysadmin_repair_scripts: "{{ lookup('ansible.builtin.fileglob', '../../../autosysadmin/agent/repair/repair_*', wantlist=True) | map('basename') | sort }}"
- name: Install dependencies
ansible.builtin.include_tasks: dependencies.yml
- name: Install autosysadmin
ansible.builtin.include_tasks: install.yml
- name: Crontab configuration
ansible.builtin.include_tasks: crontab.yml
- name: NRPE configuration
ansible.builtin.include_tasks: nrpe.yml
- name: sudo configuration
ansible.builtin.include_tasks: sudo.yml
- name: rsyslog configuration
ansible.builtin.include_tasks: rsyslog.yml
- name: logrotate configuration
ansible.builtin.include_tasks: logrotate.yml
- name: Install latest version of dump-server-state
ansible.builtin.include_role:
name: evolinux-base
tasks_from: dump-server-state.yml

View file

@ -0,0 +1,9 @@
---
- name: custom configuration is present
ansible.builtin.template:
src: autosysadmin.nrpe.cfg.j2
dest: /etc/nagios/nrpe.d/autosysadmin.cfg
group: nagios
mode: "0640"
force: yes
notify: restart nagios-nrpe-server

View file

@ -1,11 +1,9 @@
---
- name: Copy rsyslog configuration for autosysadmin
ansible.builtin.copy:
src: "files/rsyslog_autosysadmin.conf"
src: "files/autosysadmin.rsyslog.conf"
dest: "/etc/rsyslog.d/autosysadmin.conf"
owner: root
group: root
mode: "0644"
notify: Restart rsyslog
tags:
- autosysadmin
notify: restart rsyslog

View file

@ -1,9 +1,7 @@
---
- name: Add autosysadmin sudoers file
ansible.builtin.template:
src: sudoers.j2
src: autosysadmin.sudoers.j2
dest: /etc/sudoers.d/autosysadmin
mode: "0600"
validate: "visudo -cf %s"
tags:
- autosysadmin

View file

@ -0,0 +1,12 @@
# This configuration is partially managed by Ansible
# You can change specific values manually, but they may be overridden by Ansible
#
# To be safe, update the hosts_vars/group_vars in the autosysadmin project
# https://gitea.evolix.org/evolix/autosysadmin/src/branch/master
# then use the "agent" playbook to deploy.
#
# Configuration for autosysadmin
# Use this file to change configuration values defined in repair scripts
# To disable all repair scripts : repair_all=off
# To disable "repair_http" : repair_http=off
#

View file

@ -0,0 +1,7 @@
PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
# Run each enabled script
*/5 * * * * root run-parts /usr/share/scripts/autosysadmin/restart
# Clean run log files
@weekly root {{ autosysadmin_agent_bin_dir | mandatory }}/delete_old_logs.sh {{ autosysadmin_agent_log_retention_days | default('365') }}

View file

@ -0,0 +1,8 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be overwritten !
#
# Autosysadmin repair commands
{% for script in lookup('ansible.builtin.fileglob', '../../../autosysadmin/agent/repair/repair_*', wantlist=True) | map("basename") | sort %}
command[{{ script }}]=sudo {{ autosysadmin_agent_bin_dir }}/{{ script }}
{% endfor %}

View file

@ -0,0 +1,7 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be overwritten !
#
{% for script in lookup('ansible.builtin.fileglob', '../../../autosysadmin/agent/repair/repair_*', wantlist=True) | map("basename") | sort %}
nagios ALL = NOPASSWD: {{ autosysadmin_agent_bin_dir }}/{{ script }}
{% endfor %}

View file

@ -0,0 +1,8 @@
---
general_scripts_dir: "/usr/share/scripts"
restart_nrpe_path: "{{ general_scripts_dir }}/autosysadmin/restart/restart_nrpe"
# Change this to customize the RUNNING value in the script
restart_nrpe_running: Null

View file

@ -0,0 +1,105 @@
#!/bin/bash
: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1
## Possible values for RUNNING :
## never => disabled
## always => enabled
## nwh-fr => enabled during non-working-hours in France
## nwh-ca => enabled during non-working-hours in Canada (not supported yet)
## custom => enabled if `running_custom()` function return 0, otherwise disabled.
# shellcheck disable=SC2034
RUNNING="nwh-fr"
## The name of the service, mainly for logging
service_name="nagios-nrpe-server"
## The SysVinit script name
sysvinit_script="${service_name}"
## The systemd service name
systemd_service="${service_name}.service"
is_service_alive() {
## this must return 0 if the service is alive, otherwise return 1
## Example:
pgrep -u nagios nrpe > /dev/null
}
## Action for SysVinit system
sysvinit_action() {
# Save service status before restart
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.before.status"
# Try to restart
timeout 20 "/etc/init.d/${sysvinit_script}" restart > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.after.status"
}
## Action for systemd system
systemd_action() {
# Save service status before restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"
# Try to restart
# systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start
# so we check the status explicitly
timeout 20 systemctl restart "${systemd_service}" > /dev/null \
&& sleep 1 \
&& systemctl status "${systemd_service}" > /dev/null
rc=$?
if [ "${rc}" -eq "0" ]; then
log_action "Restart ${service_name}: OK"
else
log_action "Restart ${service_name}: failed"
fi
# Save service status after restart
systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
}
# Should we run?
if ! is_supposed_to_run; then
# log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})."
exit 0
fi
if is_service_alive; then
# log_global "${service_name} process alive. Aborting"
exit 0
fi
# Yes we do, so check for sysvinit or systemd
if is_debian_version "8" "<="; then
if ! is_sysvinit_enabled "*${sysvinit_script}*"; then
# log_global "${service_name} not enabled. Aborting"
exit 0
fi
# Let's finally do the action
pre_restart
sysvinit_action
post_restart
else
if ! is_systemd_enabled "${systemd_service}"; then
# log_global "${service_name} is disabled (or missing), nothing left to do."
exit 0
fi
if is_systemd_active "${systemd_service}"; then
# log_global "${service_name} is active, nothing left to do."
exit 0
fi
# Let's finally do the action
pre_restart
systemd_action
post_restart
fi

View file

@ -0,0 +1,24 @@
---
- name: "Remount /usr if needed"
ansible.builtin.include_role:
name: remount-usr
- name: "Copy restart_nrpe"
ansible.builtin.copy:
src: upstream/restart_nrpe
dest: "{{ restart_nrpe_path }}"
owner: "root"
group: "root"
mode: "0750"
- name: "Customize RUNNING value"
ansible.builtin.lineinfile:
path: "{{ restart_nrpe_path }}"
line: "RUNNING=\"{{ restart_nrpe_running }}\""
regexp: "^ *RUNNING="
create: False
when:
- restart_nrpe_running is defined
- restart_nrpe_running != None
- restart_nrpe_running | length > 0

View file

@ -1,22 +0,0 @@
---
general_scripts_dir: "/usr/share/scripts"
autosysadmin_dir: "{{ general_scripts_dir }}/autosysadmin"
# Default values for enabled checks
repair_amavis: 'on'
repair_disk: 'on'
repair_elasticsearch: 'on'
repair_http: 'on'
repair_mysql: 'on'
repair_opendkim: 'off'
repair_php_fpm56: 'off'
repair_php_fpm70: 'off'
repair_php_fpm73: 'off'
repair_php_fpm74: 'off'
repair_php_fpm80: 'off'
repair_php_fpm81: 'off'
repair_php_fpm82: 'off'
repair_php_fpm83: 'off'
repair_redis: 'off'
repair_tomcat_instance: 'off'

View file

@ -1,478 +0,0 @@
#!/bin/bash
get_system() {
uname -s
}
get_fqdn() {
if [ "$(get_system)" = "Linux" ]; then
hostname --fqdn
elif [ "$(get_system)" = "OpenBSD" ]; then
hostname
else
log_error_exit "OS not detected!"
fi
}
get_complete_hostname() {
REAL_HOSTNAME="$(get_fqdn)"
if [ "${HOSTNAME}" = "${REAL_HOSTNAME}" ]; then
echo "${HOSTNAME}"
else
echo "${HOSTNAME} (${REAL_HOSTNAME})"
fi
}
get_evomaintenance_mail() {
email="$(grep "EVOMAINTMAIL=" /etc/evomaintenance.cf | cut -d '=' -f2)"
if [[ -z "$email" ]]; then
email='alert5@evolix.fr'
fi
echo "${email}"
}
arguments="${*}"
get_argument() {
no_found=1
for argument in ${arguments} ; do
if [ "${argument}" = "${1}" ] ;
then
no_found=0
fi
done
return ${no_found}
}
internal_info() {
INTERNAL_INFO="$(printf '%b\n%s' "${INTERNAL_INFO}" "$*")"
}
log_action() {
log "Action : $*"
ACTIONS="$(printf '%s\n%s' "${ACTIONS}" "$*")"
}
log() {
INTERNAL_LOG="$(printf '%s\n%s %s %s %s' "${INTERNAL_LOG}" "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*")"
printf '%s %s %s %s\n' "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*" | tee -a "${LOG_DIR}/autosysadmin.log"
echo "$*" | /usr/bin/logger -p local0.notice -t autosysadmin."$0"
}
log_error_exit() {
log "ERROR : $*"
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: $*" --no-commit --no-mail
exit 1
}
log_check_php_fpm() {
# Extraire seulement les chiffres du nom du script exécuté
# ./repair_php_fpm81.sh ==> 81
PHP_VERSION="${0//[^0-9]/}"
PHP_PATH_POOL=$(find /var/lib/lxc/php"${PHP_VERSION}"/ -type d -name "pool.d")
/usr/local/lib/nagios/plugins/check_phpfpm_multi "${PHP_PATH_POOL}" > "${LOG_DIR}/nrpe.txt"
}
log_system_status() {
DUMP_SERVER_STATE_BIN="$(command -v dump-server-state || command -v backup-server-state)"
if [ -z "${DUMP_SERVER_STATE_BIN}" ]; then
log "Warning: dump-server-state is not present. No server state recorded...."
fi
if [ -x "${DUMP_SERVER_STATE_BIN}" ]; then
# NOTE We don't want the logging to take too much time, so we kill it
# if it take more than 20 seconds.
timeout --signal 9 20 \
"${DUMP_SERVER_STATE_BIN}" \
--dump-dir="$LOG_DIR" \
--df \
--dmesg \
--iptables \
--lxc \
--netcfg \
--netstat \
--uname \
--processes \
--systemctl \
--uptime \
--virsh \
--disks \
--mysql-processes \
--no-apt-states \
--no-apt-config \
--no-dpkg-full \
--no-dpkg-status \
--no-mount \
--no-packages \
--no-sysctl \
--no-etc
log "System status logged in ${LOG_DIR}"
fi
}
read_log_system_status(){
files="df.txt dmesg.txt lxc-list.txt netstat-legacy.txt netstat-ss.txt pstree.txt ps.txt systemctl-failed-services.txt"
echo -e "\n\n#### Détails de dump-server-state"
for file in ${files} ; do
echo -e "\n### cat ${LOG_DIR}/${file} :"
tail -n 1000 "${LOG_DIR}"/"${file}"
done
}
ensure_no_active_users_or_exit() {
if is_debug; then return; fi
# Is there any active user ?
for user in $(LC_ALL=C who --users|awk '{print $1}'); do
idle_time="$(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}')"
for sameusertime in $(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}'); do
if is_active_user "$sameusertime"; then
hook_mail abort_active_users
log_error_exit 'At least one user was recently active. That requires human intervention. Nothing to do here!'
fi
done
done
}
is_active_user() {
# Check if a user was active in the last 30 minutes
idle_time="$1"
if [ "${idle_time}" = "old" ];
then
return 1
elif [ "${idle_time}" = "." ];
then
return 0
else
hh="$(echo "${idle_time}" | awk -F':' '{print $1}')"
mm="$(echo "${idle_time}" | awk -F':' '{print $2}')"
idle_minutes="$(( 60 * "${hh}" + "${mm}" ))"
if [ "${idle_minutes}" -ge 30 ];
then
return 1
else
return 0
fi
fi
}
is_debug() {
debug_file="/etc/evolinux/autosysadmin.debug"
if [ -e "${debug_file}" ]; then
last_change=$(stat -c %Z "${debug_file}")
limit_date=$(date --date "14400 seconds ago" +"%s")
if [ $(( last_change - limit_date )) -le "0" ]; then
rm "${debug_file}"
else
return 0
fi
fi
return 1
}
check_nrpe() {
check="$1"
list_command_nrpe=$( grep --exclude=*~ -E "\[${check}\]" -r /etc/nagios/ | grep -v '#command' )
command_nrpe_primary=$( echo "${list_command_nrpe}" | grep "/etc/nagios/nrpe.d/evolix.cfg" | cut -d'=' -f2- )
command_nrpe_secondary=$( echo "${list_command_nrpe}" | head -n1 | cut -d'=' -f2- )
if [ -z "${command_nrpe_primary}" ] && [ -z "${command_nrpe_secondary}" ]
then
return 1
else
if [ -n "${command_nrpe_primary}" ]
then
${command_nrpe_primary}
else
${command_nrpe_secondary}
fi
fi
}
acquire_lock_or_exit() {
lockfile="$1"
waittime="$2"
# si le temps dattente nest pas compréhensible par sleep(1), il vaut 0
if ! echo "${waittime}" | grep -Eq '^[0-9]+[smhd]?$'
then
waittime=0
fi
# si le temps dattente est supérieur à 0 et si le lock existe, on attend
if test "${waittime}" -gt 0 && test -f "${lockfile}"
then
sleep "${waittime}"
fi
# si le lock existe, on sarrête
if test -f "${lockfile}"
then
log_error_exit "lock file ${lockfile} exists"
fi
touch "${lockfile}"
}
is_too_soon() {
if is_debug; then return; fi
witness="/tmp/autosysadmin_witness_$(basename "$0")"
if test -f "${witness}"
then
compare="$(($(date +%s)-$(stat -c "%Y" "${witness}")))"
if [ "${compare}" -lt 1800 ];
then
log_error_exit 'already executed less than 30 minutes ago'
fi
rm "${witness}"
fi
touch "${witness}"
}
init_autosysadmin() {
PATH="${PATH}":/usr/sbin:/sbin↩
unset ACTIONS
SCRIPTNAME=$(basename "$0")
PROGNAME=${SCRIPTNAME%.sh}
RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${SCRIPTNAME}_$(openssl rand -hex 6)"
LOG_DIR="/var/log/autosysadmin/${RUN_ID}"
mkdir -p "${LOG_DIR}"
log "Autosysadmin : Script ${SCRIPTNAME} triggered"
# Detect operating system name, version and release↩
detect_os
}
load_conf() {
# Load conf and enable script by default.
# To disable script locally, set "$PROGNAME"=off in /etc/evolinux/autosysadmin.
# To disable script globally, set "$PROGNAME"=off in the script, after load_conf() call.
declare -g "$PROGNAME"=on # dynamic variable assignment ($PROGNAME == repair_*)
# Source configuration file
# shellcheck source=../roles/deploy_autosysadmin/templates/autosysadmin.cfg.j2
test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin
}
detect_os() {
# OS detection
DEBIAN_RELEASE=""
LSB_RELEASE_BIN="$(command -v lsb_release)"
if [ -e /etc/debian_version ]; then
DEBIAN_VERSION="$(cut -d "." -f 1 < /etc/debian_version)"
if [ -x "${LSB_RELEASE_BIN}" ]; then
DEBIAN_RELEASE="$("${LSB_RELEASE_BIN}" --codename --short)"
else
case "${DEBIAN_VERSION}" in
8) DEBIAN_RELEASE="jessie";;
9) DEBIAN_RELEASE="stretch";;
10) DEBIAN_RELEASE="buster";;
11) DEBIAN_RELEASE="bullseye";;
esac
fi
fi
}
is_debian_jessie() {
test "${DEBIAN_RELEASE}" = "jessie"
}
is_debian_stretch() {
test "${DEBIAN_RELEASE}" = "stretch"
}
is_debian_buster() {
test "${DEBIAN_RELEASE}" = "buster"
}
is_debian_bullseye() {
test "${DEBIAN_RELEASE}" = "bullseye"
}
systemd_list_service_failed() {
systemctl list-units --failed --no-legend --full --type=service "$1" |
awk '{print $1}'
}
systemd_list_units_enabled() {
list_units_enabled=$(systemctl list-unit-files --state=enabled --no-legend | awk "/$1/{print \$1}")
if [ -z "${list_units_enabled}" ]
then
return 1
else
echo "${list_units_enabled}"
fi
}
format_mail_success() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <equipe+autosysadmin@evolix.net>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: $(basename "$0")
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: autosysadmin@evolix.fr
Subject: [autosysadmin] Intervention sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
### Renseignements sur l'intervention
${ACTIONS}
### Réagir à cette intervention
Vous pouvez répondre à ce message (sur l'adresse mail equipe@evolix.net).
En cas d'urgence, utilisez l'adresse maintenance@evolix.fr ou
notre téléphone portable d'astreinte (04.26.99.99.26)
--
Votre AutoSysadmin
EOTEMPLATE
}
format_mail_abort_active_users() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <equipe+autosysadmin@evolix.net>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: $(basename "$0")
X-RunId: ${RUN_ID}
To: ${EMAIL_CLIENT:-alert5@evolix.fr}
Cc: autosysadmin@evolix.fr
Subject: [autosysadmin] Intervention interrompue sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique a été interrompue en raison
d'un utilisateur actuellement actif sur le serveur.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
### Utilisateur(s) connecté(s)
$(w)
--
Votre AutoSysadmin
EOTEMPLATE
}
format_mail_internal_info() {
cat <<EOTEMPLATE
From: AutoSysadmin Evolix <equipe+autosysadmin@evolix.net>
Content-Type: text/plain; charset=UTF-8
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
X-Script: $(basename "$0")
X-RunId: ${RUN_ID}
To: autosysadmin@evolix.fr
Subject: [autosysadmin] Complements (interne) - Intervention sur ${HOSTNAME_TEXT}
Bonjour,
Une intervention automatique vient de se terminer.
Nom du serveur : ${HOSTNAME_TEXT}
Heure d'intervention : $(LC_ALL=fr_FR.utf8 date)
Script déclenché : $(basename "$0")
### Actions effectuées
${ACTIONS}
### Logs autosysadmin
${INTERNAL_LOG}
### Utilisateur(s) connecté(s)
$(w)
### Informations additionnelles données par le script $(basename "$0")
${INTERNAL_INFO}
--
Votre AutoSysadmin
EOTEMPLATE
}
hook_mail() {
if is_debug; then return; fi
HOSTNAME="${HOSTNAME:-"$(get_fqdn)"}"
HOSTNAME_TEXT="$(get_complete_hostname)"
EMAIL_CLIENT="$(get_evomaintenance_mail)"
MAIL_CONTENT="$(format_mail_"$1")"
SENDMAIL_BIN="$(command -v sendmail)"
if [ -z "${SENDMAIL_BIN}" ]; then
log "No \`sendmail' command has been found, can't send mail."
fi
if [ -x "${SENDMAIL_BIN}" ]; then
echo "${MAIL_CONTENT}" | "${SENDMAIL_BIN}" -oi -t -f "equipe@evolix.net"
fi
}
# We need stable output for gcal, so we force some language environment variables
export TZ=Europe/Paris
export LANGUAGE=fr_FR.UTF-8
is_holiday() {
# gcal mark today as a holiday by surrounding with < and > the day
# of the month of that holiday line. For exemple if today is 2022-05-01 we'll
# get among other lines:
# Fête du Travail (FR) + Di, < 1>Mai 2022
# Jour de la Victoire (FR) + Di, : 8:Mai 2022 = +7 jours
gcal --cc-holidays=fr --holiday-list=short | grep -E '<[0-9 ]{2}>' --quiet
}
is_weekend() {
day_of_week=$(date +%u)
if [ "$day_of_week" != 6 ] && [ "$day_of_week" != 7 ]; then
return 1
fi
}
is_workday() {
if is_holiday || is_weekend; then
return 1
fi
}
is_worktime() {
if ! is_workday; then
return 1
fi
hour=$(date +%H)
if [ "${hour}" -lt 9 ] || { [ "${hour}" -ge 12 ] && [ "${hour}" -lt 14 ] ; } || [ "${hour}" -ge 18 ]; then
return 1
fi
}

View file

@ -1,33 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
# shellcheck source=./restart_amavis.sh
source /usr/share/scripts/autosysadmin/restart_amavis.sh
init_autosysadmin
load_conf
test "${repair_amavis:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Verify if check_nrpe are not OK
check_nrpe "check_amavis" && log_error_exit 'check_amavis is OK, nothing to do here!'
# Has it recently been run?
get_argument "--no-delay" || is_too_soon
lockfile="/run/lock/repair_amavis"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
restart_amavis
hook_mail success
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,173 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_disk:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_disk"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
get_mountpoints() {
# the $(...) get the check_disk1 command
# the cut command selects the critical part of the check_disk1 output
# the grep command extracts the mountpoints and available disk space
# the last cut command selects the mountpoints
$(grep check_disk1 /etc/nagios/nrpe.d/evolix.cfg | cut -d'=' -f2-) -e | cut -d'|' -f1 | grep -Eo '/[[:graph:]]* [0-9]+ [A-Z][A-Z]' | cut -f1 -d' '
}
is_reserved-blocks() {
fs_type="$(findmnt -n --output=fstype "$1")"
if [ "${fs_type}" = "ext4" ];
then
device="$(findmnt -n --output=source "$1")"
reserved_block_count="$(tune2fs -l "${device}" | grep 'Reserved block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')"
block_count="$(tune2fs -l "${device}" | grep 'Block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')"
percentage=$(awk "BEGIN { pc=100*${reserved_block_count}/${block_count}; i=int(pc); print (pc-i<0.5)?i:i+1 }")
log "Reserved blocks for $1 is curently at $percentage%"
if [ "${percentage}" -gt "1" ]
then
log "Allowing tune2fs action to reduce the number of reserved blocks"
return 0
else
log "Reserved blocks already at or bellow 1%, no automatic action possible"
return 1
fi
else
log "Filesystem for $1 partition is not ext4"
return 1
fi
}
change_reserved-blocks() {
# We alwasy keep some reserved blocks to avoid missing some logs
# https://gitea.evolix.org/evolix/autosysadmin/issues/22
tune2fs -m 1 "$(findmnt -n --output=source "$1")"
log_action "Reserved blocks for $1 changed to 1 percent"
}
is_tmp_to_delete() {
size="$(find /var/log/ -type f -ctime +1 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
if [ -n "${size}" ]
then
return 0
else
return 1
fi
}
is_log_to_delete() {
size="$(find /var/log/ -type f -mtime +365 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
if [ -n "${size}" ]
then
return 0
else
return 1
fi
}
clean_apt_cache() {
for lxc in $(du -ax /var | sort -nr | head -n10 | grep -E '/var/lib/lxc/php[0-9]+/rootfs/var/cache$' | grep -Eo 'php[0-9]+')
do
lxc-attach --name "${lxc}" -- apt-get clean
log_action '[lxc/'"${lxc}"'] Clean apt cache'
done
case "$(du -sx /var/* | sort -rn | sed 's/^[0-9]\+[[:space:]]\+//;q')" in
'/var/cache')
apt-get clean
log_action 'Clean apt cache'
;;
esac
}
clean_amavis_virusmails() {
if du --inodes /var/lib/* | sort -n | tail -n3 | grep -q 'virusmails$'
then
find /var/lib/amavis/virusmails/ -type f -atime +30 -delete
log_action 'Clean /var/lib/amavis/virusmails'
fi
}
for mountpoint in $(get_mountpoints)
do
case "${mountpoint}" in
/var)
#if is_log_to_delete
#then
# find /var/log/ -type f -mtime +365 -delete
# log_action "$size Mo of disk space freed in /var"
#fi
if is_reserved-blocks /var
then
change_reserved-blocks /var
clean_apt_cache
clean_amavis_virusmails
hook_mail success
fi
;;
/tmp)
#if is_tmp_to_delete
#then
# find /tmp/ -type f -ctime +1 -delete
# log_action "$size Mo of disk space freed in /tmp"
#fi
if is_reserved-blocks /tmp
then
change_reserved-blocks /tmp
hook_mail success
fi
;;
/home)
if is_reserved-blocks /home
then
change_reserved-blocks /home
hook_mail success
fi
;;
/srv)
if is_reserved-blocks /srv
then
change_reserved-blocks /srv
hook_mail success
fi
;;
/filer)
if is_reserved-blocks /filer
then
change_reserved-blocks /filer
hook_mail success
fi
;;
/)
if is_reserved-blocks /
then
change_reserved-blocks /
hook_mail success
# Suggest remove old kernel ?
fi
;;
*)
# unknown
log 'Unknown partition (or weird case) or nothing to do'
;;
esac
done
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,57 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_elasticsearch:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_elasticsearch"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
elasticsearch_is_enabled() {
systemd_list_units_enabled "elasticsearch.service"
}
elasticsearch_restart() {
if ! timeout 60 systemctl restart elasticsearch.service > /dev/null
then
log_error_exit 'failed to restart elasticsearch'
fi
}
# Test functions
test_elasticsearch_process_present() {
pgrep -u elasticsearch > /dev/null
}
if elasticsearch_is_enabled
then
if ! test_elasticsearch_process_present
then
log_action "Redémarrage de elasticsearch"
elasticsearch_restart
hook_mail success
else
log_error_exit "Elasticsearch process alive. Aborting"
fi
else
log_error_exit "Elasticsearch is not enabled. Aborting"
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,141 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_http:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
http_detect_service() {
# check whether nginx, apache or both are supposed to be running
if is_debian_jessie; then
find /etc/rc2.d/
else
systemctl list-unit-files --state=enabled
fi | awk '/nginx/ { nginx = 1 } /apache2/ { apache2 = 1 } END { if (nginx && apache2) { print "both" } else if (nginx) { print "nginx" } else if (apache2) { print "apache2" } }'
# The previous awk command looks for two patterns: "nginx"
# and "apache2". If a line matches the patterns, a variable
# "nginx" or "apache2" is set to 1 (true). The "END" checks
# if one or both patterns has been found.
}
http_handle_apache() {
# check syntax
if ! apache2ctl -t > /dev/null 2> /dev/null
then
log_error_exit 'apache2 configuration syntax is not valid'
fi
# try restart
if ! timeout 20 systemctl restart apache2.service > /dev/null 2> /dev/null
then
log_error_exit 'failed to restart apache2'
fi
log_action "Redémarrage de Apache"
internal_info "#### grep $(LANG=en_US.UTF-8 date '+%b %d') /home/*/log/error.log /var/log/apache2/*error.log (avec filtrage)"
ERROR_LOG=$(grep "$(LANG=en_US.UTF-8 date '+%b %d')" /home/*/log/error.log /var/log/apache2/*error.log | grep -v -e "Got error 'PHP message:" -e "No matching DirectoryIndex" -e "client denied by server configuration" -e "server certificate does NOT include an ID which matches the server name" )
internal_info "$ERROR_LOG"
}
http_handle_nginx() {
# check syntax
if ! nginx -t > /dev/null 2> /dev/null
then
log_error_exit 'nginx configuration syntax is not valid'
fi
# try restart
if ! timeout 20 systemctl restart nginx.service > /dev/null 2> /dev/null
then
log_error_exit 'failed to restart nginx'
fi
log_action "Redémarrage de Nginx"
}
http_handle_lxc_php() {
# check whether containers are used for PHP and reboot them if so
if systemd_list_units_enabled 'lxc'
then
for php in $(lxc-ls | grep 'php'); do
lxc-stop -n "$php"
lxc-start --daemon -n "$php"
log_action "lxc-fpm - Redémarrage container ${php}"
done
fi
}
http_handle_fpm_php() {
# check whether php-fpm is installed and restart it if so
if enabled_units="$(systemd_list_units_enabled "php.*-fpm")"
then
systemctl restart "${enabled_units}"
log_action 'php-fpm - Redémarrage de php-fpm'
fi
}
case "$(http_detect_service)" in
nginx)
http_handle_nginx
http_handle_lxc_php
http_handle_fpm_php
hook_mail success
hook_mail internal_info
;;
apache2)
http_handle_apache
http_handle_lxc_php
http_handle_fpm_php
hook_mail success
hook_mail internal_info
;;
both)
http_handle_nginx
http_handle_apache
http_handle_lxc_php
http_handle_fpm_php
hook_mail success
hook_mail internal_info
;;
*)
# unknown
log 'nothing to do'
;;
esac
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,71 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_mysql:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_mysql"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
mysql_is_enabled() {
if is_debian_jessie
then
find /etc/rc2.d/ -name '*mysql*' > /dev/null
else
systemd_list_units_enabled "mysql.service"
fi
}
mysql_restart() {
if is_debian_jessie
then
if ! timeout 60 /etc/init.d/mysql restart > /dev/null
then
log_error_exit 'failed to restart mysql'
fi
else
if ! timeout 60 systemctl restart mysql.service > /dev/null
then
log_error_exit 'failed to restart mysql'
fi
fi
}
# Test functions
test_mysql_process_present() {
pgrep -u mysql mysqld > /dev/null
}
if mysql_is_enabled
then
if ! test_mysql_process_present
then
log_action "Redémarrage de MySQL"
mysql_restart
hook_mail success
else
log_error_exit "mysqld process alive. Aborting"
fi
else
log_error_exit "MySQL/MariaDB not enabled. Aborting"
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,61 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_opendkim:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_opendkim"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
log_system_status
# Functions dedicated to this repair script
opendkim_is_enabled() {
systemd_list_units_enabled "opendkim.service"
}
opendkim_restart() {
if ! timeout 60 systemctl restart opendkim.service > /dev/null
then
log_error_exit 'failed to restart opendkim'
fi
}
opendkim_test_process_present() {
pgrep -u opendkim > /dev/null
}
# Main logic
if opendkim_is_enabled
then
if ! opendkim_test_process_present
then
log_action "Redémarrage de opendkim"
opendkim_restart
hook_mail success
else
log_error_exit "opendkim process alive. Aborting"
fi
else
log_error_exit "opendkim is not enabled. Aborting"
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm56:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php56
then
lxc-stop -n php56
lxc-start --daemon -n php56
log_action "lxc-fpm - Redémarrage container php56"
internal_info "#### tail /var/lib/lxc/php56/rootfs/var/log/php5-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php56/rootfs/var/log/php5-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm70:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php70
then
lxc-stop -n php70
lxc-start --daemon -n php70
log_action "lxc-fpm - Redémarrage container php70"
internal_info "#### tail /var/lib/lxc/php70/rootfs/var/log/php7.0-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php70/rootfs/var/log/php7.0-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm73:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php73
then
lxc-stop -n php73
lxc-start --daemon -n php73
log_action "lxc-fpm - Redémarrage container php73"
internal_info "#### tail /var/lib/lxc/php73/rootfs/var/log/php7.3-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php73/rootfs/var/log/php7.3-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm74:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php74
then
lxc-stop -n php74
lxc-start --daemon -n php74
log_action "lxc-fpm - Redémarrage container php74"
internal_info "#### tail /var/lib/lxc/php74/rootfs/var/log/php7.4-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php74/rootfs/var/log/php7.4-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm80:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php80
then
lxc-stop -n php80
lxc-start --daemon -n php80
log_action "lxc-fpm - Redémarrage container php80"
internal_info "#### tail /var/lib/lxc/php80/rootfs/var/log/php8.0-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php80/rootfs/var/log/php8.0-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm81:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php81
then
lxc-stop -n php81
lxc-start --daemon -n php81
log_action "lxc-fpm - Redémarrage container php81"
internal_info "#### tail /var/lib/lxc/php81/rootfs/var/log/php8.1-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php81/rootfs/var/log/php8.1-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm82:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php82
then
lxc-stop -n php82
lxc-start --daemon -n php82
log_action "lxc-fpm - Redémarrage container php82"
internal_info "#### tail /var/lib/lxc/php82/rootfs/var/log/php8.2-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php82/rootfs/var/log/php8.2-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,53 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_php_fpm83:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_http"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}" 15s
ensure_no_active_users_or_exit
# The actual work starts below !
log_system_status
log_check_php_fpm
if systemd_list_units_enabled 'lxc'
then
if lxc-ls | grep -q php83
then
lxc-stop -n php83
lxc-start --daemon -n php83
log_action "lxc-fpm - Redémarrage container php83"
internal_info "#### tail /var/lib/lxc/php83/rootfs/var/log/php8.3-fpm.log"
FPM_LOG=$(tail /var/lib/lxc/php83/rootfs/var/log/php8.3-fpm.log)
internal_info "$FPM_LOG" "$(read_log_system_status)"
hook_mail success
hook_mail internal_info
else
log 'Not possible :v'
fi
else
log 'Error, not a multi-php install'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,58 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
test "${repair_redis:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_redis"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
# The actual work starts below !
handle_redis() {
for service in $(systemd_list_service_failed redis*)
do
# ne rien faire si le service est désactivé
if ! systemctl is-enabled --quiet "${service}"
then
continue
fi
# ne rien faire si le service est actif
if systemctl is-active --quiet "${service}"
then
continue
fi
if ! timeout 20 systemctl restart redis.service > /dev/null 2> /dev/null
then
log_error_exit "failed to restart redis ${service}"
fi
log_action "Redémarrer service ${service}"
done
}
if ( systemd_list_units_enabled 'redis.*\.service$' ) > /dev/null
then
handle_redis
hook_mail success
else
log 'Error: redis service is not enabled'
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,63 +0,0 @@
#!/bin/bash
# Source functions file
# shellcheck source=./functions.sh
source /usr/share/scripts/autosysadmin/functions.sh
init_autosysadmin
load_conf
# Comment this line to enable
repair_template=off
test "${repair_template:=off}" = off && log_error_exit 'Script disabled, nothing to do here!'
# Has it recently been run?
is_too_soon
lockfile="/run/lock/repair_template"
cleanup() {
rm -f "${lockfile}"
}
trap 'cleanup' 0
acquire_lock_or_exit "${lockfile}"
ensure_no_active_users_or_exit
log_system_status
# Functions dedicated to this repair script
template_is_enabled() {
systemd_list_units_enabled "template.service"
}
template_restart() {
if ! timeout 60 systemctl restart template.service > /dev/null
then
log_error_exit 'failed to restart template'
fi
}
template_test_process_present() {
pgrep -u template > /dev/null
}
# Main logic
if template_is_enabled
then
if ! template_test_process_present
then
log_action "Redémarrage de template"
template_restart
hook_mail success
else
log_error_exit "template process alive. Aborting"
fi
else
log_error_exit "template is not enabled. Aborting"
fi
AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail

View file

@ -1,35 +0,0 @@
#!/bin/bash
restart_amavis() {
/etc/init.d/amavis stop 2>/dev/null
/etc/init.d/clamav-freshclam stop 2>/dev/null
/etc/init.d/clamav-daemon stop 2>/dev/null
if systemctl is-enabled --quiet 'clamav-freshclam.service'
then
freshclam
log_action "Mise à jour des définitions antivirus"
fi
if systemctl is-enabled --quiet 'clamav-daemon.service'
then
/etc/init.d/clamav-daemon start
log_action "Redémarrage de clamav-daemon"
else
log 'Error, clamav not installed'
fi
if systemctl is-enabled --quiet 'clamav-freshclam.service'
then
/etc/init.d/clamav-freshclam start
log_action "Redémarrage de clamav-freshclam"
fi
if systemctl is-enabled --quiet 'amavis.service'
then
/etc/init.d/amavis start
log_action "Redémarrage de amavis"
else
log 'Error, amavis not installed'
fi
}

View file

@ -1,16 +0,0 @@
---
- name: Restart nagios-nrpe-server
ansible.builtin.service:
name: nagios-nrpe-server
state: restarted
- name: Restart nrpe
ansible.builtin.service:
name: nrpe
state: restarted
- name: Restart rsyslog
ansible.builtin.service:
name: rsyslog
state: restarted

View file

@ -1,61 +0,0 @@
---
- name: "Remount /usr if needed"
ansible.builtin.import_role:
name: remount-usr
- name: Create autosysadmin directory
ansible.builtin.file:
path: "{{ autosysadmin_dir }}"
state: directory
owner: "root"
group: "root"
mode: "0750"
tags:
- autosysadmin
- name: Copy scripts
ansible.builtin.copy:
src: "files/scripts/{{ item }}"
dest: "{{ autosysadmin_dir }}/{{ item }}"
owner: root
group: root
mode: "0750"
loop:
- "functions.sh"
- "restart_amavis.sh"
- "repair_amavis.sh"
- "repair_disk.sh"
- "repair_elasticsearch.sh"
- "repair_http.sh"
- "repair_mysql.sh"
- "repair_php_fpm56.sh"
- "repair_php_fpm70.sh"
- "repair_php_fpm73.sh"
- "repair_php_fpm74.sh"
- "repair_php_fpm80.sh"
- "repair_php_fpm81.sh"
- "repair_php_fpm82.sh"
- "repair_php_fpm83.sh"
- "repair_tomcat_instance.sh"
tags:
- autosysadmin
- name: Ensure /etc/evolinux folder exists
ansible.builtin.file:
path: "/etc/evolinux"
state: directory
owner: "root"
group: "root"
mode: "0700"
tags:
- autosysadmin
- name: Copy the configuration file
ansible.builtin.template:
src: "autosysadmin.cf.j2"
dest: "/etc/evolinux/autosysadmin"
owner: root
group: root
mode: "0640"
tags:
- autosysadmin

View file

@ -1,37 +0,0 @@
---
- name: Install dependencies
ansible.builtin.import_tasks: dependencies.yml
tags:
- autosysadmin
- name: Install autosysadmin scripts
ansible.builtin.import_tasks: autosysadmin_scripts.yml
tags:
- autosysadmin
- name: Amend NRPE configuration
ansible.builtin.import_tasks: nrpe.yml
tags:
- autosysadmin
- name: Amend sudo configuration
ansible.builtin.import_tasks: sudo.yml
tags:
- autosysadmin
- name: Amend rsyslog configuration
ansible.builtin.import_tasks: rsyslog.yml
tags:
- autosysadmin
- name: Amend logrotate configuration
ansible.builtin.import_tasks: logrotate.yml
tags:
- autosysadmin
- name: Install last version of dump-server-state
ansible.builtin.import_role:
name: evolinux-base
tasks_from: dump-server-state.yml
tags:
- autosysadmin

View file

@ -1,11 +0,0 @@
---
- name: Custom configuration is present
ansible.builtin.template:
src: autosysadmin.cfg.j2
dest: /etc/nagios/nrpe.d/autosysadmin.cfg
group: nagios
mode: "0640"
force: true
notify: Restart nagios-nrpe-server
tags:
- autosysadmin

View file

@ -1,74 +0,0 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be **overwritten** !
#
# Update the hosts_vars/group_vars on the autosysadmin project
# https://gitea.evolix.org/evolix/autosysadmin/src/branch/master
#
# Configuration for autosysadmin
# Use this file to change configuration values defined in repair scripts
# Ex : repair_http=off
{% if repair_amavis == "off" %}
repair_amavis=off
{% endif %}
{% if repair_disk == "off" %}
repair_disk=off
{% endif %}
{% if repair_elasticsearch == "off" %}
repair_elasticsearch=off
{% endif %}
{% if repair_http == "off" %}
repair_http=off
{% endif %}
{% if repair_mysql == "off" %}
repair_mysql=off
{% endif %}
{% if repair_opendkim == "off" %}
repair_opendkim=off
{% endif %}
{% if repair_php_fpm56 == "off" %}
repair_php_fpm56=off
{% endif %}
{% if repair_php_fpm70 == "off" %}
repair_php_fpm70=off
{% endif %}
{% if repair_php_fpm73 == "off" %}
repair_php_fpm73=off
{% endif %}
{% if repair_php_fpm74 == "off" %}
repair_php_fpm74=off
{% endif %}
{% if repair_php_fpm80 == "off" %}
repair_php_fpm80=off
{% endif %}
{% if repair_php_fpm81 == "off" %}
repair_php_fpm81=off
{% endif %}
{% if repair_php_fpm82 == "off" %}
repair_php_fpm82=off
{% endif %}
{% if repair_php_fpm83 == "off" %}
repair_php_fpm83=off
{% endif %}
{% if repair_redis == "off" %}
repair_redis=off
{% endif %}
{% if repair_tomcat_instance == "off" %}
repair_tomcat_instance=off
{% endif %}

View file

@ -1,22 +0,0 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be overwritten !
#
# Autosysadmin repair commands
command[repair_amavis]=sudo {{ autosysadmin_dir }}/repair_amavis.sh
command[repair_disk]=sudo {{ autosysadmin_dir }}/repair_disk.sh
command[repair_elasticsearch]=sudo {{ autosysadmin_dir }}/repair_elasticsearch.sh
command[repair_http]=sudo {{ autosysadmin_dir }}/repair_http.sh
command[repair_mysql]=sudo {{ autosysadmin_dir }}/repair_mysql.sh
command[repair_opendkim]=sudo {{ autosysadmin_dir }}/repair_opendkim.sh
command[repair_php_fpm56]=sudo {{ autosysadmin_dir }}/repair_php_fpm56.sh
command[repair_php_fpm70]=sudo {{ autosysadmin_dir }}/repair_php_fpm70.sh
command[repair_php_fpm73]=sudo {{ autosysadmin_dir }}/repair_php_fpm73.sh
command[repair_php_fpm74]=sudo {{ autosysadmin_dir }}/repair_php_fpm74.sh
command[repair_php_fpm80]=sudo {{ autosysadmin_dir }}/repair_php_fpm80.sh
command[repair_php_fpm81]=sudo {{ autosysadmin_dir }}/repair_php_fpm81.sh
command[repair_php_fpm82]=sudo {{ autosysadmin_dir }}/repair_php_fpm82.sh
command[repair_php_fpm83]=sudo {{ autosysadmin_dir }}/repair_php_fpm83.sh
command[repair_redis]=sudo {{ autosysadmin_dir }}/repair_redis.sh
command[repair_tomcat_instance]=sudo {{ autosysadmin_dir }}/repair_tomcat_instance.sh

View file

@ -1,21 +0,0 @@
#
# Ansible managed - DO NOT MODIFY, your changes will be overwritten !
#
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_amavis.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_disk.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_elasticsearch.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_http.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_mysql.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_opendkim.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm56.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm70.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm73.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm74.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm80.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm81.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm82.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm83.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_redis.sh
nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_tomcat_instance.sh

View file

@ -39,8 +39,8 @@ concat_files() {
chown root: "${hapee_cert_file}"
}
cert_and_key_mismatch() {
hapee_cert_md5=$(openssl x509 -noout -modulus -in "${hapee_cert_file}" | openssl md5)
hapee_key_md5=$(openssl rsa -noout -modulus -in "${hapee_cert_file}" | openssl md5)
hapee_cert_md5=$(openssl x509 -noout -pubkey -in "${hapee_cert_file}" | openssl md5)
hapee_key_md5=$(openssl pkey -noout -pubout -in "${hapee_cert_file}" | openssl md5)
test "${hapee_cert_md5}" != "${hapee_key_md5}"
}

View file

@ -29,8 +29,8 @@ concat_files() {
chown root: "${haproxy_cert_file}"
}
cert_and_key_mismatch() {
haproxy_cert_md5=$(openssl x509 -noout -modulus -in "${haproxy_cert_file}" | openssl md5)
haproxy_key_md5=$(openssl rsa -noout -modulus -in "${haproxy_cert_file}" | openssl md5)
haproxy_cert_md5=$(openssl x509 -noout -pubkey -in "${haproxy_cert_file}" | openssl md5)
haproxy_key_md5=$(openssl pkey -noout -pubout -in "${haproxy_cert_file}" | openssl md5)
test "${haproxy_cert_md5}" != "${haproxy_key_md5}"
}

View file

@ -0,0 +1,44 @@
#!/bin/sh
error() {
>&2 echo "${PROGNAME}: $1"
exit 1
}
debug() {
if [ "${VERBOSE}" = "1" ] && [ "${QUIET}" != "1" ]; then
>&2 echo "${PROGNAME}: $1"
fi
}
daemon_found_and_running() {
test -n "$(pidof nrpe)"
}
letsencrypt_lineaged_used() {
grep -r "^ssl_cert_file" /etc/nagios/ | grep "letsencrypt" | grep -q "$(basename "${RENEWED_LINEAGE}")"
}
copy_letsencrypt_cert() {
DEST_CERTIFICATE=$(grep -r "^ssl_cert_file" /etc/nagios/ | awk -F'=' '{print $2}')
DEST_PRIVATE_KEY=$(grep -r "^ssl_privatekey_file" /etc/nagios/ | awk -F'=' '{print $2}')
install --mode 440 --group nagios ${RENEWED_LINEAGE}/fullchain.pem ${DEST_CERTIFICATE}
install --mode 440 --group nagios ${RENEWED_LINEAGE}/privkey.pem ${DEST_PRIVATE_KEY}
}
main() {
if daemon_found_and_running; then
if letsencrypt_lineaged_used; then
debug "NRPE detected... Copying certificates to the right place & permissions"
copy_letsencrypt_cert
debug "Restarting NRPE"
systemctl restart nagios-nrpe-server
else
debug "NRPE doesn't use the given Let's Encrypt certificate. Skip."
fi
else
debug "NRPE is not running or missing. Skip."
fi
}
readonly PROGNAME=$(basename "$0")
readonly VERBOSE=${VERBOSE:-"0"}
readonly QUIET=${QUIET:-"0"}
main

View file

@ -147,9 +147,14 @@
tags:
- postfix
- name: Autosysadmin
- name: Autosysadmin (agent)
ansible.builtin.include_role:
name: 'evolix/autosysadmin'
name: 'evolix/autosysadmin-agent'
when: evolinux_autosysadmin_include | bool
- name: Autosysadmin (restart_nrpe)
ansible.builtin.include_role:
name: 'evolix/autosysadmin-restart_nrpe'
when: evolinux_autosysadmin_include | bool
- name: fail2ban

View file

@ -10,4 +10,5 @@ kvm_pair: null
lvm_filter:
- '"a|^/dev/sd[a-zA-Z]+[0-9]*$|"'
- '"a|^/dev/nvme[0-9]+(n[0-9]+)?(p[0-9]+)?$|"'
- '"a|^/dev/md[0-9]+$|"'
- '"a|^/dev/md[0-9]+$|"'
kvm_drbd_interface: null

View file

@ -0,0 +1,9 @@
---
- name: Allow all traffic through DRBD interface
ansible.builtin.lineinfile:
path: /etc/minifirewall.d/drbd
line: "/sbin/iptables -I INPUT -p tcp -i {{ kvm_drbd_interface }} -j ACCEPT"
create: yes
when:
- kvm_drbd_interface is defined
- kvm_drbd_interface | length > 0

View file

@ -16,3 +16,5 @@
- ansible.builtin.include: images.yml
- ansible.builtin.include: tools.yml
- ansible.builtin.include: firewall.yml

Binary file not shown.

View file

@ -91,8 +91,8 @@ command[check_php-fpm83]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi
command[check_dhcp_pool]={{ nagios_plugins_directory }}/check_dhcp_pool
command[check_ssl_local]={{ nagios_plugins_directory }}/check_ssl_local
command[check_pressure_cpu]=/usr/lib/nagios/plugins/check_pressure --cpu -w 100000 -c 500000
command[check_pressure_mem]=/usr/lib/nagios/plugins/check_pressure --mem -w 100000 -c 500000
command[check_pressure_io]=/usr/lib/nagios/plugins/check_pressure --io -w 100000 -c 500000
command[check_pressure_mem]=/usr/lib/nagios/plugins/check_pressure --mem --full -w 100000 -c 500000
command[check_pressure_io]=/usr/lib/nagios/plugins/check_pressure --io --full -w 100000 -c 500000
# Check HTTP "many". Use this to check many websites (http, https, ports, sockets and SSL certificates).
# Beware! All checks must not take more than 10s!

View file

@ -35,6 +35,7 @@ fi
# Dates in seconds
_15_days="1296000"
_30_days="2592000"
_60_days="5184000"
current_date=$($date_cmd +"%s")
# Trying to define the OpenVPN conf file location - default to /etc/openvpn/server.conf
@ -90,15 +91,15 @@ test_ca_expiration() {
if [ $current_date -ge $1 ]; then
CA_ECHO="CRITICAL - The server CA has expired on $formated_ca_expiration_date"
CA_STATE=$STATE_CRITICAL
# Expiration in 15 days or less - CA file
elif [ $((current_date+_15_days)) -ge $1 ]; then
CA_ECHO="CRITICAL - The server CA expires in 15 days or less : $formated_ca_expiration_date"
CA_STATE=$STATE_CRITICAL
# Expiration in 30 days or less - CA file
elif [ $((current_date+_30_days)) -ge $1 ]; then
CA_ECHO="WARNING - The server CA expires in 30 days or less : $formated_ca_expiration_date"
CA_ECHO="CRITICAL - The server CA expires in 30 days or less : $formated_ca_expiration_date"
CA_STATE=$STATE_CRITICAL
# Expiration in 60 days or less - CA file
elif [ $((current_date+_60_days)) -ge $1 ]; then
CA_ECHO="WARNING - The server CA expires in 60 days or less : $formated_ca_expiration_date"
CA_STATE=$STATE_WARNING
# Expiration in more than 30 days - CA file
# Expiration in more than 60 days - CA file
else
CA_ECHO="OK - The server CA expires on $formated_ca_expiration_date"
CA_STATE=$STATE_OK
@ -193,8 +194,8 @@ main() {
echo $RESTART_ECHO
exit $CERT_STATE
else
echo $CERT_ECHO
echo $CA_ECHO
echo $CERT_ECHO
echo $RESTART_ECHO
exit $CERT_STATE
fi

Binary file not shown.

View file

@ -1,243 +1,439 @@
#!/usr/bin/perl -w
#
## Copyright (C) 2009 Gleb Voronich <http://stanly.net.ua/>
##
## This program is free software; you can redistribute it and/or
## modify it under the terms of the GNU General Public License
## as published by the Free Software Foundation; version 2 dated June,
## 1991.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program; if not, write to the Free Software
## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
##
##
## $Log$
##
## Based on Redis module code v0.08 2009/from http://svn.rot13.org/index.cgi/Redis
##
## Installation process:
##
## 1. Download the plugin to your plugins directory (e.g. /usr/share/munin/plugins)
## 2. Create 3 symlinks at the directory that us used by munin for plugins detection (e.g. /etc/munin/plugins): redis_connected_clients, redis_per_sec and and redis_used_memory
## 3. Edit plugin-conf.d/munin-node if it is needed (env.host and env.port variables are accepted; set env.password for password protected Redis server)
## 4. Restart munin-node service
##
## Magic Markers
#%# family=auto
#%# capabilities=autoconf suggest
=head CONFIGURATION
Based on Redis module code v0.08 2009/from http://svn.rot13.org/index.cgi/Redis
Installation process:
1. Download the plugin to your plugins directory (e.g. /usr/share/munin/plugins)
2. Symlink it to your configuration directory (e.g. ln -s /usr/share/munin/plugins/redis /etc/munin/plugins/redis)
3. Edit plugin-conf.d/munin-node with the options to connect to your redis instances (see below for an example)
4. Restart munin-node service
Example config
[redis]
env.host1 127.0.0.1
env.port1 6379
env.password1 password
env.title_prefix1 redis-1
env.host2 /run/redis.sock
env.title_prefix2 redis-2
Each host should be specified with at least a host or unixsocket variable suffixed with
a number, the first being 1, the second being 2 etc. They must be in sequence.
Other options are:
* port - the redis port to connect to
* password - the password to use with the AUTH command
* title_prefix - a prefix to put before the title of the graph, this is strongly recommended for multiple instances
Graphs:
This generates multigraphs for:
* Connected clients
* Key Hit vs Miss ratio
* Keys per second, hits/misses/expirations/evictions
* Replication backlog
* Replication lag
* Request per second
* Total number of keys and keys with expires
* Used memory
=head COPYRIGHT
Copyright (C) 2020 Rowan Wookey <https://www.rwky.net/>
Copyright (C) 2009 Gleb Voronich <http://stanly.net.ua/>
=head LICENSE
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 dated June,
1991.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
=head MAGIC MARKERS
#%# family=auto
#%# capabilities=autoconf
=cut
use strict;
use IO::Socket::INET;
use IO::Socket::UNIX;
use Switch;
my $HOST = exists $ENV{'host'} ? $ENV{'host'} : "127.0.0.1";
my $UNIX_SOCKET = exists $ENV{'unixsocket'} ? $ENV{'unixsocket'} : ''; # path to Redis Unix sock file
my $PORT = exists $ENV{'port'} ? $ENV{'port'} : 6379;
my $PASSWORD = exists $ENV{'password'} ? $ENV{'password'} : undef;
my $TITLE_PREFIX = exists $ENV{'title_prefix'} ? $ENV{'title_prefix'} . ": " : "";
my %INSTANCES;
my $HOST;
my $PORT;
my $PASSWORD;
for (my $i = 1; $ENV{"host$i"}; $i++)
{
$HOST = exists $ENV{"host$i"} ? $ENV{"host$i"} : "127.0.0.1";
$PORT = exists $ENV{"port$i"} ? $ENV{"port$i"} : 6379;
$PASSWORD = exists $ENV{"password$i"} ? $ENV{"password$i"} : undef;
my $TITLE_PREFIX = exists $ENV{"title_prefix$i"} ? $ENV{"title_prefix$i"} . ": " : "";
my $SOCK = &get_conn();
$INSTANCES{"instance$i"} = {
HOST => $HOST,
PORT => $PORT,
PASSWORD => $PASSWORD,
TITLE_PREFIX => $TITLE_PREFIX,
SOCK => $SOCK
};
}
my $sock = &get_conn();
my $config = ( defined $ARGV[0] and $ARGV[0] eq "config" );
my $autoconf = ( defined $ARGV[0] and $ARGV[0] eq "autoconf" );
if ( $autoconf ) {
if ( defined( $sock ) ) {
if (!%INSTANCES) {
print "no (no redis instances configured)\n";
exit 0;
}
my $err = '';
for my $INSTANCE (keys %INSTANCES) {
if (! defined( $INSTANCES{$INSTANCE}{'SOCK'} ) ) {
$err = "no (unable to connect to ".$INSTANCES{$INSTANCE}{'HOST'}."\[:". $INSTANCES{$INSTANCE}{'PORT'}."\])\n";
}
}
if ($err) {
print $err;
} else {
print "yes\n";
exit 0;
} else {
print "no (unable to connect to $HOST\[:$PORT\])\n";
exit 0;
}
}
my $suggest = ( defined $ARGV[0] and $ARGV[0] eq "suggest" );
if ( $suggest ) {
if ( defined( $sock ) ) {
my @plugins = ('connected_clients', 'key_ratio', 'keys_per_sec', 'per_sec', 'used_keys', 'used_memory');
foreach my $plugin (@plugins) {
print "$plugin\n";
}
exit 0;
} else {
print "no (unable to connect to $HOST\[:$PORT\])\n";
exit 0;
}
exit 0;
}
my $hash=&get_info();
my $total = 0;
$0 =~ s/(.+)redis_//g;
my $multi_graph_output = '';
my $instance_graph_output = '';
switch ($0) {
case "connected_clients" {
if ( $config ) {
my $maxclients= get_config("maxclients")->{"maxclients"};
print "graph_title ${TITLE_PREFIX}Connected clients\n";
print "graph_vlabel Connected clients\n";
print "graph_category search\n";
print "graph_args -l 0\n";
print "connected_clients.line $maxclients:ff0000:Limit\n";
print "connected_clients.label connected clients\n";
exit 0;
my $connected_clients = 0;
my $keyspace_hits = 0;
my $keyspace_misses = 0;
my $expired_keys = 0;
my $evicted_keys = 0;
my $total_commands_processed = 0;
my $total_connections_received = 0;
my $repl_backlog_size = 0;
my $used_memory = 0;
my $used_memory_rss = 0;
my $used_memory_peak = 0;
my $total_keys = 0;
my $total_expires = 0;
foreach my $INSTANCE (keys %INSTANCES) {
my $sock = $INSTANCES{$INSTANCE}{'SOCK'};
my $TITLE_PREFIX = $INSTANCES{$INSTANCE}{'TITLE_PREFIX'};
my $hash = get_info($sock);
my $dbs;
foreach my $key (keys %{$hash}) {
if ( $key =~ /^db\d+$/ && $hash->{$key} =~ /keys=(\d+),expires=(\d+)/ ) {
$total_keys += $1;
$total_expires += $2;
$dbs->{$key} = [ $1, $2 ];
}
print "connected_clients.value " . $hash->{'connected_clients'} . "\n";
}
case "keys_per_sec" {
if ( $config ) {
print "graph_title ${TITLE_PREFIX}Keys Per Second\n";
print "graph_vlabel per \${graph_period}\n";
print "graph_category search\n";
print "graph_args -l 0\n";
print "hits.label hits\n";
print "hits.type COUNTER\n";
print "misses.label misses\n";
print "misses.type COUNTER\n";
print "expired.label expirations\n";
print "expired.type COUNTER\n";
print "evictions.label evictions\n";
print "evictions.type COUNTER\n";
exit 0;
if ( $config ) {
my $ret = get_config("maxclients", $sock);
# if the CONFIG command is disabled we don't show the max clients
my $maxclients = defined $ret ? $ret->{"maxclients"} : 0;
$instance_graph_output .= "multigraph redis_connected_clients.$INSTANCE\n";
$instance_graph_output .= "graph_title ${TITLE_PREFIX}Connected clients\n";
$instance_graph_output .= "graph_vlabel Connected clients\n";
$instance_graph_output .= "graph_category db\n";
$instance_graph_output .= "graph_args -l 0\n";
if ($maxclients) {
$instance_graph_output .= "connected_clients.line $maxclients:ff0000:Limit\n";
}
print "hits.value " . $hash->{'keyspace_hits'} . "\n";
print "misses.value " . $hash->{'keyspace_misses'} . "\n";
print "expired.value " . $hash->{'expired_keys'} . "\n";
print "evictions.value " . $hash->{'evicted_keys'} . "\n";
}
case "key_ratio" {
if ( $config ) {
print "graph_title ${TITLE_PREFIX}Key Hit vs Miss Ratio\n";
print "graph_vlabel per \${graph_period}\n";
print "graph_category search\n";
print "graph_args -u 100 -l 0 -r --base 1000\n";
print "hitratio.label hit ratio\n";
print "hitratio.type GAUGE\n";
print "hitratio.draw AREA\n";
print "missratio.label miss ratio\n";
print "missratio.type GAUGE\n";
print "missratio.draw STACK\n";
exit 0;
}
my $total = $hash->{'keyspace_hits'} + $hash->{'keyspace_misses'};
my $hitratio = 0;
my $missratio = 0;
if ($total > 0) {
$hitratio = $hash->{'keyspace_hits'} / $total * 100;
$missratio = $hash->{'keyspace_misses'} / $total * 100;
}
printf("hitratio.value %.2f\n", $hitratio);
printf("missratio.value %.2f\n", $missratio);
}
case "per_sec" {
if ( $config ) {
print "graph_title ${TITLE_PREFIX}Per second\n";
print "graph_vlabel per \${graph_period}\n";
print "graph_category search\n";
print "graph_args -l 0\n";
print "requests.label requests\n";
print "requests.type COUNTER\n";
print "connections.label connections\n";
print "connections.type COUNTER\n";
exit 0;
}
print "requests.value ". $hash->{'total_commands_processed'} ."\n";
print "connections.value ". $hash->{'total_connections_received'} ."\n";
}
case "used_memory" {
if ( $config ) {
my $maxmemory = get_config("maxmemory")->{"maxmemory"};
print "graph_title ${TITLE_PREFIX}Used memory\n";
print "graph_vlabel Used memory\n";
print "graph_category search\n";
print "graph_args -l 0 --base 1024\n";
print "used_memory.line $maxmemory:ff0000:Limit\n";
print "used_memory.label used memory\n";
print "used_memory_peak.label used memory in peak\n";
print "used_memory_rss.label Resident set size memory usage\n";
exit 0;
}
print "used_memory.value ". $hash->{'used_memory'} ."\n";
print "used_memory_rss.value ". $hash->{'used_memory_rss'} ."\n";
print "used_memory_peak.value ". $hash->{'used_memory_peak'} ."\n";
}
case "used_keys" {
my $dbs;
foreach my $key (keys %{$hash}) {
if ( $key =~ /^db\d+$/ && $hash->{$key} =~ /keys=(\d+),expires=(\d+)/ ) {
$dbs->{$key} = [ $1, $2 ];
}
}
if ( $config ) {
print "graph_title ${TITLE_PREFIX}Used keys\n";
print "graph_vlabel Used keys\n";
print "graph_category search\n";
print "graph_args -l 0\n";
foreach my $db (keys %{$dbs}) {
printf "%s_keys.label %s keys\n", $db, $db;
printf "%s_expires.label %s expires\n", $db, $db;
}
exit 0;
$instance_graph_output .= "connected_clients.label connected clients\n";
$instance_graph_output .= "multigraph keys_per_sec.$INSTANCE\n";
$instance_graph_output .= "graph_title ${TITLE_PREFIX}Keys Per Second\n";
$instance_graph_output .= "graph_vlabel per \${graph_period}\n";
$instance_graph_output .= "graph_category db\n";
$instance_graph_output .= "graph_args -l 0\n";
$instance_graph_output .= "hits.label hits\n";
$instance_graph_output .= "hits.type COUNTER\n";
$instance_graph_output .= "misses.label misses\n";
$instance_graph_output .= "misses.type COUNTER\n";
$instance_graph_output .= "expired.label expirations\n";
$instance_graph_output .= "expired.type COUNTER\n";
$instance_graph_output .= "evicted_keys.label evictions\n";
$instance_graph_output .= "evicted_keys.type COUNTER\n";
$instance_graph_output .= "multigraph redis_key_ratio.$INSTANCE\n";
$instance_graph_output .= "graph_title ${TITLE_PREFIX}Key Hit vs Miss Ratio\n";
$instance_graph_output .= "graph_vlabel per \${graph_period}\n";
$instance_graph_output .= "graph_category db\n";
$instance_graph_output .= "graph_args -u 100 -l 0 -r --base 1000\n";
$instance_graph_output .= "hitratio.label hit ratio\n";
$instance_graph_output .= "hitratio.type GAUGE\n";
$instance_graph_output .= "hitratio.draw AREA\n";
$instance_graph_output .= "missratio.label miss ratio\n";
$instance_graph_output .= "missratio.type GAUGE\n";
$instance_graph_output .= "missratio.draw STACK\n";
$instance_graph_output .= "multigraph redis_per_sec.$INSTANCE\n";
$instance_graph_output .= "graph_title ${TITLE_PREFIX}Requests Per second\n";
$instance_graph_output .= "graph_vlabel per \${graph_period}\n";
$instance_graph_output .= "graph_category db\n";
$instance_graph_output .= "graph_args -l 0\n";
$instance_graph_output .= "requests.label requests\n";
$instance_graph_output .= "requests.type COUNTER\n";
$instance_graph_output .= "connections.label connections\n";
$instance_graph_output .= "connections.type COUNTER\n";
$instance_graph_output .= "multigraph redis_repl_backlog_size.$INSTANCE\n";
$instance_graph_output .= "graph_title ${TITLE_PREFIX}replication backlog\n";
$instance_graph_output .= "graph_vlabel replication backlog\n";
$instance_graph_output .= "graph_category db\n";
$instance_graph_output .= "graph_args -l 0\n";
$instance_graph_output .= "repl_backlog_size.label bytes behind master\n";
$instance_graph_output .= "multigraph redis_repl_lag.$INSTANCE\n";
$instance_graph_output .= "graph_title ${TITLE_PREFIX}replication lag\n";
$instance_graph_output .= "graph_vlabel replication lag\n";
$instance_graph_output .= "graph_category db\n";
$instance_graph_output .= "graph_args -l 0\n";
$instance_graph_output .= "repl_backlog_size.label amount behind master\n";
# if the CONFIG command is disabled we don't show maxmemory
$ret = get_config("maxmemory", $sock);
my $maxmemory = defined $ret ? $ret->{"maxmemory"} : 0;
$instance_graph_output .= "multigraph redis_used_memory.$INSTANCE\n";
$instance_graph_output .= "graph_title ${TITLE_PREFIX}Used memory\n";
$instance_graph_output .= "graph_vlabel Used memory\n";
$instance_graph_output .= "graph_category db\n";
$instance_graph_output .= "graph_args -l 0 --base 1024\n";
if ($maxmemory) {
$instance_graph_output .= "used_memory.line $maxmemory:ff0000:Limit\n";
}
$instance_graph_output .= "used_memory.label used memory\n";
$instance_graph_output .= "used_memory_peak.label used memory in peak\n";
$instance_graph_output .= "used_memory_rss.label Resident set size memory usage\n";
$instance_graph_output .= "multigraph redis_used_keys.$INSTANCE\n";
$instance_graph_output .= "graph_title ${TITLE_PREFIX}Used keys\n";
$instance_graph_output .= "graph_vlabel Used keys\n";
$instance_graph_output .= "graph_category db\n";
$instance_graph_output .= "graph_args -l 0\n";
foreach my $db (keys %{$dbs}) {
printf "%s_keys.value %d\n", $db, $dbs->{$db}[0];
printf "%s_expires.value %d\n", $db, $dbs->{$db}[1];
$instance_graph_output .= sprintf "%s_keys.label %s keys\n", $db, $db;
$instance_graph_output .= sprintf "%s_expires.label %s expires\n", $db, $db;
}
next;
}
$instance_graph_output .= "multigraph redis_connected_clients.$INSTANCE\n";
$instance_graph_output .= "connected_clients.value " . $hash->{'connected_clients'} . "\n";
$connected_clients += $hash->{'connected_clients'};
$instance_graph_output .= "multigraph keys_per_sec.$INSTANCE\n";
$instance_graph_output .= "hits.value " . $hash->{'keyspace_hits'} . "\n";
$keyspace_hits += $hash->{'keyspace_hits'};
$instance_graph_output .= "misses.value " . $hash->{'keyspace_misses'} . "\n";
$keyspace_misses += $hash->{'keyspace_misses'};
$instance_graph_output .= "expired.value " . $hash->{'expired_keys'} . "\n";
$expired_keys += $hash->{'expired_keys'};
$instance_graph_output .= "evicted_keys.value " . $hash->{'evicted_keys'} . "\n";
$evicted_keys += $hash->{'evicted_keys'};
$instance_graph_output .= "multigraph redis_key_ratio.$INSTANCE\n";
my $total = $hash->{'keyspace_hits'} + $hash->{'keyspace_misses'};
my $hitratio = 0;
my $missratio = 0;
if ($total > 0) {
$hitratio = $hash->{'keyspace_hits'} / $total * 100;
$missratio = $hash->{'keyspace_misses'} / $total * 100;
}
$instance_graph_output .= sprintf("hitratio.value %.2f\n", $hitratio);
$instance_graph_output .= sprintf("missratio.value %.2f\n", $missratio);
$instance_graph_output .= "multigraph redis_per_sec.$INSTANCE\n";
$instance_graph_output .= "requests.value ". $hash->{'total_commands_processed'} ."\n";
$total_commands_processed += $hash->{'total_commands_processed'};
$instance_graph_output .= "connections.value ". $hash->{'total_connections_received'} ."\n";
$total_connections_received += $hash->{'total_connections_received'};
$instance_graph_output .= "multigraph redis_repl_backlog_size.$INSTANCE\n";
$instance_graph_output .= "repl_backlog_size.value " . $hash->{'repl_backlog_size'} . "\n";
$repl_backlog_size += $hash->{'repl_backlog_size'};
$instance_graph_output .= "multigraph redis_repl_lag.$INSTANCE\n";
if (exists $hash->{slave0} && $hash->{slave0} =~ /lag=(\d+)/) {
$repl_backlog_size += $1;
$instance_graph_output .= "repl_backlog_size.value " . $1 . "\n";
} else {
$instance_graph_output .= "repl_backlog_size.value 0\n";
}
$instance_graph_output .= "multigraph redis_used_memory.$INSTANCE\n";
$instance_graph_output .= "used_memory.value ". $hash->{'used_memory'} ."\n";
$used_memory += $hash->{'used_memory'};
$instance_graph_output .= "used_memory_rss.value ". $hash->{'used_memory_rss'} ."\n";
$used_memory_rss += $hash->{'used_memory_rss'};
$instance_graph_output .= "used_memory_peak.value ". $hash->{'used_memory_peak'} ."\n";
$used_memory_peak += $hash->{'used_memory_peak'};
$instance_graph_output .= "multigraph redis_used_keys.$INSTANCE\n";
foreach my $db (keys %{$dbs}) {
$instance_graph_output .= sprintf "%s_keys.value %d\n", $db, $dbs->{$db}[0];
$instance_graph_output .= sprintf "%s_expires.value %d\n", $db, $dbs->{$db}[1];
}
close ($sock);
}
close ($sock);
# multigraph output
if ($config) {
$multi_graph_output .= "multigraph redis_connected_clients\n";
$multi_graph_output .= "graph_title Connected clients\n";
$multi_graph_output .= "graph_vlabel Connected clients\n";
$multi_graph_output .= "graph_category db\n";
$multi_graph_output .= "graph_args -l 0\n";
$multi_graph_output .= "connected_clients.label connected clients\n";
$multi_graph_output .= "multigraph keys_per_sec\n";
$multi_graph_output .= "graph_title Keys Per Second\n";
$multi_graph_output .= "graph_vlabel per \${graph_period}\n";
$multi_graph_output .= "graph_category db\n";
$multi_graph_output .= "graph_args -l 0\n";
$multi_graph_output .= "hits.label hits\n";
$multi_graph_output .= "hits.type COUNTER\n";
$multi_graph_output .= "misses.label misses\n";
$multi_graph_output .= "misses.type COUNTER\n";
$multi_graph_output .= "expired.label expirations\n";
$multi_graph_output .= "expired.type COUNTER\n";
$multi_graph_output .= "evicted_keys.label evictions\n";
$multi_graph_output .= "evicted_keys.type COUNTER\n";
$multi_graph_output .= "multigraph redis_key_ratio\n";
$multi_graph_output .= "graph_title Key Hit vs Miss Ratio\n";
$multi_graph_output .= "graph_vlabel per \${graph_period}\n";
$multi_graph_output .= "graph_category db\n";
$multi_graph_output .= "graph_args -u 100 -l 0 -r --base 1000\n";
$multi_graph_output .= "hitratio.label hit ratio\n";
$multi_graph_output .= "hitratio.type GAUGE\n";
$multi_graph_output .= "hitratio.draw AREA\n";
$multi_graph_output .= "missratio.label miss ratio\n";
$multi_graph_output .= "missratio.type GAUGE\n";
$multi_graph_output .= "missratio.draw STACK\n";
$multi_graph_output .= "multigraph redis_per_sec\n";
$multi_graph_output .= "graph_title Requests Per second\n";
$multi_graph_output .= "graph_vlabel per \${graph_period}\n";
$multi_graph_output .= "graph_category db\n";
$multi_graph_output .= "graph_args -l 0\n";
$multi_graph_output .= "requests.label requests\n";
$multi_graph_output .= "requests.type COUNTER\n";
$multi_graph_output .= "connections.label connections\n";
$multi_graph_output .= "connections.type COUNTER\n";
$multi_graph_output .= "multigraph redis_repl_backlog_size\n";
$multi_graph_output .= "graph_title replication backlog\n";
$multi_graph_output .= "graph_vlabel replication backlog\n";
$multi_graph_output .= "graph_category db\n";
$multi_graph_output .= "graph_args -l 0\n";
$multi_graph_output .= "repl_backlog_size.label bytes behind master\n";
$multi_graph_output .= "multigraph redis_repl_lag\n";
$multi_graph_output .= "graph_title replication lag\n";
$multi_graph_output .= "graph_vlabel replication lag\n";
$multi_graph_output .= "graph_category db\n";
$multi_graph_output .= "graph_args -l 0\n";
$multi_graph_output .= "repl_backlog_size.label amount behind master\n";
$multi_graph_output .= "multigraph redis_used_memory\n";
$multi_graph_output .= "graph_title Used memory\n";
$multi_graph_output .= "graph_vlabel Used memory\n";
$multi_graph_output .= "graph_category db\n";
$multi_graph_output .= "graph_args -l 0 --base 1024\n";
$multi_graph_output .= "used_memory.label used memory\n";
$multi_graph_output .= "used_memory_peak.label used memory in peak\n";
$multi_graph_output .= "used_memory_rss.label Resident set size memory usage\n";
$multi_graph_output .= "multigraph redis_used_keys\n";
$multi_graph_output .= "graph_title Used keys\n";
$multi_graph_output .= "graph_vlabel Used keys\n";
$multi_graph_output .= "graph_category db\n";
$multi_graph_output .= "graph_args -l 0\n";
$multi_graph_output .= "total_keys.label Total keys\n";
$multi_graph_output .= "total_expires.label Total expires\n";
} else {
$multi_graph_output .= "multigraph redis_connected_clients\n";
$multi_graph_output .= "connected_clients.value " . $connected_clients . "\n";
$multi_graph_output .= "multigraph keys_per_sec\n";
$multi_graph_output .= "hits.value " . $keyspace_hits . "\n";
$multi_graph_output .= "misses.value " . $keyspace_misses . "\n";
$multi_graph_output .= "expired.value " . $expired_keys . "\n";
$multi_graph_output .= "evicted_keys.value " . $evicted_keys . "\n";
$multi_graph_output .= "multigraph redis_key_ratio\n";
my $total = $keyspace_hits + $keyspace_misses;
my $hitratio = 0;
my $missratio = 0;
if ($total > 0) {
$hitratio = $keyspace_hits / $total * 100;
$missratio = $keyspace_misses / $total * 100;
}
$multi_graph_output .= sprintf("hitratio.value %.2f\n", $hitratio);
$multi_graph_output .= sprintf("missratio.value %.2f\n", $missratio);
$multi_graph_output .= "multigraph redis_per_sec\n";
$multi_graph_output .= "requests.value ". $total_commands_processed ."\n";
$multi_graph_output .= "connections.value ". $total_connections_received ."\n";
$multi_graph_output .= "multigraph redis_repl_backlog_size\n";
$multi_graph_output .= "repl_backlog_size.value " . $repl_backlog_size . "\n";
$multi_graph_output .= "multigraph redis_repl_lag\n";
$multi_graph_output .= "repl_backlog_size.value " . $repl_backlog_size . "\n";
$multi_graph_output .= "multigraph redis_used_memory\n";
$multi_graph_output .= "used_memory.value ". $used_memory ."\n";
$multi_graph_output .= "used_memory_rss.value ". $used_memory_rss ."\n";
$multi_graph_output .= "used_memory_peak.value ". $used_memory_peak ."\n";
$multi_graph_output .= "multigraph redis_used_keys\n";
$multi_graph_output .= "total_keys.value $total_keys\n";
$multi_graph_output .= "total_expires.value $total_expires\n";
}
print $multi_graph_output;
print $instance_graph_output;
sub get_conn {
my $sock;
if( $UNIX_SOCKET && -S $UNIX_SOCKET ){
$sock = IO::Socket::UNIX->new(
Type => SOCK_STREAM(),
Peer => $UNIX_SOCKET,
);
if(-S $HOST ){
$sock = IO::Socket::UNIX->new(
Type => SOCK_STREAM(),
Peer => $HOST,
);
}else{
$sock = IO::Socket::INET->new(
PeerAddr => $HOST,
PeerPort => $PORT,
Timeout => 10,
Proto => 'tcp'
);
$sock = IO::Socket::INET->new(
PeerAddr => $HOST,
PeerPort => $PORT,
Timeout => 10,
Proto => 'tcp'
);
}
if (! defined($sock)) {
die "can't read socket: $!";
}
if ( defined( $PASSWORD ) ) {
print $sock "AUTH ", $PASSWORD, "\r\n";
my $result = <$sock> || die "can't read socket: $!";
}
return $sock;
}
sub get_info{
my $sock = $_[0];
print $sock "INFO\r\n";
my $result = <$sock> || die "can't read socket: $!";
@ -257,13 +453,12 @@ sub get_info{
# This subroutine returns configuration matched to supplied as object
sub get_config{
my $sock = $_[1];
print $sock "*3\r\n\$6\r\nCONFIG\r\n\$3\r\nGET\r\n\$".length($_[0])."\r\n".$_[0]."\r\n";
# Response will look like like
# *2\r\n$9\r\nmaxmemory\r\n$10\r\n3221225472\r\n
my $type = <$sock> || die "can't read socket: $!";
my $conf;
if( substr($type,0,1) ne "*" ) {
return $conf;

View file

@ -23,6 +23,7 @@
path: /etc/sysfs.conf
line: kernel/mm/transparent_hugepage/enabled = {{ redis_sysctl_transparent_hugepage_enabled }}
regexp: "kernel/mm/transparent_hugepage/enabled"
create: yes
tags:
- redis
- kernel
@ -32,4 +33,4 @@
cmd: "echo '{{ redis_sysctl_transparent_hugepage_enabled }}' >> /sys/kernel/mm/transparent_hugepage/enabled"
tags:
- redis
- kernel
- kernel