diff --git a/CHANGELOG.md b/CHANGELOG.md index cac19846..8362c5d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,30 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Security +## [24.03] 2024-03-01 + +### Added + +* autosysadmin-agent: upstream release 24.03 +* autosysadmin-restart_nrpe: add role +* certbot: Renewal hook for NRPE +* kvm-host: add minifirewall rules if DRBD interface is configured + +### Changed + +* apt: add ftp.evolix.org as recognized system source +* autosysadmin-agent: logs clearing is done weekly +* autosysadmin-agent: rename /usr/share/scripts/autosysadmin/{auto,restart} +* certbot: use pkey to test the key +* evolinux-base: execute autosysadmin-agent and autosysadmin-restart_nrpe roles +* lxc-php, php: Update sury PGP key +* openvpn: earlier alert for CA expiration +* redis: create sysfs config file if missing + +### Removed + +* autosysadmin: replaced by autosysadmin-agent + ## [24.02.1] 2024-02-08 ### Fixed @@ -77,6 +101,7 @@ The **patch** part changes is incremented if multiple releases happen the same m * nagios: add dockerd check in nrpe check template * nagios: cleaning nrpe check template * nagios: rename var `nagios_nrpe_process_processes` into `nagios_nrpe_processes` and check systemd-timesyncd instead of ntpd in Debian 12 +* nagios: add option --full to check pressure IO and mem to avoid flaps * proftpd: in SFTP vhost, enable SSH keys login, enable ed25549 host key for Debian >= 11 * redis: manage config template inside a block, to allow custom modifications outside * spamassassin: Use spamd starting with Bookworm diff --git a/apt/files/deb822-migration.py b/apt/files/deb822-migration.py index f8693b28..cb135972 100755 --- a/apt/files/deb822-migration.py +++ b/apt/files/deb822-migration.py @@ -1,5 +1,11 @@ #!/usr/bin/env python3 +########## +# This script takes a multi-lines input of "oneliner-style" APT sources definitions. +# It converts them into "deb822-style" sources. 
+# Each generated file will have only one stanza, possibly with multiple Types/Suites/Components +########## + import re import sys import os @@ -10,11 +16,16 @@ import apt_pkg # Order matters ! destinations = { "debian-security": "security.sources", + ".*-backports": "backports.sources", + ".debian.org": "system.sources", "mirror.evolix.org": "system.sources", + "ftp.evolix.org": "system.sources", + "pub.evolix.net": "evolix_public_old.sources.bak", "pub.evolix.org": "evolix_public.sources", + "artifacts.elastic.co": "elastic.sources", "download.docker.com": "docker.sources", "downloads.linux.hpe.com": "hp.sources", @@ -76,6 +87,11 @@ def prepare_sources(lines): key, value = option.split("=") options[key] = value + ### WARNING ### + # if there are multiple lines with different URIS for a given destination (eg. "system") + # each one will overwrite the previous one + # and the last evaluated will be what remains. + if dest in sources: sources[dest]["Types"].add(matches["type"]) sources[dest]["URIs"] = matches["uri"] diff --git a/apt/files/deb822-migration.sh b/apt/files/deb822-migration.sh index 10fb7889..7a4fb787 100755 --- a/apt/files/deb822-migration.sh +++ b/apt/files/deb822-migration.sh @@ -1,5 +1,11 @@ #!/bin/sh +########## +# This script changes all "one-line" APT sources into "deb822" sources. +# It is responsible for searching and processing the files. +# The actual format migration is done by a python script. 
+########## + deb822_migrate_script=$(command -v deb822-migration.py) if [ -z "${deb822_migrate_script}" ]; then @@ -46,4 +52,4 @@ for file in $(find /etc/apt/sources.list.d -mindepth 1 -maxdepth 1 -type f -name done echo "${count} file(s) migrated" -exit ${rc} \ No newline at end of file +exit ${rc} diff --git a/autosysadmin-agent/defaults/main.yml b/autosysadmin-agent/defaults/main.yml new file mode 100644 index 00000000..b223a683 --- /dev/null +++ b/autosysadmin-agent/defaults/main.yml @@ -0,0 +1,17 @@ +--- + +general_scripts_dir: "/usr/share/scripts" + +autosysadmin_agent_bin_dir: "/usr/local/bin/autosysadmin" +autosysadmin_agent_lib_dir: "/usr/local/lib/autosysadmin" +autosysadmin_agent_auto_dir: "{{ general_scripts_dir }}/autosysadmin/restart" + +autosysadmin_agent_crontab_enabled: true +autosysadmin_agent_log_retention_days: 365 + +autosysadmin_config: [] + ### All repair are disabled if set to 'off' + ### even if a specific repair value is 'on' + # repair_all: 'on' + ### Default values for checks + # repair_foo: 'off' diff --git a/autosysadmin/files/logrotate_autosysadmin.conf b/autosysadmin-agent/files/autosysadmin.logrotate.conf similarity index 100% rename from autosysadmin/files/logrotate_autosysadmin.conf rename to autosysadmin-agent/files/autosysadmin.logrotate.conf diff --git a/autosysadmin/files/rsyslog_autosysadmin.conf b/autosysadmin-agent/files/autosysadmin.rsyslog.conf similarity index 100% rename from autosysadmin/files/rsyslog_autosysadmin.conf rename to autosysadmin-agent/files/autosysadmin.rsyslog.conf diff --git a/autosysadmin-agent/files/upstream/bin/delete_old_logs.sh b/autosysadmin-agent/files/upstream/bin/delete_old_logs.sh new file mode 100644 index 00000000..a39d9efe --- /dev/null +++ b/autosysadmin-agent/files/upstream/bin/delete_old_logs.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +days=${1:-365} +log_dir="/var/log/autosysadmin/" + +if [ -d "${log_dir}" ]; then + find_run_dirs() { + find "${log_dir}" \ + -mindepth 1 \ + -maxdepth 1 \ + -type 
d \ + -ctime "+${days}" \ + -print0 + } + log() { + /usr/bin/logger -p local0.notice -t autosysadmin "${1}" + } + + while IFS= read -r -d '' run_dir; do + rm --recursive --force "${run_dir}" + log "Delete ${run_dir} (older than ${days} days)" + done < <(find_run_dirs) +fi + +exit 0 diff --git a/autosysadmin-agent/files/upstream/lib/common.sh b/autosysadmin-agent/files/upstream/lib/common.sh new file mode 100755 index 00000000..cc3c53e6 --- /dev/null +++ b/autosysadmin-agent/files/upstream/lib/common.sh @@ -0,0 +1,907 @@ +#!/bin/bash + +VERSION="24.03" + +# Common functions for "repair" and "restart" scripts + +set -u + +# Initializes the program, context, configuration… +initialize() { + PATH="${PATH}":/usr/sbin:/sbin + + # Used in many places to refer to the program name. + # Examples: repair_mysql, restart_nrpe… + PROGNAME=$(basename "${0}") + + # find out if running in interactive mode, or not + if [ -t 0 ]; then + INTERACTIVE=1 + else + INTERACTIVE=0 + fi + readonly INTERACTIVE + + # Default empty value for Debug mode + DEBUG="${DEBUG:-""}" + + # Repair scripts obey to the value of a variable named after the script + # You can set the value ("on" or "off") in /etc/evolinux/autosysadmin + # Here we set the default value to "on". 
+ declare -g "${PROGNAME}"=on # dynamic variable assignment ($PROGNAME == repair_*) + + PID=$$ + readonly PID + + # Each execution (run) gets a unique ID + RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${PROGNAME}_${PID}" + readonly RUN_ID + + # Main log directory + MAIN_LOG_DIR="/var/log/autosysadmin" + readonly MAIN_LOG_DIR + # shellcheck disable=SC2174 + mkdir --mode=750 --parents "${MAIN_LOG_DIR}" + chgrp adm "${MAIN_LOG_DIR}" + + # Each execution stores some information + # in a unique directory based on the RUN_ID + RUN_LOG_DIR="${MAIN_LOG_DIR}/${RUN_ID}" + readonly RUN_LOG_DIR + # shellcheck disable=SC2174 + mkdir --mode=750 --parents "${RUN_LOG_DIR}" + chgrp adm "${RUN_LOG_DIR}" + + # This log file contains all events + RUN_LOG_FILE="${RUN_LOG_DIR}/autosysadmin.log" + readonly RUN_LOG_FILE + + # This log file contains notable actions + ACTIONS_FILE="${RUN_LOG_DIR}/actions.log" + readonly ACTIONS_FILE + touch "${ACTIONS_FILE}" + # This log file contains abort reasons (if any) + ABORT_FILE="${RUN_LOG_DIR}/abort.log" + readonly ABORT_FILE + # touch "${ABORT_FILE}" + + # Date format for log messages + DATE_FORMAT="%Y-%m-%d %H:%M:%S" + + # This will contain lock, last-run markers… + # It's ok to lose the content after a reboot + RUN_DIR="/run/autosysadmin" + readonly RUN_DIR + mkdir -p "${RUN_DIR}" + + # Only a single instance of each script can run simultaneously + # We use a customizable lock name for this. + # By default it's the script's name + LOCK_NAME=${LOCK_NAME:-${PROGNAME}} + # If a lock is found, we can wait for it to disappear.
+ # The value must be understood by sleep(1) + LOCK_WAIT="0" + + # Default values for email headers + EMAIL_FROM="equipe+autosysadmin@evolix.fr" + EMAIL_INTERNAL="autosysadmin@evolix.fr" + + LOCK_FILE="${RUN_DIR}/${LOCK_NAME}.lock" + readonly LOCK_FILE + # Remove lock file at exit + cleanup() { + # shellcheck disable=SC2317 + rm -f "${LOCK_FILE}" + } + trap 'cleanup' 0 + + # Load configuration + # shellcheck disable=SC1091 + test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin + + log_all "Begin ${PROGNAME} RUN_ID: ${RUN_ID}" + log_all "Log directory is ${RUN_LOG_DIR}" +} + +# Executes a list of tasks before exiting: +# * prepare a summary of actions and possible abort reasons +# * send emails +# * do some cleanup +quit() { + log_all "End ${PROGNAME} RUN_ID: ${RUN_ID}" + + summary="RUN_ID: ${RUN_ID}" + if [ -s "${ABORT_FILE}" ]; then + # Add abort reasons to summary + summary="${summary}\n$(print_abort_reasons)" + hook_mail "abort" + + return_code=1 + else + if [ -s "${ACTIONS_FILE}" ]; then + # Add notable actions to summary + summary="${summary}\n$(print_actions "Aucune action")" + hook_mail "success" + fi + + return_code=0 + fi + + hook_mail "internal" + + if is_interactive; then + # shellcheck disable=SC2001 + echo "${summary}" | sed -e 's/\\n/\n/g' + else + /usr/share/scripts/evomaintenance.sh --auto --user autosysadmin --message "${summary}" --no-commit --no-mail + fi + + teardown + + # shellcheck disable=SC2086 + exit ${return_code} +} + +teardown() { + : +} + +# Return true/false +is_interactive() { + test "${INTERACTIVE}" -eq "1" +} + +save_server_state() { + DUMP_SERVER_STATE_BIN="$(command -v dump-server-state || command -v backup-server-state)" + + if [ -z "${DUMP_SERVER_STATE_BIN}" ]; then + log_all "Warning: dump-server-state is not present. No server state recorded." 
+ fi + + if [ -x "${DUMP_SERVER_STATE_BIN}" ]; then + DUMP_DIR=$(file_path_in_log_dir "server-state") + # We don't want the logging to take too much time, + # so we kill it if it takes more than 20 seconds. + timeout --signal 9 20 \ + "${DUMP_SERVER_STATE_BIN}" \ + --dump-dir="${DUMP_DIR}" \ + --df \ + --dmesg \ + --iptables \ + --lxc \ + --netcfg \ + --netstat \ + --uname \ + --processes \ + --systemctl \ + --uptime \ + --virsh \ + --disks \ + --mysql-processes \ + --no-apt-states \ + --no-apt-config \ + --no-dpkg-full \ + --no-dpkg-status \ + --no-mount \ + --no-packages \ + --no-sysctl \ + --no-etc + + log_run "Server state saved in \`server-state' directory." + fi +} + +is_debug() { + # first time: do the check (DEBUG is still empty)… + # other times: pass (DEBUG already holds "0" or "1") + if [ -z "${DEBUG:-""}" ]; then + debug_file="/etc/evolinux/autosysadmin.debug" + + if [ -e "${debug_file}" ]; then + last_change=$(stat -c %Z "${debug_file}") + limit_date=$(date --date "14400 seconds ago" +"%s") + + if [ $(( last_change - limit_date )) -le "0" ]; then + log_run "Debug mode disabled; file is too old ($(( $(date +%s) - last_change )) seconds)." + rm "${debug_file}" + # Debug mode disabled + DEBUG="0" + else + log_run "Debug mode enabled." + # Debug mode enabled + DEBUG="1" + fi + else + # log_run "Debug mode disabled; file is absent." + # Debug mode disabled + DEBUG="0" + fi + fi + # return the value + test "${DEBUG}" -eq "1" +} + +# Uses the who(1) definition of "active" +currently_active_users() { + LC_ALL=C who --users | grep --extended-regexp "\s+\.\s+" | awk '{print $1}' | sort --human-numeric-sort | uniq +} +# Users active in the last 29 minutes +recently_active_users() { + LC_ALL=C who --users | grep --extended-regexp "\s+00:(0|1|2)[0-9]\s+" | awk --field-separator ' ' '{print $1,$6}' +} +# Save the list of users to a file in the log directory +save_active_users() { + LC_ALL=C who --users | save_in_log_dir "who-users" +} + +# An autosysadmin must not perform actions if a user is active or was active recently.
+# +# This can be bypassed in interactive mode. +# It's OK to lose this data after a reboot. +ensure_no_active_users_or_exit() { + # Save all active users + save_active_users + + if is_debug; then + log_run "Debug mode enabled: continue without checking active users." + return 0; + fi + + # Is there any currently active user? + currently_active_users=$(currently_active_users) + if [ -n "${currently_active_users}" ]; then + # sed works line by line and never sees the newline: join the lines with paste(1) + users_oneliner=$(echo "${currently_active_users}" | paste -s -d ' ') + log_run "Currently active users: ${users_oneliner}" + if is_interactive; then + echo "Some users are currently active:" + # shellcheck disable=SC2001 + echo "${currently_active_users}" | sed -e 's/\(.\+\)/* \1/' + answer="" + while :; do + printf "> Continue? [Y,n,?] " + read -r answer + case ${answer} in + [Yy]|"" ) + log_run "Active users check bypassed manually in interactive mode." + return + ;; + [Nn] ) + log_run "Active users check confirmed manually in interactive mode." + log_abort_and_quit "Active users detected: ${users_oneliner}" + ;; + * ) + printf "y - yes, continue\n" + printf "n - no, exit\n" + printf "? - print this help\n" + ;; + esac + done + else + log_abort_and_quit "Currently active users detected: ${users_oneliner}." + fi + else + # or recently (the last 30 minutes) active user? + recently_active_users=$(recently_active_users) + if [ -n "${recently_active_users}" ]; then + # sed works line by line and never sees the newline: join the lines with paste(1) + users_oneliner=$(echo "${recently_active_users}" | paste -s -d ' ') + log_run "Recently active users: ${users_oneliner}" + if is_interactive; then + echo "Some users were recently active:" + # shellcheck disable=SC2001 + echo "${recently_active_users}" | sed -e 's/\(.\+\)/* \1/' + answer="" + while :; do + printf "> Continue? [Y,n,?] " + read -r answer + case ${answer} in + [Yy]|"" ) + log_run "Active users check bypassed manually in interactive mode."
+ return + ;; + [Nn] ) + log_run "Active users check confirmed manually in interactive mode." + log_abort_and_quit "Recently active users detected: ${users_oneliner}." + ;; + * ) + printf "y - yes, continue\n" + printf "n - no, exit\n" + printf "? - print this help\n" + ;; + esac + done + else + log_abort_and_quit "Recently active users detected: ${users_oneliner}." + fi + fi + fi +} + +# Takes an NRPE command name as 1st parameter, +# and executes the full command if found in the configuration. +# Return the result and the return code of the command. +check_nrpe() { + check="$1" + + nrpe_files="" + + # Check if NRPE config is found + if [ -f "/etc/nagios/nrpe.cfg" ]; then + nrpe_files="${nrpe_files} /etc/nagios/nrpe.cfg" + else + msg="NRPE configuration not found: /etc/nagios/nrpe.cfg" + log_run "${msg}" + echo "${msg}" + return 3 + fi + + # Search for included files + # shellcheck disable=SC2086 + while IFS= read -r include_file; do + nrpe_files="${nrpe_files} ${include_file}" + done < <(grep --extended-regexp '^\s*include=.+' ${nrpe_files} | cut -d = -f 2) + + # Search for files in included directories + # shellcheck disable=SC2086 + while IFS= read -r include_dir; do + nrpe_files="${nrpe_files} ${include_dir}/*.cfg" + done < <(grep --extended-regexp '^\s*include_dir=.+' ${nrpe_files} | cut -d = -f 2) + + # Fetch uncommented commands in (sorted) config files + # shellcheck disable=SC2086 + nrpe_commands=$(grep --no-filename --exclude=*~ --fixed-strings "[${check}]" ${nrpe_files} | grep --invert-match --extended-regexp '^\s*#\s*command' | cut -d = -f 2) + nrpe_commands_count=$(echo "${nrpe_commands}" | wc -l) + + if is_debian_version "9" "<=" && [ "${nrpe_commands_count}" -gt "1" ]; then + # On Debian <= 9, NRPE loading was not sorted + # we need to raise an error if we have multiple defined commands + msg="Unable to determine which NRPE command to run" + log_run "${msg}" + echo "${msg}" + return 3 + else + # On Debian > 9, use the last command + 
nrpe_command=$(echo "${nrpe_commands}" | tail -n 1) + + nrpe_result=$(${nrpe_command}) + nrpe_rc=$? + + log_run "NRPE command (exited with ${nrpe_rc}): ${nrpe_command}" + log_run "${nrpe_result}" + + echo "${nrpe_result}" + return "${nrpe_rc}" + fi +} + +# An autosysadmin script must not run twice (or more) simultaneously. +# We use a customizable (with LOCK_NAME) lock file to keep track of this. +# A wait time can be configured. +# +# NOTE(review): no interactive bypass is implemented in this function. +# It's OK to lose this data after a reboot. +acquire_lock_or_exit() { + lock_file="${1:-${LOCK_FILE}}" + lock_wait="${2:-${LOCK_WAIT}}" + + # lock_wait must be compatible with sleep(1), otherwise fallback to 0 + if ! echo "${lock_wait}" | grep -Eq '^[0-9]+[smhd]?$'; then + log_run "Lock wait: incorrect value '${lock_wait}', fallback to 0." + lock_wait=0 + fi + + if [ "${lock_wait}" != "0" ] && [ -f "${lock_file}" ]; then + log_run "Lock file present. Let's wait ${lock_wait} and check again." + sleep "${lock_wait}" + fi + + if [ -f "${lock_file}" ]; then + log_abort_and_quit "Lock file still present." + else + log_run "Lock file absent. Let's put one." + touch "${lock_file}" + fi +} + +# If a script has been run in the last 30 minutes, running it again won't fix the issue. +# We use a /run/autosysadmin/${PROGNAME}_lastrun file to keep track of this. +# +# This can be bypassed in interactive mode. +# This is bypassed in debug mode. +# It's OK to lose this data after a reboot. +ensure_not_too_soon_or_exit() { + if is_debug; then + log_run "Debug mode enabled: continue without checking when was the last run." + return 0; + fi + + lastrun_file="${RUN_DIR}/${PROGNAME}_lastrun" + if [ -f "${lastrun_file}" ]; then + lastrun_age="$(($(date +%s)-$(stat -c "%Y" "${lastrun_file}")))" + log_run "Last run was ${lastrun_age} seconds ago." + if [ "${lastrun_age}" -lt 1800 ]; then + if is_interactive; then + echo "${PROGNAME} was run ${lastrun_age} seconds ago." + answer="" + while :; do + printf "> Continue?
[Y,n,?] " + read -r answer + case ${answer} in + [Yy]|"" ) + log_run "Last run check bypassed manually in interactive mode." + break + ;; + [Nn] ) + log_run "Last run check confirmed manually in interactive mode." + log_abort_and_quit 'Last run too recent.' + ;; + * ) + printf "y - yes, continue\n" + printf "n - no, exit\n" + printf "? - print this help\n" + ;; + esac + done + else + log_abort_and_quit "Last run too recent." + fi + fi + fi + touch "${lastrun_file}" +} + +# Populate DEBIAN_VERSION and DEBIAN_RELEASE variables +# based on gathered information about the operating system +detect_os() { + DEBIAN_RELEASE="unknown" + DEBIAN_VERSION="unknown" + LSB_RELEASE_BIN="$(command -v lsb_release)" + + if [ -e /etc/debian_version ]; then + DEBIAN_VERSION="$(cut -d "." -f 1 < /etc/debian_version)" + if [ -x "${LSB_RELEASE_BIN}" ]; then + DEBIAN_RELEASE="$("${LSB_RELEASE_BIN}" --codename --short)" + else + case "${DEBIAN_VERSION}" in + 7) DEBIAN_RELEASE="wheezy" ;; + 8) DEBIAN_RELEASE="jessie" ;; + 9) DEBIAN_RELEASE="stretch" ;; + 10) DEBIAN_RELEASE="buster" ;; + 11) DEBIAN_RELEASE="bullseye" ;; + 12) DEBIAN_RELEASE="bookworm" ;; + 13) DEBIAN_RELEASE="trixie" ;; + esac + fi + # log_run "Detected OS: Debian version=${DEBIAN_VERSION} release=${DEBIAN_RELEASE}" + # else + # log_run "Detected OS: unknown (missing /etc/debian_version)" + fi +} + +is_debian_wheezy() { + test "${DEBIAN_RELEASE}" = "wheezy" +} +is_debian_jessie() { + test "${DEBIAN_RELEASE}" = "jessie" +} +is_debian_stretch() { + test "${DEBIAN_RELEASE}" = "stretch" +} +is_debian_buster() { + test "${DEBIAN_RELEASE}" = "buster" +} +is_debian_bullseye() { + test "${DEBIAN_RELEASE}" = "bullseye" +} +is_debian_bookworm() { + test "${DEBIAN_RELEASE}" = "bookworm" +} +is_debian_trixie() { + test "${DEBIAN_RELEASE}" = "trixie" +} +is_debian_version() { + local version=$1 + local relation=${2:-"eq"} + + if [ -z "${DEBIAN_VERSION:-""}" ]; then + detect_os + fi + + dpkg --compare-versions "${DEBIAN_VERSION}" 
"${relation}" "${version}" +} + +# List systemd services (only names), even if stopped +systemd_list_services() { + pattern=$1 + + systemctl list-units --all --no-legend --type=service "${pattern}" | grep --only-matching --extended-regexp '\S+\.service' +} + +is_systemd_enabled() { + systemctl --quiet is-enabled "$1" 2> /dev/null +} + +is_systemd_active() { + systemctl --quiet is-active "$1" 2> /dev/null +} + +is_sysvinit_enabled() { + find /etc/rc2.d/ -name "$1" > /dev/null +} + +get_fqdn() { + # shellcheck disable=SC2155 + local system=$(uname -s) + + if [ "${system}" = "Linux" ]; then + hostname --fqdn + elif [ "${system}" = "OpenBSD" ]; then + hostname + else + log_abort_and_quit "System '${system}' not recognized." + fi +} + +get_complete_hostname() { + REAL_HOSTNAME="$(get_fqdn)" + if [ "${HOSTNAME}" = "${REAL_HOSTNAME}" ]; then + echo "${HOSTNAME}" + else + echo "${HOSTNAME} (${REAL_HOSTNAME})" + fi +} +# Fetch values from evomaintenance configuration +get_evomaintenance_mail() { + grep "EVOMAINTMAIL=" /etc/evomaintenance.cf | cut -d '=' -f2 +} +get_evomaintenance_emergency_mail() { + grep "URGENCYFROM=" /etc/evomaintenance.cf | cut -d '=' -f2 +} +get_evomaintenance_emergency_tel() { + grep "URGENCYTEL=" /etc/evomaintenance.cf | cut -d '=' -f2 +} + +# Log a message to the log file in the log directory +log_run() { + local msg="${1:-$(cat /dev/stdin)}" + # shellcheck disable=SC2155 + local date=$(/bin/date +"${DATE_FORMAT}") + + printf "[%s] %s[%s]: %s\\n" \ + "${date}" "${PROGNAME}" "${PID}" "${msg}" \ + >> "${RUN_LOG_FILE}" +} +# Log a message in the system log file (syslog or journald) +log_global() { + local msg="${1:-$(cat /dev/stdin)}" + + echo "${msg}" \ + | /usr/bin/logger -p local0.notice -t autosysadmin +} +# Log a message in both places +log_all() { + local msg="${1:-$(cat /dev/stdin)}" + + log_global "${msg}" + log_run "${msg}" +} +# Log a notable action in regular places +# and append it to the dedicated list +log_action() { + log_all "$*" + 
append_action "$*" +} +# Append a line in the actions.log file in the log directory +append_action() { + echo "$*" >> "${ACTIONS_FILE}" +} +# Print the content of the actions.log file +# or a fallback content (1st parameter) if empty +# shellcheck disable=SC2120 +print_actions() { + local fallback=${1:-""} + if [ -s "${ACTIONS_FILE}" ]; then + cat "${ACTIONS_FILE}" + elif [ -n "${fallback}" ]; then + echo "${fallback}" + fi +} + +# Log a an abort reason in regular places +# and append it to the dedicated list +log_abort() { + log_all "$*" + append_abort_reason "$*" +} +# Append a line in the abort.log file in the log directory +append_abort_reason() { + echo "$*" >> "${ABORT_FILE}" +} +# Print the content of the abort.log file +# or a fallback content (1st parameter) if empty +# shellcheck disable=SC2120 +print_abort_reasons() { + local fallback=${1:-""} + if [ -s "${ABORT_FILE}" ]; then + cat "${ABORT_FILE}" + elif [ -n "${fallback}" ]; then + echo "${fallback}" + fi +} +# Print the content of the main log from the log directory +print_main_log() { + cat "${RUN_LOG_FILE}" +} +# Log an abort reason and quit the script +log_abort_and_quit() { + log_abort "$*" + quit +} + +# Store the content from standard inpu +# into a file in the log directory named after the 1st parameter +save_in_log_dir() { + local file_name=$1 + local file_path="${RUN_LOG_DIR}/${file_name}" + + cat /dev/stdin > "${file_path}" + + log_run "Saved \`${file_name}' file." +} +# Return the full path of the file in log directory +# based on the name in the 1st parameter +file_path_in_log_dir() { + echo "${RUN_LOG_DIR}/${1}" +} + +format_mail_success() { + cat < +Content-Type: text/plain; charset=UTF-8 +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +X-Script: ${PROGNAME} +X-RunId: ${RUN_ID} +To: ${EMAIL_CLIENT:-alert5@evolix.fr} +Cc: ${EMAIL_INTERNAL} +Subject: [autosysadmin] Intervention automatisée sur ${HOSTNAME_TEXT} + +Bonjour, + +Une intervention automatisée vient de se terminer. 
+ +Nom du serveur : ${HOSTNAME_TEXT} +Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) +Script déclenché : ${PROGNAME} + +### Actions réalisées + +$(print_actions "Aucune") + +### Réagir à cette intervention + +Vous pouvez répondre à ce message (${EMAIL_FROM}). + +En cas d'urgence, utilisez l'adresse ${EMERGENCY_MAIL} +ou notre ligne d'astreinte (${EMERGENCY_TEL}) + +-- +Votre AutoSysadmin +EOTEMPLATE +} + +format_mail_abort() { + cat < +Content-Type: text/plain; charset=UTF-8 +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +X-Script: ${PROGNAME} +X-RunId: ${RUN_ID} +To: ${EMAIL_CLIENT:-alert5@evolix.fr} +Cc: ${EMAIL_INTERNAL} +Subject: [autosysadmin] Intervention automatisée interrompue sur ${HOSTNAME_TEXT} + +Bonjour, + +Une intervention automatisée a été déclenchée mais s'est interrompue. + +Nom du serveur : ${HOSTNAME_TEXT} +Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) +Script déclenché : ${PROGNAME} + +### Actions réalisées + +$(print_actions "Aucune") + +### Raison(s) de l'interruption + +$(print_abort_reasons "Inconnue") + +### Réagir à cette intervention + +Vous pouvez répondre à ce message (${EMAIL_FROM}). 
+ +En cas d'urgence, utilisez l'adresse ${EMERGENCY_MAIL} +ou notre ligne d'astreinte (${EMERGENCY_TEL}) + +-- +Votre AutoSysadmin +EOTEMPLATE +} + +# shellcheck disable=SC2028 +print_report_information() { + echo "**Uptime**" + echo "" + uptime + + echo "" + echo "**Utilisateurs récents**" + echo "" + who_file=$(file_path_in_log_dir "who-users") + if [ -s "${who_file}" ]; then + cat "${who_file}" + else + who --users + fi + + echo "" + echo "**Espace disque**" + echo "" + df_file=$(file_path_in_log_dir "server-state/df.txt") + if [ -s "${df_file}" ]; then + cat "${df_file}" + else + df -h + fi + + echo "" + echo "**Dmesg**" + echo "" + dmesg_file=$(file_path_in_log_dir "server-state/dmesg.txt") + if [ -s "${dmesg_file}" ]; then + tail -n 5 "${dmesg_file}" + else + dmesg | tail -n 5 + fi + + echo "" + echo "**systemd failed services**" + echo "" + failed_services_file=$(file_path_in_log_dir "server-state/systemctl-failed-services.txt") + if [ -s "${failed_services_file}" ]; then + cat "${failed_services_file}" + else + systemctl --no-legend --state=failed --type=service + fi + + if command -v lxc-ls > /dev/null 2>&1; then + echo "" + echo "**LXC containers**" + echo "" + lxc_ls_file=$(file_path_in_log_dir "server-state/lxc-list.txt") + if [ -s "${lxc_ls_file}" ]; then + cat "${lxc_ls_file}" + else + lxc-ls --fancy + fi + fi + + apache_errors_file=$(file_path_in_log_dir "apache-errors.log") + if [ -f "${apache_errors_file}" ]; then + echo "" + echo "**Apache errors**" + echo "" + cat "${apache_errors_file}" + fi + + nginx_errors_file=$(file_path_in_log_dir "nginx-errors.log") + if [ -f "${nginx_errors_file}" ]; then + echo "" + echo "**Nginx errors**" + echo "" + cat "${nginx_errors_file}" + fi +} + +format_mail_internal() { + cat < +Content-Type: text/plain; charset=UTF-8 +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +X-Script: ${PROGNAME} +X-RunId: ${RUN_ID} +To: ${EMAIL_INTERNAL} +Subject: [autosysadmin] Rapport interne d'intervention sur ${HOSTNAME_TEXT} + 
+Bonjour, + +Une intervention automatique vient de se terminer. + +Nom du serveur : ${HOSTNAME_TEXT} +Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) +Script déclenché : ${PROGNAME} + +### Actions réalisées + +$(print_actions "Aucune") + +### Raison(s) de l'interruption + +$(print_abort_reasons "Aucune") + +### Log autosysadmin + +$(print_main_log) + +### Informations additionnelles + +$(print_report_information) + +-- +Votre AutoSysadmin +EOTEMPLATE +} + +# Generic function to send emails at the end of the script. +# Takes a template as 1st parameter +hook_mail() { + if is_debug; then + log_run "Debug mode enabled: continue without sending mail." + return 0; + fi + + HOSTNAME="${HOSTNAME:-"$(get_fqdn)"}" + HOSTNAME_TEXT="$(get_complete_hostname)" + EMAIL_CLIENT="$(get_evomaintenance_mail)" + EMERGENCY_MAIL="$(get_evomaintenance_emergency_mail)" + EMERGENCY_TEL="$(get_evomaintenance_emergency_tel)" + + MAIL_CONTENT="$(format_mail_"$1")" + + SENDMAIL_BIN="$(command -v sendmail)" + + if [ -z "${SENDMAIL_BIN}" ]; then + log_global "ERROR: No \`sendmail' command has been found, can't send mail." + fi + if [ -x "${SENDMAIL_BIN}" ]; then + echo "${MAIL_CONTENT}" | "${SENDMAIL_BIN}" -oi -t -f "equipe@evolix.fr" + log_global "Sent '$1' mail for RUN_ID: ${RUN_ID}" + fi +} + +is_holiday() { + # gcal mark today as a holiday by surrounding with < and > the day + # of the month of that holiday line. For example if today is 2022-05-01 we'll + # get among other lines: + # Fête du Travail (FR) + Di, < 1>Mai 2022 + # Jour de la Victoire (FR) + Di, : 8:Mai 2022 = +7 jours + LANGUAGE=fr_FR.UTF-8 TZ=Europe/Paris gcal --cc-holidays=fr --holiday-list=short | grep -E '<[0-9 ]{2}>' --quiet +} + +is_weekend() { + day_of_week=$(date +%u) + if [ "${day_of_week}" != 6 ] && [ "${day_of_week}" != 7 ]; then + return 1 + fi +} + +is_workday() { + if is_holiday || is_weekend; then + return 1 + fi +} + +is_worktime() { + if ! 
is_workday; then + return 1 + fi + + hour=$(date +%H) + if [ "${hour}" -lt 9 ] || { [ "${hour}" -ge 12 ] && [ "${hour}" -lt 14 ] ; } || [ "${hour}" -ge 18 ]; then + return 1 + fi +} diff --git a/autosysadmin-agent/files/upstream/lib/repair.sh b/autosysadmin-agent/files/upstream/lib/repair.sh new file mode 100644 index 00000000..ddd243b5 --- /dev/null +++ b/autosysadmin-agent/files/upstream/lib/repair.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +# Specific functions for "repair" scripts + +is_all_repair_disabled() { + # Fetch values from the config + # and if it is not defined or has no value, then assign "on" + + local status=${repair_all:=on} + + + test "${status}" = "off" || test "${status}" = "0" +} + +is_current_repair_disabled() { + # Fetch values from the config + # and if it is not defined or has no value, then assign "on" + + local status=${!PROGNAME:=on} + + test "${status}" = "off" || test "${status}" = "0" +} + +ensure_not_disabled_or_exit() { + if is_all_repair_disabled; then + log_global 'All repair scripts are disabled.' + exit 0 + fi + if is_current_repair_disabled; then + log_global "Current repair script (${PROGNAME}) is disabled." + exit 0 + fi +} + +# Set of actions to do at the begining of a "repair" script +pre_repair() { + initialize + + # Are we supposed to run? + ensure_not_disabled_or_exit + + # Has it recently been run? + ensure_not_too_soon_or_exit + + # Can we acquire a lock? + acquire_lock_or_exit + + # Is there any active user? 
# Restart a PHP LXC container and keep a before/after trace of its state.
#
# Argument:
#   $1  container name (php56, php70, php73, php74, php80, php81, php82 or php83)
#
# Steps:
#   * abort (log_abort_and_quit) when lxc.service is not enabled, when the
#     container does not exist or when its name is not a known PHP container
#   * save `lxc-info`, the tail of the FPM log and the output of the
#     check_phpfpm_multi NRPE plugin in the log directory
#   * stop the container (20s timeout) then start it again as a daemon
#   * log the result of the start, then save the same information again
#
# Variables are intentionally left global, as in the rest of the library.
repair_lxc_php() {
    container_name=$1

    if is_systemd_enabled 'lxc.service'; then
        lxc_path=$(lxc-config lxc.lxcpath)
        if lxc-info --name "${container_name}" > /dev/null; then
            rootfs="${lxc_path}/${container_name}/rootfs"
            # The FPM log file name depends on the PHP version of the container
            case "${container_name}" in
                php56) fpm_log_file="${rootfs}/var/log/php5-fpm.log" ;;
                php70) fpm_log_file="${rootfs}/var/log/php7.0-fpm.log" ;;
                php73) fpm_log_file="${rootfs}/var/log/php7.3-fpm.log" ;;
                php74) fpm_log_file="${rootfs}/var/log/php7.4-fpm.log" ;;
                php80) fpm_log_file="${rootfs}/var/log/php8.0-fpm.log" ;;
                php81) fpm_log_file="${rootfs}/var/log/php8.1-fpm.log" ;;
                php82) fpm_log_file="${rootfs}/var/log/php8.2-fpm.log" ;;
                php83) fpm_log_file="${rootfs}/var/log/php8.3-fpm.log" ;;
                *)
                    log_abort_and_quit "Unknown container '${container_name}'"
                    ;;
            esac

            # Base name of the FPM log, without its ".log" extension
            # (the suffix is stripped literally, only at the end of the name)
            fpm_log_name=$(basename "${fpm_log_file}" .log)

            # Determine FPM Pool path
            # NOTE(review): if several "pool.d" directories exist in the container,
            # all of them end up in this variable (one per line) - confirm this cannot happen.
            php_path_pool=$(find "${lxc_path}/${container_name}/" -type d -name "pool.d")

            # Save LXC info (before restart)
            lxc-info --name "${container_name}" | save_in_log_dir "lxc-${container_name}.before.status"
            # Save last lines of FPM log (before restart)
            tail "${fpm_log_file}" | save_in_log_dir "${fpm_log_name}.before.log"
            # Save NRPE check (before restart)
            /usr/local/lib/nagios/plugins/check_phpfpm_multi "${php_path_pool}" | save_in_log_dir "check_fpm_${container_name}.before.out"

            lxc-stop --timeout 20 --name "${container_name}"
            lxc-start --daemon --name "${container_name}"
            # Only the result of the start decides between "OK" and "failed"
            rc=$?
            if [ "${rc}" -eq "0" ]; then
                log_all "Restart LXC container '${container_name}': OK"
            else
                log_all "Restart LXC container '${container_name}': failed"
            fi

            # Save LXC info (after restart)
            lxc-info --name "${container_name}" | save_in_log_dir "lxc-${container_name}.after.status"
            # Save last lines of FPM log (after restart)
            tail "${fpm_log_file}" | save_in_log_dir "${fpm_log_name}.after.log"
            # Save NRPE check (after restart)
            /usr/local/lib/nagios/plugins/check_phpfpm_multi "${php_path_pool}" | save_in_log_dir "check_fpm_${container_name}.after.out"
        else
            log_abort_and_quit "LXC container '${container_name}' doesn't exist."
        fi
    else
        log_abort_and_quit 'LXC not found.'
    fi
}
# Print, one per line, the mountpoints reported by the NRPE "check_disk1" check.
#
# How it works:
#   1. the command line of check_disk1 is read from the NRPE configuration
#      (everything after the first "=" of the matching line)
#   2. that command is run with the extra "-e" flag
#   3. only the part of its output located before the first "|" is kept
#   4. "<mountpoint> <number> <two capital letters>" groups are extracted
#   5. the first field (the mountpoint) of each group is printed
get_mountpoints() {
    check_disk1_command=$(grep check_disk1 /etc/nagios/nrpe.d/evolix.cfg | cut -d'=' -f2-)

    # The command is deliberately left unquoted: it holds a program and its arguments
    ${check_disk1_command} -e \
        | cut -d'|' -f1 \
        | grep -E -o '/[[:graph:]]* [0-9]+ [A-Z][A-Z]' \
        | cut -d' ' -f1
}
# Tell whether /tmp contains files whose status changed more than 1 day ago.
# Sets the global `size` to the cumulated size of those files
# (du blocks divided by 1024, i.e. MiB with the default 1K block size).
# Returns 0 when that size is greater than zero, 1 otherwise.
is_tmp_to_delete() {
    size="$(find /tmp/ -type f -ctime +1 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
    # awk prints "0" when nothing matched, so a "non-empty" test is not enough
    awk -v size="${size}" 'BEGIN { exit !(size > 0) }'
}

# Tell whether /var/log contains files not modified for more than 365 days.
# Sets the global `size` to the cumulated size of those files
# (du blocks divided by 1024, i.e. MiB with the default 1K block size).
# Returns 0 when that size is greater than zero, 1 otherwise.
is_log_to_delete() {
    size="$(find /var/log/ -type f -mtime +365 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')"
    # awk prints "0" when nothing matched, so a "non-empty" test is not enough
    awk -v size="${size}" 'BEGIN { exit !(size > 0) }'
}

# Clean APT caches:
#   * in every LXC container that has a /var/cache directory in its rootfs
#   * on the host, but only when /var/cache is the biggest entry of /var
clean_apt_cache() {
    local lxc_path

    # Containers are only inspected when the LXC tools are installed
    if command -v lxc-ls > /dev/null 2>&1; then
        lxc_path=$(lxc-config lxc.lxcpath)
        for container in $(lxc-ls -1); do
            if [ -e "${lxc_path}/${container}/rootfs/var/cache" ]; then
                lxc-attach --name "${container}" -- apt-get clean
                log_action "Clean apt cache in LXC container ${container}";
            fi
        done
    fi

    # The glob must stay unquoted, otherwise du looks for a literal "/var/*" entry.
    # NOTE: "head -n 1" might be superfluous, but let's be sure to have only the first returned value
    biggest_subdir=$(du --summarize --one-file-system /var/* | sort --numeric-sort --reverse | sed 's/^[0-9]\+[[:space:]]\+//;q' | head -n 1)
    case "${biggest_subdir}" in
        '/var/cache')
            apt-get clean
            log_action 'Clean apt cache'
            ;;
    esac
}
#!/bin/bash

# Repair script for Elasticsearch:
# restart the systemd unit when it is enabled but not active,
# and keep its status (before and after the restart) in the log directory.

: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1

pre_repair

service="elasticsearch.service"
service_name="elasticsearch"

if ! is_systemd_enabled "${service}"; then
    log_abort_and_quit "${service} is disabled (or missing), nothing left to do."
elif is_systemd_active "${service}"; then
    log_abort_and_quit "${service} is active, nothing left to do."
else
    # Status before the restart attempt
    systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"

    # Restart attempt, limited to 20 seconds
    timeout 20 systemctl restart "${service}" > /dev/null
    rc=$?
    case "${rc}" in
        0) log_action "Restart ${service_name}: OK" ;;
        *) log_action "Restart ${service_name}: failed" ;;
    esac

    # Status after the restart attempt
    systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
fi

post_repair
+ if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + + # Save error logs + date=$(LANG=en_US.UTF-8 date '+%b %d') + grep "${date}" /home/*/log/error.log /var/log/apache2/*error.log \ + | grep -v \ + -e "Got error 'PHP message:" \ + -e "No matching DirectoryIndex" \ + -e "client denied by server configuration" \ + -e "server certificate does NOT include an ID which matches the server name" \ + | save_in_log_dir "apache-errors.log" + else + log_action "Restart ${service_name}: skip (invalid configuration)" + fi + fi +else + log_all "${service} is disabled (or missing). Skip." +fi + +## Nginx + +service="nginx.service" +service_name="nginx" + +if is_systemd_enabled "${service}"; then + if is_systemd_active "${service}"; then + log_all "${service} is active. Skip." + else + # Save service status before restart + systemctl status "${service}" | save_in_log_dir "${service_name}.before.status" + + # check syntax + if nginx -t > /dev/null 2>&1; then + # Try to restart + timeout 20 systemctl restart "${service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + + # Save error logs + ### Consider doing for Nginx the same as Apache + else + log_action "Restart ${service_name}: skip (invalid configuration)" + fi + fi +else + log_all "${service} is disabled (or missing). Skip." 
#!/bin/bash

# Repair script for MySQL/MariaDB.
#   * Debian <= 8: SysVinit script, restarted when no mysqld process
#     owned by the "mysql" user is found
#   * newer Debian: systemd unit ("mariadb" from Debian 12, "mysql" before),
#     restarted when it is enabled but not active
# The service status is saved in the log directory before and after the restart.

: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1

pre_repair

if is_debian_version "8" "<="; then

    if ! is_sysvinit_enabled '*mysql*'; then
        log_abort_and_quit "MySQL not enabled. Aborting"
    elif pgrep -u mysql mysqld > /dev/null; then
        log_abort_and_quit "mysqld process alive. Aborting"
    else
        # Status before the restart attempt
        timeout 2 mysqladmin status 2>&1 | save_in_log_dir "mysql.before.status"

        # Restart attempt, limited to 20 seconds
        timeout 20 /etc/init.d/mysql restart > /dev/null
        rc=$?
        case "${rc}" in
            0) log_action "Restart mysql: OK" ;;
            *) log_action "Restart mysql: failed" ;;
        esac

        # Status after the restart attempt
        timeout 2 mysqladmin status 2>&1 | save_in_log_dir "mysql.after.status"
    fi

else

    # The unit name depends on the Debian release
    service_name="mysql"
    if is_debian_version "12" ">="; then
        service_name="mariadb"
    fi
    service="${service_name}.service"

    if ! is_systemd_enabled "${service}"; then
        log_abort_and_quit "${service} is disabled (or missing), nothing left to do."
    elif is_systemd_active "${service}"; then
        log_abort_and_quit "${service} is active, nothing left to do."
    else
        # Status before the restart attempt
        systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"

        # Restart attempt, limited to 20 seconds
        timeout 20 systemctl restart "${service}" > /dev/null
        rc=$?
        case "${rc}" in
            0) log_action "Restart ${service_name}: OK" ;;
            *) log_action "Restart ${service_name}: failed" ;;
        esac

        # Status after the restart attempt
        systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
    fi

fi

post_repair
+ else + # Save service status before restart + systemctl status "${service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 systemctl restart "${service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + fi +else + log_abort_and_quit "${service} is disabled (or missing). Abort." +fi + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm56 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm56 new file mode 100755 index 00000000..db2ed9d4 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm56 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php56 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm70 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm70 new file mode 100755 index 00000000..324acadb --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm70 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php70 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm73 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm73 new file mode 100755 index 00000000..9089aa6e --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm73 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source 
"${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php73 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm74 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm74 new file mode 100755 index 00000000..6d7f49bb --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm74 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php74 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm80 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm80 new file mode 100755 index 00000000..f61f45e6 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm80 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php80 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm81 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm81 new file mode 100755 index 00000000..ec9b20c0 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm81 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php81 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm82 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm82 new file mode 100755 index 00000000..8af2217e --- /dev/null +++ 
#!/bin/bash

# Repair script for Redis:
# every "redis-server*" systemd unit that is not active gets restarted,
# and its status is saved in the log directory before and after the restart.

: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}"
source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1
source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1

pre_repair

for service in $(systemd_list_services 'redis-server*'); do
    # Unit name without ".service", used for logs and file names
    service_name="${service//.service/}"

    # Nothing to do for instances that are already running
    if is_systemd_active "${service}"; then
        log_all "${service} is active. Skip."
        continue
    fi

    # Status before the restart attempt
    systemctl status "${service}" | save_in_log_dir "${service_name}.before.status"

    # Restart attempt, limited to 20 seconds
    timeout 20 systemctl restart "${service}" > /dev/null
    rc=$?
    case "${rc}" in
        0) log_action "Restart ${service_name}: OK." ;;
        *) log_action "Restart ${service_name}: failed." ;;
    esac

    # Status after the restart attempt
    systemctl status "${service}" | save_in_log_dir "${service_name}.after.status"
done

post_repair
/usr/bin/timeout 20 systemctl --quiet restart "${1}".service then - log_error_exit "Echec de redémarrage instance tomcat ${1}" + log_abort_and_quit "Echec de redémarrage instance tomcat ${1}" else log_action "Redémarrage instance tomcat ${1}" fi @@ -50,4 +31,4 @@ do repair_tomcat_instance_handle_tomcat "${instance}" done -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/zzz-repair_example.template b/autosysadmin-agent/files/upstream/repair/zzz-repair_example.template new file mode 100755 index 00000000..668d4d02 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/zzz-repair_example.template @@ -0,0 +1,41 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +## Custom lock wait and/or lock name +# LOCK_WAIT="15s" +# LOCK_NAME="repair_http" + +pre_repair + +## The name of the service, mainly for logging +service_name="example" +## The systemd service name +systemd_service="${service_name}.service" + +if is_systemd_enabled "${systemd_service}"; then + if is_systemd_active "${systemd_service}"; then + log_abort_and_quit "${systemd_service} is active, nothing left to do." + else + # Save service status before restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 systemctl restart "${systemd_service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status" + fi +else + log_abort_and_quit "${service_name} is disabled (or missing), nothing left to do." 
+fi + +post_repair diff --git a/autosysadmin-agent/files/upstream/restart/README b/autosysadmin-agent/files/upstream/restart/README new file mode 100644 index 00000000..83a3a9a2 --- /dev/null +++ b/autosysadmin-agent/files/upstream/restart/README @@ -0,0 +1,19 @@ +Autosysadmin "restart auto" scripts +=================================== + +In this directory you can place scripts that will be executed automatically by a cron job (stored in `/etc/cron.d/autosysadmin`). + +They must satisfy the default `run-parts(8)` constraints : + +* be "executable" +* belong to the Debian cron script namespace (`^[a-zA-Z0-9_-]+$`), example: `restart_amavis` + +Warning: scripts that do not satisfy those criteria will NOT be run (silently)! + +You can print the names of the scripts which would be run, without actually running them, with this command : + +``` +$ run-parts --test /usr/share/scripts/autosysadmin/restart +``` + +You can use `zzz-restart_example.template` as boilerplate code to make your own "restart" script. diff --git a/autosysadmin-agent/files/upstream/restart/zzz-restart_example.template b/autosysadmin-agent/files/upstream/restart/zzz-restart_example.template new file mode 100644 index 00000000..1051d132 --- /dev/null +++ b/autosysadmin-agent/files/upstream/restart/zzz-restart_example.template @@ -0,0 +1,120 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1 + +# shellcheck disable=SC2034 +RUNNING="nwh-fr" + +## Possible values for RUNNING : +## never => disabled +## always => enabled +## nwh-fr => enabled during non-working-hours in France +## nwh-ca => enabled during non-working-hours in Canada (not supported yet) +## custom => enabled if `running_custom()` function returns 0, otherwise disabled. 
## Action for systemd system
## Restart ${systemd_service} and save its status in the log directory,
## before and after the restart.
systemd_action() {
    # Status before the restart attempt
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status"

    # systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start,
    # so after a successful restart (and a 1 second pause) the status is checked explicitly.
    if timeout 20 systemctl restart "${systemd_service}" > /dev/null && sleep 1; then
        systemctl status "${systemd_service}" > /dev/null
        rc=$?
    else
        # exit code of the failed restart
        rc=$?
    fi

    case "${rc}" in
        0) log_action "Restart ${service_name}: OK" ;;
        *) log_action "Restart ${service_name}: failed" ;;
    esac

    # Status after the restart attempt
    systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status"
}
# The end marker must be the last line of the file.
# "EOF" is only a keyword for `insertafter`; given to `insertbefore` it is
# treated as a regular expression matching lines that contain "EOF".
- name: "Add end marker if missing"
  ansible.builtin.lineinfile:
    path: "/etc/cron.d/autosysadmin"
    line: "# END ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN"
    insertafter: EOF
    create: yes
### WARNING: those files are explicitly marked as non-executable (mode 0640)
### to prevent them from being run automatically by run-parts.
### A restart script has to be made executable to be picked up.

- name: Copy restart scripts
  ansible.builtin.copy:
    src: "upstream/restart/"
    dest: "{{ autosysadmin_agent_auto_dir }}/"
    owner: root
    group: root
    mode: "0640"
+- name: Update value per variable + ansible.builtin.lineinfile: + dest: "/etc/evolinux/autosysadmin" + line: "{{ item }}={{ autosysadmin_config[item] | default(true) | bool | ternary('on', 'off') }}" + regexp: '^(#\s*)?{{ item }}=.*' + state: "{{ autosysadmin_config[item] | default(true) | bool | ternary('absent', 'present') }}" + register: _line + loop: "{{ autosysadmin_repair_scripts | union(['repair_all']) }}" + +- name: Ensure restart folder exists + ansible.builtin.file: + path: "{{ autosysadmin_agent_auto_dir }}" + state: directory + owner: "root" + group: "root" + mode: "0750" + +- name: Legacy scripts are removed + ansible.builtin.file: + path: "{{ general_scripts_dir }}/autosysadmin/{{ item }}" + state: absent + loop: + - repair_amavis.sh + - repair_disk.sh + - repair_elasticsearch.sh + - repair_http.sh + - repair_mysql.sh + - repair_opendkim.sh + - repair_php_fpm56.sh + - repair_php_fpm70.sh + - repair_php_fpm73.sh + - repair_php_fpm74.sh + - repair_php_fpm80.sh + - repair_php_fpm81.sh + - repair_redis.sh + - repair_tomcat_instance.sh diff --git a/autosysadmin/tasks/logrotate.yml b/autosysadmin-agent/tasks/logrotate.yml similarity index 70% rename from autosysadmin/tasks/logrotate.yml rename to autosysadmin-agent/tasks/logrotate.yml index d4fe7a5c..bf1e55b4 100644 --- a/autosysadmin/tasks/logrotate.yml +++ b/autosysadmin-agent/tasks/logrotate.yml @@ -1,10 +1,8 @@ --- - name: Copy logrotate configuration for autosysadmin ansible.builtin.copy: - src: "files/logrotate_autosysadmin.conf" + src: "files/autosysadmin.logrotate.conf" dest: "/etc/logrotate.d/autosysadmin" owner: root group: root mode: "0644" - tags: - - autosysadmin diff --git a/autosysadmin-agent/tasks/main.yml b/autosysadmin-agent/tasks/main.yml new file mode 100644 index 00000000..9ac8a7b6 --- /dev/null +++ b/autosysadmin-agent/tasks/main.yml @@ -0,0 +1,31 @@ +--- + +- name: The list of all repair scripts is composed.
+ set_fact: + autosysadmin_repair_scripts: "{{ lookup('ansible.builtin.fileglob', '../../../autosysadmin/agent/repair/repair_*', wantlist=True) | map('basename') | sort }}" + +- name: Install dependencies + ansible.builtin.include_tasks: dependencies.yml + +- name: Install autosysadmin + ansible.builtin.include_tasks: install.yml + +- name: Crontab configuration + ansible.builtin.include_tasks: crontab.yml + +- name: NRPE configuration + ansible.builtin.include_tasks: nrpe.yml + +- name: sudo configuration + ansible.builtin.include_tasks: sudo.yml + +- name: rsyslog configuration + ansible.builtin.include_tasks: rsyslog.yml + +- name: logrotate configuration + ansible.builtin.include_tasks: logrotate.yml + +- name: Install latest version of dump-server-state + ansible.builtin.include_role: + name: evolinux-base + tasks_from: dump-server-state.yml diff --git a/autosysadmin-agent/tasks/nrpe.yml b/autosysadmin-agent/tasks/nrpe.yml new file mode 100644 index 00000000..b5a31922 --- /dev/null +++ b/autosysadmin-agent/tasks/nrpe.yml @@ -0,0 +1,9 @@ +--- +- name: custom configuration is present + ansible.builtin.template: + src: autosysadmin.nrpe.cfg.j2 + dest: /etc/nagios/nrpe.d/autosysadmin.cfg + group: nagios + mode: "0640" + force: yes + notify: restart nagios-nrpe-server diff --git a/autosysadmin/tasks/rsyslog.yml b/autosysadmin-agent/tasks/rsyslog.yml similarity index 64% rename from autosysadmin/tasks/rsyslog.yml rename to autosysadmin-agent/tasks/rsyslog.yml index 6f0702c5..bb57f24a 100644 --- a/autosysadmin/tasks/rsyslog.yml +++ b/autosysadmin-agent/tasks/rsyslog.yml @@ -1,11 +1,9 @@ --- - name: Copy rsyslog configuration for autosysadmin ansible.builtin.copy: - src: "files/rsyslog_autosysadmin.conf" + src: "files/autosysadmin.rsyslog.conf" dest: "/etc/rsyslog.d/autosysadmin.conf" owner: root group: root mode: "0644" - notify: Restart rsyslog - tags: - - autosysadmin + notify: restart rsyslog diff --git a/autosysadmin/tasks/sudo.yml 
b/autosysadmin-agent/tasks/sudo.yml similarity index 76% rename from autosysadmin/tasks/sudo.yml rename to autosysadmin-agent/tasks/sudo.yml index 24249ab7..a4fd35be 100644 --- a/autosysadmin/tasks/sudo.yml +++ b/autosysadmin-agent/tasks/sudo.yml @@ -1,9 +1,7 @@ --- - name: Add autosysadmin sudoers file ansible.builtin.template: - src: sudoers.j2 + src: autosysadmin.sudoers.j2 dest: /etc/sudoers.d/autosysadmin mode: "0600" validate: "visudo -cf %s" - tags: - - autosysadmin diff --git a/autosysadmin-agent/templates/autosysadmin.cf.j2 b/autosysadmin-agent/templates/autosysadmin.cf.j2 new file mode 100644 index 00000000..763958ba --- /dev/null +++ b/autosysadmin-agent/templates/autosysadmin.cf.j2 @@ -0,0 +1,12 @@ +# This configuration is partially managed by Ansible +# You can change specific values manually, but they may be overridden by Ansible +# +# To be safe, update the host_vars/group_vars in the autosysadmin project +# https://gitea.evolix.org/evolix/autosysadmin/src/branch/master +# then use the "agent" playbook to deploy.
+# +# Configuration for autosysadmin +# Use this file to change configuration values defined in repair scripts +# To disable all repair scripts : repair_all=off +# To disable "repair_http" : repair_http=off +# \ No newline at end of file diff --git a/autosysadmin-agent/templates/autosysadmin.cron.j2 b/autosysadmin-agent/templates/autosysadmin.cron.j2 new file mode 100644 index 00000000..90823d5e --- /dev/null +++ b/autosysadmin-agent/templates/autosysadmin.cron.j2 @@ -0,0 +1,7 @@ +PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + +# Run each enabled script +*/5 * * * * root run-parts /usr/share/scripts/autosysadmin/restart + +# Clean run log files +@weekly root {{ autosysadmin_agent_bin_dir | mandatory }}/delete_old_logs.sh {{ autosysadmin_agent_log_retention_days | default('365') }} diff --git a/autosysadmin-agent/templates/autosysadmin.nrpe.cfg.j2 b/autosysadmin-agent/templates/autosysadmin.nrpe.cfg.j2 new file mode 100644 index 00000000..c3e1a40c --- /dev/null +++ b/autosysadmin-agent/templates/autosysadmin.nrpe.cfg.j2 @@ -0,0 +1,8 @@ +# +# Ansible managed - DO NOT MODIFY, your changes will be overwritten ! +# + +# Autosysadmin repair commands +{% for script in lookup('ansible.builtin.fileglob', '../../../autosysadmin/agent/repair/repair_*', wantlist=True) | map("basename") | sort %} +command[{{ script }}]=sudo {{ autosysadmin_agent_bin_dir }}/{{ script }} +{% endfor %} \ No newline at end of file diff --git a/autosysadmin-agent/templates/autosysadmin.sudoers.j2 b/autosysadmin-agent/templates/autosysadmin.sudoers.j2 new file mode 100644 index 00000000..f182bb84 --- /dev/null +++ b/autosysadmin-agent/templates/autosysadmin.sudoers.j2 @@ -0,0 +1,7 @@ +# +# Ansible managed - DO NOT MODIFY, your changes will be overwritten ! 
+# + +{% for script in lookup('ansible.builtin.fileglob', '../../../autosysadmin/agent/repair/repair_*', wantlist=True) | map("basename") | sort %} +nagios ALL = NOPASSWD: {{ autosysadmin_agent_bin_dir }}/{{ script }} +{% endfor %} \ No newline at end of file diff --git a/autosysadmin-restart_nrpe/defaults/main.yml b/autosysadmin-restart_nrpe/defaults/main.yml new file mode 100644 index 00000000..3d743a1b --- /dev/null +++ b/autosysadmin-restart_nrpe/defaults/main.yml @@ -0,0 +1,8 @@ +--- + +general_scripts_dir: "/usr/share/scripts" + +restart_nrpe_path: "{{ general_scripts_dir }}/autosysadmin/restart/restart_nrpe" + +# Change this to customize the RUNNING value in the script +restart_nrpe_running: Null diff --git a/autosysadmin-restart_nrpe/files/upstream/restart_nrpe b/autosysadmin-restart_nrpe/files/upstream/restart_nrpe new file mode 100755 index 00000000..b2dd7f44 --- /dev/null +++ b/autosysadmin-restart_nrpe/files/upstream/restart_nrpe @@ -0,0 +1,105 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1 + +## Possible values for RUNNING : +## never => disabled +## always => enabled +## nwh-fr => enabled during non-working-hours in France +## nwh-ca => enabled during non-working-hours in Canada (not supported yet) +## custom => enabled if `running_custom()` function return 0, otherwise disabled. 
+ +# shellcheck disable=SC2034 +RUNNING="nwh-fr" + +## The name of the service, mainly for logging +service_name="nagios-nrpe-server" +## The SysVinit script name +sysvinit_script="${service_name}" +## The systemd service name +systemd_service="${service_name}.service" + +is_service_alive() { + ## this must return 0 if the service is alive, otherwise return 1 + ## Example: + pgrep -u nagios nrpe > /dev/null +} + +## Action for SysVinit system +sysvinit_action() { + # Save service status before restart + timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 "/etc/init.d/${sysvinit_script}" restart > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.after.status" +} + +## Action for systemd system +systemd_action() { + # Save service status before restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + # systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start + # so we check the status explicitly + timeout 20 systemctl restart "${systemd_service}" > /dev/null \ + && sleep 1 \ + && systemctl status "${systemd_service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status" +} + +# Should we run? +if ! is_supposed_to_run; then + # log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})." + exit 0 +fi +if is_service_alive; then + # log_global "${service_name} process alive. 
Aborting" + exit 0 +fi + +# Yes we do, so check for sysvinit or systemd +if is_debian_version "8" "<="; then + if ! is_sysvinit_enabled "*${sysvinit_script}*"; then + # log_global "${service_name} not enabled. Aborting" + exit 0 + fi + + # Let's finally do the action + pre_restart + sysvinit_action + post_restart +else + if ! is_systemd_enabled "${systemd_service}"; then + # log_global "${service_name} is disabled (or missing), nothing left to do." + exit 0 + fi + if is_systemd_active "${systemd_service}"; then + # log_global "${service_name} is active, nothing left to do." + exit 0 + fi + + # Let's finally do the action + pre_restart + systemd_action + post_restart +fi diff --git a/autosysadmin-restart_nrpe/tasks/main.yml b/autosysadmin-restart_nrpe/tasks/main.yml new file mode 100644 index 00000000..7a8ad5b0 --- /dev/null +++ b/autosysadmin-restart_nrpe/tasks/main.yml @@ -0,0 +1,24 @@ +--- + + - name: "Remount /usr if needed" + ansible.builtin.include_role: + name: remount-usr + + - name: "Copy restart_nrpe" + ansible.builtin.copy: + src: upstream/restart_nrpe + dest: "{{ restart_nrpe_path }}" + owner: "root" + group: "root" + mode: "0750" + + - name: "Customize RUNNING value" + ansible.builtin.lineinfile: + path: "{{ restart_nrpe_path }}" + line: "RUNNING=\"{{ restart_nrpe_running }}\"" + regexp: "^ *RUNNING=" + create: False + when: + - restart_nrpe_running is defined + - restart_nrpe_running != None + - restart_nrpe_running | length > 0 diff --git a/autosysadmin/defaults/main.yml b/autosysadmin/defaults/main.yml deleted file mode 100644 index 56190633..00000000 --- a/autosysadmin/defaults/main.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- - -general_scripts_dir: "/usr/share/scripts" -autosysadmin_dir: "{{ general_scripts_dir }}/autosysadmin" - -# Default values for enabled checks -repair_amavis: 'on' -repair_disk: 'on' -repair_elasticsearch: 'on' -repair_http: 'on' -repair_mysql: 'on' -repair_opendkim: 'off' -repair_php_fpm56: 'off' -repair_php_fpm70: 'off' 
-repair_php_fpm73: 'off' -repair_php_fpm74: 'off' -repair_php_fpm80: 'off' -repair_php_fpm81: 'off' -repair_php_fpm82: 'off' -repair_php_fpm83: 'off' -repair_redis: 'off' -repair_tomcat_instance: 'off' diff --git a/autosysadmin/files/scripts/functions.sh b/autosysadmin/files/scripts/functions.sh deleted file mode 100644 index 95f1a901..00000000 --- a/autosysadmin/files/scripts/functions.sh +++ /dev/null @@ -1,478 +0,0 @@ -#!/bin/bash - -get_system() { - uname -s -} - -get_fqdn() { - if [ "$(get_system)" = "Linux" ]; then - hostname --fqdn - elif [ "$(get_system)" = "OpenBSD" ]; then - hostname - else - log_error_exit "OS not detected!" - fi -} - -get_complete_hostname() { - REAL_HOSTNAME="$(get_fqdn)" - if [ "${HOSTNAME}" = "${REAL_HOSTNAME}" ]; then - echo "${HOSTNAME}" - else - echo "${HOSTNAME} (${REAL_HOSTNAME})" - fi -} - -get_evomaintenance_mail() { - email="$(grep "EVOMAINTMAIL=" /etc/evomaintenance.cf | cut -d '=' -f2)" - - if [[ -z "$email" ]]; then - email='alert5@evolix.fr' - fi - - echo "${email}" -} - -arguments="${*}" - -get_argument() { - no_found=1 - for argument in ${arguments} ; do - if [ "${argument}" = "${1}" ] ; - then - no_found=0 - fi - done - return ${no_found} -} - -internal_info() { - INTERNAL_INFO="$(printf '%b\n%s' "${INTERNAL_INFO}" "$*")" -} - -log_action() { - log "Action : $*" - ACTIONS="$(printf '%s\n%s' "${ACTIONS}" "$*")" -} - -log() { - INTERNAL_LOG="$(printf '%s\n%s %s %s %s' "${INTERNAL_LOG}" "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*")" - printf '%s %s %s %s\n' "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*" | tee -a "${LOG_DIR}/autosysadmin.log" - echo "$*" | /usr/bin/logger -p local0.notice -t autosysadmin."$0" -} - -log_error_exit() { - log "ERROR : $*" - AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: $*" --no-commit --no-mail - exit 1 -} - -log_check_php_fpm() { - - # Extraire seulement les chiffres du nom du script exécuté - # ./repair_php_fpm81.sh ==> 81 - PHP_VERSION="${0//[^0-9]/}" - - 
PHP_PATH_POOL=$(find /var/lib/lxc/php"${PHP_VERSION}"/ -type d -name "pool.d") - /usr/local/lib/nagios/plugins/check_phpfpm_multi "${PHP_PATH_POOL}" > "${LOG_DIR}/nrpe.txt" -} - -log_system_status() { - DUMP_SERVER_STATE_BIN="$(command -v dump-server-state || command -v backup-server-state)" - - if [ -z "${DUMP_SERVER_STATE_BIN}" ]; then - log "Warning: dump-server-state is not present. No server state recorded...." - fi - - if [ -x "${DUMP_SERVER_STATE_BIN}" ]; then - - # NOTE We don't want the logging to take too much time, so we kill it - # if it take more than 20 seconds. - timeout --signal 9 20 \ - "${DUMP_SERVER_STATE_BIN}" \ - --dump-dir="$LOG_DIR" \ - --df \ - --dmesg \ - --iptables \ - --lxc \ - --netcfg \ - --netstat \ - --uname \ - --processes \ - --systemctl \ - --uptime \ - --virsh \ - --disks \ - --mysql-processes \ - --no-apt-states \ - --no-apt-config \ - --no-dpkg-full \ - --no-dpkg-status \ - --no-mount \ - --no-packages \ - --no-sysctl \ - --no-etc - - log "System status logged in ${LOG_DIR}" - fi -} - -read_log_system_status(){ - files="df.txt dmesg.txt lxc-list.txt netstat-legacy.txt netstat-ss.txt pstree.txt ps.txt systemctl-failed-services.txt" - echo -e "\n\n#### Détails de dump-server-state" - for file in ${files} ; do - echo -e "\n### cat ${LOG_DIR}/${file} :" - tail -n 1000 "${LOG_DIR}"/"${file}" - done -} - -ensure_no_active_users_or_exit() { - if is_debug; then return; fi - - # Is there any active user ? - for user in $(LC_ALL=C who --users|awk '{print $1}'); do - idle_time="$(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}')" - for sameusertime in $(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}'); do - if is_active_user "$sameusertime"; then - hook_mail abort_active_users - log_error_exit 'At least one user was recently active. That requires human intervention. Nothing to do here!' 
- fi - done - done -} - -is_active_user() { - # Check if a user was active in the last 30 minutes - idle_time="$1" - - if [ "${idle_time}" = "old" ]; - then - return 1 - elif [ "${idle_time}" = "." ]; - then - return 0 - else - hh="$(echo "${idle_time}" | awk -F':' '{print $1}')" - mm="$(echo "${idle_time}" | awk -F':' '{print $2}')" - idle_minutes="$(( 60 * "${hh}" + "${mm}" ))" - if [ "${idle_minutes}" -ge 30 ]; - then - return 1 - else - return 0 - fi - fi -} - -is_debug() { - debug_file="/etc/evolinux/autosysadmin.debug" - - if [ -e "${debug_file}" ]; then - last_change=$(stat -c %Z "${debug_file}") - limit_date=$(date --date "14400 seconds ago" +"%s") - - if [ $(( last_change - limit_date )) -le "0" ]; then - rm "${debug_file}" - else - return 0 - fi - fi - - return 1 -} - -check_nrpe() { - check="$1" - list_command_nrpe=$( grep --exclude=*~ -E "\[${check}\]" -r /etc/nagios/ | grep -v '#command' ) - command_nrpe_primary=$( echo "${list_command_nrpe}" | grep "/etc/nagios/nrpe.d/evolix.cfg" | cut -d'=' -f2- ) - command_nrpe_secondary=$( echo "${list_command_nrpe}" | head -n1 | cut -d'=' -f2- ) - - if [ -z "${command_nrpe_primary}" ] && [ -z "${command_nrpe_secondary}" ] - then - return 1 - else - if [ -n "${command_nrpe_primary}" ] - then - ${command_nrpe_primary} - else - ${command_nrpe_secondary} - fi - fi -} - -acquire_lock_or_exit() { - lockfile="$1" - waittime="$2" - - # si le temps d’attente n’est pas compréhensible par sleep(1), il vaut 0 - if ! 
echo "${waittime}" | grep -Eq '^[0-9]+[smhd]?$' - then - waittime=0 - fi - - # si le temps d’attente est supérieur à 0 et si le lock existe, on attend - if test "${waittime}" -gt 0 && test -f "${lockfile}" - then - sleep "${waittime}" - fi - - # si le lock existe, on s’arrête - if test -f "${lockfile}" - then - log_error_exit "lock file ${lockfile} exists" - fi - touch "${lockfile}" -} - -is_too_soon() { - if is_debug; then return; fi - - witness="/tmp/autosysadmin_witness_$(basename "$0")" - if test -f "${witness}" - then - compare="$(($(date +%s)-$(stat -c "%Y" "${witness}")))" - if [ "${compare}" -lt 1800 ]; - then - log_error_exit 'already executed less than 30 minutes ago' - fi - rm "${witness}" - fi - touch "${witness}" -} - -init_autosysadmin() { - PATH="${PATH}":/usr/sbin:/sbin↩ - unset ACTIONS - - SCRIPTNAME=$(basename "$0") - PROGNAME=${SCRIPTNAME%.sh} - - RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${SCRIPTNAME}_$(openssl rand -hex 6)" - LOG_DIR="/var/log/autosysadmin/${RUN_ID}" - mkdir -p "${LOG_DIR}" - - log "Autosysadmin : Script ${SCRIPTNAME} triggered" - - # Detect operating system name, version and release↩ - detect_os -} - -load_conf() { - # Load conf and enable script by default. - # To disable script locally, set "$PROGNAME"=off in /etc/evolinux/autosysadmin. - # To disable script globally, set "$PROGNAME"=off in the script, after load_conf() call. - declare -g "$PROGNAME"=on # dynamic variable assignment ($PROGNAME == repair_*) - - # Source configuration file - # shellcheck source=../roles/deploy_autosysadmin/templates/autosysadmin.cfg.j2 - test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin -} - -detect_os() { - # OS detection - DEBIAN_RELEASE="" - LSB_RELEASE_BIN="$(command -v lsb_release)" - - if [ -e /etc/debian_version ]; then - DEBIAN_VERSION="$(cut -d "." 
-f 1 < /etc/debian_version)" - if [ -x "${LSB_RELEASE_BIN}" ]; then - DEBIAN_RELEASE="$("${LSB_RELEASE_BIN}" --codename --short)" - else - case "${DEBIAN_VERSION}" in - 8) DEBIAN_RELEASE="jessie";; - 9) DEBIAN_RELEASE="stretch";; - 10) DEBIAN_RELEASE="buster";; - 11) DEBIAN_RELEASE="bullseye";; - esac - fi - fi -} - -is_debian_jessie() { - test "${DEBIAN_RELEASE}" = "jessie" -} -is_debian_stretch() { - test "${DEBIAN_RELEASE}" = "stretch" -} -is_debian_buster() { - test "${DEBIAN_RELEASE}" = "buster" -} -is_debian_bullseye() { - test "${DEBIAN_RELEASE}" = "bullseye" -} - -systemd_list_service_failed() { - systemctl list-units --failed --no-legend --full --type=service "$1" | - awk '{print $1}' -} - -systemd_list_units_enabled() { - list_units_enabled=$(systemctl list-unit-files --state=enabled --no-legend | awk "/$1/{print \$1}") - if [ -z "${list_units_enabled}" ] - then - return 1 - else - echo "${list_units_enabled}" - fi -} - -format_mail_success() { - cat < -Content-Type: text/plain; charset=UTF-8 -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Script: $(basename "$0") -X-RunId: ${RUN_ID} -To: ${EMAIL_CLIENT:-alert5@evolix.fr} -Cc: autosysadmin@evolix.fr -Subject: [autosysadmin] Intervention sur ${HOSTNAME_TEXT} - -Bonjour, - -Une intervention automatique vient de se terminer. - -Nom du serveur : ${HOSTNAME_TEXT} -Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) - -### Renseignements sur l'intervention - -${ACTIONS} - -### Réagir à cette intervention - -Vous pouvez répondre à ce message (sur l'adresse mail equipe@evolix.net). 
-En cas d'urgence, utilisez l'adresse maintenance@evolix.fr ou -notre téléphone portable d'astreinte (04.26.99.99.26) - --- -Votre AutoSysadmin -EOTEMPLATE -} - -format_mail_abort_active_users() { - cat < -Content-Type: text/plain; charset=UTF-8 -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Script: $(basename "$0") -X-RunId: ${RUN_ID} -To: ${EMAIL_CLIENT:-alert5@evolix.fr} -Cc: autosysadmin@evolix.fr -Subject: [autosysadmin] Intervention interrompue sur ${HOSTNAME_TEXT} - -Bonjour, - -Une intervention automatique a été interrompue en raison -d'un utilisateur actuellement actif sur le serveur. - -Nom du serveur : ${HOSTNAME_TEXT} -Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) - -### Utilisateur(s) connecté(s) -$(w) - --- -Votre AutoSysadmin -EOTEMPLATE -} - -format_mail_internal_info() { - cat < -Content-Type: text/plain; charset=UTF-8 -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Script: $(basename "$0") -X-RunId: ${RUN_ID} -To: autosysadmin@evolix.fr -Subject: [autosysadmin] Complements (interne) - Intervention sur ${HOSTNAME_TEXT} - -Bonjour, - -Une intervention automatique vient de se terminer. - -Nom du serveur : ${HOSTNAME_TEXT} -Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) -Script déclenché : $(basename "$0") - -### Actions effectuées - -${ACTIONS} - -### Logs autosysadmin - -${INTERNAL_LOG} - -### Utilisateur(s) connecté(s) - -$(w) - -### Informations additionnelles données par le script $(basename "$0") - -${INTERNAL_INFO} - --- -Votre AutoSysadmin -EOTEMPLATE -} - -hook_mail() { - if is_debug; then return; fi - - HOSTNAME="${HOSTNAME:-"$(get_fqdn)"}" - HOSTNAME_TEXT="$(get_complete_hostname)" - EMAIL_CLIENT="$(get_evomaintenance_mail)" - - MAIL_CONTENT="$(format_mail_"$1")" - - SENDMAIL_BIN="$(command -v sendmail)" - - if [ -z "${SENDMAIL_BIN}" ]; then - log "No \`sendmail' command has been found, can't send mail." 
- fi - - if [ -x "${SENDMAIL_BIN}" ]; then - echo "${MAIL_CONTENT}" | "${SENDMAIL_BIN}" -oi -t -f "equipe@evolix.net" - fi -} - - - -# We need stable output for gcal, so we force some language environment variables -export TZ=Europe/Paris -export LANGUAGE=fr_FR.UTF-8 - -is_holiday() { - # gcal mark today as a holiday by surrounding with < and > the day - # of the month of that holiday line. For exemple if today is 2022-05-01 we'll - # get among other lines: - # Fête du Travail (FR) + Di, < 1>Mai 2022 - # Jour de la Victoire (FR) + Di, : 8:Mai 2022 = +7 jours - gcal --cc-holidays=fr --holiday-list=short | grep -E '<[0-9 ]{2}>' --quiet -} - -is_weekend() { - day_of_week=$(date +%u) - if [ "$day_of_week" != 6 ] && [ "$day_of_week" != 7 ]; then - return 1 - fi -} - -is_workday() { - if is_holiday || is_weekend; then - return 1 - fi -} - -is_worktime() { - if ! is_workday; then - return 1 - fi - - hour=$(date +%H) - if [ "${hour}" -lt 9 ] || { [ "${hour}" -ge 12 ] && [ "${hour}" -lt 14 ] ; } || [ "${hour}" -ge 18 ]; then - return 1 - fi -} diff --git a/autosysadmin/files/scripts/repair_amavis.sh b/autosysadmin/files/scripts/repair_amavis.sh deleted file mode 100644 index 5139b927..00000000 --- a/autosysadmin/files/scripts/repair_amavis.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh -# shellcheck source=./restart_amavis.sh -source /usr/share/scripts/autosysadmin/restart_amavis.sh - -init_autosysadmin -load_conf - -test "${repair_amavis:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Verify if check_nrpe are not OK -check_nrpe "check_amavis" && log_error_exit 'check_amavis is OK, nothing to do here!' - -# Has it recently been run? 
-get_argument "--no-delay" || is_too_soon - -lockfile="/run/lock/repair_amavis" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! -restart_amavis - -hook_mail success -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_disk.sh b/autosysadmin/files/scripts/repair_disk.sh deleted file mode 100644 index fc35438c..00000000 --- a/autosysadmin/files/scripts/repair_disk.sh +++ /dev/null @@ -1,173 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_disk:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_disk" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -get_mountpoints() { - # the $(...) 
get the check_disk1 command - # the cut command selects the critical part of the check_disk1 output - # the grep command extracts the mountpoints and available disk space - # the last cut command selects the mountpoints - $(grep check_disk1 /etc/nagios/nrpe.d/evolix.cfg | cut -d'=' -f2-) -e | cut -d'|' -f1 | grep -Eo '/[[:graph:]]* [0-9]+ [A-Z][A-Z]' | cut -f1 -d' ' -} - -is_reserved-blocks() { - fs_type="$(findmnt -n --output=fstype "$1")" - if [ "${fs_type}" = "ext4" ]; - then - device="$(findmnt -n --output=source "$1")" - reserved_block_count="$(tune2fs -l "${device}" | grep 'Reserved block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')" - block_count="$(tune2fs -l "${device}" | grep 'Block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')" - percentage=$(awk "BEGIN { pc=100*${reserved_block_count}/${block_count}; i=int(pc); print (pc-i<0.5)?i:i+1 }") - - log "Reserved blocks for $1 is curently at $percentage%" - if [ "${percentage}" -gt "1" ] - then - log "Allowing tune2fs action to reduce the number of reserved blocks" - return 0 - else - log "Reserved blocks already at or bellow 1%, no automatic action possible" - return 1 - fi - else - log "Filesystem for $1 partition is not ext4" - - return 1 - fi -} - -change_reserved-blocks() { - # We alwasy keep some reserved blocks to avoid missing some logs - # https://gitea.evolix.org/evolix/autosysadmin/issues/22 - tune2fs -m 1 "$(findmnt -n --output=source "$1")" - log_action "Reserved blocks for $1 changed to 1 percent" -} - -is_tmp_to_delete() { - size="$(find /var/log/ -type f -ctime +1 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')" - if [ -n "${size}" ] - then - return 0 - else - return 1 - fi -} - -is_log_to_delete() { - size="$(find /var/log/ -type f -mtime +365 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')" - if [ -n "${size}" ] - then - return 0 - else - return 1 - fi -} - -clean_apt_cache() { - for lxc in $(du -ax /var | sort -nr | head -n10 | grep -E 
'/var/lib/lxc/php[0-9]+/rootfs/var/cache$' | grep -Eo 'php[0-9]+') - do - lxc-attach --name "${lxc}" -- apt-get clean - log_action '[lxc/'"${lxc}"'] Clean apt cache' - done - case "$(du -sx /var/* | sort -rn | sed 's/^[0-9]\+[[:space:]]\+//;q')" in - '/var/cache') - apt-get clean - log_action 'Clean apt cache' - ;; - esac -} - -clean_amavis_virusmails() { - if du --inodes /var/lib/* | sort -n | tail -n3 | grep -q 'virusmails$' - then - find /var/lib/amavis/virusmails/ -type f -atime +30 -delete - log_action 'Clean /var/lib/amavis/virusmails' - fi -} - -for mountpoint in $(get_mountpoints) -do - case "${mountpoint}" in - /var) - #if is_log_to_delete - #then - # find /var/log/ -type f -mtime +365 -delete - # log_action "$size Mo of disk space freed in /var" - #fi - if is_reserved-blocks /var - then - change_reserved-blocks /var - clean_apt_cache - clean_amavis_virusmails - hook_mail success - fi - ;; - /tmp) - #if is_tmp_to_delete - #then - # find /tmp/ -type f -ctime +1 -delete - # log_action "$size Mo of disk space freed in /tmp" - #fi - if is_reserved-blocks /tmp - then - change_reserved-blocks /tmp - hook_mail success - fi - ;; - /home) - if is_reserved-blocks /home - then - change_reserved-blocks /home - hook_mail success - fi - ;; - /srv) - if is_reserved-blocks /srv - then - change_reserved-blocks /srv - hook_mail success - fi - ;; - /filer) - if is_reserved-blocks /filer - then - change_reserved-blocks /filer - hook_mail success - fi - ;; - /) - if is_reserved-blocks / - then - change_reserved-blocks / - hook_mail success - # Suggest remove old kernel ? 
- fi - ;; - *) - # unknown - log 'Unknown partition (or weird case) or nothing to do' - ;; - esac -done - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_elasticsearch.sh b/autosysadmin/files/scripts/repair_elasticsearch.sh deleted file mode 100644 index 3b45c6e0..00000000 --- a/autosysadmin/files/scripts/repair_elasticsearch.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_elasticsearch:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_elasticsearch" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -elasticsearch_is_enabled() { - systemd_list_units_enabled "elasticsearch.service" - -} - -elasticsearch_restart() { - if ! timeout 60 systemctl restart elasticsearch.service > /dev/null - then - log_error_exit 'failed to restart elasticsearch' - fi -} - -# Test functions -test_elasticsearch_process_present() { - pgrep -u elasticsearch > /dev/null -} - -if elasticsearch_is_enabled -then - if ! test_elasticsearch_process_present - then - log_action "Redémarrage de elasticsearch" - elasticsearch_restart - hook_mail success - else - log_error_exit "Elasticsearch process alive. Aborting" - fi -else - log_error_exit "Elasticsearch is not enabled. 
Aborting" -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_http.sh b/autosysadmin/files/scripts/repair_http.sh deleted file mode 100644 index b1642858..00000000 --- a/autosysadmin/files/scripts/repair_http.sh +++ /dev/null @@ -1,141 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_http:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -log_system_status - -http_detect_service() { - # check whether nginx, apache or both are supposed to be running - if is_debian_jessie; then - find /etc/rc2.d/ - else - systemctl list-unit-files --state=enabled - fi | awk '/nginx/ { nginx = 1 } /apache2/ { apache2 = 1 } END { if (nginx && apache2) { print "both" } else if (nginx) { print "nginx" } else if (apache2) { print "apache2" } }' - # The previous awk command looks for two patterns: "nginx" - # and "apache2". If a line matches the patterns, a variable - # "nginx" or "apache2" is set to 1 (true). The "END" checks - # if one or both patterns has been found. -} - -http_handle_apache() { - # check syntax - if ! apache2ctl -t > /dev/null 2> /dev/null - then - log_error_exit 'apache2 configuration syntax is not valid' - fi - - # try restart - if ! 
timeout 20 systemctl restart apache2.service > /dev/null 2> /dev/null - then - log_error_exit 'failed to restart apache2' - fi - - log_action "Redémarrage de Apache" - - internal_info "#### grep $(LANG=en_US.UTF-8 date '+%b %d') /home/*/log/error.log /var/log/apache2/*error.log (avec filtrage)" - ERROR_LOG=$(grep "$(LANG=en_US.UTF-8 date '+%b %d')" /home/*/log/error.log /var/log/apache2/*error.log | grep -v -e "Got error 'PHP message:" -e "No matching DirectoryIndex" -e "client denied by server configuration" -e "server certificate does NOT include an ID which matches the server name" ) - internal_info "$ERROR_LOG" - -} - -http_handle_nginx() { - # check syntax - if ! nginx -t > /dev/null 2> /dev/null - then - log_error_exit 'nginx configuration syntax is not valid' - fi - - # try restart - if ! timeout 20 systemctl restart nginx.service > /dev/null 2> /dev/null - then - log_error_exit 'failed to restart nginx' - fi - - log_action "Redémarrage de Nginx" -} - -http_handle_lxc_php() { - # check whether containers are used for PHP and reboot them if so - if systemd_list_units_enabled 'lxc' - then - for php in $(lxc-ls | grep 'php'); do - lxc-stop -n "$php" - lxc-start --daemon -n "$php" - log_action "lxc-fpm - Redémarrage container ${php}" - done - - fi -} - -http_handle_fpm_php() { - # check whether php-fpm is installed and restart it if so - if enabled_units="$(systemd_list_units_enabled "php.*-fpm")" - then - systemctl restart "${enabled_units}" - log_action 'php-fpm - Redémarrage de php-fpm' - fi -} - -case "$(http_detect_service)" in -nginx) - - http_handle_nginx - - http_handle_lxc_php - http_handle_fpm_php - - hook_mail success - hook_mail internal_info - ;; - -apache2) - - http_handle_apache - - http_handle_lxc_php - http_handle_fpm_php - - hook_mail success - hook_mail internal_info - ;; - -both) - - http_handle_nginx - http_handle_apache - - http_handle_lxc_php - http_handle_fpm_php - - hook_mail success - hook_mail internal_info - ;; - -*) - # unknown - log 
'nothing to do' - ;; -esac - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_mysql.sh b/autosysadmin/files/scripts/repair_mysql.sh deleted file mode 100644 index f80d5af7..00000000 --- a/autosysadmin/files/scripts/repair_mysql.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_mysql:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_mysql" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -log_system_status - -mysql_is_enabled() { - if is_debian_jessie - then - find /etc/rc2.d/ -name '*mysql*' > /dev/null - else - systemd_list_units_enabled "mysql.service" - fi -} - -mysql_restart() { - if is_debian_jessie - then - if ! timeout 60 /etc/init.d/mysql restart > /dev/null - then - log_error_exit 'failed to restart mysql' - fi - else - if ! timeout 60 systemctl restart mysql.service > /dev/null - then - log_error_exit 'failed to restart mysql' - fi - fi -} - -# Test functions -test_mysql_process_present() { - pgrep -u mysql mysqld > /dev/null -} - -if mysql_is_enabled -then - if ! test_mysql_process_present - then - log_action "Redémarrage de MySQL" - mysql_restart - hook_mail success - else - log_error_exit "mysqld process alive. Aborting" - fi -else - log_error_exit "MySQL/MariaDB not enabled. 
Aborting" -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_opendkim.sh b/autosysadmin/files/scripts/repair_opendkim.sh deleted file mode 100644 index f7735028..00000000 --- a/autosysadmin/files/scripts/repair_opendkim.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_opendkim:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_opendkim" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -log_system_status - -# Functions dedicated to this repair script - -opendkim_is_enabled() { - systemd_list_units_enabled "opendkim.service" - -} - -opendkim_restart() { - if ! timeout 60 systemctl restart opendkim.service > /dev/null - then - log_error_exit 'failed to restart opendkim' - fi -} - -opendkim_test_process_present() { - pgrep -u opendkim > /dev/null -} - - -# Main logic - -if opendkim_is_enabled -then - if ! opendkim_test_process_present - then - log_action "Redémarrage de opendkim" - opendkim_restart - hook_mail success - else - log_error_exit "opendkim process alive. Aborting" - fi -else - log_error_exit "opendkim is not enabled. 
Aborting" -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm56.sh b/autosysadmin/files/scripts/repair_php_fpm56.sh deleted file mode 100644 index 6c67e0b6..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm56.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm56:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php56 - then - lxc-stop -n php56 - lxc-start --daemon -n php56 - log_action "lxc-fpm - Redémarrage container php56" - - internal_info "#### tail /var/lib/lxc/php56/rootfs/var/log/php5-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php56/rootfs/var/log/php5-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm70.sh b/autosysadmin/files/scripts/repair_php_fpm70.sh deleted file mode 100644 index 5bf8cab2..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm70.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm70:=off}" = off && log_error_exit 'Script disabled, nothing to do 
here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php70 - then - lxc-stop -n php70 - lxc-start --daemon -n php70 - log_action "lxc-fpm - Redémarrage container php70" - - internal_info "#### tail /var/lib/lxc/php70/rootfs/var/log/php7.0-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php70/rootfs/var/log/php7.0-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm73.sh b/autosysadmin/files/scripts/repair_php_fpm73.sh deleted file mode 100644 index 6b2094fd..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm73.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm73:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php73 - then - lxc-stop -n php73 - lxc-start --daemon -n php73 - log_action "lxc-fpm - Redémarrage container php73" - - internal_info "#### tail /var/lib/lxc/php73/rootfs/var/log/php7.3-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php73/rootfs/var/log/php7.3-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm74.sh b/autosysadmin/files/scripts/repair_php_fpm74.sh deleted file mode 100644 index dab16a8a..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm74.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm74:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php74 - then - lxc-stop -n php74 - lxc-start --daemon -n php74 - log_action "lxc-fpm - Redémarrage container php74" - - internal_info "#### tail /var/lib/lxc/php74/rootfs/var/log/php7.4-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php74/rootfs/var/log/php7.4-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm80.sh b/autosysadmin/files/scripts/repair_php_fpm80.sh deleted file mode 100644 index 35b9e36c..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm80.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm80:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php80 - then - lxc-stop -n php80 - lxc-start --daemon -n php80 - log_action "lxc-fpm - Redémarrage container php80" - - internal_info "#### tail /var/lib/lxc/php80/rootfs/var/log/php8.0-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php80/rootfs/var/log/php8.0-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm81.sh b/autosysadmin/files/scripts/repair_php_fpm81.sh deleted file mode 100644 index e567f6aa..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm81.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm81:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php81 - then - lxc-stop -n php81 - lxc-start --daemon -n php81 - log_action "lxc-fpm - Redémarrage container php81" - - internal_info "#### tail /var/lib/lxc/php81/rootfs/var/log/php8.1-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php81/rootfs/var/log/php8.1-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm82.sh b/autosysadmin/files/scripts/repair_php_fpm82.sh deleted file mode 100644 index 295abbcd..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm82.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm82:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php82 - then - lxc-stop -n php82 - lxc-start --daemon -n php82 - log_action "lxc-fpm - Redémarrage container php82" - - internal_info "#### tail /var/lib/lxc/php82/rootfs/var/log/php8.2-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php82/rootfs/var/log/php8.2-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm83.sh b/autosysadmin/files/scripts/repair_php_fpm83.sh deleted file mode 100644 index 5344c2e4..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm83.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm83:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php83 - then - lxc-stop -n php83 - lxc-start --daemon -n php83 - log_action "lxc-fpm - Redémarrage container php83" - - internal_info "#### tail /var/lib/lxc/php83/rootfs/var/log/php8.3-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php83/rootfs/var/log/php8.3-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_redis.sh b/autosysadmin/files/scripts/repair_redis.sh deleted file mode 100644 index be5cfd77..00000000 --- a/autosysadmin/files/scripts/repair_redis.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_redis:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_redis" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -handle_redis() { - for service in $(systemd_list_service_failed redis*) - do - # ne rien faire si le service est désactivé - if ! systemctl is-enabled --quiet "${service}" - then - continue - fi - - # ne rien faire si le service est actif - if systemctl is-active --quiet "${service}" - then - continue - fi - - if ! 
timeout 20 systemctl restart redis.service > /dev/null 2> /dev/null - then - log_error_exit "failed to restart redis ${service}" - fi - - log_action "Redémarrer service ${service}" - done -} - -if ( systemd_list_units_enabled 'redis.*\.service$' ) > /dev/null -then - handle_redis - hook_mail success -else - log 'Error: redis service is not enabled' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_template.sh b/autosysadmin/files/scripts/repair_template.sh deleted file mode 100644 index 33db0aac..00000000 --- a/autosysadmin/files/scripts/repair_template.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -# Comment this line to enable -repair_template=off -test "${repair_template:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_template" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -log_system_status - -# Functions dedicated to this repair script - -template_is_enabled() { - systemd_list_units_enabled "template.service" - -} - -template_restart() { - if ! timeout 60 systemctl restart template.service > /dev/null - then - log_error_exit 'failed to restart template' - fi -} - -template_test_process_present() { - pgrep -u template > /dev/null -} - - -# Main logic - -if template_is_enabled -then - if ! template_test_process_present - then - log_action "Redémarrage de template" - template_restart - hook_mail success - else - log_error_exit "template process alive. Aborting" - fi -else - log_error_exit "template is not enabled. 
Aborting" -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/restart_amavis.sh b/autosysadmin/files/scripts/restart_amavis.sh deleted file mode 100644 index ef8c255d..00000000 --- a/autosysadmin/files/scripts/restart_amavis.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -restart_amavis() { - /etc/init.d/amavis stop 2>/dev/null - /etc/init.d/clamav-freshclam stop 2>/dev/null - /etc/init.d/clamav-daemon stop 2>/dev/null - - if systemctl is-enabled --quiet 'clamav-freshclam.service' - then - freshclam - log_action "Mise à jour des définitions antivirus" - fi - - if systemctl is-enabled --quiet 'clamav-daemon.service' - then - /etc/init.d/clamav-daemon start - log_action "Redémarrage de clamav-daemon" - else - log 'Error, clamav not installed' - fi - - if systemctl is-enabled --quiet 'clamav-freshclam.service' - then - /etc/init.d/clamav-freshclam start - log_action "Redémarrage de clamav-freshclam" - fi - - if systemctl is-enabled --quiet 'amavis.service' - then - /etc/init.d/amavis start - log_action "Redémarrage de amavis" - else - log 'Error, amavis not installed' - fi -} diff --git a/autosysadmin/handlers/main.yml b/autosysadmin/handlers/main.yml deleted file mode 100644 index 2c0b1a7f..00000000 --- a/autosysadmin/handlers/main.yml +++ /dev/null @@ -1,16 +0,0 @@ ---- - -- name: Restart nagios-nrpe-server - ansible.builtin.service: - name: nagios-nrpe-server - state: restarted - -- name: Restart nrpe - ansible.builtin.service: - name: nrpe - state: restarted - -- name: Restart rsyslog - ansible.builtin.service: - name: rsyslog - state: restarted diff --git a/autosysadmin/tasks/autosysadmin_scripts.yml b/autosysadmin/tasks/autosysadmin_scripts.yml deleted file mode 100644 index 4ff1c5be..00000000 --- a/autosysadmin/tasks/autosysadmin_scripts.yml +++ /dev/null @@ -1,61 +0,0 @@ ---- -- name: "Remount /usr if needed" - ansible.builtin.import_role: - name: remount-usr - -- name: Create 
autosysadmin directory - ansible.builtin.file: - path: "{{ autosysadmin_dir }}" - state: directory - owner: "root" - group: "root" - mode: "0750" - tags: - - autosysadmin - -- name: Copy scripts - ansible.builtin.copy: - src: "files/scripts/{{ item }}" - dest: "{{ autosysadmin_dir }}/{{ item }}" - owner: root - group: root - mode: "0750" - loop: - - "functions.sh" - - "restart_amavis.sh" - - "repair_amavis.sh" - - "repair_disk.sh" - - "repair_elasticsearch.sh" - - "repair_http.sh" - - "repair_mysql.sh" - - "repair_php_fpm56.sh" - - "repair_php_fpm70.sh" - - "repair_php_fpm73.sh" - - "repair_php_fpm74.sh" - - "repair_php_fpm80.sh" - - "repair_php_fpm81.sh" - - "repair_php_fpm82.sh" - - "repair_php_fpm83.sh" - - "repair_tomcat_instance.sh" - tags: - - autosysadmin - -- name: Ensure /etc/evolinux folder exists - ansible.builtin.file: - path: "/etc/evolinux" - state: directory - owner: "root" - group: "root" - mode: "0700" - tags: - - autosysadmin - -- name: Copy the configuration file - ansible.builtin.template: - src: "autosysadmin.cf.j2" - dest: "/etc/evolinux/autosysadmin" - owner: root - group: root - mode: "0640" - tags: - - autosysadmin diff --git a/autosysadmin/tasks/main.yml b/autosysadmin/tasks/main.yml deleted file mode 100644 index 60204162..00000000 --- a/autosysadmin/tasks/main.yml +++ /dev/null @@ -1,37 +0,0 @@ ---- -- name: Install dependencies - ansible.builtin.import_tasks: dependencies.yml - tags: - - autosysadmin - -- name: Install autosysadmin scripts - ansible.builtin.import_tasks: autosysadmin_scripts.yml - tags: - - autosysadmin - -- name: Amend NRPE configuration - ansible.builtin.import_tasks: nrpe.yml - tags: - - autosysadmin - -- name: Amend sudo configuration - ansible.builtin.import_tasks: sudo.yml - tags: - - autosysadmin - -- name: Amend rsyslog configuration - ansible.builtin.import_tasks: rsyslog.yml - tags: - - autosysadmin - -- name: Amend logrotate configuration - ansible.builtin.import_tasks: logrotate.yml - tags: - - autosysadmin 
- -- name: Install last version of dump-server-state - ansible.builtin.import_role: - name: evolinux-base - tasks_from: dump-server-state.yml - tags: - - autosysadmin diff --git a/autosysadmin/tasks/nrpe.yml b/autosysadmin/tasks/nrpe.yml deleted file mode 100644 index f6f5c78b..00000000 --- a/autosysadmin/tasks/nrpe.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- -- name: Custom configuration is present - ansible.builtin.template: - src: autosysadmin.cfg.j2 - dest: /etc/nagios/nrpe.d/autosysadmin.cfg - group: nagios - mode: "0640" - force: true - notify: Restart nagios-nrpe-server - tags: - - autosysadmin diff --git a/autosysadmin/templates/autosysadmin.cf.j2 b/autosysadmin/templates/autosysadmin.cf.j2 deleted file mode 100644 index 0be506c2..00000000 --- a/autosysadmin/templates/autosysadmin.cf.j2 +++ /dev/null @@ -1,74 +0,0 @@ -# -# Ansible managed - DO NOT MODIFY, your changes will be **overwritten** ! -# -# Update the hosts_vars/group_vars on the autosysadmin project -# https://gitea.evolix.org/evolix/autosysadmin/src/branch/master -# - -# Configuration for autosysadmin -# Use this file to change configuration values defined in repair scripts -# Ex : repair_http=off - -{% if repair_amavis == "off" %} -repair_amavis=off -{% endif %} - -{% if repair_disk == "off" %} -repair_disk=off -{% endif %} - -{% if repair_elasticsearch == "off" %} -repair_elasticsearch=off -{% endif %} - -{% if repair_http == "off" %} -repair_http=off -{% endif %} - -{% if repair_mysql == "off" %} -repair_mysql=off -{% endif %} - -{% if repair_opendkim == "off" %} -repair_opendkim=off -{% endif %} - -{% if repair_php_fpm56 == "off" %} -repair_php_fpm56=off -{% endif %} - -{% if repair_php_fpm70 == "off" %} -repair_php_fpm70=off -{% endif %} - -{% if repair_php_fpm73 == "off" %} -repair_php_fpm73=off -{% endif %} - -{% if repair_php_fpm74 == "off" %} -repair_php_fpm74=off -{% endif %} - -{% if repair_php_fpm80 == "off" %} -repair_php_fpm80=off -{% endif %} - -{% if repair_php_fpm81 == "off" %} 
-repair_php_fpm81=off -{% endif %} - -{% if repair_php_fpm82 == "off" %} -repair_php_fpm82=off -{% endif %} - -{% if repair_php_fpm83 == "off" %} -repair_php_fpm83=off -{% endif %} - -{% if repair_redis == "off" %} -repair_redis=off -{% endif %} - -{% if repair_tomcat_instance == "off" %} -repair_tomcat_instance=off -{% endif %} diff --git a/autosysadmin/templates/autosysadmin.cfg.j2 b/autosysadmin/templates/autosysadmin.cfg.j2 deleted file mode 100644 index fa6fcfd2..00000000 --- a/autosysadmin/templates/autosysadmin.cfg.j2 +++ /dev/null @@ -1,22 +0,0 @@ -# -# Ansible managed - DO NOT MODIFY, your changes will be overwritten ! -# - -# Autosysadmin repair commands -command[repair_amavis]=sudo {{ autosysadmin_dir }}/repair_amavis.sh -command[repair_disk]=sudo {{ autosysadmin_dir }}/repair_disk.sh -command[repair_elasticsearch]=sudo {{ autosysadmin_dir }}/repair_elasticsearch.sh -command[repair_http]=sudo {{ autosysadmin_dir }}/repair_http.sh -command[repair_mysql]=sudo {{ autosysadmin_dir }}/repair_mysql.sh -command[repair_opendkim]=sudo {{ autosysadmin_dir }}/repair_opendkim.sh -command[repair_php_fpm56]=sudo {{ autosysadmin_dir }}/repair_php_fpm56.sh -command[repair_php_fpm70]=sudo {{ autosysadmin_dir }}/repair_php_fpm70.sh -command[repair_php_fpm73]=sudo {{ autosysadmin_dir }}/repair_php_fpm73.sh -command[repair_php_fpm74]=sudo {{ autosysadmin_dir }}/repair_php_fpm74.sh -command[repair_php_fpm80]=sudo {{ autosysadmin_dir }}/repair_php_fpm80.sh -command[repair_php_fpm81]=sudo {{ autosysadmin_dir }}/repair_php_fpm81.sh -command[repair_php_fpm82]=sudo {{ autosysadmin_dir }}/repair_php_fpm82.sh -command[repair_php_fpm83]=sudo {{ autosysadmin_dir }}/repair_php_fpm83.sh -command[repair_redis]=sudo {{ autosysadmin_dir }}/repair_redis.sh -command[repair_tomcat_instance]=sudo {{ autosysadmin_dir }}/repair_tomcat_instance.sh - diff --git a/autosysadmin/templates/sudoers.j2 b/autosysadmin/templates/sudoers.j2 deleted file mode 100644 index 0a458292..00000000 --- 
a/autosysadmin/templates/sudoers.j2 +++ /dev/null @@ -1,21 +0,0 @@ -# -# Ansible managed - DO NOT MODIFY, your changes will be overwritten ! -# - -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_amavis.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_disk.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_elasticsearch.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_http.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_mysql.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_opendkim.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm56.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm70.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm73.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm74.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm80.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm81.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm82.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm83.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_redis.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_tomcat_instance.sh - diff --git a/certbot/files/hooks/deploy/hapee.sh b/certbot/files/hooks/deploy/hapee.sh index 89b04452..d39da25b 100644 --- a/certbot/files/hooks/deploy/hapee.sh +++ b/certbot/files/hooks/deploy/hapee.sh @@ -39,8 +39,8 @@ concat_files() { chown root: "${hapee_cert_file}" } cert_and_key_mismatch() { - hapee_cert_md5=$(openssl x509 -noout -modulus -in "${hapee_cert_file}" | openssl md5) - hapee_key_md5=$(openssl rsa -noout -modulus -in "${hapee_cert_file}" | openssl md5) + hapee_cert_md5=$(openssl x509 -noout -pubkey -in "${hapee_cert_file}" | openssl md5) + hapee_key_md5=$(openssl pkey -noout -pubout -in "${hapee_cert_file}" | openssl md5) test "${hapee_cert_md5}" != "${hapee_key_md5}" } diff --git a/certbot/files/hooks/deploy/haproxy.sh b/certbot/files/hooks/deploy/haproxy.sh index 
932a3e90..c08fafc2 100644 --- a/certbot/files/hooks/deploy/haproxy.sh +++ b/certbot/files/hooks/deploy/haproxy.sh @@ -29,8 +29,8 @@ concat_files() { chown root: "${haproxy_cert_file}" } cert_and_key_mismatch() { - haproxy_cert_md5=$(openssl x509 -noout -modulus -in "${haproxy_cert_file}" | openssl md5) - haproxy_key_md5=$(openssl rsa -noout -modulus -in "${haproxy_cert_file}" | openssl md5) + haproxy_cert_md5=$(openssl x509 -noout -pubkey -in "${haproxy_cert_file}" | openssl md5) + haproxy_key_md5=$(openssl pkey -noout -pubout -in "${haproxy_cert_file}" | openssl md5) test "${haproxy_cert_md5}" != "${haproxy_key_md5}" } diff --git a/certbot/files/hooks/deploy/nrpe.sh b/certbot/files/hooks/deploy/nrpe.sh new file mode 100644 index 00000000..578d6764 --- /dev/null +++ b/certbot/files/hooks/deploy/nrpe.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +error() { + >&2 echo "${PROGNAME}: $1" + exit 1 +} +debug() { + if [ "${VERBOSE}" = "1" ] && [ "${QUIET}" != "1" ]; then + >&2 echo "${PROGNAME}: $1" + fi +} +daemon_found_and_running() { + test -n "$(pidof nrpe)" +} +letsencrypt_lineaged_used() { + grep -r "^ssl_cert_file" /etc/nagios/ | grep "letsencrypt" | grep -q "$(basename "${RENEWED_LINEAGE}")" +} +copy_letsencrypt_cert() { + DEST_CERTIFICATE=$(grep -r "^ssl_cert_file" /etc/nagios/ | awk -F'=' '{print $2}') + DEST_PRIVATE_KEY=$(grep -r "^ssl_privatekey_file" /etc/nagios/ | awk -F'=' '{print $2}') + + install --mode 440 --group nagios ${RENEWED_LINEAGE}/fullchain.pem ${DEST_CERTIFICATE} + install --mode 440 --group nagios ${RENEWED_LINEAGE}/privkey.pem ${DEST_PRIVATE_KEY} +} +main() { + if daemon_found_and_running; then + if letsencrypt_lineaged_used; then + debug "NRPE detected... Copying certificates to the right place & permissions" + copy_letsencrypt_cert + debug "Restarting NRPE" + systemctl restart nagios-nrpe-server + else + debug "NRPE doesn't use the given Let's Encrypt certificate. Skip." + fi + else + debug "NRPE is not running or missing. Skip." 
+ fi +} + +readonly PROGNAME=$(basename "$0") +readonly VERBOSE=${VERBOSE:-"0"} +readonly QUIET=${QUIET:-"0"} + +main \ No newline at end of file diff --git a/evolinux-base/tasks/main.yml b/evolinux-base/tasks/main.yml index d27b69eb..456207df 100644 --- a/evolinux-base/tasks/main.yml +++ b/evolinux-base/tasks/main.yml @@ -147,9 +147,14 @@ tags: - postfix -- name: Autosysadmin +- name: Autosysadmin (agent) ansible.builtin.include_role: - name: 'evolix/autosysadmin' + name: 'evolix/autosysadmin-agent' + when: evolinux_autosysadmin_include | bool + +- name: Autosysadmin (restart_nrpe) + ansible.builtin.include_role: + name: 'evolix/autosysadmin-restart_nrpe' when: evolinux_autosysadmin_include | bool - name: fail2ban diff --git a/kvm-host/defaults/main.yml b/kvm-host/defaults/main.yml index 9cbdd9a3..981f2429 100644 --- a/kvm-host/defaults/main.yml +++ b/kvm-host/defaults/main.yml @@ -10,4 +10,5 @@ kvm_pair: null lvm_filter: - '"a|^/dev/sd[a-zA-Z]+[0-9]*$|"' - '"a|^/dev/nvme[0-9]+(n[0-9]+)?(p[0-9]+)?$|"' - - '"a|^/dev/md[0-9]+$|"' \ No newline at end of file + - '"a|^/dev/md[0-9]+$|"' +kvm_drbd_interface: null diff --git a/kvm-host/tasks/firewall.yml b/kvm-host/tasks/firewall.yml new file mode 100644 index 00000000..328d045c --- /dev/null +++ b/kvm-host/tasks/firewall.yml @@ -0,0 +1,9 @@ +--- +- name: Allow all traffic through DRBD interface + ansible.builtin.lineinfile: + path: /etc/minifirewall.d/drbd + line: "/sbin/iptables -I INPUT -p tcp -i {{ kvm_drbd_interface }} -j ACCEPT" + create: yes + when: + - kvm_drbd_interface is defined + - kvm_drbd_interface | default('', true) | length > 0 diff --git a/kvm-host/tasks/main.yml b/kvm-host/tasks/main.yml index 7aa3bdc2..ae0108cd 100644 --- a/kvm-host/tasks/main.yml +++ b/kvm-host/tasks/main.yml @@ -16,3 +16,5 @@ - ansible.builtin.include: images.yml - ansible.builtin.include: tools.yml + +- ansible.builtin.include: firewall.yml diff --git a/lxc-php/files/sury.gpg b/lxc-php/files/sury.gpg index 384771a0..28043b0a 100644 Binary files
a/lxc-php/files/sury.gpg and b/lxc-php/files/sury.gpg differ diff --git a/nagios-nrpe/templates/evolix.cfg.j2 b/nagios-nrpe/templates/evolix.cfg.j2 index 1f39bcff..d725bb3b 100644 --- a/nagios-nrpe/templates/evolix.cfg.j2 +++ b/nagios-nrpe/templates/evolix.cfg.j2 @@ -91,8 +91,8 @@ command[check_php-fpm83]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi command[check_dhcp_pool]={{ nagios_plugins_directory }}/check_dhcp_pool command[check_ssl_local]={{ nagios_plugins_directory }}/check_ssl_local command[check_pressure_cpu]=/usr/lib/nagios/plugins/check_pressure --cpu -w 100000 -c 500000 -command[check_pressure_mem]=/usr/lib/nagios/plugins/check_pressure --mem -w 100000 -c 500000 -command[check_pressure_io]=/usr/lib/nagios/plugins/check_pressure --io -w 100000 -c 500000 +command[check_pressure_mem]=/usr/lib/nagios/plugins/check_pressure --mem --full -w 100000 -c 500000 +command[check_pressure_io]=/usr/lib/nagios/plugins/check_pressure --io --full -w 100000 -c 500000 # Check HTTP "many". Use this to check many websites (http, https, ports, sockets and SSL certificates). # Beware! All checks must not take more than 10s! 
diff --git a/openvpn/files/check_openvpn_certificates.sh b/openvpn/files/check_openvpn_certificates.sh index 26808868..1ec3aaed 100644 --- a/openvpn/files/check_openvpn_certificates.sh +++ b/openvpn/files/check_openvpn_certificates.sh @@ -35,6 +35,7 @@ fi # Dates in seconds _15_days="1296000" _30_days="2592000" +_60_days="5184000" current_date=$($date_cmd +"%s") # Trying to define the OpenVPN conf file location - default to /etc/openvpn/server.conf @@ -90,15 +91,15 @@ test_ca_expiration() { if [ $current_date -ge $1 ]; then CA_ECHO="CRITICAL - The server CA has expired on $formated_ca_expiration_date" CA_STATE=$STATE_CRITICAL - # Expiration in 15 days or less - CA file - elif [ $((current_date+_15_days)) -ge $1 ]; then - CA_ECHO="CRITICAL - The server CA expires in 15 days or less : $formated_ca_expiration_date" - CA_STATE=$STATE_CRITICAL # Expiration in 30 days or less - CA file elif [ $((current_date+_30_days)) -ge $1 ]; then - CA_ECHO="WARNING - The server CA expires in 30 days or less : $formated_ca_expiration_date" + CA_ECHO="CRITICAL - The server CA expires in 30 days or less : $formated_ca_expiration_date" + CA_STATE=$STATE_CRITICAL + # Expiration in 60 days or less - CA file + elif [ $((current_date+_60_days)) -ge $1 ]; then + CA_ECHO="WARNING - The server CA expires in 60 days or less : $formated_ca_expiration_date" CA_STATE=$STATE_WARNING - # Expiration in more than 30 days - CA file + # Expiration in more than 60 days - CA file else CA_ECHO="OK - The server CA expires on $formated_ca_expiration_date" CA_STATE=$STATE_OK @@ -193,8 +194,8 @@ main() { echo $RESTART_ECHO exit $CERT_STATE else - echo $CERT_ECHO echo $CA_ECHO + echo $CERT_ECHO echo $RESTART_ECHO exit $CERT_STATE fi diff --git a/php/files/sury.gpg b/php/files/sury.gpg index 384771a0..28043b0a 100644 Binary files a/php/files/sury.gpg and b/php/files/sury.gpg differ diff --git a/redis/files/munin_redis b/redis/files/munin_redis index 55474435..ef3b61ad 100644 --- a/redis/files/munin_redis +++ 
b/redis/files/munin_redis @@ -1,243 +1,439 @@ #!/usr/bin/perl -w -# -## Copyright (C) 2009 Gleb Voronich -## -## This program is free software; you can redistribute it and/or -## modify it under the terms of the GNU General Public License -## as published by the Free Software Foundation; version 2 dated June, -## 1991. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -## -## -## $Log$ -## -## Based on Redis module code v0.08 2009/from http://svn.rot13.org/index.cgi/Redis -## -## Installation process: -## -## 1. Download the plugin to your plugins directory (e.g. /usr/share/munin/plugins) -## 2. Create 3 symlinks at the directory that us used by munin for plugins detection (e.g. /etc/munin/plugins): redis_connected_clients, redis_per_sec and and redis_used_memory -## 3. Edit plugin-conf.d/munin-node if it is needed (env.host and env.port variables are accepted; set env.password for password protected Redis server) -## 4. Restart munin-node service -## -## Magic Markers -#%# family=auto -#%# capabilities=autoconf suggest +=head CONFIGURATION + + Based on Redis module code v0.08 2009/from http://svn.rot13.org/index.cgi/Redis + + Installation process: + + 1. Download the plugin to your plugins directory (e.g. /usr/share/munin/plugins) + 2. Symlink it to your configuration directory (e.g. ln -s /usr/share/munin/plugins/redis /etc/munin/plugins/redis) + 3. Edit plugin-conf.d/munin-node with the options to connect to your redis instances (see below for an example) + 4. 
Restart munin-node service + + Example config + [redis] + env.host1 127.0.0.1 + env.port1 6379 + env.password1 password + env.title_prefix1 redis-1 + env.host2 /run/redis.sock + env.title_prefix2 redis-2 + + Each host should be specified with at least a host or unixsocket variable suffixed with + a number, the first being 1, the second being 2 etc. They must be in sequence. + Other options are: + * port - the redis port to connect to + * password - the password to use with the AUTH command + * title_prefix - a prefix to put before the title of the graph, this is strongly recommended for multiple instances + + Graphs: + This generates multigraphs for: + * Connected clients + * Key Hit vs Miss ratio + * Keys per second, hits/misses/expirations/evictions + * Replication backlog + * Replication lag + * Request per second + * Total number of keys and keys with expires + * Used memory + +=head COPYRIGHT + + Copyright (C) 2020 Rowan Wookey + Copyright (C) 2009 Gleb Voronich + +=head LICENSE + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 dated June, + 1991. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +=head MAGIC MARKERS + + #%# family=auto + #%# capabilities=autoconf + +=cut use strict; use IO::Socket::INET; use IO::Socket::UNIX; -use Switch; -my $HOST = exists $ENV{'host'} ? $ENV{'host'} : "127.0.0.1"; -my $UNIX_SOCKET = exists $ENV{'unixsocket'} ? 
$ENV{'unixsocket'} : ''; # path to Redis Unix sock file -my $PORT = exists $ENV{'port'} ? $ENV{'port'} : 6379; -my $PASSWORD = exists $ENV{'password'} ? $ENV{'password'} : undef; -my $TITLE_PREFIX = exists $ENV{'title_prefix'} ? $ENV{'title_prefix'} . ": " : ""; +my %INSTANCES; +my $HOST; +my $PORT; +my $PASSWORD; + +for (my $i = 1; $ENV{"host$i"}; $i++) +{ + $HOST = exists $ENV{"host$i"} ? $ENV{"host$i"} : "127.0.0.1"; + $PORT = exists $ENV{"port$i"} ? $ENV{"port$i"} : 6379; + $PASSWORD = exists $ENV{"password$i"} ? $ENV{"password$i"} : undef; + my $TITLE_PREFIX = exists $ENV{"title_prefix$i"} ? $ENV{"title_prefix$i"} . ": " : ""; + my $SOCK = &get_conn(); + $INSTANCES{"instance$i"} = { + HOST => $HOST, + PORT => $PORT, + PASSWORD => $PASSWORD, + TITLE_PREFIX => $TITLE_PREFIX, + SOCK => $SOCK + }; +} + -my $sock = &get_conn(); my $config = ( defined $ARGV[0] and $ARGV[0] eq "config" ); my $autoconf = ( defined $ARGV[0] and $ARGV[0] eq "autoconf" ); if ( $autoconf ) { - if ( defined( $sock ) ) { + if (!%INSTANCES) { + print "no (no redis instances configured)\n"; + exit 0; + } + my $err = ''; + for my $INSTANCE (keys %INSTANCES) { + if (! defined( $INSTANCES{$INSTANCE}{'SOCK'} ) ) { + $err = "no (unable to connect to ".$INSTANCES{$INSTANCE}{'HOST'}."\[:". 
$INSTANCES{$INSTANCE}{'PORT'}."\])\n"; + } + } + if ($err) { + print $err; + } else { print "yes\n"; - exit 0; - } else { - print "no (unable to connect to $HOST\[:$PORT\])\n"; - exit 0; - } -} -my $suggest = ( defined $ARGV[0] and $ARGV[0] eq "suggest" ); -if ( $suggest ) { - if ( defined( $sock ) ) { - my @plugins = ('connected_clients', 'key_ratio', 'keys_per_sec', 'per_sec', 'used_keys', 'used_memory'); - foreach my $plugin (@plugins) { - print "$plugin\n"; - } - exit 0; - } else { - print "no (unable to connect to $HOST\[:$PORT\])\n"; - exit 0; } + exit 0; } -my $hash=&get_info(); +my $total = 0; -$0 =~ s/(.+)redis_//g; +my $multi_graph_output = ''; +my $instance_graph_output = ''; -switch ($0) { - case "connected_clients" { - if ( $config ) { - my $maxclients= get_config("maxclients")->{"maxclients"}; - print "graph_title ${TITLE_PREFIX}Connected clients\n"; - print "graph_vlabel Connected clients\n"; - print "graph_category search\n"; - print "graph_args -l 0\n"; - print "connected_clients.line $maxclients:ff0000:Limit\n"; - print "connected_clients.label connected clients\n"; - exit 0; +my $connected_clients = 0; +my $keyspace_hits = 0; +my $keyspace_misses = 0; +my $expired_keys = 0; +my $evicted_keys = 0; +my $total_commands_processed = 0; +my $total_connections_received = 0; +my $repl_backlog_size = 0; +my $used_memory = 0; +my $used_memory_rss = 0; +my $used_memory_peak = 0; +my $total_keys = 0; +my $total_expires = 0; +foreach my $INSTANCE (keys %INSTANCES) { + + my $sock = $INSTANCES{$INSTANCE}{'SOCK'}; + my $TITLE_PREFIX = $INSTANCES{$INSTANCE}{'TITLE_PREFIX'}; + my $hash = get_info($sock); + + my $dbs; + foreach my $key (keys %{$hash}) { + if ( $key =~ /^db\d+$/ && $hash->{$key} =~ /keys=(\d+),expires=(\d+)/ ) { + $total_keys += $1; + $total_expires += $2; + $dbs->{$key} = [ $1, $2 ]; } - - print "connected_clients.value " . $hash->{'connected_clients'} . 
"\n"; } - - case "keys_per_sec" { - if ( $config ) { - print "graph_title ${TITLE_PREFIX}Keys Per Second\n"; - print "graph_vlabel per \${graph_period}\n"; - print "graph_category search\n"; - print "graph_args -l 0\n"; - print "hits.label hits\n"; - print "hits.type COUNTER\n"; - print "misses.label misses\n"; - print "misses.type COUNTER\n"; - print "expired.label expirations\n"; - print "expired.type COUNTER\n"; - print "evictions.label evictions\n"; - print "evictions.type COUNTER\n"; - exit 0; + if ( $config ) { + my $ret = get_config("maxclients", $sock); + # if the CONFIG command is disabled we don't show the max clients + my $maxclients = defined $ret ? $ret->{"maxclients"} : 0; + $instance_graph_output .= "multigraph redis_connected_clients.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Connected clients\n"; + $instance_graph_output .= "graph_vlabel Connected clients\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + if ($maxclients) { + $instance_graph_output .= "connected_clients.line $maxclients:ff0000:Limit\n"; } - - print "hits.value " . $hash->{'keyspace_hits'} . "\n"; - print "misses.value " . $hash->{'keyspace_misses'} . "\n"; - print "expired.value " . $hash->{'expired_keys'} . "\n"; - print "evictions.value " . $hash->{'evicted_keys'} . 
"\n"; - } - - case "key_ratio" { - if ( $config ) { - print "graph_title ${TITLE_PREFIX}Key Hit vs Miss Ratio\n"; - print "graph_vlabel per \${graph_period}\n"; - print "graph_category search\n"; - print "graph_args -u 100 -l 0 -r --base 1000\n"; - print "hitratio.label hit ratio\n"; - print "hitratio.type GAUGE\n"; - print "hitratio.draw AREA\n"; - print "missratio.label miss ratio\n"; - print "missratio.type GAUGE\n"; - print "missratio.draw STACK\n"; - exit 0; - } - - my $total = $hash->{'keyspace_hits'} + $hash->{'keyspace_misses'}; - my $hitratio = 0; - my $missratio = 0; - if ($total > 0) { - $hitratio = $hash->{'keyspace_hits'} / $total * 100; - $missratio = $hash->{'keyspace_misses'} / $total * 100; - } - printf("hitratio.value %.2f\n", $hitratio); - printf("missratio.value %.2f\n", $missratio); - } - - - case "per_sec" { - if ( $config ) { - print "graph_title ${TITLE_PREFIX}Per second\n"; - print "graph_vlabel per \${graph_period}\n"; - print "graph_category search\n"; - print "graph_args -l 0\n"; - print "requests.label requests\n"; - print "requests.type COUNTER\n"; - print "connections.label connections\n"; - print "connections.type COUNTER\n"; - exit 0; - } - - print "requests.value ". $hash->{'total_commands_processed'} ."\n"; - print "connections.value ". $hash->{'total_connections_received'} ."\n"; - } - - - case "used_memory" { - if ( $config ) { - my $maxmemory = get_config("maxmemory")->{"maxmemory"}; - print "graph_title ${TITLE_PREFIX}Used memory\n"; - print "graph_vlabel Used memory\n"; - print "graph_category search\n"; - print "graph_args -l 0 --base 1024\n"; - print "used_memory.line $maxmemory:ff0000:Limit\n"; - print "used_memory.label used memory\n"; - print "used_memory_peak.label used memory in peak\n"; - print "used_memory_rss.label Resident set size memory usage\n"; - exit 0; - } - - print "used_memory.value ". $hash->{'used_memory'} ."\n"; - print "used_memory_rss.value ". 
$hash->{'used_memory_rss'} ."\n"; - print "used_memory_peak.value ". $hash->{'used_memory_peak'} ."\n"; - } - - case "used_keys" { - my $dbs; - foreach my $key (keys %{$hash}) { - if ( $key =~ /^db\d+$/ && $hash->{$key} =~ /keys=(\d+),expires=(\d+)/ ) { - $dbs->{$key} = [ $1, $2 ]; - } - } - - if ( $config ) { - print "graph_title ${TITLE_PREFIX}Used keys\n"; - print "graph_vlabel Used keys\n"; - print "graph_category search\n"; - print "graph_args -l 0\n"; - - foreach my $db (keys %{$dbs}) { - printf "%s_keys.label %s keys\n", $db, $db; - printf "%s_expires.label %s expires\n", $db, $db; - } - - exit 0; + $instance_graph_output .= "connected_clients.label connected clients\n"; + $instance_graph_output .= "multigraph keys_per_sec.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Keys Per Second\n"; + $instance_graph_output .= "graph_vlabel per \${graph_period}\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + $instance_graph_output .= "hits.label hits\n"; + $instance_graph_output .= "hits.type COUNTER\n"; + $instance_graph_output .= "misses.label misses\n"; + $instance_graph_output .= "misses.type COUNTER\n"; + $instance_graph_output .= "expired.label expirations\n"; + $instance_graph_output .= "expired.type COUNTER\n"; + $instance_graph_output .= "evicted_keys.label evictions\n"; + $instance_graph_output .= "evicted_keys.type COUNTER\n"; + $instance_graph_output .= "multigraph redis_key_ratio.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Key Hit vs Miss Ratio\n"; + $instance_graph_output .= "graph_vlabel per \${graph_period}\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -u 100 -l 0 -r --base 1000\n"; + $instance_graph_output .= "hitratio.label hit ratio\n"; + $instance_graph_output .= "hitratio.type GAUGE\n"; + $instance_graph_output .= "hitratio.draw AREA\n"; + $instance_graph_output .= "missratio.label miss 
ratio\n"; + $instance_graph_output .= "missratio.type GAUGE\n"; + $instance_graph_output .= "missratio.draw STACK\n"; + $instance_graph_output .= "multigraph redis_per_sec.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Requests Per second\n"; + $instance_graph_output .= "graph_vlabel per \${graph_period}\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + $instance_graph_output .= "requests.label requests\n"; + $instance_graph_output .= "requests.type COUNTER\n"; + $instance_graph_output .= "connections.label connections\n"; + $instance_graph_output .= "connections.type COUNTER\n"; + $instance_graph_output .= "multigraph redis_repl_backlog_size.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}replication backlog\n"; + $instance_graph_output .= "graph_vlabel replication backlog\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + $instance_graph_output .= "repl_backlog_size.label bytes behind master\n"; + $instance_graph_output .= "multigraph redis_repl_lag.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}replication lag\n"; + $instance_graph_output .= "graph_vlabel replication lag\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + $instance_graph_output .= "repl_backlog_size.label amount behind master\n"; + # if the CONFIG command is disabled we don't show maxmemory + $ret = get_config("maxmemory", $sock); + my $maxmemory = defined $ret ? 
$ret->{"maxmemory"} : 0; + $instance_graph_output .= "multigraph redis_used_memory.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Used memory\n"; + $instance_graph_output .= "graph_vlabel Used memory\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0 --base 1024\n"; + if ($maxmemory) { + $instance_graph_output .= "used_memory.line $maxmemory:ff0000:Limit\n"; } + $instance_graph_output .= "used_memory.label used memory\n"; + $instance_graph_output .= "used_memory_peak.label used memory in peak\n"; + $instance_graph_output .= "used_memory_rss.label Resident set size memory usage\n"; + $instance_graph_output .= "multigraph redis_used_keys.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Used keys\n"; + $instance_graph_output .= "graph_vlabel Used keys\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; foreach my $db (keys %{$dbs}) { - printf "%s_keys.value %d\n", $db, $dbs->{$db}[0]; - printf "%s_expires.value %d\n", $db, $dbs->{$db}[1]; + $instance_graph_output .= sprintf "%s_keys.label %s keys\n", $db, $db; + $instance_graph_output .= sprintf "%s_expires.label %s expires\n", $db, $db; } + + next; } + + $instance_graph_output .= "multigraph redis_connected_clients.$INSTANCE\n"; + $instance_graph_output .= "connected_clients.value " . $hash->{'connected_clients'} . "\n"; + $connected_clients += $hash->{'connected_clients'}; + $instance_graph_output .= "multigraph keys_per_sec.$INSTANCE\n"; + $instance_graph_output .= "hits.value " . $hash->{'keyspace_hits'} . "\n"; + $keyspace_hits += $hash->{'keyspace_hits'}; + $instance_graph_output .= "misses.value " . $hash->{'keyspace_misses'} . "\n"; + $keyspace_misses += $hash->{'keyspace_misses'}; + $instance_graph_output .= "expired.value " . $hash->{'expired_keys'} . "\n"; + $expired_keys += $hash->{'expired_keys'}; + $instance_graph_output .= "evicted_keys.value " . 
$hash->{'evicted_keys'} . "\n"; + $evicted_keys += $hash->{'evicted_keys'}; + $instance_graph_output .= "multigraph redis_key_ratio.$INSTANCE\n"; + my $total = $hash->{'keyspace_hits'} + $hash->{'keyspace_misses'}; + my $hitratio = 0; + my $missratio = 0; + if ($total > 0) { + $hitratio = $hash->{'keyspace_hits'} / $total * 100; + $missratio = $hash->{'keyspace_misses'} / $total * 100; + } + $instance_graph_output .= sprintf("hitratio.value %.2f\n", $hitratio); + $instance_graph_output .= sprintf("missratio.value %.2f\n", $missratio); + $instance_graph_output .= "multigraph redis_per_sec.$INSTANCE\n"; + $instance_graph_output .= "requests.value ". $hash->{'total_commands_processed'} ."\n"; + $total_commands_processed += $hash->{'total_commands_processed'}; + $instance_graph_output .= "connections.value ". $hash->{'total_connections_received'} ."\n"; + $total_connections_received += $hash->{'total_connections_received'}; + $instance_graph_output .= "multigraph redis_repl_backlog_size.$INSTANCE\n"; + $instance_graph_output .= "repl_backlog_size.value " . $hash->{'repl_backlog_size'} . "\n"; + $repl_backlog_size += $hash->{'repl_backlog_size'}; + + $instance_graph_output .= "multigraph redis_repl_lag.$INSTANCE\n"; + if (exists $hash->{slave0} && $hash->{slave0} =~ /lag=(\d+)/) { + $repl_backlog_size += $1; + $instance_graph_output .= "repl_backlog_size.value " . $1 . "\n"; + } else { + $instance_graph_output .= "repl_backlog_size.value 0\n"; + } + + + $instance_graph_output .= "multigraph redis_used_memory.$INSTANCE\n"; + $instance_graph_output .= "used_memory.value ". $hash->{'used_memory'} ."\n"; + + $used_memory += $hash->{'used_memory'}; + $instance_graph_output .= "used_memory_rss.value ". $hash->{'used_memory_rss'} ."\n"; + $used_memory_rss += $hash->{'used_memory_rss'}; + $instance_graph_output .= "used_memory_peak.value ". 
$hash->{'used_memory_peak'} ."\n"; + $used_memory_peak += $hash->{'used_memory_peak'}; + + $instance_graph_output .= "multigraph redis_used_keys.$INSTANCE\n"; + foreach my $db (keys %{$dbs}) { + $instance_graph_output .= sprintf "%s_keys.value %d\n", $db, $dbs->{$db}[0]; + $instance_graph_output .= sprintf "%s_expires.value %d\n", $db, $dbs->{$db}[1]; + } + close ($sock); } -close ($sock); +# multigraph output +if ($config) { + $multi_graph_output .= "multigraph redis_connected_clients\n"; + $multi_graph_output .= "graph_title Connected clients\n"; + $multi_graph_output .= "graph_vlabel Connected clients\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "connected_clients.label connected clients\n"; + $multi_graph_output .= "multigraph keys_per_sec\n"; + $multi_graph_output .= "graph_title Keys Per Second\n"; + $multi_graph_output .= "graph_vlabel per \${graph_period}\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "hits.label hits\n"; + $multi_graph_output .= "hits.type COUNTER\n"; + $multi_graph_output .= "misses.label misses\n"; + $multi_graph_output .= "misses.type COUNTER\n"; + $multi_graph_output .= "expired.label expirations\n"; + $multi_graph_output .= "expired.type COUNTER\n"; + $multi_graph_output .= "evicted_keys.label evictions\n"; + $multi_graph_output .= "evicted_keys.type COUNTER\n"; + $multi_graph_output .= "multigraph redis_key_ratio\n"; + $multi_graph_output .= "graph_title Key Hit vs Miss Ratio\n"; + $multi_graph_output .= "graph_vlabel per \${graph_period}\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -u 100 -l 0 -r --base 1000\n"; + $multi_graph_output .= "hitratio.label hit ratio\n"; + $multi_graph_output .= "hitratio.type GAUGE\n"; + $multi_graph_output .= "hitratio.draw AREA\n"; + $multi_graph_output .= "missratio.label miss ratio\n"; + 
$multi_graph_output .= "missratio.type GAUGE\n"; + $multi_graph_output .= "missratio.draw STACK\n"; + $multi_graph_output .= "multigraph redis_per_sec\n"; + $multi_graph_output .= "graph_title Requests Per second\n"; + $multi_graph_output .= "graph_vlabel per \${graph_period}\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "requests.label requests\n"; + $multi_graph_output .= "requests.type COUNTER\n"; + $multi_graph_output .= "connections.label connections\n"; + $multi_graph_output .= "connections.type COUNTER\n"; + $multi_graph_output .= "multigraph redis_repl_backlog_size\n"; + $multi_graph_output .= "graph_title replication backlog\n"; + $multi_graph_output .= "graph_vlabel replication backlog\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "repl_backlog_size.label bytes behind master\n"; + $multi_graph_output .= "multigraph redis_repl_lag\n"; + $multi_graph_output .= "graph_title replication lag\n"; + $multi_graph_output .= "graph_vlabel replication lag\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "repl_backlog_size.label amount behind master\n"; + $multi_graph_output .= "multigraph redis_used_memory\n"; + $multi_graph_output .= "graph_title Used memory\n"; + $multi_graph_output .= "graph_vlabel Used memory\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0 --base 1024\n"; + $multi_graph_output .= "used_memory.label used memory\n"; + $multi_graph_output .= "used_memory_peak.label used memory in peak\n"; + $multi_graph_output .= "used_memory_rss.label Resident set size memory usage\n"; + $multi_graph_output .= "multigraph redis_used_keys\n"; + $multi_graph_output .= "graph_title Used keys\n"; + $multi_graph_output .= "graph_vlabel Used keys\n"; + $multi_graph_output .= "graph_category db\n"; + 
$multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "total_keys.label Total keys\n"; + $multi_graph_output .= "total_expires.label Total expires\n"; +} else { + + $multi_graph_output .= "multigraph redis_connected_clients\n"; + $multi_graph_output .= "connected_clients.value " . $connected_clients . "\n"; + $multi_graph_output .= "multigraph keys_per_sec\n"; + $multi_graph_output .= "hits.value " . $keyspace_hits . "\n"; + $multi_graph_output .= "misses.value " . $keyspace_misses . "\n"; + $multi_graph_output .= "expired.value " . $expired_keys . "\n"; + $multi_graph_output .= "evicted_keys.value " . $evicted_keys . "\n"; + $multi_graph_output .= "multigraph redis_key_ratio\n"; + my $total = $keyspace_hits + $keyspace_misses; + my $hitratio = 0; + my $missratio = 0; + if ($total > 0) { + $hitratio = $keyspace_hits / $total * 100; + $missratio = $keyspace_misses / $total * 100; + } + $multi_graph_output .= sprintf("hitratio.value %.2f\n", $hitratio); + $multi_graph_output .= sprintf("missratio.value %.2f\n", $missratio); + $multi_graph_output .= "multigraph redis_per_sec\n"; + $multi_graph_output .= "requests.value ". $total_commands_processed ."\n"; + $multi_graph_output .= "connections.value ". $total_connections_received ."\n"; + $multi_graph_output .= "multigraph redis_repl_backlog_size\n"; + $multi_graph_output .= "repl_backlog_size.value " . $repl_backlog_size . "\n"; + + $multi_graph_output .= "multigraph redis_repl_lag\n"; + $multi_graph_output .= "repl_backlog_size.value " . $repl_backlog_size . "\n"; + + + $multi_graph_output .= "multigraph redis_used_memory\n"; + $multi_graph_output .= "used_memory.value ". $used_memory ."\n"; + + $multi_graph_output .= "used_memory_rss.value ". $used_memory_rss ."\n"; + $multi_graph_output .= "used_memory_peak.value ". 
$used_memory_peak ."\n"; + + $multi_graph_output .= "multigraph redis_used_keys\n"; + $multi_graph_output .= "total_keys.value $total_keys\n"; + $multi_graph_output .= "total_expires.value $total_expires\n"; + +} +print $multi_graph_output; +print $instance_graph_output; sub get_conn { - + my $sock; - - if( $UNIX_SOCKET && -S $UNIX_SOCKET ){ - - $sock = IO::Socket::UNIX->new( - Type => SOCK_STREAM(), - Peer => $UNIX_SOCKET, - ); - + + if(-S $HOST ){ + + $sock = IO::Socket::UNIX->new( + Type => SOCK_STREAM(), + Peer => $HOST, + ); }else{ - - $sock = IO::Socket::INET->new( - PeerAddr => $HOST, - PeerPort => $PORT, - Timeout => 10, - Proto => 'tcp' - ); + + $sock = IO::Socket::INET->new( + PeerAddr => $HOST, + PeerPort => $PORT, + Timeout => 10, + Proto => 'tcp' + ); } - + + if (! defined($sock)) { + die "can't read socket: $!"; + } + if ( defined( $PASSWORD ) ) { print $sock "AUTH ", $PASSWORD, "\r\n"; my $result = <$sock> || die "can't read socket: $!"; } + return $sock; } sub get_info{ + my $sock = $_[0]; print $sock "INFO\r\n"; my $result = <$sock> || die "can't read socket: $!"; @@ -257,13 +453,12 @@ sub get_info{ # This subroutine returns configuration matched to supplied as object sub get_config{ - + my $sock = $_[1]; print $sock "*3\r\n\$6\r\nCONFIG\r\n\$3\r\nGET\r\n\$".length($_[0])."\r\n".$_[0]."\r\n"; # Response will look like like # *2\r\n$9\r\nmaxmemory\r\n$10\r\n3221225472\r\n my $type = <$sock> || die "can't read socket: $!"; - my $conf; if( substr($type,0,1) ne "*" ) { return $conf; diff --git a/redis/tasks/thp.yml b/redis/tasks/thp.yml index 7a215788..133466b7 100644 --- a/redis/tasks/thp.yml +++ b/redis/tasks/thp.yml @@ -23,6 +23,7 @@ path: /etc/sysfs.conf line: kernel/mm/transparent_hugepage/enabled = {{ redis_sysctl_transparent_hugepage_enabled }} regexp: "kernel/mm/transparent_hugepage/enabled" + create: yes tags: - redis - kernel @@ -32,4 +33,4 @@ cmd: "echo '{{ redis_sysctl_transparent_hugepage_enabled }}' >> 
/sys/kernel/mm/transparent_hugepage/enabled" tags: - redis - - kernel \ No newline at end of file + - kernel