From 7c2fd5e394ff3d7e6b9d37ce8e0c1f6c1cca6534 Mon Sep 17 00:00:00 2001 From: Alexis Ben Miloud--Josselin Date: Wed, 13 Dec 2023 12:21:37 +0100 Subject: [PATCH 01/19] kvm-host: Add firewall rule for DRBD --- kvm-host/defaults/main.yml | 3 ++- kvm-host/tasks/firewall.yml | 9 +++++++++ kvm-host/tasks/main.yml | 2 ++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 kvm-host/tasks/firewall.yml diff --git a/kvm-host/defaults/main.yml b/kvm-host/defaults/main.yml index 9cbdd9a3..981f2429 100644 --- a/kvm-host/defaults/main.yml +++ b/kvm-host/defaults/main.yml @@ -10,4 +10,5 @@ kvm_pair: null lvm_filter: - '"a|^/dev/sd[a-zA-Z]+[0-9]*$|"' - '"a|^/dev/nvme[0-9]+(n[0-9]+)?(p[0-9]+)?$|"' - - '"a|^/dev/md[0-9]+$|"' \ No newline at end of file + - '"a|^/dev/md[0-9]+$|"' +kvm_drbd_interface: null diff --git a/kvm-host/tasks/firewall.yml b/kvm-host/tasks/firewall.yml new file mode 100644 index 00000000..328d045c --- /dev/null +++ b/kvm-host/tasks/firewall.yml @@ -0,0 +1,9 @@ +--- +- name: Allow all traffic through DRBD interface + ansible.builtin.lineinfile: + path: /etc/minifirewall.d/drbd + line: "/sbin/iptables -I INPUT -p tcp -i {{ kvm_drbd_interface }} -j ACCEPT" + create: yes + when: + - kvm_drbd_interface is defined + - kvm_drbd_interface | length > 0 diff --git a/kvm-host/tasks/main.yml b/kvm-host/tasks/main.yml index 7aa3bdc2..ae0108cd 100644 --- a/kvm-host/tasks/main.yml +++ b/kvm-host/tasks/main.yml @@ -16,3 +16,5 @@ - ansible.builtin.include: images.yml - ansible.builtin.include: tools.yml + +- ansible.builtin.include: firewall.yml From c12c581f6357ae57e6f23f184031fe8b12f7c30c Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Mon, 12 Feb 2024 19:07:20 +0100 Subject: [PATCH 02/19] update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ba289fd4..ce94db16 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,8 @@ The **patch** part changes is incremented if multiple releases happen 
the same m ### Added +* kvm-host: add minifirewall rules if DRBD interface is configured + ### Changed ### Fixed From 015cac688e7f5e1412f8efa6c19564c21b1b76a3 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Tue, 20 Feb 2024 09:48:58 +0100 Subject: [PATCH 03/19] redis: create sysfs config file if missing --- CHANGELOG.md | 2 ++ redis/tasks/thp.yml | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ce94db16..72860558 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Changed +* redis: create sysfs config file if missing + ### Fixed ### Removed diff --git a/redis/tasks/thp.yml b/redis/tasks/thp.yml index 7a215788..133466b7 100644 --- a/redis/tasks/thp.yml +++ b/redis/tasks/thp.yml @@ -23,6 +23,7 @@ path: /etc/sysfs.conf line: kernel/mm/transparent_hugepage/enabled = {{ redis_sysctl_transparent_hugepage_enabled }} regexp: "kernel/mm/transparent_hugepage/enabled" + create: yes tags: - redis - kernel @@ -32,4 +33,4 @@ cmd: "echo '{{ redis_sysctl_transparent_hugepage_enabled }}' >> /sys/kernel/mm/transparent_hugepage/enabled" tags: - redis - - kernel \ No newline at end of file + - kernel From 56db6e1fbc0425948699d1e85c9bf4c977954a61 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Tue, 20 Feb 2024 09:49:41 +0100 Subject: [PATCH 04/19] apt: add ftp.evolix.org as recognized system source --- CHANGELOG.md | 1 + apt/files/deb822-migration.py | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72860558..77560580 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Changed +* apt: add ftp.evolix.org as recognized system source * redis: create sysfs config file if missing ### Fixed diff --git a/apt/files/deb822-migration.py b/apt/files/deb822-migration.py index f8693b28..96ef1721 100755 --- 
a/apt/files/deb822-migration.py +++ b/apt/files/deb822-migration.py @@ -13,6 +13,7 @@ destinations = { ".*-backports": "backports.sources", ".debian.org": "system.sources", "mirror.evolix.org": "system.sources", + "ftp.evolix.org": "system.sources", "pub.evolix.net": "evolix_public_old.sources.bak", "pub.evolix.org": "evolix_public.sources", "artifacts.elastic.co": "elastic.sources", From a56e8c27ee44bcd51349f59f3b43105ee874088c Mon Sep 17 00:00:00 2001 From: David Prevot Date: Fri, 16 Feb 2024 11:03:22 +0100 Subject: [PATCH 05/19] lxc-php, php: Update sury PGP key --- CHANGELOG.md | 1 + lxc-php/files/sury.gpg | Bin 1769 -> 1769 bytes php/files/sury.gpg | Bin 1769 -> 1769 bytes 3 files changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 77560580..f490b48a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Changed * apt: add ftp.evolix.org as recognized system source +* lxc-php, php: Update sury PGP key * redis: create sysfs config file if missing ### Fixed diff --git a/lxc-php/files/sury.gpg b/lxc-php/files/sury.gpg index 384771a0f87beb284ea8cefe887b40c8d481c209..28043b0af5928d6e14a85b6a51ae931fd02377b0 100644 GIT binary patch delta 839 zcmV-N1GxO@4e1S_0)J(|x2gpR4D+uG0162Zv54ZfmAywp#aIjeH|v^(Rl4kXmuB5v z z5!U+sih=Ux_Vf=1!`giKPW?AVmxmE6HmiU@rkohP;vESLaI+3R3qC@~YcDs}gX4K! 
z?3{OACy{F_Y(~$sngg`~e`Ub8%LNGx^RFxb3JDOgh~l=Dy+=b{Eere${jB{?8@JAA zJ27{J-XV>Nz_-W(j8vccm_5L98jqB2a9kf394TPD2OHXjdQ^ zd)OXZSGhYAI1^yEH^vb@8NI@Uo>N4nY{5rI4ONqy`Ks5uz%KOSMKqHp&A?s{mm)CB z_n~S6GX<{bzK_(`g98WZJw*~3x25`2k+{aj{xk1aboh(8-(IL^LE8Vd#n+;f-OrvB z&b4J42^X%6wyd8Xf4EsqIKllZOgT6q%5nuoD37zoTx8r)Ml)bWKWF#L1%~2G?ayo} z0UyH6V8sQ&f>+AASFe(Zl7|Isr9c1x delta 839 zcmV-N1GxO@4e1S_0)JpDg@*+R2|uuC0162Zv54ZfmAywpL-YDB*;DDkT#lFlBL zO;1VH5C1G0a=Hw*UV&FdwsHu8ING$9GbA1oBv-0?c{|E zRJ(pYp&TS<9Ws&S!?!)$*qz*n2xrA!^xmn`O0P8@m}61_)Ky9uvOie z4#0H#FWJ*P0_>uygtmP%jOssl>wSJ^mi0F;FI9D-BpqoIP33w!#r}bUKd_Pj3JDOgh~l=Dy+=cMoD2ZQL>4ZnQ%2fp zz$kxK+M@~>l>s=%M55&>6_E?Gmf$h+$M^WTJlMYPav>h860sf0AaqZ=J9(aHpIL!V zzQtAl$FA7zRC^$sx8jP{|C%xKTZ3|U@{o&yqpL8jTHRbqXQ_uHe=dOM5dkgPeH{M2 zlx87d_fU6fD&&8<;5>!rIcZUUTZuYit}YpfUF1@=$}CH%QrnVgXa1jvQ?2CoqEz9b z0xZR2%C@h8%mk15=o zRkef(O-F1;f2vLif4=up=@u1%1eRKYH7H`So9Fgah%BqtLQJFY&vE5!8t9rI+Wg`C zV$o5VR$dSGv-b}Sd_cM^7O^Ff0AXKN2x1afnvd54T-ABhH|#u#yYJIvH5a>Dzu-T+ z&r3I<=@ywxGl)_(92{&lyaz=Lo|E+Go+{V(u#$=K0fSQf9R(C#$G4OjVt-(d*^T~! RHL8@il!#CjFVj2qlvgjrnNa`$ diff --git a/php/files/sury.gpg b/php/files/sury.gpg index 384771a0f87beb284ea8cefe887b40c8d481c209..28043b0af5928d6e14a85b6a51ae931fd02377b0 100644 GIT binary patch delta 839 zcmV-N1GxO@4e1S_0)J(|x2gpR4D+uG0162Zv54ZfmAywp#aIjeH|v^(Rl4kXmuB5v z z5!U+sih=Ux_Vf=1!`giKPW?AVmxmE6HmiU@rkohP;vESLaI+3R3qC@~YcDs}gX4K! 
z?3{OACy{F_Y(~$sngg`~e`Ub8%LNGx^RFxb3JDOgh~l=Dy+=b{Eere${jB{?8@JAA zJ27{J-XV>Nz_-W(j8vccm_5L98jqB2a9kf394TPD2OHXjdQ^ zd)OXZSGhYAI1^yEH^vb@8NI@Uo>N4nY{5rI4ONqy`Ks5uz%KOSMKqHp&A?s{mm)CB z_n~S6GX<{bzK_(`g98WZJw*~3x25`2k+{aj{xk1aboh(8-(IL^LE8Vd#n+;f-OrvB z&b4J42^X%6wyd8Xf4EsqIKllZOgT6q%5nuoD37zoTx8r)Ml)bWKWF#L1%~2G?ayo} z0UyH6V8sQ&f>+AASFe(Zl7|Isr9c1x delta 839 zcmV-N1GxO@4e1S_0)JpDg@*+R2|uuC0162Zv54ZfmAywpL-YDB*;DDkT#lFlBL zO;1VH5C1G0a=Hw*UV&FdwsHu8ING$9GbA1oBv-0?c{|E zRJ(pYp&TS<9Ws&S!?!)$*qz*n2xrA!^xmn`O0P8@m}61_)Ky9uvOie z4#0H#FWJ*P0_>uygtmP%jOssl>wSJ^mi0F;FI9D-BpqoIP33w!#r}bUKd_Pj3JDOgh~l=Dy+=cMoD2ZQL>4ZnQ%2fp zz$kxK+M@~>l>s=%M55&>6_E?Gmf$h+$M^WTJlMYPav>h860sf0AaqZ=J9(aHpIL!V zzQtAl$FA7zRC^$sx8jP{|C%xKTZ3|U@{o&yqpL8jTHRbqXQ_uHe=dOM5dkgPeH{M2 zlx87d_fU6fD&&8<;5>!rIcZUUTZuYit}YpfUF1@=$}CH%QrnVgXa1jvQ?2CoqEz9b z0xZR2%C@h8%mk15=o zRkef(O-F1;f2vLif4=up=@u1%1eRKYH7H`So9Fgah%BqtLQJFY&vE5!8t9rI+Wg`C zV$o5VR$dSGv-b}Sd_cM^7O^Ff0AXKN2x1afnvd54T-ABhH|#u#yYJIvH5a>Dzu-T+ z&r3I<=@ywxGl)_(92{&lyaz=Lo|E+Go+{V(u#$=K0fSQf9R(C#$G4OjVt-(d*^T~! 
RHL8@il!#CjFVj2qlvgjrnNa`$ From a0fc763a0c9ef43f8e0ee4c046d9aa3dbb4ceb11 Mon Sep 17 00:00:00 2001 From: Alexis Ben Miloud--Josselin Date: Tue, 20 Feb 2024 16:11:59 +0100 Subject: [PATCH 06/19] =?UTF-8?q?certbot:=20Utiliser=20pkey=20pour=20teste?= =?UTF-8?q?r=20cl=C3=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CHANGELOG.md | 1 + certbot/files/hooks/deploy/hapee.sh | 4 ++-- certbot/files/hooks/deploy/haproxy.sh | 4 ++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f490b48a..552d4dd6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -90,6 +90,7 @@ The **patch** part changes is incremented if multiple releases happen the same m * vrrpd: variable to force update the switch script (default: false) * webapps/nextcloud: Add Ceph volume to fstab * webapps/nextcloud: Set home directory's mode +* certbot: Utiliser pkey pour tester clé ### Fixed diff --git a/certbot/files/hooks/deploy/hapee.sh b/certbot/files/hooks/deploy/hapee.sh index 89b04452..d39da25b 100644 --- a/certbot/files/hooks/deploy/hapee.sh +++ b/certbot/files/hooks/deploy/hapee.sh @@ -39,8 +39,8 @@ concat_files() { chown root: "${hapee_cert_file}" } cert_and_key_mismatch() { - hapee_cert_md5=$(openssl x509 -noout -modulus -in "${hapee_cert_file}" | openssl md5) - hapee_key_md5=$(openssl rsa -noout -modulus -in "${hapee_cert_file}" | openssl md5) + hapee_cert_md5=$(openssl x509 -noout -pubkey -in "${hapee_cert_file}" | openssl md5) + hapee_key_md5=$(openssl pkey -noout -pubout -in "${hapee_cert_file}" | openssl md5) test "${hapee_cert_md5}" != "${hapee_key_md5}" } diff --git a/certbot/files/hooks/deploy/haproxy.sh b/certbot/files/hooks/deploy/haproxy.sh index 932a3e90..c08fafc2 100644 --- a/certbot/files/hooks/deploy/haproxy.sh +++ b/certbot/files/hooks/deploy/haproxy.sh @@ -29,8 +29,8 @@ concat_files() { chown root: "${haproxy_cert_file}" } cert_and_key_mismatch() { - haproxy_cert_md5=$(openssl x509 -noout 
-modulus -in "${haproxy_cert_file}" | openssl md5) - haproxy_key_md5=$(openssl rsa -noout -modulus -in "${haproxy_cert_file}" | openssl md5) + haproxy_cert_md5=$(openssl x509 -noout -pubkey -in "${haproxy_cert_file}" | openssl md5) + haproxy_key_md5=$(openssl pkey -noout -pubout -in "${haproxy_cert_file}" | openssl md5) test "${haproxy_cert_md5}" != "${haproxy_key_md5}" } From 282dcb28f46f9b8e1aca09258f28fe8076614270 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Tue, 20 Feb 2024 18:50:34 +0100 Subject: [PATCH 07/19] apt: add comments to deb822 migration scripts --- apt/files/deb822-migration.py | 15 +++++++++++++++ apt/files/deb822-migration.sh | 8 +++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/apt/files/deb822-migration.py b/apt/files/deb822-migration.py index 96ef1721..cb135972 100755 --- a/apt/files/deb822-migration.py +++ b/apt/files/deb822-migration.py @@ -1,5 +1,11 @@ #!/usr/bin/env python3 +########## +# This script takes a multi-lines input of "oneliner-style" APT sources definitions. +# It converts them into "deb822-style" sources. +# Each generated file will have only one stanza, possibly with multiple Types/Suites/Components +########## + import re import sys import os @@ -10,12 +16,16 @@ import apt_pkg # Order matters ! destinations = { "debian-security": "security.sources", + ".*-backports": "backports.sources", + ".debian.org": "system.sources", "mirror.evolix.org": "system.sources", "ftp.evolix.org": "system.sources", + "pub.evolix.net": "evolix_public_old.sources.bak", "pub.evolix.org": "evolix_public.sources", + "artifacts.elastic.co": "elastic.sources", "download.docker.com": "docker.sources", "downloads.linux.hpe.com": "hp.sources", @@ -77,6 +87,11 @@ def prepare_sources(lines): key, value = option.split("=") options[key] = value + ### WARNING ### + # if there are multiple lines with different URIS for a given destination (eg. 
"system") + # each one will overwrite the previous one + # and the last evaluated will be what remains. + if dest in sources: sources[dest]["Types"].add(matches["type"]) sources[dest]["URIs"] = matches["uri"] diff --git a/apt/files/deb822-migration.sh b/apt/files/deb822-migration.sh index 10fb7889..7a4fb787 100755 --- a/apt/files/deb822-migration.sh +++ b/apt/files/deb822-migration.sh @@ -1,5 +1,11 @@ #!/bin/sh +########## +# This script changes all "one-line" APT sources into "deb822" sources. +# It is responsible for searching and processing the files. +# The actual format migration is done by a python script. +########## + deb822_migrate_script=$(command -v deb822-migration.py) if [ -z "${deb822_migrate_script}" ]; then @@ -46,4 +52,4 @@ for file in $(find /etc/apt/sources.list.d -mindepth 1 -maxdepth 1 -type f -name done echo "${count} file(s) migrated" -exit ${rc} \ No newline at end of file +exit ${rc} From 0a4a220bdfa6577d4551edecdee72868bdcd57a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Dubois?= Date: Wed, 21 Feb 2024 10:51:08 +0100 Subject: [PATCH 08/19] openvpn: earlier alert for CA expiration --- CHANGELOG.md | 1 + openvpn/files/check_openvpn_certificates.sh | 15 ++++++++------- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 552d4dd6..fd0d602e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ The **patch** part changes is incremented if multiple releases happen the same m * apt: add ftp.evolix.org as recognized system source * lxc-php, php: Update sury PGP key * redis: create sysfs config file if missing +* openvpn: earlier alert for CA expiration ### Fixed diff --git a/openvpn/files/check_openvpn_certificates.sh b/openvpn/files/check_openvpn_certificates.sh index 26808868..1ec3aaed 100644 --- a/openvpn/files/check_openvpn_certificates.sh +++ b/openvpn/files/check_openvpn_certificates.sh @@ -35,6 +35,7 @@ fi # Dates in seconds _15_days="1296000" _30_days="2592000" 
+_60_days="5184000" current_date=$($date_cmd +"%s") # Trying to define the OpenVPN conf file location - default to /etc/openvpn/server.conf @@ -90,15 +91,15 @@ test_ca_expiration() { if [ $current_date -ge $1 ]; then CA_ECHO="CRITICAL - The server CA has expired on $formated_ca_expiration_date" CA_STATE=$STATE_CRITICAL - # Expiration in 15 days or less - CA file - elif [ $((current_date+_15_days)) -ge $1 ]; then - CA_ECHO="CRITICAL - The server CA expires in 15 days or less : $formated_ca_expiration_date" - CA_STATE=$STATE_CRITICAL # Expiration in 30 days or less - CA file elif [ $((current_date+_30_days)) -ge $1 ]; then - CA_ECHO="WARNING - The server CA expires in 30 days or less : $formated_ca_expiration_date" + CA_ECHO="CRITICAL - The server CA expires in 30 days or less : $formated_ca_expiration_date" + CA_STATE=$STATE_CRITICAL + # Expiration in 60 days or less - CA file + elif [ $((current_date+_60_days)) -ge $1 ]; then + CA_ECHO="WARNING - The server CA expires in 60 days or less : $formated_ca_expiration_date" CA_STATE=$STATE_WARNING - # Expiration in more than 30 days - CA file + # Expiration in more than 60 days - CA file else CA_ECHO="OK - The server CA expires on $formated_ca_expiration_date" CA_STATE=$STATE_OK @@ -193,8 +194,8 @@ main() { echo $RESTART_ECHO exit $CERT_STATE else - echo $CERT_ECHO echo $CA_ECHO + echo $CERT_ECHO echo $RESTART_ECHO exit $CERT_STATE fi From b0ba70f06ce97db5ed4b0b56f1a291db51302ece Mon Sep 17 00:00:00 2001 From: Ludovic Poujol Date: Wed, 21 Feb 2024 12:27:18 +0100 Subject: [PATCH 09/19] certbot: Renewal hook for NRPE --- CHANGELOG.md | 1 + certbot/files/hooks/deploy/nrpe.sh | 44 ++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 certbot/files/hooks/deploy/nrpe.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index fd0d602e..b8c50622 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Added +* certbot: 
Renewal hook for NRPE * kvm-host: add minifirewall rules if DRBD interface is configured ### Changed diff --git a/certbot/files/hooks/deploy/nrpe.sh b/certbot/files/hooks/deploy/nrpe.sh new file mode 100644 index 00000000..578d6764 --- /dev/null +++ b/certbot/files/hooks/deploy/nrpe.sh @@ -0,0 +1,44 @@ +#!/bin/sh + +error() { + >&2 echo "${PROGNAME}: $1" + exit 1 +} +debug() { + if [ "${VERBOSE}" = "1" ] && [ "${QUIET}" != "1" ]; then + >&2 echo "${PROGNAME}: $1" + fi +} +daemon_found_and_running() { + test -n "$(pidof nrpe)" +} +letsencrypt_lineaged_used() { + grep -r "^ssl_cert_file" /etc/nagios/ | grep "letsencrypt" | grep -q "$(basename "${RENEWED_LINEAGE}")" +} +copy_letsencrypt_cert() { + DEST_CERTIFICATE=$(grep -r "^ssl_cert_file" /etc/nagios/ | awk -F'=' '{print $2}') + DEST_PRIVATE_KEY=$(grep -r "^ssl_privatekey_file" /etc/nagios/ | awk -F'=' '{print $2}') + + install --mode 440 --group nagios ${RENEWED_LINEAGE}/fullchain.pem ${DEST_CERTIFICATE} + install --mode 440 --group nagios ${RENEWED_LINEAGE}/privkey.pem ${DEST_PRIVATE_KEY} +} +main() { + if daemon_found_and_running; then + if letsencrypt_lineaged_used; then + debug "NRPE detected... Copying certificates to the right place & permissions" + copy_letsencrypt_cert + debug "Restarting NRPE" + systemctl restart nagios-nrpe-server + else + debug "NRPE doesn't use the given Let's Encrypt certificate. Skip." + fi + else + debug "NRPE is not running or missing. Skip." 
+ fi +} + +readonly PROGNAME=$(basename "$0") +readonly VERBOSE=${VERBOSE:-"0"} +readonly QUIET=${QUIET:-"0"} + +main \ No newline at end of file From aea710cb25597a5c6208b70f6a1fc42314805af3 Mon Sep 17 00:00:00 2001 From: David Prevot Date: Thu, 22 Feb 2024 09:44:38 +0100 Subject: [PATCH 10/19] redis: Update munin plugin --- redis/files/munin_redis | 597 ++++++++++++++++++++++++++-------------- 1 file changed, 396 insertions(+), 201 deletions(-) diff --git a/redis/files/munin_redis b/redis/files/munin_redis index 55474435..ef3b61ad 100644 --- a/redis/files/munin_redis +++ b/redis/files/munin_redis @@ -1,243 +1,439 @@ #!/usr/bin/perl -w -# -## Copyright (C) 2009 Gleb Voronich -## -## This program is free software; you can redistribute it and/or -## modify it under the terms of the GNU General Public License -## as published by the Free Software Foundation; version 2 dated June, -## 1991. -## -## This program is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You should have received a copy of the GNU General Public License -## along with this program; if not, write to the Free Software -## Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -## -## -## $Log$ -## -## Based on Redis module code v0.08 2009/from http://svn.rot13.org/index.cgi/Redis -## -## Installation process: -## -## 1. Download the plugin to your plugins directory (e.g. /usr/share/munin/plugins) -## 2. Create 3 symlinks at the directory that us used by munin for plugins detection (e.g. /etc/munin/plugins): redis_connected_clients, redis_per_sec and and redis_used_memory -## 3. Edit plugin-conf.d/munin-node if it is needed (env.host and env.port variables are accepted; set env.password for password protected Redis server) -## 4. 
Restart munin-node service -## -## Magic Markers -#%# family=auto -#%# capabilities=autoconf suggest +=head CONFIGURATION + + Based on Redis module code v0.08 2009/from http://svn.rot13.org/index.cgi/Redis + + Installation process: + + 1. Download the plugin to your plugins directory (e.g. /usr/share/munin/plugins) + 2. Symlink it to your configuration directory (e.g. ln -s /usr/share/munin/plugins/redis /etc/munin/plugins/redis) + 3. Edit plugin-conf.d/munin-node with the options to connect to your redis instances (see below for an example) + 4. Restart munin-node service + + Example config + [redis] + env.host1 127.0.0.1 + env.port1 6379 + env.password1 password + env.title_prefix1 redis-1 + env.host2 /run/redis.sock + env.title_prefix2 redis-2 + + Each host should be specified with at least a host or unixsocket variable suffixed with + a number, the first being 1, the second being 2 etc. They must be in sequence. + Other options are: + * port - the redis port to connect to + * password - the password to use with the AUTH command + * title_prefix - a prefix to put before the title of the graph, this is strongly recommended for multiple instances + + Graphs: + This generates multigraphs for: + * Connected clients + * Key Hit vs Miss ratio + * Keys per second, hits/misses/expirations/evictions + * Replication backlog + * Replication lag + * Request per second + * Total number of keys and keys with expires + * Used memory + +=head COPYRIGHT + + Copyright (C) 2020 Rowan Wookey + Copyright (C) 2009 Gleb Voronich + +=head LICENSE + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 dated June, + 1991. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +=head MAGIC MARKERS + + #%# family=auto + #%# capabilities=autoconf + +=cut use strict; use IO::Socket::INET; use IO::Socket::UNIX; -use Switch; -my $HOST = exists $ENV{'host'} ? $ENV{'host'} : "127.0.0.1"; -my $UNIX_SOCKET = exists $ENV{'unixsocket'} ? $ENV{'unixsocket'} : ''; # path to Redis Unix sock file -my $PORT = exists $ENV{'port'} ? $ENV{'port'} : 6379; -my $PASSWORD = exists $ENV{'password'} ? $ENV{'password'} : undef; -my $TITLE_PREFIX = exists $ENV{'title_prefix'} ? $ENV{'title_prefix'} . ": " : ""; +my %INSTANCES; +my $HOST; +my $PORT; +my $PASSWORD; + +for (my $i = 1; $ENV{"host$i"}; $i++) +{ + $HOST = exists $ENV{"host$i"} ? $ENV{"host$i"} : "127.0.0.1"; + $PORT = exists $ENV{"port$i"} ? $ENV{"port$i"} : 6379; + $PASSWORD = exists $ENV{"password$i"} ? $ENV{"password$i"} : undef; + my $TITLE_PREFIX = exists $ENV{"title_prefix$i"} ? $ENV{"title_prefix$i"} . ": " : ""; + my $SOCK = &get_conn(); + $INSTANCES{"instance$i"} = { + HOST => $HOST, + PORT => $PORT, + PASSWORD => $PASSWORD, + TITLE_PREFIX => $TITLE_PREFIX, + SOCK => $SOCK + }; +} + -my $sock = &get_conn(); my $config = ( defined $ARGV[0] and $ARGV[0] eq "config" ); my $autoconf = ( defined $ARGV[0] and $ARGV[0] eq "autoconf" ); if ( $autoconf ) { - if ( defined( $sock ) ) { + if (!%INSTANCES) { + print "no (no redis instances configured)\n"; + exit 0; + } + my $err = ''; + for my $INSTANCE (keys %INSTANCES) { + if (! defined( $INSTANCES{$INSTANCE}{'SOCK'} ) ) { + $err = "no (unable to connect to ".$INSTANCES{$INSTANCE}{'HOST'}."\[:". 
$INSTANCES{$INSTANCE}{'PORT'}."\])\n"; + } + } + if ($err) { + print $err; + } else { print "yes\n"; - exit 0; - } else { - print "no (unable to connect to $HOST\[:$PORT\])\n"; - exit 0; - } -} -my $suggest = ( defined $ARGV[0] and $ARGV[0] eq "suggest" ); -if ( $suggest ) { - if ( defined( $sock ) ) { - my @plugins = ('connected_clients', 'key_ratio', 'keys_per_sec', 'per_sec', 'used_keys', 'used_memory'); - foreach my $plugin (@plugins) { - print "$plugin\n"; - } - exit 0; - } else { - print "no (unable to connect to $HOST\[:$PORT\])\n"; - exit 0; } + exit 0; } -my $hash=&get_info(); +my $total = 0; -$0 =~ s/(.+)redis_//g; +my $multi_graph_output = ''; +my $instance_graph_output = ''; -switch ($0) { - case "connected_clients" { - if ( $config ) { - my $maxclients= get_config("maxclients")->{"maxclients"}; - print "graph_title ${TITLE_PREFIX}Connected clients\n"; - print "graph_vlabel Connected clients\n"; - print "graph_category search\n"; - print "graph_args -l 0\n"; - print "connected_clients.line $maxclients:ff0000:Limit\n"; - print "connected_clients.label connected clients\n"; - exit 0; +my $connected_clients = 0; +my $keyspace_hits = 0; +my $keyspace_misses = 0; +my $expired_keys = 0; +my $evicted_keys = 0; +my $total_commands_processed = 0; +my $total_connections_received = 0; +my $repl_backlog_size = 0; +my $used_memory = 0; +my $used_memory_rss = 0; +my $used_memory_peak = 0; +my $total_keys = 0; +my $total_expires = 0; +foreach my $INSTANCE (keys %INSTANCES) { + + my $sock = $INSTANCES{$INSTANCE}{'SOCK'}; + my $TITLE_PREFIX = $INSTANCES{$INSTANCE}{'TITLE_PREFIX'}; + my $hash = get_info($sock); + + my $dbs; + foreach my $key (keys %{$hash}) { + if ( $key =~ /^db\d+$/ && $hash->{$key} =~ /keys=(\d+),expires=(\d+)/ ) { + $total_keys += $1; + $total_expires += $2; + $dbs->{$key} = [ $1, $2 ]; } - - print "connected_clients.value " . $hash->{'connected_clients'} . 
"\n"; } - - case "keys_per_sec" { - if ( $config ) { - print "graph_title ${TITLE_PREFIX}Keys Per Second\n"; - print "graph_vlabel per \${graph_period}\n"; - print "graph_category search\n"; - print "graph_args -l 0\n"; - print "hits.label hits\n"; - print "hits.type COUNTER\n"; - print "misses.label misses\n"; - print "misses.type COUNTER\n"; - print "expired.label expirations\n"; - print "expired.type COUNTER\n"; - print "evictions.label evictions\n"; - print "evictions.type COUNTER\n"; - exit 0; + if ( $config ) { + my $ret = get_config("maxclients", $sock); + # if the CONFIG command is disabled we don't show the max clients + my $maxclients = defined $ret ? $ret->{"maxclients"} : 0; + $instance_graph_output .= "multigraph redis_connected_clients.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Connected clients\n"; + $instance_graph_output .= "graph_vlabel Connected clients\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + if ($maxclients) { + $instance_graph_output .= "connected_clients.line $maxclients:ff0000:Limit\n"; } - - print "hits.value " . $hash->{'keyspace_hits'} . "\n"; - print "misses.value " . $hash->{'keyspace_misses'} . "\n"; - print "expired.value " . $hash->{'expired_keys'} . "\n"; - print "evictions.value " . $hash->{'evicted_keys'} . 
"\n"; - } - - case "key_ratio" { - if ( $config ) { - print "graph_title ${TITLE_PREFIX}Key Hit vs Miss Ratio\n"; - print "graph_vlabel per \${graph_period}\n"; - print "graph_category search\n"; - print "graph_args -u 100 -l 0 -r --base 1000\n"; - print "hitratio.label hit ratio\n"; - print "hitratio.type GAUGE\n"; - print "hitratio.draw AREA\n"; - print "missratio.label miss ratio\n"; - print "missratio.type GAUGE\n"; - print "missratio.draw STACK\n"; - exit 0; - } - - my $total = $hash->{'keyspace_hits'} + $hash->{'keyspace_misses'}; - my $hitratio = 0; - my $missratio = 0; - if ($total > 0) { - $hitratio = $hash->{'keyspace_hits'} / $total * 100; - $missratio = $hash->{'keyspace_misses'} / $total * 100; - } - printf("hitratio.value %.2f\n", $hitratio); - printf("missratio.value %.2f\n", $missratio); - } - - - case "per_sec" { - if ( $config ) { - print "graph_title ${TITLE_PREFIX}Per second\n"; - print "graph_vlabel per \${graph_period}\n"; - print "graph_category search\n"; - print "graph_args -l 0\n"; - print "requests.label requests\n"; - print "requests.type COUNTER\n"; - print "connections.label connections\n"; - print "connections.type COUNTER\n"; - exit 0; - } - - print "requests.value ". $hash->{'total_commands_processed'} ."\n"; - print "connections.value ". $hash->{'total_connections_received'} ."\n"; - } - - - case "used_memory" { - if ( $config ) { - my $maxmemory = get_config("maxmemory")->{"maxmemory"}; - print "graph_title ${TITLE_PREFIX}Used memory\n"; - print "graph_vlabel Used memory\n"; - print "graph_category search\n"; - print "graph_args -l 0 --base 1024\n"; - print "used_memory.line $maxmemory:ff0000:Limit\n"; - print "used_memory.label used memory\n"; - print "used_memory_peak.label used memory in peak\n"; - print "used_memory_rss.label Resident set size memory usage\n"; - exit 0; - } - - print "used_memory.value ". $hash->{'used_memory'} ."\n"; - print "used_memory_rss.value ". 
$hash->{'used_memory_rss'} ."\n"; - print "used_memory_peak.value ". $hash->{'used_memory_peak'} ."\n"; - } - - case "used_keys" { - my $dbs; - foreach my $key (keys %{$hash}) { - if ( $key =~ /^db\d+$/ && $hash->{$key} =~ /keys=(\d+),expires=(\d+)/ ) { - $dbs->{$key} = [ $1, $2 ]; - } - } - - if ( $config ) { - print "graph_title ${TITLE_PREFIX}Used keys\n"; - print "graph_vlabel Used keys\n"; - print "graph_category search\n"; - print "graph_args -l 0\n"; - - foreach my $db (keys %{$dbs}) { - printf "%s_keys.label %s keys\n", $db, $db; - printf "%s_expires.label %s expires\n", $db, $db; - } - - exit 0; + $instance_graph_output .= "connected_clients.label connected clients\n"; + $instance_graph_output .= "multigraph keys_per_sec.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Keys Per Second\n"; + $instance_graph_output .= "graph_vlabel per \${graph_period}\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + $instance_graph_output .= "hits.label hits\n"; + $instance_graph_output .= "hits.type COUNTER\n"; + $instance_graph_output .= "misses.label misses\n"; + $instance_graph_output .= "misses.type COUNTER\n"; + $instance_graph_output .= "expired.label expirations\n"; + $instance_graph_output .= "expired.type COUNTER\n"; + $instance_graph_output .= "evicted_keys.label evictions\n"; + $instance_graph_output .= "evicted_keys.type COUNTER\n"; + $instance_graph_output .= "multigraph redis_key_ratio.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Key Hit vs Miss Ratio\n"; + $instance_graph_output .= "graph_vlabel per \${graph_period}\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -u 100 -l 0 -r --base 1000\n"; + $instance_graph_output .= "hitratio.label hit ratio\n"; + $instance_graph_output .= "hitratio.type GAUGE\n"; + $instance_graph_output .= "hitratio.draw AREA\n"; + $instance_graph_output .= "missratio.label miss 
ratio\n"; + $instance_graph_output .= "missratio.type GAUGE\n"; + $instance_graph_output .= "missratio.draw STACK\n"; + $instance_graph_output .= "multigraph redis_per_sec.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Requests Per second\n"; + $instance_graph_output .= "graph_vlabel per \${graph_period}\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + $instance_graph_output .= "requests.label requests\n"; + $instance_graph_output .= "requests.type COUNTER\n"; + $instance_graph_output .= "connections.label connections\n"; + $instance_graph_output .= "connections.type COUNTER\n"; + $instance_graph_output .= "multigraph redis_repl_backlog_size.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}replication backlog\n"; + $instance_graph_output .= "graph_vlabel replication backlog\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + $instance_graph_output .= "repl_backlog_size.label bytes behind master\n"; + $instance_graph_output .= "multigraph redis_repl_lag.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}replication lag\n"; + $instance_graph_output .= "graph_vlabel replication lag\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; + $instance_graph_output .= "repl_backlog_size.label amount behind master\n"; + # if the CONFIG command is disabled we don't show maxmemory + $ret = get_config("maxmemory", $sock); + my $maxmemory = defined $ret ? 
$ret->{"maxmemory"} : 0; + $instance_graph_output .= "multigraph redis_used_memory.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Used memory\n"; + $instance_graph_output .= "graph_vlabel Used memory\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0 --base 1024\n"; + if ($maxmemory) { + $instance_graph_output .= "used_memory.line $maxmemory:ff0000:Limit\n"; } + $instance_graph_output .= "used_memory.label used memory\n"; + $instance_graph_output .= "used_memory_peak.label used memory in peak\n"; + $instance_graph_output .= "used_memory_rss.label Resident set size memory usage\n"; + $instance_graph_output .= "multigraph redis_used_keys.$INSTANCE\n"; + $instance_graph_output .= "graph_title ${TITLE_PREFIX}Used keys\n"; + $instance_graph_output .= "graph_vlabel Used keys\n"; + $instance_graph_output .= "graph_category db\n"; + $instance_graph_output .= "graph_args -l 0\n"; foreach my $db (keys %{$dbs}) { - printf "%s_keys.value %d\n", $db, $dbs->{$db}[0]; - printf "%s_expires.value %d\n", $db, $dbs->{$db}[1]; + $instance_graph_output .= sprintf "%s_keys.label %s keys\n", $db, $db; + $instance_graph_output .= sprintf "%s_expires.label %s expires\n", $db, $db; } + + next; } + + $instance_graph_output .= "multigraph redis_connected_clients.$INSTANCE\n"; + $instance_graph_output .= "connected_clients.value " . $hash->{'connected_clients'} . "\n"; + $connected_clients += $hash->{'connected_clients'}; + $instance_graph_output .= "multigraph keys_per_sec.$INSTANCE\n"; + $instance_graph_output .= "hits.value " . $hash->{'keyspace_hits'} . "\n"; + $keyspace_hits += $hash->{'keyspace_hits'}; + $instance_graph_output .= "misses.value " . $hash->{'keyspace_misses'} . "\n"; + $keyspace_misses += $hash->{'keyspace_misses'}; + $instance_graph_output .= "expired.value " . $hash->{'expired_keys'} . "\n"; + $expired_keys += $hash->{'expired_keys'}; + $instance_graph_output .= "evicted_keys.value " . 
$hash->{'evicted_keys'} . "\n"; + $evicted_keys += $hash->{'evicted_keys'}; + $instance_graph_output .= "multigraph redis_key_ratio.$INSTANCE\n"; + my $total = $hash->{'keyspace_hits'} + $hash->{'keyspace_misses'}; + my $hitratio = 0; + my $missratio = 0; + if ($total > 0) { + $hitratio = $hash->{'keyspace_hits'} / $total * 100; + $missratio = $hash->{'keyspace_misses'} / $total * 100; + } + $instance_graph_output .= sprintf("hitratio.value %.2f\n", $hitratio); + $instance_graph_output .= sprintf("missratio.value %.2f\n", $missratio); + $instance_graph_output .= "multigraph redis_per_sec.$INSTANCE\n"; + $instance_graph_output .= "requests.value ". $hash->{'total_commands_processed'} ."\n"; + $total_commands_processed += $hash->{'total_commands_processed'}; + $instance_graph_output .= "connections.value ". $hash->{'total_connections_received'} ."\n"; + $total_connections_received += $hash->{'total_connections_received'}; + $instance_graph_output .= "multigraph redis_repl_backlog_size.$INSTANCE\n"; + $instance_graph_output .= "repl_backlog_size.value " . $hash->{'repl_backlog_size'} . "\n"; + $repl_backlog_size += $hash->{'repl_backlog_size'}; + + $instance_graph_output .= "multigraph redis_repl_lag.$INSTANCE\n"; + if (exists $hash->{slave0} && $hash->{slave0} =~ /lag=(\d+)/) { + $repl_backlog_size += $1; + $instance_graph_output .= "repl_backlog_size.value " . $1 . "\n"; + } else { + $instance_graph_output .= "repl_backlog_size.value 0\n"; + } + + + $instance_graph_output .= "multigraph redis_used_memory.$INSTANCE\n"; + $instance_graph_output .= "used_memory.value ". $hash->{'used_memory'} ."\n"; + + $used_memory += $hash->{'used_memory'}; + $instance_graph_output .= "used_memory_rss.value ". $hash->{'used_memory_rss'} ."\n"; + $used_memory_rss += $hash->{'used_memory_rss'}; + $instance_graph_output .= "used_memory_peak.value ". 
$hash->{'used_memory_peak'} ."\n"; + $used_memory_peak += $hash->{'used_memory_peak'}; + + $instance_graph_output .= "multigraph redis_used_keys.$INSTANCE\n"; + foreach my $db (keys %{$dbs}) { + $instance_graph_output .= sprintf "%s_keys.value %d\n", $db, $dbs->{$db}[0]; + $instance_graph_output .= sprintf "%s_expires.value %d\n", $db, $dbs->{$db}[1]; + } + close ($sock); } -close ($sock); +# multigraph output +if ($config) { + $multi_graph_output .= "multigraph redis_connected_clients\n"; + $multi_graph_output .= "graph_title Connected clients\n"; + $multi_graph_output .= "graph_vlabel Connected clients\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "connected_clients.label connected clients\n"; + $multi_graph_output .= "multigraph keys_per_sec\n"; + $multi_graph_output .= "graph_title Keys Per Second\n"; + $multi_graph_output .= "graph_vlabel per \${graph_period}\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "hits.label hits\n"; + $multi_graph_output .= "hits.type COUNTER\n"; + $multi_graph_output .= "misses.label misses\n"; + $multi_graph_output .= "misses.type COUNTER\n"; + $multi_graph_output .= "expired.label expirations\n"; + $multi_graph_output .= "expired.type COUNTER\n"; + $multi_graph_output .= "evicted_keys.label evictions\n"; + $multi_graph_output .= "evicted_keys.type COUNTER\n"; + $multi_graph_output .= "multigraph redis_key_ratio\n"; + $multi_graph_output .= "graph_title Key Hit vs Miss Ratio\n"; + $multi_graph_output .= "graph_vlabel per \${graph_period}\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -u 100 -l 0 -r --base 1000\n"; + $multi_graph_output .= "hitratio.label hit ratio\n"; + $multi_graph_output .= "hitratio.type GAUGE\n"; + $multi_graph_output .= "hitratio.draw AREA\n"; + $multi_graph_output .= "missratio.label miss ratio\n"; + 
$multi_graph_output .= "missratio.type GAUGE\n"; + $multi_graph_output .= "missratio.draw STACK\n"; + $multi_graph_output .= "multigraph redis_per_sec\n"; + $multi_graph_output .= "graph_title Requests Per second\n"; + $multi_graph_output .= "graph_vlabel per \${graph_period}\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "requests.label requests\n"; + $multi_graph_output .= "requests.type COUNTER\n"; + $multi_graph_output .= "connections.label connections\n"; + $multi_graph_output .= "connections.type COUNTER\n"; + $multi_graph_output .= "multigraph redis_repl_backlog_size\n"; + $multi_graph_output .= "graph_title replication backlog\n"; + $multi_graph_output .= "graph_vlabel replication backlog\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "repl_backlog_size.label bytes behind master\n"; + $multi_graph_output .= "multigraph redis_repl_lag\n"; + $multi_graph_output .= "graph_title replication lag\n"; + $multi_graph_output .= "graph_vlabel replication lag\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "repl_backlog_size.label amount behind master\n"; + $multi_graph_output .= "multigraph redis_used_memory\n"; + $multi_graph_output .= "graph_title Used memory\n"; + $multi_graph_output .= "graph_vlabel Used memory\n"; + $multi_graph_output .= "graph_category db\n"; + $multi_graph_output .= "graph_args -l 0 --base 1024\n"; + $multi_graph_output .= "used_memory.label used memory\n"; + $multi_graph_output .= "used_memory_peak.label used memory in peak\n"; + $multi_graph_output .= "used_memory_rss.label Resident set size memory usage\n"; + $multi_graph_output .= "multigraph redis_used_keys\n"; + $multi_graph_output .= "graph_title Used keys\n"; + $multi_graph_output .= "graph_vlabel Used keys\n"; + $multi_graph_output .= "graph_category db\n"; + 
$multi_graph_output .= "graph_args -l 0\n"; + $multi_graph_output .= "total_keys.label Total keys\n"; + $multi_graph_output .= "total_expires.label Total expires\n"; +} else { + + $multi_graph_output .= "multigraph redis_connected_clients\n"; + $multi_graph_output .= "connected_clients.value " . $connected_clients . "\n"; + $multi_graph_output .= "multigraph keys_per_sec\n"; + $multi_graph_output .= "hits.value " . $keyspace_hits . "\n"; + $multi_graph_output .= "misses.value " . $keyspace_misses . "\n"; + $multi_graph_output .= "expired.value " . $expired_keys . "\n"; + $multi_graph_output .= "evicted_keys.value " . $evicted_keys . "\n"; + $multi_graph_output .= "multigraph redis_key_ratio\n"; + my $total = $keyspace_hits + $keyspace_misses; + my $hitratio = 0; + my $missratio = 0; + if ($total > 0) { + $hitratio = $keyspace_hits / $total * 100; + $missratio = $keyspace_misses / $total * 100; + } + $multi_graph_output .= sprintf("hitratio.value %.2f\n", $hitratio); + $multi_graph_output .= sprintf("missratio.value %.2f\n", $missratio); + $multi_graph_output .= "multigraph redis_per_sec\n"; + $multi_graph_output .= "requests.value ". $total_commands_processed ."\n"; + $multi_graph_output .= "connections.value ". $total_connections_received ."\n"; + $multi_graph_output .= "multigraph redis_repl_backlog_size\n"; + $multi_graph_output .= "repl_backlog_size.value " . $repl_backlog_size . "\n"; + + $multi_graph_output .= "multigraph redis_repl_lag\n"; + $multi_graph_output .= "repl_backlog_size.value " . $repl_backlog_size . "\n"; + + + $multi_graph_output .= "multigraph redis_used_memory\n"; + $multi_graph_output .= "used_memory.value ". $used_memory ."\n"; + + $multi_graph_output .= "used_memory_rss.value ". $used_memory_rss ."\n"; + $multi_graph_output .= "used_memory_peak.value ". 
$used_memory_peak ."\n"; + + $multi_graph_output .= "multigraph redis_used_keys\n"; + $multi_graph_output .= "total_keys.value $total_keys\n"; + $multi_graph_output .= "total_expires.value $total_expires\n"; + +} +print $multi_graph_output; +print $instance_graph_output; sub get_conn { - + my $sock; - - if( $UNIX_SOCKET && -S $UNIX_SOCKET ){ - - $sock = IO::Socket::UNIX->new( - Type => SOCK_STREAM(), - Peer => $UNIX_SOCKET, - ); - + + if(-S $HOST ){ + + $sock = IO::Socket::UNIX->new( + Type => SOCK_STREAM(), + Peer => $HOST, + ); }else{ - - $sock = IO::Socket::INET->new( - PeerAddr => $HOST, - PeerPort => $PORT, - Timeout => 10, - Proto => 'tcp' - ); + + $sock = IO::Socket::INET->new( + PeerAddr => $HOST, + PeerPort => $PORT, + Timeout => 10, + Proto => 'tcp' + ); } - + + if (! defined($sock)) { + die "can't read socket: $!"; + } + if ( defined( $PASSWORD ) ) { print $sock "AUTH ", $PASSWORD, "\r\n"; my $result = <$sock> || die "can't read socket: $!"; } + return $sock; } sub get_info{ + my $sock = $_[0]; print $sock "INFO\r\n"; my $result = <$sock> || die "can't read socket: $!"; @@ -257,13 +453,12 @@ sub get_info{ # This subroutine returns configuration matched to supplied as object sub get_config{ - + my $sock = $_[1]; print $sock "*3\r\n\$6\r\nCONFIG\r\n\$3\r\nGET\r\n\$".length($_[0])."\r\n".$_[0]."\r\n"; # Response will look like like # *2\r\n$9\r\nmaxmemory\r\n$10\r\n3221225472\r\n my $type = <$sock> || die "can't read socket: $!"; - my $conf; if( substr($type,0,1) ne "*" ) { return $conf; From bec868009cbacf57209effe3cc389dde42b0b385 Mon Sep 17 00:00:00 2001 From: William Hirigoyen Date: Tue, 27 Feb 2024 10:33:49 +0100 Subject: [PATCH 11/19] nagios: add option --full to check pressure IO and mem to avoid flaps --- CHANGELOG.md | 1 + nagios-nrpe/templates/evolix.cfg.j2 | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8c50622..ac25f844 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -79,6 +79,7 @@ The 
**patch** part changes is incremented if multiple releases happen the same m * nagios: add dockerd check in nrpe check template * nagios: cleaning nrpe check template * nagios: rename var `nagios_nrpe_process_processes` into `nagios_nrpe_processes` and check systemd-timesyncd instead of ntpd in Debian 12 +* nagios: add option --full to check pressure IO and mem to avoid flaps * proftpd: in SFTP vhost, enable SSH keys login, enable ed25549 host key for Debian >= 11 * redis: manage config template inside a block, to allow custom modifications outside * spamassassin: Use spamd starting with Bookworm diff --git a/nagios-nrpe/templates/evolix.cfg.j2 b/nagios-nrpe/templates/evolix.cfg.j2 index 1f39bcff..d725bb3b 100644 --- a/nagios-nrpe/templates/evolix.cfg.j2 +++ b/nagios-nrpe/templates/evolix.cfg.j2 @@ -91,8 +91,8 @@ command[check_php-fpm83]=sudo {{ nagios_plugins_directory }}/check_phpfpm_multi command[check_dhcp_pool]={{ nagios_plugins_directory }}/check_dhcp_pool command[check_ssl_local]={{ nagios_plugins_directory }}/check_ssl_local command[check_pressure_cpu]=/usr/lib/nagios/plugins/check_pressure --cpu -w 100000 -c 500000 -command[check_pressure_mem]=/usr/lib/nagios/plugins/check_pressure --mem -w 100000 -c 500000 -command[check_pressure_io]=/usr/lib/nagios/plugins/check_pressure --io -w 100000 -c 500000 +command[check_pressure_mem]=/usr/lib/nagios/plugins/check_pressure --mem --full -w 100000 -c 500000 +command[check_pressure_io]=/usr/lib/nagios/plugins/check_pressure --io --full -w 100000 -c 500000 # Check HTTP "many". Use this to check many websites (http, https, ports, sockets and SSL certificates). # Beware! All checks must not take more than 10s! 
From b2e22413bca20cc7f6418f470f5eb00133b5c9e2 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Wed, 28 Feb 2024 15:40:39 +0100 Subject: [PATCH 12/19] autosysadmin-agent: upstream release 24.02.3 --- CHANGELOG.md | 3 + autosysadmin-agent/defaults/main.yml | 17 + .../files/autosysadmin.logrotate.conf | 0 .../files/autosysadmin.rsyslog.conf | 0 .../files/upstream/bin/delete_old_logs.sh | 25 + .../files/upstream/lib/common.sh | 898 ++++++++++++++++++ .../files/upstream/lib/repair.sh | 112 +++ .../files/upstream/lib/restart.sh | 76 ++ .../files/upstream/repair/repair_amavis | 16 + .../files/upstream/repair/repair_disk | 157 +++ .../upstream/repair/repair_elasticsearch | 35 + .../files/upstream/repair/repair_http | 131 +++ .../files/upstream/repair/repair_mysql | 69 ++ .../files/upstream/repair/repair_opendkim | 35 + .../files/upstream/repair/repair_php_fpm56 | 14 + .../files/upstream/repair/repair_php_fpm70 | 14 + .../files/upstream/repair/repair_php_fpm73 | 14 + .../files/upstream/repair/repair_php_fpm74 | 14 + .../files/upstream/repair/repair_php_fpm80 | 14 + .../files/upstream/repair/repair_php_fpm81 | 14 + .../files/upstream/repair/repair_php_fpm82 | 14 + .../files/upstream/repair/repair_php_fpm83 | 14 + .../files/upstream/repair/repair_redis | 32 + .../upstream/repair/repair_tomcat_instance | 33 +- .../repair/zzz-repair_example.template | 41 + .../files/upstream/restart/README | 19 + .../restart/zzz-restart_example.template | 120 +++ autosysadmin-agent/handlers/main.yml | 16 + autosysadmin-agent/tasks/crontab.yml | 25 + .../tasks/dependencies.yml | 0 autosysadmin-agent/tasks/install.yml | 108 +++ .../tasks/logrotate.yml | 4 +- autosysadmin-agent/tasks/main.yml | 31 + autosysadmin-agent/tasks/nrpe.yml | 9 + .../tasks/rsyslog.yml | 6 +- .../tasks/sudo.yml | 4 +- .../templates/autosysadmin.cf.j2 | 12 + .../templates/autosysadmin.cron.j2 | 7 + .../templates/autosysadmin.nrpe.cfg.j2 | 8 + .../templates/autosysadmin.sudoers.j2 | 7 + autosysadmin/defaults/main.yml | 22 
- autosysadmin/files/scripts/functions.sh | 478 ---------- autosysadmin/files/scripts/repair_amavis.sh | 33 - autosysadmin/files/scripts/repair_disk.sh | 173 ---- .../files/scripts/repair_elasticsearch.sh | 57 -- autosysadmin/files/scripts/repair_http.sh | 141 --- autosysadmin/files/scripts/repair_mysql.sh | 71 -- autosysadmin/files/scripts/repair_opendkim.sh | 61 -- .../files/scripts/repair_php_fpm56.sh | 53 -- .../files/scripts/repair_php_fpm70.sh | 53 -- .../files/scripts/repair_php_fpm73.sh | 53 -- .../files/scripts/repair_php_fpm74.sh | 53 -- .../files/scripts/repair_php_fpm80.sh | 53 -- .../files/scripts/repair_php_fpm81.sh | 53 -- .../files/scripts/repair_php_fpm82.sh | 53 -- .../files/scripts/repair_php_fpm83.sh | 53 -- autosysadmin/files/scripts/repair_redis.sh | 58 -- autosysadmin/files/scripts/repair_template.sh | 63 -- autosysadmin/files/scripts/restart_amavis.sh | 35 - autosysadmin/handlers/main.yml | 16 - autosysadmin/tasks/autosysadmin_scripts.yml | 61 -- autosysadmin/tasks/main.yml | 37 - autosysadmin/tasks/nrpe.yml | 11 - autosysadmin/templates/autosysadmin.cf.j2 | 74 -- autosysadmin/templates/autosysadmin.cfg.j2 | 22 - autosysadmin/templates/sudoers.j2 | 21 - evolinux-base/tasks/main.yml | 2 +- 67 files changed, 2133 insertions(+), 1895 deletions(-) create mode 100644 autosysadmin-agent/defaults/main.yml rename autosysadmin/files/logrotate_autosysadmin.conf => autosysadmin-agent/files/autosysadmin.logrotate.conf (100%) rename autosysadmin/files/rsyslog_autosysadmin.conf => autosysadmin-agent/files/autosysadmin.rsyslog.conf (100%) create mode 100644 autosysadmin-agent/files/upstream/bin/delete_old_logs.sh create mode 100755 autosysadmin-agent/files/upstream/lib/common.sh create mode 100644 autosysadmin-agent/files/upstream/lib/repair.sh create mode 100644 autosysadmin-agent/files/upstream/lib/restart.sh create mode 100755 autosysadmin-agent/files/upstream/repair/repair_amavis create mode 100755 autosysadmin-agent/files/upstream/repair/repair_disk 
create mode 100755 autosysadmin-agent/files/upstream/repair/repair_elasticsearch create mode 100755 autosysadmin-agent/files/upstream/repair/repair_http create mode 100755 autosysadmin-agent/files/upstream/repair/repair_mysql create mode 100755 autosysadmin-agent/files/upstream/repair/repair_opendkim create mode 100755 autosysadmin-agent/files/upstream/repair/repair_php_fpm56 create mode 100755 autosysadmin-agent/files/upstream/repair/repair_php_fpm70 create mode 100755 autosysadmin-agent/files/upstream/repair/repair_php_fpm73 create mode 100755 autosysadmin-agent/files/upstream/repair/repair_php_fpm74 create mode 100755 autosysadmin-agent/files/upstream/repair/repair_php_fpm80 create mode 100755 autosysadmin-agent/files/upstream/repair/repair_php_fpm81 create mode 100755 autosysadmin-agent/files/upstream/repair/repair_php_fpm82 create mode 100755 autosysadmin-agent/files/upstream/repair/repair_php_fpm83 create mode 100755 autosysadmin-agent/files/upstream/repair/repair_redis rename autosysadmin/files/scripts/repair_tomcat_instance.sh => autosysadmin-agent/files/upstream/repair/repair_tomcat_instance (51%) mode change 100644 => 100755 create mode 100755 autosysadmin-agent/files/upstream/repair/zzz-repair_example.template create mode 100644 autosysadmin-agent/files/upstream/restart/README create mode 100644 autosysadmin-agent/files/upstream/restart/zzz-restart_example.template create mode 100644 autosysadmin-agent/handlers/main.yml create mode 100644 autosysadmin-agent/tasks/crontab.yml rename {autosysadmin => autosysadmin-agent}/tasks/dependencies.yml (100%) create mode 100644 autosysadmin-agent/tasks/install.yml rename {autosysadmin => autosysadmin-agent}/tasks/logrotate.yml (70%) create mode 100644 autosysadmin-agent/tasks/main.yml create mode 100644 autosysadmin-agent/tasks/nrpe.yml rename {autosysadmin => autosysadmin-agent}/tasks/rsyslog.yml (64%) rename {autosysadmin => autosysadmin-agent}/tasks/sudo.yml (76%) create mode 100644 
autosysadmin-agent/templates/autosysadmin.cf.j2 create mode 100644 autosysadmin-agent/templates/autosysadmin.cron.j2 create mode 100644 autosysadmin-agent/templates/autosysadmin.nrpe.cfg.j2 create mode 100644 autosysadmin-agent/templates/autosysadmin.sudoers.j2 delete mode 100644 autosysadmin/defaults/main.yml delete mode 100644 autosysadmin/files/scripts/functions.sh delete mode 100644 autosysadmin/files/scripts/repair_amavis.sh delete mode 100644 autosysadmin/files/scripts/repair_disk.sh delete mode 100644 autosysadmin/files/scripts/repair_elasticsearch.sh delete mode 100644 autosysadmin/files/scripts/repair_http.sh delete mode 100644 autosysadmin/files/scripts/repair_mysql.sh delete mode 100644 autosysadmin/files/scripts/repair_opendkim.sh delete mode 100644 autosysadmin/files/scripts/repair_php_fpm56.sh delete mode 100644 autosysadmin/files/scripts/repair_php_fpm70.sh delete mode 100644 autosysadmin/files/scripts/repair_php_fpm73.sh delete mode 100644 autosysadmin/files/scripts/repair_php_fpm74.sh delete mode 100644 autosysadmin/files/scripts/repair_php_fpm80.sh delete mode 100644 autosysadmin/files/scripts/repair_php_fpm81.sh delete mode 100644 autosysadmin/files/scripts/repair_php_fpm82.sh delete mode 100644 autosysadmin/files/scripts/repair_php_fpm83.sh delete mode 100644 autosysadmin/files/scripts/repair_redis.sh delete mode 100644 autosysadmin/files/scripts/repair_template.sh delete mode 100644 autosysadmin/files/scripts/restart_amavis.sh delete mode 100644 autosysadmin/handlers/main.yml delete mode 100644 autosysadmin/tasks/autosysadmin_scripts.yml delete mode 100644 autosysadmin/tasks/main.yml delete mode 100644 autosysadmin/tasks/nrpe.yml delete mode 100644 autosysadmin/templates/autosysadmin.cf.j2 delete mode 100644 autosysadmin/templates/autosysadmin.cfg.j2 delete mode 100644 autosysadmin/templates/sudoers.j2 diff --git a/CHANGELOG.md b/CHANGELOG.md index ac25f844..ab28a964 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ The **patch** 
part changes is incremented if multiple releases happen the same m ### Added +* autosysadmin-agent: upstream release 24.02.3 * certbot: Renewal hook for NRPE * kvm-host: add minifirewall rules if DRBD interface is configured @@ -27,6 +28,8 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Removed +* autosysadmin: replaced by autosysadmin-agent + ### Security ## [24.02] 2024-02-08 diff --git a/autosysadmin-agent/defaults/main.yml b/autosysadmin-agent/defaults/main.yml new file mode 100644 index 00000000..340ec0a1 --- /dev/null +++ b/autosysadmin-agent/defaults/main.yml @@ -0,0 +1,17 @@ +--- + +general_scripts_dir: "/usr/share/scripts" + +autosysadmin_agent_bin_dir: "/usr/local/bin/autosysadmin" +autosysadmin_agent_lib_dir: "/usr/local/lib/autosysadmin" +autosysadmin_agent_auto_dir: "{{ general_scripts_dir }}/autosysadmin/auto" + +autosysadmin_agent_crontab_enabled: true +autosysadmin_agent_log_retention_days: 365 + +autosysadmin_config: [] + ### All repair are disabled if set to 'off' + ### even if a specific repair value is 'on' + # repair_all: 'on' + ### Default values for checks + # repair_foo: 'off' diff --git a/autosysadmin/files/logrotate_autosysadmin.conf b/autosysadmin-agent/files/autosysadmin.logrotate.conf similarity index 100% rename from autosysadmin/files/logrotate_autosysadmin.conf rename to autosysadmin-agent/files/autosysadmin.logrotate.conf diff --git a/autosysadmin/files/rsyslog_autosysadmin.conf b/autosysadmin-agent/files/autosysadmin.rsyslog.conf similarity index 100% rename from autosysadmin/files/rsyslog_autosysadmin.conf rename to autosysadmin-agent/files/autosysadmin.rsyslog.conf diff --git a/autosysadmin-agent/files/upstream/bin/delete_old_logs.sh b/autosysadmin-agent/files/upstream/bin/delete_old_logs.sh new file mode 100644 index 00000000..a39d9efe --- /dev/null +++ b/autosysadmin-agent/files/upstream/bin/delete_old_logs.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +days=${1:-365} 
+log_dir="/var/log/autosysadmin/" + +if [ -d "${log_dir}" ]; then + find_run_dirs() { + find "${log_dir}" \ + -mindepth 1 \ + -maxdepth 1 \ + -type d \ + -ctime "+${days}" \ + -print0 + } + log() { + /usr/bin/logger -p local0.notice -t autosysadmin "${1}" + } + + while IFS= read -r -d '' run_dir; do + rm --recursive --force "${run_dir}" + log "Delete ${run_dir} (older than ${days} days)" + done < <(find_run_dirs) +fi + +exit 0 diff --git a/autosysadmin-agent/files/upstream/lib/common.sh b/autosysadmin-agent/files/upstream/lib/common.sh new file mode 100755 index 00000000..f3c16359 --- /dev/null +++ b/autosysadmin-agent/files/upstream/lib/common.sh @@ -0,0 +1,898 @@ +#!/bin/bash + +VERSION="24.02.3" + +# Common functions for "repair" and "restart" scripts + +set -u + +# Initializes the program, context, configuration… +initialize() { + PATH="${PATH}":/usr/sbin:/sbin + + # Used in many places to refer to the program name. + # Examples: repair_mysql, restart_nrpe… + PROGNAME=$(basename "${0}") + + # find out if running in interactive mode, or not + if [ -t 0 ]; then + INTERACTIVE=1 + else + INTERACTIVE=0 + fi + readonly INTERACTIVE + + # Default empty value for Debug mode + DEBUG="${DEBUG:-""}" + + # Repair scripts obey to the value of a variable named after the script + # You can set the value ("on" or "off") in /etc/evolinux/autosysadmin + # Here we set the default value to "on". 
+ declare -g "${PROGNAME}"=on # dynamic variable assignment ($PROGNAME == repair_*) + + PID=$$ + readonly PID + + # Each execution (run) gets a unique ID + RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${PROGNAME}_${PID}" + readonly RUN_ID + + # Each execution stores some information + # in a unique directory based on the RUN_ID + LOG_DIR="/var/log/autosysadmin/${RUN_ID}" + readonly LOG_DIR + mkdir -p "${LOG_DIR}" + + # This log file contains all events + LOG_FILE="${LOG_DIR}/autosysadmin.log" + readonly LOG_FILE + + # This log file contains notable actions + ACTIONS_FILE="${LOG_DIR}/actions.log" + readonly ACTIONS_FILE + touch "${ACTIONS_FILE}" + # This log file contains abort reasons (if any) + ABORT_FILE="${LOG_DIR}/abort.log" + readonly ABORT_FILE + # touch "${ABORT_FILE}" + + # Date format for log messages + DATE_FORMAT="%Y-%m-%d %H:%M:%S" + + # This will contain lock, last-run markers… + # It's ok to lose the content after a reboot + RUN_DIR="/run/autosysadmin" + readonly RUN_DIR + mkdir -p "${RUN_DIR}" + + # Only a single instance of each script can run simultaneously + # We use a customizable lock name for this. + # By default it's the script's name + LOCK_NAME=${LOCK_NAME:-${PROGNAME}} + # If a lock is found, we can wait for it to disappear.
+ # The value must be understood by sleep(1) + LOCK_WAIT="0" + + # Default values for email headers + EMAIL_FROM="equipe+autosysadmin@evolix.fr" + EMAIL_INTERNAL="autosysadmin@evolix.fr" + + LOCK_FILE="${RUN_DIR}/${LOCK_NAME}.lock" + readonly LOCK_FILE + # Remove lock file at exit + cleanup() { + # shellcheck disable=SC2317 + rm -f "${LOCK_FILE}" + } + trap 'cleanup' 0 + + # Load configuration + # shellcheck disable=SC1091 + test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin + + log_all "Begin ${PROGNAME} RUN_ID: ${RUN_ID}" + log_all "Log directory is ${LOG_DIR}" +} + +# Executes a list of tasks before exiting: +# * prepare a summary of actions and possible abort reasons +# * send emails +# * do some cleanup +quit() { + log_all "End ${PROGNAME} RUN_ID: ${RUN_ID}" + + summary="RUN_ID: ${RUN_ID}" + if [ -s "${ABORT_FILE}" ]; then + # Add abort reasons to summary + summary="${summary}\n$(print_abort_reasons)" + hook_mail "abort" + + return_code=1 + else + if [ -s "${ACTIONS_FILE}" ]; then + # Add notable actions to summary + summary="${summary}\n$(print_actions "Aucune action")" + hook_mail "success" + fi + + return_code=0 + fi + + hook_mail "internal" + + if is_interactive; then + # shellcheck disable=SC2001 + echo "${summary}" | sed -e 's/\\n/\n/g' + else + /usr/share/scripts/evomaintenance.sh --auto --user autosysadmin --message "${summary}" --no-commit --no-mail + fi + + teardown + + # shellcheck disable=SC2086 + exit ${return_code} +} + +teardown() { + : +} + +# Return true/false +is_interactive() { + test "${INTERACTIVE}" -eq "1" +} + +save_server_state() { + DUMP_SERVER_STATE_BIN="$(command -v dump-server-state || command -v backup-server-state)" + + if [ -z "${DUMP_SERVER_STATE_BIN}" ]; then + log_all "Warning: dump-server-state is not present. No server state recorded." 
+ fi + + if [ -x "${DUMP_SERVER_STATE_BIN}" ]; then + DUMP_DIR=$(file_path_in_log_dir "server-state") + # We don't want the logging to take too much time, + # so we kill it if it takes more than 20 seconds. + timeout --signal 9 20 \ + "${DUMP_SERVER_STATE_BIN}" \ + --dump-dir="${DUMP_DIR}" \ + --df \ + --dmesg \ + --iptables \ + --lxc \ + --netcfg \ + --netstat \ + --uname \ + --processes \ + --systemctl \ + --uptime \ + --virsh \ + --disks \ + --mysql-processes \ + --no-apt-states \ + --no-apt-config \ + --no-dpkg-full \ + --no-dpkg-status \ + --no-mount \ + --no-packages \ + --no-sysctl \ + --no-etc + + log_run "Server state saved in \`server-state' directory." + fi +} + +is_debug() { + # first time: do the check… + # other times: pass + if [ -z "${DEBUG:-""}" ]; then + debug_file="/etc/evolinux/autosysadmin.debug" + + if [ -e "${debug_file}" ]; then + last_change=$(stat -c %Z "${debug_file}") + limit_date=$(date --date "14400 seconds ago" +"%s") + + if [ $(( last_change - limit_date )) -le "0" ]; then + log_run "Debug mode disabled; file is too old ($(( $(date +%s) - last_change )) seconds)." + rm "${debug_file}" + # Debug mode disabled + DEBUG="0" + else + log_run "Debug mode enabled." + # Debug mode enabled + DEBUG="1" + fi + else + # log_run "Debug mode disabled; file is absent." + # Debug mode disabled + DEBUG="0" + fi + fi + # return the value + test "${DEBUG}" -eq "1" +} + +# Uses the who(1) definition of "active" +currently_active_users() { + LC_ALL=C who --users | grep --extended-regexp "\s+\.\s+" | awk '{print $1}' | sort --human-numeric-sort | uniq +} +# Users active in the last 29 minutes +recently_active_users() { + LC_ALL=C who --users | grep --extended-regexp "\s+00:(0|1|2)[0-9]\s+" | awk --field-separator ' ' '{print $1,$6}' +} +# Save the list of users to a file in the log directory +save_active_users() { + LC_ALL=C who --users | save_in_log_dir "who-users" +} + +# An autosysadmin must not perform actions if a user is active or was active recently.
+# +# This can be bypassed in interactive mode. +# It's OK to lose this data after a reboot. +ensure_no_active_users_or_exit() { + # Save all active users + save_active_users + + if is_debug; then + log_run "Debug mode enabled: continue without checking active users." + return 0; + fi + + # Is there any currently active user? + currently_active_users=$(currently_active_users) + if [ -n "${currently_active_users}" ]; then + # Flatten the (one user per line) list for single-line log messages + users_oneliner=$(echo "${currently_active_users}" | paste --serial --delimiters=' ') + log_run "Currently active users: ${users_oneliner}" + if is_interactive; then + echo "Some users are currently active:" + # shellcheck disable=SC2001 + echo "${currently_active_users}" | sed -e 's/\(.\+\)/* \1/' + answer="" + while :; do + printf "> Continue? [Y,n,?] " + read -r answer + case ${answer} in + [Yy]|"" ) + log_run "Active users check bypassed manually in interactive mode." + return + ;; + [Nn] ) + log_run "Active users check confirmed manually in interactive mode." + log_abort_and_quit "Active users detected: ${users_oneliner}" + ;; + * ) + printf "y - yes, continue\n" + printf "n - no, exit\n" + printf "? - print this help\n" + ;; + esac + done + else + log_abort_and_quit "Currently active users detected: ${users_oneliner}." + fi + else + # or recently (the last 30 minutes) active user? + recently_active_users=$(recently_active_users) + if [ -n "${recently_active_users}" ]; then + # Flatten the (one user per line) list for single-line log messages + users_oneliner=$(echo "${recently_active_users}" | paste --serial --delimiters=' ') + log_run "Recently active users: ${users_oneliner}" + if is_interactive; then + echo "Some users were recently active:" + # shellcheck disable=SC2001 + echo "${recently_active_users}" | sed -e 's/\(.\+\)/* \1/' + answer="" + while :; do + printf "> Continue? [Y,n,?] " + read -r answer + case ${answer} in + [Yy]|"" ) + log_run "Active users check bypassed manually in interactive mode."
+ return + ;; + [Nn] ) + log_run "Active users check confirmed manually in interactive mode." + log_abort_and_quit "Recently active users detected: ${users_oneliner}." + ;; + * ) + printf "y - yes, continue\n" + printf "n - no, exit\n" + printf "? - print this help\n" + ;; + esac + done + else + log_abort_and_quit "Recently active users detected: ${users_oneliner}." + fi + fi + fi +} + +# Takes an NRPE command name as 1st parameter, +# and executes the full command if found in the configuration. +# Return the result and the return code of the command. +check_nrpe() { + check="$1" + + nrpe_files="" + + # Check if NRPE config is found + if [ -f "/etc/nagios/nrpe.cfg" ]; then + nrpe_files="${nrpe_files} /etc/nagios/nrpe.cfg" + else + msg="NRPE configuration not found: /etc/nagios/nrpe.cfg" + log_run "${msg}" + echo "${msg}" + return 3 + fi + + # Search for included files + # shellcheck disable=SC2086 + while IFS= read -r include_file; do + nrpe_files="${nrpe_files} ${include_file}" + done < <(grep --extended-regexp '^\s*include=.+' ${nrpe_files} | cut -d = -f 2) + + # Search for files in included directories + # shellcheck disable=SC2086 + while IFS= read -r include_dir; do + nrpe_files="${nrpe_files} ${include_dir}/*.cfg" + done < <(grep --extended-regexp '^\s*include_dir=.+' ${nrpe_files} | cut -d = -f 2) + + # Fetch uncommented commands in (sorted) config files + # shellcheck disable=SC2086 + nrpe_commands=$(grep --no-filename --exclude=*~ --fixed-strings "[${check}]" ${nrpe_files} | grep --invert-match --extended-regexp '^\s*#\s*command' | cut -d = -f 2) + nrpe_commands_count=$(echo "${nrpe_commands}" | wc -l) + + if is_debian_version "9" "<=" && [ "${nrpe_commands_count}" -gt "1" ]; then + # On Debian <= 9, NRPE loading was not sorted + # we need to raise an error if we have multiple defined commands + msg="Unable to determine which NRPE command to run" + log_run "${msg}" + echo "${msg}" + return 3 + else + # On Debian > 9, use the last command + 
nrpe_command=$(echo "${nrpe_commands}" | tail -n 1) + + nrpe_result=$(${nrpe_command}) + nrpe_rc=$? + + log_run "NRPE command (exited with ${nrpe_rc}): ${nrpe_command}" + log_run "${nrpe_result}" + + echo "${nrpe_result}" + return "${nrpe_rc}" + fi +} + +# An autosysadmin script must not run twice (or more) simultaneously. +# We use a customizable (with LOCK_NAME) lock file to keep track of this. +# A wait time can be configured. +# +# This can be bypassed in interactive mode. +# It's OK to lose this data after a reboot. +acquire_lock_or_exit() { + lock_file="${1:-${LOCK_FILE}}" + lock_wait="${2:-${LOCK_WAIT}}" + + # lock_wait must be compatible with sleep(1), otherwise fallback to 0 + if ! echo "${lock_wait}" | grep -Eq '^[0-9]+[smhd]?$'; then + log_run "Lock wait: incorrect value '${lock_wait}', fallback to 0." + lock_wait=0 + fi + + if [ "${lock_wait}" != "0" ] && [ -f "${lock_file}" ]; then + log_run "Lock file present. Let's wait ${lock_wait} and check again." + sleep "${lock_wait}" + fi + + if [ -f "${lock_file}" ]; then + log_abort_and_quit "Lock file still present." + else + log_run "Lock file absent. Let's put one." + touch "${lock_file}" + fi +} + +# If a script has been run in the last 30 minutes, running it again won't fix the issue. +# We use a /run/autosysadmin/${PROGNAME}_lastrun file to keep track of this. +# +# This can be bypassed in interactive mode. +# This is bypassed in debug mode. +# It's OK to lose this data after a reboot. +ensure_not_too_soon_or_exit() { + if is_debug; then + log_run "Debug mode enabled: continue without checking when was the last run." + return 0; + fi + + lastrun_file="${RUN_DIR}/${PROGNAME}_lastrun" + if [ -f "${lastrun_file}" ]; then + lastrun_age="$(($(date +%s)-$(stat -c "%Y" "${lastrun_file}")))" + log_run "Last run was ${lastrun_age} seconds ago." + if [ "${lastrun_age}" -lt 1800 ]; then + if is_interactive; then + echo "${PROGNAME} was run ${lastrun_age} seconds ago." + answer="" + while :; do + printf "> Continue?
[Y,n,?] " + read -r answer + case ${answer} in + [Yy]|"" ) + log_run "Last run check bypassed manually in interactive mode." + break + ;; + [Nn] ) + log_run "Last run check confirmed manually in interactive mode." + log_abort_and_quit 'Last run too recent.' + ;; + * ) + printf "y - yes, continue\n" + printf "n - no, exit\n" + printf "? - print this help\n" + ;; + esac + done + else + log_abort_and_quit "Last run too recent." + fi + fi + fi + touch "${lastrun_file}" +} + +# Populate DEBIAN_VERSION and DEBIAN_RELEASE variables +# based on gathered information about the operating system +detect_os() { + DEBIAN_RELEASE="unknown" + DEBIAN_VERSION="unknown" + LSB_RELEASE_BIN="$(command -v lsb_release)" + + if [ -e /etc/debian_version ]; then + DEBIAN_VERSION="$(cut -d "." -f 1 < /etc/debian_version)" + if [ -x "${LSB_RELEASE_BIN}" ]; then + DEBIAN_RELEASE="$("${LSB_RELEASE_BIN}" --codename --short)" + else + case "${DEBIAN_VERSION}" in + 7) DEBIAN_RELEASE="wheezy" ;; + 8) DEBIAN_RELEASE="jessie" ;; + 9) DEBIAN_RELEASE="stretch" ;; + 10) DEBIAN_RELEASE="buster" ;; + 11) DEBIAN_RELEASE="bullseye" ;; + 12) DEBIAN_RELEASE="bookworm" ;; + 13) DEBIAN_RELEASE="trixie" ;; + esac + fi + # log_run "Detected OS: Debian version=${DEBIAN_VERSION} release=${DEBIAN_RELEASE}" + # else + # log_run "Detected OS: unknown (missing /etc/debian_version)" + fi +} + +is_debian_wheezy() { + test "${DEBIAN_RELEASE}" = "wheezy" +} +is_debian_jessie() { + test "${DEBIAN_RELEASE}" = "jessie" +} +is_debian_stretch() { + test "${DEBIAN_RELEASE}" = "stretch" +} +is_debian_buster() { + test "${DEBIAN_RELEASE}" = "buster" +} +is_debian_bullseye() { + test "${DEBIAN_RELEASE}" = "bullseye" +} +is_debian_bookworm() { + test "${DEBIAN_RELEASE}" = "bookworm" +} +is_debian_trixie() { + test "${DEBIAN_RELEASE}" = "trixie" +} +is_debian_version() { + local version=$1 + local relation=${2:-"eq"} + + if [ -z "${DEBIAN_VERSION:-""}" ]; then + detect_os + fi + + dpkg --compare-versions "${DEBIAN_VERSION}" 
"${relation}" "${version}" +} + +# List systemd services (only names), even if stopped +systemd_list_services() { + pattern=$1 + + systemctl list-units --all --no-legend --type=service "${pattern}" | grep --only-matching --extended-regexp '\S+\.service' +} + +is_systemd_enabled() { + systemctl --quiet is-enabled "$1" 2> /dev/null +} + +is_systemd_active() { + systemctl --quiet is-active "$1" 2> /dev/null +} + +is_sysvinit_enabled() { + find /etc/rc2.d/ -name "$1" > /dev/null +} + +get_fqdn() { + # shellcheck disable=SC2155 + local system=$(uname -s) + + if [ "${system}" = "Linux" ]; then + hostname --fqdn + elif [ "${system}" = "OpenBSD" ]; then + hostname + else + log_abort_and_quit "System '${system}' not recognized." + fi +} + +get_complete_hostname() { + REAL_HOSTNAME="$(get_fqdn)" + if [ "${HOSTNAME}" = "${REAL_HOSTNAME}" ]; then + echo "${HOSTNAME}" + else + echo "${HOSTNAME} (${REAL_HOSTNAME})" + fi +} +# Fetch values from evomaintenance configuration +get_evomaintenance_mail() { + grep "EVOMAINTMAIL=" /etc/evomaintenance.cf | cut -d '=' -f2 +} +get_evomaintenance_emergency_mail() { + grep "URGENCYFROM=" /etc/evomaintenance.cf | cut -d '=' -f2 +} +get_evomaintenance_emergency_tel() { + grep "URGENCYTEL=" /etc/evomaintenance.cf | cut -d '=' -f2 +} + +# Log a message to the log file in the log directory +log_run() { + local msg="${1:-$(cat /dev/stdin)}" + # shellcheck disable=SC2155 + local date=$(/bin/date +"${DATE_FORMAT}") + + printf "[%s] %s[%s]: %s\\n" \ + "${date}" "${PROGNAME}" "${PID}" "${msg}" \ + >> "${LOG_FILE}" +} +# Log a message in the system log file (syslog or journald) +log_global() { + local msg="${1:-$(cat /dev/stdin)}" + + echo "${msg}" \ + | /usr/bin/logger -p local0.notice -t autosysadmin +} +# Log a message in both places +log_all() { + local msg="${1:-$(cat /dev/stdin)}" + + log_global "${msg}" + log_run "${msg}" +} +# Log a notable action in regular places +# and append it to the dedicated list +log_action() { + log_all "$*" + 
append_action "$*" +} +# Append a line in the actions.log file in the log directory +append_action() { + echo "$*" >> "${ACTIONS_FILE}" +} +# Print the content of the actions.log file +# or a fallback content (1st parameter) if empty +# shellcheck disable=SC2120 +print_actions() { + local fallback=${1:-""} + if [ -s "${ACTIONS_FILE}" ]; then + cat "${ACTIONS_FILE}" + elif [ -n "${fallback}" ]; then + echo "${fallback}" + fi +} + +# Log an abort reason in regular places +# and append it to the dedicated list +log_abort() { + log_all "$*" + append_abort_reason "$*" +} +# Append a line in the abort.log file in the log directory +append_abort_reason() { + echo "$*" >> "${ABORT_FILE}" +} +# Print the content of the abort.log file +# or a fallback content (1st parameter) if empty +# shellcheck disable=SC2120 +print_abort_reasons() { + local fallback=${1:-""} + if [ -s "${ABORT_FILE}" ]; then + cat "${ABORT_FILE}" + elif [ -n "${fallback}" ]; then + echo "${fallback}" + fi +} +# Print the content of the main log from the log directory +print_main_log() { + cat "${LOG_FILE}" +} +# Log an abort reason and quit the script +log_abort_and_quit() { + log_abort "$*" + quit +} + +# Store the content from standard input +# into a file in the log directory named after the 1st parameter +save_in_log_dir() { + local file_name=$1 + local file_path="${LOG_DIR}/${file_name}" + + cat /dev/stdin > "${file_path}" + + log_run "Saved \`${file_name}' file." +} +# Return the full path of the file in log directory +# based on the name in the 1st parameter +file_path_in_log_dir() { + echo "${LOG_DIR}/${1}" +} + +format_mail_success() { + cat < +Content-Type: text/plain; charset=UTF-8 +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +X-Script: ${PROGNAME} +X-RunId: ${RUN_ID} +To: ${EMAIL_CLIENT:-alert5@evolix.fr} +Cc: ${EMAIL_INTERNAL} +Subject: [autosysadmin] Intervention automatisée sur ${HOSTNAME_TEXT} + +Bonjour, + +Une intervention automatisée vient de se terminer. 
+ +Nom du serveur : ${HOSTNAME_TEXT} +Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) +Script déclenché : ${PROGNAME} + +### Actions réalisées + +$(print_actions "Aucune") + +### Réagir à cette intervention + +Vous pouvez répondre à ce message (${EMAIL_FROM}). + +En cas d'urgence, utilisez l'adresse ${EMERGENCY_MAIL} +ou notre ligne d'astreinte (${EMERGENCY_TEL}) + +-- +Votre AutoSysadmin +EOTEMPLATE +} + +format_mail_abort() { + cat < +Content-Type: text/plain; charset=UTF-8 +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +X-Script: ${PROGNAME} +X-RunId: ${RUN_ID} +To: ${EMAIL_CLIENT:-alert5@evolix.fr} +Cc: ${EMAIL_INTERNAL} +Subject: [autosysadmin] Intervention automatisée interrompue sur ${HOSTNAME_TEXT} + +Bonjour, + +Une intervention automatisée a été déclenchée mais s'est interrompue. + +Nom du serveur : ${HOSTNAME_TEXT} +Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) +Script déclenché : ${PROGNAME} + +### Actions réalisées + +$(print_actions "Aucune") + +### Raison(s) de l'interruption + +$(print_abort_reasons "Inconnue") + +### Réagir à cette intervention + +Vous pouvez répondre à ce message (${EMAIL_FROM}). 
+ +En cas d'urgence, utilisez l'adresse ${EMERGENCY_MAIL} +ou notre ligne d'astreinte (${EMERGENCY_TEL}) + +-- +Votre AutoSysadmin +EOTEMPLATE +} + +# shellcheck disable=SC2028 +print_report_information() { + echo "**Uptime**" + echo "" + uptime + + echo "" + echo "**Utilisateurs récents**" + echo "" + who_file=$(file_path_in_log_dir "server-state/who.txt") # NOTE(review): was df.txt (copy/paste); confirm snapshot name, live "who" is the fallback + if [ -s "${who_file}" ]; then + cat "${who_file}" + else + who --users + fi + + echo "" + echo "**Espace disque**" + echo "" + df_file=$(file_path_in_log_dir "server-state/df.txt") + if [ -s "${df_file}" ]; then + cat "${df_file}" + else + df -h + fi + + echo "" + echo "**Dmesg**" + echo "" + dmesg_file=$(file_path_in_log_dir "server-state/dmesg.txt") + if [ -s "${dmesg_file}" ]; then + tail -n 5 "${dmesg_file}" + else + dmesg | tail -n 5 + fi + + echo "" + echo "**systemd failed services**" + echo "" + failed_services_file=$(file_path_in_log_dir "server-state/systemctl-failed-services.txt") + if [ -s "${failed_services_file}" ]; then + cat "${failed_services_file}" + else + systemctl --no-legend --state=failed --type=service + fi + + if command -v lxc-ls > /dev/null 2>&1; then + echo "" + echo "**LXC containers**" + echo "" + lxc_ls_file=$(file_path_in_log_dir "server-state/lxc-list.txt") + if [ -s "${lxc_ls_file}" ]; then + cat "${lxc_ls_file}" + else + lxc-ls --fancy + fi + fi + + apache_errors_file=$(file_path_in_log_dir "apache-errors.log") + if [ -f "${apache_errors_file}" ]; then + echo "" + echo "**Apache errors**" + echo "" + cat "${apache_errors_file}" + fi + + nginx_errors_file=$(file_path_in_log_dir "nginx-errors.log") + if [ -f "${nginx_errors_file}" ]; then + echo "" + echo "**Nginx errors**" + echo "" + cat "${nginx_errors_file}" + fi +} + +format_mail_internal() { + cat < +Content-Type: text/plain; charset=UTF-8 +MIME-Version: 1.0 +Content-Transfer-Encoding: 8bit +X-Script: ${PROGNAME} +X-RunId: ${RUN_ID} +To: ${EMAIL_INTERNAL} +Subject: [autosysadmin] Rapport interne d'intervention sur 
${HOSTNAME_TEXT} + +Bonjour, + +Une intervention automatique vient de se terminer. + +Nom du serveur : ${HOSTNAME_TEXT} +Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) +Script déclenché : ${PROGNAME} + +### Actions réalisées + +$(print_actions "Aucune") + +### Raison(s) de l'interruption + +$(print_abort_reasons "Aucune") + +### Log autosysadmin + +$(print_main_log) + +### Informations additionnelles + +$(print_report_information) + +-- +Votre AutoSysadmin +EOTEMPLATE +} + +# Generic function to send emails at the end of the script. +# Takes a template as 1st parameter +hook_mail() { + if is_debug; then + log_run "Debug mode enabled: continue without sending mail." + return 0; + fi + + HOSTNAME="${HOSTNAME:-"$(get_fqdn)"}" + HOSTNAME_TEXT="$(get_complete_hostname)" + EMAIL_CLIENT="$(get_evomaintenance_mail)" + EMERGENCY_MAIL="$(get_evomaintenance_emergency_mail)" + EMERGENCY_TEL="$(get_evomaintenance_emergency_tel)" + + MAIL_CONTENT="$(format_mail_"$1")" + + SENDMAIL_BIN="$(command -v sendmail)" + + if [ -z "${SENDMAIL_BIN}" ]; then + log_global "ERROR: No \`sendmail' command has been found, can't send mail." + fi + if [ -x "${SENDMAIL_BIN}" ]; then + echo "${MAIL_CONTENT}" | "${SENDMAIL_BIN}" -oi -t -f "equipe@evolix.fr" + log_global "Sent '$1' mail for RUN_ID: ${RUN_ID}" + fi +} + +is_holiday() { + # gcal mark today as a holiday by surrounding with < and > the day + # of the month of that holiday line. For example if today is 2022-05-01 we'll + # get among other lines: + # Fête du Travail (FR) + Di, < 1>Mai 2022 + # Jour de la Victoire (FR) + Di, : 8:Mai 2022 = +7 jours + LANGUAGE=fr_FR.UTF-8 TZ=Europe/Paris gcal --cc-holidays=fr --holiday-list=short | grep -E '<[0-9 ]{2}>' --quiet +} + +is_weekend() { + day_of_week=$(date +%u) + if [ "${day_of_week}" != 6 ] && [ "${day_of_week}" != 7 ]; then + return 1 + fi +} + +is_workday() { + if is_holiday || is_weekend; then + return 1 + fi +} + +is_worktime() { + if ! 
is_workday; then + return 1 + fi + + hour=$(date +%H) + if [ "${hour}" -lt 9 ] || { [ "${hour}" -ge 12 ] && [ "${hour}" -lt 14 ] ; } || [ "${hour}" -ge 18 ]; then + return 1 + fi +} diff --git a/autosysadmin-agent/files/upstream/lib/repair.sh b/autosysadmin-agent/files/upstream/lib/repair.sh new file mode 100644 index 00000000..ddd243b5 --- /dev/null +++ b/autosysadmin-agent/files/upstream/lib/repair.sh @@ -0,0 +1,112 @@ +#!/bin/bash + +# Specific functions for "repair" scripts + +is_all_repair_disabled() { + # Fetch values from the config + # and if it is not defined or has no value, then assign "on" + + local status=${repair_all:=on} + + + test "${status}" = "off" || test "${status}" = "0" +} + +is_current_repair_disabled() { + # Fetch values from the config + # and if it is not defined or has no value, then assign "on" + + local status=${!PROGNAME:=on} + + test "${status}" = "off" || test "${status}" = "0" +} + +ensure_not_disabled_or_exit() { + if is_all_repair_disabled; then + log_global 'All repair scripts are disabled.' + exit 0 + fi + if is_current_repair_disabled; then + log_global "Current repair script (${PROGNAME}) is disabled." + exit 0 + fi +} + +# Set of actions to do at the begining of a "repair" script +pre_repair() { + initialize + + # Are we supposed to run? + ensure_not_disabled_or_exit + + # Has it recently been run? + ensure_not_too_soon_or_exit + + # Can we acquire a lock? + acquire_lock_or_exit + + # Is there any active user? 
+ ensure_no_active_users_or_exit + + # Save important information + save_server_state +} + +# Set of actions to do at the end of a "repair" script +post_repair() { + quit +} + +repair_lxc_php() { + container_name=$1 + + if is_systemd_enabled 'lxc.service'; then + lxc_path=$(lxc-config lxc.lxcpath) + if lxc-info --name "${container_name}" > /dev/null; then + rootfs="${lxc_path}/${container_name}/rootfs" + case "${container_name}" in + php56) fpm_log_file="${rootfs}/var/log/php5-fpm.log" ;; + php70) fpm_log_file="${rootfs}/var/log/php7.0-fpm.log" ;; + php73) fpm_log_file="${rootfs}/var/log/php7.3-fpm.log" ;; + php74) fpm_log_file="${rootfs}/var/log/php7.4-fpm.log" ;; + php80) fpm_log_file="${rootfs}/var/log/php8.0-fpm.log" ;; + php81) fpm_log_file="${rootfs}/var/log/php8.1-fpm.log" ;; + php82) fpm_log_file="${rootfs}/var/log/php8.2-fpm.log" ;; + php83) fpm_log_file="${rootfs}/var/log/php8.3-fpm.log" ;; + *) + log_abort_and_quit "Unknown container '${container_name}'" + ;; + esac + + # Determine FPM Pool path + php_path_pool=$(find "${lxc_path}/${container_name}/" -type d -name "pool.d") + + # Save LXC info (before restart) + lxc-info --name "${container_name}" | save_in_log_dir "lxc-${container_name}.before.status" + # Save last lines of FPM log (before restart) + tail "${fpm_log_file}" | save_in_log_dir "$(basename "${fpm_log_file}" | sed -e 's/.log/.before.log/')" + # Save NRPE check (before restart) + /usr/local/lib/nagios/plugins/check_phpfpm_multi "${php_path_pool}" | save_in_log_dir "check_fpm_${container_name}.before.out" + + lxc-stop --timeout 20 --name "${container_name}" + lxc-start --daemon --name "${container_name}" + rc=$? 
+ if [ "${rc}" -eq "0" ]; then + log_all "Restart LXC container '${container_name}': OK" + else + log_all "Restart LXC container '${container_name}': failed" + fi + + # Save LXC info (after restart) + lxc-info --name "${container_name}" | save_in_log_dir "lxc-${container_name}.after.status" + # Save last lines of FPM log (after restart) + tail "${fpm_log_file}" | save_in_log_dir "$(basename "${fpm_log_file}" | sed -e 's/.log/.after.log/')" + # Save NRPE check (after restart) + /usr/local/lib/nagios/plugins/check_phpfpm_multi "${php_path_pool}" | save_in_log_dir "check_fpm_${container_name}.after.out" + else + log_abort_and_quit "LXC container '${container_name}' doesn't exist." + fi + else + log_abort_and_quit 'LXC not found.' + fi +} diff --git a/autosysadmin-agent/files/upstream/lib/restart.sh b/autosysadmin-agent/files/upstream/lib/restart.sh new file mode 100644 index 00000000..78be5bb0 --- /dev/null +++ b/autosysadmin-agent/files/upstream/lib/restart.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +# Specific functions for "restart" scripts + +running_custom() { + # Placeholder that returns 1, to prevent running if not redefined + log_global "running_custom() function has not been redefined! Let's quit." + return 1 +} + +# Examine RUNNING variable and decide if the script should run or not +is_supposed_to_run() { + if is_debug; then return 0; fi + + case ${RUNNING} in + never) + # log_global "is_supposed_to_run: no (never)" + return 1 + ;; + always) + # log_global "is_supposed_to_run: yes (always)" + return 0 + ;; + nwh-fr) + ! is_worktime + rc=$? + # if [ ${rc} -eq 0 ]; then + # log_global "is_supposed_to_run: yes (nwh-fr returned ${rc})" + # else + # log_global "is_supposed_to_run: no (nwh-fr returned ${rc})" + # fi + return ${rc} + ;; + nwh-ca) + # Not implemented yet + return 0 + ;; + custom) + running_custom + rc=$? 
+ # if [ ${rc} -eq 0 ]; then + # log_global "is_supposed_to_run: yes (custom returned ${rc})" + # else + # log_global "is_supposed_to_run: no (custom returned ${rc})" + # fi + return ${rc} + ;; + esac +} + +ensure_supposed_to_run_or_exit() { + if ! is_supposed_to_run; then + # simply quit (no logging, no notifications…) + # log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})." + exit 0 + fi +} + +# Set of actions to do at the begining of a "restart" script +pre_restart() { + initialize + + # Has it recently been run? + ensure_not_too_soon_or_exit + + # Can we acquire a lock? + acquire_lock_or_exit + + # Save important information + save_server_state +} + +# Set of actions to do at the end of a "restart" script +post_restart() { + quit +} diff --git a/autosysadmin-agent/files/upstream/repair/repair_amavis b/autosysadmin-agent/files/upstream/repair/repair_amavis new file mode 100755 index 00000000..5963dc00 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_amavis @@ -0,0 +1,16 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +pre_repair + +# shellcheck source=./restart_amavis.sh +source /usr/share/scripts/autosysadmin/auto/restart_amavis.sh + +restart_amavis + +sendmail success + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_disk b/autosysadmin-agent/files/upstream/repair/repair_disk new file mode 100755 index 00000000..70ed28a6 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_disk @@ -0,0 +1,157 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +pre_repair + + +# We always keep some reserved blocks to avoid missing some logs +# https://gitea.evolix.org/evolix/autosysadmin/issues/22 +RESERVED_BLOCKS_MIN=1 + +get_mountpoints() { + # the $(...) 
get the check_disk1 command + # the cut command selects the critical part of the check_disk1 output + # the grep command extracts the mountpoints and available disk space + # the last cut command selects the mountpoints + check_disk1_command=$(grep check_disk1 /etc/nagios/nrpe.d/evolix.cfg | cut -d'=' -f2-) + + ${check_disk1_command} -e | cut -d'|' -f1 | grep --extended-regexp --only-matching '/[[:graph:]]* [0-9]+ [A-Z][A-Z]' | cut -d' ' -f1 +} + +is_reserved_blocks_nominal() { + partition=${1} + + fs_type="$(findmnt -n --output=fstype "${partition}")" + if [ "${fs_type}" = "ext4" ]; then + device="$(findmnt -n --output=source "${partition}")" + reserved_block_count="$(tune2fs -l "${device}" | grep 'Reserved block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')" + block_count="$(tune2fs -l "${device}" | grep 'Block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')" + percentage=$(awk "BEGIN { pc=100*${reserved_block_count}/${block_count}; i=int(pc); print (pc-i<0.5)?i:i+1 }") + + log_run "Reserved blocks for ${partition} is currently at ${percentage}%" + if [ "${percentage}" -gt "${RESERVED_BLOCKS_MIN}" ]; then + log_run "Allowing tune2fs action to reduce the number of reserved blocks" + return 0 + else + log_run "Reserved blocks already at or bellow ${RESERVED_BLOCKS_MIN}%, no automatic action possible" + return 1 + fi + else + log_run "Filesystem for ${partition} (${fs_type}) is incompatible with reserved block reduction." 
+ return 1 + fi +} + +reduce_reserved_blocks() { + partition=${1} + + device=$(findmnt -n --output=source "${partition}") + tune2fs -m "${RESERVED_BLOCKS_MIN}" "${device}" + log_action "Reserved blocks for ${partition} changed to ${RESERVED_BLOCKS_MIN} percent" +} + +is_tmp_to_delete() { + size="$(find /tmp/ -type f -ctime +1 -exec du {} \+ | awk '{s+=$1}END{if (s > 0) print s / 1024}')" + if [ -n "${size}" ]; then + return 0 + else + return 1 + fi +} + +is_log_to_delete() { + size="$(find /var/log/ -type f -mtime +365 -exec du {} \+ | awk '{s+=$1}END{if (s > 0) print s / 1024}')" + if [ -n "${size}" ]; then + return 0 + else + return 1 + fi +} + +clean_apt_cache() { + for container in $(lxc-ls -1); do + if [ -e "$(lxc-config lxc.lxcpath)/${container}/rootfs/var/cache" ]; then + lxc-attach --name "${container}" -- apt-get clean + log_action "Clean apt cache in LXC container ${container}"; + fi + done + + # NOTE: "head -n 1" might be superfluous, but let's be sure to have only the first returned value + biggest_subdir=$(du --summarize --one-file-system /var/* | sort --numeric-sort --reverse | sed 's/^[0-9]\+[[:space:]]\+//;q' | head -n 1) + case "${biggest_subdir}" in + '/var/cache') + apt-get clean + log_action 'Clean apt cache' + ;; + esac +} + +clean_amavis_virusmails() { + if du --inodes /var/lib/* | sort --numeric-sort | tail -n 3 | grep --quiet 'virusmails$'; then + find /var/lib/amavis/virusmails/ -type f -atime +30 -delete + log_action 'Clean amavis infected mails' + fi +} + +critical_mountpoints=$(get_mountpoints) + +if [ -z "${critical_mountpoints}" ]; then + log_abort_and_quit "No partition is in critical state, nothing left to do." 
+else + for mountpoint in ${critical_mountpoints}; do + case "${mountpoint}" in + /var) + #if is_log_to_delete + #then + # find /var/log/ -type f -mtime +365 -delete + # log_action "$size Mo of disk space freed in /var" + #fi + if is_reserved_blocks_nominal /var; then + reduce_reserved_blocks /var + clean_apt_cache + clean_amavis_virusmails + fi + ;; + /tmp) + #if is_tmp_to_delete + #then + # find /tmp/ -type f -ctime +1 -delete + # log_action "$size Mo of disk space freed in /tmp" + #fi + if is_reserved_blocks_nominal /tmp; then + reduce_reserved_blocks /tmp + fi + ;; + /home) + if is_reserved_blocks_nominal /home; then + reduce_reserved_blocks /home + fi + ;; + /srv) + if is_reserved_blocks_nominal /srv; then + reduce_reserved_blocks /srv + fi + ;; + /filer) + if is_reserved_blocks_nominal /filer; then + reduce_reserved_blocks /filer + fi + ;; + /) + if is_reserved_blocks_nominal /; then + reduce_reserved_blocks / + # Suggest remove old kernel ? + fi + ;; + *) + # unknown + log_run 'Unknown partition (or weird case) or nothing to do' + ;; + esac + done +fi + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_elasticsearch b/autosysadmin-agent/files/upstream/repair/repair_elasticsearch new file mode 100755 index 00000000..5baffaaa --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_elasticsearch @@ -0,0 +1,35 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +pre_repair + +service="elasticsearch.service" +service_name="elasticsearch" + +if is_systemd_enabled "${service}"; then + if is_systemd_active "${service}"; then + log_abort_and_quit "${service} is active, nothing left to do." + else + # Save service status before restart + systemctl status "${service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 systemctl restart "${service}" > /dev/null + rc=$? 
+ if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + fi +else + log_abort_and_quit "${service} is disabled (or missing), nothing left to do." +fi + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_http b/autosysadmin-agent/files/upstream/repair/repair_http new file mode 100755 index 00000000..1c6fa5c7 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_http @@ -0,0 +1,131 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +pre_repair + +## Apache + +service="apache2.service" +service_name="apache2" + +if is_systemd_enabled "${service}"; then + if is_systemd_active "${service}"; then + log_all "${service} is active. Skip." + else + # Save service status before restart + systemctl status "${service}" | save_in_log_dir "${service_name}.before.status" + + # check syntax + if apache2ctl -t > /dev/null 2>&1; then + # Try to restart + timeout 20 systemctl restart "${service}" > /dev/null + rc=$? 
+ if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + + # Save error logs + date=$(LANG=en_US.UTF-8 date '+%b %d') + grep "${date}" /home/*/log/error.log /var/log/apache2/*error.log \ + | grep -v \ + -e "Got error 'PHP message:" \ + -e "No matching DirectoryIndex" \ + -e "client denied by server configuration" \ + -e "server certificate does NOT include an ID which matches the server name" \ + | save_in_log_dir "apache-errors.log" + else + log_action "Restart ${service_name}: skip (invalid configuration)" + fi + fi +else + log_all "${service} is disabled (or missing). Skip." +fi + +## Nginx + +service="nginx.service" +service_name="nginx" + +if is_systemd_enabled "${service}"; then + if is_systemd_active "${service}"; then + log_all "${service} is active. Skip." + else + # Save service status before restart + systemctl status "${service}" | save_in_log_dir "${service_name}.before.status" + + # check syntax + if nginx -t > /dev/null 2>&1; then + # Try to restart + timeout 20 systemctl restart "${service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + + # Save error logs + ### Consider doing for Nginx the same as Apache + else + log_action "Restart ${service_name}: skip (invalid configuration)" + fi + fi +else + log_all "${service} is disabled (or missing). Skip." 
+fi + +## LXC + +if is_systemd_enabled 'lxc.service'; then + for container in $(lxc-ls -1 | grep --fixed-strings 'php' | grep --extended-regexp --invert-match --regexp '\bold\b' --regexp '\bdisabled\b'); do + repair_lxc_php "${container}" + done +else + log_all "LXC is disabled (or missing). Skip." +fi + +## FPM + +fpm_services=$(systemd_list_services 'php*-fpm*') +if [ -n "${fpm_services}" ]; then + for service in ${fpm_services}; do + service_name="${service//.service/}" + if is_systemd_enabled "${service}"; then + if is_systemd_active "${service}"; then + log_all "${service} is active. Skip." + else + # Save service status before restart + systemctl status "${service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 systemctl restart "${service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + fi + else + log_all "${service} is disabled (or missing). Skip." + fi + done +else + log_all "PHP FPM not found. Skip." +fi + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_mysql b/autosysadmin-agent/files/upstream/repair/repair_mysql new file mode 100755 index 00000000..eb176743 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_mysql @@ -0,0 +1,69 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +pre_repair + +if is_debian_version "8" "<="; then + + if is_sysvinit_enabled '*mysql*'; then + if ! pgrep -u mysql mysqld > /dev/null; then + + # Save service status before restart + timeout 2 mysqladmin status 2>&1 | save_in_log_dir "mysql.before.status" + + timeout 20 /etc/init.d/mysql restart > /dev/null + rc=$? 
+ if [ "${rc}" -eq "0" ]; then + log_action "Restart mysql: OK" + else + log_action "Restart mysql: failed" + fi + + # Save service status after restart + timeout 2 mysqladmin status 2>&1 | save_in_log_dir "mysql.after.status" + else + log_abort_and_quit "mysqld process alive. Aborting" + fi + else + log_abort_and_quit "MySQL not enabled. Aborting" + fi + +else + + if is_debian_version "12" ">="; then + service="mariadb.service" + service_name="mariadb" + else + service="mysql.service" + service_name="mysql" + fi + + if is_systemd_enabled "${service}"; then + if is_systemd_active "${service}"; then + log_abort_and_quit "${service} is active, nothing left to do." + else + # Save service status before restart + systemctl status "${service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 systemctl restart "${service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + fi + else + log_abort_and_quit "${service} is disabled (or missing), nothing left to do." + fi + +fi + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_opendkim b/autosysadmin-agent/files/upstream/repair/repair_opendkim new file mode 100755 index 00000000..ab06d01d --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_opendkim @@ -0,0 +1,35 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +pre_repair + +service="opendkim.service" +service_name="opendkim" + +if is_systemd_enabled "${service}"; then + if is_systemd_active "${service}"; then + log_abort_and_quit "${service} is active, nothing left to do." 
+ else + # Save service status before restart + systemctl status "${service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 systemctl restart "${service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + fi +else + log_abort_and_quit "${service} is disabled (or missing). Abort." +fi + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm56 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm56 new file mode 100755 index 00000000..db2ed9d4 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm56 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php56 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm70 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm70 new file mode 100755 index 00000000..324acadb --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm70 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php70 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm73 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm73 new file mode 100755 index 00000000..9089aa6e --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm73 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source 
"${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php73 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm74 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm74 new file mode 100755 index 00000000..6d7f49bb --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm74 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php74 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm80 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm80 new file mode 100755 index 00000000..f61f45e6 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm80 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php80 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm81 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm81 new file mode 100755 index 00000000..ec9b20c0 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm81 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php81 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm82 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm82 new file mode 100755 index 00000000..8af2217e --- /dev/null +++ 
b/autosysadmin-agent/files/upstream/repair/repair_php_fpm82 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php82 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_php_fpm83 b/autosysadmin-agent/files/upstream/repair/repair_php_fpm83 new file mode 100755 index 00000000..7584e69c --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_php_fpm83 @@ -0,0 +1,14 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +LOCK_WAIT="15s" +LOCK_NAME="repair_http" + +pre_repair + +repair_lxc_php php83 + +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/repair_redis b/autosysadmin-agent/files/upstream/repair/repair_redis new file mode 100755 index 00000000..3873d16f --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/repair_redis @@ -0,0 +1,32 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +pre_repair + +for service in $(systemd_list_services 'redis-server*'); do + service_name="${service//.service/}" + + if is_systemd_active "${service}"; then + log_all "${service} is active. Skip." + else + # Save service status before restart + systemctl status "${service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 systemctl restart "${service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK." + else + log_action "Restart ${service_name}: failed." 
+ fi + + # Save service status after restart + systemctl status "${service}" | save_in_log_dir "${service_name}.after.status" + fi +done + +post_repair diff --git a/autosysadmin/files/scripts/repair_tomcat_instance.sh b/autosysadmin-agent/files/upstream/repair/repair_tomcat_instance old mode 100644 new mode 100755 similarity index 51% rename from autosysadmin/files/scripts/repair_tomcat_instance.sh rename to autosysadmin-agent/files/upstream/repair/repair_tomcat_instance index 9bf9949c..8cc76ae4 --- a/autosysadmin/files/scripts/repair_tomcat_instance.sh +++ b/autosysadmin-agent/files/upstream/repair/repair_tomcat_instance @@ -1,43 +1,24 @@ #!/bin/bash -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 -init_autosysadmin -load_conf - -test "${repair_tomcat_instance:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_tomcat_instance" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -log_system_status +pre_repair repair_tomcat_instance_handle_tomcat() { if /bin/su - "${1}" -c "/bin/systemctl --quiet --user is-active tomcat.service" ; then if ! /bin/su - "${1}" -c "/usr/bin/timeout 20 /bin/systemctl --quiet --user restart tomcat.service" then - log_error_exit "Echec de redémarrage instance tomcat utilisateur ${1}" + log_abort_and_quit "Echec de redémarrage instance tomcat utilisateur ${1}" else log_action "Redémarrage instance tomcat utilisateur ${1}" fi elif /bin/systemctl --quiet is-active "${1}".service ; then if ! 
/usr/bin/timeout 20 systemctl --quiet restart "${1}".service then - log_error_exit "Echec de redémarrage instance tomcat ${1}" + log_abort_and_quit "Echec de redémarrage instance tomcat ${1}" else log_action "Redémarrage instance tomcat ${1}" fi @@ -50,4 +31,4 @@ do repair_tomcat_instance_handle_tomcat "${instance}" done -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail +post_repair diff --git a/autosysadmin-agent/files/upstream/repair/zzz-repair_example.template b/autosysadmin-agent/files/upstream/repair/zzz-repair_example.template new file mode 100755 index 00000000..668d4d02 --- /dev/null +++ b/autosysadmin-agent/files/upstream/repair/zzz-repair_example.template @@ -0,0 +1,41 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 + +## Custom lock wait and/or lock name +# LOCK_WAIT="15s" +# LOCK_NAME="repair_http" + +pre_repair + +## The name of the service, mainly for logging +service_name="example" +## The systemd service name +systemd_service="${service_name}.service" + +if is_systemd_enabled "${systemd_service}"; then + if is_systemd_active "${systemd_service}"; then + log_abort_and_quit "${systemd_service} is active, nothing left to do." + else + # Save service status before restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 systemctl restart "${systemd_service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status" + fi +else + log_abort_and_quit "${service_name} is disabled (or missing), nothing left to do." 
+fi + +post_repair diff --git a/autosysadmin-agent/files/upstream/restart/README b/autosysadmin-agent/files/upstream/restart/README new file mode 100644 index 00000000..137a7436 --- /dev/null +++ b/autosysadmin-agent/files/upstream/restart/README @@ -0,0 +1,19 @@ +Autosysadmin "restart auto" scripts +=================================== + +In this directory you can place scripts that will be executed automatically by a cron job (stored in `/etc/cron.d/autosysadmin`). + +They must satisfy the default `run-parts(8)` constraints : + +* be "executable" +* belong to the Debian cron script namespace (`^[a-zA-Z0-9_-]+$`), example: `restart_amavis` + +Warning: scripts that do not satisfy those criteria will NOT be run (silently)! + +You can print the names of the scripts which would be run, without actually running them, with this command : + +``` +$ run-parts --test /usr/share/scripts/autosysadmin/auto +``` + +You can use `zzz-restart_example.template` as boilerplate code to make your own "restart" script. diff --git a/autosysadmin-agent/files/upstream/restart/zzz-restart_example.template b/autosysadmin-agent/files/upstream/restart/zzz-restart_example.template new file mode 100644 index 00000000..1051d132 --- /dev/null +++ b/autosysadmin-agent/files/upstream/restart/zzz-restart_example.template @@ -0,0 +1,120 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1 + +# shellcheck disable=SC2034 +RUNNING="nwh-fr" + +## Possible values for RUNNING : +## never => disabled +## always => enabled +## nwh-fr => enabled during non-working-hours in France +## nwh-ca => enabled during non-working-hours in Canada (not supported yet) +## custom => enabled if `running_custom()` function returns 0, otherwise disabled. 
+ +## Uncomment and customize this method if you want to have a special logic : +## +## return 1 if we should not run +## return 0 if we should run +## +## Some available functions : +## is_weekend() : Saturday or Sunday +## is_holiday() : holiday in France (based on `gcal(1)`) +## is_workday() : not weekend and not holiday +## is_worktime() : work day between 9-12h and 14-18h +# +# running_custom() { +# # implement your own custom method to decide if we should run or not +# } + +## The name of the service, mainly for logging +service_name="example" +## The SysVinit script name +sysvinit_script="${service_name}" +## The systemd service name +systemd_service="${service_name}.service" + +is_service_alive() { + ## this must return 0 if the service is alive, otherwise return 1 + ## Example: + pgrep -u USER PROCESS_NAME > /dev/null +} + +## Action for SysVinit system +sysvinit_action() { + # Save service status before restart + timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 "/etc/init.d/${sysvinit_script}" restart > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.after.status" +} + +## Action for systemd system +systemd_action() { + # Save service status before restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + # systemctl (only for NRPE ?) sometimes returns 0 even if the service has failed to start + # so we check the status explicitly + timeout 20 systemctl restart "${systemd_service}" > /dev/null \ + && sleep 1 \ + && systemctl status "${systemd_service}" > /dev/null + rc=$? 
+ if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status" +} + +# Should we run? +if ! is_supposed_to_run; then + # log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})." + exit 0 +fi +if is_service_alive; then + # log_global "${service_name} process alive. Aborting" + exit 0 +fi + +# Yes we do, so check for sysvinit or systemd +if is_debian_version "8" "<="; then + if ! is_sysvinit_enabled "*${sysvinit_script}*"; then + # log_global "${service_name} not enabled. Aborting" + exit 0 + fi + + # Let's finally do the action + pre_restart + sysvinit_action + post_restart +else + if ! is_systemd_enabled "${systemd_service}"; then + # log_global "${service_name} is disabled (or missing), nothing left to do." + exit 0 + fi + if is_systemd_active "${systemd_service}"; then + # log_global "${service_name} is active, nothing left to do." 
+ exit 0 + fi + + # Let's finally do the action + pre_restart + systemd_action + post_restart +fi diff --git a/autosysadmin-agent/handlers/main.yml b/autosysadmin-agent/handlers/main.yml new file mode 100644 index 00000000..f192d587 --- /dev/null +++ b/autosysadmin-agent/handlers/main.yml @@ -0,0 +1,16 @@ +--- + +- name: restart nagios-nrpe-server + ansible.builtin.service: + name: nagios-nrpe-server + state: restarted + +- name: restart nrpe + ansible.builtin.service: + name: nrpe + state: restarted + +- name: restart rsyslog + ansible.builtin.service: + name: rsyslog + state: restarted diff --git a/autosysadmin-agent/tasks/crontab.yml b/autosysadmin-agent/tasks/crontab.yml new file mode 100644 index 00000000..1fa090ab --- /dev/null +++ b/autosysadmin-agent/tasks/crontab.yml @@ -0,0 +1,25 @@ +--- + +- name: "Add begin marker if missing" + ansible.builtin.lineinfile: + path: "/etc/cron.d/autosysadmin" + line: "# BEGIN ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN" + insertbefore: BOF + create: yes + +- name: "Add end marker if missing" + ansible.builtin.lineinfile: + path: "/etc/cron.d/autosysadmin" + line: "# END ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN" + insertafter: "EOF" + create: yes + +- name: "Create config if missing" + ansible.builtin.blockinfile: + path: "/etc/cron.d/autosysadmin" + marker: "# {mark} ANSIBLE MANAGED SECTION FOR AUTOSYSADMIN" + block: "{{ lookup('ansible.builtin.template', '../templates/autosysadmin.cron.j2') }}" + owner: root + group: root + mode: "0750" + create: yes diff --git a/autosysadmin/tasks/dependencies.yml b/autosysadmin-agent/tasks/dependencies.yml similarity index 100% rename from autosysadmin/tasks/dependencies.yml rename to autosysadmin-agent/tasks/dependencies.yml diff --git a/autosysadmin-agent/tasks/install.yml b/autosysadmin-agent/tasks/install.yml new file mode 100644 index 00000000..d5e3b3eb --- /dev/null +++ b/autosysadmin-agent/tasks/install.yml @@ -0,0 +1,108 @@ +--- +- name: "Remount /usr if needed" + ansible.builtin.include_role: + name: remount-usr + +- name: 
Create autosysadmin directories + ansible.builtin.file: + path: "{{ item }}" + state: directory + owner: "root" + group: "root" + mode: "0750" + loop: + - "{{ autosysadmin_agent_bin_dir }}" + - "{{ autosysadmin_agent_lib_dir }}" + - "{{ autosysadmin_agent_auto_dir }}" + +- name: Copy libraries + ansible.builtin.copy: + src: "upstream/lib/" + dest: "{{ autosysadmin_agent_lib_dir }}/" + owner: root + group: root + mode: "0750" + +- name: Copy repair scripts + ansible.builtin.copy: + src: "upstream/repair/" + dest: "{{ autosysadmin_agent_bin_dir }}/" + owner: root + group: root + mode: "0750" + +- name: Copy other utilities + ansible.builtin.copy: + src: "upstream/bin/" + dest: "{{ autosysadmin_agent_bin_dir }}/" + owner: root + group: root + mode: "0750" + +### WARNING: those files are explicitly marked as non-executable +### to prevent them from being run automatically by run-parts + +- name: Copy restart scripts + ansible.builtin.copy: + src: "upstream/restart/" + dest: "{{ autosysadmin_agent_auto_dir }}/" + owner: root + group: root + mode: "0640" + +- name: Ensure /etc/evolinux folder exists + ansible.builtin.file: + path: "/etc/evolinux" + state: directory + owner: "root" + group: "root" + mode: "0700" + +- name: Copy the configuration file if missing + ansible.builtin.template: + src: "autosysadmin.cf.j2" + dest: "/etc/evolinux/autosysadmin" + owner: root + group: root + mode: "0640" + force: no + +# Repair scripts are supposed to be 'on' by default +# A line "repair_XXX=off" is added to the file only if the script is to be disabled. +# That's why all the ternary logic for the state is reversed. 
+- name: Update value per variable + ansible.builtin.lineinfile: + dest: "/etc/evolinux/autosysadmin" + line: "{{ item }}={{ autosysadmin_config[item] | default(true) | bool | ternary('on', 'off') }}" + regexp: '^(#\s*)?{{ item }}=.*' + state: "{{ autosysadmin_config[item] | default(true) | bool | ternary('absent', 'present') }}" + register: _line + loop: "{{ autosysadmin_repair_scripts | union(['repair_all']) }}" + +- name: Ensure restart folder exists + ansible.builtin.file: + path: "{{ autosysadmin_agent_auto_dir }}" + state: directory + owner: "root" + group: "root" + mode: "0750" + +- name: Legacy scripts are removed + ansible.builtin.file: + path: "{{ general_scripts_dir }}/autosysadmin/{{ item }}" + state: absent + loop: + - repair_amavis.sh + - repair_disk.sh + - repair_elasticsearch.sh + - repair_http.sh + - repair_mysql.sh + - repair_opendkim.sh + - repair_php_fpm56.sh + - repair_php_fpm70.sh + - repair_php_fpm73.sh + - repair_php_fpm74.sh + - repair_php_fpm80.sh + - repair_php_fpm81.sh + - repair_redis.sh + - repair_tomcat_instance.sh diff --git a/autosysadmin/tasks/logrotate.yml b/autosysadmin-agent/tasks/logrotate.yml similarity index 70% rename from autosysadmin/tasks/logrotate.yml rename to autosysadmin-agent/tasks/logrotate.yml index d4fe7a5c..bf1e55b4 100644 --- a/autosysadmin/tasks/logrotate.yml +++ b/autosysadmin-agent/tasks/logrotate.yml @@ -1,10 +1,8 @@ --- - name: Copy logrotate configuration for autosysadmin ansible.builtin.copy: - src: "files/logrotate_autosysadmin.conf" + src: "files/autosysadmin.logrotate.conf" dest: "/etc/logrotate.d/autosysadmin" owner: root group: root mode: "0644" - tags: - - autosysadmin diff --git a/autosysadmin-agent/tasks/main.yml b/autosysadmin-agent/tasks/main.yml new file mode 100644 index 00000000..9ac8a7b6 --- /dev/null +++ b/autosysadmin-agent/tasks/main.yml @@ -0,0 +1,31 @@ +--- + +- name: The list of all repair scripts is composed. 
+ ansible.builtin.set_fact: + autosysadmin_repair_scripts: "{{ lookup('ansible.builtin.fileglob', 'upstream/repair/repair_*', wantlist=True) | map('basename') | sort }}" + +- name: Install dependencies + ansible.builtin.include_tasks: dependencies.yml + +- name: Install autosysadmin + ansible.builtin.include_tasks: install.yml + +- name: Crontab configuration + ansible.builtin.include_tasks: crontab.yml + +- name: NRPE configuration + ansible.builtin.include_tasks: nrpe.yml + +- name: sudo configuration + ansible.builtin.include_tasks: sudo.yml + +- name: rsyslog configuration + ansible.builtin.include_tasks: rsyslog.yml + +- name: logrotate configuration + ansible.builtin.include_tasks: logrotate.yml + +- name: Install latest version of dump-server-state + ansible.builtin.include_role: + name: evolinux-base + tasks_from: dump-server-state.yml diff --git a/autosysadmin-agent/tasks/nrpe.yml b/autosysadmin-agent/tasks/nrpe.yml new file mode 100644 index 00000000..b5a31922 --- /dev/null +++ b/autosysadmin-agent/tasks/nrpe.yml @@ -0,0 +1,9 @@ +--- +- name: custom configuration is present + ansible.builtin.template: + src: autosysadmin.nrpe.cfg.j2 + dest: /etc/nagios/nrpe.d/autosysadmin.cfg + group: nagios + mode: "0640" + force: yes + notify: restart nagios-nrpe-server diff --git a/autosysadmin/tasks/rsyslog.yml b/autosysadmin-agent/tasks/rsyslog.yml similarity index 64% rename from autosysadmin/tasks/rsyslog.yml rename to autosysadmin-agent/tasks/rsyslog.yml index 6f0702c5..bb57f24a 100644 --- a/autosysadmin/tasks/rsyslog.yml +++ b/autosysadmin-agent/tasks/rsyslog.yml @@ -1,11 +1,9 @@ --- - name: Copy rsyslog configuration for autosysadmin ansible.builtin.copy: - src: "files/rsyslog_autosysadmin.conf" + src: "files/autosysadmin.rsyslog.conf" dest: "/etc/rsyslog.d/autosysadmin.conf" owner: root group: root mode: "0644" - notify: Restart rsyslog - tags: - - autosysadmin + notify: restart rsyslog diff --git a/autosysadmin/tasks/sudo.yml 
b/autosysadmin-agent/tasks/sudo.yml similarity index 76% rename from autosysadmin/tasks/sudo.yml rename to autosysadmin-agent/tasks/sudo.yml index 24249ab7..a4fd35be 100644 --- a/autosysadmin/tasks/sudo.yml +++ b/autosysadmin-agent/tasks/sudo.yml @@ -1,9 +1,7 @@ --- - name: Add autosysadmin sudoers file ansible.builtin.template: - src: sudoers.j2 + src: autosysadmin.sudoers.j2 dest: /etc/sudoers.d/autosysadmin mode: "0600" validate: "visudo -cf %s" - tags: - - autosysadmin diff --git a/autosysadmin-agent/templates/autosysadmin.cf.j2 b/autosysadmin-agent/templates/autosysadmin.cf.j2 new file mode 100644 index 00000000..763958ba --- /dev/null +++ b/autosysadmin-agent/templates/autosysadmin.cf.j2 @@ -0,0 +1,12 @@ +# This configuration is partially managed by Ansible +# You can change specific values manually, but they may be overridden by Ansible +# +# To be safe, update the hosts_vars/group_vars in the autosysadmin project +# https://gitea.evolix.org/evolix/autosysadmin/src/branch/master +# then use the "agent" playbook to deploy. 
+# +# Configuration for autosysadmin +# Use this file to change configuration values defined in repair scripts +# To disable all repair scripts : repair_all=off +# To disable "repair_http" : repair_http=off +# \ No newline at end of file diff --git a/autosysadmin-agent/templates/autosysadmin.cron.j2 b/autosysadmin-agent/templates/autosysadmin.cron.j2 new file mode 100644 index 00000000..d6d4612f --- /dev/null +++ b/autosysadmin-agent/templates/autosysadmin.cron.j2 @@ -0,0 +1,7 @@ +PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin + +# Run each enabled script +*/5 * * * * root run-parts {{ autosysadmin_agent_auto_dir | mandatory }} + +# Clean run log files +@daily root {{ autosysadmin_agent_bin_dir | mandatory }}/delete_old_logs.sh {{ autosysadmin_agent_log_retention_days | default('365') }} diff --git a/autosysadmin-agent/templates/autosysadmin.nrpe.cfg.j2 b/autosysadmin-agent/templates/autosysadmin.nrpe.cfg.j2 new file mode 100644 index 00000000..c3e1a40c --- /dev/null +++ b/autosysadmin-agent/templates/autosysadmin.nrpe.cfg.j2 @@ -0,0 +1,8 @@ +# +# Ansible managed - DO NOT MODIFY, your changes will be overwritten ! +# + +# Autosysadmin repair commands +{% for script in lookup('ansible.builtin.fileglob', 'upstream/repair/repair_*', wantlist=True) | map("basename") | sort %} +command[{{ script }}]=sudo {{ autosysadmin_agent_bin_dir }}/{{ script }} +{% endfor %} \ No newline at end of file diff --git a/autosysadmin-agent/templates/autosysadmin.sudoers.j2 b/autosysadmin-agent/templates/autosysadmin.sudoers.j2 new file mode 100644 index 00000000..f182bb84 --- /dev/null +++ b/autosysadmin-agent/templates/autosysadmin.sudoers.j2 @@ -0,0 +1,7 @@ +# +# Ansible managed - DO NOT MODIFY, your changes will be overwritten ! 
+# + +{% for script in lookup('ansible.builtin.fileglob', 'upstream/repair/repair_*', wantlist=True) | map("basename") | sort %} +nagios ALL = NOPASSWD: {{ autosysadmin_agent_bin_dir }}/{{ script }} +{% endfor %} \ No newline at end of file diff --git a/autosysadmin/defaults/main.yml b/autosysadmin/defaults/main.yml deleted file mode 100644 index 56190633..00000000 --- a/autosysadmin/defaults/main.yml +++ /dev/null @@ -1,22 +0,0 @@ ---- - -general_scripts_dir: "/usr/share/scripts" -autosysadmin_dir: "{{ general_scripts_dir }}/autosysadmin" - -# Default values for enabled checks -repair_amavis: 'on' -repair_disk: 'on' -repair_elasticsearch: 'on' -repair_http: 'on' -repair_mysql: 'on' -repair_opendkim: 'off' -repair_php_fpm56: 'off' -repair_php_fpm70: 'off' -repair_php_fpm73: 'off' -repair_php_fpm74: 'off' -repair_php_fpm80: 'off' -repair_php_fpm81: 'off' -repair_php_fpm82: 'off' -repair_php_fpm83: 'off' -repair_redis: 'off' -repair_tomcat_instance: 'off' diff --git a/autosysadmin/files/scripts/functions.sh b/autosysadmin/files/scripts/functions.sh deleted file mode 100644 index 95f1a901..00000000 --- a/autosysadmin/files/scripts/functions.sh +++ /dev/null @@ -1,478 +0,0 @@ -#!/bin/bash - -get_system() { - uname -s -} - -get_fqdn() { - if [ "$(get_system)" = "Linux" ]; then - hostname --fqdn - elif [ "$(get_system)" = "OpenBSD" ]; then - hostname - else - log_error_exit "OS not detected!" 
- fi -} - -get_complete_hostname() { - REAL_HOSTNAME="$(get_fqdn)" - if [ "${HOSTNAME}" = "${REAL_HOSTNAME}" ]; then - echo "${HOSTNAME}" - else - echo "${HOSTNAME} (${REAL_HOSTNAME})" - fi -} - -get_evomaintenance_mail() { - email="$(grep "EVOMAINTMAIL=" /etc/evomaintenance.cf | cut -d '=' -f2)" - - if [[ -z "$email" ]]; then - email='alert5@evolix.fr' - fi - - echo "${email}" -} - -arguments="${*}" - -get_argument() { - no_found=1 - for argument in ${arguments} ; do - if [ "${argument}" = "${1}" ] ; - then - no_found=0 - fi - done - return ${no_found} -} - -internal_info() { - INTERNAL_INFO="$(printf '%b\n%s' "${INTERNAL_INFO}" "$*")" -} - -log_action() { - log "Action : $*" - ACTIONS="$(printf '%s\n%s' "${ACTIONS}" "$*")" -} - -log() { - INTERNAL_LOG="$(printf '%s\n%s %s %s %s' "${INTERNAL_LOG}" "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*")" - printf '%s %s %s %s\n' "$(date -Isec)" "$(hostname)" "$(basename "$0")" "$*" | tee -a "${LOG_DIR}/autosysadmin.log" - echo "$*" | /usr/bin/logger -p local0.notice -t autosysadmin."$0" -} - -log_error_exit() { - log "ERROR : $*" - AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: $*" --no-commit --no-mail - exit 1 -} - -log_check_php_fpm() { - - # Extraire seulement les chiffres du nom du script exécuté - # ./repair_php_fpm81.sh ==> 81 - PHP_VERSION="${0//[^0-9]/}" - - PHP_PATH_POOL=$(find /var/lib/lxc/php"${PHP_VERSION}"/ -type d -name "pool.d") - /usr/local/lib/nagios/plugins/check_phpfpm_multi "${PHP_PATH_POOL}" > "${LOG_DIR}/nrpe.txt" -} - -log_system_status() { - DUMP_SERVER_STATE_BIN="$(command -v dump-server-state || command -v backup-server-state)" - - if [ -z "${DUMP_SERVER_STATE_BIN}" ]; then - log "Warning: dump-server-state is not present. No server state recorded...." - fi - - if [ -x "${DUMP_SERVER_STATE_BIN}" ]; then - - # NOTE We don't want the logging to take too much time, so we kill it - # if it take more than 20 seconds. 
- timeout --signal 9 20 \ - "${DUMP_SERVER_STATE_BIN}" \ - --dump-dir="$LOG_DIR" \ - --df \ - --dmesg \ - --iptables \ - --lxc \ - --netcfg \ - --netstat \ - --uname \ - --processes \ - --systemctl \ - --uptime \ - --virsh \ - --disks \ - --mysql-processes \ - --no-apt-states \ - --no-apt-config \ - --no-dpkg-full \ - --no-dpkg-status \ - --no-mount \ - --no-packages \ - --no-sysctl \ - --no-etc - - log "System status logged in ${LOG_DIR}" - fi -} - -read_log_system_status(){ - files="df.txt dmesg.txt lxc-list.txt netstat-legacy.txt netstat-ss.txt pstree.txt ps.txt systemctl-failed-services.txt" - echo -e "\n\n#### Détails de dump-server-state" - for file in ${files} ; do - echo -e "\n### cat ${LOG_DIR}/${file} :" - tail -n 1000 "${LOG_DIR}"/"${file}" - done -} - -ensure_no_active_users_or_exit() { - if is_debug; then return; fi - - # Is there any active user ? - for user in $(LC_ALL=C who --users|awk '{print $1}'); do - idle_time="$(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}')" - for sameusertime in $(LC_ALL=C who --users | grep "${user}" | awk '{ print $6}'); do - if is_active_user "$sameusertime"; then - hook_mail abort_active_users - log_error_exit 'At least one user was recently active. That requires human intervention. Nothing to do here!' - fi - done - done -} - -is_active_user() { - # Check if a user was active in the last 30 minutes - idle_time="$1" - - if [ "${idle_time}" = "old" ]; - then - return 1 - elif [ "${idle_time}" = "." 
]; - then - return 0 - else - hh="$(echo "${idle_time}" | awk -F':' '{print $1}')" - mm="$(echo "${idle_time}" | awk -F':' '{print $2}')" - idle_minutes="$(( 60 * "${hh}" + "${mm}" ))" - if [ "${idle_minutes}" -ge 30 ]; - then - return 1 - else - return 0 - fi - fi -} - -is_debug() { - debug_file="/etc/evolinux/autosysadmin.debug" - - if [ -e "${debug_file}" ]; then - last_change=$(stat -c %Z "${debug_file}") - limit_date=$(date --date "14400 seconds ago" +"%s") - - if [ $(( last_change - limit_date )) -le "0" ]; then - rm "${debug_file}" - else - return 0 - fi - fi - - return 1 -} - -check_nrpe() { - check="$1" - list_command_nrpe=$( grep --exclude=*~ -E "\[${check}\]" -r /etc/nagios/ | grep -v '#command' ) - command_nrpe_primary=$( echo "${list_command_nrpe}" | grep "/etc/nagios/nrpe.d/evolix.cfg" | cut -d'=' -f2- ) - command_nrpe_secondary=$( echo "${list_command_nrpe}" | head -n1 | cut -d'=' -f2- ) - - if [ -z "${command_nrpe_primary}" ] && [ -z "${command_nrpe_secondary}" ] - then - return 1 - else - if [ -n "${command_nrpe_primary}" ] - then - ${command_nrpe_primary} - else - ${command_nrpe_secondary} - fi - fi -} - -acquire_lock_or_exit() { - lockfile="$1" - waittime="$2" - - # si le temps d’attente n’est pas compréhensible par sleep(1), il vaut 0 - if ! 
echo "${waittime}" | grep -Eq '^[0-9]+[smhd]?$' - then - waittime=0 - fi - - # si le temps d’attente est supérieur à 0 et si le lock existe, on attend - if test "${waittime}" -gt 0 && test -f "${lockfile}" - then - sleep "${waittime}" - fi - - # si le lock existe, on s’arrête - if test -f "${lockfile}" - then - log_error_exit "lock file ${lockfile} exists" - fi - touch "${lockfile}" -} - -is_too_soon() { - if is_debug; then return; fi - - witness="/tmp/autosysadmin_witness_$(basename "$0")" - if test -f "${witness}" - then - compare="$(($(date +%s)-$(stat -c "%Y" "${witness}")))" - if [ "${compare}" -lt 1800 ]; - then - log_error_exit 'already executed less than 30 minutes ago' - fi - rm "${witness}" - fi - touch "${witness}" -} - -init_autosysadmin() { - PATH="${PATH}":/usr/sbin:/sbin↩ - unset ACTIONS - - SCRIPTNAME=$(basename "$0") - PROGNAME=${SCRIPTNAME%.sh} - - RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${SCRIPTNAME}_$(openssl rand -hex 6)" - LOG_DIR="/var/log/autosysadmin/${RUN_ID}" - mkdir -p "${LOG_DIR}" - - log "Autosysadmin : Script ${SCRIPTNAME} triggered" - - # Detect operating system name, version and release↩ - detect_os -} - -load_conf() { - # Load conf and enable script by default. - # To disable script locally, set "$PROGNAME"=off in /etc/evolinux/autosysadmin. - # To disable script globally, set "$PROGNAME"=off in the script, after load_conf() call. - declare -g "$PROGNAME"=on # dynamic variable assignment ($PROGNAME == repair_*) - - # Source configuration file - # shellcheck source=../roles/deploy_autosysadmin/templates/autosysadmin.cfg.j2 - test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin -} - -detect_os() { - # OS detection - DEBIAN_RELEASE="" - LSB_RELEASE_BIN="$(command -v lsb_release)" - - if [ -e /etc/debian_version ]; then - DEBIAN_VERSION="$(cut -d "." 
-f 1 < /etc/debian_version)" - if [ -x "${LSB_RELEASE_BIN}" ]; then - DEBIAN_RELEASE="$("${LSB_RELEASE_BIN}" --codename --short)" - else - case "${DEBIAN_VERSION}" in - 8) DEBIAN_RELEASE="jessie";; - 9) DEBIAN_RELEASE="stretch";; - 10) DEBIAN_RELEASE="buster";; - 11) DEBIAN_RELEASE="bullseye";; - esac - fi - fi -} - -is_debian_jessie() { - test "${DEBIAN_RELEASE}" = "jessie" -} -is_debian_stretch() { - test "${DEBIAN_RELEASE}" = "stretch" -} -is_debian_buster() { - test "${DEBIAN_RELEASE}" = "buster" -} -is_debian_bullseye() { - test "${DEBIAN_RELEASE}" = "bullseye" -} - -systemd_list_service_failed() { - systemctl list-units --failed --no-legend --full --type=service "$1" | - awk '{print $1}' -} - -systemd_list_units_enabled() { - list_units_enabled=$(systemctl list-unit-files --state=enabled --no-legend | awk "/$1/{print \$1}") - if [ -z "${list_units_enabled}" ] - then - return 1 - else - echo "${list_units_enabled}" - fi -} - -format_mail_success() { - cat < -Content-Type: text/plain; charset=UTF-8 -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Script: $(basename "$0") -X-RunId: ${RUN_ID} -To: ${EMAIL_CLIENT:-alert5@evolix.fr} -Cc: autosysadmin@evolix.fr -Subject: [autosysadmin] Intervention sur ${HOSTNAME_TEXT} - -Bonjour, - -Une intervention automatique vient de se terminer. - -Nom du serveur : ${HOSTNAME_TEXT} -Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) - -### Renseignements sur l'intervention - -${ACTIONS} - -### Réagir à cette intervention - -Vous pouvez répondre à ce message (sur l'adresse mail equipe@evolix.net). 
-En cas d'urgence, utilisez l'adresse maintenance@evolix.fr ou -notre téléphone portable d'astreinte (04.26.99.99.26) - --- -Votre AutoSysadmin -EOTEMPLATE -} - -format_mail_abort_active_users() { - cat < -Content-Type: text/plain; charset=UTF-8 -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Script: $(basename "$0") -X-RunId: ${RUN_ID} -To: ${EMAIL_CLIENT:-alert5@evolix.fr} -Cc: autosysadmin@evolix.fr -Subject: [autosysadmin] Intervention interrompue sur ${HOSTNAME_TEXT} - -Bonjour, - -Une intervention automatique a été interrompue en raison -d'un utilisateur actuellement actif sur le serveur. - -Nom du serveur : ${HOSTNAME_TEXT} -Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) - -### Utilisateur(s) connecté(s) -$(w) - --- -Votre AutoSysadmin -EOTEMPLATE -} - -format_mail_internal_info() { - cat < -Content-Type: text/plain; charset=UTF-8 -MIME-Version: 1.0 -Content-Transfer-Encoding: 8bit -X-Script: $(basename "$0") -X-RunId: ${RUN_ID} -To: autosysadmin@evolix.fr -Subject: [autosysadmin] Complements (interne) - Intervention sur ${HOSTNAME_TEXT} - -Bonjour, - -Une intervention automatique vient de se terminer. - -Nom du serveur : ${HOSTNAME_TEXT} -Heure d'intervention : $(LC_ALL=fr_FR.utf8 date) -Script déclenché : $(basename "$0") - -### Actions effectuées - -${ACTIONS} - -### Logs autosysadmin - -${INTERNAL_LOG} - -### Utilisateur(s) connecté(s) - -$(w) - -### Informations additionnelles données par le script $(basename "$0") - -${INTERNAL_INFO} - --- -Votre AutoSysadmin -EOTEMPLATE -} - -hook_mail() { - if is_debug; then return; fi - - HOSTNAME="${HOSTNAME:-"$(get_fqdn)"}" - HOSTNAME_TEXT="$(get_complete_hostname)" - EMAIL_CLIENT="$(get_evomaintenance_mail)" - - MAIL_CONTENT="$(format_mail_"$1")" - - SENDMAIL_BIN="$(command -v sendmail)" - - if [ -z "${SENDMAIL_BIN}" ]; then - log "No \`sendmail' command has been found, can't send mail." 
- fi - - if [ -x "${SENDMAIL_BIN}" ]; then - echo "${MAIL_CONTENT}" | "${SENDMAIL_BIN}" -oi -t -f "equipe@evolix.net" - fi -} - - - -# We need stable output for gcal, so we force some language environment variables -export TZ=Europe/Paris -export LANGUAGE=fr_FR.UTF-8 - -is_holiday() { - # gcal mark today as a holiday by surrounding with < and > the day - # of the month of that holiday line. For exemple if today is 2022-05-01 we'll - # get among other lines: - # Fête du Travail (FR) + Di, < 1>Mai 2022 - # Jour de la Victoire (FR) + Di, : 8:Mai 2022 = +7 jours - gcal --cc-holidays=fr --holiday-list=short | grep -E '<[0-9 ]{2}>' --quiet -} - -is_weekend() { - day_of_week=$(date +%u) - if [ "$day_of_week" != 6 ] && [ "$day_of_week" != 7 ]; then - return 1 - fi -} - -is_workday() { - if is_holiday || is_weekend; then - return 1 - fi -} - -is_worktime() { - if ! is_workday; then - return 1 - fi - - hour=$(date +%H) - if [ "${hour}" -lt 9 ] || { [ "${hour}" -ge 12 ] && [ "${hour}" -lt 14 ] ; } || [ "${hour}" -ge 18 ]; then - return 1 - fi -} diff --git a/autosysadmin/files/scripts/repair_amavis.sh b/autosysadmin/files/scripts/repair_amavis.sh deleted file mode 100644 index 5139b927..00000000 --- a/autosysadmin/files/scripts/repair_amavis.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh -# shellcheck source=./restart_amavis.sh -source /usr/share/scripts/autosysadmin/restart_amavis.sh - -init_autosysadmin -load_conf - -test "${repair_amavis:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Verify if check_nrpe are not OK -check_nrpe "check_amavis" && log_error_exit 'check_amavis is OK, nothing to do here!' - -# Has it recently been run? 
-get_argument "--no-delay" || is_too_soon - -lockfile="/run/lock/repair_amavis" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! -restart_amavis - -hook_mail success -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_disk.sh b/autosysadmin/files/scripts/repair_disk.sh deleted file mode 100644 index fc35438c..00000000 --- a/autosysadmin/files/scripts/repair_disk.sh +++ /dev/null @@ -1,173 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_disk:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_disk" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -get_mountpoints() { - # the $(...) 
get the check_disk1 command - # the cut command selects the critical part of the check_disk1 output - # the grep command extracts the mountpoints and available disk space - # the last cut command selects the mountpoints - $(grep check_disk1 /etc/nagios/nrpe.d/evolix.cfg | cut -d'=' -f2-) -e | cut -d'|' -f1 | grep -Eo '/[[:graph:]]* [0-9]+ [A-Z][A-Z]' | cut -f1 -d' ' -} - -is_reserved-blocks() { - fs_type="$(findmnt -n --output=fstype "$1")" - if [ "${fs_type}" = "ext4" ]; - then - device="$(findmnt -n --output=source "$1")" - reserved_block_count="$(tune2fs -l "${device}" | grep 'Reserved block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')" - block_count="$(tune2fs -l "${device}" | grep 'Block count' | awk -F':' '{ gsub (" ", "", $0); print $2}')" - percentage=$(awk "BEGIN { pc=100*${reserved_block_count}/${block_count}; i=int(pc); print (pc-i<0.5)?i:i+1 }") - - log "Reserved blocks for $1 is curently at $percentage%" - if [ "${percentage}" -gt "1" ] - then - log "Allowing tune2fs action to reduce the number of reserved blocks" - return 0 - else - log "Reserved blocks already at or bellow 1%, no automatic action possible" - return 1 - fi - else - log "Filesystem for $1 partition is not ext4" - - return 1 - fi -} - -change_reserved-blocks() { - # We alwasy keep some reserved blocks to avoid missing some logs - # https://gitea.evolix.org/evolix/autosysadmin/issues/22 - tune2fs -m 1 "$(findmnt -n --output=source "$1")" - log_action "Reserved blocks for $1 changed to 1 percent" -} - -is_tmp_to_delete() { - size="$(find /var/log/ -type f -ctime +1 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')" - if [ -n "${size}" ] - then - return 0 - else - return 1 - fi -} - -is_log_to_delete() { - size="$(find /var/log/ -type f -mtime +365 -exec du {} \+ | awk '{s+=$1}END{print s / 1024}')" - if [ -n "${size}" ] - then - return 0 - else - return 1 - fi -} - -clean_apt_cache() { - for lxc in $(du -ax /var | sort -nr | head -n10 | grep -E 
'/var/lib/lxc/php[0-9]+/rootfs/var/cache$' | grep -Eo 'php[0-9]+') - do - lxc-attach --name "${lxc}" -- apt-get clean - log_action '[lxc/'"${lxc}"'] Clean apt cache' - done - case "$(du -sx /var/* | sort -rn | sed 's/^[0-9]\+[[:space:]]\+//;q')" in - '/var/cache') - apt-get clean - log_action 'Clean apt cache' - ;; - esac -} - -clean_amavis_virusmails() { - if du --inodes /var/lib/* | sort -n | tail -n3 | grep -q 'virusmails$' - then - find /var/lib/amavis/virusmails/ -type f -atime +30 -delete - log_action 'Clean /var/lib/amavis/virusmails' - fi -} - -for mountpoint in $(get_mountpoints) -do - case "${mountpoint}" in - /var) - #if is_log_to_delete - #then - # find /var/log/ -type f -mtime +365 -delete - # log_action "$size Mo of disk space freed in /var" - #fi - if is_reserved-blocks /var - then - change_reserved-blocks /var - clean_apt_cache - clean_amavis_virusmails - hook_mail success - fi - ;; - /tmp) - #if is_tmp_to_delete - #then - # find /tmp/ -type f -ctime +1 -delete - # log_action "$size Mo of disk space freed in /tmp" - #fi - if is_reserved-blocks /tmp - then - change_reserved-blocks /tmp - hook_mail success - fi - ;; - /home) - if is_reserved-blocks /home - then - change_reserved-blocks /home - hook_mail success - fi - ;; - /srv) - if is_reserved-blocks /srv - then - change_reserved-blocks /srv - hook_mail success - fi - ;; - /filer) - if is_reserved-blocks /filer - then - change_reserved-blocks /filer - hook_mail success - fi - ;; - /) - if is_reserved-blocks / - then - change_reserved-blocks / - hook_mail success - # Suggest remove old kernel ? 
- fi - ;; - *) - # unknown - log 'Unknown partition (or weird case) or nothing to do' - ;; - esac -done - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_elasticsearch.sh b/autosysadmin/files/scripts/repair_elasticsearch.sh deleted file mode 100644 index 3b45c6e0..00000000 --- a/autosysadmin/files/scripts/repair_elasticsearch.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_elasticsearch:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_elasticsearch" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -elasticsearch_is_enabled() { - systemd_list_units_enabled "elasticsearch.service" - -} - -elasticsearch_restart() { - if ! timeout 60 systemctl restart elasticsearch.service > /dev/null - then - log_error_exit 'failed to restart elasticsearch' - fi -} - -# Test functions -test_elasticsearch_process_present() { - pgrep -u elasticsearch > /dev/null -} - -if elasticsearch_is_enabled -then - if ! test_elasticsearch_process_present - then - log_action "Redémarrage de elasticsearch" - elasticsearch_restart - hook_mail success - else - log_error_exit "Elasticsearch process alive. Aborting" - fi -else - log_error_exit "Elasticsearch is not enabled. 
Aborting" -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_http.sh b/autosysadmin/files/scripts/repair_http.sh deleted file mode 100644 index b1642858..00000000 --- a/autosysadmin/files/scripts/repair_http.sh +++ /dev/null @@ -1,141 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_http:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -log_system_status - -http_detect_service() { - # check whether nginx, apache or both are supposed to be running - if is_debian_jessie; then - find /etc/rc2.d/ - else - systemctl list-unit-files --state=enabled - fi | awk '/nginx/ { nginx = 1 } /apache2/ { apache2 = 1 } END { if (nginx && apache2) { print "both" } else if (nginx) { print "nginx" } else if (apache2) { print "apache2" } }' - # The previous awk command looks for two patterns: "nginx" - # and "apache2". If a line matches the patterns, a variable - # "nginx" or "apache2" is set to 1 (true). The "END" checks - # if one or both patterns has been found. -} - -http_handle_apache() { - # check syntax - if ! apache2ctl -t > /dev/null 2> /dev/null - then - log_error_exit 'apache2 configuration syntax is not valid' - fi - - # try restart - if ! 
timeout 20 systemctl restart apache2.service > /dev/null 2> /dev/null - then - log_error_exit 'failed to restart apache2' - fi - - log_action "Redémarrage de Apache" - - internal_info "#### grep $(LANG=en_US.UTF-8 date '+%b %d') /home/*/log/error.log /var/log/apache2/*error.log (avec filtrage)" - ERROR_LOG=$(grep "$(LANG=en_US.UTF-8 date '+%b %d')" /home/*/log/error.log /var/log/apache2/*error.log | grep -v -e "Got error 'PHP message:" -e "No matching DirectoryIndex" -e "client denied by server configuration" -e "server certificate does NOT include an ID which matches the server name" ) - internal_info "$ERROR_LOG" - -} - -http_handle_nginx() { - # check syntax - if ! nginx -t > /dev/null 2> /dev/null - then - log_error_exit 'nginx configuration syntax is not valid' - fi - - # try restart - if ! timeout 20 systemctl restart nginx.service > /dev/null 2> /dev/null - then - log_error_exit 'failed to restart nginx' - fi - - log_action "Redémarrage de Nginx" -} - -http_handle_lxc_php() { - # check whether containers are used for PHP and reboot them if so - if systemd_list_units_enabled 'lxc' - then - for php in $(lxc-ls | grep 'php'); do - lxc-stop -n "$php" - lxc-start --daemon -n "$php" - log_action "lxc-fpm - Redémarrage container ${php}" - done - - fi -} - -http_handle_fpm_php() { - # check whether php-fpm is installed and restart it if so - if enabled_units="$(systemd_list_units_enabled "php.*-fpm")" - then - systemctl restart "${enabled_units}" - log_action 'php-fpm - Redémarrage de php-fpm' - fi -} - -case "$(http_detect_service)" in -nginx) - - http_handle_nginx - - http_handle_lxc_php - http_handle_fpm_php - - hook_mail success - hook_mail internal_info - ;; - -apache2) - - http_handle_apache - - http_handle_lxc_php - http_handle_fpm_php - - hook_mail success - hook_mail internal_info - ;; - -both) - - http_handle_nginx - http_handle_apache - - http_handle_lxc_php - http_handle_fpm_php - - hook_mail success - hook_mail internal_info - ;; - -*) - # unknown - log 
'nothing to do' - ;; -esac - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_mysql.sh b/autosysadmin/files/scripts/repair_mysql.sh deleted file mode 100644 index f80d5af7..00000000 --- a/autosysadmin/files/scripts/repair_mysql.sh +++ /dev/null @@ -1,71 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_mysql:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_mysql" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -log_system_status - -mysql_is_enabled() { - if is_debian_jessie - then - find /etc/rc2.d/ -name '*mysql*' > /dev/null - else - systemd_list_units_enabled "mysql.service" - fi -} - -mysql_restart() { - if is_debian_jessie - then - if ! timeout 60 /etc/init.d/mysql restart > /dev/null - then - log_error_exit 'failed to restart mysql' - fi - else - if ! timeout 60 systemctl restart mysql.service > /dev/null - then - log_error_exit 'failed to restart mysql' - fi - fi -} - -# Test functions -test_mysql_process_present() { - pgrep -u mysql mysqld > /dev/null -} - -if mysql_is_enabled -then - if ! test_mysql_process_present - then - log_action "Redémarrage de MySQL" - mysql_restart - hook_mail success - else - log_error_exit "mysqld process alive. Aborting" - fi -else - log_error_exit "MySQL/MariaDB not enabled. 
Aborting" -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_opendkim.sh b/autosysadmin/files/scripts/repair_opendkim.sh deleted file mode 100644 index f7735028..00000000 --- a/autosysadmin/files/scripts/repair_opendkim.sh +++ /dev/null @@ -1,61 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_opendkim:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_opendkim" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -log_system_status - -# Functions dedicated to this repair script - -opendkim_is_enabled() { - systemd_list_units_enabled "opendkim.service" - -} - -opendkim_restart() { - if ! timeout 60 systemctl restart opendkim.service > /dev/null - then - log_error_exit 'failed to restart opendkim' - fi -} - -opendkim_test_process_present() { - pgrep -u opendkim > /dev/null -} - - -# Main logic - -if opendkim_is_enabled -then - if ! opendkim_test_process_present - then - log_action "Redémarrage de opendkim" - opendkim_restart - hook_mail success - else - log_error_exit "opendkim process alive. Aborting" - fi -else - log_error_exit "opendkim is not enabled. 
Aborting" -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm56.sh b/autosysadmin/files/scripts/repair_php_fpm56.sh deleted file mode 100644 index 6c67e0b6..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm56.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm56:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php56 - then - lxc-stop -n php56 - lxc-start --daemon -n php56 - log_action "lxc-fpm - Redémarrage container php56" - - internal_info "#### tail /var/lib/lxc/php56/rootfs/var/log/php5-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php56/rootfs/var/log/php5-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm70.sh b/autosysadmin/files/scripts/repair_php_fpm70.sh deleted file mode 100644 index 5bf8cab2..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm70.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm70:=off}" = off && log_error_exit 'Script disabled, nothing to do 
here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php70 - then - lxc-stop -n php70 - lxc-start --daemon -n php70 - log_action "lxc-fpm - Redémarrage container php70" - - internal_info "#### tail /var/lib/lxc/php70/rootfs/var/log/php7.0-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php70/rootfs/var/log/php7.0-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm73.sh b/autosysadmin/files/scripts/repair_php_fpm73.sh deleted file mode 100644 index 6b2094fd..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm73.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm73:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php73 - then - lxc-stop -n php73 - lxc-start --daemon -n php73 - log_action "lxc-fpm - Redémarrage container php73" - - internal_info "#### tail /var/lib/lxc/php73/rootfs/var/log/php7.3-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php73/rootfs/var/log/php7.3-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm74.sh b/autosysadmin/files/scripts/repair_php_fpm74.sh deleted file mode 100644 index dab16a8a..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm74.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm74:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php74 - then - lxc-stop -n php74 - lxc-start --daemon -n php74 - log_action "lxc-fpm - Redémarrage container php74" - - internal_info "#### tail /var/lib/lxc/php74/rootfs/var/log/php7.4-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php74/rootfs/var/log/php7.4-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm80.sh b/autosysadmin/files/scripts/repair_php_fpm80.sh deleted file mode 100644 index 35b9e36c..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm80.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm80:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php80 - then - lxc-stop -n php80 - lxc-start --daemon -n php80 - log_action "lxc-fpm - Redémarrage container php80" - - internal_info "#### tail /var/lib/lxc/php80/rootfs/var/log/php8.0-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php80/rootfs/var/log/php8.0-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm81.sh b/autosysadmin/files/scripts/repair_php_fpm81.sh deleted file mode 100644 index e567f6aa..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm81.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm81:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php81 - then - lxc-stop -n php81 - lxc-start --daemon -n php81 - log_action "lxc-fpm - Redémarrage container php81" - - internal_info "#### tail /var/lib/lxc/php81/rootfs/var/log/php8.1-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php81/rootfs/var/log/php8.1-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm82.sh b/autosysadmin/files/scripts/repair_php_fpm82.sh deleted file mode 100644 index 295abbcd..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm82.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm82:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php82 - then - lxc-stop -n php82 - lxc-start --daemon -n php82 - log_action "lxc-fpm - Redémarrage container php82" - - internal_info "#### tail /var/lib/lxc/php82/rootfs/var/log/php8.2-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php82/rootfs/var/log/php8.2-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_php_fpm83.sh b/autosysadmin/files/scripts/repair_php_fpm83.sh deleted file mode 100644 index 5344c2e4..00000000 --- a/autosysadmin/files/scripts/repair_php_fpm83.sh +++ /dev/null @@ -1,53 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_php_fpm83:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_http" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" 15s - -ensure_no_active_users_or_exit - -# The actual work starts below ! 
- -log_system_status -log_check_php_fpm - -if systemd_list_units_enabled 'lxc' -then - - if lxc-ls | grep -q php83 - then - lxc-stop -n php83 - lxc-start --daemon -n php83 - log_action "lxc-fpm - Redémarrage container php83" - - internal_info "#### tail /var/lib/lxc/php83/rootfs/var/log/php8.3-fpm.log" - FPM_LOG=$(tail /var/lib/lxc/php83/rootfs/var/log/php8.3-fpm.log) - internal_info "$FPM_LOG" "$(read_log_system_status)" - - hook_mail success - hook_mail internal_info - - else - log 'Not possible :v' - fi - -else - log 'Error, not a multi-php install' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_redis.sh b/autosysadmin/files/scripts/repair_redis.sh deleted file mode 100644 index be5cfd77..00000000 --- a/autosysadmin/files/scripts/repair_redis.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -test "${repair_redis:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_redis" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -# The actual work starts below ! - -handle_redis() { - for service in $(systemd_list_service_failed redis*) - do - # ne rien faire si le service est désactivé - if ! systemctl is-enabled --quiet "${service}" - then - continue - fi - - # ne rien faire si le service est actif - if systemctl is-active --quiet "${service}" - then - continue - fi - - if ! 
timeout 20 systemctl restart redis.service > /dev/null 2> /dev/null - then - log_error_exit "failed to restart redis ${service}" - fi - - log_action "Redémarrer service ${service}" - done -} - -if ( systemd_list_units_enabled 'redis.*\.service$' ) > /dev/null -then - handle_redis - hook_mail success -else - log 'Error: redis service is not enabled' -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/repair_template.sh b/autosysadmin/files/scripts/repair_template.sh deleted file mode 100644 index 33db0aac..00000000 --- a/autosysadmin/files/scripts/repair_template.sh +++ /dev/null @@ -1,63 +0,0 @@ -#!/bin/bash - -# Source functions file -# shellcheck source=./functions.sh -source /usr/share/scripts/autosysadmin/functions.sh - -init_autosysadmin -load_conf - -# Comment this line to enable -repair_template=off -test "${repair_template:=off}" = off && log_error_exit 'Script disabled, nothing to do here!' - -# Has it recently been run? -is_too_soon - -lockfile="/run/lock/repair_template" -cleanup() { - rm -f "${lockfile}" -} -trap 'cleanup' 0 -acquire_lock_or_exit "${lockfile}" - -ensure_no_active_users_or_exit - -log_system_status - -# Functions dedicated to this repair script - -template_is_enabled() { - systemd_list_units_enabled "template.service" - -} - -template_restart() { - if ! timeout 60 systemctl restart template.service > /dev/null - then - log_error_exit 'failed to restart template' - fi -} - -template_test_process_present() { - pgrep -u template > /dev/null -} - - -# Main logic - -if template_is_enabled -then - if ! template_test_process_present - then - log_action "Redémarrage de template" - template_restart - hook_mail success - else - log_error_exit "template process alive. Aborting" - fi -else - log_error_exit "template is not enabled. 
Aborting" -fi - -AUTOSYSADMIN=1 /usr/share/scripts/evomaintenance.sh -m "$0: done" --no-commit --no-mail diff --git a/autosysadmin/files/scripts/restart_amavis.sh b/autosysadmin/files/scripts/restart_amavis.sh deleted file mode 100644 index ef8c255d..00000000 --- a/autosysadmin/files/scripts/restart_amavis.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -restart_amavis() { - /etc/init.d/amavis stop 2>/dev/null - /etc/init.d/clamav-freshclam stop 2>/dev/null - /etc/init.d/clamav-daemon stop 2>/dev/null - - if systemctl is-enabled --quiet 'clamav-freshclam.service' - then - freshclam - log_action "Mise à jour des définitions antivirus" - fi - - if systemctl is-enabled --quiet 'clamav-daemon.service' - then - /etc/init.d/clamav-daemon start - log_action "Redémarrage de clamav-daemon" - else - log 'Error, clamav not installed' - fi - - if systemctl is-enabled --quiet 'clamav-freshclam.service' - then - /etc/init.d/clamav-freshclam start - log_action "Redémarrage de clamav-freshclam" - fi - - if systemctl is-enabled --quiet 'amavis.service' - then - /etc/init.d/amavis start - log_action "Redémarrage de amavis" - else - log 'Error, amavis not installed' - fi -} diff --git a/autosysadmin/handlers/main.yml b/autosysadmin/handlers/main.yml deleted file mode 100644 index 2c0b1a7f..00000000 --- a/autosysadmin/handlers/main.yml +++ /dev/null @@ -1,16 +0,0 @@ ---- - -- name: Restart nagios-nrpe-server - ansible.builtin.service: - name: nagios-nrpe-server - state: restarted - -- name: Restart nrpe - ansible.builtin.service: - name: nrpe - state: restarted - -- name: Restart rsyslog - ansible.builtin.service: - name: rsyslog - state: restarted diff --git a/autosysadmin/tasks/autosysadmin_scripts.yml b/autosysadmin/tasks/autosysadmin_scripts.yml deleted file mode 100644 index 4ff1c5be..00000000 --- a/autosysadmin/tasks/autosysadmin_scripts.yml +++ /dev/null @@ -1,61 +0,0 @@ ---- -- name: "Remount /usr if needed" - ansible.builtin.import_role: - name: remount-usr - -- name: Create 
autosysadmin directory - ansible.builtin.file: - path: "{{ autosysadmin_dir }}" - state: directory - owner: "root" - group: "root" - mode: "0750" - tags: - - autosysadmin - -- name: Copy scripts - ansible.builtin.copy: - src: "files/scripts/{{ item }}" - dest: "{{ autosysadmin_dir }}/{{ item }}" - owner: root - group: root - mode: "0750" - loop: - - "functions.sh" - - "restart_amavis.sh" - - "repair_amavis.sh" - - "repair_disk.sh" - - "repair_elasticsearch.sh" - - "repair_http.sh" - - "repair_mysql.sh" - - "repair_php_fpm56.sh" - - "repair_php_fpm70.sh" - - "repair_php_fpm73.sh" - - "repair_php_fpm74.sh" - - "repair_php_fpm80.sh" - - "repair_php_fpm81.sh" - - "repair_php_fpm82.sh" - - "repair_php_fpm83.sh" - - "repair_tomcat_instance.sh" - tags: - - autosysadmin - -- name: Ensure /etc/evolinux folder exists - ansible.builtin.file: - path: "/etc/evolinux" - state: directory - owner: "root" - group: "root" - mode: "0700" - tags: - - autosysadmin - -- name: Copy the configuration file - ansible.builtin.template: - src: "autosysadmin.cf.j2" - dest: "/etc/evolinux/autosysadmin" - owner: root - group: root - mode: "0640" - tags: - - autosysadmin diff --git a/autosysadmin/tasks/main.yml b/autosysadmin/tasks/main.yml deleted file mode 100644 index 60204162..00000000 --- a/autosysadmin/tasks/main.yml +++ /dev/null @@ -1,37 +0,0 @@ ---- -- name: Install dependencies - ansible.builtin.import_tasks: dependencies.yml - tags: - - autosysadmin - -- name: Install autosysadmin scripts - ansible.builtin.import_tasks: autosysadmin_scripts.yml - tags: - - autosysadmin - -- name: Amend NRPE configuration - ansible.builtin.import_tasks: nrpe.yml - tags: - - autosysadmin - -- name: Amend sudo configuration - ansible.builtin.import_tasks: sudo.yml - tags: - - autosysadmin - -- name: Amend rsyslog configuration - ansible.builtin.import_tasks: rsyslog.yml - tags: - - autosysadmin - -- name: Amend logrotate configuration - ansible.builtin.import_tasks: logrotate.yml - tags: - - autosysadmin 
- -- name: Install last version of dump-server-state - ansible.builtin.import_role: - name: evolinux-base - tasks_from: dump-server-state.yml - tags: - - autosysadmin diff --git a/autosysadmin/tasks/nrpe.yml b/autosysadmin/tasks/nrpe.yml deleted file mode 100644 index f6f5c78b..00000000 --- a/autosysadmin/tasks/nrpe.yml +++ /dev/null @@ -1,11 +0,0 @@ ---- -- name: Custom configuration is present - ansible.builtin.template: - src: autosysadmin.cfg.j2 - dest: /etc/nagios/nrpe.d/autosysadmin.cfg - group: nagios - mode: "0640" - force: true - notify: Restart nagios-nrpe-server - tags: - - autosysadmin diff --git a/autosysadmin/templates/autosysadmin.cf.j2 b/autosysadmin/templates/autosysadmin.cf.j2 deleted file mode 100644 index 0be506c2..00000000 --- a/autosysadmin/templates/autosysadmin.cf.j2 +++ /dev/null @@ -1,74 +0,0 @@ -# -# Ansible managed - DO NOT MODIFY, your changes will be **overwritten** ! -# -# Update the hosts_vars/group_vars on the autosysadmin project -# https://gitea.evolix.org/evolix/autosysadmin/src/branch/master -# - -# Configuration for autosysadmin -# Use this file to change configuration values defined in repair scripts -# Ex : repair_http=off - -{% if repair_amavis == "off" %} -repair_amavis=off -{% endif %} - -{% if repair_disk == "off" %} -repair_disk=off -{% endif %} - -{% if repair_elasticsearch == "off" %} -repair_elasticsearch=off -{% endif %} - -{% if repair_http == "off" %} -repair_http=off -{% endif %} - -{% if repair_mysql == "off" %} -repair_mysql=off -{% endif %} - -{% if repair_opendkim == "off" %} -repair_opendkim=off -{% endif %} - -{% if repair_php_fpm56 == "off" %} -repair_php_fpm56=off -{% endif %} - -{% if repair_php_fpm70 == "off" %} -repair_php_fpm70=off -{% endif %} - -{% if repair_php_fpm73 == "off" %} -repair_php_fpm73=off -{% endif %} - -{% if repair_php_fpm74 == "off" %} -repair_php_fpm74=off -{% endif %} - -{% if repair_php_fpm80 == "off" %} -repair_php_fpm80=off -{% endif %} - -{% if repair_php_fpm81 == "off" %} 
-repair_php_fpm81=off -{% endif %} - -{% if repair_php_fpm82 == "off" %} -repair_php_fpm82=off -{% endif %} - -{% if repair_php_fpm83 == "off" %} -repair_php_fpm83=off -{% endif %} - -{% if repair_redis == "off" %} -repair_redis=off -{% endif %} - -{% if repair_tomcat_instance == "off" %} -repair_tomcat_instance=off -{% endif %} diff --git a/autosysadmin/templates/autosysadmin.cfg.j2 b/autosysadmin/templates/autosysadmin.cfg.j2 deleted file mode 100644 index fa6fcfd2..00000000 --- a/autosysadmin/templates/autosysadmin.cfg.j2 +++ /dev/null @@ -1,22 +0,0 @@ -# -# Ansible managed - DO NOT MODIFY, your changes will be overwritten ! -# - -# Autosysadmin repair commands -command[repair_amavis]=sudo {{ autosysadmin_dir }}/repair_amavis.sh -command[repair_disk]=sudo {{ autosysadmin_dir }}/repair_disk.sh -command[repair_elasticsearch]=sudo {{ autosysadmin_dir }}/repair_elasticsearch.sh -command[repair_http]=sudo {{ autosysadmin_dir }}/repair_http.sh -command[repair_mysql]=sudo {{ autosysadmin_dir }}/repair_mysql.sh -command[repair_opendkim]=sudo {{ autosysadmin_dir }}/repair_opendkim.sh -command[repair_php_fpm56]=sudo {{ autosysadmin_dir }}/repair_php_fpm56.sh -command[repair_php_fpm70]=sudo {{ autosysadmin_dir }}/repair_php_fpm70.sh -command[repair_php_fpm73]=sudo {{ autosysadmin_dir }}/repair_php_fpm73.sh -command[repair_php_fpm74]=sudo {{ autosysadmin_dir }}/repair_php_fpm74.sh -command[repair_php_fpm80]=sudo {{ autosysadmin_dir }}/repair_php_fpm80.sh -command[repair_php_fpm81]=sudo {{ autosysadmin_dir }}/repair_php_fpm81.sh -command[repair_php_fpm82]=sudo {{ autosysadmin_dir }}/repair_php_fpm82.sh -command[repair_php_fpm83]=sudo {{ autosysadmin_dir }}/repair_php_fpm83.sh -command[repair_redis]=sudo {{ autosysadmin_dir }}/repair_redis.sh -command[repair_tomcat_instance]=sudo {{ autosysadmin_dir }}/repair_tomcat_instance.sh - diff --git a/autosysadmin/templates/sudoers.j2 b/autosysadmin/templates/sudoers.j2 deleted file mode 100644 index 0a458292..00000000 --- 
a/autosysadmin/templates/sudoers.j2 +++ /dev/null @@ -1,21 +0,0 @@ -# -# Ansible managed - DO NOT MODIFY, your changes will be overwritten ! -# - -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_amavis.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_disk.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_elasticsearch.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_http.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_mysql.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_opendkim.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm56.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm70.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm73.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm74.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm80.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm81.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm82.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_php_fpm83.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_redis.sh -nagios ALL = NOPASSWD: {{ autosysadmin_dir }}/repair_tomcat_instance.sh - diff --git a/evolinux-base/tasks/main.yml b/evolinux-base/tasks/main.yml index d27b69eb..f3dbde35 100644 --- a/evolinux-base/tasks/main.yml +++ b/evolinux-base/tasks/main.yml @@ -149,7 +149,7 @@ - name: Autosysadmin ansible.builtin.include_role: - name: 'evolix/autosysadmin' + name: 'evolix/autosysadmin-agent' when: evolinux_autosysadmin_include | bool - name: fail2ban From 10b507adc4a43541a39fa3bfa57511c4460d5300 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Thu, 29 Feb 2024 18:50:14 +0100 Subject: [PATCH 13/19] autosysadmin-agent: logs clearing is done weekly --- CHANGELOG.md | 1 + autosysadmin-agent/templates/autosysadmin.cron.j2 | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab28a964..02bc3c49 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Added * autosysadmin-agent: upstream release 24.02.3 +* autosysadmin-agent: logs clearing is done weekly * certbot: Renewal hook for NRPE * kvm-host: add minifirewall rules if DRBD interface is configured diff --git a/autosysadmin-agent/templates/autosysadmin.cron.j2 b/autosysadmin-agent/templates/autosysadmin.cron.j2 index d6d4612f..7f2e211b 100644 --- a/autosysadmin-agent/templates/autosysadmin.cron.j2 +++ b/autosysadmin-agent/templates/autosysadmin.cron.j2 @@ -4,4 +4,4 @@ PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin */5 * * * * root run-parts /usr/share/scripts/autosysadmin/auto # Clean run log files -@daily root {{ autosysadmin_agent_bin_dir | mandatory }}/delete_old_logs.sh {{ autosysadmin_agent_log_retention_days | default('365') }} +@weekly root {{ autosysadmin_agent_bin_dir | mandatory }}/delete_old_logs.sh {{ autosysadmin_agent_log_retention_days | default('365') }} From c3339706062e646afaf77e204bfe7eb420630472 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Thu, 29 Feb 2024 19:16:18 +0100 Subject: [PATCH 14/19] autosysadmin-agent: rename /usr/share/scripts/autosysadmin/{auto,restart} --- CHANGELOG.md | 3 ++- autosysadmin-agent/defaults/main.yml | 2 +- autosysadmin-agent/files/upstream/restart/README | 2 +- autosysadmin-agent/tasks/install.yml | 6 ++++++ autosysadmin-agent/templates/autosysadmin.cron.j2 | 2 +- 5 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 02bc3c49..46cbeaa6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,13 +14,14 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Added * autosysadmin-agent: upstream release 24.02.3 -* autosysadmin-agent: logs clearing is done weekly * certbot: Renewal hook for NRPE * kvm-host: add minifirewall rules if DRBD interface is configured ### Changed * apt: add 
ftp.evolix.org as recognized system source +* autosysadmin-agent: logs clearing is done weekly +* autosysadmin-agent: rename /usr/share/scripts/autosysadmin/{auto,restart} * lxc-php, php: Update sury PGP key * redis: create sysfs config file if missing * openvpn: earlier alert for CA expiration diff --git a/autosysadmin-agent/defaults/main.yml b/autosysadmin-agent/defaults/main.yml index 340ec0a1..b223a683 100644 --- a/autosysadmin-agent/defaults/main.yml +++ b/autosysadmin-agent/defaults/main.yml @@ -4,7 +4,7 @@ general_scripts_dir: "/usr/share/scripts" autosysadmin_agent_bin_dir: "/usr/local/bin/autosysadmin" autosysadmin_agent_lib_dir: "/usr/local/lib/autosysadmin" -autosysadmin_agent_auto_dir: "{{ general_scripts_dir }}/autosysadmin/auto" +autosysadmin_agent_auto_dir: "{{ general_scripts_dir }}/autosysadmin/restart" autosysadmin_agent_crontab_enabled: true autosysadmin_agent_log_retention_days: 365 diff --git a/autosysadmin-agent/files/upstream/restart/README b/autosysadmin-agent/files/upstream/restart/README index 137a7436..5ac200f2 100644 --- a/autosysadmin-agent/files/upstream/restart/README +++ b/autosysadmin-agent/files/upstream/restart/README @@ -13,7 +13,7 @@ Warning: scripts that do not satisfy those criteria will NOT be run (silently)! You can print the names of the scripts which would be run, without actually running them, with this command : ``` -$ run-parts --test /usr/share/scripts/autosysadmin/auto +$ run-parts --test /usr/share/scripts/autosysadmin/restart ``` You can use `zzz-restart_example.template` as boilerplate code to make your own "restart" script. 
diff --git a/autosysadmin-agent/tasks/install.yml b/autosysadmin-agent/tasks/install.yml index d5e3b3eb..b8ecd752 100644 --- a/autosysadmin-agent/tasks/install.yml +++ b/autosysadmin-agent/tasks/install.yml @@ -3,6 +3,12 @@ ansible.builtin.include_role: name: remount-usr +- name: Previous autosysadmin restart directory is renamed + command: + cmd: mv "/usr/share/scripts/autosysadmin/auto" "{{ autosysadmin_agent_auto_dir }}" + removes: "/usr/share/scripts/autosysadmin/auto" + creates: "{{ autosysadmin_agent_auto_dir }}" + - name: Create autosysadmin directories ansible.builtin.file: path: "{{ item }}" diff --git a/autosysadmin-agent/templates/autosysadmin.cron.j2 b/autosysadmin-agent/templates/autosysadmin.cron.j2 index 7f2e211b..90823d5e 100644 --- a/autosysadmin-agent/templates/autosysadmin.cron.j2 +++ b/autosysadmin-agent/templates/autosysadmin.cron.j2 @@ -1,7 +1,7 @@ PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin # Run each enabled script -*/5 * * * * root run-parts /usr/share/scripts/autosysadmin/auto +*/5 * * * * root run-parts /usr/share/scripts/autosysadmin/restart # Clean run log files @weekly root {{ autosysadmin_agent_bin_dir | mandatory }}/delete_old_logs.sh {{ autosysadmin_agent_log_retention_days | default('365') }} From 037ec9d3765f3648b564fc3c973c8a4c5ec3db78 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Fri, 1 Mar 2024 08:26:43 +0100 Subject: [PATCH 15/19] autosysadmin-agent: upstream release 24.03 --- CHANGELOG.md | 2 +- .../files/upstream/lib/common.sh | 37 ++++++++++++------- .../files/upstream/repair/repair_amavis | 16 -------- .../files/upstream/restart/README | 2 +- 4 files changed, 25 insertions(+), 32 deletions(-) delete mode 100755 autosysadmin-agent/files/upstream/repair/repair_amavis diff --git a/CHANGELOG.md b/CHANGELOG.md index 46cbeaa6..09e75075 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Added -* 
autosysadmin-agent: upstream release 24.02.3 +* autosysadmin-agent: upstream release 24.03 * certbot: Renewal hook for NRPE * kvm-host: add minifirewall rules if DRBD interface is configured diff --git a/autosysadmin-agent/files/upstream/lib/common.sh b/autosysadmin-agent/files/upstream/lib/common.sh index f3c16359..cc3c53e6 100755 --- a/autosysadmin-agent/files/upstream/lib/common.sh +++ b/autosysadmin-agent/files/upstream/lib/common.sh @@ -1,6 +1,6 @@ #!/bin/bash -VERSION="24.02.3" +VERSION="24.03" # Common functions for "repair" and "restart" scripts @@ -37,22 +37,31 @@ initialize() { RUN_ID="$(date +"%Y-%m-%d_%H-%M")_${PROGNAME}_${PID}" readonly RUN_ID + # Main log directory + MAIN_LOG_DIR="/var/log/autosysadmin" + readonly MAIN_LOG_DIR + # shellcheck disable=SC2174 + mkdir --mode=750 --parents "${MAIN_LOG_DIR}" + chgrp adm "${MAIN_LOG_DIR}" + # Each execution store some information # in a unique directory based on the RUN_ID - LOG_DIR="/var/log/autosysadmin/${RUN_ID}" - readonly LOG_DIR - mkdir -p "${LOG_DIR}" + RUN_LOG_DIR="${MAIN_LOG_DIR}/${RUN_ID}" + readonly RUN_LOG_DIR + # shellcheck disable=SC2174 + mkdir --mode=750 --parents "${RUN_LOG_DIR}" + chgrp adm "${RUN_LOG_DIR}" # This log file contains all events - LOG_FILE="${LOG_DIR}/autosysadmin.log" - readonly LOG_FILE + RUN_LOG_FILE="${RUN_LOG_DIR}/autosysadmin.log" + readonly RUN_LOG_FILE # This log file contains notable actions - ACTIONS_FILE="${LOG_DIR}/actions.log" + ACTIONS_FILE="${RUN_LOG_DIR}/actions.log" readonly ACTIONS_FILE touch "${ACTIONS_FILE}" # This log file contains abort reasons (if any) - ABORT_FILE="${LOG_DIR}/abort.log" + ABORT_FILE="${RUN_LOG_DIR}/abort.log" readonly ABORT_FILE # touch "${ABORT_FILE}" @@ -91,7 +100,7 @@ initialize() { test -f /etc/evolinux/autosysadmin && source /etc/evolinux/autosysadmin log_all "Begin ${PROGNAME} RUN_ID: ${RUN_ID}" - log_all "Log directory is ${LOG_DIR}" + log_all "Log directory is ${RUN_LOG_DIR}" } # Executes a list of tasks before exiting: @@ 
-561,7 +570,7 @@ log_run() { printf "[%s] %s[%s]: %s\\n" \ "${date}" "${PROGNAME}" "${PID}" "${msg}" \ - >> "${LOG_FILE}" + >> "${RUN_LOG_FILE}" } # Log a message in the system log file (syslog or journald) log_global() { @@ -622,7 +631,7 @@ print_abort_reasons() { } # Print the content of the main log from the log directory print_main_log() { - cat "${LOG_FILE}" + cat "${RUN_LOG_FILE}" } # Log an abort reason and quit the script log_abort_and_quit() { @@ -634,7 +643,7 @@ log_abort_and_quit() { # into a file in the log directory named after the 1st parameter save_in_log_dir() { local file_name=$1 - local file_path="${LOG_DIR}/${file_name}" + local file_path="${RUN_LOG_DIR}/${file_name}" cat /dev/stdin > "${file_path}" @@ -643,7 +652,7 @@ save_in_log_dir() { # Return the full path of the file in log directory # based on the name in the 1st parameter file_path_in_log_dir() { - echo "${LOG_DIR}/${1}" + echo "${RUN_LOG_DIR}/${1}" } format_mail_success() { @@ -731,7 +740,7 @@ print_report_information() { echo "" echo "**Utilisateurs récents**" echo "" - who_file=$(file_path_in_log_dir "server-state/df.txt") + who_file=$(file_path_in_log_dir "who-users") if [ -s "${who_file}" ]; then cat "${who_file}" else diff --git a/autosysadmin-agent/files/upstream/repair/repair_amavis b/autosysadmin-agent/files/upstream/repair/repair_amavis deleted file mode 100755 index 5963dc00..00000000 --- a/autosysadmin-agent/files/upstream/repair/repair_amavis +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/bash - -: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" -source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 -source "${AUTOSYSADMIN_LIB}/repair.sh" || exit 1 - -pre_repair - -# shellcheck source=./restart_amavis.sh -source /usr/share/scripts/autosysadmin/auto/restart_amavis.sh - -restart_amavis - -sendmail success - -post_repair diff --git a/autosysadmin-agent/files/upstream/restart/README b/autosysadmin-agent/files/upstream/restart/README index 5ac200f2..83a3a9a2 100644 --- 
a/autosysadmin-agent/files/upstream/restart/README +++ b/autosysadmin-agent/files/upstream/restart/README @@ -3,7 +3,7 @@ Autosysadmin "restart auto" scripts In this directory you can place scripts that will be executed automatically by a cron job (stored in `/etc/cron.d/autosysadmin`). -They must satisfy the default `run-parts(8)` constaints : +They must satisfy the default `run-parts(8)` constraints : * be "executable" * belong to the Debian cron script namespace (`^[a-zA-Z0-9_-]+$`), example: `restart_amavis` From abd329b9c1211f55d4503da8a11192ba4d9c5691 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Fri, 1 Mar 2024 08:32:47 +0100 Subject: [PATCH 16/19] autosysadmin-restart_nrpe: add role --- CHANGELOG.md | 1 + autosysadmin-restart_nrpe/defaults/main.yml | 8 ++ .../files/upstream/restart_nrpe | 105 ++++++++++++++++++ autosysadmin-restart_nrpe/tasks/main.yml | 24 ++++ 4 files changed, 138 insertions(+) create mode 100644 autosysadmin-restart_nrpe/defaults/main.yml create mode 100755 autosysadmin-restart_nrpe/files/upstream/restart_nrpe create mode 100644 autosysadmin-restart_nrpe/tasks/main.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 09e75075..faa4624b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Added * autosysadmin-agent: upstream release 24.03 +* autosysadmin-restart_nrpe: add role * certbot: Renewal hook for NRPE * kvm-host: add minifirewall rules if DRBD interface is configured diff --git a/autosysadmin-restart_nrpe/defaults/main.yml b/autosysadmin-restart_nrpe/defaults/main.yml new file mode 100644 index 00000000..3d743a1b --- /dev/null +++ b/autosysadmin-restart_nrpe/defaults/main.yml @@ -0,0 +1,8 @@ +--- + +general_scripts_dir: "/usr/share/scripts" + +restart_nrpe_path: "{{ general_scripts_dir }}/autosysadmin/restart/restart_nrpe" + +# Change this to customize the RUNNING value in the script +restart_nrpe_running: Null diff --git 
a/autosysadmin-restart_nrpe/files/upstream/restart_nrpe b/autosysadmin-restart_nrpe/files/upstream/restart_nrpe new file mode 100755 index 00000000..b2dd7f44 --- /dev/null +++ b/autosysadmin-restart_nrpe/files/upstream/restart_nrpe @@ -0,0 +1,105 @@ +#!/bin/bash + +: "${AUTOSYSADMIN_LIB:=/usr/local/lib/autosysadmin}" +source "${AUTOSYSADMIN_LIB}/common.sh" || exit 1 +source "${AUTOSYSADMIN_LIB}/restart.sh" || exit 1 + +## Possible values for RUNNING : +## never => disabled +## always => enabled +## nwh-fr => enabled during non-working-hours in France +## nwh-ca => enabled during non-working-hours in Canada (not supported yet) +## custom => enabled if `running_custom()` function return 0, otherwise disabled. + +# shellcheck disable=SC2034 +RUNNING="nwh-fr" + +## The name of the service, mainly for logging +service_name="nagios-nrpe-server" +## The SysVinit script name +sysvinit_script="${service_name}" +## The systemd service name +systemd_service="${service_name}.service" + +is_service_alive() { + ## this must return 0 if the service is alive, otherwise return 1 + ## Example: + pgrep -u nagios nrpe > /dev/null +} + +## Action for SysVinit system +sysvinit_action() { + # Save service status before restart + timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.before.status" + + # Try to restart + timeout 20 "/etc/init.d/${sysvinit_script}" restart > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + timeout 2 "/etc/init.d/${sysvinit_script}" status | save_in_log_dir "${service_name}.after.status" +} + +## Action for systemd system +systemd_action() { + # Save service status before restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.before.status" + + # Try to restart + # systemctl (only for NRPE ?) 
sometimes returns 0 even if the service has failed to start + # so we check the status explicitly + timeout 20 systemctl restart "${systemd_service}" > /dev/null \ + && sleep 1 \ + && systemctl status "${systemd_service}" > /dev/null + rc=$? + if [ "${rc}" -eq "0" ]; then + log_action "Restart ${service_name}: OK" + else + log_action "Restart ${service_name}: failed" + fi + + # Save service status after restart + systemctl status "${systemd_service}" | save_in_log_dir "${service_name}.after.status" +} + +# Should we run? +if ! is_supposed_to_run; then + # log_global "${PROGNAME} is not supposed to run (RUNNING=${RUNNING})." + exit 0 +fi +if is_service_alive; then + # log_global "${service_name} process alive. Aborting" + exit 0 +fi + +# Yes we do, so check for sysvinit or systemd +if is_debian_version "8" "<="; then + if ! is_sysvinit_enabled "*${sysvinit_script}*"; then + # log_global "${service_name} not enabled. Aborting" + exit 0 + fi + + # Let's finally do the action + pre_restart + sysvinit_action + post_restart +else + if ! is_systemd_enabled "${systemd_service}"; then + # log_global "${service_name} is disabled (or missing), nothing left to do." + exit 0 + fi + if is_systemd_active "${systemd_service}"; then + # log_global "${service_name} is active, nothing left to do." 
+ exit 0 + fi + + # Let's finally do the action + pre_restart + systemd_action + post_restart +fi diff --git a/autosysadmin-restart_nrpe/tasks/main.yml b/autosysadmin-restart_nrpe/tasks/main.yml new file mode 100644 index 00000000..7a8ad5b0 --- /dev/null +++ b/autosysadmin-restart_nrpe/tasks/main.yml @@ -0,0 +1,24 @@ +--- + + - name: "Remount /usr if needed" + ansible.builtin.include_role: + name: remount-usr + + - name: "Copy restart_nrpe" + ansible.builtin.copy: + src: upstream/restart_nrpe + dest: "{{ restart_nrpe_path }}" + owner: "root" + group: "root" + mode: "0750" + + - name: "Customize RUNNING value" + ansible.builtin.lineinfile: + path: "{{ restart_nrpe_path }}" + line: "RUNNING=\"{{ restart_nrpe_running }}\"" + regexp: "^ *RUNNING=" + create: False + when: + - restart_nrpe_running is defined + - restart_nrpe_running != None + - restart_nrpe_running | length > 0 From 56237bb3c6f9d86a243b55e426992a8b9930e02a Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Fri, 1 Mar 2024 08:35:16 +0100 Subject: [PATCH 17/19] evolinux-base: execute autosysadmin-agent and autosysadmin-restart_nrpe roles --- CHANGELOG.md | 1 + evolinux-base/tasks/main.yml | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index faa4624b..f600017e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,7 @@ The **patch** part changes is incremented if multiple releases happen the same m * apt: add ftp.evolix.org as recognized system source * autosysadmin-agent: logs clearing is done weekly * autosysadmin-agent: rename /usr/share/scripts/autosysadmin/{auto,restart} +* evolinux-base: execute autosysadmin-agent and autosysadmin-restart_nrpe roles * lxc-php, php: Update sury PGP key * redis: create sysfs config file if missing * openvpn: earlier alert for CA expiration diff --git a/evolinux-base/tasks/main.yml b/evolinux-base/tasks/main.yml index f3dbde35..456207df 100644 --- a/evolinux-base/tasks/main.yml +++ b/evolinux-base/tasks/main.yml @@ 
-147,11 +147,16 @@ tags: - postfix -- name: Autosysadmin +- name: Autosysadmin (agent) ansible.builtin.include_role: name: 'evolix/autosysadmin-agent' when: evolinux_autosysadmin_include | bool +- name: Autosysadmin (restart_nrpe) + ansible.builtin.include_role: + name: 'evolix/autosysadmin-restart_nrpe' + when: evolinux_autosysadmin_include | bool + - name: fail2ban ansible.builtin.include_role: name: evolix/fail2ban From 24cbbf2f544ad165b6e04c5e13dcaac8ef4e7ef1 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Fri, 1 Mar 2024 08:55:07 +0100 Subject: [PATCH 18/19] fix CHANGELOG --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f600017e..ac02377f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,10 +23,11 @@ The **patch** part changes is incremented if multiple releases happen the same m * apt: add ftp.evolix.org as recognized system source * autosysadmin-agent: logs clearing is done weekly * autosysadmin-agent: rename /usr/share/scripts/autosysadmin/{auto,restart} +* certbot: use pkey to test the key * evolinux-base: execute autosysadmin-agent and autosysadmin-restart_nrpe roles * lxc-php, php: Update sury PGP key -* redis: create sysfs config file if missing * openvpn: earlier alert for CA expiration +* redis: create sysfs config file if missing ### Fixed @@ -100,7 +101,6 @@ The **patch** part changes is incremented if multiple releases happen the same m * vrrpd: variable to force update the switch script (default: false) * webapps/nextcloud: Add Ceph volume to fstab * webapps/nextcloud: Set home directory's mode -* certbot: Utiliser pkey pour tester clé ### Fixed From 342380876a0b95beb36bd9c60ed94b7db7cf4e28 Mon Sep 17 00:00:00 2001 From: Jeremy Lecour Date: Fri, 1 Mar 2024 09:00:49 +0100 Subject: [PATCH 19/19] Release 24.03 --- CHANGELOG.md | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ac02377f..055c176a 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,18 @@ The **patch** part changes is incremented if multiple releases happen the same m ### Added +### Changed + +### Fixed + +### Removed + +### Security + +## [24.03] 2024-03-01 + +### Added + * autosysadmin-agent: upstream release 24.03 * autosysadmin-restart_nrpe: add role * certbot: Renewal hook for NRPE @@ -29,14 +41,10 @@ The **patch** part changes is incremented if multiple releases happen the same m * openvpn: earlier alert for CA expiration * redis: create sysfs config file if missing -### Fixed - ### Removed * autosysadmin: replaced by autosysadmin-agent -### Security - ## [24.02] 2024-02-08 ### Added