forked from evolix/ansible-roles
269 lines
9.4 KiB
Bash
269 lines
9.4 KiB
Bash
#!/usr/bin/env bash
|
|
set -euo pipefail
|
|
|
|
# This check_hpraid is a fork of check_cciss v0.15, written by Simone Rosa.
# Support for the old tools and drivers was removed; only the smartpqi driver and HP's ssacli tool are used.
# Upstream now at:
# https://gitea.evolix.org/evolix/ansible-roles/src/branch/stable/nagios-nrpe/files/plugins
# Source of the fork:
# https://exchange.nagios.org/directory/Plugins/Hardware/Storage-Systems/RAID-Controllers/check_cciss--2D-HP-and-Compaq-Smart-Array-Hardware-status/details
#
# Licence: GPLv2
# Description:
#
# This plugin checks hardware status for Smart Array Controllers,
# using HPE Smart Storage Administrator. It should support Debian 9 and later.
# (Array, controller, cache, battery, etc...)
#
# NOTE:
#
# You need to install the proprietary tool HPE Smart Storage Administrator (ssacli) from:
# https://downloads.linux.hpe.com/SDR/repo/mcp
# NRPE also needs to run ssacli as root.
#
# Please add this line to /etc/sudoers :
# --------------------------------------------------
# nagios ALL=NOPASSWD: /usr/sbin/ssacli
#
# Examples:
#
# ./check_cciss
# ----------------
# RAID OK
#
# ./check_cciss -v
# -------------------
# RAID OK: Smart Array 6i in Slot 0 array A logicaldrive 1 (67.8 GB, RAID 1+0, OK)
# [Controller Status: OK Cache Status: OK Battery Status: OK]
#
# RAID CRITICAL - HP Smart Array Failed: Smart Array 6i in Slot 0 (Embedded) \
# array A logicaldrive 1 (33.9 GB, RAID 1, Interim Recovery Mode) \
# physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, --- GB, Failed)
#
# RAID WARNING - HP Smart Array Rebuilding: Smart Array 6i in Slot 0 (Embedded) \
# array A logicaldrive 1 (33.9 GB, RAID 1, Recovering, 26% complete) \
# physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, 36.4 GB, Rebuilding)
#
# ./check_cciss -v -p
# --------------------
# RAID OK: Smart Array 6i in Slot 0 (Embedded) array A logicaldrive 1 (33.9 GB, RAID 1, OK)
# physicaldrive 2:0 (port 2:id 0 , Parallel SCSI, 36.4 GB, OK)
# physicaldrive 2:1 (port 2:id 1 , Parallel SCSI, 36.4 GB, OK)
# physicaldrive 1:5 (port 1:id 5 , Parallel SCSI, 72.8 GB, OK, spare)
# [Controller Status: OK Cache Status: OK Battery/Capacitor Status: OK]
#
# RAID CRITICAL - HP Smart Array Failed: Smart Array 6i in Slot 0 (Embedded) \
# array A logicaldrive 1 (33.9 GB, RAID 1, Interim Recovery Mode) \
# physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, --- GB, Failed) \
# physicaldrive 1:1 (port 1:id 1 , Parallel SCSI, 36.4 GB, OK)
#
# RAID WARNING - HP Smart Array Rebuilding: Smart Array 6i in Slot 0 (Embedded) \
# array A logicaldrive 1 (33.9 GB, RAID 1, Recovering, 26% complete) \
# physicaldrive 1:0 (port 1:id 0 , Parallel SCSI, 36.4 GB, Rebuilding) \
# physicaldrive 1:1 (port 1:id 1 , Parallel SCSI, 36.4 GB, OK)
#
# ./check_cciss -v -b
# ----------------
#
# RAID OK: Smart Array 6i in Slot 0 (Embedded) array A logicaldrive 1 (33.9 GB, RAID 1, OK) [Controller Status: OK]
#
# [instead of]
# RAID CRITICAL - HP Smart Array Failed: Smart Array 6i in Slot 0 (Embedded) \
# Controller Status: OK Cache Status: Temporarily Disabled \
# Battery/Capacitor Status: Failed (Replace Batteries/Capacitors)
|
|
|
|
# Script identity and default settings. DEBUG, VERBOSE and PHYSICAL_DRIVE are
# the defaults that the command-line options may switch to 1.
PROGNAME=$(basename -- "$0")
NAGIOS_PLUGINS="/usr/lib/nagios/plugins/"
REVISION='0.16-evolix'
DEBUG="0"
VERBOSE="0"
# `command -v` returns non-zero when ssacli is not installed. Without the
# `|| true`, the script's `set -e` would abort right here, silently, and the
# dedicated "ssacli tools should be installed" report further down could
# never be printed. An empty value now means "not found".
ssacli=$(command -v ssacli || true)
PHYSICAL_DRIVE=0

# Load the Nagios shell helper library. The script uses STATE_* exit codes,
# print_revision and support without defining them, so they are expected to
# come from this file.
# shellcheck disable=SC1090,SC1091
. "${NAGIOS_PLUGINS}/utils.sh"
|
|
|
|
# Print the command-line synopsis and the option summary on stdout.
# Globals: PROGNAME (read)
print_usage() {
  # Only options that have an actual function are listed: the synopsis used
  # to advertise "-E <name>" and "-s" as well, neither of which selects any
  # behaviour in this script.
  printf '%s\n' \
    "" \
    "Usage: $PROGNAME [-v] [-p] [-e <number>] [-b] [-d]" \
    "Usage: $PROGNAME [-h]" \
    "Usage: $PROGNAME [-V]" \
    "" \
    " -v = show status and informations about RAID" \
    " -p = show detail for physical drives" \
    " -e <number> = exclude slot number" \
    " -b = exclude battery/capacitor/cache status check" \
    " -d = use for debug (command line mode)" \
    " -h = help information" \
    " -V = version information" \
    "" \
    " ============="
}
|
|
|
|
# Print the plugin revision, the usage text and a short description on
# stdout, then terminate the script with exit status 0.
# Globals: PROGNAME, REVISION (read)
# Calls print_revision and support, which this file does not define
# (presumably provided by the sourced Nagios utils.sh - TODO confirm).
print_help() {
  # Quoted so that a program name containing whitespace still reaches
  # print_revision as a single argument.
  print_revision "$PROGNAME" "$REVISION"
  echo ""
  print_usage
  echo ""
  echo "This plugin checks hardware status for Smart Array Controllers,"
  echo "using HPE Smart Storage Administrator."
  echo ""
  support
  exit 0
}
|
|
|
|
# Parse the command-line options into the script's global settings.
# Globals written: VERBOSE, PHYSICAL_DRIVE, DEBUG, EXCLUDE_SLOT, excludeslot,
#                  EXCLUDE_BATTERY
# Globals read:    PROGNAME, REVISION, STATE_UNKNOWN
# Arguments:       the script's positional parameters
# Exits:           0 after -V or -h; STATE_UNKNOWN (3) on an unsupported
#                  option
parse_options() {
  local opt OPTIND=1 OPTARG
  while getopts "N:cvpbsde:Vh" opt; do
    case "$opt" in
      # Accepted and ignored. -s is part of the option string but used to
      # fall into the usage branch; it is now treated like -N and -c.
      # NOTE(review): presumably kept so legacy check_cciss command lines
      # keep working - confirm.
      N | c | s) ;;
      v) VERBOSE=1 ;;
      p) PHYSICAL_DRIVE=1 ;;
      d) DEBUG=1 ;;
      e)
        EXCLUDE_SLOT=1
        excludeslot="$OPTARG"
        ;;
      b) EXCLUDE_BATTERY=1 ;;
      V)
        print_revision "$PROGNAME" "$REVISION"
        exit 0
        ;;
      h)
        print_help
        exit 0
        ;;
      *)
        # An unusable command line must not be reported as OK (exit 0):
        # the monitoring system would show a healthy RAID although nothing
        # was checked.
        print_usage
        exit "${STATE_UNKNOWN:-3}"
        ;;
    esac
  done
}

parse_options "$@"
|
|
|
|
# Use smartpqi driver
# https://manpages.debian.org/buster/manpages/smartpqi.4.en.html
# The presence of this sysfs directory is what the script takes as "an HP
# Smart Array controller is available". DRIVER is assigned unconditionally:
# it used to stay unset when the directory was missing, so the debug message
# below aborted the script (`set -u`) before the UNKNOWN result was printed.
DRIVER="/sys/bus/pci/drivers/smartpqi"
if [[ -d "$DRIVER" ]]; then
  driverPresent='YES.'
else
  driverPresent='No!'
fi
if [[ "$DEBUG" == "1" ]]; then
  # printf instead of echo: bash's echo prints "\n" literally.
  printf '### Check if "HP Smart Array" (%s) is present >>>\n%s\n\n' "$DRIVER" "$driverPresent"
fi
if [[ "$driverPresent" == "No!" ]]; then
  echo "RAID UNKNOWN - HP Smart Array not found"
  exit "${STATE_UNKNOWN:-3}"
fi
|
|
|
|
# Check if "HP Array Utility CLI" is present
if [[ "$DEBUG" == "1" ]]; then
  printf '### Check if "ssacli" is present >>>\n\n'
fi
# $ssacli holds the path of the tool, or nothing when it was not found.
# The emptiness test is explicit because the former unquoted `[ ! -x $ssacli ]`
# evaluates to false for an empty value, which let a missing tool pass this
# check unnoticed. The former nested `[ -x ... ]` inside `[ ! -x ... ]` could
# never be true, so the "is present" debug message was unreachable.
if [[ -n "${ssacli:-}" && -x "$ssacli" ]]; then
  if [[ "$DEBUG" == "1" ]]; then
    printf '### "ssacli" is present >>>\n\n'
  fi
else
  echo "ERROR: ssacli tools should be installed and with right sudoers/permissions (see the notes above)"
  exit "${STATE_UNKNOWN:-3}"
fi
|
|
|
|
# Check if "HP Controller" work correctly
# The exit status is captured with `|| status=$?`: with the script's `set -e`
# a failing command substitution in a plain assignment terminates the script
# on the spot, so a separate `status=$?` line would never run and no
# "RAID UNKNOWN" result would be reported.
status=0
check=$(sudo -u root "$ssacli" controller all show status 2>&1) || status=$?
if [[ "$DEBUG" == "1" ]]; then
  printf '### Check if "HP Controller" work correctly >>>\n%s\n\n' "$check"
fi
if ((status != 0)); then
  # Squeeze the (possibly multi-line) tool output onto the single status
  # line without exposing it to pathname expansion.
  check_line=$(tr -s '[:space:]' ' ' <<<"$check")
  check_line=${check_line# }
  echo "RAID UNKNOWN - $ssacli did not execute properly : ${check_line% }"
  exit "${STATE_UNKNOWN:-3}"
fi
|
|
|
|
# Get "Slot" & exclude slot needed

# Print, one per line, the slot identifiers found in controller status text.
# Arguments: $1 - text to scan for "Slot <id>"
#            $2 - optional slot identifier to leave out of the result
# Outputs:   the identifiers in order of appearance (a blank line if none)
list_controller_slots() {
  local status_text=$1
  local found
  # One or more word characters, so that an identifier such as "11" is not
  # cut down to "1". `|| true`: finding no slot is not an error, it must not
  # abort the script under `set -e -o pipefail`.
  found=$(grep -Eo 'Slot [[:alnum:]_]+' <<<"$status_text" | awk '{print $NF}' || true)
  if (($# >= 2)); then
    # -x -F: drop only the slot that is exactly the requested one
    # (a plain pattern "1" would also remove slots 10, 11, 21, ...).
    found=$(grep -vxF -- "$2" <<<"$found" || true)
  fi
  printf '%s\n' "$found"
}

EXCLUDE_SLOT=${EXCLUDE_SLOT:-0}
if [[ "$EXCLUDE_SLOT" == "1" ]]; then
  slots=$(list_controller_slots "$check" "$excludeslot")
else
  slots=$(list_controller_slots "$check")
fi
if [[ "$DEBUG" == "1" ]]; then
  # The slot list is shown on one line, separated by blanks.
  printf '### Get "Slot" & exclude slot not needed >>>\n%s\n\n' "${slots//$'\n'/ }"
fi
|
|
# Keep only the per-drive lines of a "physicaldrive all show" listing.
# Arguments: $1 - raw ssacli output
#            $2 - "1" to keep every drive; anything else keeps only drives
#                 reported as Failure, Failed or Rebuilding
# Outputs:   the selected lines, every '?' replaced by '-'
select_physical_drive_lines() {
  local listing=$1 keep_all=${2:-0}
  local drive_lines
  # `|| true`: an empty selection is a valid result and must not abort the
  # script under `set -e -o pipefail`.
  drive_lines=$(sed -e 's/?/-/g' <<<"$listing" | grep "physicaldrive" || true)
  if [[ "$keep_all" != "1" ]]; then
    drive_lines=$(grep -E 'Failure|Failed|Rebuilding' <<<"$drive_lines" || true)
  fi
  printf '%s\n' "$drive_lines"
}

# Report an ssacli execution failure as a one-line UNKNOWN result and stop.
# Arguments: $1 - output of the failed command
ssacli_failure() {
  local output
  output=$(tr -s '[:space:]' ' ' <<<"$1")
  output=${output# }
  echo "RAID UNKNOWN - $ssacli did not execute properly : ${output% }"
  exit "${STATE_UNKNOWN:-3}"
}

# check2 accumulates the logical and physical drive listings of all slots.
# It is initialised before the loop so that it exists even when there is no
# slot to inspect.
check2=${check2:-}
# $slots is a whitespace-separated list; word splitting is intended here.
for slot in $slots; do
  # Get "logicaldrive" for slot
  # `|| status=$?`: with `set -e` a failing command substitution would end
  # the script before any status test could report it.
  status=0
  check2b=$(sudo -u root "$ssacli" controller slot="$slot" logicaldrive all show 2>&1) || status=$?
  if ((status != 0)); then
    ssacli_failure "$check2b"
  fi
  check2="$check2$check2b"
  if [[ "$DEBUG" == "1" ]]; then
    printf '### Get "logicaldrive" for slot >>>\n%s\n\n' "$check2b"
  fi

  # Get "physicaldrive" for slot
  # The listing is fetched first and filtered afterwards, so that a failure
  # of ssacli itself can be told apart from "no drive matched the filter".
  status=0
  physical_listing=$(sudo -u root "$ssacli" controller slot="$slot" physicaldrive all show 2>&1) || status=$?
  if ((status != 0)); then
    ssacli_failure "$physical_listing"
  fi
  if [[ "$PHYSICAL_DRIVE" == "1" || "$DEBUG" == "1" ]]; then
    check2b=$(select_physical_drive_lines "$physical_listing" 1)
  else
    check2b=$(select_physical_drive_lines "$physical_listing" 0)
  fi
  check2="$check2$check2b"
  if [[ "$DEBUG" == "1" ]]; then
    printf '### Get "physicaldrive" for slot >>>\n%s\n\n' "$check2b"
  fi
done
|
|
|
|
# Check STATUS
if [[ "$DEBUG" == "1" ]]; then
  echo "### Check STATUS >>>"
fi

# Print $1 with every run of whitespace (newlines included) collapsed to one
# blank and without leading/trailing blanks, so that a multi-line ssacli
# listing fits on the single status line. The text is never left unquoted,
# which keeps characters such as '*' or '?' from being glob-expanded.
one_line() {
  local text
  text=$(tr -s '[:space:]' ' ' <<<"$1")
  text=${text# }
  printf '%s\n' "${text% }"
}

# Omit battery/capacitor/cache status check if requested
EXCLUDE_BATTERY=${EXCLUDE_BATTERY:-0}
if [[ "$EXCLUDE_BATTERY" == "1" ]]; then
  # `|| true`: grep -v returns non-zero when nothing is left, which must not
  # abort the script under `set -e -o pipefail`.
  check=$(grep -v 'Battery/Capacitor Status: Failed (Replace Batteries/Capacitors)' <<<"$check" || true)
  check=$(grep -v 'Cache Status: Temporarily Disabled' <<<"$check" || true)
fi

# controller_line: controller/cache/battery status; drives_line: logical and
# physical drive listings (empty when no slot was inspected).
controller_line=$(one_line "$check")
drives_line=$(one_line "${check2:-}")

# The tests run from most to least severe; the first match decides the result.
if [[ "$controller_line" == *Failed* ]]; then
  echo "RAID CRITICAL - HP Smart Array Failed: $controller_line"
  exit "${STATE_CRITICAL:-2}"
elif [[ "$controller_line" == *Disabled* ]]; then
  echo "RAID CRITICAL - HP Smart Array Problem: $controller_line"
  exit "${STATE_CRITICAL:-2}"
elif [[ "$drives_line" == *Failed* ]]; then
  echo "RAID CRITICAL - HP Smart Array Failed: $drives_line"
  exit "${STATE_CRITICAL:-2}"
elif [[ "$drives_line" == *Failure* ]]; then
  echo "RAID WARNING - Component Failure: $drives_line"
  exit "${STATE_WARNING:-1}"
elif [[ "$drives_line" == *Rebuild* ]]; then
  echo "RAID WARNING - HP Smart Array Rebuilding: $drives_line"
  exit "${STATE_WARNING:-1}"
elif [[ "$drives_line" == *Recover* ]]; then
  echo "RAID WARNING - HP Smart Array Recovering: $drives_line"
  exit "${STATE_WARNING:-1}"
elif [[ "$controller_line" == *"Cache Status: Temporarily Disabled"* ]]; then
  # NOTE(review): this branch cannot be reached - any text containing
  # "Cache Status: Temporarily Disabled" already matched "Disabled" above
  # and was reported as CRITICAL. Kept as it was; confirm which severity
  # is intended.
  echo "RAID WARNING - HP Smart Array Cache Disabled: $controller_line"
  exit "${STATE_WARNING:-1}"
elif [[ "$controller_line" == *FIRMWARE* ]]; then
  echo "RAID WARNING - $controller_line"
  exit "${STATE_WARNING:-1}"
else
  if [[ "$DEBUG" == "1" || "$VERBOSE" == "1" ]]; then
    # Only the "... Status: ..." lines of the controller report are shown.
    check3=$(grep Status <<<"$check" || true)
    echo "RAID OK: ${drives_line} [$(one_line "$check3")]"
  else
    echo "RAID OK"
  fi
  exit "${STATE_OK:-0}"
fi
|