Can someone please help how can I add Nagios logic to catch alerts to my below python script?
I tried adding the sys.exit(0) and sys.exit(1) for all OK and CRITICAL, Or Please Let me know what I should do, So that this script when run Nagios catch the 0,1,2 and display the message.
#!/usr/bin/python
import subprocess
import os, sys
#Check python present or not
# dnf install python3.6-stack
# export PATH=/opt/python-3.6/bin:$PATH
def check_MegaRaid():
# Next script
failed=subprocess.run(["sudo /opt/MegaRAID/MegaCli/MegaCli64 -AdpAllInfo \ -aALL | grep -i 'Failed Disks' | awk -F':' '{print $2}'"], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
failed_status = failed.stdout
print("failed_status is",failed_status)
critical=subprocess.run(["sudo /opt/MegaRAID/MegaCli/MegaCli64 -AdpAllInfo \ -aALL | grep -i 'Critical Disks' | awk -F':' '{print $2}'"], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
critical_status = critical.stdout
print("critical_status is",critical_status)
if failed_status.strip() and critical_status.strip() == "0" :
print("Raid check all OK" )
sys.exit(0)
#return 0
else:
print("CRITICAL")
sys.exit(1)
#return 1
def check_raid():
process=subprocess.run(["sudo /sbin/mdadm --detail /dev/md127 | grep -i state | grep -w clean, | awk -F',' '{print $2}' |sed -e 's/^[ \t]*//' "], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
output = process.stdout
check_process=subprocess.run(["sudo /sbin/mdadm --detail /dev/md127 | grep -i state | awk -F':' '{print $2}' |sed -e 's/^[ \t]*//' "], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
check = check_process.stdout
if output.strip() == 'degraded':
print("Raid disk state is CRITICAL ",output)
#return 1
sys.exit(1)
elif check.strip() == 'clean':
print("Raid check all OK")
#return 0
sys.exit(0)
else:
print("sudo /sbin/mdadm --detail /dev/md127 cmd not found : This is an dataraid machine")
check_MegaRaid()
#Check whether system configure raid
process=subprocess.run(["sudo cat /GEO_VERSION | grep -i raid | awk -F'Layout:' '{print $2}' | sed 's/[0-9]*//g' | sed -e 's/^[ \t]*//'"], shell=True, stdout=subprocess.PIPE, universal_newlines=True)
raid_value = process.stdout
if raid_value.strip() == 'raid':
print("System configure Raid functions")
check_raid()
else:
print("There is no raid configured in this system")
exit()
Referencing https://nagios-plugins.org/doc/guidelines.html in case you're interested.
0 is OK 1 is Warning 2 is Critical 3 is Unknown
So the first thing you need to do is replace your
sys.exit(1)with asys.exit(2)I would also replace that final
exit()with asys.exit(3)to signal that it is an Unknown exit, which will you help you identify mis-configured services in the UI.You'll also want to indicate the status first, a typical one-line plugin output will look like:
But it doesn't look like you're using performance data, so change your critical exits to be prepended with the characters
CRITICAL:and your OK statuses withOK:.