背景:由于線上服務器硬盤故障,導致服務、數據失效;
網站建設哪家好,找創新互聯!專注于網頁設計、網站建設、微信開發、小程序設計、集團企業網站建設等服務項目。為回饋新老客戶,創新互聯還提供了德興免費建站,歡迎大家使用!
目的:保證第一時間發現硬盤信息是否正常;
方案:使用 Nagios 自定義腳本來監控硬盤狀態;
注意:下面腳本只適用于已經安裝 hpacucli 或 MegaCli 軟件的服務器;
#!/bin/bash
# Marc.wang 2014/06/17
#
# Passive Nagios check for hardware RAID / physical-disk health.
#
# Detects the server vendor from dmidecode, queries the matching RAID CLI
# (hpacucli on HP, MegaCli on Dell/IBM) and submits exactly one result for
# the "check_raid" service to the Nagios host through send_nsca.
#
# Result line format (fields separated by ";", matching send_nsca -d ";"):
#   <host address>;check_raid;<return code>;<plugin output>
#
# Exit status: 0 when the array is reported healthy, 2 in every other case.
#
# Requirements: hpacucli or MegaCli installed as an RPM, plus dmidecode, rpm,
# pgrep and /usr/local/nagios/bin/send_nsca.

export PATH=$PATH:/usr/sbin/:/sbin/:/usr/bin/

# First address printed by `hostname -I`; used as the host field of the result.
Get_localhost_Hostname=$(hostname -I | awk '{print $1}')
# Nagios/NSCA server that receives the passive result.
Nagios="nagios.org"
# First word of the first "Vendor" line reported by dmidecode.
SERVER_TYPE=$(/usr/sbin/dmidecode | grep "Vendor" | awk -F: 'NR==1{print $2}' | awk '{print $1}')
# Summed MegaCli error counters at or above this value are reported CRITICAL.
MEGACLI_ERROR_LIMIT=2000

# Forward the result line(s) read from stdin to the Nagios server via NSCA.
Send_nsca_ssl_message() {
  /usr/local/nagios/bin/send_nsca -H "${Nagios}" -d ";" -c /usr/local/nagios/etc/send_nsca.cfg
}

# Submit one check_raid result and terminate the script with the same code.
#   $1 - Nagios return code (0 = OK, 2 = CRITICAL)
#   $2 - plugin output text
Report_and_exit() {
  local code=$1
  local text=$2
  printf '%s\n' "${Get_localhost_Hostname};check_raid;${code};${text}" | Send_nsca_ssl_message
  exit "${code}"
}

# Succeeds when an installed RPM name contains "hpacucli".
HP_DISK_STATUS_COMMAND() {
  rpm -qa | grep hpacucli > /dev/null 2>&1
}

# Succeeds when an installed RPM name contains "MegaCli".
DELL_IBM_DISK_STATUS_COMMAND() {
  rpm -qa | grep MegaCli > /dev/null 2>&1
}

# Prints how many "Status:" lines near a physicaldrive entry are not "OK".
CHECK_RAID_STATUS_HP() {
  /usr/sbin/hpacucli ctrl all show config detail \
    | grep physicaldrive -A 4 \
    | sed 's/ //g' \
    | grep "Status:" \
    | grep -v "Status:OK" \
    | wc -l
}

# Succeeds when hpacucli itself can query the controllers.
TEST_HP_COMMAND() {
  hpacucli ctrl all show config detail > /dev/null 2>&1
}

# Prints the sum of all "Media Error Count" and "Other Error Count" values on
# adapter 0. Prints an empty line when MegaCli yields no such lines at all.
CHECK_RAID_STATUS_IBM_DELL() {
  "${MEGACLI_BIN}" -LdPdInfo -a0 \
    | grep -E "(Media Error Count:|Other Error Count:)" \
    | awk -F: '{sum1 += $2} END {print sum1}'
}

# Succeeds when MegaCli itself can query adapter 0.
TEST_DELL_COMMAND() {
  "${MEGACLI_BIN}" -LdPdInfo -a0 > /dev/null
}

# HP flow: package present -> count unhealthy drives -> verify the CLI works.
Check_hp() {
  local HP_RPM HP_RAID_STATUS_NUMBER

  HP_DISK_STATUS_COMMAND
  HP_RPM=$?
  sleep 3 # pause kept from the original; presumably spaces out the queries
  if [[ "${HP_RPM}" != "0" ]]; then
    Report_and_exit 2 " $SERVER_TYPE command hpacucli Not Found"
  fi

  HP_RAID_STATUS_NUMBER=$(CHECK_RAID_STATUS_HP)
  sleep 3
  # A failing CLI makes the drive count meaningless, so test it first.
  if ! TEST_HP_COMMAND; then
    Report_and_exit 2 "Check_Raid_status: run command hpacucli Error"
  fi
  if [[ "${HP_RAID_STATUS_NUMBER}" == "0" ]]; then
    Report_and_exit 0 "Check_Raid_status:OK"
  fi
  Report_and_exit 2 "Check_Raid_status:Critical"
}

# Dell/IBM flow: package present -> sum error counters -> verify the CLI works.
Check_dell_ibm() {
  local TEST_OTHER_COMMAND DELL_IBM_STATUS_NUMBER

  # Prefer the fixed install location; otherwise rely on PATH.
  if [[ -f "/opt/MegaRAID/MegaCli/MegaCli64" ]]; then
    MEGACLI_BIN="/opt/MegaRAID/MegaCli/MegaCli64"
  else
    MEGACLI_BIN="MegaCli"
  fi

  # Report a missing package instead of exiting silently, as the HP flow does.
  if ! DELL_IBM_DISK_STATUS_COMMAND; then
    Report_and_exit 2 " $SERVER_TYPE command MegaCli Not Found"
  fi

  TEST_DELL_COMMAND
  TEST_OTHER_COMMAND=$?
  DELL_IBM_STATUS_NUMBER=$(CHECK_RAID_STATUS_IBM_DELL)

  if [[ -z "${DELL_IBM_STATUS_NUMBER}" ]]; then
    Report_and_exit 2 "Check_Raid_status:MegaCli Command Not Found!"
  fi
  # Inclusive bound: a sum equal to the limit used to match no branch at all.
  if [[ "${DELL_IBM_STATUS_NUMBER}" -ge "${MEGACLI_ERROR_LIMIT}" ]]; then
    Report_and_exit 2 "Check_Raid_status:Critical"
  fi
  # Low counters from a failing CLI are not trustworthy; previously unreported.
  if [[ "${TEST_OTHER_COMMAND}" != "0" ]]; then
    Report_and_exit 2 "Check_Raid_status: run command MegaCli Error"
  fi
  Report_and_exit 0 "Check_Raid_status:OK"
}

# An hpacucli process that is already running is reported as CRITICAL and no
# new query is started. pgrep -f matches against the full command line.
if pgrep -f hpacucli > /dev/null 2>&1; then
  Report_and_exit 2 " hpacucli command run not data."
fi

case "${SERVER_TYPE}" in
  HP | hp | Hp | Hewlett-Packard)
    Check_hp
    ;;
  DELL | Dell | DEll | DeLL | dell | IBM | ibm | Ibm | IBm)
    Check_dell_ibm
    ;;
  *)
    # Exits 2 like every other CRITICAL path.
    Report_and_exit 2 "This machine is not IBM DELL or HP!"
    ;;
esac