189 8069 5689

nagios监控硬盘RAID

背景:由于线上服务器硬盘故障,导致服务中断、数据失效;

网站建设哪家好,找创新互联!专注于网页设计、网站建设、微信开发、小程序设计、集团企业网站建设等服务项目。为回馈新老客户创新互联还提供了德兴免费建站欢迎大家使用!


目的:保证第一时间发现硬盘信息是否正常;


方案:使用Nagios 自定义脚本来监控硬盘状态;


注意:下面的脚本仅适用于已安装 hpacucli(HP)或 MegaCli(DELL/IBM)软件的服务器;

#!/bin/bash
#Marc.wang 2014/06/17


# Append the sbin/bin directories so tools invoked by bare name below can be found.
export PATH="${PATH}:/usr/sbin/:/sbin/:/usr/bin/"

# Host identifier put at the front of every NSCA message:
# the first address printed by `hostname -I`.
Get_localhost_Hostname=$(hostname -I | awk '{print $1}')

# Host name handed to send_nsca through its -H option.
Nagios="nagios.org"

# First whitespace-separated word after the colon on the first dmidecode
# line that contains "Vendor"; used later to pick the RAID tool.
SERVER_TYPE=$(/usr/sbin/dmidecode \
  | awk -F: '/Vendor/ && !seen { seen = 1; split($2, word, " "); print word[1] }')




# Forward a passive-check result to the Nagios server through send_nsca.
# Input:   stdin - result line(s) whose fields are separated by ";" (the -d
#          delimiter); this script sends "<host>;check_raid;<code>;<text>".
# Globals: Nagios (read) - value passed to send_nsca -H.
# Returns: the exit status of send_nsca.
Send_nsca_ssl_message() {
  # The host is quoted so an empty or space-containing value stays a single
  # argument to -H instead of shifting the remaining options.
  /usr/local/nagios/bin/send_nsca -H "${Nagios}" -d ";" -c /usr/local/nagios/etc/send_nsca.cfg
}



# Probe the RPM database for the HP array CLI package.
# Outputs: exit status of grepping `rpm -qa` for "hpacucli"
#          (0 when at least one line matches).
HP_DISK_STATUS_COMMAND() {
  local grep_rc
  rpm -qa | grep hpacucli > /dev/null 2>&1
  grep_rc=$?
  printf '%s\n' "${grep_rc}"
}

# Probe the RPM database for the MegaCli package used on Dell/IBM servers.
# Outputs: exit status of grepping `rpm -qa` for "MegaCli"
#          (0 when at least one line matches).
DELL_IBM_DISK_STATUS_COMMAND() {
  rpm -qa | grep MegaCli > /dev/null 2>&1
  local -r match_status=$?
  printf '%s\n' "${match_status}"
}


# Number of `ps ax` lines mentioning hpacucli, with the grep itself filtered out.
bug_test=$(ps ax | grep hpacucli | grep -v grep | wc -l)

# Any count other than 0 is reported to Nagios as critical and ends the run.
case "${bug_test}" in
  0)
    ;;
  *)
    echo "$Get_localhost_Hostname;check_raid;2; hpacucli command run not data." | Send_nsca_ssl_message
    exit 2
    ;;
esac


# Count HP drive status lines that are not OK.
# Outputs: how many "Status:" lines (spaces stripped), taken from each
#          "physicaldrive" line and the 4 lines after it in the hpacucli
#          config dump, do not contain "Status:OK".
CHECK_RAID_STATUS_HP() {
  /usr/sbin/hpacucli ctrl all show config detail \
    | grep physicaldrive -A 4 \
    | sed 's/ //g' \
    | grep "Status:" \
    | grep -v "Status:OK" \
    | wc -l
}





# Pick the RAID check that matches the hardware vendor in SERVER_TYPE.
# Every path sends exactly one "<host>;check_raid;<code>;<text>" line through
# Send_nsca_ssl_message and then exits with the same code (0 = OK, 2 = critical),
# so a run never finishes without submitting a result.
case "$SERVER_TYPE" in

  HP|hp|Hp|Hewlett-Packard)

    # Prints the exit status of a full hpacucli config query (0 = the tool ran).
    TEST_HP_COMMAND() {
      hpacucli ctrl all show config detail >> /dev/null 2>&1
      echo $?
    }

    HP_RPM=$(HP_DISK_STATUS_COMMAND)
    sleep 3

    # Quoted comparison: an empty probe result counts as "not installed"
    # instead of breaking the test expression.
    if [[ "${HP_RPM}" != "0" ]]; then
      echo "$Get_localhost_Hostname;check_raid;2; $SERVER_TYPE  command hpacucli Not Found" | Send_nsca_ssl_message
      exit 2
    fi

    HP_RAID_STATUS_NUMBER=$(CHECK_RAID_STATUS_HP)
    sleep 3
    TEST_HP=$(TEST_HP_COMMAND)

    if [[ "${TEST_HP}" != "0" ]]; then
      # The tool itself failed, so the status count cannot be trusted.
      echo "$Get_localhost_Hostname;check_raid;2;Check_Raid_status: run command hpacucli Error" | Send_nsca_ssl_message
      exit 2
    elif [[ "${HP_RAID_STATUS_NUMBER}" == "0" ]]; then
      echo "$Get_localhost_Hostname;check_raid;0;Check_Raid_status:OK" | Send_nsca_ssl_message
      exit 0
    else
      echo "$Get_localhost_Hostname;check_raid;2;Check_Raid_status:Critical" | Send_nsca_ssl_message
      exit 2
    fi
    ;;

  DELL|Dell|DEll|DeLL|dell|IBM|ibm|Ibm|IBm)

    # Use the 64-bit binary at its /opt location when present, otherwise
    # fall back to a MegaCli resolved through PATH.
    if [[ -f "/opt/MegaRAID/MegaCli/MegaCli64" ]]; then
      MEGACLI_BIN="/opt/MegaRAID/MegaCli/MegaCli64"
    else
      MEGACLI_BIN="MegaCli"
    fi

    # Prints the sum of all "Media Error Count" and "Other Error Count" values
    # reported for adapter 0 (empty output when nothing was summed).
    CHECK_RAID_STATUS_IBM_DELL() {
      "${MEGACLI_BIN}" -LdPdInfo -a0 \
        | grep -E "(Media Error Count:|Other Error Count:)" \
        | awk -F: '{sum1 += $2} END {print sum1}'
    }

    # Prints the exit status of the MegaCli query (0 = the tool ran).
    TEST_DELL_COMMAND() {
      "${MEGACLI_BIN}" -LdPdInfo -a0 >> /dev/null
      echo $?
    }

    IBM_DELL_RPM=$(DELL_IBM_DISK_STATUS_COMMAND)

    # Report a missing package instead of ending silently, as the HP arm does.
    if [[ "${IBM_DELL_RPM}" != "0" ]]; then
      echo "$Get_localhost_Hostname;check_raid;2; $SERVER_TYPE  command MegaCli Not Found" | Send_nsca_ssl_message
      exit 2
    fi

    TEST_OTHER_COMMAND=$(TEST_DELL_COMMAND)
    DELL_IBM_STATUS_NUMBER=$(CHECK_RAID_STATUS_IBM_DELL)

    if [[ -z "$DELL_IBM_STATUS_NUMBER" ]]; then
      echo "$Get_localhost_Hostname;check_raid;2;Check_Raid_status:MegaCli Command  Not Found!" | Send_nsca_ssl_message
      exit 2
    elif [[ "$DELL_IBM_STATUS_NUMBER" -ge "2000" ]]; then
      # -ge closes the gap at exactly 2000, which previously matched neither
      # the critical (-gt) nor the OK (-lt) branch and sent nothing.
      echo "$Get_localhost_Hostname;check_raid;2;Check_Raid_status:Critical" | Send_nsca_ssl_message
      exit 2
    elif [[ "$TEST_OTHER_COMMAND" != "0" ]]; then
      # Below the threshold but the tool failed: previously nothing was sent.
      echo "$Get_localhost_Hostname;check_raid;2;Check_Raid_status: run command MegaCli Error" | Send_nsca_ssl_message
      exit 2
    else
      echo "$Get_localhost_Hostname;check_raid;0;Check_Raid_status:OK" | Send_nsca_ssl_message
      exit 0
    fi
    ;;

  *)
    echo "$Get_localhost_Hostname;check_raid;2;This machine is not IBM DELL or HP!" | Send_nsca_ssl_message
    # Exit with the critical code like every other failure path.
    exit 2
    ;;
esac

文章名称:nagios监控硬盘RAID
网页URL:http://cdxtjz.cn/article/jhdiie.html

其他资讯