星期四, 五月 17, 2007

SNMP shell script for process monitor

这是一个临时解决办法:为了监测进程的健康状态,首先必须在被监控的主机上安装 net-snmp,然后按如下方式配置 /etc/snmp/snmpd.conf:
# Contact address reported for this agent.
syscontact  sysadm@sample.com
# Processes to watch, one per line. Presumably net-snmp's
# "proc NAME [MAX [MIN]]" form (allowed instance counts) - confirm against
# snmpd.conf(5). The script below reads these entries via UCD-SNMP-MIB::prNames.
proc vsftpd 100 1
proc httpd 3000 1
proc mysqld 3000 1
# Disk check for /data.
# NOTE(review): the script below never queries disk data; confirm that this
# entry and its "100G" value are wanted and accepted by snmpd.
disk /data 100G
# Map requests from 192.168.0.0/24 that use community "process-mon" to the
# security name "mynet".
# NOTE(review): the usage examples below pass the communities "demo" and
# "mon"; confirm which community string is intended.
com2sec mynet 192.168.0.0/24 process-mon
# Put that security name into group "mynet" for SNMP v1 and v2c.
group mynet v1 mynet
group mynet v2c mynet
# View "system": the .1.3.6.1.2.1 subtree plus .1.3.6.1.4.1.2021.2
# (presumably the UCD-SNMP process table the script reads - confirm).
view system included .1.3.6.1.2.1
view system included .1.3.6.1.4.1.2021.2
# Access rule for group "mynet" naming view "system"; the last two fields are
# "none". Presumably read-only, unauthenticated access - confirm the field
# order against snmpd.conf(5).
access mynet "" any noauth exact system none none
接着编辑脚本:

#!/bin/sh
#
# process-mon: query the UCD-SNMP process table of one or more hosts and
# report every monitored process whose error flag is set.  Problems are
# written to stderr and to syslog (logger); with -m they are also mailed.

# Append /usr/bin so the helper tools are found with a minimal caller PATH.
PATH=$PATH:/usr/bin
# Script name, used as the syslog tag and in the usage text.  "$0" is quoted
# (and preceded by --) so a path with spaces or a leading dash cannot break
# basename.
PROGRAM=$(basename -- "$0")

# Defaults; each one can be overridden by a command-line option.
community='demo' # SNMP community string (-c)
host='localhost' # host to query (-h)
mailto=''        # report recipient (-m); empty disables the mail
items=''         # newline-separated "community host" pairs (-i)
ilist='tmp'      # host-list file (-I); 'tmp' means: build the list from $items

# Print the option summary to stdout.
usage() {
  echo "useage: $PROGRAM [-c|-h|-m] [--item|--help]
-c community
-h host
-m mailto
-i|--item item_map, 'community host' map
-I|--items-list file with one 'community host' pair per line
--help, print this message"
}

# Parse the command line into the globals community, host, mailto, items
# and ilist.
#   $@ - the script's arguments
# Exits with status 1 on an invalid option and with 0 after --help.
# Requires the enhanced (util-linux) getopt for long options.
parse_options() {
  # Long options must be separated by commas: "item:items-list:" would
  # declare one bogus option instead of --item and --items-list.
  # "$@" (not $*) keeps arguments that contain blanks in one piece.
  if ! args=$(getopt -o c:h:m:i:I: -l help,item:,items-list: -- "$@"); then
    strerr="Invalid options"
    echo "$strerr" >&2
    # Tag and message are separate arguments; given only a tag, logger
    # would wait for the message on standard input.
    logger -i -t "$PROGRAM" "$strerr"
    exit 1
  fi

  # getopt prints a normalised, shell-quoted argument list.  Loading it as
  # the positional parameters makes every option value arrive intact in $2,
  # including -cVALUE, --opt=VALUE and values that look like options.
  eval set -- "$args"
  while [ $# -gt 0 ]; do
    case "$1" in
      -c)
        community=$2
        shift 2
        ;;
      -h)
        host=$2
        shift 2
        ;;
      -m)
        mailto=$2
        shift 2
        ;;
      -i | --item)
        # Items are collected one per line; a literal newline is used
        # because "echo -e" is not portable under /bin/sh.
        if [ -n "$items" ]; then
          items="$items
$2"
        else
          items=$2
        fi
        shift 2
        ;;
      -I | --items-list)
        ilist=$2
        shift 2
        ;;
      --help)
        usage
        exit 0
        ;;
      --)
        shift
        break
        ;;
      *)
        break
        ;;
    esac
  done
}

parse_options "$@"

# When no item was collected, fall back to the single pair formed by the
# current community and host values.
[ -n "$items" ] || items="$community $host"

# Accumulated report text.  Every entry is prefixed with a literal "\n"
# (backslash, n) which the mail step later expands into a line break.
STRERR=""

# Print a diagnostic to stderr and send it to syslog tagged with $PROGRAM.
#   $1 - message text
log_error() {
  strerr=$1
  printf '%s\n' "$strerr" >&2
  logger -i -t "$PROGRAM" "$strerr"
}

# Log a monitoring failure and append it to the STRERR report.
#   $1 - message text
record_failure() {
  log_error "$1"
  STRERR="$STRERR\n$strerr"
}

# Check every process-table entry of one host and record each entry whose
# error flag is set.
#   $1 - SNMP community string
#   $2 - host name or address
# Returns 1 when the initial snmpwalk fails, 0 otherwise.
check_host() {
  snmp_community=$1
  snmp_host=$2

  if ! snmp_result=$(snmpwalk -v2c -c "$snmp_community" "$snmp_host" \
    UCD-SNMP-MIB::prNames); then
    record_failure "$snmp_host: SNMP ERROR, maybe system is down"
    # The walk failed, so there are no entries to look at; stop here rather
    # than sending further queries to the same host.
    return 1
  fi

  # Take the row index from the end of each returned OID instead of assuming
  # the rows are numbered 1..N; an empty result then yields no queries.
  proc_indexes=$(printf '%s\n' "$snmp_result" |
    sed -n 's/^[^ ]*\.\([0-9][0-9]*\) = .*/\1/p')

  for i in $proc_indexes; do
    status=$(snmpget -v2c -c "$snmp_community" "$snmp_host" \
      "UCD-SNMP-MIB::prErrorFlag.$i" | awk -F' = ' '{print $2}')
    case "$status" in
      "INTEGER: error(1)" | "INTEGER: 1")
        message=$(snmpget -v2c -c "$snmp_community" "$snmp_host" \
          "UCD-SNMP-MIB::prErrMessage.$i" |
          sed "s/UCD-SNMP-MIB::prErrMessage\.$i = STRING: \(.*\)$/\1/")
        record_failure "$snmp_host: $message"
        ;;
    esac
  done
  return 0
}

# Read "community host" pairs from stdin, one per line, and check each one.
# Lines without both fields are logged and skipped; extra fields are ignored.
monitor_hosts() {
  while read -r community host _extra || [ -n "$community" ]; do
    if [ -z "$community" ] || [ -z "$host" ]; then
      log_error "Invalid 'community host' item"
      continue
    fi
    # stdin is detached so that no command run for this host can consume
    # the remaining lines of the list.
    check_host "$community" "$host" </dev/null
  done
}

# Feed the host list to monitor_hosts: from $items when ilist still holds
# the 'tmp' placeholder, otherwise from the file named by $ilist.
# The loop runs in the current shell (no pipeline), so STRERR survives it.
run_monitor() {
  if [ "$ilist" = "tmp" ]; then
    # A here-document replaces the former scratch file ./tmp, which was
    # written into the working directory and never removed.
    monitor_hosts <<EOF
$items
EOF
  elif [ -r "$ilist" ]; then
    monitor_hosts <"$ilist"
  else
    log_error "Cannot read items list '$ilist'"
  fi
}

run_monitor

# Mail the collected failures when a recipient was given (-m) and at least
# one failure was recorded in STRERR.  STRERR gets a header line prepended.
send_report() {
  if [ -n "$mailto" ] && [ -n "$STRERR" ]; then
    STRERR="*** CRITICAL ERRORs ***\n$STRERR"
    # printf %b expands the literal "\n" separators stored in STRERR;
    # "echo -e" is not understood by every /bin/sh and may print "-e".
    # $mailto stays unquoted on purpose: several space-separated recipients
    # are passed to mail as separate arguments.
    # shellcheck disable=SC2086
    printf '%b\n' "$STRERR" |
      mail -s "*** CRITICAL: process monitor ERRORs reporting ***" $mailto
  fi
}

send_report
这个脚本可以这样运行:
sh$ process-mon -c demo -h 192.168.0.98
sh$ process-mon -i 'demo 192.168.0.98' -i 'mon 192.168.0.99' -m chowroc@sample.com
检查两台主机,并在有问题时发送邮件到 -m(mailto)指定的地址
sh$ process-mon -I process-mon.list -m roc@sample.com
sh$ cat process-mon.list
demo 192.168.0.98
mon 192.168.0.99
...

没有评论: