start.sh守护脚本
进程守护脚本
[ccodsupport@harbor DelRecord]$ ./start.sh -h
Usage: start.sh [options] service1 [,service2..]
Start/stop services or show files version
-c Get coredump, used with -k.
-d Run a daemon script for service
-f Force kill service, used with -k
-h Print help infomation
-k Kill service
-o Get owner authority [ -o user ]
-r Restart service
-s Show service status
-v Show file version
list Show all services status
Note: If no option ,default to start a service.
Valid shortcut for services:
del
Example:
"start.sh list" View status of all services.
"start.sh del" Start DelRecord.
"start.sh -d del" Start DelRecord with a daemon.
"start.sh del " Start DelRecord.
"start.sh -v del" Show DelRecord version.
"start.sh -s del" Show DelRecord status.
"start.sh -k del" Stop DelRecord.
"start.sh -r del" Restart DelRecord.
"start.sh -kd del" Stop DelRecord and its daemon.
"start.sh -kf del" Force,kill DelRecord immediately.
"start.sh all" Start all services at once.
"start.sh -k all" Stop all services at once.
[devops@my-dev DelRecord]$ ./start.sh -kdf del
stoping DelRecord ... [stopped]
[devops@my-dev DelRecord]$ ./start.sh del
starting DelRecord .... [ok]
[devops@my-dev DelRecord]$ ./start.sh list
DelRecord (pid 22081 22084) [running]
checkmen [stopped]
#!/bin/bash
#########################################################################################
################ service info ###########################################
#########################################################################################
# Service table. Index N of ALIAS/PROC/DIR/ARGV describes one managed binary;
# to manage another binary, append one more element to each array.
ALIAS=("del" "men")                        # short names accepted on the command line
PROC=("DelRecord" "checkmen")              # executable names
DIR=("./" "./bin")                         # directory that holds each executable
ARGV=("" "--config ../cfg/checkmen.cfg")   # extra arguments; checkmen is given its config file
# Number of leading table entries the service loops iterate over.
# NOTE(review): this is 1 while two entries are defined, so the second entry
# is never reached by "list", "all" or alias lookup - confirm this is intended.
PROC_COUNT=1
# Seconds the daemon loop sleeps between two start attempts.
CHECK_TIME=10
# File that receives the pre-start check record; the commented line is the
# alternative that keeps the record instead of discarding it.
#CHECK_LOGFILE="check.log"
CHECK_LOGFILE="/dev/null"
#########################################################################################
################################################################################
# Print the help text (options, service shortcuts, examples) to stdout.
# Takes no arguments. Every line is passed to a single printf so the text
# is emitted exactly as written, including the trailing blanks on some lines.
usage()
{
  printf '%s\n' \
    'Usage: start.sh [options] service1 [,service2..]' \
    'Start/stop services or show files version' \
    ' -c Get coredump, used with -k.' \
    ' -d Run a daemon script for service' \
    ' -f Force kill service, used with -k' \
    ' -h Print help infomation' \
    ' -k Kill service' \
    ' -o Get owner authority [ -o user ]' \
    ' -r Restart service ' \
    ' -s Show service status' \
    ' -v Show file version' \
    ' list Show all services status' \
    ' Note: If no option ,default to start a service. ' \
    '' \
    'Valid shortcut for services:' \
    ' del ' \
    ' ' \
    'Example: ' \
    ' "start.sh list" View status of all services.' \
    ' "start.sh del" Start DelRecord.' \
    ' "start.sh -d del" Start DelRecord with a daemon.' \
    ' "start.sh del " Start DelRecord.' \
    ' "start.sh -v del" Show DelRecord version.' \
    ' "start.sh -s del" Show DelRecord status.' \
    ' "start.sh -k del" Stop DelRecord.' \
    ' "start.sh -r del" Restart DelRecord.' \
    ' "start.sh -kd del" Stop DelRecord and its daemon.' \
    ' "start.sh -kf del" Force,kill DelRecord immediately.' \
    ' "start.sh all" Start all services at once.' \
    ' "start.sh -k all" Stop all services at once.'
}
# Finish the current output line with a right-aligned tag.
# Arguments:
#   $1 - number of columns already used on the line (may be negative)
#   $2 - text to print, right-aligned in a field of (60 - $1) columns
# Outputs:
#   the padded text followed by a newline on stdout
echo_w()
{
  local used=$1
  local text=$2
  # '*' takes the field width from the argument list, so no format string
  # has to be assembled with expr and a nested printf, and nothing leaks
  # into the global scope.
  printf '%*s\n' "$((60 - used))" "$text"
}
# Enable core dumps when they are currently disabled.
# Reads the soft core-file size limit and raises it to "unlimited" only when
# it is 0; any other value is left untouched.
set_ulimit()
{
  local core_limit
  core_limit=$(ulimit -c)
  # The operands must be separate words: written without spaces around '=',
  # the test degenerates to a single non-empty string and is always true.
  if [ "$core_limit" = "0" ]; then
    ulimit -c unlimited
  fi
}
# Look for a supervising copy of this script for one service alias.
# Arguments:
#   $1 - service alias
# Globals read:
#   WHO, SCRIPT_NAME
# Outputs:
#   the matching PIDs on one line, separated by blanks (empty line if none)
# Returns:
#   1 when at least one match exists, 0 when there is none
#   (note the inverted sense: non-zero means "running")
checkDaemonRunning()
{
  local pids
  pids=$(ps -u "$WHO" -o pid -o comm -o cmd \
    | grep "$SCRIPT_NAME -a -d $1" \
    | grep -v grep \
    | awk '{print $1}')
  # Left unquoted on purpose: word splitting folds the newline-separated
  # PIDs into a single blank-separated line.
  echo $pids
  [ -n "$pids" ] && return 1
  return 0
}
# Look for processes of user $WHO whose ps pid/comm line contains the given
# name as a whole word.
# Arguments:
#   $1 - process name
# Globals read:
#   WHO
# Outputs:
#   the matching PIDs on one line, separated by blanks (empty line if none)
# Returns:
#   1 when at least one match exists, 0 when there is none
#   (note the inverted sense: non-zero means "running")
checkRunning()
{
  local pids
  pids=$(ps -u "$WHO" -o pid -o comm | grep -w "$1" | awk '{print $1}')
  # Left unquoted on purpose: word splitting folds the newline-separated
  # PIDs into a single blank-separated line.
  echo $pids
  if [ -n "$pids" ]; then
    return 1
  fi
  return 0
}
# Print a one-line status report for one service.
# Arguments:
#   $1 - index into the PROC/ALIAS tables
# Outputs:
#   "<name> (pid <pids>)" plus a green [running] tag (prefixed with [daemon]
#   when a supervising daemon is found), or "<name>" plus a red [stopped] tag
show_status()
{
  # ANSI colour sequences need a real ESC byte in front of "[32m".
  local esc=$'\033'
  local name=${PROC[$1]}
  local pids state label

  # checkRunning returns non-zero when the process exists.
  pids=$(checkRunning "$name")
  state=$?

  if [ "$state" -eq 0 ]; then
    printf '%s' "$name"
    echo_w "$(( ${#name} - 10 ))" "[${esc}[31mstopped${esc}[0m]"
    return
  fi

  label="$name (pid $pids)"
  printf '%s' "$label"
  # The offset of 10 is carried over unchanged; presumably it compensates
  # for the non-printing colour bytes in the tag - TODO confirm.
  # checkDaemonRunning also returns non-zero when it finds a match.
  if checkDaemonRunning "${ALIAS[$1]}" >/dev/null; then
    echo_w "$(( ${#label} - 10 ))" "[${esc}[32mrunning${esc}[0m]"
  else
    echo_w "$(( ${#label} - 10 ))" "[daemon][${esc}[32mrunning${esc}[0m]"
  fi
}
# Stop a service and start it again.
# Arguments:
#   $1 - index into the service tables
# Returns:
#   the status of start_proc
restart_proc()
{
  local idx=$1
  # The result of the stop step is not checked: the start is attempted
  # regardless of how kill_proc ended.
  kill_proc $idx
  start_proc $idx
}
# Kill the supervising daemon of one service, if one is found.
# Arguments:
#   $1 - index into the PROC/ALIAS tables
# Outputs:
#   a "stoping <name> daemon ." line closed by a red [stopped] tag; nothing
#   at all when no daemon is found
kill_daemon()
{
  # ANSI colour sequences need a real ESC byte in front of "[31m".
  local esc=$'\033'
  local label="${PROC[$1]} daemon"
  local pids pid

  # checkDaemonRunning returns 0 when nothing matches.
  pids=$(checkDaemonRunning "${ALIAS[$1]}")
  if [ $? -eq 0 ]; then
    return 0
  fi

  printf '%s' "stoping $label ."
  # Unquoted so that each PID of the blank-separated list is signalled
  # on its own.
  for pid in $pids; do
    kill -9 "$pid"
  done
  echo_w "${#label}" "[${esc}[31mstopped${esc}[0m]"
}
# Stop one service and wait for it to disappear.
# Arguments:
#   $1 - index into the PROC table
# Globals read:
#   DAEMON (also stop the supervising daemon first), FORCE, CORE
# Globals written:
#   KILLFAILED - set to true when the process is still present after the wait
# Signals:
#   FORCE=true -> SIGKILL (9); CORE=true -> SIGABRT (6); otherwise SIGTERM (15).
#   The default used to be SIGKILL as well, which made -f a no-op and the
#   wait loop below pointless.
# Outputs:
#   a "stoping <name> ..." progress line closed by a [stopped]/[running] tag
# Returns:
#   0 when a running process was stopped; 1 otherwise (including the case
#   where nothing was running - kept as callers have always seen it)
kill_proc()
{
  # ANSI colour sequences need a real ESC byte in front of "[31m".
  local esc=$'\033'
  local name=${PROC[$1]}
  local sig=-15
  local pids pid col tries

  if [ "$DAEMON" = "true" ]; then
    kill_daemon "$1"
  fi

  printf '%s' "stoping $name ."
  col=${#name}

  # checkRunning returns 0 when nothing matches.
  pids=$(checkRunning "$name")
  if [ $? -eq 0 ]; then
    printf '%s' ".."
    echo_w "$((col + 2))" "[${esc}[31mstopped${esc}[0m]"
    return 1
  fi

  if [ "$FORCE" = "true" ]; then
    sig=-9
  elif [ "$CORE" = "true" ]; then
    sig=-6
  fi
  # Unquoted so that each PID of the blank-separated list is signalled
  # on its own.
  for pid in $pids; do
    kill "$sig" "$pid"
  done

  # Poll up to ten times, one second apart, one progress dot per poll.
  for ((tries = 0; tries < 10; tries++)); do
    printf '.'
    col=$((col + 1))
    pids=$(checkRunning "$name")
    if [ $? -eq 0 ]; then
      echo_w "$col" "[${esc}[31mstopped${esc}[0m]"
      return 0
    fi
    sleep 1
  done

  # Reported once, after the last poll, so no progress output follows the tag.
  echo_w "$col" "[${esc}[32mrunning${esc}[0m]"
  KILLFAILED=true
  return 1
}
# Launch a background copy of this script that supervises one service.
# Arguments:
#   $1 - index into the ALIAS/PROC tables
# Globals read:
#   SCRIPT_NAME
# Outputs:
#   a "starting daemon for <name>... OK" line; the success text is printed
#   unconditionally, the background launch itself is not verified
start_daemon()
{
  local service_alias=${ALIAS[$1]}
  local service_name=${PROC[$1]}

  # NOTE: an earlier revision first asked checkDaemonRunning whether a
  # supervisor already existed and printed "already a instance running ..."
  # instead of launching; that guard is disabled, so every call spawns a
  # new supervisor.
  "$SCRIPT_NAME" -a -d "$service_alias" > /dev/null 2>&1 &
  echo "starting daemon for ${service_name}... OK"
}
# Supervisor loop: try to start one service every CHECK_TIME seconds, forever.
# Arguments:
#   $1 - index into the PROC table
# Globals read:
#   CHECK_TIME
# Side effects:
#   appends one line to daemon.log (in the current directory) whenever
#   start_proc reports a fresh start (status 2) or a failed start (status 3);
#   status 1 (already running) is not logged
# Never returns.
start_real_daemon()
{
  local rc
  while true; do
    start_proc "$1"
    # Save the status at once: every later command, including a '[' test,
    # overwrites $?, so comparing $? twice never sees the value 3.
    rc=$?
    case "$rc" in
      2) echo "$(date): start daemon ${PROC[$1]} ok" >> daemon.log ;;
      3) echo "$(date): start daemon ${PROC[$1]} failed" >> daemon.log ;;
    esac
    sleep "$CHECK_TIME"
  done
}
# Wait until ../log/dcs/dcs.log gains a line containing "HeartBeat succeeds",
# then launch ./StatSchedule in the background and return.
# Takes no arguments; paths are relative to the current directory.
# Outputs:
#   one "." per polling round (every 3 seconds) while waiting
start_ss()
{
  local log_file="../log/dcs/dcs.log"
  local seen total

  # Line count at entry: only text written after this point is inspected
  # (the range below starts at the last line already counted).
  seen=$(wc -l "$log_file" | awk '{print $1}')
  while true; do
    total=$(wc -l "$log_file" | awk '{print $1}')
    if [ "$total" != "$seen" ]; then
      if sed -n "$seen,$total p" "$log_file" | grep -q 'HeartBeat succeeds'; then
        nohup ./StatSchedule ../cfg/ss_config.cfg > /dev/null 2>&1 &
        break
      fi
      seen=$total
    fi
    printf '.'
    sleep 3
  done
}
# Start one service unless it is already running.
# Arguments:
#   $1 - index into the PROC/DIR/ARGV tables
# Globals read:
#   ISLIST, CHECK_LOGFILE, STARTSS
# Outputs:
#   a "starting <name> ...." progress line closed by an [ OK ]/[FAILED] tag,
#   plus an "Error:" line when the start is refused
# Returns:
#   1 - an instance already exists, nothing was started
#   2 - the process is present after the start
#   3 - the process is absent after the start, or its directory could not
#       be entered
start_proc()
{
  # ANSI colour sequences need a real ESC byte in front of "[32m".
  local esc=$'\033'
  local name=${PROC[$1]}
  local workdir=${DIR[$1]}
  local origin=$PWD
  local col=${#name}
  local pids state tick

  printf '%s' "starting $name "

  # checkRunning returns non-zero (and prints the PIDs) when a match exists.
  pids=$(checkRunning "$name")
  state=$?
  if [ "$ISLIST" != "true" ]; then
    echo "$(date) check $name return=[$pids] [$state]" >> "$CHECK_LOGFILE"
  fi

  if [ "$state" -ne 0 ] || [ -n "$pids" ]; then
    printf '%s' ".."
    echo_w "$((col + 2))" "[${esc}[31mFAILED${esc}[0m]"
    echo "Error:$name already have a instance (pid $pids)"
    return 1
  fi

  # Refuse to launch when the service directory cannot be entered: carrying
  # on would run ./<name> from the wrong place, and a blind "cd -" afterwards
  # would jump to whatever OLDPWD happened to hold.
  if ! cd "$workdir"; then
    printf '%s' ".."
    echo_w "$((col + 2))" "[${esc}[31mFAILED${esc}[0m]"
    echo "Error:$name cannot enter directory [$workdir]"
    return 3
  fi
  if [ "$STARTSS" = "true" ]; then
    start_ss
  else
    # ARGV entry is left unquoted so that it splits into separate arguments.
    nohup "./$name" ${ARGV[$1]} > /dev/null 2>&1 &
  fi
  cd "$origin" > /dev/null 2>&1

  # Give the process three seconds to come up, one progress dot per second.
  for tick in 1 2 3; do
    printf '.'
    col=$((col + 1))
    sleep 1
  done
  printf '.'

  pids=$(checkRunning "$name")
  if [ $? -ne 0 ]; then
    echo_w "$((col + 1))" "[${esc}[32m OK ${esc}[0m]"
    return 2
  fi
  echo_w "$((col + 1))" "[${esc}[31mFAILED${esc}[0m]"
  return 3
}
# Print a banner and run "<binary> --version" for one service.
# Arguments:
#   $1 - index into the PROC/DIR tables
# The binary is run from the directory recorded for it in DIR (the same
# table start_proc uses), falling back to "bin" when no entry exists.
# Returns:
#   the status of the version command, or of the failed cd
show_version()
{
  local name=${PROC[$1]}
  local workdir=${DIR[$1]:-bin}

  echo "====================== $name Version Info ======================"
  # Run inside a subshell: the caller's working directory stays untouched
  # even when the directory is missing, whereas a failed "cd" followed by
  # "cd .." would leave the caller one level too high.
  ( cd "$workdir" && "./$name" --version )
}
# Run the actions selected on the command line for one service.
# Arguments:
#   $1 - index into the service tables
# Globals read:
#   KILL, RESTART, VERSION, STATUS, START, DAEMON, EXPAND
# Returns:
#   the status of kill_proc/restart_proc on those two paths, 0 otherwise
do_process()
{
  local idx=$1

  # Stop and restart are exclusive: once either one ran, nothing else does.
  if [ "$KILL" = "true" ]; then
    kill_proc $idx
    return
  elif [ "$RESTART" = "true" ]; then
    restart_proc $idx
    return
  fi

  # The remaining actions are independent and run in this fixed order.
  [ "$VERSION" = "true" ] && show_version $idx
  [ "$STATUS" = "true" ] && show_status $idx
  [ "$START" = "true" ] && start_proc $idx

  # With DAEMON set, EXPAND chooses between being the supervisor loop
  # and spawning one in the background.
  case "$DAEMON:$EXPAND" in
    true:true) start_real_daemon $idx ;;
    true:*)    start_daemon $idx ;;
  esac
  return 0
}
##=========================================================================================================
##=========================================================================================================
##=
##=========================================================================================================
# Oracle client configuration and library search path, both rooted at the
# directory the script is started from.
export TNS_ADMIN="$(pwd)/oracle"
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:$(pwd)/lib:$(pwd)/oracle"

# Identity: the script path as invoked, the user whose processes are
# inspected, and the owner of the script file.
SCRIPT_NAME=$0
WHO=$(whoami)
OWN=$(stat -c %U "$0")

# Action selected by the options; starting the service is the default.
START=true
KILL=false
RESTART=false
VERSION=false
STATUS=false
DAEMON=false

# Modifiers of the selected action.
FORCE=false
CORE=false
EXPAND=false
STARTSS=false

# Bookkeeping filled in while the services are processed.
ISLIST=false
KILLFAILED=false
# Translate the command-line options into the global flags.
# Every option except -f, -o and -p also clears START, so "start" only
# happens when no action option is given.
# The leading ':' in the option string selects silent error reporting:
# getopts then yields ':' for a missing argument and '?' for an unknown
# option, and both are rejected here with a non-zero exit status.
while getopts :krvsahfcdpo: OPTION
do
  case "$OPTION" in
    a)  # passed by start_daemon when it re-invokes this script (-a -d alias)
      EXPAND=true
      START=false ;;
    k)
      KILL=true
      START=false ;;
    v)
      START=false
      VERSION=true ;;
    s)
      START=false
      STATUS=true ;;
    f)
      FORCE=true ;;
    r)
      START=false
      RESTART=true ;;
    c)
      START=false
      CORE=true ;;
    o)
      WHO=$OPTARG ;;
    d)
      START=false
      DAEMON=true ;;
    p)  # not listed in the help text; makes start_proc call start_ss
      STARTSS=true ;;
    h)
      usage
      exit 0 ;;
    :)
      echo "start.sh: option -$OPTARG requires an argument"
      echo "Try \"start.sh -h\" for more infomation."
      exit 1 ;;
    \?)
      echo "start.sh: invalid option"
      echo "Try \"start.sh -h\" for more infomation."
      exit 1 ;;
  esac
done
# Drop the parsed options; what remains are the service names.
shift "$((OPTIND - 1))"

# At least one service name (or "list"/"all") is required.
if [ "$#" -eq 0 ]; then
  echo "start.sh: missing operand."
  echo "Try \"start.sh -h\" for more infomation."
  exit 1
fi

# Refuse to run for anyone but the owner of the script file unless -o named
# that owner. Both sides are quoted: with an empty OWN (stat failed) the
# unquoted test is a syntax error that counts as "false" and lets the check
# pass silently.
if [ "$OWN" != "$WHO" ]; then
  echo "start.sh:sorry [$WHO], the owner is [$OWN]."
  echo "Add option \"-o $OWN\" to ignore this."
  echo "Try \"start.sh -h\" for more infomation."
  exit 1
fi
# Main dispatch: make core dumps possible, then handle each operand in turn.
# An operand is "list", "all", or a service alias (case-insensitive).
set_ulimit

for service in "$@"
do
  service=$(tr 'A-Z' 'a-z' <<< "$service")

  # "list" prints the status of every service and ends the script.
  if [ "$service" = "list" ]; then
    ISLIST=true
    for ((i = 0; i < PROC_COUNT; i++))
    do
      show_status "$i"
    done
    exit 0
  fi

  # "all" applies the selected action to every service. Later operands are
  # still ignored, but the loop is left with "break" rather than "exit" so
  # that the kill-failure hint below is printed for "-k all" as well.
  if [ "$service" = "all" ]; then
    for ((i = 0; i < PROC_COUNT; i++))
    do
      do_process "$i"
    done
    break
  fi

  # Resolve the alias to its table index.
  num=-1
  for ((i = 0; i < PROC_COUNT; i++))
  do
    if [ "$service" = "${ALIAS[$i]}" ]; then
      num=$i
      break
    fi
  done

  if [ "$num" -eq -1 ]; then
    echo "start.sh: wrong service name [$service]. "
    echo "Try \"start.sh -h\" for more infomation."
    exit 1
  fi
  do_process "$num"
done

# kill_proc sets KILLFAILED when a process outlived its wait loop.
if [ "$KILLFAILED" = "true" ]; then
  echo " ----"
  echo "If they are still running , check it later use command \"start.sh list\""
  echo "Also can use \"start.sh -kf SERVICES\" to kill them immediately"
fi
exit 0
网友评论