美文网首页
二进制文件进程守护脚本.md

二进制文件进程守护脚本.md

作者: 平凡的运维之路 | 来源:发表于2023-09-05 16:50 被阅读0次

start.sh守护脚本

进程守护脚本

  • 使用方法
[ccodsupport@harbor DelRecord]$ ./start.sh  -h
Usage: start.sh [options] service1 [,service2..]
Start/stop services or show files version
  -c    Get coredump, used with -k.
  -d    Run a daemon script for service
  -f    Force kill service, used with -k
  -h    Print help infomation
  -k    Kill service
  -o    Get owner authority [ -o user ]
  -r    Restart service 
  -s    Show service status
  -v    Show file version
  list  Show all services status
  Note: If no option ,default to start a service. 

Valid shortcut for services:
  del 
  
Example: 
  "start.sh list"   View status of all services.
  "start.sh del"     Start DelRecord.
  "start.sh -d del"  Start DelRecord with a daemon.
  "start.sh del " Start DelRecord.
  "start.sh -v del"  Show DelRecord version.
  "start.sh -s del"  Show DelRecord status.
  "start.sh -k del"  Stop DelRecord.
  "start.sh -r del"  Restart DelRecord.
  "start.sh -kd del" Stop DelRecord and its daemon.
  "start.sh -kf del" Force,kill DelRecord immediately.
  "start.sh all"     Start all services at once.
  "start.sh -k all"  Stop all services at once.
  
[devops@my-dev DelRecord]$ ./start.sh  -kdf  del
stoping DelRecord ...                               [stopped]

[devops@my-dev DelRecord]$ ./start.sh   del
starting DelRecord ....                              [ok]

[devops@my-dev DelRecord]$ ./start.sh  list
DelRecord (pid 22081 22084)                         [running]
checkmen                                            [stopped]
  • 脚本源码
#!/bin/bash

#########################################################################################
################                 service info ###########################################
#########################################################################################
# Service table. Index N of ALIAS/PROC/DIR/ARGV together describes one managed
# binary; to manage another binary, add an entry with the next index to each
# of the four arrays.
ALIAS[0]="del";   # short alias accepted on the command line
ALIAS[1]="men";

PROC[0]="DelRecord";   # executable name
PROC[1]="checkmen";

DIR[0]="./";
DIR[1]="./bin";   # directory that contains the binary

ARGV[0]="";
ARGV[1]="--config ../cfg/checkmen.cfg";   # arguments for checkmen (its config file)


# Number of managed services. Derived from the table instead of hard-coded so
# that every configured entry is reachable by alias, "list" and "all".
PROC_COUNT=${#PROC[@]}
# Seconds the daemon loop sleeps between two start attempts.
CHECK_TIME=10

# File that start_proc appends its "already running?" check results to.
#CHECK_LOGFILE="check.log"
CHECK_LOGFILE="/dev/null"

#########################################################################################

################################################################################

# Print the help text (options, service shortcuts, examples) to stdout.
# Takes no arguments. A single printf emits each argument on its own line.
usage()
{
    printf '%s\n' \
        'Usage: start.sh [options] service1 [,service2..]' \
        'Start/stop services or show files version' \
        '  -c Get coredump, used with -k.' \
        '  -d Run a daemon script for service' \
        '  -f Force kill service, used with -k' \
        '  -h Print help infomation' \
        '  -k Kill service' \
        '  -o Get owner authority [ -o user ]' \
        '  -r Restart service ' \
        '  -s Show service status' \
        '  -v Show file version' \
        '  list  Show all services status' \
        '  Note: If no option ,default to start a service. ' \
        '' \
        'Valid shortcut for services:' \
        '  del ' \
        '  ' \
        'Example: ' \
        '  "start.sh list"  View status of all services.' \
        '  "start.sh del"    Start DelRecord.' \
        '  "start.sh -d del"  Start DelRecord with a daemon.' \
        '  "start.sh del " Start DelRecord.' \
        '  "start.sh -v del"  Show DelRecord version.' \
        '  "start.sh -s del"  Show DelRecord status.' \
        '  "start.sh -k del"  Stop DelRecord.' \
        '  "start.sh -r del"  Restart DelRecord.' \
        '  "start.sh -kd del" Stop DelRecord and its daemon.' \
        '  "start.sh -kf del" Force,kill DelRecord immediately.' \
        '  "start.sh all"    Start all services at once.' \
        '  "start.sh -k all"  Stop all services at once.'
}

# Print a label right-aligned behind text already written on the current line.
# Arguments: $1 - number of columns already used (may be negative)
#            $2 - text to print
# Output:    $2 padded on the left to (60 - $1) columns, then a newline.
#            A negative field width left-justifies (standard printf rule).
echo_w()
{
    local used=${1:-0}
    # '*' takes the field width from the argument list, so no format string
    # has to be assembled and no expr/subshell is needed.
    printf '%*s\n' "$((60 - used))" "$2"
}
# Enable core dumps when they are currently disabled.
# Reads the soft core-file limit and raises it to "unlimited" only when it is 0;
# any other configured limit is left untouched.
set_ulimit()
{
    local core_limit
    core_limit=$(ulimit -c)
    # The operands must be separate words: the former `[ $core_limit="0" ]`
    # was a single non-empty string and therefore always true.
    if [ "$core_limit" = "0" ]; then
        ulimit -c unlimited
    fi
}

# Look for a running daemon instance of this script for one service alias.
# Arguments: $1 - service alias (the daemon is launched as
#                 "$SCRIPT_NAME -a -d <alias>")
# Globals:   WHO (user whose processes are listed), SCRIPT_NAME
# Output:    the matching PIDs on one line, space separated (empty line if none)
# Returns:   0 when no daemon was found, 1 when at least one is running
#            (note: inverted with respect to the usual shell convention)
checkDaemonRunning()
{
    local needle pids
    needle="$SCRIPT_NAME -a -d $1"
    # -F: the script path usually contains '.', which must not act as a regex
    # wildcard; "--" protects against a needle that starts with '-'.
    # "grep -v grep" drops the grep process itself from the listing.
    pids=$(ps -u "$WHO" -o pid -o comm -o cmd \
        | grep -F -- "$needle" \
        | grep -v grep \
        | awk '{print $1}')
    printf '%s\n' "${pids//$'\n'/ }"
    if [ -z "$pids" ]; then
        return 0
    fi
    return 1
}

# Find the processes of user $WHO whose command name is exactly $1.
# Arguments: $1 - executable name as listed in PROC[]
# Globals:   WHO
# Output:    the matching PIDs on one line, space separated (empty line if none)
# Returns:   0 when nothing is running, 1 when at least one process was found
#            (note: inverted with respect to the usual shell convention)
checkRunning()
{
    local pids
    # Compare the command column itself instead of grepping the whole line, so
    # that e.g. "DelRecord-old" is not mistaken for "DelRecord".
    pids=$(ps -u "$WHO" -o pid -o comm | awk -v name="$1" '$2 == name {print $1}')
    printf '%s\n' "${pids//$'\n'/ }"

    if [ -z "$pids" ]; then
        return 0
    fi
    return 1
}


# Print one status line for a service: its name, its PIDs when running, and a
# coloured [running] / [daemon][running] / [stopped] label.
# Arguments: $1 - index into PROC[] / ALIAS[]
# Globals:   PROC, ALIAS (read)
show_status()
{
    local idx=$1
    # Colours are spelled as explicit escapes; a literal ESC byte in the source
    # does not survive copy/paste.
    local green=$'\033[32m' red=$'\033[31m' reset=$'\033[0m'
    local proc pids label

    proc=${PROC[$idx]}
    pids=$(checkRunning "$proc")
    if [ $? -eq 0 ]; then
        # checkRunning returns 0 when nothing is running.
        printf '%s' "$proc"
        echo_w $(( ${#proc} - 10 )) "[${red}stopped${reset}]"
        return
    fi

    label="$proc (pid $pids)"
    printf '%s' "$label"
    # checkDaemonRunning returns 0 when no daemon is watching the service.
    if checkDaemonRunning "${ALIAS[$idx]}" >/dev/null; then
        echo_w $(( ${#label} - 10 )) "[${green}running${reset}]"
    else
        echo_w $(( ${#label} - 10 )) "[daemon][${green}running${reset}]"
    fi
}

# Restart a service: stop it, then start it again.
# Arguments: $1 - index into PROC[]
# Returns:   the status of start_proc
restart_proc()
{
    local idx=$1

    # The outcome of the stop step is deliberately not checked; the start is
    # attempted in every case.
    kill_proc "$idx"
    start_proc "$idx"
}

# Stop the daemon instance(s) of this script that watch one service.
# Prints nothing when no daemon is running.
# Arguments: $1 - index into PROC[] / ALIAS[]
# Globals:   PROC, ALIAS (read)
kill_daemon()
{
    local idx=$1
    local red=$'\033[31m' reset=$'\033[0m'
    local label pids
    local -a pid_list

    label="${PROC[$idx]} daemon"
    pids=$(checkDaemonRunning "${ALIAS[$idx]}")
    if [ $? -eq 0 ]; then
        # checkDaemonRunning returns 0 when there is no daemon.
        return 0
    fi

    printf '%s' "stoping $label ."
    # checkDaemonRunning prints all PIDs on one space-separated line.
    read -r -a pid_list <<< "$pids"
    # SIGKILL takes effect at once; the daemon's job is to restart the service,
    # so it must be gone before the service itself is stopped.
    kill -9 "${pid_list[@]}"
    echo_w "${#label}" "[${red}stopped${reset}]"
}
# Stop a service and wait up to ten checks (one second apart) for it to exit.
# Arguments: $1 - index into PROC[] / ALIAS[]
# Globals:   PROC (read); DAEMON, FORCE, CORE (read); KILLFAILED (set to
#            "true" when the process is still alive after the wait)
# Signals:   FORCE=true -> SIGKILL (9); CORE=true -> SIGABRT (6);
#            otherwise SIGTERM (15), so that -f really is the "force" variant
#            the help text and the final hint describe.
# Returns:   0 when the service is stopped (or was not running), 1 otherwise
kill_proc()
{
    local idx=$1
    local red=$'\033[31m' green=$'\033[32m' reset=$'\033[0m'
    local result=1
    local proc len pids sig t
    local -a pid_list

    # Stop the watching daemon first, otherwise it would restart the service.
    [ "$DAEMON" = "true" ] && kill_daemon "$idx"

    proc=${PROC[$idx]}
    printf '%s' "stoping $proc ."
    len=${#proc}

    pids=$(checkRunning "$proc")
    if [ $? -eq 0 ]; then
        # checkRunning returns 0 when nothing is running: nothing to do.
        printf '..'
        echo_w $((len + 2)) "[${red}stopped${reset}]"
        return 0
    fi

    if [ "$FORCE" = "true" ]; then
        sig=-9
    elif [ "$CORE" = "true" ]; then
        sig=-6
    else
        sig=-15
    fi
    # checkRunning prints all PIDs on one space-separated line.
    read -r -a pid_list <<< "$pids"
    kill "$sig" "${pid_list[@]}"

    for ((t = 0; t < 10; t++)); do
        printf '.'
        len=$((len + 1))
        if checkRunning "$proc" >/dev/null; then
            echo_w "$len" "[${red}stopped${reset}]"
            result=0
            break
        fi
        sleep 1
    done

    # Report failure once, after the last check, so a [running] label can
    # never be followed by a later [stopped] for the same service.
    if [ "$result" -ne 0 ]; then
        echo_w "$len" "[${green}running${reset}]"
        KILLFAILED=true
    fi
    return $result
}

# Launch a background copy of this script that acts as the daemon for one
# service ("-a -d <alias>" makes do_process enter start_real_daemon).
# Arguments: $1 - index into PROC[] / ALIAS[]
# Globals:   SCRIPT_NAME, ALIAS, PROC (read)
# NOTE(review): there is no guard against a daemon that is already running for
# the same alias; a checkDaemonRunning-based guard existed here only as
# commented-out code.
start_daemon()
{
    # Quoted so that a script path containing spaces is still executed as one
    # word.
    "$SCRIPT_NAME" -a -d "${ALIAS[$1]}" >/dev/null 2>&1 &
    echo  "starting daemon for ${PROC[$1]}... OK"
}

# Daemon loop: every CHECK_TIME seconds try to start the service and record the
# outcome in daemon.log (in the current directory). Never returns.
# Arguments: $1 - index into PROC[]
# Globals:   PROC, CHECK_TIME (read)
# start_proc status: 2 = started, 3 = failed to start; any other status
# (e.g. 1 = already running) is not logged.
start_real_daemon()
{
    local idx=$1
    local rc

    while true
    do
        start_proc "$idx"
        # Save the status immediately: every later test overwrites $?, which
        # is why the "failed" branch could never be reached before.
        rc=$?
        case $rc in
        2)
            echo "$(date): start daemon ${PROC[$idx]} ok" >> daemon.log;;
        3)
            echo "$(date): start daemon ${PROC[$idx]} failed" >> daemon.log;;
        esac
        sleep "$CHECK_TIME"
    done
}

# Wait until a new line containing "HeartBeat succeeds" shows up in
# ../log/dcs/dcs.log, then launch ./StatSchedule in the background.
# Polls every 3 seconds and prints one dot per unsuccessful poll.
# Only lines appended after the function was entered are examined (the scan
# starts at the line count taken on entry).
start_ss()
{
    local log_file="../log/dcs/dcs.log"
    local seen_lines total_lines heartbeat

    seen_lines=$(wc -l $log_file | awk '{print $1}')

    while :
    do
        total_lines=$(wc -l $log_file | awk '{print $1}')
        if [ "$total_lines" != "$seen_lines" ]; then
            # Look only at the part of the log written since the last poll.
            heartbeat=$(sed -n "$seen_lines,$total_lines p" $log_file | grep 'HeartBeat succeeds')
            if [ -n "$heartbeat" ]; then
                nohup ./StatSchedule ../cfg/ss_config.cfg >/dev/null 2>&1 &
                break
            fi
            seen_lines=$total_lines
        fi
        printf '.'
        sleep 3
    done
}

# Start one service in the background unless it is already running, then
# verify after three seconds that it is alive.
# Arguments: $1 - index into PROC[] / DIR[] / ARGV[]
# Globals:   PROC, DIR, ARGV, ISLIST, STARTSS, CHECK_LOGFILE (read)
# Returns:   1 - an instance is already running (nothing started)
#            2 - started and found running afterwards
#            3 - could not be started (working directory unreachable, or the
#                process is not running after the wait)
start_proc()
{
    local idx=$1
    local red=$'\033[31m' green=$'\033[32m' reset=$'\033[0m'
    local proc len pids check_rc workdir prev_dir t

    proc=${PROC[$idx]}
    printf '%s' "starting $proc "
    len=${#proc}

    pids=$(checkRunning "$proc")
    check_rc=$?
    if [ "$ISLIST" != "true" ]; then
        echo "$(date) check $proc return=[$pids] [$check_rc]" >> "$CHECK_LOGFILE"
    fi
    # checkRunning signals "running" through a non-zero status and its output.
    if [ "$check_rc" -ne 0 ] || [ -n "$pids" ]; then
        printf '..'
        echo_w $((len + 2)) "[${red}FAILED${reset}]"
        echo "Error:$proc already have a instance (pid $pids)"
        return 1
    fi

    workdir=${DIR[$idx]}
    prev_dir=$PWD
    # Without this check a failed cd would launch ./$proc from the wrong
    # directory, and the later "cd -" would jump to a stale OLDPWD.
    if ! cd "$workdir"; then
        printf '..'
        echo_w $((len + 2)) "[${red}FAILED${reset}]"
        echo "Error:cannot change directory to $workdir" >&2
        return 3
    fi
    if [ "$STARTSS" = "true" ]; then
        start_ss
    else
        # ARGV is left unquoted on purpose: it holds a whole argument list.
        nohup "./$proc" ${ARGV[$idx]} >/dev/null 2>&1 &
    fi
    cd "$prev_dir" || return 3

    for t in 1 2 3
    do
        printf '.'
        len=$((len + 1))
        sleep 1
    done
    printf '.'

    # checkRunning returns 0 when the process is NOT running.
    if checkRunning "$proc" >/dev/null; then
        echo_w $((len + 1)) "[${red}FAILED${reset}]"
        return 3
    fi
    echo_w $((len + 1)) "[${green}  OK  ${reset}]"
    return 2
}

# Print a header line and run the service binary with --version.
# Arguments: $1 - index into PROC[] / DIR[]
# Globals:   PROC, DIR (read)
# The binary is run from the directory configured in DIR[] (falling back to
# "bin" when no directory is configured), the same place start_proc launches
# it from.
show_version()
{
    local idx=$1
    local proc=${PROC[$idx]}
    local workdir=${DIR[$idx]:-bin}

    echo "====================== $proc Version Info ======================"
    # A subshell keeps the caller's working directory intact even when the
    # directory does not exist.
    ( cd "$workdir" && "./$proc" --version )
}

# Run the action(s) selected by the option flags for one service.
# Arguments: $1 - index into the service table
# Globals:   KILL, RESTART, VERSION, STATUS, START, DAEMON, EXPAND (read)
# KILL and RESTART are exclusive and end the function; the remaining flags are
# independent and are handled in the order version, status, start, daemon.
do_process()
{
    local idx=$1

    if [ "$KILL" = "true" ]; then
        kill_proc "$idx"
        return
    fi

    if [ "$RESTART" = "true" ]; then
        restart_proc "$idx"
        return
    fi

    if [ "$VERSION" = "true" ]; then
        show_version "$idx"
    fi

    if [ "$STATUS" = "true" ]; then
        show_status "$idx"
    fi

    if [ "$START" = "true" ]; then
        start_proc "$idx"
    fi

    if [ "$DAEMON" = "true" ]; then
        if [ "$EXPAND" = "true" ]; then
            # We are the background copy: enter the endless supervision loop.
            start_real_daemon "$idx"
        else
            # Foreground invocation: spawn the background copy.
            start_daemon "$idx"
        fi
    fi
    return 0
}

##=========================================================================================================
##=========================================================================================================
##=
##=========================================================================================================
# Make the libraries shipped in ./lib and ./oracle visible to the services.
# The existing value is kept in front; when it is empty no leading ':' is
# produced, because an empty entry makes the loader search the current
# directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}$(pwd)/lib:$(pwd)/oracle"
export TNS_ADMIN="$(pwd)/oracle"

# Option flags, set by the getopts loop below.
KILL=false;        # -k  stop the service
VERSION=false;     # -v  show the binary's version
STATUS=false;      # -s  show the service status
START=true;        # default action; cleared by -a -k -v -s -r -c -d
FORCE=false;       # -f  with -k: kill with SIGKILL
CORE=false;        # -c  with -k: kill with SIGABRT
DAEMON=false;      # -d  daemon handling
RESTART=false;     # -r  stop, then start
EXPAND=false;      # -a  marks the background copy started by start_daemon
WHO=$(whoami);     # user whose processes are inspected; -o overrides it
OWN=$(stat -c %U "$0")   # owner of this script file
KILLFAILED=false;  # set by kill_proc when a process survives the kill
SCRIPT_NAME=$0
ISLIST=false;      # set when the "list" operand is processed
STARTSS=false;     # -p  start through start_ss

# Parse the command-line options. The leading ':' in the option string selects
# getopts' silent mode: an unknown option arrives as '?', a missing option
# argument as ':' (both with the offending letter in OPTARG).
while getopts :krvsahfcdpo: OPTION
do
    case $OPTION in
    a)
        EXPAND=true;
        START=false;;
    k)
        KILL=true;
        START=false;;
    v)
        START=false;
        VERSION=true;;
    s)
        START=false;
        STATUS=true;;
    f)
        FORCE=true;;
    r)
        START=false;
        RESTART=true;;
    c)
        START=false;
        CORE=true;;
    o)
        WHO=$OPTARG;;
    d)
        START=false;
        DAEMON=true;;
    p)
        STARTSS=true;;
    h)
        usage;
        exit 0;;
    :)
        # -o given without a user name; previously this was silently ignored.
        echo "start.sh: option -$OPTARG requires an argument"
        echo "Try \"start.sh -h\" for more infomation."
        exit 1;;
    \?)
        echo "start.sh: invalid option -- $OPTARG"
        echo "Try \"start.sh -h\" for more infomation."
        # A usage error must not look like success to the caller.
        exit 1;;
    esac
done


# Drop the parsed options so that "$@" holds only the service names.
shift $((OPTIND - 1));

# At least one operand (a service alias, "list" or "all") must be left after
# option parsing.
if [ "$#" -eq 0 ];then
    echo "start.sh: missing operand." 
    echo "Try \"start.sh -h\" for more infomation."
    exit 1;
fi

# Only the owner of the script file may manage the services, unless -o set WHO
# to the owner's name. Both sides are quoted so that an empty value (stat or
# whoami failing) yields this message instead of a "[" syntax error.
if [ "$OWN" != "$WHO" ];then
    echo "start.sh:sorry [$WHO], the owner is [$OWN]."
    echo "Add option \"-o $OWN\" to ignore this."
    echo "Try \"start.sh -h\" for more infomation."
    exit 1;
fi

set_ulimit;

# Handle every operand in turn. Operands are compared case-insensitively.
#   list - print the status of all services and exit
#   all  - apply the selected action to all services and exit
#   else - must be a configured alias; the action is applied to that service
for svc in "$@"
do
    svc=$(tr A-Z a-z <<< $svc)

    case "$svc" in
    list)
        ISLIST=true
        for ((i = 0; i < PROC_COUNT; i++))
        do
            show_status $i
        done
        exit 0
        ;;
    all)
        for ((i = 0; i < PROC_COUNT; i++))
        do
            do_process $i
        done
        exit 0
        ;;
    esac

    # Translate the alias into its index in the service table.
    num=-1
    for ((i = 0; i < PROC_COUNT; i++))
    do
        if [ "$svc" = "${ALIAS[$i]}" ]; then
            num=$i
            break
        fi
    done

    if [ $num -eq -1 ]; then
        echo "start.sh: wrong service name [$svc]. "
        echo "Try \"start.sh -h\" for more infomation."
        exit 1
    fi
    do_process $num
done

# kill_proc sets KILLFAILED when a process was still alive after the wait.
if [ "$KILLFAILED" = "true" ]; then
    echo "  ----"
    echo "If they are still running , check it later use command \"start.sh list\""
    echo "Also can use \"start.sh -kf SERVICES\" to kill them immediately"
fi

exit 0

相关文章

网友评论

      本文标题:二进制文件进程守护脚本.md

      本文链接:https://www.haomeiwen.com/subject/gmuhvdtx.html