美文网首页
二进制文件进程守护脚本.md

二进制文件进程守护脚本.md

作者: 平凡的运维之路 | 来源:发表于2023-09-05 16:50 被阅读0次

    start.sh守护脚本

    进程守护脚本

    • 使用方法
    [ccodsupport@harbor DelRecord]$ ./start.sh  -h
    Usage: start.sh [options] service1 [,service2..]
    Start/stop services or show files version
      -c    Get coredump, used with -k.
      -d    Run a daemon script for service
      -f    Force kill service, used with -k
      -h    Print help infomation
      -k    Kill service
      -o    Get owner authority [ -o user ]
      -r    Restart service 
      -s    Show service status
      -v    Show file version
      list  Show all services status
      Note: If no option ,default to start a service. 
    
    Valid shortcut for services:
      del 
      
    Example: 
      "start.sh list"   View status of all services.
      "start.sh del"     Start DelRecord.
      "start.sh -d del"  Start DelRecord with a daemon.
      "start.sh del " Start DelRecord.
      "start.sh -v del"  Show DelRecord version.
      "start.sh -s del"  Show DelRecord status.
      "start.sh -k del"  Stop DelRecord.
      "start.sh -r del"  Restart DelRecord.
      "start.sh -kd del" Stop DelRecord and its daemon.
      "start.sh -kf del" Force,kill DelRecord immediately.
      "start.sh all"     Start all services at once.
      "start.sh -k all"  Stop all services at once.
      
    [devops@my-dev DelRecord]$ ./start.sh  -kdf  del
    stoping DelRecord ...                               [stopped]
    
    [devops@my-dev DelRecord]$ ./start.sh   del
    starting DelRecord ....                              [ok]
    
    [devops@my-dev DelRecord]$ ./start.sh  list
    DelRecord (pid 22081 22084)                         [running]
    checkmen                                            [stopped]
    
    • 脚本源码
    #!/bin/bash
    
    #########################################################################################
    ################                 service info ###########################################
    #########################################################################################
    # Service table. Entry N of ALIAS/PROC/DIR/ARGV together describes one
    # managed binary; to manage another binary append the next index to each
    # of the four arrays (indices are expected to be contiguous from 0).

    # Short names accepted on the command line.
    ALIAS[0]="del";
    ALIAS[1]="men";

    # Executable names; also the names looked up in the ps output.
    PROC[0]="DelRecord";
    PROC[1]="checkmen";

    # Directory each binary is started from.
    DIR[0]="./";
    DIR[1]="./bin";

    # Extra command-line arguments handed to each binary.
    ARGV[0]="";
    ARGV[1]="--config ../cfg/checkmen.cfg";  # configuration file for checkmen


    # Number of managed services. Derived from PROC instead of being typed in by
    # hand: the former hard-coded 1 left the second configured service out of
    # "list", "all" and the alias lookup.
    PROC_COUNT=${#PROC[@]}
    # Seconds the daemon loop sleeps between two start attempts.
    CHECK_TIME=10

    #CHECK_LOGFILE="check.log"
    # File start_proc appends its "check" trace lines to.
    CHECK_LOGFILE="/dev/null"
    
    #########################################################################################
    
    ##################################################################################
    
    # usage
    # Write the help text (options, service shortcuts and examples) to standard
    # output, one line per entry.
    usage()
    {
        printf '%s\n' \
            'Usage: start.sh [options] service1 [,service2..]' \
            'Start/stop services or show files version' \
            '  -c Get coredump, used with -k.' \
            '  -d Run a daemon script for service' \
            '  -f Force kill service, used with -k' \
            '  -h Print help infomation' \
            '  -k Kill service' \
            '  -o Get owner authority [ -o user ]' \
            '  -r Restart service ' \
            '  -s Show service status' \
            '  -v Show file version' \
            '  list  Show all services status' \
            '  Note: If no option ,default to start a service. ' \
            '' \
            'Valid shortcut for services:' \
            '  del ' \
            '  ' \
            'Example: ' \
            '  "start.sh list"  View status of all services.' \
            '  "start.sh del"    Start DelRecord.' \
            '  "start.sh -d del"  Start DelRecord with a daemon.' \
            '  "start.sh del " Start DelRecord.' \
            '  "start.sh -v del"  Show DelRecord version.' \
            '  "start.sh -s del"  Show DelRecord status.' \
            '  "start.sh -k del"  Stop DelRecord.' \
            '  "start.sh -r del"  Restart DelRecord.' \
            '  "start.sh -kd del" Stop DelRecord and its daemon.' \
            '  "start.sh -kf del" Force,kill DelRecord immediately.' \
            '  "start.sh all"    Start all services at once.' \
            '  "start.sh -k all"  Stop all services at once.'
    }
    
    # echo_w USED TEXT
    # Print TEXT plus a newline, right-aligned in a field of (60 - USED)
    # characters. Callers pass the number of columns already printed on the
    # current line as USED.
    echo_w()
    {
        width=$(( 60 - $1 ))
        f="${width}s"
        printf "%${f}\n" "$2"
    }
    # set_ulimit
    # Raise the core-file size limit to "unlimited" when it is currently 0, so
    # that a service stopped with -c (SIGABRT) can leave a core dump. Any other
    # existing limit is left untouched.
    set_ulimit()
    {
        local core_limit
        core_limit=$(ulimit -c)
        # The operands must be separate words: the former [ $core_limit="0" ]
        # was a single non-empty string and therefore always true.
        if [ "$core_limit" = "0" ]; then
            ulimit -c unlimited
        fi
    }
    
    # checkDaemonRunning ALIAS
    # Search the processes of user $WHO for a command line that contains
    # "$SCRIPT_NAME -a -d ALIAS" and print the matching PIDs on one line.
    # Returns 1 when at least one match was found and 0 when there was none
    # (note: the reverse of the usual shell convention; callers test for != 0).
    checkDaemonRunning()
    {
        local pids
        pids=$(ps -u $WHO -o pid -o comm -o cmd \
            | grep "$SCRIPT_NAME -a -d $1" \
            | grep -v grep \
            | awk '{print $1}')
        # Unquoted on purpose: joins several PIDs into one space-separated line.
        echo $pids
        [ "$pids" = "" ] && return 0
        return 1
    }
    
    # checkRunning NAME
    # Print, on one line, the PIDs of the processes of user $WHO whose ps
    # "pid comm" line contains NAME as a whole word.
    # Returns 1 when at least one such process exists and 0 when none does
    # (note: the reverse of the usual shell convention; callers test for != 0).
    checkRunning()
    {
        local pids
        pids=$(ps -u $WHO -o pid -o comm | grep -w $1 | awk '{print $1}')
        # Unquoted on purpose: joins several PIDs into one space-separated line.
        echo $pids

        if [ -n "$pids" ]; then
            return 1
        fi
        return 0
    }
    
    
    # show_status INDEX
    # Print one status line for service INDEX of the service table: the process
    # name, its PIDs when running, and a right-aligned coloured tag
    # ([running], [daemon][running] or [stopped]).
    show_status()
    {
        # Real ESC byte for the ANSI colour sequences (the tags previously
        # carried a mis-encoded character instead of ESC).
        local esc=$'\033'
        local tag_running="[${esc}[32mrunning${esc}[0m]"
        local tag_stopped="[${esc}[31mstopped${esc}[0m]"
        local name=${PROC[$1]}
        local pids label daemon_pids

        pids=$(checkRunning "$name")
        if [ $? -ne 0 ]; then
            # checkRunning returns non-zero when the process exists.
            label="$name (pid $pids)"
            printf '%s' "$label"
            daemon_pids=$(checkDaemonRunning ${ALIAS[$1]})
            # The width is reduced by 10 as in the original layout; presumably
            # this offsets the invisible colour-code characters in the tag.
            if [ $? -ne 0 ]; then
                echo_w $(( ${#label} - 10 )) "[daemon]${tag_running}"
            else
                echo_w $(( ${#label} - 10 )) "${tag_running}"
            fi
        else
            printf '%s' "$name"
            echo_w $(( ${#name} - 10 )) "${tag_stopped}"
        fi
    }
    
    # restart_proc INDEX
    # Stop service INDEX and then start it again. The start is attempted
    # whatever kill_proc returned; the function's status is that of start_proc.
    restart_proc()
    {
        local idx=$1
        kill_proc $idx
        start_proc $idx
    }
    
    # kill_daemon INDEX
    # Kill every watchdog instance of this script that guards service INDEX
    # (as reported by checkDaemonRunning) and print a "stopped" line.
    # Prints nothing when no watchdog is running.
    kill_daemon()
    {
        # Real ESC byte for the ANSI colour sequences (the tag previously
        # carried a mis-encoded character instead of ESC).
        local esc=$'\033'
        local name=${PROC[$1]}
        local label="$name daemon"
        local pids pid

        pids=$(checkDaemonRunning ${ALIAS[$1]})
        # checkDaemonRunning returns non-zero when a watchdog exists.
        if [ $? -ne 0 ]; then
            echo -n "stoping $name daemon ."
            # Unquoted so that each PID is handled on its own.
            for pid in $pids
            do
                # SIGKILL: the watchdog is only a restart loop, and it must not
                # get the chance to restart the service it guards.
                kill -9 "$pid"
            done
            echo_w ${#label} "[${esc}[31mstopped${esc}[0m]"
        fi
    }
    # kill_proc INDEX
    # Stop service INDEX and report the outcome on one line.
    #   - With DAEMON=true the service's watchdog is killed first.
    #   - Signal: SIGKILL when FORCE=true, SIGABRT when CORE=true (core dump),
    #     otherwise SIGTERM. The default used to be SIGKILL as well, which made
    #     -f a no-op and contradicted the "-kf ... kill immediately" hint.
    #   - All PIDs are signalled at once, then the process list is polled up to
    #     10 times, one second apart.
    # Sets KILLFAILED=true when the process is still alive after the last poll.
    # Returns 0 only when a running process was signalled and seen to exit;
    # returns 1 otherwise, including when nothing was running (kept as before).
    kill_proc()
    {
        # Real ESC byte for the ANSI colour sequences (the tags previously
        # carried a mis-encoded character instead of ESC).
        local esc=$'\033'
        local tag_running="[${esc}[32mrunning${esc}[0m]"
        local tag_stopped="[${esc}[31mstopped${esc}[0m]"
        local result=1
        local name len pids sig t

        [ "$DAEMON" = "true" ] && kill_daemon $1
        name=${PROC[$1]}
        echo -n "stoping $name ."
        len=${#name}

        pids=$(checkRunning "$name")
        if [ $? -eq 0 ]; then
            # checkRunning returns 0 when there is no such process.
            echo -n ".."
            echo_w $(( len + 2 )) "$tag_stopped"
            return $result
        fi

        if [ "$FORCE" = "true" ]; then
            sig=-9
        elif [ "$CORE" = "true" ]; then
            sig=-6
        else
            sig=-15
        fi
        # $pids is left unquoted so every PID becomes its own argument.
        kill $sig $pids

        for (( t = 0; t < 10; t++ ))
        do
            echo -n "."
            len=$(( len + 1 ))
            pids=$(checkRunning "$name")
            if [ $? -eq 0 ]; then
                echo_w $len "$tag_stopped"
                return 0
            fi
            sleep 1
        done

        # Reported once, after the final poll. The old in-loop "t == 8" check
        # could print "running" and then "stopped" for the same call.
        echo_w $len "$tag_running"
        KILLFAILED=true
        return $result
    }
    
    # start_daemon INDEX
    # Launch a background copy of this script as "$SCRIPT_NAME -a -d <alias>"
    # for service INDEX, with its output discarded, and print a confirmation.
    # No check for an already running watchdog is made here: an earlier guard
    # based on checkDaemonRunning ("already a instance running ...") is
    # disabled in this version.
    start_daemon()
    {
        local svc_alias=${ALIAS[$1]}
        $SCRIPT_NAME -a -d $svc_alias >/dev/null 2>&1 &
        printf '%s\n' "starting daemon for ${PROC[$1]}... OK"
    }
    
    # start_real_daemon INDEX
    # Watchdog loop: every CHECK_TIME seconds try to start service INDEX with
    # start_proc and record the outcome in ./daemon.log.
    #   start_proc status 2 -> "ok" line, 3 -> "failed" line; any other status
    #   (1 = already running) is not logged. The loop never returns.
    start_real_daemon()
    {
        local rc
        while true
        do
            start_proc $1
            # Keep the status: testing $? a second time (as the old elif did)
            # sees the result of the previous [ ] test, so the "failed" branch
            # could never be taken.
            rc=$?
            case $rc in
            2)
                echo "`date`: start daemon ${PROC[$1]} ok" >> daemon.log
                ;;
            3)
                echo "`date`: start daemon ${PROC[$1]} failed" >> daemon.log
                ;;
            esac
            sleep $CHECK_TIME
        done
    }
    
    # start_ss
    # Wait until a line containing "HeartBeat succeeds" is appended to
    # ../log/dcs/dcs.log, then launch ./StatSchedule with ../cfg/ss_config.cfg
    # in the background and return. The log is polled every 3 seconds and a
    # dot is printed after each poll that did not find the line. Only lines
    # from the last known line count onwards are searched.
    start_ss()
    {
        local log_file="../log/dcs/dcs.log"
        local seen total

        seen=$(wc -l $log_file | awk '{print $1}')
        while true
        do
            total=$(wc -l $log_file | awk '{print $1}')
            if [ "$total" != "$seen" ]; then
                if sed -n "$seen,$total p" $log_file | grep -q 'HeartBeat succeeds'; then
                    nohup ./StatSchedule ../cfg/ss_config.cfg >/dev/null 2>&1 &
                    break
                fi
                seen=$total
            fi
            printf '.'
            sleep 3
        done
    }
    
    # start_proc INDEX
    # Start service INDEX from its configured directory DIR[INDEX] with the
    # arguments ARGV[INDEX] (or via start_ss when STARTSS=true), wait about
    # three seconds and verify with checkRunning that it is alive.
    # Unless ISLIST=true, the initial check result is appended to CHECK_LOGFILE.
    # Returns:
    #   1  an instance is already running (nothing was started)
    #   2  started and found running afterwards
    #   3  start failed (directory not accessible, or process not found after
    #      the wait)
    start_proc()
    {
        # Real ESC byte for the ANSI colour sequences (the tags previously
        # carried a mis-encoded character instead of ESC).
        local esc=$'\033'
        local tag_ok="[${esc}[32m  OK  ${esc}[0m]"
        local tag_failed="[${esc}[31mFAILED${esc}[0m]"
        local name=${PROC[$1]}
        local len=${#name}
        local start_dir=$PWD
        local pids cret t

        echo -n "starting $name "
        pids=$(checkRunning "$name")
        cret=$?
        if [ "$ISLIST" != "true" ]; then
            echo "`date` check $name return=[$pids] [$cret]" >> "$CHECK_LOGFILE"
        fi
        if [ $cret -ne 0 ] || [ "$pids" != "" ]; then
            echo -n ".."
            echo_w $(( len + 2 )) "$tag_failed"
            echo "Error:$name already have a instance (pid $pids)"
            return 1
        fi

        # The cd must succeed before anything is launched: unchecked and
        # unquoted, an empty DIR entry went to $HOME and a missing directory
        # started ./$name from whatever the current directory happened to be.
        if ! cd "${DIR[$1]}"; then
            echo -n ".."
            echo_w $(( len + 2 )) "$tag_failed"
            echo "Error:cannot enter directory [${DIR[$1]}] for $name"
            return 3
        fi
        if [ "$STARTSS" = "true" ]; then
            start_ss
        else
            # ARGV is left unquoted so it splits into separate arguments.
            nohup ./$name ${ARGV[$1]} >/dev/null 2>&1 &
        fi
        cd "$start_dir" >/dev/null 2>&1

        for t in 1 2 3
        do
            echo -n "."
            len=$(( len + 1 ))
            sleep 1
        done
        echo -n "."
        pids=$(checkRunning "$name")
        if [ $? -ne 0 ]; then
            echo_w $(( len + 1 )) "$tag_ok"
            return 2
        fi
        echo_w $(( len + 1 )) "$tag_failed"
        return 3
    }
    
    # show_version INDEX
    # Print a banner and then the output of "<binary> --version" for service
    # INDEX. The binary is run from DIR[INDEX], the same directory start_proc
    # launches it from; "bin" (the former hard-coded location) is used when no
    # directory is configured. Returns the status of the version command, or 1
    # when the directory cannot be entered.
    show_version()
    {
        local dir=${DIR[$1]:-bin}
        echo "====================== ${PROC[$1]} Version Info ======================"
        # Subshell: the caller's working directory is never changed. Before, a
        # failed "cd bin" followed by "cd .." moved the script to the parent
        # directory.
        (
            cd "$dir" || exit 1
            ./${PROC[$1]} --version
        )
    }
    
    # do_process INDEX
    # Apply the action selected by the option flags to service INDEX.
    #   KILL    -> kill_proc, nothing else
    #   RESTART -> restart_proc, nothing else
    # otherwise, in this order and each only when its flag is "true":
    #   VERSION -> show_version, STATUS -> show_status, START -> start_proc,
    #   DAEMON  -> start_real_daemon when EXPAND is "true" (the watchdog loop
    #              itself), else start_daemon (spawn the watchdog).
    do_process()
    {
        local idx=$1

        if [ "$KILL" = "true" ]; then
            kill_proc $idx
            return
        elif [ "$RESTART" = "true" ]; then
            restart_proc $idx
            return
        fi

        [ "$VERSION" != "true" ] || show_version $idx
        [ "$STATUS" != "true" ] || show_status $idx
        [ "$START" != "true" ] || start_proc $idx

        if [ "$DAEMON" = "true" ]; then
            if [ "$EXPAND" = "true" ]; then
                start_real_daemon $idx
            else
                start_daemon $idx
            fi
        fi
    }
    
    ##=========================================================================================================
    ##=========================================================================================================
    ##=
    ##=========================================================================================================
    # Make the bundled libraries under ./lib and ./oracle visible to the
    # services. The previous value is only prepended when it is non-empty:
    # with an empty value the old form produced a leading ":", i.e. an empty
    # search-path element, which the dynamic loader treats as the current
    # directory.
    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$(pwd)/lib:$(pwd)/oracle"
    export TNS_ADMIN="$(pwd)/oracle"

    # Action flags, overridden by the command-line options.
    KILL=false;         # -k stop the service
    VERSION=false;      # -v show version
    STATUS=false;       # -s show status
    START=true;         # default action when no action option is given
    FORCE=false;        # -f used together with -k
    CORE=false;         # -c used together with -k
    DAEMON=false;       # -d watchdog handling
    RESTART=false;      # -r stop, then start
    EXPAND=false;       # -a this invocation is the watchdog loop itself
    WHO=$(whoami);      # user whose processes are inspected (-o overrides)
    OWN=$(stat -c %U "$0")   # owner of this script file
    KILLFAILED=false;   # set by kill_proc when a process survived the stop
    SCRIPT_NAME=$0      # used to spawn and to recognise watchdog instances
    ISLIST=false;       # set while handling the "list" operand
    STARTSS=false;      # -p start through start_ss
    
    
    # parse_options ARGS...
    # Translate the command-line options into the global action flags.
    # OPTIND is deliberately not made local, so the code after the call can
    # still shift the parsed option words away.
    # Exits 0 after printing the help for -h; exits 1 with a message for an
    # unknown option or for -o without its user argument.
    parse_options()
    {
        local OPTION
        while getopts :krvsahfcdpo: OPTION
        do
            case $OPTION in
            a)
                EXPAND=true;
                START=false;;
            k)
                KILL=true;
                START=false;;
            v)
                START=false;
                VERSION=true;;
            s)
                START=false;
                STATUS=true;;
            f)
                FORCE=true;;
            r)
                START=false;
                RESTART=true;;
            c)
                START=false;
                CORE=true;;
            o)
                WHO=$OPTARG;;
            d)
                START=false;
                DAEMON=true;;
            p)
                STARTSS=true;;
            h)
                usage;
                exit 0;;
            :)
                # With a leading ":" in the option string getopts reports a
                # missing argument as ":"; this case used to be ignored.
                echo "start.sh: option -$OPTARG requires an argument"
                echo "Try \"start.sh -h\" for more infomation."
                exit 1;;
            \?)
                echo "start.sh: invalid option"
                echo "Try \"start.sh -h\" for more infomation."
                # A usage error must not report success.
                exit 1;;
            esac
        done
    }

    parse_options "$@"
        
    
    # Drop the option words so that only the service operands remain.
    shift $(( OPTIND - 1 ))

    # At least one service name (or "list"/"all") is required.
    if [ "$#" -eq 0 ]; then
        echo "start.sh: missing operand."
        echo "Try \"start.sh -h\" for more infomation."
        exit 1;
    fi

    # Only the owner of the script file may manage the services; "-o <owner>"
    # overrides WHO. Both sides are quoted: unquoted, an empty OWN (e.g. stat
    # failed) turned the test into a syntax error and the check was skipped.
    if [ "$OWN" != "$WHO" ]; then
        echo "start.sh:sorry [$WHO], the owner is [$OWN]."
        echo "Add option \"-o $OWN\" to ignore this."
        echo "Try \"start.sh -h\" for more infomation."
        exit 1;
    fi

    set_ulimit;
    
    # dispatch_services NAME...
    # Handle every service operand in order (names are matched
    # case-insensitively against ALIAS):
    #   list  -> print the status of all services and exit 0
    #   all   -> run do_process for every service, then return so that the
    #            code after the call still runs (it used to exit right here,
    #            which skipped the "kill failed" hint for "-k all")
    #   alias -> run do_process for that service
    # An unknown name prints an error and exits 1.
    dispatch_services()
    {
        local name num i
        for name in "$@"
        do
            num=-1
            name=$(tr A-Z a-z <<< "$name")
            if [ "$name" = "list" ]; then
                ISLIST=true;
                for (( i = 0; i < PROC_COUNT; i++ ))
                do
                    show_status $i;
                done
                exit 0;
            fi
            if [ "$name" = "all" ]; then
                for (( i = 0; i < PROC_COUNT; i++ ))
                do
                    do_process $i;
                done
                return 0;
            fi
            for (( i = 0; i < PROC_COUNT; i++ ))
            do
                if [ "$name" = "${ALIAS[$i]}" ]; then
                    num=$i;
                    break;
                fi
            done
            if [ $num -ne -1 ]; then
                do_process $num;
            else
                echo "start.sh: wrong service name [$name]. "
                echo "Try \"start.sh -h\" for more infomation."
                exit 1;
            fi
        done
    }

    dispatch_services "$@"
    
    # Closing hint: KILLFAILED is set by kill_proc when a process was still
    # alive after the wait, so tell the user how to re-check or force the stop.
    [ "$KILLFAILED" != "true" ] || printf '%s\n' \
        '  ----' \
        'If they are still running , check it later use command "start.sh list"' \
        'Also can use "start.sh -kf SERVICES" to kill them immediately'

    exit 0
    

    相关文章

      网友评论

          本文标题:二进制文件进程守护脚本.md

          本文链接:https://www.haomeiwen.com/subject/gmuhvdtx.html