美文网首页
自动化报警的实现

自动化报警的实现

作者: 尽情的嘲笑我吧 | 来源:发表于2017-11-20 22:36 被阅读0次
    监控最终效果

    最近从主产品被调到了火星,技术不到家的我感到压力很大啊。提代码的时候也更加小心翼翼了,生怕搞坏了线上环境。

    于是,我就打算给自己做个监控,好让我能快速地发现问题,然后改掉bug。考虑到公司某些规定,就不详细介绍了。下面简单地描述一下,有个思路就行。

    思路就是:

    1. 分析Nginx错误日志,用正则匹配出对应内容
    2. SVN blame出错误代码的作者。
    3. 借助钉钉的群聊机器人,及时发送出去。
    

    在实现的过程中,遇到了很多问题。大概有这么几个:
    1、 服务器端口限制的比较死,不能单独给自己开对外访问的端口。
    2、 重复错误的触发。
    3、 SVN blame认证问题(没解决。。。)
    ... ...


    整体准备采用Client和Server的模式。

    Client内容如下:

    使用一个Python脚本,借助crontab监控错误日志,并正则匹配对应的内容,发送给Server端处理。

    #!/usr/bin/env python
    # coding: utf8
    # Python 2 only: site.py removes sys.setdefaultencoding at startup, so the
    # module is reloaded to get it back and make implicit str/unicode
    # conversions use UTF-8 instead of ASCII.
    import sys
    reload(sys)
    sys.setdefaultencoding("utf8")
    import re
    import json
    import urllib2
    import time
    
    # 2017/11/17 16:05:01 [error] 4004#0: *246620391 FastCGI sent in stderr: "PHP message: PHP Fatal error:  Function name must be a string in /home/wwwroot/api.newtv.com/live.class.php on line 2242" while reading response header from upstream, client: 192.168.30.100, server: api.changbalive.com, request: "GET /api.php?ac=recordsingsong&curuserid=2635267&channelsrc=appstore&version=1.9.5&token=T777936552f7571e&bless=0&macaddress=A4D0E95D-AB54-48A1-BCC8-3EB0A530B2A7&ismember=0&openudid=d7be3882344bb889cd6c451880df1a834f1af960&systemversion=10.3.3&device=iPhone7,1&broken=0&songid=867712&secret=483f47528d HTTP/1.1", upstream: "fastcgi://127.0.0.1:9000", host: "api.changbalive.com"
    
    def parse_errorlog(line):
        """Extract PHP error details from one Nginx error-log line.

        Args:
            line: a single line of the Nginx error log.

        Returns:
            A list of 5-tuples of strings, in order: error time, error level,
            error description, full path of the failing file, failing line
            number. The list is empty when the line holds no PHP error message.
        """
        # \s* / \s+ swallow the padding after "PHP message:" and after the level
        # so the captured level and description carry no leading whitespace.
        # The trailing .* consumes the rest of the line: at most one match per line.
        reg = re.compile(
            r"(.*?) \[error\] .*?PHP message:\s*(.*?):\s+(.*?) in (.*?) on line (\d+).*"
        )
        return reg.findall(line)
    def get_errorlog(type='api_newtv_error.log', log_dir='/var/log/nginx'):
        """Read every line of an Nginx log file.

        Args:
            type: file name of the log inside ``log_dir``. (The name shadows the
                builtin but is kept so existing keyword callers keep working.)
            log_dir: directory that holds the log; defaults to the Nginx log
                directory the script was written for.

        Returns:
            A list with one string per line, line endings included.

        Raises:
            IOError: if the file cannot be opened.
        """
        path = "{}/{}".format(log_dir, type)
        # The with-statement closes the file; readlines() never yields None,
        # so no extra close() or filtering is needed.
        with open(path, 'r') as log_file:
            return log_file.readlines()
    # TODO
    # 使用HTTP请求,将找到的错误信息emit到Server端
    

    Server内容

    用到的文件大致有这么几个。

    findbugauthor.sh  receive.php   utils.php
    

    findbugauthor.sh

    #!/usr/bin/env bash
    # Print the SVN author of one line of a file.
    # Usage: findbugauthor.sh DIRPATH FILENAME LINE
    #   DIRPATH   directory of the SVN working copy that contains the file
    #   FILENAME  file name inside DIRPATH
    #   LINE      1-based line number to look up
    #
    # NOTE(review): `export $PATH` expands to `export <value of PATH>`, which bash
    # rejects with a "not a valid identifier" message on stderr; `export PATH` is
    # almost certainly what was meant. It is left untouched here because a caller
    # that merges stderr and reads the author from a fixed output line would be
    # shifted by one line if this message disappeared - fix both sides together.
    export $PATH
    DIRPATH=$1
    FILENAME=$2
    LINE=$3

    # Stop instead of running svn blame in whatever directory we happen to be in.
    cd "$DIRPATH" || exit 1
    # awk picks exactly line $LINE (nothing if the file is shorter) and reads the
    # whole stream, so svn never sees a closed pipe. Column 2 of blame is the author.
    AUTHOR=$(svn blame "$FILENAME" | awk -v n="$LINE" 'NR == n { print $2 }')
    echo "$AUTHOR"
    

    utils.php

    <?php
    header("Content-Type:text/html;charset=UTF-8");
    
    /**
     * Build the identifier used to group occurrences of the same error.
     *
     * @param string $path full path of the failing file
     * @param int    $line failing line number
     * @return string md5 hex digest of $path immediately followed by $line
     */
    function getuniquecode($path, $line){
        return md5($path . $line);
    }
    /**
     * Send an HTTP request with cURL and return the response body.
     *
     * When $post is non-empty it is sent as a POST body with a JSON content
     * type; otherwise a plain GET is issued.
     *
     * @param string $url       target URL; nothing is sent when it is empty
     * @param string $post      request body (already JSON-encoded)
     * @param int    $timeout   total timeout in seconds
     * @param bool   $verifyssl verify the peer certificate and host name; pass
     *                          false only for hosts with self-signed certificates
     * @return string|false|null response body, false when cURL fails, null when
     *                           $url is empty
     */
    function phpPost( $url, $post = '', $timeout = 5, $verifyssl = true ){
        if( empty( $url ) ){
            return ;
        }
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // Certificate checks used to be switched off unconditionally, which lets
        // anyone on the network path read or alter the request.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $verifyssl ? true : false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $verifyssl ? 2 : 0);

        if( !empty( $post ) ){
            curl_setopt($ch, CURLOPT_POST, 1);
            curl_setopt($ch, CURLOPT_POSTFIELDS, $post);
            curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: application/json', 'Content-Length: ' . strlen($post)));
        }
        curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
        $result = curl_exec($ch);
        curl_close($ch);
        return $result;
    }
    /**
     * Ask findbugauthor.sh who last changed a given line of a file.
     *
     * @param string $path full path of the file inside an SVN working copy
     * @param int    $line 1-based line number
     * @return string author name, or "" when the script fails or prints nothing
     */
    function getbashoutput($path, $line) {
        // dirname() instead of substr/strpos: the latter cuts at the FIRST
        // occurrence of the file name, which is wrong for paths like /a/foo/foo.
        $dirpath = dirname($path);
        $filename = basename($path);
        // $path originates from the HTTP request, so every argument is escaped
        // before it reaches the shell. The script is addressed relative to this
        // file rather than to the current working directory.
        $command = sprintf(
            "bash %s %s %s %d 2>&1",
            escapeshellarg(__DIR__ . "/findbugauthor.sh"),
            escapeshellarg($dirpath),
            escapeshellarg($filename),
            intval($line)
        );
        $output = array();
        $status = 0;
        exec($command, $output, $status);
        if ($status !== 0 || empty($output)) {
            return "";
        }
        // The script echoes the author last. Taking the final line, rather than
        // a fixed index, stays correct whether or not diagnostics merged in
        // from stderr precede it.
        return trim(end($output));
    }
    //getbashoutput("/home/wwwroot/巴拉巴拉/你的文件ListService.php", 142);
    /**
     * Singleton around one phpredis connection that stores per-error counters
     * (sorted set) and the date each error was last reported (hash).
     */
    class RedisHelper{
        private static $_instance = null;
        // NOTE(review): the "zpinewtvcom" prefix differs from NOTIFY_KEY's
        // "apinewtvcom" and looks like a typo; it is kept because renaming the
        // key would orphan counters already stored under it.
        const APINEWTVERROR_KEY = "zpinewtvcom:error:zset";
        const NOTIFY_KEY = "apinewtvcom:notify:hash";
        // Declared explicitly; it used to be created dynamically in the constructor.
        private $redis;

        private function __construct(){
            $this->redis = new Redis();
            // host, port, connect timeout in seconds
            $this->redis->connect("127.0.0.1", 6379, 7);
        }

        /** @return RedisHelper the shared instance, created on first use */
        public static function getInstance(){
            if( self::$_instance === null ) {
                self::$_instance = new RedisHelper();
            }
            return self::$_instance;
        }

        /**
         * Add $number occurrences to the counter of $errorcode.
         *
         * @param string $errorcode error identifier
         * @param int    $number    increment; it used to be ignored and 1 was always added
         */
        public function incrErrorNumber($errorcode, $number=1){
            $this->redis->zIncrBy(self::APINEWTVERROR_KEY, $number, $errorcode);
        }

        /** @return int occurrences recorded for $errorcode (0 when unknown) */
        public function getErrorNumber($errorcode) {
            return intval($this->redis->zScore(self::APINEWTVERROR_KEY, $errorcode));
        }

        /** Record today (Ymd) as the date $errorcode was last reported. */
        public function updateNotifyDate($errorcode){
            $this->redis->hSet(self::NOTIFY_KEY, $errorcode, date("Ymd"));
        }

        /** @return string|false stored Ymd date, or false when never reported */
        public function getNotifyDate($errorcode) {
            return $this->redis->hGet(self::NOTIFY_KEY, $errorcode);
        }

        /**
         * Return the most frequent errors.
         *
         * @param int $number maximum number of entries
         * @return array error code => occurrence count, highest count first
         */
        public function getFrequentErrors($number=7) {
            if ($number <= 0) {
                return array();
            }
            // The end index of zRevRange is inclusive, so the top N are 0..N-1.
            return $this->redis->zRevRange(self::APINEWTVERROR_KEY, 0, $number - 1, true);
        }
    }
    // Skeleton of a DingTalk robot text message. It must be valid JSON: the
    // trailing comma that used to follow the last mobile number made
    // json_decode() return null.
    $template = <<<EOF
    {
        "msgtype": "text",
        "text": {
            "content": "我就是我, 是不一样的烟火"
        },
        "at": {
            "atMobiles": [
                "156xxxx8827",
                "189xxxx8325"
            ],
            "isAtAll": false
        }
    }
    EOF;
    /**
     * Singleton that posts text messages to a DingTalk group robot webhook.
     */
    class Notifier{
        private static $instance = null;
        // Webhook URL; declared explicitly instead of being created dynamically.
        // NOTE(review): the access token is hard-coded - consider moving it to configuration.
        private $url;

        private function __construct() {
            $this->url = "https://oapi.dingtalk.com/robot/send?access_token=b716e1f39b7fc7afbea04b2巴拉巴拉d4bb79db65a117d589f886d1757";
        }

        /** @return Notifier the shared instance, created on first use */
        public static function getInstance(){
            if(self::$instance === null) {
                self::$instance = new Notifier();
            }
            return self::$instance;
        }

        /**
         * Send $msg to the group and @-mention the given members.
         *
         * @param string $msg       message text
         * @param array  $atMobiles mobile numbers to mention; sent as strings
         * @return string|false|null whatever phpPost() returns
         */
        public function notify($msg, $atMobiles = array("15801479216")){
            global $template;
            $data = is_string($template) ? json_decode($template, true) : null;
            // Do not depend on the template being valid JSON: every field the
            // robot needs is filled in below anyway.
            if (!is_array($data)) {
                $data = array();
            }
            $data['text']['content'] = $msg;
            // Strings avoid integer overflow on 32-bit builds and match the
            // string form used by the template's own atMobiles entries.
            $data['at']['atMobiles'] = array_map('strval', array_values($atMobiles));
            $data['at']['isAtAll'] = false;
            $data['msgtype'] = "text";
            $result = phpPost($this->url, json_encode($data));
            return $result;
        }
    }
    

    receive.php

    <?php
    header("Content-Type:text/html;charset=UTF-8");
    require __DIR__."/utils.php";
    
    // Endpoint body: record one reported PHP error and, when it is among the
    // most frequent ones, send a DingTalk notification naming the code owner.
    $time = isset($_REQUEST['time'])?strval($_REQUEST['time']):"";
    $level = isset($_REQUEST['level'])?strval($_REQUEST['level']):"";
    $description = isset($_REQUEST['description'])?strval($_REQUEST['description']):"";
    $fullpath = isset($_REQUEST['fullpath'])?strval($_REQUEST['fullpath']):"";
    $linenumber = isset($_REQUEST['linenumber'])?intval($_REQUEST['linenumber']):0;

    if(empty($time) || empty($level) || empty($description) || empty($fullpath) || empty($linenumber)) {
        echo json_encode(array("errcode"=>-1, "errmsg"=>"请求参数不完整"));
        // Stop here: execution used to fall through and count/report an
        // error built from empty parameters.
        exit;
    }

    $errorcode = getuniquecode($fullpath, $linenumber);
    $helper = RedisHelper::getInstance();
    $helper->incrErrorNumber($errorcode);

    $errors = $helper->getFrequentErrors(7);
    // Error codes are md5 hex strings, so look the key up as a string. Comparing
    // intval() of two digests treats most unrelated codes as equal (both 0)
    // and could send the same message several times.
    if (is_array($errors) && array_key_exists($errorcode, $errors)) {
        // svn blame is slow, so it only runs when a message is actually sent.
        $bugauthor = getbashoutput($fullpath, $linenumber);
        $msg = "Bug 时间:{$time}\nBug级别:{$level}\n错误描述:{$description}\n文件全路径:{$fullpath}\n出错行数:{$linenumber}\n代码负责人:{$bugauthor}\n";
        Notifier::getInstance()->notify($msg);
    }

    echo json_encode(array("errcode"=>0, "errmsg"=>"ok"));
    
    

    实现的效果

    监控最终效果

    不足之处

    报警的触发机制还没完善,其实这块要做的内容会很多的,根据不同的场景选择不同的策略很重要,但是要做到灵活的处理,没有一个很好的设计是不行的,这里有兴趣的可以自己思考思考。

    crontab的时间间隔也是个问题,太小了对服务器压力稍微有一点点的影响(虽然这基本上也没什么影响,但是时间片太小了,触发机制就得跟着更改下);时间片太大了,报警的灵敏度就下降了,也就失去了报警的意义。

    总的来说,思路很简单,但是真的去实现起来并能很好的应用到开发中,还是有很长的路要走的。这里就当是抛砖引玉吧。

    相关文章

      网友评论

          本文标题:自动化报警的实现

          本文链接:https://www.haomeiwen.com/subject/dsvbvxtx.html