美文网首页
滴滴监控夜莺部署

滴滴监控夜莺部署

作者: xyz098 | 来源:发表于2020-04-19 18:33 被阅读0次

部署

reference

deploy stand-alone

compile

  • 源码

    # 克隆
    mkdir -p $GOPATH/src/github.com/didi
    cd $GOPATH/src/github.com/didi
    export GO111MODULE=off
    git clone https://github.com/didi/nightingale.git
    export GO111MODULE=on
    cd nightingale
    export GOPROXY=https://mirrors.aliyun.com/goproxy/
    
    # 编译
    ./control build
    
    # 打包 (n9e-2020-04-13-17-44-18.tar.gz)
    ./control pack
    

mysql & redis & nginx

  • redis

    # cat /data/docker/redis/docker-compose.yml
    version: '3'
    services:
      redis:
        image: docker.io/redis:4.0.1
        container_name: redis
        user: root
        volumes:
            - ./data:/data
        ports:
            - "6379:6379"
    
    # start
    docker-compose up -d 
    
  • mysql

    # cat /data/docker/mysql/docker-compose.yml
    version: '3'
    services:
      mysql:
        image: docker.io/mysql
        container_name: mysql
        user: root
        volumes:
            - ./conf:/etc/mysql/conf.d
            - ./data:/var/lib/mysql
        environment:
            - MYSQL_ROOT_PASSWORD=1234
        ports:
            - "3306:3306"
    
    # start
    docker-compose up -d
    
    # initialize
    cp -r $GOPATH/src/github.com/didi/nightingale/sql  /data/docker/mysql/conf/
    docker exec -it mysql bash 
    $ cd /etc/mysql/conf.d
    mysql -uroot -p1234 < sql/n9e_hbs.sql
    mysql -uroot -p1234 < sql/n9e_mon.sql
    mysql -uroot -p1234 < sql/n9e_uic.sql
    
  • nginx

    # cat /data/docker/nginx/docker-compose.yml
    version: '3'
    services:
      nginx:
        image: docker.io/nginx
        container_name: nginx
        user: root
        network_mode: host
        volumes:
            - ./conf/:/etc/nginx/conf.d/
            - ./log:/var/log/nginx
            - ./html:/usr/share/nginx/html
        ports:
            - "80:80"
    
    # 配置
    cp -r $GOPATH/src/github.com/didi/nightingale/pub /data/docker/nginx/html/
    
    # cat  /data/docker/nginx/conf/default.conf
     upstream n9e.monapi {
          server 127.0.0.1:5800;
          keepalive 10;
      }
      
      upstream n9e.index {
          server 127.0.0.1:5830;
          keepalive 10;
      }
      
      upstream n9e.transfer {
          server 127.0.0.1:5810;
          keepalive 10;
      }
      
      server {
          listen       80 default_server;
          server_name  192.168.5.56;
      
          # Load configuration files for the default server block.
          include /etc/nginx/default.d/*.conf;
      
          location / {
              root /usr/share/nginx/html/pub;
          }
      
          location /api/portal {
              proxy_pass http://n9e.monapi;
          }
      
          location /api/index {
              proxy_pass http://n9e.index;
          }
      
          location /api/transfer {
              proxy_pass http://n9e.transfer;
          }
      }
    
    # 启动
    docker-compose up -d
    

configuration & start

  • 配置

    # 解压
    mkdir /data/nightingale
    tar xvf n9e-2020-04-13-17-44-18.tar.gz -C /data/nightingale
    cd /data/nightingale
    
    # 修改数据库 (nightingale/etc/mysql.yml)
    grep -Ilr 3306  ./ | xargs -n1 -- sed -i 's/root:/root:1234/g'
    
    # 启动 (单个模块/所有模块 )
    ./control start collector 
    ./control start all         
    
    # 查看模块状态 (6个模块)
    ./control status         
    
  • 访问

    http://192.168.5.56/#/login (默认用户/密码: root/root)

mail-sender

  • install

    # compile
    cd $GOPATH/src
    mkdir -p github.com/n9e
    cd github.com/n9e
    export GO111MODULE=off
    git clone https://github.com/n9e/mail-sender.git
    export GO111MODULE=on
    cd mail-sender
    ./control build
    tar zcvf mail-sender.tar.gz mail-sender etc/mail.html etc/mail-sender.yml
    
    # install
    mkdir -p /data/nightingale/sender/mail-sender
    tar xf mail-sender.tar.gz -C /data/nightingale/sender/mail-sender
    
    # test
    cd /data/nightingale/sender/mail-sender
    ./mail-sender -t you@example.com
    nohup ./mail-sender &
    disown
    
  • systemd manager

    # xxx.service
    cp  $GOPATH/src/github.com/n9e/mail-sender/mail-sender.service ./
    sed -i 's#/home/n9e#/data/nightingale/sender/mail-sender#g' ./mail-sender.service
    mv mail-sender.service /usr/lib/systemd/system
    
    systemctl daemon-reload
    # 设置自启动,实质在 /etc/systemd/system/multi-user.target.wants/ 添加服务文件的链接
    systemctl enable mail-sender.service
    # 设置服务启停、查看状态
    systemctl start mail-sender.service    # 或 stop / status
    
  • view redis data

    # 查看redis中告警事件
    docker exec -it redis bash
    redis-cli
    > lrange "/n9e/sender/mail" 0 10
    
  • bug: 告警接收组人员没配置邮件时,发送仍然成功.

plugin_custom_metric

  • configure collector

    # configuration
    cat /data/nightingale/etc/collector.yml
    sys:
      timeout: 1000
      interval: 20
      # setting plugin dir
      plugin: /data/nightingale/plugin
    
      ifacePrefix:
        - eth
        - em
      
    # restart collector
    ./control restart collector
    
  • script

    命名规范

    # 执行时间间隔_xxx.xxx       (间隔: 秒)
    # chmod +x  执行时间间隔_xx.xx  (支持sh、py、pl、rb)
    

    采集硬件监控指标

    # json校验
    sh /data/nightingale/plugin/60_hardware.sh | python -m json.tool
    chmod +x /data/nightingale/plugin/60_hardware.sh
    
    cat  /data/nightingale/plugin/60_hardware.sh
    #!/bin/bash
    # date:2020/04/27
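    # desc: monitor hardware  正常0, 异常1
    
    # endpoint 取本机 IP; step 取脚本文件名前缀 (60_hardware.sh -> 60)
    localip=$(hostname -I | awk '{print $1}')
    step=$(basename "$0" | awk -F'_' '{print $1}')
    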
    hardware_battery=$(omreport chassis batteries|awk '/^Status/{if($NF=="Ok") {print 0} else {print 1}}')
    if [ "X"$hardware_battery == "X" ]; then
        hardware_battery=0
    fi 
    
    echo '[
      {
          "endpoint": "'${localip}'",
          "tags": "",
          "timestamp": '$(date +%s)',
          "metric": "hardware.battery",
          "value": '${hardware_battery}',
          "step": '${step}'
      }
     ]'
    

    排错

    tail -f /data/nightingale/logs/collector/ERROR.log
    
  • clean-index-metric

     # 清理已废弃的监控指标
     curl -XDELETE  http://127.0.0.1:5830/api/index/metrics -d' 
     { 
       "endpoints": ["10.51.1.31","10.51.1.32"],
       "metrics":["sys.test.duration"]
     }'
    

API_custom_metric

上报指标数据到接口

#!/bin/bash

# 模拟上报mysql指标,value前加1防止01类数据出现报错
metrics='[
 {
      "metric": "mysql.master_is_read_only",
      "endpoint": "10.51.1.33",
      "value": 1'$(date +%M)',
      "counterType": "GAUGE",
      "tags": "port=3306,isSlave=0,readOnly=0,type=mysql",
      "timestamp": '$(date +%s)',
      "step": 60
  }
]'


# collector API
curl -H "Content-Type:application/json" -XPOST  http://127.0.0.1:2058/api/collector/push -d "$metrics"


metrics1='[
 {
      "metric": "mysql.master_is_read_only",
      "endpoint": "10.51.1.34",
      "value": 1'$(date +%M)',
      "counterType": "GAUGE",
      "tags": "port=3306,isSlave=0,readOnly=0,type=mysql",
      "timestamp": '$(date +%s)',
      "step": 60
  }
]'

 # transfer API
 curl -H "Content-Type:application/json" -XPOST  http://127.0.0.1:5810/api/transfer/push -d "$metrics1"
  • 页面: 全部对象 —> 批量操作 —> 导入endpoint ( 10.51.1.33 )
mysql
go get -u github.com/n9e/mymon
cd $GOPATH/src/github.com/n9e/mymon
export GO111MODULE=on
# go run main.go metric.go show.go senddata.go version.go  const.go
go build .
cd .. && tar zcvf mymon-`date +%F`.tgz mymon/mymon mymon/etc/myMon.cfg  mymon/README.md
    
wget http://10.11.100.79:81/mymon-2020-05-06.tgz
tar xvf mymon-2020-05-06.tgz -C /usr/local  
echo '* * * * * root cd /usr/local/mymon && ./mymon -c etc/myMon.cfg' > /etc/cron.d/mymon

相关文章

网友评论

      本文标题:滴滴监控夜莺部署

      本文链接:https://www.haomeiwen.com/subject/knkbbhtx.html