脚本位置:
zice服务器
/home/jerry/aliexpension/trigger.py #服务器创建触发器脚本
/home/jerry/aliexpension/deploy_mobile.sh #自动部署脚本
/home/jerry/aliexpension/phj-web-aliexpansion.jar #郑敏的创建服务器脚本
脚本通过调用zabbix的API获取监控服务器的CPU、内存和磁盘的触发器状态
状态异常则触发trigger.py脚本,调用郑敏的金融云服务器创建脚本进行服务器的创建,并调用部署shell脚本部署生产代码。
处理流程
服务器CPU、内存、磁盘其中任何一项发生警报-->触发trigger.py脚本-->调用郑敏服务器创建脚本,创建完毕服务器-->调用自动部署脚本部署完毕。
trigger.py脚本报警逻辑
- 当服务器异常导致触发创建服务器的动作后,等待30s进行服务器创建
- 在创建完毕第一个服务器后的6个小时内不会再进行创建服务器
- 若服务器异常一直持续了6个小时,则会在6个小时后创建第二个服务器,逻辑返回到第一步,以此类推。
trigger.py 脚本内容:
vim /home/jerry/aliexpension/trigger.py
#导入脚本运行所需模块
import requests, json, time, pickle, os
# Get the current time.
def getcurrenttime(arg):
    """Return the current time in the representation selected by ``arg``.

    Args:
        arg: ``'s'`` for seconds since the epoch as a float (``time.time()``),
            or ``'d'`` for a local-time string formatted ``YYYY-MM-DD HH:MM:SS``.

    Returns:
        A float for ``'s'``; a string for ``'d'``.

    Raises:
        ValueError: if ``arg`` is neither ``'s'`` nor ``'d'``.
    """
    if arg == 's':
        return time.time()
    if arg == 'd':
        return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
    # The original printed a message and then hit `return t` with `t` never
    # assigned (UnboundLocalError), or failed with TypeError while building the
    # message for a non-string argument. Fail with one explicit error instead.
    raise ValueError('{0}时间获取出错'.format(arg))
# HTTP headers sent with every Zabbix JSON-RPC request made by this script.
headers = {'Content-Type': 'application/json-rpc'}
# Zabbix JSON-RPC endpoint used by all API calls in this script.
url = 'http://zabbix.puhuijia.com/zabbix/api_jsonrpc.php'
#url = 'http://10.2.2.19/pxzabbix/api_jsonrpc.php'  # test environment address
# Obtain the token that authenticates this script against the Zabbix API.
def getToken(username='zhaoliang', passwd='zhaoliang123'):
    """Log in to the Zabbix API with ``user.login`` and return the token.

    NOTE(review): the account name and password are hard-coded as default
    arguments; consider loading them from configuration instead.

    Args:
        username: account name sent as ``params.user``.
        passwd: password sent as ``params.password``.

    Returns:
        The ``result`` member of the ``user.login`` reply.

    Raises:
        KeyError: if the reply has no ``result`` member; the message carries
            the reply's ``error`` member when there is one.
        requests.exceptions.RequestException: on connection failure or timeout.
    """
    payload = {
        "jsonrpc": "2.0",
        "method": "user.login",
        "params": {
            "user": username,
            "password": passwd
        },
        "id": 1
    }
    # Without a timeout requests waits indefinitely on an unresponsive server,
    # which would stall the polling loop that calls this function.
    response = requests.post(url=url, headers=headers,
                             data=json.dumps(payload), timeout=30)
    reply = json.loads(response.text)
    if 'result' not in reply:
        # Same exception type as the original bare lookup, but with the
        # server's explanation attached.
        raise KeyError('result missing from zabbix user.login reply: {0}'.format(
            reply.get('error')))
    return reply['result']
# Fetch the state of one trigger on one host.
def getstatus(token,hostid,triggerid):
    """Return the ``value`` field of a single trigger from ``trigger.get``.

    The value is returned exactly as the API delivers it; ``findall`` in this
    file compares it against the strings ``'0'`` (normal) and ``'1'`` (fired).

    Args:
        token: authentication token sent as ``auth``.
        hostid: host id sent as ``params.hostids``.
        triggerid: trigger id sent as ``params.triggerids``.

    Returns:
        ``result[0]['value']`` from the API reply.

    Raises:
        KeyError: if the reply has no ``result`` member; the message carries
            the reply's ``error`` member when there is one.
        IndexError: if the API returned no trigger for this host/trigger pair.
        requests.exceptions.RequestException: on connection failure or timeout.
    """
    payload = {
        "jsonrpc": "2.0",
        "method": "trigger.get",
        "params": {
            "triggerids": triggerid,
            "hostids": hostid,
            "output": "extend",
            "selectFunctions": "extend"
        },
        "id": 1,
        "auth": token,
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.post(url=url, headers=headers,
                             data=json.dumps(payload), timeout=30)
    reply = json.loads(response.text)
    if 'result' not in reply:
        raise KeyError('result missing from zabbix trigger.get reply: {0}'.format(
            reply.get('error')))
    matches = reply['result']
    if not matches:
        # Same exception type as the original `[0]` lookup, with context added.
        raise IndexError('zabbix returned no trigger {0} for host {1}'.format(
            triggerid, hostid))
    return matches[0]['value']
# Describe the Zabbix hosts and triggers this script watches.
def monitoredhost(token=None):
    """Build the dictionary of monitored hosts and their trigger ids.

    Queries ``host.get`` for every host, keeps those whose name is in the
    hard-coded ``myhost`` list, and for each of them queries ``trigger.get``
    to find the triggers whose description matches one of ``triggername``.

    Args:
        token: Zabbix authentication token. When omitted a fresh one is
            obtained with ``getToken()`` on every call. (The original default
            ``token=getToken()`` was evaluated once, when the function was
            defined, so it logged in at import time and kept reusing that
            single token for the life of the process.)

    Returns:
        ``{hostname: {"id": hostid, "triggers": {key: triggerid}}}`` where
        ``key`` is ``"mem"``, ``"cpu"`` or ``"heap"``. ``"heap"`` is the key
        used for the "tomcat unavailable for 3 minutes" trigger.
    """
    if token is None:
        token = getToken()
    myhost = ['dubbo1','dubbo2','dubbo3','dubbo4','dubbo5','dubbo6','dubbo7','dubbo8','dubbo9','dubbo10','web-mobile1','web-mobile2','web_back1','web_back2']
    triggername = ["{HOST.NAME}服务器可用内存不足","{HOST.NAME}处理器负载过高","本机tomcat已3分钟不可用"]
    # Trigger description -> short key stored in the result.
    trigger_keys = {
        triggername[0]: "mem",
        triggername[1]: "cpu",
        triggername[2]: "heap",
    }
    host_query = {
        "jsonrpc": "2.0",
        "method": "host.get",
        "params": {
            "output": ["host","hostid"],
        },
        "id": 1,
        "auth": token
    }
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.post(url=url, headers=headers,
                             data=json.dumps(host_query), timeout=30)
    hosts = json.loads(response.text)
    items = {}
    for entry in hosts['result']:
        hostid = entry['hostid']
        hostname = entry['host']
        if hostname not in myhost:
            continue
        items[hostname] = {"id":hostid, "triggers":{}}
        trigger_query = {
            "jsonrpc": "2.0",
            "method": "trigger.get",
            "params": {
                'hostids': hostid
            },
            "id": 1,
            "auth": token
        }
        response = requests.post(url=url, headers=headers,
                                 data=json.dumps(trigger_query), timeout=30)
        triggers = json.loads(response.text)
        for trig in triggers['result']:
            key = trigger_keys.get(trig["description"])
            if key is not None:
                items[hostname]["triggers"][key] = trig["triggerid"]
    return items
def findall():
    """Check every monitored trigger and report the abnormal ones.

    Returns:
        ``0`` when no trigger is in state ``'1'``;
        ``1`` as soon as a trigger reports a value other than ``'0'``/``'1'``;
        otherwise a dict with one entry per monitored host, mapping the host
        name to ``{trigger_key: triggerid}`` for its abnormal triggers (the
        inner dict is empty for hosts without abnormal triggers).
    """
    # Log in once per check. The original called getToken() inside the inner
    # loop, i.e. one login request per trigger on every poll.
    token = getToken()
    items = monitoredhost(token=token)
    excep = {}
    error = 0
    for host, info in items.items():
        excep[host] = {}
        hostid = info['id']
        for trigger, triggerid in info['triggers'].items():
            status = getstatus(token=token, hostid=hostid, triggerid=triggerid)
            if status == '0':
                # Trigger is in its normal state.
                print('{time} INFO:主机---> {0} 的触发器 {1} 状态为 {2} 正常'.format(host, trigger, status,time=getcurrenttime('d')))
            elif status == '1':
                # Trigger fired: remember it so the caller can act on it.
                error = 1
                print('{time} INFO:主机---> {0} 的触发器 {1} 状态为 {2} 发生异常'.format(host, trigger, status,time=getcurrenttime('d')))
                excep[host][trigger] = triggerid
            else:
                # Unexpected value: abort the whole check.
                return 1
    if error == 1:
        return excep
    return 0
def createcheck(hostname,hoststatus=1):
    """Decide whether a new server should be created for an abnormal host.

    Per-host state is kept in the pickle file ``hoststatus.pkl`` in the current
    working directory: ``time1`` (start of the current waiting period),
    ``time2`` (time of the latest check) and ``hoststatus`` (1 = waiting for
    the first 30 s to elapse, 0 = a server has been requested and the long
    re-creation timer is running).

    NOTE(review): the long timer is 36000 s (600 minutes, i.e. 10 hours) while
    the log messages speak of 6 hours; confirm which one is intended.
    NOTE(review): when the long timer expires the state goes back to
    ``hoststatus`` 1 with a fresh ``time1``, so the next check 30 s later
    requests yet another server; confirm this is intended.

    Args:
        hostname: name of the abnormal host.
        hoststatus: unused; kept so existing callers keep working.

    Returns:
        0 when a new server should be created now, 1 otherwise.
    """
    state_file = 'hoststatus.pkl'
    if not os.path.isfile(state_file):
        # Create an empty state file so the size check below works.
        with open(state_file, 'w'):
            pass

    if os.path.getsize(state_file) == 0:
        print('pkl文件大小:{0} 为空,初始化pkl文件'.format(os.path.getsize(state_file)))
        # Start in the "waiting 30 s" state (hoststatus 1), exactly like a host
        # seen for the first time in a non-empty file. The original stored 0
        # here, which skipped the 30 s trigger and postponed the first server
        # creation until the long timer expired.
        info = {hostname: {'time1': getcurrenttime('s'), 'hoststatus': 1}}
        with open(state_file, 'wb') as f1:
            pickle.dump(info, f1)
        print('{time} 服务器状态异常等待,若持续30s则创建服务器,等待30s......'.format(time=getcurrenttime('d')))
        return 1

    # The file is only ever written by this function, so unpickling it is
    # acceptable; do not point this at data from an untrusted source.
    with open(state_file, 'rb') as fr:
        s1 = pickle.load(fr)

    status = 1
    if hostname in s1:
        entry = s1[hostname]
        entry['time2'] = getcurrenttime('s')
        timecha = entry['time2'] - entry['time1']
        if entry['hoststatus'] == 1:
            if timecha >= 30:
                entry['hoststatus'] = 0
                print('{time} 首次触发创建服务器 {0}'.format(hostname, time=getcurrenttime('d')))
                status = 0
            else:
                print('{time} 主机 {0} 正在等待30s'.format(hostname, time=getcurrenttime('d')))
        elif entry['hoststatus'] == 0:
            if timecha >= 36000:
                entry['hoststatus'] = 1
                entry['time1'] = getcurrenttime('s')
                print('{time} 异常已持续6小时,再次触发创建服务器 {0}'.format(hostname, time=getcurrenttime('d')))
                status = 0
            elif timecha >= 30:
                print('{time} 主机 {0} 状态异常且在监控中, 持续时间为 {1} 分钟, {2} 分钟后若持续保持异常状态则会新建服务器.'.format(hostname,int(timecha/60),int(600-timecha/60),time=getcurrenttime('d')))
            else:
                print('{time} 主机 {0} 出现异常ERROR'.format(hostname, time=getcurrenttime('d')))
    else:
        print('{time} 主机 {0} 首次出现异常,已添加到文件中.'.format(hostname, time=getcurrenttime('d')))
        s1[hostname] = {'time1': getcurrenttime('s'), 'hoststatus': 1}
        print('{time} 服务器状态异常等待,若持续30s则创建服务器,等待30s......'.format(time=getcurrenttime('d')))

    # Write only after the new state has been computed; the original opened the
    # file for writing (truncating it) first, so any error in between wiped
    # the stored state.
    with open(state_file, 'wb') as f:
        pickle.dump(s1, f)
    return status
def dubboapi():
    """Request a new dubbo server from the local scaling service.

    Sends a GET to the ``create_ecs_instance`` endpoint on localhost:7777 with
    ``instance_type=1`` and then logs a completion message. The response is
    not inspected.
    """
    endpoint = 'http://localhost:7777/create_ecs_instance?instance_type=1'
    requests.get(endpoint)
    finished_at = getcurrenttime('d')
    print('{time} 创建新的dubbo服务器完毕!'.format(time=finished_at))
def webmobileapi():
    """Request a new web server from the local scaling service.

    Sends a GET to the ``create_ecs_instance`` endpoint on localhost:7777 with
    ``instance_type=2`` and then logs a completion message. The response is
    not inspected.
    """
    endpoint = 'http://localhost:7777/create_ecs_instance?instance_type=2'
    requests.get(endpoint)
    finished_at = getcurrenttime('d')
    print('{time} 创建新的web服务器完毕!'.format(time=finished_at))
def createserver():
    """Run one monitoring pass and create servers for abnormal hosts.

    Calls ``findall()``; for every host that has at least one abnormal trigger
    and whose name contains ``dubbo`` or ``mobile``, asks ``createcheck()``
    whether a server is due and, if so, calls ``dubboapi()`` or
    ``webmobileapi()`` respectively. Other hosts (e.g. ``web_back*``) are
    ignored.
    """
    print('{time} 开始进行状态检查.'.format(time=getcurrenttime('d')))
    result = findall()
    if result == 0:
        print('{time} 本次状态检查没有发现有异常.'.format(time=getcurrenttime('d')))
        return
    if result == 1:
        print('{time} 本次状态检查没有发现有异常,返回值为1.'.format(time=getcurrenttime('d')))
        return

    for i in result:
        abnormal = result[i]
        # findall() lists every monitored host and keys the inner dict by
        # 'mem'/'cpu'/'heap'. The original tested result[i].get('trigger', 0),
        # a key that is never present, so no branch ever ran and no server was
        # ever created. Test for "has any abnormal trigger" instead.
        if not abnormal:
            continue
        if 'dubbo' in i:
            create = dubboapi
        elif 'mobile' in i:
            create = webmobileapi
        else:
            # Includes web_back hosts: no automatic scaling for them.
            continue
        if createcheck(i, hoststatus=1) != 0:
            continue
        for j in abnormal:
            print('{time} {i} 服务器 {j} 参数值超过阈值,准备新建服务器.'.format(time=getcurrenttime('d'), j=j, i=i))
        create()
# Script entry: run one monitoring pass, pause 30 seconds, repeat forever.
while True:
    createserver()
    time.sleep(30)
deploy_mobile.sh 脚本内容
vim /home/jerry/aliexpension/deploy_mobile.sh
#!/bin/bash
# script usage:
#command line------> sh scriptname.sh IP TYPE
#DESCRIPTION
# scriptname.sh: the name of this script
# IP: the IP address of new server
# TYPE:
# 1 the project type of web
# 2 the project type of dubbo
# Provision the server at IP address $1 as a dubbo provider and (re)start the
# service_mobile service on it. All remote access uses ssh port 20022.
dubbo() {
# Name for the new machine: "dubbo" followed by the current MMDDHHMMSS timestamp.
hostname=dubbo`date +%m%d%H%M%S`
# First contact with host-key checking disabled; presumably this records the
# new host's key so the later ssh/rsync calls that omit the option do not prompt.
ssh -o StrictHostKeyChecking=no -p 20022 $1 "ls"
# Stage the local dubbo init script in /home/jerry/ on the target.
scp -P 20022 /etc/init.d/dubbo $1:/home/jerry/
# Run expect on the target to become root via `su` and prepare the machine:
# set the host name, create the log/properties/service directories, hand
# /dubbo_provider and /alidata to user jerry, install the staged init script
# and append "<ip> <hostname>" to /etc/hosts.
# The remote script sits inside double quotes, so $1 and ${hostname} are
# expanded by this local shell before ssh runs.
# NOTE(review): the root password is embedded here in plain text.
# NOTE(review): the commands are sent right after the password without waiting
# for a shell prompt in between — confirm this is reliable on the target image.
ssh -o StrictHostKeyChecking=no -p 20022 $1 "/bin/expect <<EOF
spawn su - root
expect \"assword\"
send \"Puhuijia@123\r\"
send \"hostnamectl set-hostname ${hostname}\r\"
send \"mkdir -p /alidata/puhuijia/puhuijia_mobile_log/ /dubbo_provider/log /alidata/puhuijia/puhuijia_mobile_properties /dubbo_provider/service_mobile\r\"
send \"chown -R jerry:jerry /dubbo_provider/\r\"
send \"chown -R jerry:jerry /alidata/\r\"
send \"rm -rf /etc/init.d/dubbo\r\"
send \"cp -rf /home/jerry/dubbo /etc/init.d/\r\"
send \"echo $1 $hostname >> /etc/hosts\r\"
expect eof
EOF"
# Empty the remote service directory, then copy this machine's service_mobile
# build and the properties directory to the target.
ssh -p 20022 $1 "rm -rf /dubbo_provider/service_mobile/*"
rsync -e 'ssh -p 20022' -avz /dubbo_provider/service_mobile/ $1:/dubbo_provider/service_mobile/
rsync -e 'ssh -p 20022' -avz /home/jerry/aliexpension/puhuijia_mobile_properties/ $1:/alidata/puhuijia/puhuijia_mobile_properties/
# Restart the service through the installed init script.
ssh -p 20022 $1 "/etc/init.d/dubbo service_mobile restart"
}
# Provision the server at IP address $1 as a tomcat web node (tomcat_mobile)
# and (re)start it. All remote access uses ssh port 20022.
web() {
# Name for the new machine: "web_mobile" followed by the current MMDDHHMMSS timestamp.
hostname=web_mobile`date +%m%d%H%M%S`
# First contact with host-key checking disabled; presumably this records the
# new host's key so the later ssh/rsync calls that omit the option do not prompt.
ssh -o StrictHostKeyChecking=no -p 20022 $1 "ls"
# Stage the local tomcat init script in /home/jerry/ on the target.
scp -P 20022 /etc/init.d/tomcat $1:/home/jerry/
# Run expect on the target to become root via `su` and prepare the machine:
# set the host name, create the log/properties directories, unpack the
# tomcat 8.0.36 tarball from /usr/local/soft into /usr/local and rename it to
# tomcat_mobile, hand it to user jerry, install the staged init script and
# append "<ip> <hostname>" to /etc/hosts.
# The remote script sits inside double quotes, so $1 and ${hostname} are
# expanded by this local shell before ssh runs.
# NOTE(review): the root password is embedded here in plain text.
# NOTE(review): the commands are sent right after the password without waiting
# for a shell prompt in between — confirm this is reliable on the target image.
ssh -o StrictHostKeyChecking=no -p 20022 $1 "/bin/expect <<EOF
spawn su - root
expect \"assword\"
send \"Puhuijia@123\r\"
send \"hostnamectl set-hostname ${hostname}\r\"
send \"mkdir -p /alidata/puhuijia/puhuijia_mobile_log/ /alidata/puhuijia/puhuijia_mobile_properties\r\"
send \"tar -zxvf /usr/local/soft/apache-tomcat-8.0.36.tar.gz -C /usr/local/\r\"
send \"mv -f /usr/local/apache-tomcat-8.0.36 /usr/local/tomcat_mobile\r\"
send \"chown -R jerry:jerry /usr/local/tomcat_mobile\r\"
send \"rm -rf /etc/init.d/tomcat\r\"
send \"cp -rf /home/jerry/tomcat /etc/init.d/\r\"
send \"echo $1 $hostname >> /etc/hosts\r\"
expect eof
EOF"
# Empty the remote webapps directory, then copy this machine's "app" webapp
# and the properties directory to the target.
ssh -p 20022 $1 "rm -rf /usr/local/tomcat_mobile/webapps/*"
rsync -e 'ssh -p 20022' -avz /usr/local/tomcat_zice/webapps/app $1:/usr/local/tomcat_mobile/webapps/
rsync -e 'ssh -p 20022' -avz /home/jerry/aliexpension/puhuijia_mobile_properties/ $1:/alidata/puhuijia/puhuijia_mobile_properties/
# Back up the stock catalina.sh under a host-specific name, download a
# replacement, strip carriage returns from it and make it executable.
# NOTE(review): the replacement script is downloaded and later executed
# without any integrity check.
ssh -p 20022 $1 "cd /usr/local/tomcat_mobile/bin;mv catalina.sh catalina.sh.bak${hostname};wget https://coding.net/u/himan/p/staticfile/git/raw/master/catalina.sh;sed -i 's/\r//' catalina.sh;chmod 755 catalina.sh"
# NOTE(review): the restart is issued twice in a row; unclear from here
# whether that is deliberate.
ssh -p 20022 $1 "/etc/init.d/tomcat tomcat_mobile restart"
ssh -p 20022 $1 "/etc/init.d/tomcat tomcat_mobile restart"
}
# Entry point: $1 is the IP of the new server, $2 selects the project type
# (1 = web, 2 = dubbo).
# NOTE(review): trigger.py requests instance_type=1 for dubbo and
# instance_type=2 for web, the opposite numbering; confirm the caller passes
# the TYPE this script expects.
if [ -z "$1" ]; then
    # Without an IP every ssh/scp/rsync call in the functions would be malformed.
    echo "usage: sh $0 IP TYPE   (TYPE: 1 = web, 2 = dubbo)" >&2
    exit 1
fi
case "$2" in
    1)
        web "$1"
        ;;
    2)
        dubbo "$1"
        ;;
    *)
        # The original silently did nothing (exit status 0) for a missing or
        # unknown TYPE, hiding a failed deployment from the caller.
        echo "usage: sh $0 IP TYPE   (TYPE: 1 = web, 2 = dubbo)" >&2
        exit 1
        ;;
esac
网友评论