下面的脚本用于检测指定名称的 Spark 作业当前正在运行的个数:若已有同名作业在运行,则发送告警邮件并跳过本次提交;否则提交新的 Spark 作业。
#!/bin/bash
# Check how many instances of a named Spark job are running on YARN (via the
# Cloudera Manager API); if one is already running, send an alert mail and
# skip, otherwise submit a new Spark job.
#
# Trace every command. This was '-x' in the shebang; options placed there are
# lost when the script is invoked as 'bash script.sh', so set them explicitly.
set -x
# Error on unset variables; make a pipeline fail if any stage fails.
set -uo pipefail

#basic config
readonly EMAIL=XXX@qq.com
readonly JOB_NAME="SparkALS"
# NOTE(review): plaintext Cloudera Manager credentials hardcoded in the
# script — move them to a protected config file or environment variables.
readonly CDHUSER=admin
readonly CDHPASSWORD=admin
readonly CDHURL=http://ip:7180
# Scratch files: raw API response and the per-job filtered snippet.
readonly JSON=/tmp/yarnApplications_${JOB_NAME}.json
readonly LOG=/tmp/applications.log
#check whether a job with this name is already running
# CM API version: query the CM "version" endpoint to discover the highest
# supported version (CDH 5.15 supports v19). The URL below hardcodes v16 —
# TODO confirm this matches your CM instance.
# Fetch running-application data (job name, state, appid, resources) for the
# YARN service from Cloudera Manager.
# NOTE(review): '-u user:password' exposes credentials in 'ps' output and in
# the '-x' trace; prefer a curl config file or .netrc.
if ! curl -sf -u "$CDHUSER:$CDHPASSWORD" \
    "$CDHURL/api/v16/clusters/RZCluster/services/yarn/yarnApplications" > "$JSON"; then
  # Without this check, an API outage would yield an empty response, a count
  # of 0, and a duplicate job submission below.
  echo "ERROR: failed to query CM API at $CDHURL" >&2
  exit 1
fi
# Keep the 4 lines following each job-name match: the "state" field appears
# within that window of the pretty-printed JSON.
grep -A 4 -- "$JOB_NAME" "$JSON" > "$LOG" || true
# Count applications whose state is RUNNING (grep -c replaces grep | wc -l;
# '|| true' keeps a zero count from tripping pipefail/-e semantics).
RUNNINGNUM=$(grep "state" "$LOG" | grep -c "RUNNING" || true)
echo "The running $JOB_NAME job num is $RUNNINGNUM"
# If any instance is already running, alert by mail and skip submitting.
if [ "$RUNNINGNUM" -gt 0 ]; then
  printf '%s : The current running %s job num is %s.\n' \
    "$(date "+%Y-%m-%d %H:%M:%S")" "$JOB_NAME" "$RUNNINGNUM" | mail \
    -r "From: alertAdmin <${EMAIL}>" \
    -s "Warn: Skip the new $JOB_NAME spark job." "${EMAIL}"
  exit 0
fi
#spark parameters
readonly CLASS_NAME=org.apache.spark.examples.SparkALS
readonly JAR_PATH=/opt/cloudera/parcels/SPARK2/lib/spark2/examples/jars/spark-examples_2.11-2.1.0.cloudera1.jar
#submit spark job
# No instance is running: submit the example Spark ALS job to YARN.
spark2-submit \
  --master yarn \
  --class "$CLASS_NAME" \
  "$JAR_PATH"
# Propagate spark2-submit's exit status explicitly (a bare 'exit' does the
# same implicitly, but this makes the intent unmistakable).
exit $?
网友评论