In Spark on YARN mode, container logs land on a randomly assigned local disk. To let Flume collect them, the job is submitted with a custom log4j configuration that writes an extra copy of the log to a fixed directory.
log4j-executor.properties
# Log everything at INFO to the console and to a daily rolling file
log4j.rootCategory=INFO, console, DailyRollingFile
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
# Settings to quiet third party logs that are too verbose
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
# Extra appender: write a second copy of the log to a fixed directory for Flume
log4j.appender.DailyRollingFile=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DailyRollingFile.File=/opt/appdata/disk01/log/spark-on-yarn-apps-log/${spark.yarn.app.container.log.dir}/spark.log
log4j.appender.DailyRollingFile.layout=org.apache.log4j.PatternLayout
log4j.appender.DailyRollingFile.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} [%t] [%p] [%c] [%m] %n
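With the extra appender in place, a Flume agent can tail the fixed directory. The sketch below is not from the original post: the agent/source/channel/sink names, the position-file path, the Flume conf directory, the memory-channel sizes, and the logger sink are all illustrative assumptions, and the filegroup pattern assumes the spark.log copies sit directly under the fixed root. Since ${spark.yarn.app.container.log.dir} expands to an absolute per-container path, adjust the filegroup to whatever directory layout the appender actually produces on your cluster (TAILDIR only accepts a regex in the file-name part).

# Sketch only: write a minimal Flume agent config and start the agent.
cat > /tmp/spark-log-agent.conf <<'EOF'
a1.sources  = r1
a1.channels = c1
a1.sinks    = k1

# TAILDIR source: follow spark.log and its daily rollovers.
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /opt/appdata/flume/taildir_position.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /opt/appdata/disk01/log/spark-on-yarn-apps-log/spark.log.*
a1.sources.r1.channels = c1

a1.channels.c1.type = memory
a1.channels.c1.capacity = 10000
a1.channels.c1.transactionCapacity = 1000

# Placeholder sink; in practice this would typically be a Kafka or HDFS sink.
a1.sinks.k1.type = logger
a1.sinks.k1.channel = c1
EOF

flume-ng agent --conf /etc/flume/conf --conf-file /tmp/spark-log-agent.conf \
  --name a1 -Dflume.root.logger=INFO,console

The submit script below ships log4j-executor.properties to the driver and executors with --files and activates it through spark.driver.extraJavaOptions / spark.executor.extraJavaOptions: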
nohup /usr/hdp/2.6.2.0-205/spark2/bin/spark-submit \
--name $app_name \
--class $class_name \
--master yarn \
--deploy-mode cluster \
--driver-memory 1g --executor-memory 1g \
--num-executors 2 \
--executor-cores 1 \
--conf spark.driver.userClassPathFirst=true \
--conf spark.executor.userClassPathFirst=true \
--conf "spark.driver.extraJavaOptions=-Dlog4j.configuration=log4j-executor.properties" \
--conf "spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j-executor.properties" \
--files /usr/hdp/2.6.2.0-205/spark2/conf/log4j-executor.properties \
--conf spark.streaming.concurrentJobs=4 \
Testxxx.jar >> "/opt/applog/MskyLog/$app_name/$(date +"%Y-%m-%d").log" 2>&1 &
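Once the job is running, a quick sanity check (the paths come from the config and script above; the five-minute window in -mmin is just an example) confirms that the extra copies are being written and that the driver-side output lands in the per-app log:

# Look for spark.log copies written by the DailyRollingFile appender
# anywhere under the fixed root within the last five minutes.
find /opt/appdata/disk01/log/spark-on-yarn-apps-log -name 'spark.log*' -mmin -5

# Follow the stdout/stderr that spark-submit itself redirects.
tail -f "/opt/applog/MskyLog/$app_name/$(date +"%Y-%m-%d").log"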