- Hive与Spark的版本兼容性:
- 测试Spark服务是否正常
spark-submit --class org.apache.spark.examples.SparkPi \
--master yarn-client --num-executors 4 --driver-memory 2g \
--driver-cores 1 --executor-memory 2g --executor-cores 1 \
/opt/cloudera/parcels/CDH/lib/spark/lib/spark-examples.jar 10
- spark-assembly.jar包软链至${HIVE2_HOME}/lib目录
[root@ip-xxx-xx-x-xx lib]# pwd
/opt/cloudera/HIVE2/lib
[root@ip-xxx-xx-x-xx lib]# ln -s /opt/cloudera/parcels/CDH/lib/spark/lib/spark-assembly.jar spark-assembly.jar
[root@ip-xxx-xx-x-xx lib]# ll spark-assembly.jar
- spark-assembly.jar包上传至HDFS的/spark-jars目录
[root@ip-xxx-xx-x-xx lib]# hadoop fs -mkdir -p /spark-jars
[root@ip-xxx-xx-x-xx lib]# hadoop fs -put /opt/cloudera/parcels/CDH/lib/spark/lib/spark-assembly.jar /spark-jars/
[root@ip-xxx-xx-x-xx lib]# hadoop fs -ls /spark-jars
- hive-site.xml配置修改
<property>
<name>spark.master</name>
<value>yarn-cluster</value>
</property>
<property>
<name>hive.merge.sparkfiles</name>
<value>true</value>
</property>
<property>
<name>spark.executor.memory</name>
<value>1g</value>
</property>
<property>
<name>spark.driver.memory</name>
<value>1g</value>
</property>
<property>
<name>spark.executor.cores</name>
<value>1</value>
</property>
<property>
<name>spark.yarn.driver.memoryOverhead</name>
<value>102</value>
</property>
<property>
<name>spark.yarn.executor.memoryOverhead</name>
<value>326</value>
</property>
<property>
<name>spark.dynamicAllocation.enabled</name>
<value>true</value>
</property>
<property>
<name>spark.dynamicAllocation.initialExecutors</name>
<value>1</value>
</property>
<property>
<name>spark.dynamicAllocation.minExecutors</name>
<value>1</value>
</property>
<property>
<name>spark.dynamicAllocation.maxExecutors</name>
<value>2147483647</value>
</property>
<property>
<name>hive.spark.dynamic.partition.pruning.map.join.only</name>
<value>false</value>
</property>
<property>
<name>spark.shuffle.service.enabled</name>
<value>true</value>
</property>
<property>
<name>spark.eventLog.enabled</name>
<value>true</value>
</property>
<property>
<name>spark.eventLog.dir</name>
<value>hdfs://ip-xxx-xx-x-xx.ap-southeast-1.compute.internal:8020/user/spark/applicationHistory</value>
</property>
<property>
<name>spark.yarn.jar</name>
<value>hdfs://ip-xxx-xx-x-xx.ap-southeast-1.compute.internal:8020/spark-jars/spark-assembly.jar</value>
</property>
- 重启HiveMetastore和HiveServer2
[root@ip-xxx-xx-x-xx ~]# hive2
[root@ip-xxx-xx-x-xx ~]# hive2-server
[root@ip-xxx-xx-x-xx ~]# ps -ef |grep -i hivemetastore
[root@ip-xxx-xx-x-xx ~]# ps -ef |grep -i hiveserver2
alias hive2="nohup /opt/cloudera/HIVE2/bin/hive --service metastore > /opt/cloudera/HIVE2/logs/hive-metastore.log 2>&1 &"
alias hive2-server="nohup /opt/cloudera/HIVE2/bin/hive --service hiveserver2 > /opt/cloudera/HIVE2/logs/hive-server2.log 2>&1 &"
alias beeline2="/opt/cloudera/HIVE2/bin/beeline"
- HiveCLI 验证
[root@ip-xxx-xx-x-xx bin]# sudo -u hive ./hive
hive> use test;
hive> set hive.execution.engine=spark;
hive> select id,count(*) from test_table group by id ;
- 测试beeline连接
[root@ip-xxx-xx-x-xx ~]# beeline2
beeline> !connect jdbc:hive2://localhost:10000 hive hive
0: jdbc:hive2://localhost:10000> use test;
0: jdbc:hive2://localhost:10000> set hive.execution.engine=spark;
0: jdbc:hive2://localhost:10000> select id,count(*) from test_table group by id;
大数据视频推荐:
腾讯课堂
CSDN
AIops智能运维机器学习算法实战
ELK入门精讲
AIOps智能运维实战
ELK7 stack开发运维
大数据语音推荐:
ELK7 stack开发运维
企业级大数据技术应用
大数据机器学习案例之推荐系统
自然语言处理
大数据基础
人工智能:深度学习入门到精通
网友评论