Environment
| Host | IP | Roles |
| --- | --- | --- |
| yfbd-virtual-machine-01 | 10.216.6.227 | ZooKeeper, DataNode, NameNode (nn1), Spark master/worker, Hive, Hive Metastore |
| yfbd-virtual-machine-02 | 10.216.6.228 | ZooKeeper, DataNode, standby NameNode (nn2), Spark worker, Hive |
| yfbd-virtual-machine-03 | 10.216.6.229 | ZooKeeper, DataNode, Spark worker, Hive |
Configure passwordless SSH login
1. Edit the hosts file (/etc/hosts) on every node
10.216.6.227 yfbd-virtual-machine-01
10.216.6.228 yfbd-virtual-machine-02
10.216.6.229 yfbd-virtual-machine-03
2. Set up passwordless login
ssh-keygen
ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-02
ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-03
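Passwordless login can be verified by SSHing to each peer; no password prompt should appear. (start-all.sh later also SSHes into yfbd-virtual-machine-01 itself, so copying the key to the local node as well may be needed.)
ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-01   # optional: key for the local node itself
ssh yfbd-virtual-machine-02 hostname   # should print the remote hostname without asking for a password
ssh yfbd-virtual-machine-03 hostname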
Configure environment variables
vim /etc/profile
#JAVA_HOME
export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
export PATH=$PATH:$JAVA_HOME/bin
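For the variables to take effect in the current shell, /etc/profile has to be re-read; a quick sanity check:
source /etc/profile
java -version    # should now pick up the JDK under /home/yfbd/bigdata/jdk1.8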
Hadoop HA deployment
1. Configure core-site.xml
vim /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns</value>
</property>
<!-- Hadoop data storage directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/yfbd/bigdata/hadoop-3.1.4/data/tmp</value>
</property>
<!-- Static user for HDFS web UI access: yfbd -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>yfbd</value>
</property>
<!-- Hosts from which the yfbd superuser is allowed to proxy other users -->
<property>
<name>hadoop.proxyuser.yfbd.hosts</name>
<value>*</value>
</property>
<!-- Groups the yfbd superuser is allowed to impersonate -->
<property>
<name>hadoop.proxyuser.yfbd.groups</name>
<value>*</value>
</property>
<!-- ZooKeeper quorum address used for HA -->
<property>
<name>ha.zookeeper.quorum</name>
<value>yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181</value>
</property>
</configuration>
2. Configure hdfs-site.xml
<configuration>
<!-- Replication factor: 3 -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns</value>
</property>
<!-- The two NameNodes in the HA pair -->
<property>
<name>dfs.ha.namenodes.ns</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address nn1 listens on -->
<property>
<name>dfs.namenode.rpc-address.ns.nn1</name>
<value>yfbd-virtual-machine-01:8020</value>
</property>
<!-- HTTP address nn1 listens on -->
<property>
<name>dfs.namenode.http-address.ns.nn1</name>
<value>yfbd-virtual-machine-01:50070</value>
</property>
<!-- RPC address nn2 listens on -->
<property>
<name>dfs.namenode.rpc-address.ns.nn2</name>
<value>yfbd-virtual-machine-02:8020</value>
</property>
<!-- HTTP address nn2 listens on -->
<property>
<name>dfs.namenode.http-address.ns.nn2</name>
<value>yfbd-virtual-machine-02:50070</value>
</property>
<!-- Shared edits storage for the NameNodes -->
<!-- The JournalNode quorum that stores the NameNode edit log (usually co-located with ZooKeeper) -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://yfbd-virtual-machine-01:8485;yfbd-virtual-machine-02:8485;yfbd-virtual-machine-03:8485/ns</value>
</property>
<!-- Local directory where the JournalNode stores edits -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/yfbd/bigdata/hadoop-3.1.4/data/journal</value>
</property>
<!-- Enable automatic failover; this can be left unset if automatic failover is not needed -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Failover proxy provider -->
<!-- The Java class HDFS clients use to determine which NameNode is currently active -->
<property>
<name>dfs.client.failover.proxy.provider.ns</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing method -->
<!-- Several fencing methods are available (see the official docs); sshfence logs in to the old active NameNode over SSH and kills the process -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
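<!-- Disable HDFS permission checking -->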
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>
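Note: sshfence logs in to the other NameNode over SSH to kill the old active process, so it usually also needs the private key of the user set up in the passwordless-login step. A sketch of the extra hdfs-site.xml property, assuming the key generated earlier:
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/yfbd/.ssh/id_rsa</value>
</property>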
3. Configure yarn-site.xml
<configuration>
<!-- Use the MapReduce shuffle auxiliary service -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Site specific YARN configuration properties -->
<!-- Enable ResourceManager HA (disabled by default) -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Declare the RM cluster id and its ResourceManagers (three of them here) -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rmcluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2,rm3</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>yfbd-virtual-machine-01</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>yfbd-virtual-machine-02</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm3</name>
<value>yfbd-virtual-machine-03</value>
</property>
<!-- ZooKeeper quorum address -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181</value>
</property>
<!-- Enable RM recovery so that running applications survive a ResourceManager failure (default: false) -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Store ResourceManager state in ZooKeeper (the default store is FileSystem-based) -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- Environment variables inherited by containers -->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- URL of the log server for aggregated logs -->
<property>
<name>yarn.log.server.url</name>
<value>http://yfbd-virtual-machine-01:19888/jobhistory/logs</value>
</property>
<!-- Retain aggregated logs for 7 days (604800 seconds) -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
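<!-- ID of this ResourceManager: rm1 on yfbd-virtual-machine-01; on yfbd-virtual-machine-02/03 this value would presumably be rm2/rm3 -->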
<property>
<name>yarn.resourcemanager.ha.id</name>
<value>rm1</value>
</property>
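<!-- Disable the NodeManager physical and virtual memory checks on containers -->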
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
</configuration>
4. Configure workers
yfbd-virtual-machine-01
yfbd-virtual-machine-02
yfbd-virtual-machine-03
5. Format the NameNode
cd /home/yfbd/bigdata/hadoop-3.1.4/bin
./hdfs namenode -format
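For an HA cluster the first format has a few prerequisites and follow-up steps: the Hadoop configuration must be present on all three nodes, ZooKeeper must already be running, and the JournalNodes must be up before the format; afterwards the standby NameNode is bootstrapped and the failover znode is created. A rough sketch of that sequence, assuming the paths and hostnames above:
# Before the format: sync the Hadoop config to the other nodes and start a JournalNode on every node
scp /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/*.xml yfbd-virtual-machine-02:/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/
scp /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/*.xml yfbd-virtual-machine-03:/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/
./hdfs --daemon start journalnode            # run on all three nodes
# After the format: start the freshly formatted NameNode on yfbd-virtual-machine-01 ...
./hdfs --daemon start namenode
# ... then, on yfbd-virtual-machine-02, pull over the formatted metadata
./hdfs namenode -bootstrapStandby
# Finally (once, from yfbd-virtual-machine-01) create the HA state znode in ZooKeeper
./hdfs zkfc -formatZK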
6. Start the cluster
cd /home/yfbd/bigdata/hadoop-3.1.4/sbin
./start-all.sh
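To confirm HA came up correctly, one option is to check the daemons and the active/standby states (the hdfs and yarn commands are on the PATH once the environment variables from the Hive section are in place):
jps                                   # NameNode / DataNode / JournalNode / DFSZKFailoverController / ResourceManager / NodeManager
hdfs haadmin -getServiceState nn1     # one of nn1/nn2 should report active, the other standby
hdfs haadmin -getServiceState nn2
yarn rmadmin -getServiceState rm1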
Hive deployment
1. Configure environment variables
export HADOOP_HOME=/home/yfbd/bigdata/hadoop-3.1.4
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=${HADOOP_HOME}
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}
export HADOOP_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export ZOOKEEPER_HOME=/home/yfbd/bigdata/zookeeper
export PATH=$PATH:$ZOOKEEPER_HOME/bin
export HIVE_HOME=/home/yfbd/bigdata/hive3.1.2
export PATH=$PATH:$HIVE_HOME/bin
2. Configure hive-site.xml
<configuration>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive_remote/warehouse</value>
<description>Default warehouse directory in HDFS</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://10.216.3.17:3306/hive3_remote?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
<description>JDBC connection to the database that stores the metastore metadata (note the XML-escaped &amp;)</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
<description>JDBC driver; the driver jar must be copied into ${HIVE_HOME}/lib</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive3</value>
<description>Database user name</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>Database password</description>
</property>
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<property>
<name>spark.home</name>
<value>/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2</value>
</property>
<property>
<name>hive.aux.jars.path</name>
<value>file:///home/yfbd/bigdata/hive3.1.2/lib</value>
</property>
</configuration>
The metastore address used by clients that connect to the remote metastore service:
<configuration>
<property>
<name>hive.metastore.uris</name>
<value>thrift://yfbd-virtual-machine-01:9083</value>
<description>Metastore address</description>
</property>
</configuration>
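As the ConnectionDriverName description above notes, the MySQL JDBC driver jar must be in ${HIVE_HOME}/lib before the schema can be initialized; for example (the connector file name below is only illustrative and depends on the version downloaded):
cp mysql-connector-java-8.0.28.jar /home/yfbd/bigdata/hive3.1.2/lib/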
3. Initialize the Hive metastore schema
cd /home/yfbd/bigdata/hive3.1.2/bin
schematool -dbType mysql -initSchema
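If initialization succeeded, schematool can report the schema version it finds in the MySQL database configured above:
schematool -dbType mysql -info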
4. Start the Metastore service
hive --service metastore
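hive --service metastore runs in the foreground; a common pattern is to background it and then confirm it is listening on the thrift port 9083 from hive.metastore.uris (the log path below is only an example):
nohup hive --service metastore > /home/yfbd/bigdata/hive3.1.2/metastore.log 2>&1 &
ss -lnt | grep 9083    # the metastore port should be in LISTEN state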
Spark standalone (master/worker) deployment
1. Download the Spark package
https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-3.1.2/spark-3.1.2-bin-hadoop3.2.tgz
(the directory paths later in this guide use spark-3.1.3-bin-hadoop3.2; download the version that matches your install directory)
2. Extract the archive
tar -zxvf spark-3.1.2-bin-hadoop3.2.tgz
3. Symlink the Hadoop configuration files into the Spark conf directory
cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/conf
ln -s /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/core-site.xml
ln -s /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/hdfs-site.xml
4. Add the hive-site.xml configuration file
vim hive-site.xml
<configuration>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/user/hive_remote/warehouse</value>
<description>Default warehouse directory in HDFS</description>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://10.216.3.17:3306/hive3_remote?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
<description>JDBC connection to the database that stores the metastore metadata (note the XML-escaped &amp;)</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.cj.jdbc.Driver</value>
<description>JDBC driver; the driver jar must be copied into ${HIVE_HOME}/lib</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>hive3</value>
<description>Database user name</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>Database password</description>
</property>
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
<property>
<name>spark.home</name>
<value>/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2</value>
</property>
<property>
<name>hive.aux.jars.path</name>
<value>file:///home/yfbd/bigdata/hive3.1.2/lib</value>
</property>
</configuration>
5. Edit spark-env.sh
export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
export HADOOP_HOME=/home/yfbd/bigdata/hadoop-3.1.4
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_MASTER_HOST=yfbd-virtual-machine-01
export SPARK_MASTER_WEBUI_PORT=8060
export SPARK_WORKER_WEBUI_PORT=8061
export SPARK_WORKER_MEMORY=500m
export SPARK_WORKER_CORES=1
export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER
-Dspark.deploy.zookeeper.url=yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181
-Dspark.deploy.zookeeper.dir=/opt/hadoop/data/zookeeper/spark"
export HIVE_HOME=/home/yfbd/bigdata/hive3.1.2
export YARN_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
6. Edit workers
yfbd-virtual-machine-01
yfbd-virtual-machine-02
yfbd-virtual-machine-03
7. Edit spark-defaults.conf
spark.sql.hive.metastore.version 3.1.2
spark.sql.hive.metastore.jars path
spark.sql.hive.metastore.jars.path file:///home/yfbd/bigdata/hive3.1.2/lib/*.jar
spark.hadoop.hive.metastore.uris thrift://yfbd-virtual-machine-01:9083
8. Start Spark
cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/sbin
./start-all.sh
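Two quick checks after start-all.sh: the master web UI should be reachable on the port set in spark-env.sh, and spark-sql (run from the Spark home directory) should be able to reach the remote Hive metastore started earlier:
# Master web UI: http://yfbd-virtual-machine-01:8060  (SPARK_MASTER_WEBUI_PORT)
cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2
./bin/spark-sql -e "show databases;"   # should list the databases known to the metastore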
Kyuubi deployment
1. Download and extract the Kyuubi package
https://dlcdn.apache.org/incubator/kyuubi/kyuubi-1.5.1-incubating/apache-kyuubi-1.5.1-incubating-bin.tgz
tar -zxvf apache-kyuubi-1.5.1-incubating-bin.tgz
2. Configure kyuubi-defaults.conf
cp kyuubi-defaults.conf.template kyuubi-defaults.conf
kyuubi.ha.zookeeper.quorum=yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181
kyuubi.authentication=NONE
kyuubi.engine.share.level=USER
kyuubi.frontend.bind.host=0.0.0.0
kyuubi.frontend.bind.port=10009
kyuubi.ha.zookeeper.namespace=kyuubi
kyuubi.session.engine.idle.timeout=PT10H
spark.master=yarn
spark.submit.deployMode=cluster
spark.dynamicAllocation.enabled=true
spark.dynamicAllocation.minExecutors=0
spark.dynamicAllocation.maxExecutors=20
spark.dynamicAllocation.executorIdleTimeout=60
spark.shuffle.service.enabled=true
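Note that spark.shuffle.service.enabled=true relies on the Spark external shuffle service running inside each NodeManager, which the yarn-site.xml above does not yet enable. A sketch of the additional NodeManager configuration that is typically required (the Spark YARN shuffle jar shipped under $SPARK_HOME/yarn must also be placed on the NodeManager classpath):
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle,spark_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
<value>org.apache.spark.network.yarn.YarnShuffleService</value>
</property>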
3. Configure kyuubi-env.sh
cp kyuubi-env.sh.template kyuubi-env.sh
export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
export SPARK_HOME=/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2
export SPARK_CONF_DIR=${SPARK_HOME}/conf
export HADOOP_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
export KYUUBI_MAX_LOG_FILES=10
4. Configure hive-site.xml
cp /home/yfbd/bigdata/hive3.1.2/conf/hive-site.xml /home/yfbd/bigdata/apache-kyuubi-1.5.1-incubating-bin/conf/
5. Start Kyuubi
cd /home/yfbd/bigdata/apache-kyuubi-1.5.1-incubating-bin/bin
./kyuubi start
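Kyuubi exposes a HiveServer2-compatible thrift endpoint, so a simple way to verify the deployment is to connect with beeline on the frontend port configured above (10009) and run a trivial query, for example using the beeline shipped with Spark:
/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/bin/beeline -u "jdbc:hive2://yfbd-virtual-machine-01:10009/" -n yfbd -e "show databases;"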