hadoop3.1.4+hive3.1.2+spark3.1.3

Author: 于飞_d529 | Published 2022-06-15 14:12

    Environment

    | Host | IP | Roles |
    | yfbd-virtual-machine-01 | 10.216.6.227 | zookeeper, datanode, namenode (nn1), spark master/worker, hive, hive metastore |
    | yfbd-virtual-machine-02 | 10.216.6.228 | zookeeper, datanode, namenode (nn2, standby), spark worker, hive |
    | yfbd-virtual-machine-03 | 10.216.6.229 | zookeeper, datanode, spark worker, hive |
    

    Configure passwordless SSH

    1. Edit the hosts file

    10.216.6.227 yfbd-virtual-machine-01
    10.216.6.228 yfbd-virtual-machine-02
    10.216.6.229 yfbd-virtual-machine-03
    

    2. Set up passwordless login

    • On 227, generate a key pair:
    ssh-keygen
    
    • Also on 227, copy the public key to the other nodes:
    ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-02
    ssh-copy-id -i /home/yfbd/.ssh/id_rsa.pub yfbd-virtual-machine-03
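    
    If the keys were copied correctly, the following should print the remote hostnames without a password prompt. Note that start-all.sh is later run from 227 and also SSHes back to 227 itself, so running ssh-copy-id against yfbd-virtual-machine-01 as well may be needed:
    
    ssh yfbd-virtual-machine-02 hostname
    ssh yfbd-virtual-machine-03 hostname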
    

    Configure environment variables

    vim /etc/profile
    #JAVA_HOME
    export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
    export PATH=$PATH:$JAVA_HOME/bin
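    
    Reload the profile and confirm the JDK is picked up:
    
    source /etc/profile
    java -version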
    

    Hadoop HA deployment

    1. Configure core-site.xml

    vim /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/core-site.xml
    <configuration>
        <property>  
            <name>fs.defaultFS</name>  
            <value>hdfs://ns</value>  
        </property> 
        <!-- Hadoop data storage directory -->
        <property>  
            <name>hadoop.tmp.dir</name>  
            <value>/home/yfbd/bigdata/hadoop-3.1.4/data/tmp</value>  
        </property> 
        <!-- Static user for the HDFS web UI: yfbd -->
        <property>  
            <name>hadoop.http.staticuser.user</name>  
            <value>yfbd</value>  
        </property> 
        <!-- Hosts from which the proxy user yfbd (superuser) may connect -->
        <property>
            <name>hadoop.proxyuser.yfbd.hosts</name>
            <value>*</value>
        </property>
        <!-- Groups the proxy user yfbd (superuser) may impersonate -->
        <property>
            <name>hadoop.proxyuser.yfbd.groups</name>
            <value>*</value>
        </property>
        <!-- ZooKeeper quorum address -->
        <property>  
            <name>ha.zookeeper.quorum</name>  
            <value>yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181</value>  
        </property>
    </configuration>
    
    

    2. Configure hdfs-site.xml

    <configuration>
      <!-- Replication factor: 3 -->
     <property>  
          <name>dfs.replication</name>  
        <value>3</value>  
     </property> 
     <property>
       <name>dfs.nameservices</name>
       <value>ns</value>
     </property>
        
    <!-- The two HA NameNodes -->
     <property>
       <name>dfs.ha.namenodes.ns</name>
       <value>nn1,nn2</value>
     </property>
    <!-- RPC address nn1 listens on -->
     <property>
       <name>dfs.namenode.rpc-address.ns.nn1</name>
       <value>yfbd-virtual-machine-01:8020</value>
     </property>
    <!-- HTTP address nn1 listens on -->
     <property>
       <name>dfs.namenode.http-address.ns.nn1</name>
       <value>yfbd-virtual-machine-01:50070</value>
     </property>
    <!-- RPC address nn2 listens on -->
     <property>
       <name>dfs.namenode.rpc-address.ns.nn2</name>
       <value>yfbd-virtual-machine-02:8020</value>
     </property>
    <!-- HTTP address nn2 listens on -->
     <property>
       <name>dfs.namenode.http-address.ns.nn2</name>
       <value>yfbd-virtual-machine-02:50070</value>
     </property>
    <!-- Shared NameNode edits storage -->
    <!-- Location of the NameNode edit log on the JournalNodes (usually co-located with ZooKeeper) -->
     <property>
       <name>dfs.namenode.shared.edits.dir</name>
       <value>qjournal://yfbd-virtual-machine-01:8485;yfbd-virtual-machine-02:8485;yfbd-virtual-machine-03:8485/ns</value>
     </property>
    <!-- JournalNode data directory -->
     <property>
       <name>dfs.journalnode.edits.dir</name>
       <value>/home/yfbd/bigdata/hadoop-3.1.4/data/journal</value>
     </property>
    <!-- Enable automatic failover; if automatic failover is not needed this can be left unset -->
     <property>
       <name>dfs.ha.automatic-failover.enabled</name>
       <value>true</value>
     </property>
    <!-- Failover proxy provider -->
     <!-- Java class HDFS clients use to contact the NameNodes and determine which one is currently Active -->
     <property>
       <name>dfs.client.failover.proxy.provider.ns</name>
       <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
     </property>
    <!-- Fencing method -->
    <!-- Several methods are available (see the official docs); sshfence logs in to the old Active NameNode over SSH and kills its process -->
     <property>
       <name>dfs.ha.fencing.methods</name>
       <value>sshfence</value>
     </property>
     <property>
       <name>dfs.permissions.enabled</name>
       <value>false</value>
     </property>
    </configuration>
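    
    Since sshfence works over SSH, it relies on passwordless SSH between the two NameNode hosts; if the key is not found automatically, the key file is usually pointed to explicitly in hdfs-site.xml. A minimal sketch, assuming the id_rsa key created earlier:
    
     <property>
       <name>dfs.ha.fencing.ssh.private-key-files</name>
       <value>/home/yfbd/.ssh/id_rsa</value>
     </property>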
    

    3. Configure yarn-site.xml

    <configuration>
        <!-- Run the MapReduce shuffle as an auxiliary service -->
        <property>  
            <name>yarn.nodemanager.aux-services</name>  
            <value>mapreduce_shuffle</value>  
        </property>
        <!-- Site specific YARN configuration properties -->
        <!-- Enable ResourceManager HA (disabled by default) -->
        <property>  
           <name>yarn.resourcemanager.ha.enabled</name>  
           <value>true</value>  
        </property>  
        <!-- Declare the RM cluster and its members (rm1, rm2, rm3) -->
        <property>  
           <name>yarn.resourcemanager.cluster-id</name>  
           <value>rmcluster</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.ha.rm-ids</name>  
           <value>rm1,rm2,rm3</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.hostname.rm1</name>  
           <value>yfbd-virtual-machine-01</value>  
        </property>  
        <property>  
           <name>yarn.resourcemanager.hostname.rm2</name>  
           <value>yfbd-virtual-machine-02</value>  
        </property>  
        <property>
           <name>yarn.resourcemanager.hostname.rm3</name>
           <value>yfbd-virtual-machine-03</value>
        </property> 
        <!-- ZooKeeper quorum address -->
        <property>  
           <name>yarn.resourcemanager.zk-address</name>  
            <value>yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181</value>  
        </property>  
        <!-- Enable recovery so running applications survive an RM failure (default: false) -->
        <property>  
           <name>yarn.resourcemanager.recovery.enabled</name>  
           <value>true</value>  
        </property>  
       
        <!-- Store ResourceManager state in ZooKeeper (the default store is FileSystem-based) -->
        <property>  
           <name>yarn.resourcemanager.store.class</name>  
         <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value> 
        </property> 
        
        <!-- Environment variables inherited by containers -->
        <property>
            <name>yarn.nodemanager.env-whitelist</name>
            <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
        </property>
        
        <!-- Enable log aggregation -->
        <property>
            <name>yarn.log-aggregation-enable</name>
            <value>true</value>
        </property>
    
        <!-- Log aggregation (JobHistory) server URL -->
        <property>  
            <name>yarn.log.server.url</name>  
            <value>http://yfbd-virtual-machine-01:19888/jobhistory/logs</value>
        </property>
    
        <!-- Keep aggregated logs for 7 days -->
        <property>
            <name>yarn.log-aggregation.retain-seconds</name>
            <value>604800</value>
        </property>
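        <!-- yarn.resourcemanager.ha.id below identifies the local RM: keep rm1 on
             yfbd-virtual-machine-01 and change it to rm2 / rm3 on the other two nodes -->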
        <property>  
           <name>yarn.resourcemanager.ha.id</name>  
           <value>rm1</value>  
        </property>
        <property>
            <name>yarn.nodemanager.pmem-check-enabled</name>
            <value>false</value>
        </property>
        <property>
            <name>yarn.nodemanager.vmem-check-enabled</name>
            <value>false</value>
        </property>
    </configuration>
    

    4. Configure workers

    yfbd-virtual-machine-01
    yfbd-virtual-machine-02
    yfbd-virtual-machine-03
    

    5. Format the NameNode

    cd /home/yfbd/bigdata/hadoop-3.1.4/bin
    ./hadoop namenode -format
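    
    In an HA layout like this, the format above normally only succeeds once ZooKeeper and the JournalNodes are already running, and the second NameNode is bootstrapped rather than formatted. A rough sequence, assuming the paths used in this article:
    
    # on every node: start zookeeper and a journalnode
    /home/yfbd/bigdata/zookeeper/bin/zkServer.sh start
    /home/yfbd/bigdata/hadoop-3.1.4/bin/hdfs --daemon start journalnode
    
    # on yfbd-virtual-machine-01: format nn1 (the step above) and start it
    ./hdfs --daemon start namenode
    
    # on yfbd-virtual-machine-02: copy nn1's metadata instead of formatting
    /home/yfbd/bigdata/hadoop-3.1.4/bin/hdfs namenode -bootstrapStandby
    
    # on yfbd-virtual-machine-01: initialise the failover state in ZooKeeper
    ./hdfs zkfc -formatZK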
    

    6. Start the cluster

    cd /home/yfbd/bigdata/hadoop-3.1.4/sbin
    ./start-all.sh
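    
    A few quick checks that the HA daemons came up in the expected roles (standard Hadoop CLI commands):
    
    jps
    hdfs haadmin -getServiceState nn1
    hdfs haadmin -getServiceState nn2
    yarn rmadmin -getServiceState rm1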
    

    Hive deployment

    1. Configure environment variables

    export HADOOP_HOME=/home/yfbd/bigdata/hadoop-3.1.4
    export PATH=$PATH:$HADOOP_HOME/bin
    export PATH=$PATH:$HADOOP_HOME/sbin
    
    export HADOOP_MAPRED_HOME=${HADOOP_HOME}
    export HADOOP_COMMON_HOME=${HADOOP_HOME}
    export HADOOP_HDFS_HOME=${HADOOP_HOME}
    export HADOOP_YARN_HOME=${HADOOP_HOME}
    
    export HADOOP_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
    export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
    
    export ZOOKEEPER_HOME=/home/yfbd/bigdata/zookeeper
    export PATH=$PATH:$ZOOKEEPER_HOME/bin
    
    export HIVE_HOME=/home/yfbd/bigdata/hive3.1.2
    export PATH=$PATH:$HIVE_HOME/bin
    

    2. Configure hive-site.xml

    • Master node
    <configuration>
        <property>
            <name>hive.metastore.warehouse.dir</name>
            <value>/user/hive_remote/warehouse</value>
            <description>Default warehouse directory in HDFS</description>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionURL</name>
            <value>jdbc:mysql://10.216.3.17:3306/hive3_remote?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
            <description>JDBC connection for the metastore database</description>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionDriverName</name>
            <value>com.mysql.cj.jdbc.Driver</value>
            <description>JDBC driver; the jar must be copied into ${HIVE_HOME}/lib (see the copy command below)</description>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionUserName</name>
            <value>hive3</value>
            <description>Database user name</description>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionPassword</name>
            <value>123456</value>
            <description>Database password</description>
        </property>
        <property>
            <name>hive.cli.print.header</name>
            <value>true</value>
        </property>
        <property>
            <name>hive.cli.print.current.db</name>
            <value>true</value>
        </property>
        <property> 
            <name>spark.home</name> 
            <value>/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2</value> 
        </property>
        <property>
            <name>hive.aux.jars.path</name>
            <value>file:///home/yfbd/bigdata/hive3.1.2/lib</value>
        </property>
    </configuration>
    
    • Client nodes
    <configuration>
        <property>
            <name>hive.metastore.uris</name>
            <value>thrift://yfbd-virtual-machine-01:9083</value>
            <description>Metastore thrift address</description>
        </property>
    </configuration>
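    
    As noted in the driver description above, the MySQL Connector/J jar has to be on Hive's classpath before the metastore can reach MySQL; a minimal sketch (the exact jar filename depends on the Connector/J version downloaded):
    
    cp mysql-connector-java-*.jar /home/yfbd/bigdata/hive3.1.2/lib/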
    

    3. Initialize the Hive metastore schema

    cd /home/yfbd/bigdata/hive3.1.2/bin
    schematool -dbType mysql -initSchema
    

    4. Start the metastore service

    hive --service metastore
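    
    This command stays in the foreground; in practice it is usually backgrounded and then verified on its default thrift port 9083 (the log file location below is arbitrary):
    
    nohup hive --service metastore > /tmp/hive-metastore.log 2>&1 &
    ss -lntp | grep 9083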
    

    Spark standalone (master/worker) deployment

    1. Download the Spark package

    https://mirrors.tuna.tsinghua.edu.cn/apache/spark/spark-3.1.3/spark-3.1.3-bin-hadoop3.2.tgz
    

    2. Extract it

    tar -zxvf spark-3.1.3-bin-hadoop3.2.tgz
    

    3. Symlink the Hadoop config files into the Spark conf directory

    cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/conf
    ln -s /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/core-site.xml
    ln -s /home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop/hdfs-site.xml
    

    4. Add a hive-site.xml config file

    vim hive-site.xml
    <configuration>
        <property>
            <name>hive.metastore.warehouse.dir</name>
            <value>/user/hive_remote/warehouse</value>
            <description>Default warehouse directory in HDFS</description>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionURL</name>
            <value>jdbc:mysql://10.216.3.17:3306/hive3_remote?createDatabaseIfNotExist=true&amp;useSSL=false&amp;allowPublicKeyRetrieval=true</value>
            <description>JDBC connection for the metastore database</description>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionDriverName</name>
            <value>com.mysql.cj.jdbc.Driver</value>
            <description>JDBC driver; the jar must be copied into ${HIVE_HOME}/lib</description>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionUserName</name>
            <value>hive3</value>
            <description>Database user name</description>
        </property>
        <property>
            <name>javax.jdo.option.ConnectionPassword</name>
            <value>123456</value>
            <description>Database password</description>
        </property>
        <property>
            <name>hive.cli.print.header</name>
            <value>true</value>
        </property>
        <property>
            <name>hive.cli.print.current.db</name>
            <value>true</value>
        </property>
        <property> 
            <name>spark.home</name> 
            <value>/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2</value> 
        </property>
        <property>
            <name>hive.aux.jars.path</name>
            <value>file:///home/yfbd/bigdata/hive3.1.2/lib</value> 
        </property>
    </configuration>
    

    5. Edit spark-env.sh

    export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
    export HADOOP_HOME=/home/yfbd/bigdata/hadoop-3.1.4
    export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
    export SPARK_MASTER_HOST=yfbd-virtual-machine-01
    export SPARK_MASTER_WEBUI_PORT=8060
    export SPARK_WORKER_WEBUI_PORT=8061
    export SPARK_WORKER_MEMORY=500m
    export SPARK_WORKER_CORES=1
    export SPARK_DAEMON_JAVA_OPTS="-Dspark.deploy.recoveryMode=ZOOKEEPER
     -Dspark.deploy.zookeeper.url=yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181
     -Dspark.deploy.zookeeper.dir=/opt/hadoop/data/zookeeper/spark"
    export HIVE_HOME=/home/yfbd/bigdata/hive3.1.2
    export YARN_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
    

    6. Edit workers

    yfbd-virtual-machine-01
    yfbd-virtual-machine-02
    yfbd-virtual-machine-03
    

    7. Edit spark-defaults.conf

    spark.sql.hive.metastore.version        3.1.2
    spark.sql.hive.metastore.jars           path
    spark.sql.hive.metastore.jars.path      file:///home/yfbd/bigdata/hive3.1.2/lib/*.jar
    spark.hadoop.hive.metastore.uris        thrift://yfbd-virtual-machine-01:9083
    

    8. Start Spark

    cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/sbin
    ./start-all.sh
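    
    With the master and workers up, a quick smoke test against the standalone master (default port 7077) and the Hive metastore might look like this:
    
    cd /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2
    ./bin/spark-sql --master spark://yfbd-virtual-machine-01:7077 -e "show databases;"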
    

    Kyuubi deployment

    1. Download and extract the Kyuubi package

    https://dlcdn.apache.org/incubator/kyuubi/kyuubi-1.5.1-incubating/apache-kyuubi-1.5.1-incubating-bin.tgz
    tar -zxvf apache-kyuubi-1.5.1-incubating-bin.tgz
    

    2. Configure kyuubi-defaults.conf

    cp kyuubi-defaults.conf.template kyuubi-defaults.conf
    
    kyuubi.ha.zookeeper.quorum=yfbd-virtual-machine-01:2181,yfbd-virtual-machine-02:2181,yfbd-virtual-machine-03:2181
    kyuubi.authentication=NONE
    kyuubi.engine.share.level=USER
    kyuubi.frontend.bind.host=0.0.0.0
    kyuubi.frontend.bind.port=10009
    kyuubi.ha.zookeeper.namespace=kyuubi
    kyuubi.session.engine.idle.timeout=PT10H
    
    spark.master=yarn
    spark.submit.deployMode=cluster
    spark.dynamicAllocation.enabled=true
    spark.dynamicAllocation.minExecutors=0
    spark.dynamicAllocation.maxExecutors=20
    spark.dynamicAllocation.executorIdleTimeout=60
    spark.shuffle.service.enabled=true
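    
    spark.shuffle.service.enabled=true assumes the Spark external shuffle service is running inside every YARN NodeManager. If it has not been set up yet, the usual approach is to copy the spark-<version>-yarn-shuffle.jar from ${SPARK_HOME}/yarn onto the NodeManager classpath and extend yarn-site.xml roughly as follows (a sketch, to be merged with the yarn-site.xml shown earlier):
    
        <property>
            <name>yarn.nodemanager.aux-services</name>
            <value>mapreduce_shuffle,spark_shuffle</value>
        </property>
        <property>
            <name>yarn.nodemanager.aux-services.spark_shuffle.class</name>
            <value>org.apache.spark.network.yarn.YarnShuffleService</value>
        </property>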
    

    3. Configure kyuubi-env.sh

    cp kyuubi-env.sh.template kyuubi-env.sh
    
    export JAVA_HOME=/home/yfbd/bigdata/jdk1.8
    export SPARK_HOME=/home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2
    export SPARK_CONF_DIR=${SPARK_HOME}/conf
    export HADOOP_CONF_DIR=/home/yfbd/bigdata/hadoop-3.1.4/etc/hadoop
    export KYUUBI_MAX_LOG_FILES=10
    

    4. Configure hive-site.xml

    cp /home/yfbd/bigdata/hive3.1.2/conf/hive-site.xml /home/yfbd/bigdata/apache-kyuubi-1.5.1-incubating-bin/conf/
    

    5. Start Kyuubi

    cd /home/yfbd/bigdata/apache-kyuubi-1.5.1-incubating-bin/bin
    ./kyuubi start
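    
    To verify the deployment end to end, a JDBC connection can be opened against the Kyuubi frontend port (10009) using the beeline that ships with Spark; the user name is simply the OS user and is only an example:
    
    /home/yfbd/bigdata/spark-3.1.3-bin-hadoop3.2/bin/beeline \
      -u 'jdbc:hive2://yfbd-virtual-machine-01:10009/' -n yfbd \
      -e 'show databases;'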
    
