Hadoop 3.x Distributed Setup

Author: Miracle001 | Published 2019-03-04 17:17

    Preparation

    centos 7.5 1804
    3 machines, managed with VMware
    NAT network: 192.168.25.x
    Host-only network: 192.168.50.x
    Hostnames of the 3 machines:
      node1.fgq.com
      node2.fgq.com
      node3.fgq.com
    Time synchronization on all 3 machines
    [root@node1 ~]# crontab -e
    */5 * * * * ntpdate time3.aliyun.com && hwclock -w
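    Before relying on the cron entry, a one-off sync can be run manually on each node to confirm the NTP server is reachable:
    [root@node1 ~]# ntpdate time3.aliyun.com && hwclock -w
    [root@node1 ~]# date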
    Hostname resolution on all 3 machines
    [root@node1 ~]# cat /etc/hosts
    192.168.25.11 node1.fgq.com node1
    192.168.25.12 node2.fgq.com node2
    192.168.25.13 node3.fgq.com node3
    192.168.25.14 node4.fgq.com node4
    192.168.25.15 node5.fgq.com node5
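    If the hostnames have not been set yet, they can be configured with hostnamectl (shown here for node1; adjust per machine):
    [root@node1 ~]# hostnamectl set-hostname node1.fgq.com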
    Install base packages on all 3 machines (the epel-release rpm below is assumed to have been downloaded locally first)
    [root@node1 ~]# yum -y install vim lrzsz ntpdate wget net-tools lsof
    [root@node1 ~]# yum -y install epel-release-latest-7.noarch.rpm
    Disable the firewall on all 3 machines
    [root@node1 ~]# systemctl stop firewalld
    [root@node1 ~]# systemctl disable firewalld
    Disable SELinux on all 3 machines
    [root@node1 ~]# vi /etc/selinux/config 
    SELINUX=disabled
    [root@node1 ~]# setenforce 0
    [root@node1 ~]# getenforce 
    
    Passwordless SSH login
    [root@node1 ~]# ssh-keygen -t rsa -P ''   (run on all 3 machines)
    [root@node1 ~]# cat /root/.ssh/id_rsa.pub   (run on all 3 machines)
    [root@node1 ~]# vim /root/.ssh/authorized_keys
    Copy the id_rsa.pub public keys of all 3 machines into this file
    ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDObdYkfz1qSG4i5gsVGT6I7wqdD8LH3Rax1/YFm9gmsQ+w/rQ0GlKJ4W1TSZc+5vgtCLGi53k9UZpNjX6dHBjFaw4sAwZa00/UBvpIRbIRNl9bhuDYdq2uQOeYKjUTFfrXbt0TrQAJuY+B+yedpI+iot67swumUNjH51m04UAz93hfKF+O3k1aeTGzIKkSgXQ7TASYB2pttS7sDdN8Eo2muWcN59ezT2FAk/Hn9Q1Dx5h1exKqxGNHrVb7OIbt/L0c5wgyoriIZldg3xsSiJ/DXlYFZ+7YHwM/lSAKWDojtqTg6ZzcBK82m9eNW0Wo4HU+SnhZtPpUAV9SbZFqpFz/ root@node1.fgq.com
    ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDdA79fJ5BxHlBcrKqvTxuiFgsENW7V0ueLAcilF6/JJfLmS0ltXsBx8vuq0PYFv9fvdcfoBna+VSI2dFfJ5PqeKn8IL7sV/D8pD8mPAESL8fzLjbSFgW8K1ur7AfjtNwOebr+EL45tpSAdc44QAy/2Tria0k6MC/xjiA+dc75bYganarBUXAqagCoNSZdlbUT3WYaxhV3UoIYmx2oayB/X+VyUCZiJnr7v9y/w4IQoLvXlzJxhW7muMmZ6Ty2BbeYiENYakV7Y1vbs2u45l7gcvWadYbuSeAWjiemHbEaAeLpGLidHEWyV2JHPQhJTXs+d+zAW9HMEva08Hh6Lrs2D root@node2.fgq.com
    ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQClxM2idmxLyh7dqg67grMzmpv2uix51gAkIsBSI5epi8h+2+NZyVtV9UOwNWC3eOtGhyRcodUFyL/z+tb4FWOmGFZAKkZ3hYPvMzKBCYCmlgF2vXx7kUVL2y39OZuWDgKk2sMqkjogowNAMg4zR/QR8hduPS/JWvBmTq3/JDIqWj4j7GT8DTVXJBz895K4cKlkmddV7NAMRDg0nlQ80mg+8YoU+4IwHTKTux6mA+h4rmcdhDs1zQRrhh8M/SYAjaAKf1HzdAkrUMJYDHVVzfJ+cQMvsKUYk+F5OBSTHw8MmANiU+dOyHch4+WbsLi5j2OH4GQWF2dsUJxbGyZZnr3t root@node3.fgq.com
    Copy this authorized_keys file to the other two machines
    [root@node1 ~]# scp /root/.ssh/authorized_keys node2:/root/.ssh/
    [root@node1 ~]# scp /root/.ssh/authorized_keys node3:/root/.ssh/
    Passwordless login should now work
    [root@node1 ~]# ssh node2
    [root@node1 ~]# ssh node3
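    Equivalently, ssh-copy-id can populate authorized_keys without editing the file by hand (run it from each node towards each of the three hosts, including the node itself):
    [root@node1 ~]# ssh-copy-id root@node2
    [root@node1 ~]# ssh-copy-id root@node3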
    
    Download the JDK package: jdk-8u152-linux-x64.tar.gz (version 1.8)
    Download the Hadoop package: hadoop-3.2.0.tar.gz
    

    JDK configuration

    Do this on all 3 machines; node1 is shown as the example
    [root@node1 ~]# tar zxf jdk-8u152-linux-x64.tar.gz
    [root@node1 ~]# ln -s jdk1.8.0_152 jdk
    [root@node1 ~]# vim /etc/profile
    Append the following at the end of the file
    export JAVA_HOME=/root/jdk
    export JRE_HOME=$JAVA_HOME/jre
    export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar
    export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
    [root@node1 ~]# source /etc/profile
    [root@node1 ~]# java -version
    java version "1.8.0_152"
    Java(TM) SE Runtime Environment (build 1.8.0_152-b16)
    Java HotSpot(TM) 64-Bit Server VM (build 25.152-b16, mixed mode)
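    Since passwordless SSH is already in place, the JDK archive can simply be copied from node1 instead of being uploaded to each machine separately (the extraction, symlink, and /etc/profile steps still have to be repeated on node2 and node3):
    [root@node1 ~]# scp jdk-8u152-linux-x64.tar.gz node2:/root/
    [root@node1 ~]# scp jdk-8u152-linux-x64.tar.gz node3:/root/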
    

    Hadoop configuration

    [root@node1 ~]# mkdir -p /data/hadoop/{tmp,hdfs/{name,data}}   (run on all 3 machines)
    [root@node1 ~]# vim /etc/profile   (run on all 3 machines)
    Append the following at the end of the file
    export HADOOP_HOME=/root/hadoop
    export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
    export LD_LIBRARY_PATH=$HADOOP_HOME/lib/native
    export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
    export YARN_CONF_DIR=$HADOOP_HOME/etc/hadoop
    [root@node1 ~]# source /etc/profile   (run on all 3 machines)
    
    The following commands only need to be run on node1; the resulting files are then copied to node2 and node3
    [root@node1 ~]# tar xzf hadoop-3.2.0.tar.gz
    [root@node1 ~]# ln -s hadoop-3.2.0 hadoop   (so that HADOOP_HOME=/root/hadoop resolves)
    [root@node1 ~]# cd hadoop/etc/hadoop/
    [root@node1 hadoop]# vim hadoop-env.sh
    Add the following settings to the file (in Hadoop 3.x the *_USER variables are required when the daemons run as root)
    export JAVA_HOME=/root/jdk
    export HDFS_NAMENODE_USER=root
    export HDFS_DATANODE_USER=root
    export HDFS_SECONDARYNAMENODE_USER=root
    export YARN_RESOURCEMANAGER_USER=root
    export YARN_NODEMANAGER_USER=root
    [root@node1 hadoop]# vim core-site.xml
    <configuration>
            <property>
                      <name>fs.defaultFS</name>
                      <value>hdfs://node1:8020</value>
            </property>
            <property>
                      <name>hadoop.tmp.dir</name>
                      <value>/data/hadoop/tmp</value>
            </property>
    </configuration>
    [root@node1 hadoop]# vim hdfs-site.xml 
    <configuration>
       <property>
                 <name>dfs.namenode.secondary.http-address</name>
                 <value>node3:50090</value>
       </property>
       <property>
                 <name>dfs.replication</name>
                 <value>2</value>
       </property>
       <property>
                 <name>dfs.namenode.name.dir</name>
                 <value>file:/data/hadoop/hdfs/name</value>
       </property>
       <property>
                 <name>dfs.datanode.data.dir</name>
                 <value>file:/data/hadoop/hdfs/data</value>
       </property>
    </configuration>
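    As an optional sanity check, hdfs getconf can read these values back once HADOOP_CONF_DIR points at this directory; the two commands below should print 2 and hdfs://node1:8020 respectively:
    [root@node1 hadoop]# hdfs getconf -confKey dfs.replication
    [root@node1 hadoop]# hdfs getconf -confKey fs.defaultFS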
    [root@node1 hadoop]# vim yarn-site.xml
    <configuration>
       <property>
           <name>yarn.nodemanager.aux-services</name>
           <value>mapreduce_shuffle</value>
       </property>
       <property>
          <name>yarn.nodemanager.localizer.address</name>
          <value>0.0.0.0:8140</value>
       </property>
       <property>
           <name>yarn.resourcemanager.hostname</name>
           <value>node2</value>
       </property>
       <property>
           <name>yarn.log-aggregation-enable</name>
           <value>true</value>
       </property>
       <property>
           <name>yarn.log-aggregation.retain-seconds</name>
           <value>106800</value>
       </property>
       <property>
           <name>yarn.log.server.url</name>
           <value>http://node3:19888/jobhistory/logs</value>
       </property>
    </configuration>
    [root@node1 hadoop]# vim mapred-site.xml
    <configuration>
       <property>
           <name>mapreduce.framework.name</name>
           <value>yarn</value>
       </property>
       <property>
           <name>mapreduce.jobhistory.address</name>
           <value>node3:10020</value>
       </property>
       <property>
           <name>mapreduce.jobhistory.webapp.address</name>
           <value>node3:19888</value>
       </property>
    </configuration>
    [root@node1 hadoop]# vim workers 
    node1.fgq.com
    node2.fgq.com
    node3.fgq.com
    
    On node2 and node3, copy node1's hadoop directory to the local machine
    [root@node2 ~]# scp -r root@node1:/root/hadoop .
    [root@node3 ~]# scp -r root@node1:/root/hadoop .
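    After copying, a quick check on each node confirms that the PATH and HADOOP_HOME settings from /etc/profile are effective:
    [root@node2 ~]# hadoop version
    Hadoop 3.2.0
    ...  ...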
    

    Startup

    Run the format on the NameNode machine
    [root@node1 ~]# hdfs namenode -format
    WARNING: /root/hadoop/logs does not exist. Creating.
    2019-02-26 15:47:18,766 INFO namenode.NameNode: STARTUP_MSG: 
    /************************************************************
    STARTUP_MSG: Starting NameNode
    STARTUP_MSG:   host = node1.fgq.com/192.168.25.11
    STARTUP_MSG:   args = [-format]
    STARTUP_MSG:   version = 3.2.0
    ...  ...
    2019-02-26 15:47:20,080 INFO common.Storage: Storage directory /data/hadoop/hdfs/name has been successfully formatted.
    2019-02-26 15:47:20,092 INFO namenode.FSImageFormatProtobuf: Saving image file /data/hadoop/hdfs/name/current/fsimage.ckpt_0000000000000000000 using no compression
    2019-02-26 15:47:20,170 INFO namenode.FSImageFormatProtobuf: Image file /data/hadoop/hdfs/name/current/fsimage.ckpt_0000000000000000000 of size 399 bytes saved in 0 seconds .
    2019-02-26 15:47:20,179 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
    2019-02-26 15:47:20,190 INFO namenode.NameNode: SHUTDOWN_MSG: 
    /************************************************************
    SHUTDOWN_MSG: Shutting down NameNode at node1.fgq.com/192.168.25.11
    ************************************************************/
    The format succeeded; note the "successfully formatted" message in the output (8th line from the bottom)
    
    Start HDFS on node1
    [root@node1 ~]# start-dfs.sh 
    Starting namenodes on [node1]
    Last login: Tue Feb 26 14:36:34 CST 2019 from 192.168.25.1 on pts/1
    Starting datanodes
    Last login: Tue Feb 26 15:48:32 CST 2019 on pts/0
    node3.fgq.com: Warning: Permanently added 'node3.fgq.com' (ECDSA) to the list of known hosts.
    node1.fgq.com: Warning: Permanently added 'node1.fgq.com' (ECDSA) to the list of known hosts.
    node2.fgq.com: Warning: Permanently added 'node2.fgq.com' (ECDSA) to the list of known hosts.
    node3.fgq.com: WARNING: /root/hadoop/logs does not exist. Creating.
    node2.fgq.com: WARNING: /root/hadoop/logs does not exist. Creating.
    Starting secondary namenodes [node3]
    Last login: Tue Feb 26 15:48:35 CST 2019 on pts/0
    
    Start YARN on node1
    [root@node1 ~]# start-yarn.sh 
    WARNING: YARN_CONF_DIR has been replaced by HADOOP_CONF_DIR. Using value of YARN_CONF_DIR.
    Starting resourcemanager
    Last login: Tue Feb 26 15:48:39 CST 2019 on pts/0
    WARNING: YARN_CONF_DIR has been replaced by HADOOP_CONF_DIR. Using value of YARN_CONF_DIR.
    Starting nodemanagers
    Last login: Tue Feb 26 15:48:57 CST 2019 on pts/0
    WARNING: YARN_CONF_DIR has been replaced by HADOOP_CONF_DIR. Using value of YARN_CONF_DIR.
    
    Start the ResourceManager on node2
    [root@node2 ~]# yarn --daemon start resourcemanager
    WARNING: YARN_CONF_DIR has been replaced by HADOOP_CONF_DIR. Using value of YARN_CONF_DIR.
    [root@node2 ~]# echo $?
    0
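    Optionally, the ResourceManager can also be verified through the standard YARN REST API (cluster-info endpoint), assuming curl is available:
    [root@node2 ~]# curl -s http://node2:8088/ws/v1/cluster/info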
    
    Start the history service on node3
    The plan is to run the MapReduce JobHistory server on node3
    [root@node3 ~]# mapred --daemon start historyserver
    [root@node3 ~]# echo $?
    0
    
    Run jps to verify that all daemons started successfully
    [root@node1 ~]# jps 
    2034 DataNode
    2579 NodeManager
    1894 NameNode
    2743 Jps
    [root@node2 ~]# jps
    1930 ResourceManager
    2171 Jps
    1679 DataNode
    1791 NodeManager
    [root@node3 ~]# jps
    1986 JobHistoryServer
    2071 Jps
    1736 SecondaryNameNode
    1818 NodeManager
    1629 DataNode
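    Beyond jps, hdfs dfsadmin -report shows the DataNodes the NameNode actually sees; with this layout it should list 3 live DataNodes:
    [root@node1 ~]# hdfs dfsadmin -report | grep -i "live datanodes"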
    
    On node1, check that the clusterID in the VERSION files under name and data is the same (a mismatch typically means the NameNode was re-formatted after the DataNodes had already registered)
    [root@node1 ~]# cat /data/hadoop/hdfs/name/current/VERSION
    #Tue Feb 26 15:47:20 CST 2019
    namespaceID=1992157079
    clusterID=CID-2cae4f5e-21cb-4710-beb5-571fb5f6c602
    cTime=1551167240059
    storageType=NAME_NODE
    blockpoolID=BP-1252608076-192.168.25.11-1551167240059
    layoutVersion=-65
    
    [root@node1 ~]# cat /data/hadoop/hdfs/data/current/VERSION
    #Tue Feb 26 15:48:39 CST 2019
    storageID=DS-ca07dc35-8601-453e-9ee1-70df29905e58
    clusterID=CID-2cae4f5e-21cb-4710-beb5-571fb5f6c602
    cTime=0
    datanodeUuid=95f82aab-92c8-4c1c-b72d-23fbcce352d8
    storageType=DATA_NODE
    layoutVersion=-57
    
    Hadoop command-line operations
    [root@node1 ~]# hdfs dfs -ls -R /
    drwxrwx---   - root supergroup          0 2019-02-26 15:50 /tmp
    drwxrwx---   - root supergroup          0 2019-02-26 15:50 /tmp/hadoop-yarn
    drwxrwx---   - root supergroup          0 2019-02-26 15:50 /tmp/hadoop-yarn/staging
    drwxrwx---   - root supergroup          0 2019-02-26 15:50 /tmp/hadoop-yarn/staging/history
    drwxrwx---   - root supergroup          0 2019-02-26 15:50 /tmp/hadoop-yarn/staging/history/done
    drwxrwxrwt   - root supergroup          0 2019-02-26 15:50 /tmp/hadoop-yarn/staging/history/done_intermediate
    ... ...
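    As an end-to-end smoke test, the wordcount example bundled with the distribution can be run against a small input (the /user/root paths below are only illustrative; on Hadoop 3.x the job may additionally require mapreduce.application.classpath to be set in mapred-site.xml if it cannot find MRAppMaster):
    [root@node1 ~]# hdfs dfs -mkdir -p /user/root/input
    [root@node1 ~]# hdfs dfs -put /etc/hosts /user/root/input/
    [root@node1 ~]# yarn jar /root/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.0.jar wordcount /user/root/input /user/root/output
    [root@node1 ~]# hdfs dfs -cat /user/root/output/part-r-00000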
    

    Web UIs

    http://192.168.25.11:9870    NameNode web UI (node1)
    http://192.168.25.12:8088    YARN ResourceManager web UI (node2)
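    Based on the configuration above, the SecondaryNameNode and JobHistory web UIs should also be reachable on node3:
    http://192.168.25.13:50090
    http://192.168.25.13:19888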
    
