Getting Started with Hadoop

Author: Chen_xy | Published 2017-10-11 21:02

    1. Prepare a Linux virtual machine in advance (host-only network adapter, minimal install)

    #systemctl stop firewalld      //stop the firewall
    #systemctl disable firewalld      //keep it from starting at boot
    

    2. Set IPADDR to 192.168.56.100/24, gateway 192.168.56.1

    #vi /etc/sysconfig/network 
    NETWORKING=yes
    GATEWAY=192.168.56.1
    #vi /etc/sysconfig/network-scripts/ifcfg-enp0s3
    TYPE=Ethernet 
    IPADDR=192.168.56.100 
    NETMASK=255.255.255.0
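    #systemctl restart network   //apply the static IP (assuming CentOS 7's legacy network service)
    #ip addr   //confirm enp0s3 now holds 192.168.56.100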
    

    3. Connect as root with Xshell and upload the JDK and Hadoop packages with Xftp
    4. Install the JDK and Hadoop (and verify the installs)

    #cd /usr/local
    #rpm -ivh jdk-8u91-linux-x64.rpm   //use Tab completion
    #cd /usr/
    #java   //prints the java usage text if the JDK installed correctly
    #cd local
    #ls
    #tar -xvf hadoop-2.7.3.tar.gz   //use Tab completion
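    #mv hadoop-2.7.3 hadoop   //assumed rename: the listing below and the PATH added to /etc/profile both use /usr/local/hadoop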
    
    #cd /usr/local
    #ls
    bin  games   hadoop-2.7.3.tar.gz  jdk-8u91-linux-x64.rpm  lib64    sbin   src
    etc  hadoop  include              lib                     libexec  share
    # cd hadoop
    # cd etc
    # ls
    hadoop
    # cd h*
    # ls
    capacity-scheduler.xml      httpfs-env.sh            mapred-env.sh
    configuration.xsl           httpfs-log4j.properties  mapred-queues.xml.template
    container-executor.cfg      httpfs-signature.secret  mapred-site.xml.template
    core-site.xml               httpfs-site.xml          slaves
    hadoop-env.cmd              kms-acls.xml             ssl-client.xml.example
    hadoop-env.sh               kms-env.sh               ssl-server.xml.example
    hadoop-metrics2.properties  kms-log4j.properties     yarn-env.cmd
    hadoop-metrics.properties   kms-site.xml             yarn-env.sh
    hadoop-policy.xml           log4j.properties         yarn-site.xml
    hdfs-site.xml               mapred-env.cmd
    #vim hadoop-env.sh   //configure the Hadoop environment
    //change export JAVA_HOME=${JAVA_HOME} to export JAVA_HOME=/usr/java/default
    #vim /etc/profile
    //add at the end: export PATH=$PATH:/usr/local/hadoop/bin:/usr/local/hadoop/sbin
    #source /etc/profile   //reload the edited file so it takes effect
    #hadoop   //prints the hadoop usage text if the PATH is set correctly
    

    5. Shut down master and clone it into three more servers: slave1, slave2, slave3
    6. Change the IPs of the three servers to 192.168.56.101, 192.168.56.102 and 192.168.56.103, and their hostnames to slave1, slave2 and slave3
    Check that all machines can ping each other

    systemctl stop firewalld   //stop the firewall
    systemctl disable firewalld   //disable autostart at boot
    

    7. Have master manage the other three servers (slave1, 2, 3) (Xshell: Tools--Send Key Input to All Sessions)

    #cd /usr/local/hadoop/etc/hadoop
    #vim core-site.xml    //on all four machines
    <configuration>
    <property>
        <name>fs.defaultFS</name>   //the default filesystem name
        <value>hdfs://master:9000</value>   //hdfs protocol; 9000 is the listening port
    </property>
    </configuration>   
    #vim /etc/hosts
    192.168.56.100 master
    192.168.56.101 slave1
    192.168.56.102 slave2
    192.168.56.103 slave3
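    #ping -c 1 slave1   //repeat for slave2 and slave3: every name should resolve and answer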
    
    //on master
    #hdfs namenode -format    //format the namenode
    #hadoop-daemon.sh start namenode
    starting namenode, logging to /usr/local/ha
    # jps
    3352 NameNode       //success
    3420 Jps  
        
    //on slave1, 2, 3
    #hadoop-daemon.sh start datanode
    starting datanode, logging to /usr/local/hadoop/logs/hadoop-root-datanode-slave1.out
    #jps
    3210 DataNode           //success
    3293 Jps
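    //back on master, confirm the datanodes registered with the namenode
    #hdfs dfsadmin -report   //should list three live datanodes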
    

    8. Commands to start and stop the cluster

    #hadoop-daemon.sh stop namenode   //stop
    #hadoop-daemon.sh start namenode   //start
    #jps  //check the running processes
    

    9. Centralized management: register all the machines

    #vim slaves   //the slaves file in /usr/local/hadoop/etc/hadoop on master
    slave1
    slave2
    slave3
    #start-dfs.sh  //start the whole cluster (passwords still required)
    #jps
    

    10. Passwordless SSH login

    #ssh slave1     //a password is required the first time
    #exit
    #cd    //go to the home directory
    #ls -la
    #cd .ssh
    #ssh-keygen -t rsa    //press Enter to accept every default
    #ls
    id_rsa  id_rsa.pub  known_hosts    //id_rsa is root's private key, id_rsa.pub is root's public key
    #ssh-copy-id slave1  //copy the public key over
    #ssh-copy-id slave2
    #ssh-copy-id slave3
    #ssh-copy-id master  //logging in to the local machine also goes through ssh, so it needs the key too
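    #ssh slave1  //verify: no password should be asked this time
    #exit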
    #stop-dfs.sh //stop the cluster
    #start-dfs.sh //start the cluster
    

    11. Upload a file, keep 2 replicas of each HDFS block, and set the heartbeat recheck interval for the datanodes to 10 seconds

    #ls
    capacity-scheduler.xml      httpfs-env.sh            mapred-env.sh
    configuration.xsl           httpfs-log4j.properties  mapred-queues.xml.template
    container-executor.cfg      httpfs-signature.secret  mapred-site.xml.template
    core-site.xml               httpfs-site.xml          slave
    hadoop-env.cmd              kms-acls.xml             slaves
    hadoop-env.sh               kms-env.sh               ssl-client.xml.example
    hadoop-metrics2.properties  kms-log4j.properties     ssl-server.xml.example
    hadoop-metrics.properties   kms-site.xml             yarn-env.cmd
    hadoop-policy.xml           log4j.properties         yarn-env.sh
    hdfs-site.xml               mapred-env.cmd           yarn-site.xml
    #vim hdfs-site.xml     //set the replication count and the heartbeat recheck interval
    <property>
    <name>dfs.replication</name>
    <value>2</value>
    </property>
    <property>
    <name>dfs.namenode.heartbeat.recheck-interval</name>
    <value>10000</value>   //recheck every 10000 ms (10 seconds)
    </property>
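    #stop-dfs.sh    //restart HDFS so the new hdfs-site.xml values take effect
    #start-dfs.sh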
    #cd /usr/local
    # ls
    bin     hadoop-2.7.3.tar.gz     lib64    src
    etc     include                 libexec
    games   jdk-8u91-linux-x64.rpm  sbin
    hadoop  lib                     share
    # hadoop fs -put jdk-8u91-linux-x64.rpm /
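    #hdfs fsck /jdk-8u91-linux-x64.rpm -files -blocks   //each block should report repl=2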
    

    12. Set hadoop.tmp.dir to /var/hadoop
    1. Every machine must be modified

    #cd /tmp    //the default hadoop.tmp.dir lives under /tmp, which is cleared on reboot
    #cd /usr/local/hadoop/etc/hadoop
    #ls
    #vim core-site.xml
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/var/hadoop</value>
    </property>
    

    2. Reformat the namenode (its metadata location changed)

    #hdfs namenode -format
    #stop-dfs.sh
    #start-dfs.sh
    

    13. Java client setup
    1. Import the required jars and create a new class
    (1) share--hadoop--common--hadoop-common-2.7.3.jar
    (2) share--hadoop--common--lib--everything
    (3) share--hadoop--hdfs--hadoop-hdfs-2.7.3.jar
    2. Create a test file

    #cd
    #vi hello.txt
    hello bj
    hello sh
    hello sz
    hello AMD690G
    #hadoop fs -put ./hello.txt /
    # hadoop fs -ls /
    Found 1 items
    -rw-r--r--   2 root supergroup         41 2017-10-06 23:12 /hello.txt
    #vim hdfs-site.xml     //disable permission checks so the remote Java client may write
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    #stop-dfs.sh
    #start-dfs.sh
    

    Complete code

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    public class HelloHdfs2 {
    
        public static void main(String[] args) throws Exception{
            /*URL url = new URL("http://www.baidu.com");
            InputStream in = url.openStream();
            IOUtils.copyBytes(in, System.out , 4096, true);*/
            
            /*URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
            URL url = new URL("hdfs://192.168.56.100:9000/hello.txt");
            InputStream in = url.openStream();
            IOUtils.copyBytes(in, System.out , 4096, true);*/
            
            Configuration conf = new Configuration();
            conf.set("fs.defaultFS", "hdfs://192.168.56.100:9000");  //point the client at the namenode on master
            FileSystem fileSystem = FileSystem.get(conf);
            
            /*boolean success = fileSystem.mkdirs(new Path("/msb"));
            System.out.println(success);
            
            success = fileSystem.exists(new Path("/hello.txt"));
            System.out.println(success);  //check whether the file exists
            
            success = fileSystem.delete(new Path("/msb"), true);
            System.out.println(success);  //delete the directory
            
            success = fileSystem.exists(new Path("/msb"));
            System.out.println(success);*/   
            
            /*FSDataOutputStream out = fileSystem.create(new Path("/test.data"), true);
            FileInputStream fis = new FileInputStream("c:/test/core-site.xml");
            IOUtils.copyBytes(fis, out, 4096, true);*/
            
            /*FSDataOutputStream out = fileSystem.create(new Path("/test.data"), true);
            FileInputStream in = new FileInputStream("c:/test/core-site.xml");
            byte[] buf = new byte[4096];
            int len = in.read(buf);
            while(len !=-1) {
                out.write(buf, 0, len);
                len = in.read(buf);
            }
            in.close();
            out.close();*/
            
            FileStatus[] statuses = fileSystem.listStatus(new Path("/"));
            //System.out.println(statuses.length);
            for(FileStatus status : statuses) {
                System.out.println(status.getPath());
                System.out.println(status.getPermission());
                System.out.println(status.getReplication());
            }
        }
    
    }
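
    Run the class with the jars from step 13.1 on the classpath. For every entry under / it prints the path, permission and replication factor, roughly like this (sample output; the exact values depend on what was uploaded):

    hdfs://192.168.56.100:9000/hello.txt
    rw-r--r--
    2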
    

    14. YARN

    #cd /usr/local/hadoop/etc/hadoop
    #vim yarn-site.xml    //on all four machines
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>master</value>
    </property>
     
    <property>  
        <name>yarn.nodemanager.aux-services</name>  
        <value>mapreduce_shuffle</value>  
    </property>  
     
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>
    
    
    //on the namenode, configure mapred-site.xml (it must first be copied from the template)
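    #cp mapred-site.xml.template mapred-site.xml   //the file does not exist by default; create it from the template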
    #vi mapred-site.xml
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    
    
    #start-yarn.sh
    #jps
    8225 NameNode
    8418 SecondaryNameNode
    8870 ResourceManager
    9134 Jps
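    //on slave1, 2, 3, jps should now also show a NodeManager, started on each host listed in slaves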
    #cd
    #vi input.txt
    hello java
    hello java
    hello c
    hello c++
    hello python
    hello java
    # hadoop fs -mkdir /wcinput
    # hadoop fs -put input.txt /wcinput/
    # hadoop fs -ls /wcinput
    Found 1 items
    -rw-r--r--   2 root supergroup         64 2017-10-07 00:22 /wcinput/input.txt
    #find /usr/local/hadoop -name '*example*.jar'      //locate the examples jar
    #hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /wcinput/input.txt /wcoutput
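    #hadoop fs -cat /wcoutput/part-r-00000   //read the result; with this input "hello" counts 6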
    

    Watch the job run from the ResourceManager web page:
    http://192.168.56.100:8088
