
03. HDFS Common APIs

Author: 哈哈大圣 | Published 2019-11-07 23:39


    I. Project Setup

    1). Create a Maven project

    Tips: 1) if an Aliyun mirror is configured in Maven's settings.xml, it is recommended to comment it out; 2) it is recommended to point Maven at a clean local repository, as sketched below.
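
    For reference, a minimal settings.xml sketch (the local repository path below is only an example; adjust it to your machine):

    <settings xmlns="http://maven.apache.org/SETTINGS/1.0.0">
        <!-- Point Maven at a clean local repository (example path) -->
        <localRepository>D:/maven/clean-repo</localRepository>

        <!-- If a catch-all mirror such as Aliyun is configured, comment it out,
             otherwise the cloudera repository declared in pom.xml cannot be reached -->
        <!--
        <mirrors>
            <mirror>
                <id>aliyun</id>
                <mirrorOf>*</mirrorOf>
                <url>https://maven.aliyun.com/repository/public</url>
            </mirror>
        </mirrors>
        -->
    </settings>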

    1. pom.xml
    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>com.hahadasheng.bigdata</groupId>
        <artifactId>hadoop-learning</artifactId>
        <version>1.0-RELEASE</version>
        <packaging>jar</packaging>
        <name>hadoop-learning</name>
    
        <properties>
            <!-- Define the Hadoop version -->
            <hadoop.version>2.6.0-cdh5.15.1</hadoop.version>
        </properties>
    
        <!-- Add the CDH repository. Note: a mirror configured in Maven's settings.xml (e.g. Aliyun) may prevent these jars from being downloaded -->
        <repositories>
            <repository>
                <id>cloudera</id>
                <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
            </repository>
        </repositories>
    
        <dependencies>
            <!-- Hadoop client dependency -->
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-client</artifactId>
                <version>${hadoop.version}</version>
            </dependency>
    
            <!-- JUnit dependency -->
            <dependency>
                <groupId>junit</groupId>
                <artifactId>junit</artifactId>
                <version>4.10</version>
                <scope>test</scope>
            </dependency>
        </dependencies>
    
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.3</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </project>
    

    2). Obtain a file system handle

    package com.hahadasheng.bigdata.hadooplearning;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    import java.net.URI;
    
    /**
     * [Make sure the imported classes are the Hadoop ones]
     * 1. Create a Configuration
     * 2. Obtain a FileSystem
     * 3. Perform the operations
     * @author Liucheng
     * @since 2019-11-06
     */
    public class HDFSApp {
    
        public static void main(String[] args) throws Exception {
            // Obtain the file system handle
            Configuration configuration = new Configuration();
            // The local hosts file maps 192.168.10.188 to hadoop000
            FileSystem fileSystem = FileSystem.get(new URI("hdfs://hadoop000:8020"), configuration, "hadoop");
    
            // Create a directory
            Path path = new Path("/hdfsapi/test");
    
            final boolean result = fileSystem.mkdirs(path);
            System.out.println(result);
        }
    }
    

    II. API Development

    The following examples are written as JUnit tests; they all live in the same test class.

    1). Setup

    package com.hahadasheng.bigdata.hadooplearning;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.*;
    import org.apache.hadoop.io.IOUtils;
    import org.apache.hadoop.util.Progressable;
    import org.junit.After;
    import org.junit.Before;
    import org.junit.Test;
    
    import java.io.*;
    import java.net.URI;
    
    /**
     * @author Liucheng
     * @since 2019-11-06
     */
    public class HDFSAppTest {
    
        private FileSystem fileSystem;
    
        @Before
        public void init() throws Exception {
            Configuration configuration = new Configuration();
    
            // See the "Set the replication factor" section below
            configuration.set("dfs.replication", "1");
            // The local hosts file maps 192.168.10.188 to hadoop000
            this.fileSystem = FileSystem.get(new URI("hdfs://hadoop000:8020"), configuration, "hadoop");
            System.out.println("~~~~~~~~~~~~~~test up ~~~~~~~~~~~~~~~~~~~~~~");
        }
    
        @After
        public void release() throws IOException {
            // Close the file system handle after each test
            if (fileSystem != null) {
                fileSystem.close();
            }
            System.out.println("\n~~~~~~~~~~~~~~test down ~~~~~~~~~~~~~~~~~~~~~~");
        }
        
        /* ~~~~~ the test methods shown below go here ~~~~~ */
    }
    

    2). Create a directory

    @Test
    public void mkdir() throws IOException {
        // Create a directory
        Path path = new Path("/hdfsapi/test");
        boolean result = fileSystem.mkdirs(path);
        System.out.println(result);
    }
    
    

    3). Create a file and write to it

    @Test
    public void create() throws Exception {
        FSDataOutputStream out = fileSystem.create(new Path("/b.txt"));
    
        out.write("hello pk".getBytes()); # 
        out.flush();
        out.close();
    }
    
    

    4). View the contents of an HDFS file

    @Test
    public void text() throws IOException {
        FSDataInputStream in = fileSystem.open(new Path("/b.txt"));
        // Print the contents to the console
        IOUtils.copyBytes(in, System.out, 1024);
    }
    
    

    5). Set the replication factor

    1. For files uploaded via the command line, the replication factor follows the dfs.replication setting in $HADOOP_HOME/etc/hadoop/hdfs-site.xml.
    2. For files written through the Java client, the default is the dfs.replication value in hdfs-default.xml, bundled inside the org.apache.hadoop:hadoop-hdfs dependency.
    3. To customize it, set it on the Configuration object (see configuration.set("dfs.replication", "1") in the setup above); a per-file alternative is sketched after the test below.
    @Test
    public void replicationTest() throws Exception {
        // Replication is configured on the Configuration in init() above; this simply re-runs create()
        this.create();
    }
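
    In addition to setting dfs.replication on the Configuration before files are created, the replication factor of an existing file can be changed per path. A minimal sketch, assuming the fileSystem handle from the setup above (the path /b.txt and the factor 2 are only examples):

    @Test
    public void setReplicationTest() throws IOException {
        // Change the replication factor of an existing file (example value: 2).
        // Returns true if the request is accepted; the NameNode adds or removes
        // block replicas asynchronously.
        boolean changed = fileSystem.setReplication(new Path("/b.txt"), (short) 2);
        System.out.println(changed);
    }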
    
    

    6). Rename a file

    @Test
    public void rename() throws IOException {
        Path oldName = new Path("/b.txt");
        Path newName = new Path("/c.txt");
        boolean result = fileSystem.rename(oldName, newName);
        System.out.println(result);
    }
    
    

    7). Copy a local file to HDFS

    @Test
    public void copyFromLocalFile() throws IOException {
        String path = Thread.currentThread().getContextClassLoader().getResource("localfile.txt").getPath();
    
        Path localFilePath = new Path(path);
        // If the path does not start with a slash, it is resolved relative to the user's home directory, here /user/hadoop; i.e. hadoop fs -ls is equivalent to hadoop fs -ls /user/hadoop
        Path remoteFilePath = new Path("/remotefile.txt");
        fileSystem.copyFromLocalFile(localFilePath, remoteFilePath);
    }
    

    8). Copy a file to HDFS with a progress indicator

    @Test
    public void copyFileWithProcessBar() throws IOException {
        String path = Thread.currentThread().getContextClassLoader().getResource("mysql.rar").getPath();
    
        InputStream in = new BufferedInputStream(new FileInputStream(new File(path)));
        FSDataOutputStream out = fileSystem.create(new Path("/mysql.rar"), new Progressable() {
            @Override
            public void progress() {
                System.out.print(">");
            }
        });
    
        IOUtils.copyBytes(in, out, 4096);
    }
    
    

    9). Download a file from HDFS

    @Test
    public void copyToLocalFile() throws Exception {
        String fileNameLocal = "E:/ImprovementWorkingSpace/hadoop-learning/src/test/resources/";
        Path src = new Path("/remotefile.txt");
        Path dst = new Path(fileNameLocal);
        // Note: on Windows you must use the raw local file system, hence the last argument (useRawLocalFileSystem) is true
        fileSystem.copyToLocalFile(false, src, dst, true);
    }
    

    10). List the contents of a directory

    @Test
    public void listFile() throws Exception {
        FileStatus[] files = fileSystem.listStatus(new Path("/"));
    
        for (FileStatus file : files) {
            StringBuilder sb = new StringBuilder("~~~~~~~~~\n");
            sb.append("path:\t").append(file.getPath())
               .append("\nlength:\t").append(file.getLen())
               .append("\nisdir:\t").append(file.isDirectory())
               .append("\nblock_replication:\t").append(file.getReplication())
               .append("\nblocksize:\t").append(file.getBlockSize())
               .append("\nmodification_time:\t").append(file.getModificationTime())
               .append("\npermission:\t").append(file.getPermission())
               .append("\nowner:\t").append(file.getOwner())
               .append("\ngroup:\t").append(file.getGroup())
               .append("\nsymlink:\t").append(file.isSymlink())
               .append("~~~~~~~~~\n");
    
            System.out.println(sb.toString());
        }
    }
    
    

    11). Recursively list files (note: only files are returned, not directories)

    @Test
    public void listFileRecursive() throws Exception {
        RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(new Path("/"), true);
        while (iterator.hasNext()) {
            LocatedFileStatus file = iterator.next();
            StringBuilder sb = new StringBuilder("~~~~~~~~~\n");
            sb.append("path:\t").append(file.getPath())
                    .append("\nlength:\t").append(file.getLen())
                    .append("\nisdir:\t").append(file.isDirectory())
                    .append("\nblock_replication:\t").append(file.getReplication())
                    .append("\nblocksize:\t").append(file.getBlockSize())
                    .append("\nmodification_time:\t").append(file.getModificationTime())
                    .append("\npermission:\t").append(file.getPermission())
                    .append("\nowner:\t").append(file.getOwner())
                    .append("\ngroup:\t").append(file.getGroup())
                    .append("\nsymlink:\t").append(file.isSymlink())
                    .append("~~~~~~~~~\n");
    
            System.out.println(sb.toString());
        }
    }
    

    12). View file block information: how many blocks a file is split into, where the replicas are, etc.

    @Test
    public void getFileBlockLocations() throws IOException {
        FileStatus fileStatus = fileSystem.getFileStatus(new Path("/c.txt"));
        BlockLocation[] blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        for (BlockLocation block : blocks) {
            for (String name : block.getNames()) {
                System.out.print(name + " : " + block.getLength() + " : ");
                for (String host : block.getHosts()) {
                    System.out.print(host + "、");
                }
                System.out.println();
            }
        }
    }
    

    13). Delete a file or directory: recursive or non-recursive

    @Test
    public void delete() throws Exception {
        boolean result = fileSystem.delete(new Path("/user"), true);
        System.out.println(result);
    }
    
    
