美文网首页
Java查询HDFS文件系统

Java查询HDFS文件系统

作者: 主君_05c4 | 来源:发表于2019-04-20 17:15 被阅读0次
    1、文件元数据 FileStatus
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.FsStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.permission.FsPermission;
    
    import java.io.IOException;
    import java.net.URI;
    
    /**
     * Demo: query filesystem-level metadata (FsStatus: capacity/used/remaining)
     * and per-file metadata (FileStatus: owner, block size, length, replication,
     * permission, ...) for a single HDFS file.
     */
    public class ShowFileStatus {

        public static void main(String[] args) {
            URI uri = URI.create("hdfs://192.168.1.100:9000/home/ossuser/1.txt");
            Configuration conf = new Configuration();

            // try-with-resources: FileSystem is Closeable; the original leaked the handle.
            try (FileSystem fs = FileSystem.get(uri, conf)) {
                Path path = new Path(uri);

                // Filesystem-wide statistics (bytes).
                FsStatus status = fs.getStatus(path);
                System.out.println("fsStatus.getCapacity() = " + status.getCapacity());
                System.out.println("fsStatus.getUsed() = " + status.getUsed());
                System.out.println("fsStatus.getRemaining() = " + status.getRemaining());

                System.out.println("----------------------------------");

                // Per-file metadata; times are epoch milliseconds.
                FileStatus fileStatus = fs.getFileStatus(path);
                System.out.println("fileStatus.getOwner() = " + fileStatus.getOwner());
                System.out.println("fileStatus.getGroup() = " + fileStatus.getGroup());
                System.out.println("fileStatus.getAccessTime() = " + fileStatus.getAccessTime());
                System.out.println("fileStatus.getBlockSize() = " + fileStatus.getBlockSize());
                System.out.println("fileStatus.getLen() = " + fileStatus.getLen());
                System.out.println("fileStatus.getModificationTime() = " + fileStatus.getModificationTime());
                System.out.println("fileStatus.getReplication() = " + fileStatus.getReplication());

                FsPermission fsPermission = fileStatus.getPermission();
                System.out.println("fileStatus.getPermission() = " + fsPermission);
            } catch (IOException e) {
                // Demo code: just dump the stack trace on connection/lookup failure.
                e.printStackTrace();
            }
        }
    }
    

    输出如下:

    fsStatus.getCapacity() = 38046990336
    fsStatus.getUsed() = 187760640
    fsStatus.getRemaining() = 14045487104
    ----------------------------------
    fileStatus.getOwner() = ossuser
    fileStatus.getGroup() = supergroup
    fileStatus.getAccessTime() = 1555746057611
    fileStatus.getBlockSize() = 134217728
    fileStatus.getLen() = 1935838
    fileStatus.getModificationTime() = 1555746058582
    fileStatus.getReplication() = 3
    fileStatus.getPermission() = rw-r--r--
    

    FileStatus封装了目录与文件的元数据信息,包括所有者、块大小、文件长度、修改时间、副本数、权限等信息

    2、列出文件
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.PathFilter;
    
    import java.io.IOException;
    import java.net.URI;
    
    /**
     * Demo: list the entries of an HDFS directory, keeping only those that are
     * themselves directories, via listStatus(Path, PathFilter).
     */
    public class ShowFileStatus {

        public static void main(String[] args) {
            URI uri = URI.create("hdfs://192.168.1.100:9000/home/ossuser/");
            Configuration conf = new Configuration();

            // try-with-resources: FileSystem is Closeable; the original leaked the handle.
            // fs is effectively final, so the filter lambda below may capture it.
            try (FileSystem fs = FileSystem.get(uri, conf)) {
                Path path = new Path(uri);

                // PathFilter is a single-method interface -> lambda.
                // Best effort: a path whose status cannot be read is simply excluded.
                FileStatus[] fileStatuses = fs.listStatus(path, p -> {
                    try {
                        return fs.getFileStatus(p).isDirectory();
                    } catch (IOException ignored) {
                        return false;
                    }
                });

                for (FileStatus fileStatus : fileStatuses) {
                    System.out.println(fileStatus.getPath());
                }
            } catch (IOException e) {
                // Demo code: just dump the stack trace on connection/lookup failure.
                e.printStackTrace();
            }
        }
    }
    

    输出结果

    hdfs://192.168.1.100:9000/home/ossuser/aa
    

    输出多个路径的文件列表信息:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.FileUtil;
    import org.apache.hadoop.fs.Path;
    
    import java.io.IOException;
    import java.net.URI;
    
    /**
     * Demo: list the children of several HDFS paths in one call with
     * listStatus(Path[]), then convert the statuses back to paths via
     * FileUtil.stat2Paths.
     */
    public class FileStat2Paths {

        public static void main(String[] args) {
            Path[] paths = new Path[]{
                new Path("hdfs://192.168.1.100:9000/"),
                new Path("hdfs://192.168.1.100:9000/home/ossuser"),
            };

            Configuration configuration = new Configuration();

            // try-with-resources: FileSystem is Closeable; the original leaked the handle.
            try (FileSystem fs = FileSystem.get(URI.create(paths[0].toString()), configuration)) {
                // Concatenated listing of every given path's children.
                FileStatus[] status = fs.listStatus(paths);

                Path[] listedPaths = FileUtil.stat2Paths(status);
                for (Path listedPath : listedPaths) {
                    System.out.println(listedPath);
                }
            } catch (IOException e) {
                // Demo code: just dump the stack trace on connection/lookup failure.
                e.printStackTrace();
            }
        }
    }
    
    hdfs://192.168.1.100:9000/2019
    hdfs://192.168.1.100:9000/home
    hdfs://192.168.1.100:9000/home/ossuser/1.txt
    hdfs://192.168.1.100:9000/home/ossuser/aa
    hdfs://192.168.1.100:9000/home/ossuser/log1.txt
    
    3、文件模式

    在一个表达式中使用通配符(globbing)匹配多个文件,FileSystem为匹配通配符提供了两个方法:

    • public FileStatus[] globStatus(Path pathPattern) throws IOException ;
    • public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    import java.io.IOException;
    import java.net.URI;
    
    /**
     * Demo: match HDFS paths with a glob pattern via FileSystem.globStatus.
     * The alternation {01,02,ossuser} matches any second-level entry named
     * 01, 02 or ossuser.
     */
    public class GlobStatus {

        public static void main(String[] args) {
            URI uri = URI.create("hdfs://192.168.1.100:9000/");

            Configuration configuration = new Configuration();

            // try-with-resources: FileSystem is Closeable; the original leaked the handle.
            try (FileSystem fs = FileSystem.get(uri, configuration)) {
                // Fixed: the original pattern had a stray space ("{01,02, ossuser}"),
                // which made the third alternative the literal " ossuser" (leading
                // space) and thus never match.
                FileStatus[] status = fs.globStatus(new Path("/*/{01,02,ossuser}"));

                // globStatus returns null (not an empty array) when the pattern
                // matches nothing — guard against the NPE in the loop.
                if (status != null) {
                    for (FileStatus fileStatus : status) {
                        System.out.println(fileStatus.getPath());
                    }
                }
            } catch (IOException e) {
                // Demo code: just dump the stack trace on connection/lookup failure.
                e.printStackTrace();
            }
        }
    }
    

    输出如下:

    hdfs://192.168.1.100:9000/2019/01
    hdfs://192.168.1.100:9000/2019/02
    

    通配符只能作用于文件路径与名称,不能作用于文件属性,可结合PathFilter接口实现类以达成更加灵活的文件或目录过滤。

    相关文章

      网友评论

          本文标题:Java查询HDFS文件系统

          本文链接:https://www.haomeiwen.com/subject/inrlgqtx.html