
Hadoop: Accessing HDFS with the Java API via Maven

Author: 一条IT | Published 2019-02-25 18:30

    1. Read/Write Source Code

    Pay attention to the commented lines!
    
    
    /**
     * App.java
     */
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.URI;
    
    /**
     * Created by Administrator on 2018/5/23.
     */
    public class App {
        public static void main( String[] args )
        {
            try {
                Configuration conf = new Configuration();
    
                // Without this setting you get: java.io.IOException: No FileSystem for scheme: hdfs
                conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
    
                String filePath = "hdfs://192.168.47.140:8020/song/hello.txt";
                Path path = new Path(filePath);
    
                // The URI must be passed here, otherwise you get: java.lang.IllegalArgumentException: Wrong FS: hdfs://127.0.0.1:9000/test/test.txt, expected: file:///
                // The third argument is the user to connect as ("hadoop3" here)
                FileSystem fs = FileSystem.get(new URI(filePath), conf, "hadoop3");
    
                System.out.println( "READING ============================" );
                FSDataInputStream is = fs.open(path);
                BufferedReader br = new BufferedReader(new InputStreamReader(is));
                // This example reads only a single line
                String content = br.readLine();
                System.out.println(content);
                br.close();
    
                System.out.println("WRITING ============================");
                byte[] buff = "this is helloworld from java api!\n".getBytes();
                // fs.create overwrites the file if it already exists
                FSDataOutputStream os = fs.create(path);
                os.write(buff, 0, buff.length);
                os.close();
                fs.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    
    

    Run result:

    (screenshot of console output omitted)
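
    The example above reads only the first line of the file. As a minimal sketch (reusing the `fs` and `path` variables from the code above), the read step could instead consume the whole file line by line:

    // Read the entire file instead of stopping after a single line
    FSDataInputStream is = fs.open(path);
    BufferedReader br = new BufferedReader(new InputStreamReader(is));
    String line;
    while ((line = br.readLine()) != null) {
        System.out.println(line);
    }
    br.close(); // also closes the underlying FSDataInputStream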

    2. pom.xml Configuration

    Note:
        1. Use the shade plugin to avoid packaging failures.
        2. For the dependencies you can reference just `hadoop-client`, or reference `hadoop-common` and `hadoop-hdfs` together (see the sketch after this note).
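
    A minimal sketch of that second option, assuming the same 2.8.1 version used in the pom below:

    <!-- Alternative to hadoop-client: reference common and hdfs separately -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.8.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.8.1</version>
    </dependency>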
    
    
    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
    
      <groupId>com.jiecxy</groupId>
      <artifactId>HDFSTest</artifactId>
      <version>1</version>
      <packaging>jar</packaging>
    
      <name>HDFSTest</name>
      <url>http://maven.apache.org</url>
    
      <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
      </properties>
    
      <dependencies>
          <dependency>
              <groupId>org.apache.hadoop</groupId>
              <artifactId>hadoop-client</artifactId>
              <version>2.8.1</version>
          </dependency>
      </dependencies>
    
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-shade-plugin</artifactId>
                    <executions>
                        <!-- Run shade goal on package phase -->
                        <execution>
                            <phase>package</phase>
                            <goals>
                                <goal>shade</goal>
                            </goals>
                            <configuration>
                                <filters>
                                    <filter>
                                        <!-- Do not copy the signatures in the META-INF folder.
                                        Otherwise, this might cause SecurityExceptions when using the JAR. -->
                                        <artifact>*:*</artifact>
                                        <excludes>
                                            <exclude>META-INF/*.SF</exclude>
                                            <exclude>META-INF/*.DSA</exclude>
                                            <exclude>META-INF/*.RSA</exclude>
                                        </excludes>
                                    </filter>
                                </filters>
    
                                <transformers>
                                    <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                        <mainClass>com.jiecxy.App</mainClass>
                                    </transformer>
                                </transformers>
    
                                <createDependencyReducedPom>false</createDependencyReducedPom>
                            </configuration>
                        </execution>
                    </executions>
                </plugin>
    
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
    
            </plugins>
        </build>
    </project>
    
    

    3. Possible Issues

    3.1 java.lang.IllegalArgumentException: Wrong FS

    java.lang.IllegalArgumentException: Wrong FS: hdfs://127.0.0.1:9000/test/test.txt, expected: file:///
        at org.apache.hadoop.fs.FileSystem.checkPath(FileSystem.java:666)
        at org.apache.hadoop.fs.RawLocalFileSystem.pathToFile(RawLocalFileSystem.java:86)
        at org.apache.hadoop.fs.RawLocalFileSystem.deprecatedGetFileStatus(RawLocalFileSystem.java:630)
        at org.apache.hadoop.fs.RawLocalFileSystem.getFileLinkStatusInternal(RawLocalFileSystem.java:861)
        at org.apache.hadoop.fs.RawLocalFileSystem.getFileStatus(RawLocalFileSystem.java:625)
        at org.apache.hadoop.fs.FilterFileSystem.getFileStatus(FilterFileSystem.java:435)
        at org.apache.hadoop.fs.ChecksumFileSystem$ChecksumFSInputChecker.<init>(ChecksumFileSystem.java:146)
        at org.apache.hadoop.fs.ChecksumFileSystem.open(ChecksumFileSystem.java:347)
        at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:786)
        at com.jiecxy.App.main(App.java:25)
    
    

    Solution: `FileSystem.get(conf)` resolves paths against the default filesystem (`file:///` when no cluster config is on the classpath), so pass the URI explicitly. Change

    FileSystem fs = FileSystem.get(conf);
    
    

    to:

    FileSystem fs = FileSystem.get(new URI(filePath), conf);
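
    Alternatively, you can set the default filesystem on the Configuration so that the no-URI overload also resolves to HDFS. A sketch, assuming the NameNode address used earlier in this article:

    Configuration conf = new Configuration();
    // Point the default filesystem at the HDFS NameNode instead of file:///
    conf.set("fs.defaultFS", "hdfs://192.168.47.140:8020");
    FileSystem fs = FileSystem.get(conf); // now returns an HDFS FileSystem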
    
    

    3.2 java.io.IOException: No FileSystem for scheme: hdfs

    java.io.IOException: No FileSystem for scheme: hdfs
        at org.apache.hadoop.fs.FileSystem.getFileSystemClass(FileSystem.java:2798)
        at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2809)
        at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:100)
        at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2848)
        at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2830)
        at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:389)
        at com.jiecxy.App.main(App.java:24)
    
    

    Solution:
    Specify the HDFS implementation class (if you referenced only `hadoop-common`, you also need to add the `hadoop-hdfs` dependency, otherwise this class cannot be found):

    conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
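
    If the error appears only after packaging with the shade plugin, another common cause is that shading keeps just one of the several `META-INF/services/org.apache.hadoop.fs.FileSystem` files shipped by the Hadoop artifacts, dropping the hdfs registration. A hedged alternative fix is to merge those service files with the shade plugin's `ServicesResourceTransformer`, added next to the `ManifestResourceTransformer` already present in the pom above:

    <transformers>
        <!-- Merge META-INF/services files so the hdfs scheme stays registered -->
        <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
        <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
            <mainClass>com.jiecxy.App</mainClass>
        </transformer>
    </transformers>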
    
