美文网首页
Hadoop 学习番外篇1-hdfs客户端操作

Hadoop 学习番外篇1-hdfs客户端操作

作者: Kean_L_C | 来源:发表于2019-04-23 23:41 被阅读0次

hdfs客户端

学习资料B站:https://www.bilibili.com/video/av32081351/?p=57

Windows 下:下载与所用 Hadoop 版本对应的 Windows 编译文件,设置 HADOOP_HOME 环境变量,并把 %HADOOP_HOME%\bin 加入 PATH
maven project:pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build for the "hadoop" learning project.
  Produces a jar whose manifest Class-Path points at lib/, and copies all
  dependency jars into target/lib during prepare-package.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.learning</groupId>
    <artifactId>hadoop</artifactId>
    <version>1.0-SNAPSHOT</version>

    <repositories>
        <repository>
            <id>nexus-aliyun</id>
            <name>nexus-aliyun</name>
            <!-- HTTPS: Maven 3.8.1+ blocks external plain-http repositories by default -->
            <url>https://maven.aliyun.com/nexus/content/groups/public/</url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>

    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.7.3</version>
        </dependency>

        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.3</version>
        </dependency>

        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>


    </dependencies>

    <build>

        <!-- Disabled: bundle jars from a local lib/ directory as resources -->
        <!--<resources>-->
        <!--<resource>-->
        <!--<targetPath>lib/</targetPath>-->
        <!--<directory>lib/</directory>-->
        <!--<includes>-->
        <!--<include>**/*.jar</include>-->
        <!--</includes>-->
        <!--</resource>-->
        <!--</resources>-->

        <plugins>
            <!-- Compile for Java 8 -->
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <!--<version>3.1</version>-->
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                    <!--<encoding>${project.build.sourceEncoding}</encoding>-->
                    <!--<compilerArguments>-->
                    <!--<extdirs>${project.basedir}/lib</extdirs> (points the compiler at an external lib directory)-->
                    <!--</compilerArguments>-->
                </configuration>
            </plugin>

            <!-- Copy every dependency jar next to the built jar so the manifest Class-Path resolves -->
            <plugin>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>prepare-package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <!-- ${project.build.directory} is a built-in Maven property; it defaults to target -->
                            <outputDirectory>${project.build.directory}/lib</outputDirectory>
                            <!-- Whether to leave out transitive dependencies; false = copy them too -->
                            <excludeTransitive>false</excludeTransitive>
                            <!-- Whether to strip the version from copied jar file names; false = keep it -->
                            <stripVersion>false</stripVersion>
                        </configuration>
                    </execution>
                </executions>
            </plugin>

            <!-- Copy project files from the base directory into target/ -->
            <plugin>
                <artifactId>maven-resources-plugin</artifactId>
                <version>2.5</version>
                <executions>
                    <execution>
                        <id>copy-xmls</id>
                        <phase>process-sources</phase>
                        <goals>
                            <goal>copy-resources</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${basedir}/target/</outputDirectory>
                            <resources>
                                <resource>
                                    <directory>${basedir}/</directory>
                                    <!--
                                      The source directory contains the output directory, so without
                                      this exclude each build copies target/ into target/target/ and
                                      the nesting grows by one level per build.
                                    -->
                                    <excludes>
                                        <exclude>target/**</exclude>
                                    </excludes>
                                    <!--<directory>data/</directory>-->
                                    <!--<includes>-->
                                        <!--<include>data/</include>-->
                                        <!--<include>company_word_id/</include>-->
                                        <!--<include>src/main/resources/application.properties</include>-->
                                        <!--<include>src/main/resources/constant.properties</include>-->
                                        <!--<include>src/main/resources/dbConfig.properties</include>-->
                                        <!--<include>src/main/resources/synonym_words.csv</include>-->
                                        <!--<include>src/main/resources/标签匹配规则20180528.xlsx</include>-->
                                    <!--</includes>-->
                                    <!--<filtering>true</filtering>-->
                                </resource>
                            </resources>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!-- Build the jar with a manifest that references the jars copied into lib/ -->
            <plugin>
                <artifactId>maven-jar-plugin</artifactId>
                <!--<version>2.4</version>-->
                <configuration>
                    <archive>
                        <!-- Disabled: keep pom.xml and pom.properties out of the generated jar -->
                        <!--<addMavenDescriptor>false</addMavenDescriptor>-->
                        <manifest>
                            <!-- Add a Class-Path entry to MANIFEST.MF that lists all dependencies -->
                            <addClasspath>true</addClasspath>
                            <!-- All dependencies are expected in the lib folder next to the jar -->
                            <classpathPrefix>lib/</classpathPrefix>
                            <!-- Main-Class recorded in the manifest, i.e. the class run when the jar is executed -->
                            <!-- NOTE(review): confirm definative_guide.MaxTemperature exists in this project; otherwise running the jar fails -->
                            <mainClass>definative_guide.MaxTemperature</mainClass>
                        </manifest>
                    </archive>

                    <!-- Disabled: filter out files that should not end up in the jar -->
                    <!--<excludes>-->
                    <!--<exclude>${project.basedir}/xml/*</exclude>-->
                    <!--</excludes>-->

                </configuration>
            </plugin>


        </plugins>
    </build>

</project>

HDFS client

package org.shangu.client;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Minimal HDFS client example: connects to a NameNode as a fixed user, logs the
 * user's home directory and the file-system URI, then recursively lists every
 * file under {@code /data_algorithms}.
 *
 * @author : kean
 * @version V1.0
 * @Project: hadoop
 * @Package org.shangu.client
 * @date Date : 2019-04-23 22:08
 */

public class HDFSClient {

    private static final Logger LOGGER = LoggerFactory.getLogger(HDFSClient.class);

    /** Configuration key for the default file system; only needed by the commented-out "method 1" below. */
    private static final String DFSMasterName = "fs.defaultFS";

    /** URI of the NameNode this example connects to. */
    private static final String nameNodeURI = "hdfs://172.16.21.220:9000/";

    /**
     * Connects to HDFS and logs the recursive file listing of {@code /data_algorithms}.
     *
     * @param args unused
     * @throws IOException          if talking to the file system fails
     * @throws URISyntaxException   if {@link #nameNodeURI} is not a valid URI
     * @throws InterruptedException if obtaining the file system is interrupted
     */
    public static void main(String[] args)  throws IOException, URISyntaxException, InterruptedException {
        Configuration conf  = new Configuration();
        // method 1: requires the VM option -DHADOOP_USER_NAME=root so that the client
        // acts as the owner of the HDFS files being accessed.
        // conf.set("fs.defaultFS", "hdfs://172.16.21.220:9000/");
        // FileSystem fs = FileSystem.get(conf);
        // method 2: pass the user name explicitly.
        // try-with-resources closes the FileSystem even when a call below throws.
        try (FileSystem fs = FileSystem.get(new URI(nameNodeURI), conf, "root")) {
            LOGGER.info("{}", fs.getHomeDirectory());
            LOGGER.info("{}", fs.getUri());
            // second argument true = recurse into sub-directories
            RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(new Path("/data_algorithms"), true);
            while (remoteIterator.hasNext()) {
                LOGGER.info("{}", remoteIterator.next());
            }
        }
    }
}

output

D:\apps\jdk\bin\java.exe -DHADOOP_USER_NAME=root "-javaagent:D:\apps\IntelliJ IDEA 2018.2\lib\idea_rt.jar=6204:D:\apps\IntelliJ IDEA 2018.2\bin" -Dfile.encoding=UTF-8 -classpath D:\apps\jdk\jre\lib\charsets.jar;D:\apps\jdk\jre\lib\deploy.jar;D:\apps\jdk\jre\lib\ext\access-bridge-64.jar;D:\apps\jdk\jre\lib\ext\cldrdata.jar;D:\apps\jdk\jre\lib\ext\dnsns.jar;D:\apps\jdk\jre\lib\ext\jaccess.jar;D:\apps\jdk\jre\lib\ext\jfxrt.jar;D:\apps\jdk\jre\lib\ext\localedata.jar;D:\apps\jdk\jre\lib\ext\nashorn.jar;D:\apps\jdk\jre\lib\ext\sunec.jar;D:\apps\jdk\jre\lib\ext\sunjce_provider.jar;D:\apps\jdk\jre\lib\ext\sunmscapi.jar;D:\apps\jdk\jre\lib\ext\sunpkcs11.jar;D:\apps\jdk\jre\lib\ext\zipfs.jar;D:\apps\jdk\jre\lib\javaws.jar;D:\apps\jdk\jre\lib\jce.jar;D:\apps\jdk\jre\lib\jfr.jar;D:\apps\jdk\jre\lib\jfxswt.jar;D:\apps\jdk\jre\lib\jsse.jar;D:\apps\jdk\jre\lib\management-agent.jar;D:\apps\jdk\jre\lib\plugin.jar;D:\apps\jdk\jre\lib\resources.jar;D:\apps\jdk\jre\lib\rt.jar;D:\java_workspace\hadoop\target\classes;D:\java_workspace\repository\org\apache\hadoop\hadoop-common\2.7.3\hadoop-common-2.7.3.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-annotations\2.7.3\hadoop-annotations-2.7.3.jar;D:\apps\jdk\lib\tools.jar;D:\java_workspace\repository\com\google\guava\guava\11.0.2\guava-11.0.2.jar;D:\java_workspace\repository\commons-cli\commons-cli\1.2\commons-cli-1.2.jar;D:\java_workspace\repository\org\apache\commons\commons-math3\3.1.1\commons-math3-3.1.1.jar;D:\java_workspace\repository\xmlenc\xmlenc\0.52\xmlenc-0.52.jar;D:\java_workspace\repository\commons-httpclient\commons-httpclient\3.1\commons-httpclient-3.1.jar;D:\java_workspace\repository\commons-codec\commons-codec\1.4\commons-codec-1.4.jar;D:\java_workspace\repository\commons-io\commons-io\2.4\commons-io-2.4.jar;D:\java_workspace\repository\commons-net\commons-net\3.1\commons-net-3.1.jar;D:\java_workspace\repository\commons-collections\commons-collections\3.2.2\commons-collections-3.2.2.jar;D:\java_workspace\reposit
ory\javax\servlet\servlet-api\2.5\servlet-api-2.5.jar;D:\java_workspace\repository\org\mortbay\jetty\jetty\6.1.26\jetty-6.1.26.jar;D:\java_workspace\repository\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar;D:\java_workspace\repository\javax\servlet\jsp\jsp-api\2.1\jsp-api-2.1.jar;D:\java_workspace\repository\com\sun\jersey\jersey-core\1.9\jersey-core-1.9.jar;D:\java_workspace\repository\com\sun\jersey\jersey-json\1.9\jersey-json-1.9.jar;D:\java_workspace\repository\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar;D:\java_workspace\repository\com\sun\xml\bind\jaxb-impl\2.2.3-1\jaxb-impl-2.2.3-1.jar;D:\java_workspace\repository\javax\xml\bind\jaxb-api\2.2.2\jaxb-api-2.2.2.jar;D:\java_workspace\repository\javax\xml\stream\stax-api\1.0-2\stax-api-1.0-2.jar;D:\java_workspace\repository\javax\activation\activation\1.1\activation-1.1.jar;D:\java_workspace\repository\org\codehaus\jackson\jackson-jaxrs\1.8.3\jackson-jaxrs-1.8.3.jar;D:\java_workspace\repository\org\codehaus\jackson\jackson-xc\1.8.3\jackson-xc-1.8.3.jar;D:\java_workspace\repository\com\sun\jersey\jersey-server\1.9\jersey-server-1.9.jar;D:\java_workspace\repository\asm\asm\3.1\asm-3.1.jar;D:\java_workspace\repository\commons-logging\commons-logging\1.1.3\commons-logging-1.1.3.jar;D:\java_workspace\repository\net\java\dev\jets3t\jets3t\0.9.0\jets3t-0.9.0.jar;D:\java_workspace\repository\org\apache\httpcomponents\httpclient\4.1.2\httpclient-4.1.2.jar;D:\java_workspace\repository\org\apache\httpcomponents\httpcore\4.1.2\httpcore-4.1.2.jar;D:\java_workspace\repository\com\jamesmurty\utils\java-xmlbuilder\0.4\java-xmlbuilder-0.4.jar;D:\java_workspace\repository\commons-lang\commons-lang\2.6\commons-lang-2.6.jar;D:\java_workspace\repository\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar;D:\java_workspace\repository\commons-digester\commons-digester\1.8\commons-digester-1.8.jar;D:\java_workspace\repository\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.j
ar;D:\java_workspace\repository\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutils-core-1.8.0.jar;D:\java_workspace\repository\org\slf4j\slf4j-api\1.7.10\slf4j-api-1.7.10.jar;D:\java_workspace\repository\org\slf4j\slf4j-log4j12\1.7.10\slf4j-log4j12-1.7.10.jar;D:\java_workspace\repository\org\codehaus\jackson\jackson-core-asl\1.9.13\jackson-core-asl-1.9.13.jar;D:\java_workspace\repository\org\codehaus\jackson\jackson-mapper-asl\1.9.13\jackson-mapper-asl-1.9.13.jar;D:\java_workspace\repository\org\apache\avro\avro\1.7.4\avro-1.7.4.jar;D:\java_workspace\repository\com\thoughtworks\paranamer\paranamer\2.3\paranamer-2.3.jar;D:\java_workspace\repository\org\xerial\snappy\snappy-java\1.0.4.1\snappy-java-1.0.4.1.jar;D:\java_workspace\repository\com\google\protobuf\protobuf-java\2.5.0\protobuf-java-2.5.0.jar;D:\java_workspace\repository\com\google\code\gson\gson\2.2.4\gson-2.2.4.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-auth\2.7.3\hadoop-auth-2.7.3.jar;D:\java_workspace\repository\org\apache\directory\server\apacheds-kerberos-codec\2.0.0-M15\apacheds-kerberos-codec-2.0.0-M15.jar;D:\java_workspace\repository\org\apache\directory\server\apacheds-i18n\2.0.0-M15\apacheds-i18n-2.0.0-M15.jar;D:\java_workspace\repository\org\apache\directory\api\api-asn1-api\1.0.0-M20\api-asn1-api-1.0.0-M20.jar;D:\java_workspace\repository\org\apache\directory\api\api-util\1.0.0-M20\api-util-1.0.0-M20.jar;D:\java_workspace\repository\org\apache\curator\curator-framework\2.7.1\curator-framework-2.7.1.jar;D:\java_workspace\repository\com\jcraft\jsch\0.1.42\jsch-0.1.42.jar;D:\java_workspace\repository\org\apache\curator\curator-client\2.7.1\curator-client-2.7.1.jar;D:\java_workspace\repository\org\apache\curator\curator-recipes\2.7.1\curator-recipes-2.7.1.jar;D:\java_workspace\repository\com\google\code\findbugs\jsr305\3.0.0\jsr305-3.0.0.jar;D:\java_workspace\repository\org\apache\htrace\htrace-core\3.1.0-incubating\htrace-core-3.1.0-incubating.jar;D:\java_workspace\repos
itory\org\apache\zookeeper\zookeeper\3.4.6\zookeeper-3.4.6.jar;D:\java_workspace\repository\io\netty\netty\3.7.0.Final\netty-3.7.0.Final.jar;D:\java_workspace\repository\org\apache\commons\commons-compress\1.4.1\commons-compress-1.4.1.jar;D:\java_workspace\repository\org\tukaani\xz\1.0\xz-1.0.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-client\2.7.3\hadoop-client-2.7.3.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-hdfs\2.7.3\hadoop-hdfs-2.7.3.jar;D:\java_workspace\repository\io\netty\netty-all\4.0.23.Final\netty-all-4.0.23.Final.jar;D:\java_workspace\repository\xerces\xercesImpl\2.9.1\xercesImpl-2.9.1.jar;D:\java_workspace\repository\xml-apis\xml-apis\1.3.04\xml-apis-1.3.04.jar;D:\java_workspace\repository\org\fusesource\leveldbjni\leveldbjni-all\1.8\leveldbjni-all-1.8.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-mapreduce-client-app\2.7.3\hadoop-mapreduce-client-app-2.7.3.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-mapreduce-client-common\2.7.3\hadoop-mapreduce-client-common-2.7.3.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-yarn-client\2.7.3\hadoop-yarn-client-2.7.3.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-yarn-server-common\2.7.3\hadoop-yarn-server-common-2.7.3.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-mapreduce-client-shuffle\2.7.3\hadoop-mapreduce-client-shuffle-2.7.3.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-yarn-api\2.7.3\hadoop-yarn-api-2.7.3.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-mapreduce-client-core\2.7.3\hadoop-mapreduce-client-core-2.7.3.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-yarn-common\2.7.3\hadoop-yarn-common-2.7.3.jar;D:\java_workspace\repository\com\sun\jersey\jersey-client\1.9\jersey-client-1.9.jar;D:\java_workspace\repository\org\apache\hadoop\hadoop-mapreduce-client-jobclient\2.7.3\hadoop-mapreduce-client-jobclient-2.7.3.jar;D:\java_workspace\repository\log4j\log4j\1.2.17\log4j-1.2.17.jar 
org.shangu.client.HDFSClient
2019-04-23 23:02:15  [main] [WARN]  - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2019-04-23 23:02:17  [main] [INFO]  - hdfs://172.16.21.220:9000/user/root
2019-04-23 23:02:17  [main] [INFO]  - hdfs://172.16.21.220:9000
2019-04-23 23:02:27  [main] [INFO]  - LocatedFileStatus{path=hdfs://172.16.21.220:9000/data_algorithms/chapter1/input/sample_input.txt; isDirectory=false; length=158; replication=3; blocksize=134217728; modification_time=1555230387107; access_time=1555861955684; owner=root; group=supergroup; permission=rw-r--r--; isSymlink=false}
2019-04-23 23:02:27  [main] [INFO]  - LocatedFileStatus{path=hdfs://172.16.21.220:9000/data_algorithms/chapter1/input2/timeseries.txt; isDirectory=false; length=102; replication=3; blocksize=134217728; modification_time=1555255745932; access_time=1555517268232; owner=root; group=supergroup; permission=rw-r--r--; isSymlink=false}
2019-04-23 23:02:27  [main] [INFO]  - LocatedFileStatus{path=hdfs://172.16.21.220:9000/data_algorithms/chapter1/output/_SUCCESS; isDirectory=false; length=0; replication=3; blocksize=134217728; modification_time=1555861971335; access_time=1555861971300; owner=root; group=supergroup; permission=rw-r--r--; isSymlink=false}
2019-04-23 23:02:27  [main] [INFO]  - LocatedFileStatus{path=hdfs://172.16.21.220:9000/data_algorithms/chapter1/output/part-r-00000; isDirectory=false; length=334; replication=3; blocksize=134217728; modification_time=1555861970474; access_time=1555861969935; owner=root; group=supergroup; permission=rw-r--r--; isSymlink=false}
2019-04-23 23:02:27  [main] [INFO]  - LocatedFileStatus{path=hdfs://172.16.21.220:9000/data_algorithms/chapter1/output2/_SUCCESS; isDirectory=false; length=0; replication=3; blocksize=134217728; modification_time=1555517270045; access_time=1555517270038; owner=root; group=supergroup; permission=rw-r--r--; isSymlink=false}
2019-04-23 23:02:27  [main] [INFO]  - LocatedFileStatus{path=hdfs://172.16.21.220:9000/data_algorithms/chapter1/output2/part-00000; isDirectory=false; length=47; replication=3; blocksize=134217728; modification_time=1555517269386; access_time=1555517268867; owner=root; group=supergroup; permission=rw-r--r--; isSymlink=false}
2019-04-23 23:02:27  [main] [INFO]  - LocatedFileStatus{path=hdfs://172.16.21.220:9000/data_algorithms/chapter1/output2/part-00001; isDirectory=false; length=47; replication=3; blocksize=134217728; modification_time=1555517269371; access_time=1555517268875; owner=root; group=supergroup; permission=rw-r--r--; isSymlink=false}
2019-04-23 23:02:27  [main] [INFO]  - LocatedFileStatus{path=hdfs://172.16.21.220:9000/data_algorithms/chapter1/output2/part-00002; isDirectory=false; length=36; replication=3; blocksize=134217728; modification_time=1555517269940; access_time=1555517269476; owner=root; group=supergroup; permission=rw-r--r--; isSymlink=false}

Process finished with exit code 0

关于输出中的 WARN 日志:
Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
hadoop本地库是很久之前安装的,这里提示本地库与系统版本不匹配,无法加载,程序改用内置的 Java 实现;从上面的输出可以看到,文件列表仍然正常打印。

相关文章

网友评论

      本文标题:Hadoop 学习番外篇1-hdfs客户端操作

      本文链接:https://www.haomeiwen.com/subject/okpigqtx.html