Steps
- Create a Java project with IDEA + Maven
Install Maven locally, configure Maven in IDEA, and create the corresponding Maven project.
- Add the HDFS-related dependencies
<properties>
    <hadoop.version>2.6.0-cdh5.7.0</hadoop.version> <!-- pin the Hadoop version -->
</properties>
<repositories>
    <repository>
        <id>cloudera</id>
        <url>https://repository.cloudera.com/artifactory/cloudera-repos</url> <!-- repository hosting the CDH artifacts -->
    </repository>
</repositories>
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId> <!-- Hadoop client dependency -->
        <artifactId>hadoop-client</artifactId>
        <version>${hadoop.version}</version>
    </dependency>
</dependencies>
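The test annotations used below (@Test, @Before, @After) also need JUnit on the test classpath; a commonly used coordinate (the version here is an assumption, any recent JUnit 4 release works):
<dependency>
    <groupId>junit</groupId>
    <artifactId>junit</artifactId>
    <version>4.12</version>
    <scope>test</scope>
</dependency>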
- Use the Java API to operate on HDFS files
Setup
import java.io.*; // File, FileInputStream, BufferedInputStream, InputStream, FileOutputStream
import java.net.URI;
import java.util.Arrays;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*; // FileSystem, Path, FileStatus, FSDataInputStream, FSDataOutputStream
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public static final String HDFS_PATH = "hdfs://192.168.247.100:9000"; // HDFS NameNode URI
FileSystem fileSystem = null; // the core class for operating on HDFS
Configuration configuration = null; // Hadoop configuration
@Before
public void setUp() throws Exception {
    System.out.println("set up");
    configuration = new Configuration();
    fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration); // if the current user lacks permissions, use the overload that takes a user name
}
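As the comment notes, FileSystem.get has an overload that also takes a user name, which sidesteps permission errors when your local OS user differs from the HDFS file owner. A minimal sketch, assuming the HDFS user is called hadoop (substitute your own):
fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "hadoop"); // "hadoop" is an assumed user name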
@After
public void tearDown() throws Exception {
    configuration = null;
    fileSystem = null;
    System.out.println("tear down");
}
Create an HDFS directory
@Test
public void mkdir() throws Exception {
    fileSystem.mkdirs(new Path("/hdfsapi/test")); // creates parent directories as needed, like mkdir -p
}
Create an HDFS file
@Test
public void create() throws Exception {
    FSDataOutputStream outputStream = fileSystem.create(new Path("/hdfsapi/test/test.txt"));
    outputStream.write("Hello World".getBytes());
    outputStream.flush();
    outputStream.close();
}
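create overwrites an existing file at the target path by default; an overload takes an explicit overwrite flag if that is not what you want. A minimal sketch:
FSDataOutputStream out = fileSystem.create(new Path("/hdfsapi/test/test.txt"), false); // false: throw instead of overwriting an existing file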
View the contents of an HDFS file
@Test
public void cat() throws Exception {
    FSDataInputStream inputStream = fileSystem.open(new Path("/hdfsapi/test/test.txt"));
    IOUtils.copyBytes(inputStream, System.out, 1024); // stream the file to stdout with a 1024-byte buffer
    inputStream.close();
}
Rename a file
@Test
public void rename() throws Exception {
    fileSystem.rename(new Path("/hdfsapi/test/test.txt"), new Path("/hdfsapi/test/test1.txt"));
}
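Note that rename signals failure through its boolean return value rather than an exception, so it is worth checking. A small sketch:
boolean renamed = fileSystem.rename(new Path("/hdfsapi/test/test.txt"), new Path("/hdfsapi/test/test1.txt"));
System.out.println(renamed ? "renamed" : "rename failed"); // false e.g. when the source path does not exist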
Upload a local file
@Test
public void copyFromLocalFile() throws Exception {
    fileSystem.copyFromLocalFile(new Path("E://U盘/test.sql"), new Path("/hdfsapi/test/")); // works on both Windows and Linux, but the local path syntax differs
}
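For comparison, the same call with a Linux-style source path (the local path below is purely illustrative):
fileSystem.copyFromLocalFile(new Path("/home/hadoop/test.sql"), new Path("/hdfsapi/test/"));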
Upload a large local file with progress reporting
@Test
public void copyFromLocalFileWithProgress() throws Exception {
    InputStream is = new BufferedInputStream(
            new FileInputStream(
                    new File("E://game.tgz")
            )
    );
    FSDataOutputStream outputStream = fileSystem.create(
            new Path("/hdfsapi/test/zookeeper.tar.gz"),
            new Progressable() {
                @Override
                public void progress() {
                    System.out.print("."); // custom progress display: prints a dot each time the client reports progress
                }
            }
    );
    IOUtils.copyBytes(is, outputStream, 4096);
}
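This three-argument copyBytes does not close the streams for you; outside of a quick demo you would typically close them once the copy finishes, for example:
IOUtils.closeStream(is);
IOUtils.closeStream(outputStream);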
Download an HDFS file
@Test
public void copyToLocalFile() throws Exception {
    fileSystem.copyToLocalFile(false, new Path("/hdfsapi/test/test.txt"), new Path("F://test.txt"), true);
    // signature: copyToLocalFile(boolean delSrc, Path src, Path dst, boolean useRawLocalFileSystem)
    // passing true for useRawLocalFileSystem writes through plain Java IO streams instead of the local
    // checksummed file system; without it, Windows (lacking the native Hadoop libraries) throws a NullPointerException
}
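If copyToLocalFile still causes trouble on Windows, you can bypass the local file system entirely and download through plain Java IO streams. A minimal sketch (the method name and local path are illustrative):
@Test
public void downloadWithStreams() throws Exception {
    FSDataInputStream in = fileSystem.open(new Path("/hdfsapi/test/test.txt"));
    OutputStream out = new FileOutputStream(new File("F://test.txt")); // illustrative local path
    IOUtils.copyBytes(in, out, 1024, true); // the trailing true closes both streams
}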
List an HDFS directory
@Test
public void listFiles() throws Exception {
    FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/hdfsapi/test"));
    Arrays.stream(fileStatuses).forEach(fileStatus -> {
        String isDir = fileStatus.isDirectory() ? "directory" : "file";
        Short replication = fileStatus.getReplication(); // replication factor
        /*
         * Files uploaded with HDFS shell put use the default replication factor configured on the server.
         * Files uploaded through the Java API, with no replication factor set locally, fall back to
         * Hadoop's built-in default of 3.
         */
        Long blockSize = fileStatus.getBlockSize();
        Long len = fileStatus.getLen();
        String path = fileStatus.getPath().toString();
        System.out.println(isDir + "\t" + replication + "\t" + blockSize + "\t" + len + "\t" + path);
    });
}
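To make Java API uploads follow a specific factor instead of the hard-coded default of 3, you can set dfs.replication on the Configuration, or adjust an existing file with setReplication. A minimal sketch, assuming a desired factor of 1:
configuration.set("dfs.replication", "1"); // set before FileSystem.get so the client picks it up
fileSystem.setReplication(new Path("/hdfsapi/test/test.txt"), (short) 1); // change an existing file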
Delete
@Test
public void delete() throws Exception {
    fileSystem.delete(new Path("/hdfsapi/test/zookeeper.tar.gz"), true); // the boolean controls recursive deletion
}