- 需求背景:简单实现远程文件的MD5校验
- 方案设计:①通过FTP获取远程文件流;②将文件流处理获取MD5;
- 已知bug:①中文路径乱码
1.FTP的工具类
关于中文路径的乱码bug没能解决
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.plugin.unstructuredstorage.reader.UnstructuredStorageReaderUtil;
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.serializer.SerializerFeature;
import com.jcraft.jsch.*;
import com.jcraft.jsch.ChannelSftp.LsEntry;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Field;
import java.util.*;
public class SftpHelper {
private static final Logger LOG = LoggerFactory.getLogger(SftpHelper.class);
Session session = null;
ChannelSftp channelSftp = null;
public static Long totalCount = 0L;
public void loginFtpServer(String host, String username, String password, int port, int timeout,
String connectMode) {
JSch jsch = new JSch(); // 创建JSch对象
try {
session = jsch.getSession(username, host, port);
// 根据用户名,主机ip,端口获取一个Session对象
// 如果服务器连接不上,则抛出异常
if (session == null) {
throw DataXException.asDataXException(FtpReaderErrorCode.FAIL_LOGIN,
"session is null,无法通过sftp与服务器建立链接,请检查主机名和用户名是否正确.");
}
session.setPassword(password); // 设置密码
Properties config = new Properties();
config.put("StrictHostKeyChecking", "no");
session.setConfig(config); // 为Session对象设置properties
session.setTimeout(timeout); // 设置timeout时间
session.connect(); // 通过Session建立链接
channelSftp = (ChannelSftp) session.openChannel("sftp"); // 打开SFTP通道
channelSftp.connect(); // 建立SFTP通道的连接
try {
Class cl = channelSftp.getClass();
Field field = cl.getDeclaredField("server_version");
field.setAccessible(true);
field.set(channelSftp,2);
channelSftp.setFilenameEncoding("GBK");
} catch (SftpException e) {
e.printStackTrace();
String message = String.format("-------------------------", host);
LOG.error(message);
} catch (IllegalAccessException e) {
e.printStackTrace();
} catch (NoSuchFieldException e) {
e.printStackTrace();
}
//设置命令传输编码
//String fileEncoding = System.getProperty("file.encoding");
//channelSftp.setFilenameEncoding(fileEncoding);
} catch (JSchException e) {
if(null != e.getCause()){
String cause = e.getCause().toString();
String unknownHostException = "java.net.UnknownHostException: " + host;
String illegalArgumentException = "java.lang.IllegalArgumentException: port out of range:" + port;
String wrongPort = "java.net.ConnectException: Connection refused";
if (unknownHostException.equals(cause)) {
String message = String.format("请确认ftp服务器地址是否正确,无法连接到地址为: [%s] 的ftp服务器", host);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FAIL_LOGIN, message, e);
} else if (illegalArgumentException.equals(cause) || wrongPort.equals(cause) ) {
String message = String.format("请确认连接ftp服务器端口是否正确,错误的端口: [%s] ", port);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FAIL_LOGIN, message, e);
}
}else {
if("Auth fail".equals(e.getMessage())){
String message = String.format("与ftp服务器建立连接失败,请检查用户名和密码是否正确: [%s]",
"message:host =" + host + ",username = " + username + ",port =" + port);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FAIL_LOGIN, message);
}else{
String message = String.format("与ftp服务器建立连接失败 : [%s]",
"message:host =" + host + ",username = " + username + ",port =" + port);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FAIL_LOGIN, message, e);
}
}
}
}
public void logoutFtpServer() {
if (channelSftp != null) {
channelSftp.disconnect();
}
if (session != null) {
session.disconnect();
}
}
public void mkdir(String directoryPath) {
boolean isDirExist = false;
try {
this.printWorkingDirectory();
SftpATTRS sftpATTRS = this.channelSftp.lstat(directoryPath);
isDirExist = sftpATTRS.isDir();
} catch (SftpException e) {
if (e.getMessage().toLowerCase().equals("no such file")) {
LOG.warn(String.format(
"您的配置项path:[%s]不存在,将尝试进行目录创建, errorMessage:%s",
directoryPath, e.getMessage()), e);
isDirExist = false;
}
}
if (!isDirExist) {
try {
// warn 检查mkdir -p
this.channelSftp.mkdir(directoryPath);
} catch (SftpException e) {
String message = String
.format("创建目录:%s时发生I/O异常,请确认与ftp服务器的连接正常,拥有目录创建权限, errorMessage:%s",
directoryPath, e.getMessage());
LOG.error(message, e);
throw DataXException
.asDataXException(
FtpWriterErrorCode.COMMAND_FTP_IO_EXCEPTION,
message, e);
}
}
}
public void mkDirRecursive(String directoryPath){
boolean isDirExist = false;
try {
this.printWorkingDirectory();
SftpATTRS sftpATTRS = this.channelSftp.lstat(directoryPath);
isDirExist = sftpATTRS.isDir();
} catch (SftpException e) {
if (e.getMessage().toLowerCase().equals("no such file")) {
LOG.warn(String.format("您的配置项path:[%s]不存在,将尝试进行目录创建, errorMessage:%s",directoryPath, e.getMessage()), e);
isDirExist = false;
}
}
if (!isDirExist) {
StringBuilder dirPath = new StringBuilder();
dirPath.append(IOUtils.DIR_SEPARATOR_UNIX);
String[] dirSplit = StringUtils.split(directoryPath,IOUtils.DIR_SEPARATOR_UNIX);
try {
// ftp server不支持递归创建目录,只能一级一级创建
for(String dirName : dirSplit){
try {
dirPath.append(dirName);
channelSftp.cd(dirPath.toString());
dirPath.append(IOUtils.DIR_SEPARATOR_UNIX);
} catch (SftpException e1) {
String message = String.format("没有此目录:%s,开始创建=======================================, errorMessage:%s",dirName, e1.getMessage());
LOG.info(message, e1);
channelSftp.mkdir(dirName);
channelSftp.cd(dirPath.toString());
dirPath.append(IOUtils.DIR_SEPARATOR_UNIX);
}
/*dirPath.append(dirName);
mkDirSingleHierarchy(dirPath.toString());
dirPath.append(IOUtils.DIR_SEPARATOR_UNIX);*/
}
} catch (SftpException e) {
String message = String
.format("创建目录:%s时发生I/O异常,请确认与ftp服务器的连接正常,拥有目录创建权限, errorMessage:%s",
directoryPath, e.getMessage());
LOG.error(message, e);
throw DataXException
.asDataXException(
FtpWriterErrorCode.COMMAND_FTP_IO_EXCEPTION,
message, e);
}
}
}
public boolean mkDirSingleHierarchy(String directoryPath) throws SftpException {
boolean isDirExist = false;
try {
SftpATTRS sftpATTRS = this.channelSftp.lstat(directoryPath);
isDirExist = sftpATTRS.isDir();
} catch (SftpException e) {
if(!isDirExist){
LOG.info(String.format("正在逐级创建目录 [%s]",directoryPath));
this.channelSftp.mkdir(directoryPath);
return true;
}
}
if(!isDirExist){
LOG.info(String.format("正在逐级创建目录 [%s]",directoryPath));
this.channelSftp.mkdir(directoryPath);
}
return true;
}
public OutputStream getOutputStream(String filePath) {
try {
this.printWorkingDirectory();
String parentDir = filePath.substring(0,StringUtils.lastIndexOf(filePath, IOUtils.DIR_SEPARATOR_UNIX));
this.channelSftp.cd(parentDir);
this.printWorkingDirectory();
OutputStream writeOutputStream = this.channelSftp.put(filePath,ChannelSftp.APPEND);
if (null == writeOutputStream) {
String message = String.format(
"打开FTP文件[%s]获取写出流时出错,请确认文件%s有权限创建,有权限写出等", filePath,filePath);
throw DataXException.asDataXException(FtpWriterErrorCode.OPEN_FILE_ERROR, message);
}
return writeOutputStream;
} catch (SftpException e) {
String message = String.format(
"写出文件[%s] 时出错,请确认文件%s有权限写出, errorMessage:%s", filePath,filePath, e.getMessage());
LOG.error(message);
throw DataXException.asDataXException(FtpWriterErrorCode.OPEN_FILE_ERROR, message);
}
}
public Long getOutputStream(InputStream in, String filePath) {
try {
this.printWorkingDirectory();
//断点续传
String parentDir = filePath.substring(0,StringUtils.lastIndexOf(filePath, IOUtils.DIR_SEPARATOR_UNIX));
this.channelSftp.cd(parentDir);
this.printWorkingDirectory();
this.channelSftp.put(in, filePath, new MyProgressMonitor(), ChannelSftp.RESUME);
return totalCount;
} catch (SftpException e) {
String message = String.format("写出文件[%s] 时出错,请确认文件%s有权限写出, errorMessage:%s", filePath,filePath, e.getMessage());
LOG.error(message);
throw DataXException.asDataXException(FtpWriterErrorCode.OPEN_FILE_ERROR, message);
}
}
public String getRemoteFileContent(String filePath) {
try {
this.completePendingCommand();
this.printWorkingDirectory();
String parentDir = filePath.substring(0,
StringUtils.lastIndexOf(filePath, IOUtils.DIR_SEPARATOR_UNIX));
this.channelSftp.cd(parentDir);
this.printWorkingDirectory();
ByteArrayOutputStream outputStream = new ByteArrayOutputStream(22);
this.channelSftp.get(filePath, outputStream);
String result = outputStream.toString();
IOUtils.closeQuietly(outputStream);
return result;
} catch (SftpException e) {
String message = String.format(
"写出文件[%s] 时出错,请确认文件%s有权限写出, errorMessage:%s", filePath,
filePath, e.getMessage());
LOG.error(message);
throw DataXException.asDataXException(
FtpWriterErrorCode.OPEN_FILE_ERROR, message);
}
}
public Set<String> getAllFilesInDir(String dir, String prefixFileName) {
Set<String> allFilesWithPointedPrefix = new HashSet<String>();
try {
this.printWorkingDirectory();
@SuppressWarnings("rawtypes")
Vector allFiles = this.channelSftp.ls(dir);
LOG.debug(String.format("ls: %s", JSON.toJSONString(allFiles,
SerializerFeature.UseSingleQuotes)));
for (int i = 0; i < allFiles.size(); i++) {
LsEntry le = (LsEntry) allFiles.get(i);
String strName = le.getFilename();
if (strName.startsWith(prefixFileName)) {
allFilesWithPointedPrefix.add(strName);
}
}
} catch (SftpException e) {
String message = String
.format("获取path:[%s] 下文件列表时发生I/O异常,请确认与ftp服务器的连接正常,拥有目录ls权限, errorMessage:%s",
dir, e.getMessage());
LOG.error(message);
throw DataXException.asDataXException(
FtpWriterErrorCode.COMMAND_FTP_IO_EXCEPTION, message, e);
}
return allFilesWithPointedPrefix;
}
public void deleteFiles(Set<String> filesToDelete) {
String eachFile = null;
try {
this.printWorkingDirectory();
for (String each : filesToDelete) {
LOG.info(String.format("delete file [%s].", each));
eachFile = each;
this.channelSftp.rm(each);
}
} catch (SftpException e) {
String message = String.format(
"删除文件:[%s] 时发生异常,请确认指定文件有删除权限,以及网络交互正常, errorMessage:%s",
eachFile, e.getMessage());
LOG.error(message);
throw DataXException.asDataXException(
FtpWriterErrorCode.COMMAND_FTP_IO_EXCEPTION, message, e);
}
}
private void printWorkingDirectory() {
try {
LOG.info(String.format("current working directory:%s",this.channelSftp.pwd()));
} catch (Exception e) {
LOG.warn(String.format("printWorkingDirectory error:%s",e.getMessage()));
}
}
public void completePendingCommand() {
}
public boolean isDirExist(String directoryPath) {
try {
SftpATTRS sftpATTRS = channelSftp.lstat(directoryPath);
return sftpATTRS.isDir();
} catch (SftpException e) {
return false;
}
}
public boolean isFileExist(String filePath) {
boolean isExitFlag = false;
try {
SftpATTRS sftpATTRS = channelSftp.lstat(filePath);
if(sftpATTRS.getSize() >= 0){
isExitFlag = true;
}
} catch (SftpException e) {
if (e.getMessage().toLowerCase().equals("no such file")) {
String message = String.format("请确认您的配置项path:[%s]存在,且配置的用户有权限读取", filePath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
} else {
String message = String.format("获取文件:[%s] 属性时发生I/O异常,请确认与ftp服务器的连接正常", filePath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.COMMAND_FTP_IO_EXCEPTION, message, e);
}
}
return isExitFlag;
}
public boolean isSymbolicLink(String filePath) {
try {
SftpATTRS sftpATTRS = channelSftp.lstat(filePath);
return sftpATTRS.isLink();
} catch (SftpException e) {
if (e.getMessage().toLowerCase().equals("no such file")) {
String message = String.format("请确认您的配置项path:[%s]存在,且配置的用户有权限读取", filePath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
} else {
String message = String.format("获取文件:[%s] 属性时发生I/O异常,请确认与ftp服务器的连接正常", filePath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.COMMAND_FTP_IO_EXCEPTION, message, e);
}
}
}
HashSet<String> sourceFiles = new HashSet<String>();
public HashSet<String> getListFiles(String directoryPath, int parentLevel, int maxTraversalLevel ,String fileSuffix) {
if(parentLevel < maxTraversalLevel){
String parentPath = null;// 父级目录,以'/'结尾
int pathLen = directoryPath.length();
if (directoryPath.contains("*") || directoryPath.contains("?")) {//*和?的限制
// path是正则表达式
String subPath = UnstructuredStorageReaderUtil.getRegexPathParentPath(directoryPath);
if (isDirExist(subPath)) {
parentPath = subPath;
} else {
String message = String.format("不能进入目录:[%s]," + "请确认您的配置项path:[%s]存在,且配置的用户有权限进入", subPath,
directoryPath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
}
} else if (isDirExist(directoryPath)) {
// path是目录
if (directoryPath.charAt(pathLen - 1) == IOUtils.DIR_SEPARATOR_UNIX) {
parentPath = directoryPath;
} else {
parentPath = directoryPath + IOUtils.DIR_SEPARATOR_UNIX;
}
} else if(isSymbolicLink(directoryPath)){
//path是链接文件
String message = String.format("文件:[%s]是链接文件,当前不支持链接文件的读取", directoryPath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.LINK_FILE, message);
}else if (isFileExist(directoryPath)) {
// path指向具体文件
sourceFiles.add(directoryPath);
return sourceFiles;
} else {
String message = String.format("请确认您的配置项path:[%s]存在,且配置的用户有权限读取", directoryPath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
}
try {
Vector vector = channelSftp.ls(directoryPath);
for (int i = 0; i < vector.size(); i++) {
LsEntry le = (LsEntry) vector.get(i);
String strName = le.getFilename();
String filePath = parentPath + strName;
if (isDirExist(filePath)) {
// 是子目录
if (!(strName.equals(".") || strName.equals(".."))) {
//递归处理
getListFiles(filePath, parentLevel+1, maxTraversalLevel, fileSuffix);
}
} else if(isSymbolicLink(filePath)){
//是链接文件
String message = String.format("文件:[%s]是链接文件,当前不支持链接文件的读取", filePath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.LINK_FILE, message);
}else if (isFileExist(filePath)) {
// 是文件
//是否有要求的 后缀
if(fileSuffix != null && !fileSuffix.equals("")){
List<String> list = Arrays.asList(fileSuffix.split(","));
list.stream().forEach(l ->{
if(filePath.contains(l)) sourceFiles.add(filePath);
});
}else {
sourceFiles.add(filePath);
}
} else {
String message = String.format("请确认path:[%s]存在,且配置的用户有权限读取", filePath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.FILE_NOT_EXISTS, message);
}
} // end for vector
} catch (SftpException e) {
String message = String.format("获取path:[%s] 下文件列表时发生I/O异常,请确认与ftp服务器的连接正常", directoryPath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.COMMAND_FTP_IO_EXCEPTION, message, e);
}
return sourceFiles;
}else{
//超出最大递归层数
String message = String.format("获取path:[%s] 下文件列表时超出最大层数,请确认路径[%s]下不存在软连接文件", directoryPath, directoryPath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.OUT_MAX_DIRECTORY_LEVEL, message);
}
}
public InputStream getInputStream(String filePath) {
try {
return channelSftp.get(filePath);
} catch (SftpException e) {
String message = String.format("读取文件 : [%s] 时出错,请确认文件:[%s]存在且配置的用户有权限读取", filePath, filePath);
LOG.error(message);
throw DataXException.asDataXException(FtpReaderErrorCode.OPEN_FILE_ERROR, message);
}
}
public Date lstat(String filePath) {
try {
SftpATTRS attrs = this.channelSftp.lstat(filePath);
Date lastModified = new Date(attrs.getMTime() * 1000L);
return lastModified;
}catch (Exception e){
LOG.error("获取文件"+filePath+"最新修改时间有误:"+e.toString());
}
return null;
}
}
2.MD5的工具类
其中关于字符串的MD5校验功能可以用来解决文件夹的MD5校验
/**
* @ClassName:Md5Utils
* @author:莫须有 (来自网络)
* @Description: 生成文件的MD5校验码
* @create:2020/11/5 11:31
* @Version1.0
*/
import org.apache.commons.codec.digest.DigestUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
public class Md5Utils {
/**
* 生成字符串的md5校验值
*
* @param s
* @return
*/
public static String getMD5String(String s) {
return DigestUtils.md5Hex(s);
}
/**
* 判断字符串的md5校验码是否与一个已知的md5码相匹配
*
* @param md5str1 要校验的字符串
* @param md5str2 已知的md5校验码
* @return
*/
public static boolean checkMd5(String md5str1, String md5str2) {
return md5str1.equals(md5str2);
}
/**
* 生成文件的md5校验值
*
* @param file
* @return
* @throws IOException
*/
public static String getFileMD5String(File file) throws IOException {
InputStream fis;
fis = new FileInputStream(file);
String md5=DigestUtils.md5Hex(fis);
fis.close();
return md5;
}
public static String getFileInputStreamMD5String(InputStream fis) throws IOException {
String md5=DigestUtils.md5Hex(fis);
fis.close();
return md5;
}
}
网友评论