/**
 * Extracts the plain text of a Word ({@code .doc} / {@code .docx}) or PDF ({@code .pdf}) file.
 *
 * <p>The format is selected from the file name's extension, compared case-insensitively.
 * A file with any other extension, or a PDF that reports itself as encrypted, yields an
 * empty string rather than an error.
 *
 * @param file the document to read
 * @return the extracted text, or {@code ""} when the extension is not handled or the PDF
 *         is encrypted
 * @throws Exception if the file cannot be opened or the underlying library fails to parse it
 */
public static String getWordAndPdfContent(File file) throws Exception {
    // Lower-case with a fixed locale so ".DOC" / ".Pdf" match regardless of the JVM's default locale.
    String fileName = file.getName().toLowerCase(java.util.Locale.ROOT);
    String content = "";

    if (fileName.endsWith(".doc")) {
        // Legacy binary Word format. The stream is closed even when parsing throws.
        try (FileInputStream in = new FileInputStream(file)) {
            WordExtractor extractor = new WordExtractor(in);
            content = extractor.getText();
        }
    } else if (fileName.endsWith(".docx")) {
        // OOXML Word format. Release the package in finally so a failed extraction
        // does not leave the file handle open.
        OPCPackage opcPackage = POIXMLDocument.openPackage(file.getPath());
        try {
            POIXMLTextExtractor extractor = new XWPFWordExtractor(opcPackage);
            content = extractor.getText();
        } finally {
            opcPackage.close();
        }
    } else if (fileName.endsWith(".pdf")) {
        PDDocument document = PDDocument.load(file);
        try {
            // Encrypted documents are skipped; content stays empty.
            if (!document.isEncrypted()) {
                // NOTE(review): the previous code called setSortByPosition(true) on an unused
                // PDFTextStripperByArea, so it never affected the output. If position-sorted
                // text was intended, enable it on this stripper instead — confirm with callers.
                PDFTextStripper stripper = new PDFTextStripper();
                content = stripper.getText(document);
            }
        } finally {
            document.close();
        }
    }
    return content;
}
网友评论