1. 效果
(1)转换pdf所有页面:
# 运行
java -jar pdftransfer-1.0.jar D:\test\mypdflocation\文件.pdf
![](https://img.haomeiwen.com/i6332441/a37da98bc6bb4bd3.png)
![](https://img.haomeiwen.com/i6332441/bfd2ef186e04293d.png)
(2)转换指定页面:
# 运行,注意从第二个参数开始都是页码(从1开始计数),多个页码用空格隔开
java -jar pdftransfer-1.0.jar D:\test\mypdflocation\文件.pdf 5 6 7
![](https://img.haomeiwen.com/i6332441/118b9f0d88015924.png)
![](https://img.haomeiwen.com/i6332441/159f214dc58c1790.png)
2. 源码
JDK:
建议1.8
maven依赖:
<dependency>
<groupId>com.levigo.jbig2</groupId>
<artifactId>levigo-jbig2-imageio</artifactId>
<version>2.0</version>
</dependency>
<dependency>
<groupId>com.twelvemonkeys.imageio</groupId>
<artifactId>imageio-jpeg</artifactId>
<version>3.4.1</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.17</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-core</artifactId>
<version>1.4.0</version>
</dependency>
<dependency>
<groupId>com.github.jai-imageio</groupId>
<artifactId>jai-imageio-jpeg2000</artifactId>
<version>1.3.0</version>
</dependency>
文件源码
package com.qzh;
import java.awt.image.BufferedImage;
import java.io.EOFException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.*;
import javax.imageio.ImageIO;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.pdmodel.DefaultResourceCache;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.rendering.PDFRenderer;
/**
 * Command-line tool that converts pages of a PDF file into PNG images.
 *
 * <p>Argument 1: path of the PDF file.<br>
 * Arguments 2..n (optional): 1-based page numbers to convert; when none are
 * given every page is converted.
 *
 * <p>Images are written as {@code 第N页.png} into a sub-folder of
 * {@code d:/pdf/output/} named after the PDF (without its extension).
 * The process exits by itself once all pages are done.
 *
 * @author qu.zh
 * @date 2019/12/11 17:24
 */
public class PdfTransfer {
    /** Path of the PDF being converted; set from the first command-line argument. */
    private static String FILE_NAME = "";

    /**
     * Output directory. {@link #main(String[])} appends the per-document
     * sub-folder before any page is queued, so workers always see the final value.
     */
    private static String FILE_OUTPUT_PATH = "d:/pdf/output/";

    /** Hand-off queue between {@code main} (producer) and the {@link MyTask} workers. */
    private static final ArrayBlockingQueue<DataEntity> queue = new ArrayBlockingQueue<DataEntity>(500);

    /**
     * Number of worker threads: one less than the available processors, but at
     * least one (7 on the 8-core machine the original value was tuned on).
     */
    private static final int CPU_CORE = Math.max(1, Runtime.getRuntime().availableProcessors() - 1);

    /** Rendering resolution in DPI. */
    private static final int DEFAULT_DPI = 400;

    /** Seconds without a single finished page after which the conversion is considered stalled. */
    private static final long STALL_TIMEOUT_SECONDS = 200;

    static {
        // NOTE(review): selects the KCMS colour-management module, presumably the
        // Java 8 speed-up recommended for PDFBox rendering — confirm on newer JDKs.
        System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider");
    }

    /**
     * Entry point: validates the arguments, starts the workers, queues the
     * requested pages and waits for them to finish.
     *
     * @param args {@code args[0]} is the PDF path; any further arguments are
     *             1-based page numbers
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0) {
            System.err.println("请传入路径");
            return;
        }
        List<Integer> needTransferList = new ArrayList<>();
        for (int i = 1; i < args.length; i++) {
            try {
                needTransferList.add(Integer.valueOf(args[i]));
            } catch (NumberFormatException e) {
                System.err.println("页码必须是整数:" + args[i]);
                return;
            }
            System.out.println("打印页数:" + args[i]);
        }
        FILE_NAME = args[0];
        if (FILE_NAME == null || "".equals(FILE_NAME)) {
            System.err.println("pdf名称不能为空");
            return;
        }
        File file = new File(FILE_NAME);
        if (!file.isFile()) {
            System.err.println("pdf文件不存在:" + FILE_NAME);
            return;
        }

        // Folder name = file name without extension; a name without a dot is used as-is.
        String fileName = file.getName();
        int end = fileName.lastIndexOf(".");
        String folderName = end > 0 ? fileName.substring(0, end) : fileName;
        FILE_OUTPUT_PATH = FILE_OUTPUT_PATH + File.separator + folderName + File.separator;
        File output = new File(FILE_OUTPUT_PATH);
        if (!output.isDirectory() && !output.mkdirs()) {
            System.err.println("无法创建输出目录:" + FILE_OUTPUT_PATH);
            return;
        }

        // Benchmarks from the original author (i7, 16G, 8 cores):
        // 20 threads 62s / 14 threads 46s / 10 threads 44s / 8 threads 40s /
        // 6 threads 42s / 4 threads 49s — fastest close to the core count.
        ThreadPoolExecutor executor = new ThreadPoolExecutor(CPU_CORE, CPU_CORE,
                1200, TimeUnit.SECONDS, new SynchronousQueue<Runnable>(), new ThreadPoolExecutor.CallerRunsPolicy());
        for (int i = 0; i < CPU_CORE; i++) {
            executor.submit(new MyTask(queue));
        }

        PDDocument pdDocument = null;
        try {
            long startMillis = System.currentTimeMillis();
            System.out.println();
            pdDocument = PDDocument.load(file);
            pdDocument.setResourceCache(new MyResourceCache());
            int pageCount = pdDocument.getNumberOfPages();
            PDFRenderer renderer = new PDFRenderer(pdDocument);

            // Direct image extraction is only attempted when converting the whole document.
            boolean convertAll = needTransferList.isEmpty();
            List<Integer> pageIndexes = selectPageIndexes(needTransferList, pageCount);
            CountDownLatch countDownLatch = new CountDownLatch(pageIndexes.size());
            System.out.println("转换页数页数一共:" + pageIndexes.size());

            for (int pageIndex : pageIndexes) {
                if (convertAll && tryExtractSingleImage(pdDocument, renderer, pageIndex)) {
                    // Page handled right here, so account for it on the latch ourselves.
                    countDownLatch.countDown();
                    continue;
                }
                DataEntity dataEntity = new DataEntity();
                dataEntity.setPageNum(pageIndex);
                dataEntity.setPdfRenderer(renderer);
                dataEntity.setCountDownLatch(countDownLatch);
                dataEntity.setPageCount(pageCount);
                queue.put(dataEntity);
            }

            if (awaitCompletion(countDownLatch)) {
                System.out.println("执行完毕!!!!!!!!!");
            } else {
                System.err.println("等待超时,仍有" + countDownLatch.getCount() + "页未完成转换");
            }
            System.out.println("页数:" + pageCount);
            System.out.println("用时:" + (System.currentTimeMillis() - startMillis) / 1000 + "秒");
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("IOException");
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            e.printStackTrace();
            System.err.println("InterruptedException");
        } catch (Throwable throwable) {
            throwable.printStackTrace();
        } finally {
            // Interrupt the workers blocked on the queue so the JVM can exit.
            executor.shutdownNow();
            try {
                if (pdDocument != null) {
                    pdDocument.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
                System.err.println("IOException");
            }
        }
    }

    /**
     * Turns the requested 1-based page numbers into zero-based page indexes.
     *
     * @param requested page numbers from the command line; empty means "all pages"
     * @param pageCount number of pages in the document
     * @return indexes to convert; out-of-range requests are reported and skipped
     */
    private static List<Integer> selectPageIndexes(List<Integer> requested, int pageCount) {
        List<Integer> indexes = new ArrayList<>();
        if (requested.isEmpty()) {
            for (int i = 0; i < pageCount; i++) {
                indexes.add(i);
            }
            return indexes;
        }
        for (int pageNumber : requested) {
            if (pageNumber < 1 || pageNumber > pageCount) {
                System.err.println("页码超出范围,已跳过:" + pageNumber);
                continue;
            }
            indexes.add(pageNumber - 1);
        }
        return indexes;
    }

    /**
     * Shortcut for pages that consist of exactly one image and use no fonts:
     * the embedded image is written out directly instead of rendering the page.
     *
     * @param document  the open PDF
     * @param renderer  renderer shared with the workers; used here only as the lock
     * @param pageIndex zero-based page index
     * @return {@code true} if the image was written; {@code false} if the page
     *         does not qualify or extraction failed and it must be rendered instead
     */
    private static boolean tryExtractSingleImage(PDDocument document, PDFRenderer renderer, int pageIndex) {
        try {
            BufferedImage soleImage;
            // Workers render from this same document under this lock; take it too so
            // the document is never read from two threads at once.
            synchronized (renderer) {
                PDPage page = document.getPage(pageIndex);
                PDResources resources = page.getResources();
                if (resources == null) {
                    return false;
                }
                // Any font resource means the page may carry text: render it.
                if (resources.getFontNames().iterator().hasNext()) {
                    return false;
                }
                int imageCount = 0;
                COSName imageName = null;
                for (COSName name : resources.getXObjectNames()) {
                    if (resources.isImageXObject(name)) {
                        imageCount++;
                        imageName = name;
                    }
                }
                if (imageCount != 1) {
                    return false;
                }
                PDXObject xObject = resources.getXObject(imageName);
                soleImage = ((PDImageXObject) xObject).getImage();
            }
            if (ImageIO.write(soleImage, "png", new File(imageFileName(pageIndex)))) {
                return true;
            }
            System.err.println("第" + (pageIndex + 1) + "页图片无法直接写出,改为渲染");
            return false;
        } catch (IOException e) {
            System.err.println("第" + (pageIndex + 1) + "页图片直接提取失败,改为渲染");
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Waits until the latch reaches zero, giving up only when no page at all
     * finished within {@link #STALL_TIMEOUT_SECONDS}.
     *
     * @param latch latch counted down once per converted page
     * @return {@code true} if every page finished, {@code false} on a stall
     * @throws InterruptedException if the waiting thread is interrupted
     */
    private static boolean awaitCompletion(CountDownLatch latch) throws InterruptedException {
        long remaining = latch.getCount();
        while (!latch.await(STALL_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
            long now = latch.getCount();
            if (now >= remaining) {
                return false;
            }
            remaining = now;
        }
        return true;
    }

    /**
     * Builds the output path for a page; used by both the direct-extraction
     * path and the workers so every page of a run follows one naming scheme.
     *
     * @param pageIndex zero-based page index
     * @return path of the PNG for that page inside {@link #FILE_OUTPUT_PATH}
     */
    private static String imageFileName(int pageIndex) {
        return FILE_OUTPUT_PATH + "第" + (pageIndex + 1) + "页.png";
    }

    /**
     * Resource cache whose {@code put} is a no-op, so XObjects are never stored.
     * The original author notes that, per the PDFBox site, this disables caching.
     */
    private static class MyResourceCache extends DefaultResourceCache {
        @Override
        public void put(COSObject indirect, PDXObject xobject) throws IOException {
            // Intentionally empty: do not cache.
        }
    }

    /**
     * Worker: repeatedly takes a page from the queue, renders it and writes
     * the PNG. Stops when its thread is interrupted.
     */
    public static class MyTask implements Runnable {
        private final ArrayBlockingQueue<DataEntity> queue;

        /**
         * @param queue queue the worker takes its pages from
         */
        public MyTask(ArrayBlockingQueue<DataEntity> queue) {
            this.queue = queue;
        }

        @Override
        public void run() {
            while (true) {
                DataEntity dataEntity;
                try {
                    // Blocks until a page is available.
                    dataEntity = queue.take();
                } catch (InterruptedException e) {
                    // Shutdown request: restore the flag and leave the loop.
                    Thread.currentThread().interrupt();
                    return;
                }
                BufferedImage image = null;
                try {
                    PDFRenderer renderer = dataEntity.getPdfRenderer();
                    int pageNum = dataEntity.getPageNum();
                    System.out.println("============第" + (pageNum + 1) + "页转换中============");
                    // The renderer is not thread-safe, so rendering is serialized
                    // even though that costs throughput; only the PNG write runs in parallel.
                    synchronized (renderer) {
                        image = renderer.renderImageWithDPI(pageNum, DEFAULT_DPI);
                    }
                    ImageIO.write(image, "png", new File(imageFileName(pageNum)));
                } catch (IOException e) {
                    if (e instanceof EOFException) {
                        System.err.println("EOFException========");
                    } else {
                        System.err.println("IOException========");
                        e.printStackTrace();
                    }
                } catch (Exception throwable) {
                    System.out.println("=================Throwable==========================");
                    throwable.printStackTrace();
                } finally {
                    if (image != null) {
                        image.flush();
                    }
                    // Always count down, success or failure, so main is never left waiting on this page.
                    dataEntity.getCountDownLatch().countDown();
                }
            }
        }
    }

    /**
     * One unit of work: a page to render plus the shared renderer and latch.
     */
    private static class DataEntity implements Serializable {
        public static final long serialVersionUID = -1;
        /** Renderer shared by all tasks of one document. */
        private PDFRenderer pdfRenderer;
        /** Zero-based index of the page to render. */
        private int pageNum;
        /** Latch to count down once this page has been processed. */
        private CountDownLatch countDownLatch;
        /** Total number of pages in the document. */
        private int pageCount;

        public int getPageCount() {
            return pageCount;
        }

        public void setPageCount(int pageCount) {
            this.pageCount = pageCount;
        }

        public PDFRenderer getPdfRenderer() {
            return pdfRenderer;
        }

        public CountDownLatch getCountDownLatch() {
            return countDownLatch;
        }

        public void setCountDownLatch(CountDownLatch countDownLatch) {
            this.countDownLatch = countDownLatch;
        }

        public void setPdfRenderer(PDFRenderer pdfRenderer) {
            this.pdfRenderer = pdfRenderer;
        }

        public int getPageNum() {
            return pageNum;
        }

        public void setPageNum(int pageNum) {
            this.pageNum = pageNum;
        }
    }
}
如果需要打成可以直接用 java -jar 运行的jar包(在清单中指定main类),在pom.xml中加入:
<!-- Builds a single runnable jar (dependencies included) during "mvn package". -->
<build>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>2.3</version>
      <configuration>
        <!-- Keep the plain artifact name instead of appending "-jar-with-dependencies". -->
        <appendAssemblyId>false</appendAssemblyId>
        <descriptorRefs>
          <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
        <archive>
          <manifest>
            <addClasspath>true</addClasspath>
            <classpathPrefix>lib/</classpathPrefix>
            <!-- Main class started by "java -jar". -->
            <mainClass>com.qzh.PdfTransfer</mainClass>
          </manifest>
        </archive>
      </configuration>
      <executions>
        <execution>
          <id>make-assembly</id>
          <phase>package</phase>
          <goals>
            <!-- "single" is the goal meant for lifecycle binding; "assembly" is command-line only and deprecated. -->
            <goal>single</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>
3. 说明
(1)核心线程数可以根据实际环境来调整,建议为cpu核数(加一或者减一)。
(2)出现其他问题请参考:https://www.jianshu.com/p/c85017f8577a
![](https://img.haomeiwen.com/i6332441/14c51d99f5bea418.png)
网友评论