美文网首页
PDFBox转图片小工具

PDFBox转图片小工具

作者: QZH_2019 | 来源:发表于2019-12-20 17:04 被阅读0次

1. 效果

(1)转换pdf所有页面:

# 运行
java -jar pdftransfer-1.0.jar D:\test\mypdflocation\文件.pdf
转化pdf所有页面.png
转化结果.png

(2)转换指定页面:

# 运行。从第二个参数开始是要转换的页码(从1开始计数),多个页码用空格隔开
java -jar pdftransfer-1.0.jar D:\test\mypdflocation\文件.pdf 5 6 7
转化指定页面.png image.png

2. 源码

JDK:
建议1.8

maven依赖:

<!-- ImageIO plugin that adds JBIG2 decoding (JBIG2-compressed images inside PDFs). -->
<dependency>
    <groupId>com.levigo.jbig2</groupId>
    <artifactId>levigo-jbig2-imageio</artifactId>
    <version>2.0</version>
</dependency>

<!-- TwelveMonkeys ImageIO JPEG plugin. -->
<dependency>
    <groupId>com.twelvemonkeys.imageio</groupId>
    <artifactId>imageio-jpeg</artifactId>
    <version>3.4.1</version>
</dependency>

<!-- PDFBox itself: PDF parsing and page rendering. -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.17</version>
</dependency>

<!-- JAI ImageIO core plugins. -->
<dependency>
    <groupId>com.github.jai-imageio</groupId>
    <artifactId>jai-imageio-core</artifactId>
    <version>1.4.0</version>
</dependency>

<!-- JAI ImageIO JPEG 2000 plugin (JPX-encoded images inside PDFs). -->
<dependency>
    <groupId>com.github.jai-imageio</groupId>
    <artifactId>jai-imageio-jpeg2000</artifactId>
    <version>1.3.0</version>
</dependency>

文件源码

package com.qzh;

import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.pdmodel.DefaultResourceCache;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.rendering.PDFRenderer;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.*;

/**
 * Command-line tool that converts the pages of a PDF file into PNG images using PDFBox.
 *
 * <p>Usage: {@code java -jar pdftransfer-1.0.jar <pdf path> [page ...]}
 * <ul>
 *   <li>argument 1: path of the PDF file;</li>
 *   <li>remaining arguments (optional): 1-based page numbers to convert; when none are
 *       given, every page is converted.</li>
 * </ul>
 *
 * <p>Images are written to {@code d:/pdf/output/<pdf name without extension>/}. Pages are
 * rendered by a fixed pool of worker threads fed through a bounded queue; the process exits
 * on its own once all pages are done or the wait times out.
 *
 * @author qu.zh
 * @date 2019/12/11 17:24
 */
public class PdfTransfer {

    /** Path of the PDF being converted (first command-line argument). */
    private static String FILE_NAME = "";

    /** Root directory under which one sub-folder per PDF is created. */
    private static final String OUTPUT_ROOT = "d:/pdf/output/";

    /**
     * Output directory of the current PDF, always ending with a file separator.
     * Assigned by {@link #main} before any worker is started.
     */
    private static String FILE_OUTPUT_PATH = OUTPUT_ROOT;

    /** Bounded hand-off queue between {@link #main} (producer) and the workers (consumers). */
    private static final ArrayBlockingQueue<DataEntity> queue = new ArrayBlockingQueue<>(500);

    /**
     * Number of worker threads. The author's measurements on an 8-core i7 / 16 GB machine:
     * 20 threads 62 s, 14 threads 46 s, 10 threads 44 s, 8 threads 40 s, 6 threads 42 s,
     * 4 threads 49 s - i.e. a value close to the core count was fastest.
     */
    private static final int CPU_CORE = 7;

    /** Rendering resolution in DPI. */
    private static final int DEFAULT_DPI = 400;

    /** Maximum time {@link #main} waits for all pages to be converted. */
    private static final long AWAIT_TIMEOUT_SECONDS = 200;

    /** Grace period given to the workers to stop after they have been interrupted. */
    private static final long WORKER_STOP_TIMEOUT_SECONDS = 10;

    static {
        // Select the KCMS colour management module, as suggested by the PDFBox
        // documentation for faster rendering on Java 8.
        System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider");
    }

    /**
     * Entry point: validates the arguments, then converts the requested pages.
     *
     * @param args {@code args[0]} is the PDF path; {@code args[1..]} are optional 1-based
     *             page numbers
     */
    public static void main(String[] args) {
        if (args == null || args.length == 0) {
            System.err.println("请传入路径");
            return;
        }
        FILE_NAME = args[0];
        if (FILE_NAME == null || "".equals(FILE_NAME)) {
            System.err.println("pdf名称不能为空");
            return;
        }

        List<Integer> needTransferList = parseRequestedPages(args);
        if (needTransferList == null) {
            return;
        }

        File pdfFile = new File(FILE_NAME);
        if (!pdfFile.isFile()) {
            System.err.println("pdf文件不存在:" + FILE_NAME);
            return;
        }

        File output = new File(OUTPUT_ROOT, baseName(pdfFile.getName()));
        if (!output.isDirectory() && !output.mkdirs()) {
            System.err.println("无法创建输出目录:" + output.getPath());
            return;
        }
        FILE_OUTPUT_PATH = output.getPath() + File.separator;

        ExecutorService executor = Executors.newFixedThreadPool(CPU_CORE);
        PDDocument pdDocument = null;
        try {
            long startMillis = System.currentTimeMillis();
            System.out.println();
            pdDocument = PDDocument.load(pdfFile);
            pdDocument.setResourceCache(new MyResourceCache());
            int pageCount = pdDocument.getNumberOfPages();
            PDFRenderer renderer = new PDFRenderer(pdDocument);

            List<Integer> pageIndexes = resolvePageIndexes(needTransferList, pageCount);
            CountDownLatch countDownLatch = new CountDownLatch(pageIndexes.size());
            System.out.println("转换页数页数一共:" + pageIndexes.size());

            for (int i = 0; i < CPU_CORE; i++) {
                executor.submit(new MyTask(queue));
            }

            // The direct-extraction shortcut is only used when converting the whole document.
            boolean allowDirectExtraction = needTransferList.isEmpty();
            for (int pageIndex : pageIndexes) {
                if (allowDirectExtraction && writeSingleImagePage(pdDocument, renderer, pageIndex)) {
                    // This page never reaches a worker, so it must be counted here.
                    countDownLatch.countDown();
                } else {
                    queue.put(new DataEntity(renderer, pageIndex, countDownLatch));
                }
            }

            if (countDownLatch.await(AWAIT_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                System.out.println("执行完毕!!!!!!!!!");
            } else {
                System.err.println("等待超时(" + AWAIT_TIMEOUT_SECONDS + "秒),仍有 "
                        + countDownLatch.getCount() + " 页未完成");
            }
            System.out.println("页数:" + pageCount);
            System.out.println("用时:" + (System.currentTimeMillis() - startMillis) / 1000 + "秒");
        } catch (IOException e) {
            e.printStackTrace();
            System.err.println("IOException");
        } catch (InterruptedException e) {
            // Preserve the interrupt status for whoever runs this thread.
            Thread.currentThread().interrupt();
            e.printStackTrace();
            System.err.println("InterruptedException");
        } finally {
            // Stop the workers first so none of them is still using the document when it closes.
            stopWorkers(executor);
            try {
                if (pdDocument != null) {
                    pdDocument.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
                System.err.println("IOException");
            }
        }
    }

    /**
     * Parses the optional page-number arguments ({@code args[1..]}).
     *
     * @return the requested 1-based page numbers (empty when none were given), or
     *         {@code null} after printing an error if an argument is not an integer
     */
    private static List<Integer> parseRequestedPages(String[] args) {
        List<Integer> pages = new ArrayList<>();
        for (int i = 1; i < args.length; i++) {
            try {
                pages.add(Integer.valueOf(args[i].trim()));
            } catch (NumberFormatException e) {
                System.err.println("页码必须是整数:" + args[i]);
                return null;
            }
            System.out.println("打印页数:" + args[i]);
        }
        return pages;
    }

    /**
     * Turns the requested 1-based page numbers into 0-based page indexes.
     *
     * @param requested 1-based page numbers; empty means "all pages"
     * @param pageCount number of pages in the document
     * @return the indexes to convert; out-of-range requests are reported and skipped
     */
    private static List<Integer> resolvePageIndexes(List<Integer> requested, int pageCount) {
        List<Integer> indexes = new ArrayList<>();
        if (requested.isEmpty()) {
            for (int i = 0; i < pageCount; i++) {
                indexes.add(i);
            }
            return indexes;
        }
        for (int page : requested) {
            if (page < 1 || page > pageCount) {
                System.err.println("页码超出范围(1-" + pageCount + "),已跳过:" + page);
                continue;
            }
            indexes.add(page - 1);
        }
        return indexes;
    }

    /** Returns {@code fileName} without its last extension, or unchanged if it has none. */
    private static String baseName(String fileName) {
        int dot = fileName.lastIndexOf('.');
        return dot > 0 ? fileName.substring(0, dot) : fileName;
    }

    /**
     * Shortcut for scanned documents: if the page consists of exactly one image, writes that
     * image straight to {@code number_<page>_page.png} instead of rendering the page.
     *
     * @param document  the open document
     * @param lock      monitor that serialises all access to {@code document}; the workers
     *                  use the shared renderer for this, so the same object is passed here
     * @param pageIndex 0-based page index
     * @return {@code true} if the page was written; {@code false} if it does not qualify or
     *         the shortcut failed, in which case the caller should render it normally
     */
    private static boolean writeSingleImagePage(PDDocument document, Object lock, int pageIndex) {
        try {
            BufferedImage image;
            // A PDDocument must not be read by several threads at once.
            synchronized (lock) {
                image = findSoleImage(document.getPage(pageIndex));
            }
            if (image == null) {
                return false;
            }
            File target = new File(FILE_OUTPUT_PATH + "number_" + (pageIndex + 1) + "_page.png");
            boolean written = ImageIO.write(image, "png", target);
            image.flush();
            return written;
        } catch (IOException e) {
            System.err.println("第" + (pageIndex + 1) + "页直接提取图片失败,改为渲染:" + e.getMessage());
            return false;
        }
    }

    /**
     * Returns the decoded image of a page whose resources contain no fonts and exactly one
     * XObject, that XObject being an image; otherwise {@code null}.
     *
     * <p>Only the resource dictionary is inspected: graphics drawn directly in the content
     * stream and the placement (scale/rotation/clipping) of the image are not considered.
     */
    private static BufferedImage findSoleImage(PDPage page) throws IOException {
        PDResources resources = page.getResources();
        if (resources == null) {
            return null;
        }
        // Fonts live in their own resource category, not among the XObjects.
        if (resources.getFontNames().iterator().hasNext()) {
            return null;
        }
        COSName soleImageName = null;
        for (COSName name : resources.getXObjectNames()) {
            // A form XObject may contain arbitrary content, so it disqualifies the page too.
            if (!resources.isImageXObject(name) || soleImageName != null) {
                return null;
            }
            soleImageName = name;
        }
        if (soleImageName == null) {
            return null;
        }
        PDXObject xObject = resources.getXObject(soleImageName);
        return xObject instanceof PDImageXObject ? ((PDImageXObject) xObject).getImage() : null;
    }

    /** Interrupts the workers (they are normally blocked on the queue) and waits briefly. */
    private static void stopWorkers(ExecutorService executor) {
        executor.shutdownNow();
        try {
            if (!executor.awaitTermination(WORKER_STOP_TIMEOUT_SECONDS, TimeUnit.SECONDS)) {
                System.err.println("部分转换线程未能及时结束");
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Resource cache whose XObject {@code put} is a no-op, so XObjects are never stored in
     * the document-wide cache (presumably to keep memory usage down at high DPI).
     */
    private static class MyResourceCache extends DefaultResourceCache {
        @Override
        public void put(COSObject indirect, PDXObject xobject) throws IOException {
            // Intentionally empty: do not cache.
        }
    }

    /**
     * Worker: repeatedly takes a page job from the queue, renders it and writes
     * {@code 第<page>页.png}. Runs until its thread is interrupted.
     */
    public static class MyTask implements Runnable {

        private final ArrayBlockingQueue<DataEntity> queue;

        /**
         * @param queue queue of page jobs; declared raw because the element type is private
         */
        @SuppressWarnings({"rawtypes", "unchecked"})
        public MyTask(ArrayBlockingQueue queue) {
            this.queue = queue;
        }

        @Override
        public void run() {
            while (!Thread.currentThread().isInterrupted()) {
                DataEntity dataEntity;
                try {
                    dataEntity = queue.take();
                } catch (InterruptedException e) {
                    // Shutdown request: restore the flag and leave the loop.
                    Thread.currentThread().interrupt();
                    return;
                }

                BufferedImage image = null;
                int pageNum = dataEntity.getPageNum();
                try {
                    PDFRenderer renderer = dataEntity.getPdfRenderer();
                    String imageFileName = FILE_OUTPUT_PATH + "第" + (pageNum + 1) + "页.png";
                    System.out.println("============第" + (pageNum + 1) + "页转换中============");

                    // The renderer (and the document behind it) is not thread-safe, so
                    // rendering is serialised; only the PNG encoding runs in parallel.
                    synchronized (renderer) {
                        image = renderer.renderImageWithDPI(pageNum, DEFAULT_DPI);
                    }

                    if (!ImageIO.write(image, "png", new File(imageFileName))) {
                        System.err.println("第" + (pageNum + 1) + "页写入失败:" + imageFileName);
                    }
                } catch (EOFException e) {
                    System.err.println("EOFException========");
                } catch (IOException e) {
                    System.err.println("IOException========");
                    e.printStackTrace();
                } catch (Exception e) {
                    // Keep the worker alive whatever a single page throws.
                    System.out.println("=================Throwable==========================");
                    e.printStackTrace();
                } finally {
                    if (image != null) {
                        image.flush();
                    }
                    // Count the page as handled even on failure so main does not wait for it.
                    dataEntity.getCountDownLatch().countDown();
                }
            }
        }
    }

    /**
     * Immutable description of one page job.
     */
    private static final class DataEntity {

        private final PDFRenderer pdfRenderer;

        private final int pageNum;

        private final CountDownLatch countDownLatch;

        /**
         * @param pdfRenderer    shared renderer of the document (also used as the lock)
         * @param pageNum        0-based page index
         * @param countDownLatch latch to count down once the page has been handled
         */
        DataEntity(PDFRenderer pdfRenderer, int pageNum, CountDownLatch countDownLatch) {
            this.pdfRenderer = pdfRenderer;
            this.pageNum = pageNum;
            this.countDownLatch = countDownLatch;
        }

        public PDFRenderer getPdfRenderer() {
            return pdfRenderer;
        }

        public int getPageNum() {
            return pageNum;
        }

        public CountDownLatch getCountDownLatch() {
            return countDownLatch;
        }
    }
}



如果需要把工程打包成可以直接用 java -jar 运行的jar(即在清单文件中指定main类),在pom.xml中加入:

<build>
    <plugins>
        <!-- Builds one executable jar that contains the project classes and all dependencies. -->
        <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-assembly-plugin</artifactId>
            <version>2.3</version>
            <configuration>
                <!-- Do not append "-jar-with-dependencies" to the jar name. -->
                <appendAssemblyId>false</appendAssemblyId>
                <descriptorRefs>
                    <descriptorRef>jar-with-dependencies</descriptorRef>
                </descriptorRefs>
                <archive>
                    <manifest>
                        <!-- No Class-Path entry: the dependencies are unpacked into the jar
                             itself, there is no lib/ directory next to it. -->
                        <mainClass>com.qzh.PdfTransfer</mainClass>
                    </manifest>
                </archive>
            </configuration>
            <executions>
                <execution>
                    <id>make-assembly</id>
                    <phase>package</phase>
                    <goals>
                        <!-- "single" is the goal meant for lifecycle bindings; the old
                             "assembly" goal is deprecated and removed in plugin 3.x. -->
                        <goal>single</goal>
                    </goals>
                </execution>
            </executions>
        </plugin>
    </plugins>
</build>

3. 说明

(1)线程数(源码中的CPU_CORE常量)可以根据实际环境调整,建议设为CPU核数,或在此基础上加一、减一。
(2)出现其他问题请参考:https://www.jianshu.com/p/c85017f8577a

image.png

相关文章

网友评论

      本文标题:PDFBox转图片小工具

      本文链接:https://www.haomeiwen.com/subject/whyinctx.html