美文网首页
pdfjs 导出PDF所有图片

pdfjs 导出PDF所有图片

作者: 此昵称已被狗抢占 | 来源:发表于2020-08-24 21:08 被阅读0次
    var fs = require('fs');
    var path = require('path');
    var Canvas = require('canvas');
    
    var pdfjsLib = require('pdfjs-dist/es5/build/pdf.js');
    
    // Path of the PDF to scan. An optional first command-line argument
    // overrides the original hard-coded default, so `node script.js` alone
    // still reads '9.pdf'.
    var pdfPath = process.argv[2] || '9.pdf';
    // The whole file is read synchronously and handed to pdf.js as raw bytes.
    var data = new Uint8Array(fs.readFileSync(pdfPath));
    
    // Document loading starts here; extractImages() awaits loadingTask.promise.
    const loadingTask = pdfjsLib.getDocument({ data });
    
    /**
     * Expand the pixel data of a decoded image object into packed RGBA bytes.
     *
     * The source layout is inferred from the length of `img.data`:
     *   - width * height * 4 bytes: already RGBA, copied unchanged;
     *   - width * height * 3 bytes: RGB, alpha is filled with 255;
     *   - ((width + 7) >> 3) * height bytes: 1 bit per pixel with rows padded
     *     to whole bytes, most significant bit first; a set bit becomes white
     *     and a clear bit becomes black (assumed to match how pdf.js paints
     *     its 1-bpp grayscale images).
     *
     * @param {{width: number, height: number, data: ArrayLike<number>}} img
     * @returns {Uint8ClampedArray|null} RGBA bytes, or null when the layout
     *   matches none of the cases above.
     */
    function toRgbaBytes(img) {
      const width = img.width;
      const height = img.height;
      const source = img.data;
      const pixelCount = width * height;
      const rgba = new Uint8ClampedArray(pixelCount * 4);

      if (source.length === pixelCount * 4) {
        rgba.set(source);
        return rgba;
      }

      if (source.length === pixelCount * 3) {
        for (let src = 0, dest = 0; dest < rgba.length; ) {
          rgba[dest++] = source[src++];
          rgba[dest++] = source[src++];
          rgba[dest++] = source[src++];
          rgba[dest++] = 255;
        }
        return rgba;
      }

      const rowBytes = (width + 7) >> 3;
      if (source.length === rowBytes * height) {
        let dest = 0;
        for (let y = 0; y < height; y++) {
          for (let x = 0; x < width; x++) {
            const isSet = source[y * rowBytes + (x >> 3)] & (0x80 >> (x & 7));
            const level = isSet ? 255 : 0;
            rgba[dest++] = level;
            rgba[dest++] = level;
            rgba[dest++] = level;
            rgba[dest++] = 255;
          }
        }
        return rgba;
      }

      return null;
    }

    /**
     * Write every image painted by the loaded PDF to "<objectId>.png" in the
     * current working directory.
     *
     * Each page's operator list is scanned for paintJpegXObject and
     * paintImageXObject operations. The first argument of such an operation is
     * used as the image object id, and that object's pixels are drawn onto a
     * canvas of the same size and encoded as PNG. An id is written only once,
     * even if it is painted several times.
     *
     * Errors raised while processing a page are logged with console.error and
     * the loop continues with the next page. A failure to load the document
     * itself rejects the returned promise.
     *
     * @returns {Promise<void>}
     */
    async function extractImages() {
      const doc = await loadingTask.promise;
      const imageOps = [
        pdfjsLib.OPS.paintJpegXObject,
        pdfjsLib.OPS.paintImageXObject,
      ];
      // Ids already exported, so repeated paints do not rewrite the same file.
      const exportedIds = new Set();

      for (let pageNum = 1; pageNum <= doc.numPages; pageNum++) {
        try {
          const page = await doc.getPage(pageNum);
          const opList = await page.getOperatorList();

          for (let i = 0; i < opList.fnArray.length; i++) {
            if (!imageOps.includes(opList.fnArray[i])) {
              continue;
            }

            const objId = opList.argsArray[i][0];
            if (exportedIds.has(objId)) {
              continue;
            }

            // pdf.js keeps objects whose id starts with "g_" in the
            // document-wide commonObjs store rather than the per-page one.
            const isShared = typeof objId === 'string' && objId.startsWith('g_');
            const store = isShared && page.commonObjs ? page.commonObjs : page.objs;
            const img = store.get(objId);
            if (!img || !img.data || !img.width || !img.height) {
              continue;
            }

            const rgba = toRgbaBytes(img);
            if (rgba === null) {
              console.warn('Skipping ' + objId + ': unsupported pixel layout');
              continue;
            }

            const canvas = Canvas.createCanvas(img.width, img.height);
            const ctx = canvas.getContext('2d');
            const imageData = ctx.createImageData(img.width, img.height);
            imageData.data.set(rgba);
            ctx.putImageData(imageData, 0, 0);

            fs.writeFileSync(objId + '.png', canvas.toBuffer('image/png'));
            exportedIds.add(objId);
          }
        } catch (error) {
          console.error(error);
        }
      }
    }
    
    // Run the extraction. Page-level errors are already logged inside
    // extractImages; this handler covers anything that rejects the promise
    // itself (for example a PDF that fails to load), so the failure is
    // reported and reflected in the exit code instead of surfacing as an
    // unhandled rejection.
    extractImages().catch(function (error) {
      console.error(error);
      process.exitCode = 1;
    });
    
    

    相关文章

      网友评论

          本文标题:pdfjs 导出PDF所有图片

          本文链接:https://www.haomeiwen.com/subject/pwrojktx.html