美文网首页
python自动化--提取pdf中的图片

python自动化--提取pdf中的图片

作者: 新苡米 | 来源:发表于2023-04-17 18:30 被阅读0次
    import fitz
    import re
    import os
    from addres_file import file_name
    import time
    
    
    def pdf2image1(path, pic_path, image_name):
        """Extract the embedded images of a PDF and save each one as a PNG.

        Every cross-reference (xref) object of the document is inspected; those
        whose definition matches ``/Subtype /Image`` are loaded as a pixmap and
        written to ``<pic_path>/<image_name>_img_<n>.png``, where ``n`` counts
        up from 1 over the images that were actually saved.

        Args:
            path: Path of the PDF file to read.
            pic_path: Directory that receives the PNG files.
            image_name: Prefix used for the generated file names.

        Returns:
            None. Failures are printed instead of raised, so a caller that
            loops over several inputs is never interrupted.
        """
        # Compiled once; the pattern is the same for every xref object.
        image_marker = re.compile(r"/Subtype(?= */Image)")
        try:
            # The context manager closes the document even when extraction fails.
            with fitz.open(path) as pdf:
                count = 1
                # xref numbers are visited from 1 up to xref_length() - 1.
                for xref in range(1, pdf.xref_length()):
                    if not image_marker.search(pdf.xref_object(xref)):
                        continue
                    pix = None
                    try:
                        pix = fitz.Pixmap(pdf, xref)
                        # PNG cannot hold CMYK data; convert such pixmaps to RGB.
                        if pix.n - pix.alpha >= 4:
                            pix = fitz.Pixmap(fitz.csRGB, pix)
                        new_name = '%s_img_%d.png' % (image_name, count)
                        pix.save(os.path.join(pic_path, new_name))
                    except Exception as error:
                        # One unreadable image must not stop the remaining ones.
                        print(error)
                    else:
                        count += 1
                    finally:
                        # Drop the reference so the pixmap memory can be freed.
                        pix = None
        except Exception as error:
            # Best-effort contract: report the problem and return normally.
            print(error)
    
    
    if __name__ == '__main__':
        # Script entry point: run the extraction once per prefix returned by
        # file_name() and report the total elapsed time in seconds.
        prefixes = file_name()

        # The source PDF and the output folder are the same on every pass,
        # so they are defined once instead of inside the loop.
        file_path = r'E:\Python\提取图片pdf\1过程审核系统(操作手册)1.pdf'  # path of the PDF file
        dir_path = r'E:\Python\提取图片pdf'  # folder that receives the images
        # The folder is not created here (the makedirs call was left disabled).

        started = time.time()
        for prefix in prefixes:
            pdf2image1(file_path, dir_path, prefix)
        elapsed = time.time() - started
        print('task is over: %.2f' % elapsed)
    

    相关文章

      网友评论

          本文标题:python自动化--提取pdf中的图片

          本文链接:https://www.haomeiwen.com/subject/uspcjdtx.html