import re
# Extract embedded JPEG and PNG images from a PDF by scanning its raw bytes
# for image start/end byte sequences, then write each match to its own file.

input_pdf = 'Ch05-2006.pdf'
# Output files are named after the PDF with the '-2006' tag and everything
# from the first '.' onwards removed (here: 'Ch05').
output_base = input_pdf.replace('-2006', '').split('.')[0]

# Non-greedy scans over the raw bytes; DOTALL lets '.' match newline bytes.
# JPEG: starts at FF D8 and ends at the first FF D9 that is followed by 0x0A.
#       The trailing 0x0A byte is part of the match and is written out too.
jpg_pattern = re.compile(rb'\xff\xd8.*?\xff\xd9\x0a', re.DOTALL)
# PNG: starts at the bytes 89 'P' 'N' 'G' and ends at AE 42 60 82.
png_pattern = re.compile(rb'\x89\x50\x4e\x47.*?\xae\x42\x60\x82', re.DOTALL)


def find_images(data):
    """Return ``(jpgs, pngs)`` found in *data*.

    Args:
        data: Raw bytes to scan (for example the full contents of a PDF).

    Returns:
        A 2-tuple of lists of ``bytes``: every non-overlapping match of
        ``jpg_pattern`` and of ``png_pattern``, in order of appearance.
    """
    return jpg_pattern.findall(data), png_pattern.findall(data)


def export_images(images, base, extension):
    """Write each item of *images* to ``<base>-NNN.<extension>``.

    Files are numbered from 001 in the order given, and one
    ``' Export <name>'`` line is printed per file.

    Args:
        images: Iterable of ``bytes`` objects to write.
        base: Filename prefix (may include a directory).
        extension: File extension without the leading dot, e.g. ``'jpg'``.

    Returns:
        The list of filenames written, in order. Empty if *images* is empty.
    """
    written = []
    for number, image in enumerate(images, start=1):
        output_name = '{}-{}.{}'.format(base, str(number).zfill(3), extension)
        print(' Export {}'.format(output_name))
        with open(output_name, 'wb') as f:
            f.write(image)
        written.append(output_name)
    return written


def main():
    """Read ``input_pdf`` and export every JPEG and PNG found in it."""
    with open(input_pdf, 'rb') as f:
        pdf = f.read()
    jpgs, pngs = find_images(pdf)
    print('Find {} jpg and {} png in {}'.format(len(jpgs), len(pngs), input_pdf))
    export_images(jpgs, output_base, 'jpg')
    # The PNG pass iterates the PNG matches and writes to '.png' files; the
    # original looped over the integer count and wrote the JPEG data instead.
    export_images(pngs, output_base, 'png')


if __name__ == '__main__':
    main()
# [Scraped page footer, not part of the script: "网友评论" ("Reader comments")]