PDF 文件是签名只读的,所以要先读出来然后再写出去
使用前先安装 PyPDF2;中文的坑请参考 https://github.com/mstamy2/PyPDF2/pull/463
Talk is cheap, show you the code:
from PyPDF2 import PdfFileReader, PdfFileWriter
from multiprocessing import Process, Queue
import os,time
import getopt, sys,shutil
def update_metadata(pdf):
    """Copy a PDF to a new file with replaced author/title/creator metadata.

    Every page of the source document is appended to a fresh writer, the
    new document-information entries are attached, and the result is written
    to the destination path. The source document's existing metadata is
    printed to standard output before the copy is made.

    Args:
        pdf: Mapping with two keys: ``'source'`` (path of the PDF to read)
            and ``'to'`` (path of the PDF to write).
    """
    readFile = pdf['source']
    writeFile = pdf['to']
    # The reader pulls page objects from the stream on demand, so the source
    # file must stay open until the writer has finished; the nested ``with``
    # blocks close both handles even if PyPDF2 raises part-way through.
    with open(readFile, 'rb') as source_stream:
        # Build a PdfFileReader over the source document.
        pdfReader = PdfFileReader(source_stream)
        print(pdfReader.getDocumentInfo())
        # Build the PdfFileWriter that will hold the rewritten document.
        pdfWriter = PdfFileWriter()
        # Metadata entries to set on the output document.
        pdfWriter.addMetadata({'/Author':'youngboy','/Title':'youngboy','/Creator':'youngboy'})
        # Copy every page from the reader into the writer.
        pdfWriter.appendPagesFromReader(pdfReader)
        # Write the result to the destination file.
        with open(writeFile, 'wb+') as target_stream:
            pdfWriter.write(target_stream)
def long_time_task(q):
    """Worker loop: drain PDF jobs from ``q`` and rewrite each one.

    Args:
        q: Queue of job mappings, each passed unchanged to
            ``update_metadata``.
    """
    # Imported locally so the block does not depend on a file-level import.
    from queue import Empty

    while not q.empty():
        print("剩余任务"+str(q.qsize()))
        try:
            # Another worker may take the last job between the ``empty()``
            # check and this call; a blocking ``get()`` would then wait
            # forever and the parent's ``join()`` would never return.
            v = q.get(timeout=1)
        except Empty:
            break
        update_metadata(v)
def usage():
    """Print the command-line option summary to standard output."""
    help_text = """
- r root 目录
- p 进程数(程池不会用所以这个参数没意义)
"""
    print(help_text)
if __name__ == '__main__':
    # Command-line entry point: collect every PDF below the root directory,
    # queue one job per file, and let two worker processes write the
    # rewritten copies into a mirrored tree under <root>/dest.
    print(sys.argv[1:])
    try:
        # The long names are declared here because the loop below handles
        # --help / --process; without them getopt rejects those spellings.
        opts, args = getopt.getopt(sys.argv[1:], "hr:p:", ["help", "process="])
    except getopt.GetoptError as err:
        # Print help information and exit.
        print(err)
        usage()
        sys.exit(2)
    root = None
    pnum = 3
    for o, a in opts:
        if o == "-r":
            root = a
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-p", "--process"):
            # Accepted for compatibility; the number of workers started
            # below is fixed at two, so this value is not used yet.
            pnum = a
        else:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError("unhandled option")
    if root is None:
        # Without -r there is nothing to walk; fail with the help text
        # instead of a TypeError on ``None + str``.
        usage()
        sys.exit(2)

    dest_root = os.path.join(root, 'dest')
    q = Queue()
    # Start from a clean output tree. On the first run the directory does
    # not exist yet, which must not be treated as an error.
    if os.path.isdir(dest_root):
        shutil.rmtree(dest_root)
    # Fill the queue with one job per PDF. os.walk lists the root directory
    # before the first destination folder is created, so the freshly removed
    # dest tree is not re-entered during this walk.
    for (r, dirs, files) in os.walk(root):
        # Path of the current directory relative to root ('.' for root
        # itself); relpath strips only the leading root, unlike str.replace,
        # and tolerates a trailing separator on the -r argument.
        dd = os.path.relpath(r, root)
        dest_dir = os.path.normpath(os.path.join(dest_root, dd))
        for f in files:
            to_path = os.path.join(dest_dir, f)
            os.makedirs(dest_dir, exist_ok=True)
            print(dd+"--"+r+"--"+root)
            # Match on the extension only, so names that merely contain
            # "pdf" (e.g. "pdf_notes.txt") are not handed to PyPDF2.
            if f.lower().endswith('.pdf'):
                q.put({
                    'source': os.path.join(r, f),
                    'to': to_path
                })
    print(q.qsize())
    # Two worker processes, started by hand rather than through a pool.
    pw1 = Process(target=long_time_task, args=(q,))
    pw1.start()
    pw2 = Process(target=long_time_task, args=(q,))
    pw2.start()
    pw1.join()
    pw2.join()
    print('All subprocesses done.')
使用示例
python xx.py -r D:/pdf
网友评论