pdf.py
from PyPDF2 import PdfFileReader
def getTextPDF(pdfFileName):
    """Extract the text of every page of a PDF file.

    Args:
        pdfFileName: Path of the PDF file to read.

    Returns:
        The extracted text of all pages, in page order, joined with
        newlines. An empty string when the document has no pages.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Open via a context manager so the handle is closed even when parsing
    # or extraction raises; the previous version never closed the file.
    with open(pdfFileName, 'rb') as pdf_file:
        read_pdf = PdfFileReader(pdf_file)
        # range(n) yields page indices 0..n-1. The earlier upper bound of
        # getNumPages() - 1 stopped one page short and dropped the last page
        # (a single-page PDF produced no text at all).
        # Extraction stays inside the `with` block so the reader's underlying
        # file is still open while each page is read.
        text = [
            read_pdf.getPage(i).extractText()
            for i in range(read_pdf.getNumPages())
        ]
    return '\n'.join(text)
TestPDFs.py
import pdf
# Location of the sample document whose text is dumped to stdout.
pdfFile = '/Users/lilong/Desktop/1.pdf'
# pdfFileEncrypted = 'sonnets.pdf'

# Extract first, then print; print() separates its two arguments with a
# single space, so the text follows "PDF 1:\n" after one leading space.
extracted_text = pdf.getTextPDF(pdfFile)
print("PDF 1:\n", extracted_text)
网友评论