import PyPDF2
import os
# Given a folder name, process every PDF under that folder and merge runs of repeated slide pages.
def combine_PDF(folderName):
    """Write a de-duplicated copy of every PDF found under ``folderName``.

    The folder is walked recursively. For each file, a page is dropped when
    its extracted text is contained in the extracted text of the page that
    follows it; the last page of a file is always kept. The filtered copy is
    written to a ``result`` sub-directory next to the source file, under the
    same file name.

    Sub-directories named ``result`` are not scanned, so running the function
    again does not re-process its own earlier output.

    Args:
        folderName: Path of the folder to scan.

    Returns:
        None. The output is the set of files written to the ``result``
        directories.
    """
    output_dirname = 'result'
    for root, dirs, files in os.walk(folderName):
        # Prune in place: os.walk only honours modifications made to the
        # very list object it yielded, and only in top-down mode (default).
        dirs[:] = [name for name in dirs if name != output_dirname]
        for file in files:
            output_dir = os.path.join(root, output_dirname)
            with open(os.path.join(root, file), 'rb') as pdffile:
                pdfreader = PyPDF2.PdfFileReader(pdffile)
                pdf_writer = PyPDF2.PdfFileWriter()
                _copy_distinct_pages(pdfreader, pdf_writer)
                # Create and open the output only once page selection has
                # succeeded, so a failure does not leave an empty file behind.
                os.makedirs(output_dir, exist_ok=True)
                # The source file is kept open until the write finishes
                # because the writer still refers to the reader's pages.
                with open(os.path.join(output_dir, file), 'wb') as pdfoutputfile:
                    pdf_writer.write(pdfoutputfile)


def _copy_distinct_pages(reader, writer):
    """Add to ``writer`` each page of ``reader`` not repeated by its successor."""
    page_count = reader.numPages
    if page_count == 0:
        return
    current_page = reader.getPage(0)
    current_text = current_page.extractText()
    for index in range(1, page_count):
        next_page = reader.getPage(index)
        next_text = next_page.extractText()
        # Keep the current page only if the next page does not contain all
        # of its text.
        # NOTE(review): a page with no extractable text is always dropped
        # here, since the empty string is contained in any string - confirm
        # this is intended for image-only pages.
        if current_text not in next_text:
            writer.addPage(current_page)
        current_page, current_text = next_page, next_text
    # The final page has no successor to compare against, so it is always kept.
    writer.addPage(current_page)
# Script entry: process every PDF under the relative folder "file", then
# report completion on stdout.
combine_PDF(folderName='file')
print("finish")