最近有相关需求,就用 Python 做了一个 PDF 处理工具,代码如下:
不想写代码的可以直接使用打包好的 EXE:
https://download.csdn.net/download/qq_36497454/53542250
# main.py
# Application entry point: builds the main window and runs the Tk event loop.
# Uses the Python 3.x import style.
# NOTE(review): the class listing in this post is labelled MainWindow.py, but
# it is imported from a module named `window` here -- confirm the module name.
from window import MainWindow
main_window_class = MainWindow()
window = main_window_class.get_window()
# Enter the Tk message loop.
window.mainloop()
# MainWindow.py
# Python3.x 导入方法
import os
from tkinter import *
from tkinter import filedialog
from imageUtil import pyMuPDF_fitz
from wordUtil import PDFtoWord
class MainWindow:
    """Tk main window of the PDF converter.

    The window lets the user pick a source PDF and an output directory and
    then export the PDF either as page images or as a Word document.
    """

    # Path of the selected PDF file; None until the user has picked one.
    pdf_path: object

    def __init__(self):
        """Create the Tk root window and place all widgets on it."""
        self.pdf_path = None
        self.dir_path = None
        self.window = Tk()
        # Window title.
        self.window.title('PDF文件转化器')
        # Size 600x400, placed at screen offset (50, 100).
        self.window.geometry('600x400+50+100')

        # PDF row: label, path display box and "choose PDF" button.
        self.pdf_lable = Label(self.window, text="PDF:")
        self.pdf_lable.place(x=100, y=25)
        self.pdf_text = Text(self.window, bd=5, width=30, height=2)
        self.pdf_text.place(x=180, y=20)
        self.pdf_button = Button(self.window, text="选择PDF", width=10, command=self.set_pdf)
        self.pdf_button.place(x=440, y=20)

        # Output directory row: label, path display box and "choose" button.
        self.dir_lable = Label(self.window, text="保存目录:")
        self.dir_lable.place(x=100, y=75)
        self.dir_text = Text(self.window, bd=5, width=30, height=2)
        self.dir_text.place(x=180, y=70)
        self.dir_button = Button(self.window, text="保存目录", width=10, command=self.set_dir)
        self.dir_button.place(x=440, y=70)

        # Conversion buttons.
        self.change_image_button = Button(self.window, text="生成图片", width=10, command=self.pdf_image)
        self.change_image_button.place(x=150, y=150)
        self.change_word_button = Button(self.window, text="生成word", width=10, command=self.pdf_word)
        self.change_word_button.place(x=450, y=150)

    def get_window(self):
        """Return the Tk root window so the caller can start its main loop."""
        return self.window

    def set_pdf(self):
        """Ask the user for a PDF file and show the chosen path."""
        chosen = filedialog.askopenfilename()
        if not chosen:
            # Dialog was cancelled: keep the previous selection untouched.
            return
        self.pdf_path = chosen
        self._show_path(self.pdf_text, chosen)

    def set_dir(self):
        """Ask the user for the output directory and show the chosen path."""
        chosen = filedialog.askdirectory()
        if not chosen:
            # Dialog was cancelled: keep the previous selection untouched.
            return
        self.dir_path = chosen
        self._show_path(self.dir_text, chosen)

    def pdf_image(self):
        """Export the selected PDF as images into the output directory."""
        if self._selection_ready():
            pyMuPDF_fitz(self.pdf_path, self.dir_path)

    def pdf_word(self):
        """Export the selected PDF as a Word document in the output directory."""
        if not self._selection_ready():
            return
        word_path = self._word_output_path(self.pdf_path, self.dir_path)
        PDFtoWord(self.pdf_path, word_path)

    @staticmethod
    def _show_path(text_widget, path):
        """Replace the whole content of a Text widget with ``path``."""
        text_widget.delete("1.0", END)
        text_widget.insert("1.0", path)

    @staticmethod
    def _word_output_path(pdf_path, dir_path):
        """Return the Word file path inside ``dir_path`` for ``pdf_path``.

        Only the final extension of the PDF name is dropped, so a name such
        as ``report.v2.pdf`` keeps its inner dot. The result ends in
        ``.docx`` because the converter is pdf2docx.
        """
        stem = os.path.splitext(os.path.basename(pdf_path))[0]
        return os.path.join(dir_path, stem + ".docx")

    def _selection_ready(self):
        """Return True if a PDF and a directory are chosen, else warn the user."""
        if self.pdf_path and self.dir_path:
            return True
        # Imported locally: `from tkinter import *` does not expose the
        # messagebox submodule.
        from tkinter import messagebox
        messagebox.showwarning("提示", "请先选择PDF文件和保存目录", parent=self.window)
        return False
# PDFtoWord.py
from pdf2docx import Converter
# Convert a PDF file into a Word document.
def PDFtoWord(pdf_file, docx_file):
    """Convert all pages of ``pdf_file`` and write the result to ``docx_file``.

    Args:
        pdf_file: Path of the source PDF.
        docx_file: Path of the Word document to create.
    """
    cv = Converter(pdf_file)
    try:
        # start=0, end=None: convert from the first page through the last.
        cv.convert(docx_file, start=0, end=None)
    finally:
        # Release the converter even when the conversion fails.
        cv.close()
# pyMuPDF_fitz.py
import datetime
import os
import logging
import fitz # fitz就是pip install PyMuPDF
def pyMuPDF_fitz(pdf_path, image_path, zoom=1.33333333, rotate=0):
    """Render every page of a PDF to its own PNG file.

    Images are written into ``image_path`` as ``<pdf name>_<page index>.png``
    with a zero-based page index. The directory is created if it is missing.

    Args:
        pdf_path: Path of the source PDF.
        image_path: Directory that receives the page images.
        zoom: Scale factor applied to both axes. The default 1.33333333
            turns a 792x612 page into a 1056x816 image; 2 gives 1584x1224.
        rotate: Rotation in degrees applied before rendering (default 0).
    """
    # Drop only the final extension so names containing dots stay intact.
    stem = os.path.splitext(os.path.basename(pdf_path))[0]
    # The matrix is the same for every page, so build it once.
    mat = fitz.Matrix(zoom, zoom).prerotate(int(rotate))
    # The context manager closes the document even if rendering fails.
    with fitz.open(pdf_path) as pdf_doc:
        os.makedirs(image_path, exist_ok=True)
        for page_no, page in enumerate(pdf_doc):
            pix = page.get_pixmap(matrix=mat, alpha=False)
            # The extension is .png so that it matches the data written.
            pix.save(os.path.join(image_path, f"{stem}_{page_no}.png"))