python将word文件批量转成pdf

遍历目录下的文件

  • 调用glob
    遍历指定目录下的所有文件和文件夹;默认不递归遍历子目录,需要手动完成递归遍历功能(Python 3.5+ 也可以在模式中使用 `**` 并传入 recursive=True 来递归匹配)。
import glob as gb
# Collect the entries matched by the pattern (the direct children of the
# folder) and print them one per line.
pattern = 'D:\\文件夹\\*'
paths = gb.glob(pattern)
for entry in paths:
    print(entry)
  • 调用os.walk
    遍历指定目录下的所有文件和文件夹,递归遍历,功能强大,推荐使用。
import os
# Walk the whole tree and print each file's full path followed by its
# bare file name.
for folder, _subdirs, names in os.walk('d:\\2\\'):
    for name in names:
        print(os.path.join(folder, name), name)
  • DIY
    遍历指定目录下的所有文件和文件夹,递归遍历,自主编写,扩展性强,可以学习练手。
import os;  
# Accumulates the file paths discovered by DirAll().
files = []


def DirAll(pathName):
    """Recursively collect the paths of all files below ``pathName``.

    Each non-directory entry is appended to the module-level ``files``
    list; sub-directories are descended into depth-first. Entries named
    ``$RECYCLE.BIN`` or ``System Volume Information`` are skipped.
    Nothing happens when ``pathName`` does not exist.
    """
    if not os.path.exists(pathName):
        return
    for entry in os.listdir(pathName):
        if entry in ("$RECYCLE.BIN", "System Volume Information"):
            continue
        candidate = os.path.join(pathName, entry)
        if os.path.isdir(candidate):
            DirAll(candidate)
            continue
        parent, leaf = os.path.split(candidate)
        # Join parent and file name with exactly one separator between them.
        separator = "" if parent.endswith(os.sep) else os.sep
        files.append(parent + separator + leaf)

# Gather every file below the folder, then print the collected paths.
DirAll("D:\\2\\")
for collected in files:
    print(collected)

Word转换为PDF的库

Python中针对Word转换为PDF的库有:

  • 仅能在Windows上运行
    • win32com:通过 Windows COM 组件(win32com)调用 Word 服务(Word.Application),实现 Word 到 PDF 文件的转换。因此,该 Python 程序需要在装有 Word(可能至少要求 2007 版本)的 Windows 机器上运行。
from win32com.client import Dispatch
from os import walk

wdFormatPDF = 17


def doc2pdf(input_file):
    """Convert a Word document to a PDF saved next to it.

    The PDF gets the same folder and base name as the source, with the
    extension replaced by ``.pdf``. This drives Word through COM, so it
    needs Windows with Microsoft Word installed.

    Args:
        input_file: Path of the ``.doc`` or ``.docx`` file to convert.
    """
    import os  # local import: the file only imports ``walk`` from ``os``

    # NOTE(review): Word may resolve relative paths against its own working
    # directory rather than this process's, so pass absolute paths.
    input_file = os.path.abspath(input_file)
    # Replace only the extension. The previous str.replace(".docx", ".pdf")
    # left ".doc" names unchanged (so the PDF overwrote the source file) and
    # also rewrote any ".docx" occurring inside folder names.
    output_file = os.path.splitext(input_file)[0] + ".pdf"

    word = Dispatch('Word.Application')
    try:
        doc = word.Documents.Open(input_file)
        try:
            doc.SaveAs(output_file, FileFormat=wdFormatPDF)
        finally:
            doc.Close()
    finally:
        # Always shut Word down, even when opening or saving fails.
        word.Quit()


if __name__ == "__main__":
    import os  # for os.path.join; only ``walk`` is imported at file level

    # Convert every Word document found below this folder.
    directory = "C:\\Users\\xkw\\Desktop\\destData"
    for root, dirs, filenames in walk(directory):
        for file in filenames:
            # Word keeps "~$name.docx" lock files next to open documents;
            # they are not real documents and cannot be converted.
            if file.startswith("~$"):
                continue
            if file.endswith((".doc", ".docx")):
                doc2pdf(os.path.join(root, file))
  • comtypes
import os
import sys
import re

import comtypes.client

wdFormatPDF = 17

def covx_to_pdf(infile, outfile):
    """Convert a Word document to PDF.

    Args:
        infile: Path of the source Word document (the script below passes
            an absolute path).
        outfile: Path of the PDF file to write.
    """
    print('making:',outfile)
    word = comtypes.client.CreateObject('Word.Application')
    try:
        doc = word.Documents.Open(infile)
        try:
            # FileFormat must be given explicitly; without it SaveAs keeps
            # the Word format and no PDF is produced.
            doc.SaveAs(outfile, FileFormat=wdFormatPDF)
        finally:
            doc.Close()
    finally:
        # Always shut Word down, even when opening or saving fails.
        word.Quit()


# Number of documents converted.
total = 0

# Compiled once instead of on every loop iteration.
_TOWN_RE = re.compile(r'(\w+镇)|(\w+乡)')
_VILLAGE_RE = re.compile(r'((\w+镇)|(\w+乡))(\w+村)')

# Convert every Word document whose file name contains a town (镇/乡) and a
# village (村) name into output/<town>/<village>.pdf under its own folder.
for root,dirs,files in os.walk('.'):
    for filespath in files:
        # Only Word documents are converted; "~$" files are Word lock files.
        if filespath.startswith('~$') or not filespath.lower().endswith(('.doc', '.docx')):
            continue
        p = os.path.abspath(os.path.join(root, filespath))
        zhen = _TOWN_RE.search(filespath)
        cun = _VILLAGE_RE.search(filespath)
        if zhen and cun:
            print(p, zhen.groups(), cun.groups())
            zhen = zhen.group(1) or zhen.group(2)
            cun = cun.group(4)
            # The result is a PDF, so name it ".pdf" (it was ".doc" before).
            outp = os.path.abspath(os.path.join(root, 'output', zhen, cun + '.pdf'))
            os.makedirs(os.path.dirname(outp), exist_ok=True)
            total += 1
            covx_to_pdf(p, outp)

print('共生成',total)
  • mac用户
    • docx2pdf:专门用于 Word 转 PDF 的第三方库,需要本机已安装 Microsoft Word,macOS 和 Windows 均可使用。
pip install docx2pdf
# Convert a single file: the first argument is the source .docx, the second
# is the PDF to write.
from docx2pdf import convert
convert("input.docx", "output.pdf")

#查找当前目录下的全部word文件
import os
import glob
from pathlib import Path

path = os.getcwd() + '/'
p = Path(path)  # search root: the current working directory
# Recursively collect every .docx below the root, skipping the "~$name.docx"
# lock files Word keeps next to open documents.
FileList = [f for f in p.glob("**/*.docx") if not f.name.startswith("~$")]

# Convert each document, writing the PDF next to its source.
for file in FileList:
    # with_suffix swaps ".docx" for ".pdf"; appending ".pdf" to the full
    # name produced "name.docx.pdf".
    convert(file, str(file.with_suffix(".pdf")))

参考链接
https://www.zhihu.com/people/valerie-98-45
https://blog.csdn.net/san1156/article/details/77885995
https://blog.csdn.net/kewei168/article/details/84574301
https://www.cnblogs.com/xiangnan/p/7040093.html

    原文作者:Valerie2020
    原文地址: https://blog.csdn.net/qq_42771083/article/details/107387640
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞