遍历目录下的文件
- 调用glob
遍历指定目录下的所有文件和文件夹,默认不递归遍历;如需递归,可以使用 `**` 模式并传入 recursive=True(Python 3.5+),或者手动完成递归遍历功能。
import glob as gb
# List the direct children (files and folders) of the directory. The pattern
# contains no "**", so sub-directories are not descended into.
paths = gb.glob('D:\\文件夹\\*')
for path in paths:
    print(path)
- 调用os.walk
遍历指定目录下的所有文件和文件夹,递归遍历,功能强大,推荐使用。
import os
# Walk the whole tree below the start directory and print each file's full
# path followed by its bare file name.
for dirpath, dirnames, filenames in os.walk('d:\\2\\'):
    for file in filenames:
        fullpath = os.path.join(dirpath, file)
        print(fullpath, file)
- DIY
遍历指定目录下的所有文件和文件夹,递归遍历,自主编写,扩展性强,可以学习练手。
import os;
# Accumulates the full path of every non-directory entry found by DirAll.
files = []


def DirAll(pathName):
    """Recursively collect the files below ``pathName`` into ``files``.

    Args:
        pathName: Directory to scan. A path that is not an existing
            directory is ignored.

    Entries named ``$RECYCLE.BIN`` or ``System Volume Information`` are
    skipped at every level. Results are appended to the module-level
    ``files`` list; nothing is returned.
    """
    # isdir() rather than exists(): os.listdir() raises on a plain file.
    if not os.path.isdir(pathName):
        return
    for name in os.listdir(pathName):
        if name in ("$RECYCLE.BIN", "System Volume Information"):
            continue
        full = os.path.join(pathName, name)
        if os.path.isdir(full):
            DirAll(full)
        else:
            # The joined path is already complete, so there is no need to
            # split it into dirname/basename and glue it back together.
            files.append(full)
# Collect every file below the directory, then print the collected paths.
DirAll("D:\\2\\")
for f in files:
    print(f)
    # Python 2 alternative for GBK-encoded paths:
    #     print f.decode('gbk').encode('utf-8')
Word转换为PDF的库
Python中针对Word转换为PDF的库有:
- 仅能在Windows上运行
- win32com:通过 Windows COM 组件(win32com)调用 Word 服务(Word.Application),实现 Word 到 PDF 文件的转换。因此,该 Python 程序需要在安装了 Word(可能至少要求 2007 版本)的 Windows 机器上运行。
from win32com.client import Dispatch
from os import walk
# WdSaveFormat value that asks Word to write a PDF.
wdFormatPDF = 17


def doc2pdf(input_file):
    """Convert a Word document to a PDF saved next to it.

    Args:
        input_file: Path of the ``.doc`` / ``.docx`` file to convert.

    The PDF keeps the input's base name and gets a ``.pdf`` extension.
    Word is driven through the ``Word.Application`` COM object.
    """
    import os  # local import: the surrounding snippet only imports os.walk

    # splitext() swaps whatever extension is present. Replacing the literal
    # ".docx" left ".doc" paths unchanged (so SaveAs targeted the source path
    # itself) and also rewrote ".docx" occurring inside directory names.
    output_file = os.path.splitext(input_file)[0] + ".pdf"
    word = Dispatch('Word.Application')
    try:
        doc = word.Documents.Open(input_file)
        try:
            doc.SaveAs(output_file, FileFormat=wdFormatPDF)
        finally:
            doc.Close()
    finally:
        # Quit even when opening or saving fails, so a failed conversion
        # does not leave the Word instance running.
        word.Quit()
if __name__ == "__main__":
    import os  # local import: the surrounding snippet only imports os.walk

    directory = "C:\\Users\\xkw\\Desktop\\destData"
    # Convert every Word document found anywhere below the directory.
    for root, dirs, filenames in walk(directory):
        for file in filenames:
            if file.endswith((".doc", ".docx")):
                doc2pdf(os.path.join(root, file))
- comtypes
import os
import sys
import re
import comtypes.client
# WdSaveFormat value for PDF output; pass it as file_format to request a PDF.
wdFormatPDF = 17


def covx_to_pdf(infile, outfile, file_format=None):
    """Open ``infile`` in Word and save it as ``outfile``.

    Args:
        infile: Path of the document to open.
        outfile: Path to save to.
        file_format: Optional WdSaveFormat value forwarded to ``SaveAs`` as
            ``FileFormat`` (for example ``wdFormatPDF``). When omitted,
            ``SaveAs`` is called with the path only.

    NOTE(review): despite the name, no PDF format is requested unless
    file_format is given, and the loop in this file passes a ``.pdf`` input
    with a ``.doc`` output. Confirm the intended conversion direction.
    """
    print('making:', outfile)
    word = comtypes.client.CreateObject('Word.Application')
    try:
        doc = word.Documents.Open(infile)
        try:
            if file_format is None:
                doc.SaveAs(outfile)
            else:
                doc.SaveAs(outfile, FileFormat=file_format)
        finally:
            doc.Close()
    finally:
        # Quit even when opening or saving fails, so a failed conversion
        # does not leave the Word instance running.
        word.Quit()
total = 0  # number of files handed to covx_to_pdf
for root, dirs, files in os.walk('.'):
    for filespath in files:
        p = os.path.abspath(os.path.join(root, filespath))
        # A town/township name ("...镇" or "...乡") and a village name
        # ("...村") are parsed from the file name; files lacking either
        # are skipped.
        zhen = re.search(r'(\w+镇)|(\w+乡)', filespath)
        cun = re.search(r'((\w+镇)|(\w+乡))(\w+村)', filespath)
        if zhen and cun:
            print(p, zhen.groups(), cun.groups())
            zhen = zhen.group(1) or zhen.group(2)
            cun = cun.group(4)
            # Target path: <root>/output/<town>/<village>.doc
            outp = os.path.abspath(
                os.path.join(root, 'output/' + zhen + '/' + cun + '.doc')
            )
            # exist_ok avoids the check-then-create race of exists() + makedirs().
            os.makedirs(os.path.dirname(outp), exist_ok=True)
            # Only inputs whose path ends in "pdf" are converted.
            if p.endswith('pdf'):
                total += 1
                covx_to_pdf(p, outp)
print('共生成', total)
- mac用户
- docx2pdf:专门用于 Word 转 PDF,需要本机已安装 Microsoft Word(Windows 和 macOS 均可使用)
pip install docx2pdf
# Convert a single file
from docx2pdf import convert
convert("input.docx", "output.pdf")
#查找当前目录下的全部word文件
import os
import glob
from pathlib import Path
path = os.getcwd() + '/'
p = Path(path)  # Path object for the current working directory
# Every .docx file below that directory, at any depth.
FileList = list(p.glob("**/*.docx"))
# Convert each Word file found to a PDF placed next to it.
for file in FileList:
    # with_suffix() yields "name.pdf"; appending ".pdf" to the full path
    # produced "name.docx.pdf".
    convert(file, str(file.with_suffix(".pdf")))
参考链接
https://www.zhihu.com/people/valerie-98-45
https://blog.csdn.net/san1156/article/details/77885995
https://blog.csdn.net/kewei168/article/details/84574301
https://www.cnblogs.com/xiangnan/p/7040093.html