实现的目标
批量修改 Word 文档内容。此脚本直接修改原文件,而不是另存为,建议先备份(copy)再修改。多级目录结构对脚本没有影响,脚本会遍历“path”下所有目录中以 .docx 结尾的文档。
用到的python模块
pip install python-docx
脚本内容
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import docx
import sys
import datetime
# Python 2 only: make implicit str <-> unicode conversions use UTF-8.
# ``reload`` is a builtin only on Python 2, where it restores the
# ``sys.setdefaultencoding`` attribute so it can be called here. On Python 3
# the default encoding is already 'utf-8', so this branch is never entered.
if sys.version_info[0] == 2 and sys.getdefaultencoding() != 'utf-8':
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')
# Two append-only log helpers
def log(text):
    """Append one line to the failure log inside the target directory.

    The log file is ``替换出错列表.txt`` located directly under the
    module-level ``path``.

    Args:
        text: Message to record; a newline is written after it.
    """
    # os.path.join supplies the platform's separator. The previous
    # hard-coded backslash only produced a valid path on Windows and relied
    # on an invalid string escape sequence.
    err_log = os.path.join(path, u'替换出错列表.txt')
    with open(err_log, "a") as f:
        f.write(text)
        f.write('\n')
def log2(text):
    """Append one line to the success log inside the target directory.

    The log file is ``替换文档列表.txt`` located directly under the
    module-level ``path``.

    Args:
        text: Message to record; a newline is written after it.
    """
    # os.path.join supplies the platform's separator. The previous
    # hard-coded backslash only produced a valid path on Windows and relied
    # on an invalid string escape sequence.
    tlog = os.path.join(path, u'替换文档列表.txt')
    with open(tlog, "a") as f:
        f.write(text)
        f.write('\n')
# Replacement helpers (document object, replacement dict)
def _replace_in_runs(paragraphs, replace_dict):
    """Apply every replacement to the text of each run, in place."""
    for para in paragraphs:
        for run in para.runs:
            for key, value in replace_dict.items():
                if key in run.text:
                    run.text = run.text.replace(key, value)


def info_update(doc, replace_dict):
    """Replace text in the body paragraphs and table cells of ``doc``.

    Body paragraphs are edited run by run, so a key is only found when it
    lies entirely inside one run. Table cells are matched against the whole
    ``cell.text``; the replacements are applied in ``replace_dict``
    iteration order, each one operating on the result of the previous one.

    Args:
        doc: Document object exposing ``paragraphs`` and ``tables``.
        replace_dict: Mapping of text to find -> replacement text.

    Returns:
        None. ``doc`` is modified in place.
    """
    # Body text.
    _replace_in_runs(doc.paragraphs, replace_dict)

    # Table text. A merged cell is yielded once per grid position it spans;
    # remember the cells already handled so a replacement whose value
    # contains its key (e.g. 'a' -> 'ab') is not applied repeatedly.
    # The dict keeps a reference to each marker so its id() stays unique.
    seen_cells = {}
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                marker = getattr(cell, '_tc', cell)
                if id(marker) in seen_cells:
                    continue
                seen_cells[id(marker)] = marker

                # Final text the cell must end up with.
                before = cell.text
                target = before
                for key, value in replace_dict.items():
                    target = target.replace(key, value)
                if target == before:
                    continue

                # Prefer editing the existing runs: assigning ``cell.text``
                # rebuilds the cell content, so it is used only when the
                # run-level edit cannot reach the target (for example when
                # a key is split across several runs).
                _replace_in_runs(cell.paragraphs, replace_dict)
                if cell.text != target:
                    cell.text = target
def main(replace_dict):
    """Replace text in every ``.docx`` file found under the global ``path``.

    Walks ``path`` recursively with ``os.walk``. Each matching document is
    opened, passed to ``info_update`` and saved back over the same file.
    Successes are recorded with ``log2`` and printed; failures are recorded
    with ``log`` and do not stop the walk.

    Args:
        replace_dict: Mapping of text to find -> replacement text, handed
            unchanged to ``info_update``.
    """
    # Visit every file below the target directory.
    for parent, _dirnames, filenames in os.walk(path):
        for fn in filenames:
            if not fn.endswith('.docx'):
                continue
            filedir = os.path.join(parent, fn)
            nowtime = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            try:
                # Load the document from its path.
                doc = docx.Document(filedir)
                # Apply the replacements.
                info_update(doc, replace_dict)
                # Save over the original file (no separate copy is made).
                doc.save(filedir)
                # Record the success.
                log2(nowtime + ' ' + filedir + ' ----完成')
                print(nowtime + ' ' + filedir + ' ----完成')
            except Exception as e:
                # Keep processing the remaining files, but record the reason
                # so the failure log shows why this file could not be
                # handled instead of silently dropping the exception.
                log(nowtime + ' ' + filedir + ' ----失败 ' + repr(e))
if __name__ == '__main__':
    # Directory containing the documents to process. In a Windows path each
    # backslash must be doubled inside the string literal.
    path = u'C:\\Users\\user\\Desktop\\1'
    # (text to find, replacement text) pairs.
    replace_dict = dict([
        ('大哥', '小弟'),
        ('123', '345'),
        ('come', 'go'),
    ])
    main(replace_dict)
    print('----全部替换完成----')