Python提取文本文档内容

在网上查找资料后修修改改,目前已经能够提取出关键字(手机号码),并统计其数量。

问题一:如何让每个提取出来的关键字对应到它所在的文本文档路径?目前输出的是扫描到的全部文本文档路径,而不是与关键字一一对应的路径。

import re
from pathlib import Path

##读取文本内容
def main():
    """Scan ``*.txt`` files for mobile phone numbers and save the matches.

    Reads three module-level names that the caller must set beforehand:

    * ``import_path`` -- directory that is searched recursively for ``*.txt``.
    * ``save_path``   -- file that receives one matched number per line,
      followed by a blank line and a ``Total numbers: N`` summary.
    * ``r_files``     -- writable text file object; the path of every scanned
      file is appended to it, one per line.

    A ``Number read OK`` summary with the match count is printed at the end.
    """
    # Search the raw bytes with a bytes pattern. The previous approach ran
    # the regex over str() of a list of bytes objects, i.e. over their repr,
    # where escapes such as ``\x13`` or ``\xe1`` add digit characters that
    # are not in the file and can merge with real digits into false matches.
    # With a bytes pattern ``\d`` matches ASCII digits only.
    pattern = re.compile(
        rb'(?:13[0-9]|14[01456879]|15[0-35-9]|16[2567]|17[0-8]|18[0-9]|19[0-35-9])\d{8}'
    )

    number = []
    for p in Path(import_path).rglob("*.txt"):
        # A directory may also be named "*.txt"; it cannot be read as a file.
        if not p.is_file():
            continue
        # Record the path of every file that is scanned.
        r_files.write(str(p) + '\n')
        # Handle one file at a time instead of accumulating every line of
        # every file in memory before searching.
        data = p.read_bytes()
        number.extend(match.decode('ascii') for match in pattern.findall(data))

    # The with-statement closes the file; no explicit close() is needed.
    with open(save_path, 'w') as save_file:
        for num in number:
            save_file.write(num + '\n')
        save_file.write('\nTotal numbers: ' + str(len(number)))
    print('Number read OK, total number: ' + str(len(number)))

if __name__ == '__main__':
    # Directory to scan. A raw string literal cannot end with a single
    # backslash (it would escape the closing quote and make the whole file
    # a syntax error), so the trailing separator is omitted; pathlib treats
    # both spellings as the same directory.
    import_path = r"D:\python"
    # Output file for the extracted numbers, asked from the user.
    save_path = input("请输入保存的路径: ")
    # Listing of every scanned file. The with-statement guarantees the file
    # is flushed and closed even if main() raises; UTF-8 keeps the write
    # independent of the locale's default encoding.
    with open('扫描目录.txt', 'w', encoding='utf-8') as r_files:
        main()

    原文作者:发黄的光
    原文地址: https://blog.csdn.net/black01dream/article/details/122128571
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞