pandas批量处理excel表格

pandas 批量处理excel表格

import pandas as pd
import numpy as np

一.少量表格的操作

1.读取数据

# Raw strings (r"...") keep the Windows backslashes literal. In a normal
# string, sequences such as "\W", "\D" and "\p" are invalid escapes and raise
# a SyntaxWarning on Python 3.12+.
data1 = pd.read_excel(r"D:\Windows 10 Documents\Desktop\pd_excel\李大牛_业绩表.xlsx")
data2 = pd.read_excel(r"D:\Windows 10 Documents\Desktop\pd_excel\刘小鼠_业绩表.xlsx")
data3 = pd.read_excel(r"D:\Windows 10 Documents\Desktop\pd_excel\王小兔_业绩表.xlsx")
data4 = pd.read_excel(r"D:\Windows 10 Documents\Desktop\pd_excel\张小虎_业绩表.xlsx")
# Notebook-style display of the first sheet.
data1
     姓名     销售产品     销售金额    销售时间
0    李大牛   海尔冰箱     10000      2012-12-01
1    李大牛   海尔空调     23450      2012-12-02
2    李大牛   联想电脑     32000      2012-12-03
3    李大牛   华为手机     65000      2012-12-04
4    李大牛   苹果手机     8000       2012-12-05
5    李大牛   oppo手机     4000       2012-12-06

2.合并数据

# Stack the four sheets into one frame. ignore_index=True renumbers the rows
# 0..n-1; a plain pd.concat([...]) would keep each sheet's own row labels, so
# the labels would repeat in the merged frame.
data = pd.concat([data1, data2, data3, data4], ignore_index=True)

3.生成excel

# Raw string: "\W", "\D" and "\业" are not valid escape sequences, so a normal
# string literal here raises a SyntaxWarning on Python 3.12+.
data.to_excel(r"D:\Windows 10 Documents\Desktop\业绩表.xlsx")

4.计算数据

# Sum the amount column first, then store it in a new row labelled
# "销售总金额"; the other cells of that row are left missing.
grand_total = data["销售金额"].sum()
data.loc["销售总金额", "销售金额"] = grand_total
# Notebook-style display of the frame with the summary row appended.
data
             姓名     销售产品     销售金额     销售时间
0            李大牛   海尔冰箱     10000.0     2012-12-01
1            李大牛   海尔空调     23450.0     2012-12-02
2            李大牛   联想电脑     32000.0     2012-12-03
3            李大牛   华为手机     65000.0     2012-12-04
4            李大牛   苹果手机     8000.0      2012-12-05
5            李大牛   oppo手机     4000.0      2012-12-06
6            刘小鼠   格力冰箱     20000.0     2012-12-02
7            刘小鼠   格力空调     33450.0     2012-12-02
8            刘小鼠   联想电脑     42000.0     2012-12-03
9            刘小鼠   华为手机     55000.0     2012-12-04
10           刘小鼠   苹果手机     6000.0      2012-12-05
11           刘小鼠   oppo手机     8000.0      2012-12-07
12           王小兔   美的冰箱     30000.0     2012-12-02
13           王小兔   美的空调     43450.0     2012-12-02
14           王小兔   小米电脑     22000.0     2012-12-03
15           王小兔   华为手机     35000.0     2012-12-08
16           王小兔   苹果手机     2000.0      2012-12-09
17           王小兔   oppo手机     10000.0     2012-12-10
18           张小虎   海信冰箱     50100.0     2012-12-02
19           张小虎   海信空调     43450.0     2012-12-02
20           张小虎   华为电脑     52000.0     2012-12-03
21           张小虎   华为手机     25000.0     2012-12-08
22           张小虎   苹果手机     3000.0      2012-12-09
23           张小虎   oppo手机     23400.0     2012-12-10
销售总金额   NaN      NaN          646300.0    NaT

二. 批量处理表格

1.批量生成excel表

import random
product = [
    "海信空调", "海尔冰箱", "格力空调", "海信冰箱", "晶弘冰箱", "苹果手机", "联想电脑",
    "苹果电脑", "华为电脑", "弘基电脑", "小米手机", "华为手机", "OPPO手机", "vivo手机",
]
# Generate 100 Excel workbooks, one per simulated salesperson ("王0" .. "王99").
for i in range(100):
    name = "王" + str(i)
    # Building the frame from a dict lets pandas broadcast the scalar name
    # across the six sampled rows, so no placeholder column is needed.
    # NOTE(review): the hand-made sheets call the date column "销售时间" while
    # this one is "时间" - confirm whether the two should match.
    data = pd.DataFrame(
        {
            "姓名": name,
            "销售产品": random.sample(product, 6),
            "销售金额": np.random.randint(10000, 60000, size=6),
            "时间": random.sample(
                pd.date_range(start="20200101", end="20200113", freq="D").tolist(),
                6,
            ),
        }
    )
    # Raw string: "\W", "\D", "\p" and "\{" are not valid escape sequences, so
    # a normal string literal raises a SyntaxWarning on Python 3.12+.
    data.to_excel(r"D:\Windows 10 Documents\Desktop\pd_excel\{}_业绩表.xlsx".format(name))

2.批量操作excel表

#读取文件夹中的所有excel表格
import os
def file_name(file_dir):
    """Summarise each salesperson's total sales across a folder of workbooks.

    Walks ``file_dir`` recursively, reads every ``.xlsx``/``.xls`` file found
    and sums its "销售金额" column.

    Args:
        file_dir: Directory holding workbooks named ``<name>_业绩表.xlsx``.

    Returns:
        A DataFrame with the columns "姓名" and "个人销售金额", one row per
        workbook, indexed 0..n-1. It is empty when no workbook is found.
    """
    rows = []
    for dirpath, _dirnames, filenames in os.walk(file_dir):
        for filename in filenames:
            stem, ext = os.path.splitext(filename)
            # Skip anything that is not a workbook, and the "~$" lock files
            # Excel leaves next to a workbook that is currently open.
            if ext.lower() not in (".xlsx", ".xls") or filename.startswith("~$"):
                continue
            # Join with dirpath (not file_dir) so files inside sub-folders
            # resolve to their real location.
            data = pd.read_excel(os.path.join(dirpath, filename))
            # The salesperson name is everything before the first underscore;
            # a fixed three-character slice breaks on names such as "王5" or
            # "王10".
            name = stem.partition("_")[0]
            rows.append([name, data["销售金额"].sum()])
    # One running list across all directories, so rows from later folders
    # never overwrite earlier ones.
    return pd.DataFrame(rows, columns=["姓名", "个人销售金额"])
# Raw strings keep the backslashes in the Windows paths literal; "\W", "\D",
# "\p" and "\总" are not valid escape sequences in a normal string.
file_dir = r"D:\Windows 10 Documents\Desktop\pd_excel"
total = file_name(file_dir)
# Outside a notebook a bare expression's value is discarded, so print each
# statistic explicitly.
print(total["个人销售金额"].sum())   # sum
print(total["个人销售金额"].mean())  # mean
print(total["个人销售金额"].max())   # maximum
print(total["个人销售金额"].min())   # minimum
total.to_excel(r"D:\Windows 10 Documents\Desktop\总业绩表.xlsx")

批量处理excel表格 总结

1. 遍历文件夹,获取其中的文件名(os 模块,例如 os.walk)
2. 将文件夹路径与文件名拼接成完整的文件路径
3. 读取数据(pd.read_excel())
4. 对数据进行操作(本例是汇总 100 个销售人员各自的销售总金额)

python技术交流 1029280696

    原文作者:斗笠戴山头
    原文地址: https://blog.csdn.net/weixin_45080737/article/details/103981970
    本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系博主进行删除。
点赞