Python:将DataFrame数据由多列转为一行
1.原始数据
import pandas as pd
import numpy as np
# Sample input: one timestamp string per row plus six feature columns (a-f),
# with np.nan marking missing readings. 2020-01-03 and 2020-01-05 have no row.
a = pd.DataFrame(
    data=[
        ['2020-01-01 12:21:00', np.nan, 2, np.nan, 4, 5, np.nan],
        ['2020-01-02 16:23:03', 11, 22, np.nan, 44, 55, 66],
        ['2020-01-04 17:22:23', np.nan, 222, np.nan, np.nan, 555, np.nan],
        ['2020-01-06 19:45:43', 0.1, 0.2, 0.3, 0.4, 0.5, np.nan],
        ['2020-01-07 13:23:45', 101, 202, np.nan, 404, 505, 606],
        ['2020-01-08 09:34:52', 110, 220, 330, np.nan, 550, np.nan],
        ['2020-01-09 10:33:23', 1, 2.5, 3, 3.5, 5.5, np.nan],
    ],
    columns=['time', 'a', 'b', 'c', 'd', 'e', 'f'],
)
# Bare expression: displays the frame when run in a notebook cell.
a
2.补充为连续数据
# Pad the observations out to one row per calendar day.
#
# The input frame only has a 'time' column holding timestamp strings with a
# time-of-day component, so a daily join key has to be derived first: parse the
# strings and truncate them to midnight. This is done on a copy (assign) so the
# original frame `a` is left unmodified.
# NOTE(review): assumes at most one observation per day is intended; several
# rows on the same day would all be kept by the merge below.
daily = a.assign(Time_Day=pd.to_datetime(a['time']).dt.normalize())
# First and last observed day (already datetime64 values at midnight).
start = daily['Time_Day'].min()
end = daily['Time_Day'].max()
# Every calendar day between the first and last observation, inclusive.
dates = pd.date_range(start=start, end=end, freq='D')
# Left-merge the observations onto the full calendar: days without an
# observation remain as rows whose other columns are all NaN.
data_continue = pd.DataFrame({'Time_Day': dates})
data_continue = pd.merge(data_continue, daily, on='Time_Day', how='left')
# Bare expression: displays the frame when run in a notebook cell.
data_continue
3.将多行多列的数据展平为一行
# Flatten the multi-row frame into a single wide row.
#
# Labels of the wide row: "<row position>_<original column>", generated row by
# row. str(column) keeps this working when a column label is not a string.
new_data_columns = [
    str(position) + '_' + str(column)
    for position in range(len(data_continue))
    for column in data_continue.columns
]
print(new_data_columns)
# Cell values in the same row-major order as the labels above. Rows are walked
# positionally (not via index labels) so labels and values always line up,
# whatever index data_continue carries.
data_columns = [
    value
    for row in data_continue.itertuples(index=False, name=None)
    for value in row
]
print(data_columns)
# Build the one-row frame in a single constructor call instead of assigning
# into an empty frame, so each column gets an inferred dtype.
data_final_redefine_column = pd.DataFrame([data_columns], columns=new_data_columns)
# Bare expression: displays the frame when run in a notebook cell.
data_final_redefine_column