import pandas
import numpy
# Build a DataFrame from a NumPy array, a datetime index and column labels:
# 6 rows x 4 columns of random samples, indexed by six consecutive dates
# starting at 2018-05-09, with columns labelled A through D.
dates = pandas.date_range("20180509", periods=6)
df = pandas.DataFrame(
    data=numpy.random.randn(6, 4),
    index=dates,
    columns=["A", "B", "C", "D"],
)


def _show(title, payload):
    """Print *title*, then *payload*, separated by a newline."""
    print(title, payload, sep="\n")


_show("时间索引以及列标签来创建一个DataFrame:", df)

# Leading and trailing rows of the frame.
# NOTE: with no argument, head() and tail() return n=5 rows. On this 6-row
# frame that merely happens to look like "everything except the last/first
# row"; it is not what the default means.
_show("头部行: ", df.head())
_show("尾部行: ", df.tail())
_show("头部行第一行: ", df.head(1))      # first row only
_show("尾部行最后一行: ", df.tail(1))    # last row only

# The row index, the column labels and the underlying NumPy values.
_show("索引:", df.index)
_show("列:", df.columns)
_show("值:", df.values)

# describe() gives a quick per-column statistical summary.
_show("数据统计:", df.describe())

# Transpose: rows become columns and vice versa.
_show("对数据的转置: ", df.T)

# Sort along an axis by its labels: axis=0 sorts by the row index, and
# ascending=False requests descending order.
_show("按轴进行排序: ", df.sort_index(axis=0, ascending=False))

# Sort rows by the values in column B.
_show("按值进行排序: ", df.sort_values(by="B"))
"E:\Python 3.6.2\python.exe" F:/PycharmProjects/test.py
时间索引以及列标签来创建一个DataFrame:
A B C D
2018-05-09 -1.900068 -0.208794 -0.523035 1.240455
2018-05-10 1.512279 -2.283494 0.608609 1.027053
2018-05-11 -3.320670 -0.260807 0.508715 0.662909
2018-05-12 0.338343 -1.735734 1.500790 -0.959845
2018-05-13 1.990765 0.214486 -1.244937 -0.258515
2018-05-14 -1.044454 0.360775 -0.657407 -0.593493
头部行:
A B C D
2018-05-09 -1.900068 -0.208794 -0.523035 1.240455
2018-05-10 1.512279 -2.283494 0.608609 1.027053
2018-05-11 -3.320670 -0.260807 0.508715 0.662909
2018-05-12 0.338343 -1.735734 1.500790 -0.959845
2018-05-13 1.990765 0.214486 -1.244937 -0.258515
尾部行:
A B C D
2018-05-10 1.512279 -2.283494 0.608609 1.027053
2018-05-11 -3.320670 -0.260807 0.508715 0.662909
2018-05-12 0.338343 -1.735734 1.500790 -0.959845
2018-05-13 1.990765 0.214486 -1.244937 -0.258515
2018-05-14 -1.044454 0.360775 -0.657407 -0.593493
头部行第一行:
A B C D
2018-05-09 -1.900068 -0.208794 -0.523035 1.240455
尾部行最后一行:
A B C D
2018-05-14 -1.044454 0.360775 -0.657407 -0.593493
索引:
DatetimeIndex(['2018-05-09', '2018-05-10', '2018-05-11', '2018-05-12',
'2018-05-13', '2018-05-14'],
dtype='datetime64[ns]', freq='D')
列:
Index(['A', 'B', 'C', 'D'], dtype='object')
值:
[[-1.90006837 -0.20879388 -0.52303491 1.24045481]
[ 1.51227925 -2.28349377 0.60860861 1.02705302]
[-3.32067045 -0.26080686 0.50871488 0.6629095 ]
[ 0.33834299 -1.73573353 1.5007895 -0.95984505]
[ 1.99076464 0.21448643 -1.24493715 -0.25851535]
[-1.04445367 0.36077537 -0.65740657 -0.59349347]]
数据统计:
A B C D
count 6.000000 6.000000 6.000000 6.000000
mean -0.403968 -0.652261 0.032122 0.186427
std 2.054919 1.091991 1.013014 0.912672
min -3.320670 -2.283494 -1.244937 -0.959845
25% -1.686165 -1.367002 -0.623814 -0.509749
50% -0.353055 -0.234800 -0.007160 0.202197
75% 1.218795 0.108666 0.583635 0.936017
max 1.990765 0.360775 1.500790 1.240455
对数据的转置:
2018-05-09 2018-05-10 2018-05-11 2018-05-12 2018-05-13 2018-05-14
A -1.900068 1.512279 -3.320670 0.338343 1.990765 -1.044454
B -0.208794 -2.283494 -0.260807 -1.735734 0.214486 0.360775
C -0.523035 0.608609 0.508715 1.500790 -1.244937 -0.657407
D 1.240455 1.027053 0.662909 -0.959845 -0.258515 -0.593493
按轴进行排序:
A B C D
2018-05-14 -1.044454 0.360775 -0.657407 -0.593493
2018-05-13 1.990765 0.214486 -1.244937 -0.258515
2018-05-12 0.338343 -1.735734 1.500790 -0.959845
2018-05-11 -3.320670 -0.260807 0.508715 0.662909
2018-05-10 1.512279 -2.283494 0.608609 1.027053
2018-05-09 -1.900068 -0.208794 -0.523035 1.240455
按值进行排序:
A B C D
2018-05-10 1.512279 -2.283494 0.608609 1.027053
2018-05-12 0.338343 -1.735734 1.500790 -0.959845
2018-05-11 -3.320670 -0.260807 0.508715 0.662909
2018-05-09 -1.900068 -0.208794 -0.523035 1.240455
2018-05-13 1.990765 0.214486 -1.244937 -0.258515
2018-05-14 -1.044454 0.360775 -0.657407 -0.593493
Process finished with exit code 0