Pandas时刻数据:Timestamp
时刻数据代表时间点,是pandas的数据类型,是将值与时间点相关联的最基本类型的时间序列数据
pandas.Timestamp()
# pd.Timestamp(): build a single pandas timestamp from a datetime or a string
from datetime import datetime

import numpy as np
import pandas as pd

# The class is imported directly (rather than calling datetime.datetime on an
# un-imported module) so this snippet runs on its own and matches the
# `from datetime import datetime` style used by the later examples.
date1 = datetime(2016, 12, 1, 12, 45, 30)  # a standard-library datetime object
date2 = '2017-12-21'  # a date given as a string
t1 = pd.Timestamp(date1)
t2 = pd.Timestamp(date2)
print(t1, type(t1))
print(t2)
print(pd.Timestamp('2017-12-21 15:00:22'))
# Each call directly produces a pandas point-in-time value (a time stamp);
# the resulting data type is pandas' Timestamp.
#执行结果
2016-12-01 12:45:30 <class 'pandas._libs.tslibs.timestamps.Timestamp'>
2017-12-21 00:00:00
2017-12-21 15:00:22
# pd.to_datetime: scalar input -> Timestamp, list input -> DatetimeIndex
from datetime import datetime

date1 = datetime(2016, 12, 1, 12, 45, 30)
date2 = '2017-12-21'

# A single datetime object or date string is converted to one pandas Timestamp.
t1, t2 = (pd.to_datetime(single) for single in (date1, date2))
for stamp in (t1, t2):
    print(stamp, type(stamp))

# A list of date strings is converted to a pandas DatetimeIndex instead.
lst_date = ['2017-12-21', '2017-12-22', '2017-12-23']
t3 = pd.to_datetime(lst_date)
t3_type = type(t3)
print(t3, '\n', t3_type)
#执行结果
2016-12-01 12:45:30 <class 'pandas._libs.tslibs.timestamps.Timestamp'>
2017-12-21 00:00:00 <class 'pandas._libs.tslibs.timestamps.Timestamp'>
DatetimeIndex(['2017-12-21', '2017-12-22', '2017-12-23'], dtype='datetime64[ns]', freq=None)
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>
# pd.to_datetime: convert several date values into a DatetimeIndex
date1 = [datetime(2015,6,1),datetime(2015,7,1),datetime(2015,8,1),datetime(2015,9,1),datetime(2015,10,1)]
date2 = ['2017-2-1','2017-2-2','2017-2-3','2017-2-4','2017-2-5','2017-2-6']
print("1".center(40,'*'))
print(date1)
print(date2)
t1 = pd.to_datetime(date1)
t2 = pd.to_datetime(date2)
print("2".center(40,'*'))
print(t1)
print(t2)
# Both a list of datetime objects and a list of date strings become a
# DatetimeIndex.

date3 = ['2017-2-1','2017-2-2','2017-2-3','hello world!','2017-2-5','2017-2-6']
# When a sequence contains values that cannot be parsed, there are two ways
# to avoid an unhandled error.
#
# (a) Keep the raw input: this used to be spelled errors='ignore', but that
#     option is deprecated since pandas 2.2 in favour of catching the
#     exception explicitly, which is what is done here. On failure the
#     original values are returned unchanged as a plain array.
try:
    t3 = pd.to_datetime(date3)
except (ValueError, TypeError):
    t3 = np.array(date3)
print("3".center(40,'*'))
print(t3,type(t3))

# (b) errors='coerce': unparseable entries become NaT (Not a Time) and the
#     result is still a DatetimeIndex.
t4 = pd.to_datetime(date3, errors = 'coerce')
print("4".center(40,'*'))
print(t4,type(t4))
# 执行结果
*******************1********************
[datetime.datetime(2015, 6, 1, 0, 0), datetime.datetime(2015, 7, 1, 0, 0), datetime.datetime(2015, 8, 1, 0, 0), datetime.datetime(2015, 9, 1, 0, 0), datetime.datetime(2015, 10, 1, 0, 0)]
['2017-2-1', '2017-2-2', '2017-2-3', '2017-2-4', '2017-2-5', '2017-2-6']
*******************2********************
DatetimeIndex(['2015-06-01', '2015-07-01', '2015-08-01', '2015-09-01',
'2015-10-01'],
dtype='datetime64[ns]', freq=None)
DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', '2017-02-04',
'2017-02-05', '2017-02-06'],
dtype='datetime64[ns]', freq=None)
*******************3********************
['2017-2-1' '2017-2-2' '2017-2-3' 'hello world!' '2017-2-5' '2017-2-6'] <class 'numpy.ndarray'>
*******************4********************
DatetimeIndex(['2017-02-01', '2017-02-02', '2017-02-03', 'NaT', '2017-02-05',
'2017-02-06'],
dtype='datetime64[ns]', freq=None) <class 'pandas.core.indexes.datetimes.DatetimeIndex'>
# asfreq: convert a time series to a different frequency
ts = pd.Series(np.random.rand(4),
               index = pd.date_range('20170101','20170104'))
print(ts)
# Change the frequency from daily (D) to every 5 hours. The lowercase 'h'
# alias is used because the uppercase 'H' spelling is deprecated since
# pandas 2.2 (newer versions therefore report the frequency as 5h).
# method: fill mode for the newly created time points -
#   None leaves them empty, 'ffill' carries the previous value forward,
#   'bfill' uses the next value.
resampled = ts.asfreq('5h', method = 'ffill')
print(resampled)
#执行结果
2017-01-01 0.233770
2017-01-02 0.297308
2017-01-03 0.576970
2017-01-04 0.246338
Freq: D, dtype: float64
2017-01-01 00:00:00 0.233770
2017-01-01 05:00:00 0.233770
2017-01-01 10:00:00 0.233770
2017-01-01 15:00:00 0.233770
2017-01-01 20:00:00 0.233770
2017-01-02 01:00:00 0.297308
2017-01-02 06:00:00 0.297308
2017-01-02 11:00:00 0.297308
2017-01-02 16:00:00 0.297308
2017-01-02 21:00:00 0.297308
2017-01-03 02:00:00 0.576970
2017-01-03 07:00:00 0.576970
2017-01-03 12:00:00 0.576970
2017-01-03 17:00:00 0.576970
2017-01-03 22:00:00 0.576970
Freq: 5H, dtype: float64
# Series.shift on a pd.date_range() index: leading / lagging data
ts = pd.Series(np.random.rand(4),
               index = pd.date_range('20170101','20170104'))
print("1".center(40,'*'))
print(ts)

# Positive periods move the values later (lag); negative periods move them
# earlier (lead). The index stays fixed and the vacated slots become NaN.
lagged = ts.shift(2)
led = ts.shift(-2)
print("2".center(40,'*'))
print(lagged)
print(led)

# Percentage change of each time stamp relative to the previous one.
per = ts/ts.shift(1) - 1
print("3".center(40,'*'))
print(per)

# With the freq argument the time stamps themselves are shifted instead of
# the values. 'min' is used for minutes because the single-letter 'T' alias
# is deprecated since pandas 2.2.
shifted_days = ts.shift(2, freq = 'D')
shifted_minutes = ts.shift(2, freq = 'min')
print("4".center(40,'*'))
print(shifted_days)
print(shifted_minutes)
#执行结果
*******************1********************
2017-01-01 0.314173
2017-01-02 0.871328
2017-01-03 0.710915
2017-01-04 0.066829
Freq: D, dtype: float64
*******************2********************
2017-01-01 NaN
2017-01-02 NaN
2017-01-03 0.314173
2017-01-04 0.871328
Freq: D, dtype: float64
2017-01-01 0.710915
2017-01-02 0.066829
2017-01-03 NaN
2017-01-04 NaN
Freq: D, dtype: float64
*******************3********************
2017-01-01 NaN
2017-01-02 1.773404
2017-01-03 -0.184102
2017-01-04 -0.905996
Freq: D, dtype: float64
*******************4********************
2017-01-03 0.314173
2017-01-04 0.871328
2017-01-05 0.710915
2017-01-06 0.066829
Freq: D, dtype: float64
2017-01-01 00:02:00 0.314173
2017-01-02 00:02:00 0.871328
2017-01-03 00:02:00 0.710915
2017-01-04 00:02:00 0.066829
Freq: D, dtype: float64