用R中prophet包做时序预测

      最近又接到一个预测项目需求,主要是预测每天投资用户会投资不同产品多少金额,属于每天即时预测,需要拿最近一年的数据做测试集,来预测每天不同期限产品分别被投资多少金额,然后通过这些金额每天找借款端匹配借款需求,借款端运营通过资金端需求来动态调整营销活动,通过资金需求多少来有效运营借款需求,形成与资金端的良性互动,节约闲置资金成本。

      就这样一个需求,如何实现?我首先想到通过业务经验的方法来实现,需要从N多产品中抽丝剥茧,找到产品、推广,促销,节日之间的关系,最终确定一个期限的产品当天成交预测值;但是从成交数据来看,这些数据都是每天在变化的,不能明显看出两者之间有明显的相关关系,用这些现有的数据关系能预测将来的成交吗?

      既然是预测问题,那我首先想到的是时序预测,那么问题就来了,如何程序化这些现有的数据关系?

      在网上找了一些相关模型,并没有模型来实现我所说的推广,促销,节日,加息,降息等业务时点与预测数据的关系,仔细查找,首先(注意还有后续,我会在下篇博文做出解释)发现了一个名叫prophet的时序预测包,里面可以用不同类型的holiday值来对模型进行业务时点的匹配和修正,非常适合目前的项目需求。不管三七二十一,先安装了再说。

      安装了之后需要初始化数据,初始化节假日,初始化模型,调整参数,衡量结果,具体代码如下:

library(prophet)

library(dplyr) #初始化数据 all<-read.csv(‘d:/Rdata/zjd/ts/all.csv’,na.string=’NA’,header=T)

alln<-read.csv(‘d:/Rdata/zjd/ts/alln.csv’,na.string=’NA’,header=T)

qb30<-read.csv(‘d:/Rdata/zjd/ts/qb30.csv’,na.string=’NA’,header=T)

qb45<-read.csv(‘d:/Rdata/zjd/ts/qb45.csv’,na.string=’NA’,header=T)

qb60<-read.csv(‘d:/Rdata/zjd/ts/qb60.csv’,na.string=’NA’,header=T)

qb90<-read.csv(‘d:/Rdata/zjd/ts/qb90.csv’,na.string=’NA’,header=T)

qb180<-read.csv(‘d:/Rdata/zjd/ts/qb180.csv’,na.string=’NA’,header=T)

qb270<-read.csv(‘d:/Rdata/zjd/ts/qb270.csv’,na.string=’NA’,header=T)

qb365<-read.csv(‘d:/Rdata/zjd/ts/qb365.csv’,na.string=’NA’,header=T)

qb730<-read.csv(‘d:/Rdata/zjd/ts/qb730.csv’,na.string=’NA’,header=T)

qb1095<-read.csv(‘d:/Rdata/zjd/ts/qb1095.csv’,na.string=’NA’,header=T)

qb1095m<-read.csv(‘d:/Rdata/zjd/ts/qb1095+.csv’,na.string=’NA’,header=T)

qbother<-read.csv(‘d:/Rdata/zjd/ts/qbother.csv’,na.string=’NA’,header=T)

#holiday<-read.csv(‘d:/Rdata/zjd/ts/holidays.csv’,na.string=’NA’,header=T)

historyalln <- data.frame(ds = seq(as.Date(‘2017-01-01’), as.Date(‘2017-09-17’), by = ‘d’), y = alln$yn)

historyalln <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = all$yn)

historyallo <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = all$yo)

history30n <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb30$yn)

plot(history30n$ds,history30n$y)

history45n <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb45$yn)

history60n <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb60$yn)

history90n <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb90$yn)

history180n <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb180$yn)

history270n <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb270$yn)

history365n <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb365$yn)

history730n <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb730$yn)

history1095n <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb1095$yn)

history1095m <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb1095m$yn)

historyothern <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qbother$yn)

history30o <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb30$yo)

history45o <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb45$yo)

history60o <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb60$yo)

history90o <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb90$yo)

history180o <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb180$yo)

history270o <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb270$yo)

history365o <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb365$yo)

history730o <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb730$yo)

history1095o <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb1095$yo)

history1095p <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qb1095m$yo)

historyothero <- data.frame(ds = seq(as.Date(‘2016-01-01’), as.Date(‘2017-09-11’), by = ‘d’), y = qbother$yo) #初始化节假日

holiday <- data_frame(

  holiday = ‘holiday’,

  ds = as.Date(c(  ‘2017-01-01′,’2017-01-27’,  ‘2017-04-02′,’2017-04-29’,

                   ‘2017-05-28’)),

  lower_window = 0,

  upper_window = 2

)

payday <- data_frame(

  holiday = ‘payday’,

  ds = as.Date(c(  ‘2016-01-26’,

    ‘2016-02-26′,’2016-03-26’,  ‘2016-04-26′,’2016-05-26’,

    ‘2016-06-26′,’2016-07-26’,  ‘2016-08-26′,’2016-09-26’,

    ‘2016-10-26′,’2016-11-26’,  ‘2016-12-26′,’2017-01-26’,

    ‘2017-02-26′,’2017-03-26’,  ‘2017-04-26′,’2017-05-26’,

    ‘2017-06-26′,’2017-07-26’,  ‘2017-08-26′,’2017-09-26’)),

  lower_window = 0,

  upper_window = 1

)

VIP <- data_frame(

  holiday = ‘VIP’,

  ds = as.Date(c(

    ‘2016-06-09′,’2016-07-09’,  ‘2016-08-09′,’2016-09-09’,

    ‘2016-10-09′,’2016-11-09’,  ‘2016-12-09′,’2017-01-09’,

    ‘2017-02-09′,’2017-03-09’,  ‘2017-04-09′,’2017-05-09’,

    ‘2017-06-09’, ‘2017-07-09’, ‘2017-08-09’,  ‘2017-09-09’)),

  lower_window = 0,

  upper_window = 1

)

raiserate <- data_frame(

  holiday = ‘raiserate’,

  ds = as.Date(c( ‘2017-05-08′,’2017-06-19’,  ‘2017-07-24’)),

  lower_window = 0,

  upper_window = 7

)

reducerate <- data_frame(

  holiday = ‘reducerate’,

  ds = as.Date(c(‘2016-06-20′,’2017-02-22’, ‘2017-03-27’, ‘2017-04-05’)),

  lower_window = 0,

  upper_window = 7

)

newcustomer <- data_frame(

  holiday = ‘newcustomer’,

  ds = as.Date(c(‘2016-01-01′,’2016-01-15′,’2016-07-18′,’2016-08-24’,

                 ‘2016-09-07′,’2016-12-09′,’2016-12-22′,’2017-01-06′,’2017-04-12’)),

  lower_window = 0,

  upper_window = 7

)

promotion <- data_frame(

  holiday = ‘promotion’,

  ds = as.Date(c(‘2016-07-07′,’2016-11-03′,’2017-01-10’, ‘2017-01-17’, ‘2017-02-10’)),

  lower_window = 0,

  upper_window = 7

)

holidays<-bind_rows(holiday,payday,VIP, raiserate,reducerate,newcustomer,promotion) #初始化模型 nall <- prophet(historyalln,

                holidays = holidays,

                seasonality.prior.scale=5,

                changepoint.prior.scale=0.9,

                holidays.prior.scale = 20,

                mcmc.samples = 500,

                interval.width=0.9,

                uncertainty.samples = 30)

oall <- prophet(historyallo,

                holidays = holidays,

                seasonality.prior.scale=5,

                changepoint.prior.scale=0.1,

                holidays.prior.scale = 1,

                mcmc.samples = 100,

                interval.width=0.9,

                uncertainty.samples = 100)

n30 <- prophet(history30n,

               holidays = holidays,

               seasonality.prior.scale=5,

               changepoint.prior.scale=0.1,

               holidays.prior.scale = 1,

               mcmc.samples = 100,

               interval.width=0.9,

               uncertainty.samples = 100)

n45 <- prophet(history45n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n60 <- prophet(history60n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n90 <- prophet(history90n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n180 <- prophet(history180n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n270 <- prophet(history270n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n365 <- prophet(history365n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n730 <- prophet(history730n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n1095 <- prophet(history1095n,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

n1095m <- prophet(history1095m,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

nother <- prophet(historyothern,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o30 <- prophet(history30o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o45 <- prophet(history45o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o60 <- prophet(history60o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o90 <- prophet(history90o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o180 <- prophet(history180o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o270 <- prophet(history270o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o365 <- prophet(history365o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o730 <- prophet(history730o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o1095 <- prophet(history1095o,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

o1095p <- prophet(history1095p,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500)

oother <- prophet(historyothero,holidays = holidays, holidays.prior.scale = 1, mcmc.samples = 500) #设定预测时间 futurealln <- make_future_dataframe(nall, periods = 7)

futureallo <- make_future_dataframe(oall, periods = 7)

future30n <- make_future_dataframe(n30, periods = 7)

future45n <- make_future_dataframe(n45, periods = 7)

future60n <- make_future_dataframe(n60, periods = 7)

future90n <- make_future_dataframe(n90, periods = 7)

future180n <- make_future_dataframe(n180, periods = 7)

future270n <- make_future_dataframe(n270, periods = 7)

future365n <- make_future_dataframe(n365, periods = 7)

future730n <- make_future_dataframe(n730, periods = 7)

future1095n <- make_future_dataframe(n1095, periods = 7)

future1095m <- make_future_dataframe(n1095m, periods = 7)

futureothern <- make_future_dataframe(nother, periods = 7)

future30o <- make_future_dataframe(o30, periods = 7)

future45o <- make_future_dataframe(o45, periods = 7)

future60o <- make_future_dataframe(o60, periods = 7)

future90o <- make_future_dataframe(o90, periods = 7)

future180o <- make_future_dataframe(o180, periods = 7)

future270o <- make_future_dataframe(o270, periods = 7)

future365o <- make_future_dataframe(o365, periods = 7)

future730o <- make_future_dataframe(o730, periods = 7)

future1095o <- make_future_dataframe(o1095, periods = 7)

future1095p <- make_future_dataframe(o1095p, periods = 7)

futureothero <- make_future_dataframe(oother, periods = 7)

tail(future30n)

#预测

forecastalln <- predict(nall, futurealln)

forecastallo <- predict(oall, futureallo)

forecast30n <- predict(n30, future30n)

forecast45n <- predict(n45, future45n)

forecast60n <- predict(n60, future60n)

forecast90n <- predict(n90, future90n)

forecast180n <- predict(n180, future180n)

forecast270n <- predict(n270, future270n)

forecast365n <- predict(n365, future365n)

forecast730n <- predict(n730, future730n)

forecast1095n <- predict(n1095, future1095n)

forecast1095m <- predict(n1095m, future1095m)

forecastothern <- predict(nother, futureothern)

forecast30o <- predict(o30, future30o)

forecast45o <- predict(o45, future45o)

forecast60o <- predict(o60, future60o)

forecast90o <- predict(o90, future90o)

forecast180o <- predict(o180, future180o)

forecast270o <- predict(o270, future270o)

forecast365o <- predict(o365, future365o)

forecast730o <- predict(o730, future730o)

forecast1095o <- predict(o1095, future1095o)

forecast1095p <- predict(o1095p, future1095p)

forecastothero <- predict(oother, futureothero)

# tail(forecast90n[c(‘ds’, ‘yhat’, ‘yhat_lower’, ‘yhat_upper’)]) forecastalln %>%

  select(ds,payday,VIP, raiserate,reducerate,newcustomer,promotion) %>%

  filter(abs(payday+VIP+raiserate+reducerate+newcustomer+promotion)>0) %>%

  tail(10) forecastallo %>%

  select(ds,payday,VIP, raiserate,reducerate,newcustomer,promotion) %>%

  filter(abs(payday+VIP+raiserate+reducerate+newcustomer+promotion)>0) %>%

  tail(10) #直线预测

plot(nall, forecastalln)

plot(n30, forecast30n)

plot(oall, forecastallo)

#趋势分解

prophet_plot_components(nall, forecastalln)

prophet_plot_components(oall, forecastallo) a<-cbind(futurealln[1],forecastalln$yhat,forecast30n$yhat,forecast45n$yhat,forecast60n$yhat,forecast90n$yhat,forecast180n$yhat,forecast270n$yhat,forecast365n$yhat,forecast730n$yhat,forecast1095n$yhat,forecast1095m$yhat,forecastothern$yhat)

b<-cbind(futureallo[1],forecastallo$yhat,forecast30o$yhat,forecast45o$yhat,forecast60o$yhat,forecast90o$yhat,forecast180o$yhat,forecast270o$yhat,forecast365o$yhat,forecast730o$yhat,forecast1095o$yhat,forecast1095p$yhat,forecastothero$yhat)

write.csv(a,file = “d:/Rdata/zjd/ts/qballn.csv”)

write.csv(b,file = “d:/Rdata/zjd/ts/qballo.csv”)

目前预测准确率总金额能较为准确的预测,调整好参数后总成交金额差值约为1%左右;不同产品期限成交金额预测会有10%上下的波动。 参考文献:http://blog.csdn.net/sinat_26917383/article/details/57419862 http://blog.csdn.net/justinjia91/article/details/62470076

点赞