This vignette presents an example of using the onlineforecasting package for fitting an error model and calculating forecasts as carried out by Bacher et al. (2013).
See the Building heat load forecasting for more details on the model.
Fit the load forecast model:
library(onlineforecast)
D <- Dbuilding
# Don't bother with test period here
D$scoreperiod <- in_range("2010-12-20", D$t)
D$tday <- make_tday(D$t, 1:36)
# The heat load forecast model
model <- forecastmodel$new()
model$output = "heatload"
model$add_inputs(Ta = "lp(Ta, a1=0.9)",
I = "lp(I, a1=0.7)",
mu_tday = "fs(tday/24, nharmonics=10)",
mu = "one()")
model$add_regprm("rls_prm(lambda=0.9)")
model$add_prmbounds(Ta__a1 = c(0.8, 0.9, 0.9999),
I__a1 = c(0.4, 0.8, 0.9999),
lambda = c(0.9, 0.99, 0.9999))
rls_optim(model, D, kseq=c(1,18))
## $par
## Ta__a1 I__a1 lambda
## 0.938 0.808 0.995
##
## $value
## [1] 1.48
##
## $counts
## function gradient
## 22 22
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
model$kseq <- 1
val <- rls_fit(model$prm, model, D, returnanalysis = TRUE)
D$heatloadHat <- val$Yhat
Analyse the one-step prediction residuals
Take one-step forecast, lag and calculate residuals:
# Generate the one-step residuals
residualsk1 <- D$heatload - lagvec(D$heatloadHat$k1, 1)
# or the residual function give the same
tmp <- residuals(D$D$heatloadHat, D$heatload)
unique(tmp$k1 - residualsk1)
## numeric(0)
# Plot to see
plot(residualsk1)
acf(residualsk1, na.action = na.pass)
No ACF on one-step because of the big morning peak (shower) errors, but anyway build an error AR:
# The residuals (they are k0, i.e. the observed one-step error at time t)
# Put them in the data
D$residuals <- residualsk1
# Setup the model
model2 <- forecastmodel$new()
model2$output = "residuals"
model2$add_inputs(AR = "AR(0)")
model2$add_regprm("rls_prm(lambda=0.9)")
model2$add_prmbounds(lambda = c(0.9, 0.99, 0.9999))
# Optimize the parameters
rls_optim(model2, D, kseq=1)
## $par
## lambda
## 0.987
##
## $value
## [1] 0.731
##
## $counts
## function gradient
## 7 7
##
## $convergence
## [1] 0
##
## $message
## [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
Now fit with the optimized parameters just fit on the entire period:
model2$kseq <- 1
val2 <- rls_fit(model2$prm, model2, D, returnanalysis = TRUE)
## ----------------
## lambda
## 0.987
Plot the forecasts (Yhat adheres to the forecast matrix format and in plot_ts the forecasts are lagged k steps to sync with the observations):
D$residualsHat <- val2$Yhat
plot_ts(D, c("^residuals|^residualsHat"), c("2011-01-01","2011-02-01"), kseq = 1)
Is the score improved?
# The RMSE with no model
rmse(D$residuals[D$scoreperiod])
## [1] 0.74
# RMSE on the residuals from the error model
rmse(residuals(D$residualsHat, D$residuals)$h1[D$scoreperiod])
## [1] 0.731
The final forecast with the error model included
D$heatloadHat2 <- D$heatloadHat + D$residualsHat
plot_ts(D, c("^heatload$|^heatloadHat"), c("2011-01-20","2011-02-01"), kseq = 1)
rmse(residuals(D$heatloadHat2, D$heatload)$h1[D$scoreperiod])
## [1] 0.731
Conclusion: AR model for errors are not really useful for a process with such high spikes!
In Bacher et al. (2013) the spikes were filtered away before applying the AR model, so there was a higher ACF on the residuals and thus the AR model on the residuals reduced the ACF and the RMSE on short horizons (1-4 hours).