Intro

This vignette presents an example of using the onlineforecasting package for fitting an error model and calculating forecasts as carried out by Bacher et al. (2013).

See the Building heat load forecasting for more details on the model.

Fit the load forecast model:

library(onlineforecast)
D <- Dbuilding
# Don't bother with test period here
D$scoreperiod <- in_range("2010-12-20", D$t)
D$tday <- make_tday(D$t, 1:36)
# The heat load forecast model
model <- forecastmodel$new()
model$output = "heatload"
model$add_inputs(Ta = "lp(Ta, a1=0.9)", 
                 I = "lp(I, a1=0.7)", 
                 mu_tday = "fs(tday/24, nharmonics=10)",
                 mu = "one()")
model$add_regprm("rls_prm(lambda=0.9)")
model$add_prmbounds(Ta__a1 = c(0.8, 0.9, 0.9999),
                    I__a1 =  c(0.4, 0.8, 0.9999),
                    lambda = c(0.9, 0.99, 0.9999))
rls_optim(model, D, kseq=c(1,18))
##     $par
##     Ta__a1  I__a1 lambda 
##      0.938  0.808  0.995 
##     
##     $value
##     [1] 1.48
##     
##     $counts
##     function gradient 
##           22       22 
##     
##     $convergence
##     [1] 0
##     
##     $message
##     [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"
model$kseq <- 1
val <- rls_fit(model$prm, model, D, returnanalysis = TRUE)
D$heatloadHat <- val$Yhat

Error model

Analyse the one-step prediction residuals

Take one-step forecast, lag and calculate residuals:

# Generate the one-step residuals
residualsk1 <- D$heatload - lagvec(D$heatloadHat$k1, 1)
# or the residual function give the same
tmp <- residuals(D$D$heatloadHat, D$heatload)
unique(tmp$k1 - residualsk1)
##     numeric(0)

# Plot to see 
plot(residualsk1)

acf(residualsk1, na.action = na.pass)

No ACF on one-step because of the big morning peak (shower) errors, but anyway build an error AR:

# The residuals (they are k0, i.e. the observed one-step error at time t)
# Put them in the data
D$residuals <- residualsk1

# Setup the model
model2 <- forecastmodel$new()
model2$output = "residuals"
model2$add_inputs(AR = "AR(0)")
model2$add_regprm("rls_prm(lambda=0.9)")
model2$add_prmbounds(lambda = c(0.9, 0.99, 0.9999))

# Optimize the parameters
rls_optim(model2, D, kseq=1)
##     $par
##     lambda 
##      0.987 
##     
##     $value
##     [1] 0.731
##     
##     $counts
##     function gradient 
##            7        7 
##     
##     $convergence
##     [1] 0
##     
##     $message
##     [1] "CONVERGENCE: REL_REDUCTION_OF_F <= FACTR*EPSMCH"

Now fit with the optimized parameters just fit on the entire period:

model2$kseq <- 1
val2 <- rls_fit(model2$prm, model2, D, returnanalysis = TRUE)
##     ----------------
##     lambda 
##      0.987

Plot the forecasts (Yhat adheres to the forecast matrix format and in plot_ts the forecasts are lagged k steps to sync with the observations):

D$residualsHat <- val2$Yhat
plot_ts(D, c("^residuals|^residualsHat"), c("2011-01-01","2011-02-01"), kseq = 1)

Is the score improved?

# The RMSE with no model
rmse(D$residuals[D$scoreperiod])
##     [1] 0.74
# RMSE on the residuals from the error model
rmse(residuals(D$residualsHat, D$residuals)$h1[D$scoreperiod])
##     [1] 0.731

The final forecast with the error model included

D$heatloadHat2 <- D$heatloadHat + D$residualsHat

plot_ts(D, c("^heatload$|^heatloadHat"), c("2011-01-20","2011-02-01"), kseq = 1)


rmse(residuals(D$heatloadHat2, D$heatload)$h1[D$scoreperiod])
##     [1] 0.731

Conclusion: AR model for errors are not really useful for a process with such high spikes!

In Bacher et al. (2013) the spikes were filtered away before applying the AR model, so there was a higher ACF on the residuals and thus the AR model on the residuals reduced the ACF and the RMSE on short horizons (1-4 hours).

Literature

Bacher, Peder, Henrik Madsen, Henrik Aalborg Nielsen, and Bengt Perers. 2013. “Short-Term Heat Load Forecasting for Single Family Houses.” Energy and Buildings 65 (0): 101–12. https://doi.org/http://dx.doi.org/10.1016/j.enbuild.2013.04.022.