Intro

Introduce how to use lm() as the regression function, hence describing the steps in making a new fitting function.

Data

Refer to description.

Initialize

Load the package:

library(onlineforecast)

The “building” data is included in the package. Its a data.list (see the vignette ??):

class(Dbuilding)
##     [1] "data.list"

Keep it in D and see the content:

D <- Dbuilding
D$tday <- make_tday(D$t, 1:36)
names(D)
##     [1] "t"             "heatload"      "heatloadtotal" "Ta.obs"       
##     [5] "I.obs"         "Ta"            "I"             "tday"

Just keep it as a vector y

D$y <- D$heatload

Take out data from a period for fitting the model

Dtrain <- subset(D, c("2011-01-08","2011-01-25"))
Dtrain$scoreperiod <- in_range("2011-01-15", Dtrain$t)

Define a very simple model and fit it

First carry out the input transformation step

Now we can define a simple model using only the ambient temperature NWPs as input

model <- forecastmodel$new()
model$output = "y"
model$add_inputs(Ta = "Ta",     # The ambient temperature
                 mu = "one()") # one() generates a matrix of ones (i.e. an intercept is included)

First transform the input series (more details later)

model$kseq <- 1:6
datatr <- model$transform_data(Dtrain)

Now the input data is almost ready for the regression model, it holds Ta and mu

names(datatr)
##     [1] "Ta" "mu"

They are both data.frames holding the regression inputs for each horizon k (i.e. a column for each k)

class(datatr$Ta)
##     [1] "data.frame"
names(datatr$Ta)
##      [1] "k1"  "k2"  "k3"  "k4"  "k5"  "k6"  "k7"  "k8"  "k9"  "k10" "k11" "k12"
##     [13] "k13" "k14" "k15" "k16" "k17" "k18" "k19" "k20" "k21" "k22" "k23" "k24"
##     [25] "k25" "k26" "k27" "k28" "k29" "k30" "k31" "k32" "k33" "k34" "k35" "k36"
head(datatr$Ta)
##           k1   k2   k3   k4   k5   k6   k7   k8   k9  k10  k11  k12  k13  k14  k15
##     577 2.15 1.59 1.65 1.94 2.53 3.09 3.46 3.75 4.29 5.06 5.77 6.37 6.61 6.63 6.79
##     578 2.28 1.65 1.94 2.53 3.09 3.46 3.75 4.29 5.06 5.77 6.37 6.61 6.63 6.79 7.07
##     579 2.46 1.94 2.53 3.09 3.46 3.75 4.29 5.06 5.77 6.37 6.61 6.63 6.79 7.07 7.25
##     580 2.70 2.47 3.25 3.51 3.79 4.54 5.35 6.06 6.70 7.04 7.30 7.54 7.66 7.77 7.71
##     581 2.90 3.25 3.51 3.79 4.54 5.35 6.06 6.70 7.04 7.30 7.54 7.66 7.77 7.71 6.84
##     582 3.33 3.51 3.79 4.54 5.35 6.06 6.70 7.04 7.30 7.54 7.66 7.77 7.71 6.84 5.50
##          k16  k17  k18  k19  k20  k21  k22   k23   k24   k25   k26   k27   k28
##     577 7.07 7.25 7.36 7.47 7.03 5.87 4.81 4.092 3.702 3.461 2.946 2.326 1.861
##     578 7.25 7.36 7.47 7.03 5.87 4.81 4.09 3.702 3.461 2.946 2.326 1.861 1.507
##     579 7.36 7.47 7.03 5.87 4.81 4.09 3.70 3.461 2.946 2.326 1.861 1.507 1.123
##     580 6.84 5.50 4.58 4.01 3.41 2.68 2.08 1.611 1.167 0.819 0.463 0.200 0.159
##     581 5.50 4.58 4.01 3.41 2.68 2.08 1.61 1.167 0.819 0.463 0.200 0.159 0.412
##     582 4.58 4.01 3.41 2.68 2.08 1.61 1.17 0.819 0.463 0.200 0.159 0.412 1.084
##           k29   k30   k31   k32  k33  k34  k35  k36
##     577 1.507 1.123 0.795 0.781 1.25 2.00 2.68 2.97
##     578 1.123 0.795 0.781 1.255 2.00 2.68 2.97 2.87
##     579 0.795 0.781 1.255 1.998 2.68 2.97 2.87 2.66
##     580 0.412 1.084 1.981 2.783 3.24 3.30 3.00 2.54
##     581 1.084 1.981 2.783 3.235 3.30 3.00 2.54 2.23
##     582 1.981 2.783 3.235 3.299 3.00 2.54 2.23 2.10
##
class(datatr$mu)
##     [1] "data.frame"
names(datatr$mu)
##     [1] "k1" "k2" "k3" "k4" "k5" "k6"
head(datatr$mu)
##       k1 k2 k3 k4 k5 k6
##     1  1  1  1  1  1  1
##     2  1  1  1  1  1  1
##     3  1  1  1  1  1  1
##     4  1  1  1  1  1  1
##     5  1  1  1  1  1  1
##     6  1  1  1  1  1  1

The regression stage

We now want to fit a regression model for a specific horizon k

k <- 6

Form the regressor matrix and lag the forecasts k steps to match the output observations

X <- as.data.frame(subset(datatr, kseq=k, lagforecasts = TRUE))
names(X)
##     [1] "Ta.k6" "mu.k6"

In order to use lm() we need to bind the model input and output data

X <- cbind(y = Dtrain[[model$output]], X)

Use lm() to fit a linear regression model, by first generating a formula with no intercept

frml <- pst("y ~ ", pst(names(X)[-1], collapse=" + "), " - 1")
frml
##     [1] "y ~ Ta.k6 + mu.k6 - 1"

and then fit the model

fit <- lm(frml, X)

Voila, now we can calculate the k step predictions

yhat <- predict.lm(fit, X)

And make a plot

plot(X$y)
lines(yhat)

Finally, calculate the RMSE score

rmse(X$y - yhat)
##     [1] 0.251

Fit a model using a low-pass filter on the ambient temperature

Now we can define a more advanced model which take into account the dynamics from the ambient temperature to the heat load using a first order low-pass filter

model <- forecastmodel$new()
model$output = "y"
model$add_inputs(Ta = "lp(Ta, a1=0.85)", # The ambient temperature through a low-pass filter
                 mu = "one()")

Each input in model is an object of class input, see the expression for input Ta

class(model$inputs)
##     [1] "list"
model$inputs$Ta$expr
##     [1] "lp(Ta, a1=0.85)"

The expression can be evaluated on the data, this is what happens in transform_data(). The expression will be evaluated using the data.list as the environment. So every variable in the data.list is available in the evaluation of the expression

tmp <- model$inputs$Ta$evaluate(D)
head(tmp[ ,1:5])
##             k1    k2    k3    k4    k5
##     [1,] -2.82 -3.20 -3.12 -3.09 -3.13
##     [2,] -2.84 -3.19 -3.11 -3.10 -3.14
##     [3,] -2.85 -3.18 -3.12 -3.11 -3.14
##     [4,] -2.86 -3.17 -3.11 -3.11 -3.14
##     [5,] -2.85 -3.15 -3.11 -3.11 -3.09
##     [6,] -2.84 -3.14 -3.11 -3.06 -2.98

Now evaluate the expression in each input with transform_data the input series

model$kseq <- 1:36
datatr <- model$transform_data(Dtrain)

Compare the input before and after low-pass filtering

plot(Dtrain$Ta$k6)
lines(datatr$Ta$k6)

The regression stage

We now want to fit a regression model for a specific horizon k

X <- as.data.frame(subset(datatr, kseq=k, lagforecasts = TRUE))
X <- cbind(y = Dtrain[[model$output]], X)
fit <- lm(frml, X)

Voila, now we can calculate the k step predictions

yhat <- predict.lm(fit, X)

And make a plot

plot(X$y)
lines(yhat)

Finally, calculate the RMSE score

rmse(X$y - yhat)
##     [1] 0.222

Hmm, not much better than before…

Tune the low-pass filter coefficient

The model has implemented a function for inserting a new value using the syntax: “inputname”__“parametername” = value, like

model$insert_prm(c(Ta__a1 = 0.99))
model$inputs$Ta$expr
##     [1] "lp(Ta, a1=0.99)"

Now when carrying out the transformation we can see that the Ta input is much more smoothed (higher filter coefficient (a1) value)

datatr <- model$transform_data(Dtrain)
plot(Dtrain$Ta$k1)
lines(datatr$Ta$k1)

Write a function to be passed to an optimizer

scorefun <- function(prm, model, k){
  model$reset_state()
  model$insert_prm(prm)
  datatr <- model$transform_data(Dtrain)
  X <- as.data.frame(subset(datatr, kseq=k, lagforecasts = TRUE))
  X <- cbind(y = Dtrain[[model$output]], X)
  frml <- pst("y ~ ", pst(names(X)[-1], collapse=" + "), " - 1")
  yhat <- predict(lm(frml, X), newdata = X)
  return(rmse(X$y - yhat))
}

Use it to check that the same rmse score is obtained

scorefun(c(Ta__a1 = 0.85), model, k)
##     [1] 0.227

Now optimize the low-pass filter coefficient

optim(c(Ta__a1 = 0.85), scorefun, model = model, k = k)
##     Warning in optim(c(Ta__a1 = 0.85), scorefun, model = model, k = k): one-dimensional optimization by Nelder-Mead is unreliable:
##     use "Brent" or optimize() directly
##     $par
##     Ta__a1 
##      0.938 
##     
##     $value
##     [1] 0.215
##     
##     $counts
##     function gradient 
##           26       NA 
##     
##     $convergence
##     [1] 0
##     
##     $message
##     NULL

It gets around 5% better.

Use the optimized value and see the prediction

prm <- c(Ta__a1 = 0.938)
model$reset_state()
model$insert_prm(prm)
datatr <- model$transform_data(Dtrain)
X <- as.data.frame(subset(datatr, kseq=k, lagforecasts = TRUE))
X <- cbind(y = Dtrain[[model$output]], X)
fit <- lm(frml, X)
yhat <- predict.lm(fit, X)
plot(X$y)
lines(yhat)