Intro

This vignette presents an example of using the onlineforecast package for fitting a model for solar power forecasts.

Data

Data for the forecasting examples is taken from a data set collected in Sønderborg, Denmark.

First Load the package:

library(onlineforecast)

The “Dsolarpower” data is downloaded and loaded. It’s a data.list (see the vignette onlineforecasting.pdf):

path <- paste0(tempdir(),"/Dsolarpower.rda")
download.file("https://onlineforecasting.org/examples/data/Dsolarpower.rda", path)
load(path)
class(Dsolarpower)
##     [1] "data.list"

Keep it in D and see the content:

D <- Dsolarpower
names(D)
##     [1] "t"            "PVpower"      "I"            "Ta"           "I.obs"       
##     [6] "Ta.obs"       "cosAoi"       "sunElevation" "tday"

The solar power (in W) is kept in a data.frame. It was generated by transforming the observed global radiation (in ‘I.obs’) for PV panels with different angles of azimuth (m40 is pointing 40 deg. towards West, 0 is straight South, etc. They were all with tilt of 45 deg.):

head(D$PVpower)
##          azimuth.m40 azimuth.m20 azimuth.0 azimuth.20 azimuth.40
##     9505           0           0         0          0          0
##     9506           0           0         0          0          0
##     9507           0           0         0          0          0
##     9508           0           0         0          0          0
##     9509           0           0         0          0          0
##     9510           0           0         0          0          0

The Numerical Weather Predictions (NWPs) of global radiation

head(D$I[ ,1:9])
##          k1    k2     k3      k4      k5      k6      k7    k8    k9
##     9505  0 0.000  0.000   0.000   0.000   0.000   0.364  47.8 127.9
##     9506  0 0.000  0.000   0.000   0.000   0.364  47.778 127.9 181.0
##     9507  0 0.000  0.000   0.000   0.364  47.778 127.947 181.0 195.0
##     9508  0 0.000  0.000   0.373  42.316 113.756 145.173 152.6 134.4
##     9509  0 0.000  0.373  42.316 113.756 145.173 152.587 134.4  71.6
##     9510  0 0.373 42.316 113.756 145.173 152.587 134.382  71.6  18.2

Take the solar power (azimuth 20 degrees i.e. panel pointing slightly west) and keep it as a vector y:

D$y <- D$PVpower$azimuth.20

A time series plot, see “?plot_ts.data.list”:

plot_ts(D, c("^y","^I$"), kseq=c(1,12))

A shorter period and note how the forecasts I_kxx are lagged (12 step forecast is lagged 12 steps) such that they are all aligned in time:

plot_ts(D, c("^y|I.obs","^I$"), c("2010-07-01","2010-07-15"), kseq=c(1,12))

Take a subset for optimizing parameters on:

# Take a subset for opmizing the parameters
Dtrain <- subset(D, c("2010-01-01", "2010-04-01"))

Set in the training set the index of the values which will be evaluated (when calculating the score with indexes with ‘scoreperiod == FALSE’ are not included):

# In that set which points to include in score calculation (what is not included can be considered a burn-in period)
Dtrain$scoreperiod <- in_range("2010-02-01", Dtrain$t)

Dtrain$scoreperiod[Dtrain$y < 0.1] <- FALSE

Model

Initiate a model:

# Make a new forecasmodel objext
model <- forecastmodel$new()
# Set the name of the model output
model$output <- "y"

Define the transformations forming the inputs. For the solar power model base splines of the time of day multiplied with the global irradiance NWP is a good model:

# Define the input I, which is generated as presented later by the call to bspline
model$add_inputs(I = "bspline(tday, Boundary.knots = c(6,18), degree = 5, intercept=TRUE) %**% I")

Notice the use of the %\*\*% operator (see ?"%\*\*%" for info), which is needed since bspline returns a list of data.frames with the base splines:

# Test the bspline function
spl <- bspline(D$tday[1:24, 1:3], Boundary.knots = c(6,18), degree = 3, intercept=TRUE)
##     Warning in splines::bs(X[, i], Boundary.knots = Boundary.knots, degree =
##     degree, : some 'x' values beyond boundary knots may cause ill-conditioned bases

##     Warning in splines::bs(X[, i], Boundary.knots = Boundary.knots, degree =
##     degree, : some 'x' values beyond boundary knots may cause ill-conditioned bases

##     Warning in splines::bs(X[, i], Boundary.knots = Boundary.knots, degree =
##     degree, : some 'x' values beyond boundary knots may cause ill-conditioned bases
# See the base splines
plot(spl$bs1$k1, type="l", xlim=c(1,21))
lines(spl$bs2$k1, col=2)
lines(spl$bs3$k1, col=3)
lines(spl$bs4$k1, col=4)

Notice, that values outside of Boundary.knots lead to the warnings, but since the solar power is zero (or close to) outside these, and since it’s multiplied with the splines, then those values are not used in the model.

Add the off-line parameters used in the regression function. For recursive least squares the forgetting factor must be added (see ‘?rls’ and ‘?rls_prm’):

# The regression stage parameters
model$add_regprm("rls_prm(lambda=0.9)")
# Add the optimization parameter bounds
model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999))

First try to fit the model and return the value of the “scorefun()”, this must work before optimizing

model$kseq <- c(1,18)
# Get the parameters
prm <- model$get_prmbounds("init")
# Fit the model and get the RMSE
rls_fit(prm, model, Dtrain, scorefun = rmse, returnanalysis = FALSE)
##     [1] 1346

Optimize the parameters: first set the horizons to run and the

model$prm <- rls_optim(model, Dtrain, control=list(maxit=2))$par
model$prm
##     lambda 
##      0.992

Now fit with the optimized parameters on the entire period for all horizons

model$kseq <- 1:36
val <- rls_fit(model$prm, model, D, returnanalysis = TRUE)

Analyse forecasts

Plot the forecasts (Yhat adheres to the forecast matrix format and in plot_ts the forecasts are lagged k steps to sync with the observations)

D$Yhat <- val$Yhat
plot_ts(D, c("^y|^Y"), c("2010-03-01","2010-03-15"), kseq = c(1,18))

plot_ts(D, c("^y|^Y"), c("2010-03-15","2010-03-20"), kseq = c(1,18))

Plot a forecast for a particular time point

i <- 1400
iseq <- i+model$kseq
plot(D$t[iseq], D$y[iseq], type = "b", xlab = "t", ylab = "y", main = pst("t = ", D$t[i]))
lines(D$t[iseq], D$Yhat[i, ], type = "b", col = 2)
legend("topright", c("Observations",pst("Predictions (",min(model$kseq)," to ",max(model$kseq)," steps ahead)")), lty = 1, col = 1:2)

Make a small “movie” stepping through the forecasts

for(i in 1400:1900){
    iseq <- i+model$kseq
    plot(D$t[iseq], D$y[iseq], type = "b", xlab = "t", ylab = "y", main = pst("t = ", D$t[i]), ylim=c(0,4000))
    lines(D$t[iseq], D$Yhat[i, ], type = "b", col = 2)
    legend("topright", c("Observations",pst("Predictions (",min(model$kseq)," to ",max(model$kseq)," steps ahead)")), lty = 1, col = 1:2)
    Sys.sleep(0.1)
}

Recursive update and prediction

First fit on a period

itrain <- which(in_range("2010-07-01",D$t,"2010-08-01"))
itest <- which(in_range("2010-08-01",D$t,"2010-08-05"))
Dfit <- subset(D, itrain)
rls_fit(model$prm, model, Dfit)
##     ----------------
##     lambda 
##      0.992

Now the fits are saved in the model object (its an R6 object, hence passed by reference to the functions and can be changed inside the functions). A list of fits with an entry for each horizon is in Lfits, see the two first

str(model$Lfits[1:2])
##     List of 2
##      $ k1:List of 4
##       ..$ k    : num 1
##       ..$ theta: num [1:6, 1] 3.528 9.15 -0.975 -1.475 10.384 ...
##       ..$ P    : num [1:6, 1:6] 3.24e-06 -2.79e-06 6.93e-07 8.54e-07 -7.43e-07 ...
##       .. ..- attr(*, "dimnames")=List of 2
##       .. .. ..$ : NULL
##       .. .. ..$ : chr [1:6] "I.bs1" "I.bs2" "I.bs3" "I.bs4" ...
##       ..$ yhat : num [1:48, 1] 0 0 0 -21.4 411.9 ...
##      $ k2:List of 4
##       ..$ k    : num 2
##       ..$ theta: num [1:6, 1] 3.5572 9.6501 -0.0983 -2.5046 10.3786 ...
##       ..$ P    : num [1:6, 1:6] 3.14e-06 -2.64e-06 5.76e-07 8.74e-07 -7.29e-07 ...
##       .. ..- attr(*, "dimnames")=List of 2
##       .. .. ..$ : NULL
##       .. .. ..$ : chr [1:6] "I.bs1" "I.bs2" "I.bs3" "I.bs4" ...
##       ..$ yhat : num [1:48, 1] 0 0 -29.2 125.7 547.8 ...

Now new data arrives, take the point right after the fit period

(i <- itrain[length(itrain)] + 1)
##     [1] 5089
Dnew <- subset(D, i)

First we need to transform the new data (This must only be done once for each new data, since some transform functions, e.g. lp(), actually keep states, see the detailed vignette onlineforecasting.pdf)

Dnew_transformed <- model$transform_data(Dnew)

Then we can update the parameters using the transformed data

rls_update(model, Dnew_transformed, Dnew[[model$output]])

Calculate predictions using the new data and the updated fits (rls coefficient estimates in model\(Lfits[[k]]\)theta)

yhat <- rls_predict(model, Dnew_transformed)

Plot to see that it fits the observations

iseq <- i+model$kseq
plot(D$t[iseq], D$y[iseq], type = "b", xlab = "t", ylab = "y")
lines(D$t[iseq], yhat, type = "b", col = 2)
legend("topright", c("observations",pst("predictions (",min(model$kseq)," to ",max(model$kseq)," steps ahead)")), lty = 1, col = 1:2)

Run this for a longer period to verify that the same forecasts are obtained (in one go vs. iteratively)

First in one go

val <- rls_fit(model$prm, model, D, returnanalysis = TRUE)
##     ----------------
##     lambda 
##      0.992
D$Yhat1 <- val$Yhat

and then iteratively

rls_fit(model$prm, model, subset(D, itrain))
##     ----------------
##     lambda 
##      0.992

D$Yhat2 <- data.frame(matrix(NA, nrow(D$Yhat1), ncol(D$Yhat1)))
names(D$Yhat2) <- names(D$Yhat1)
for(i in itest){
    print(i)
    Dnew <- subset(D, i)
    Dnewtr <- model$transform_data(Dnew)
    rls_update(model, Dnewtr, Dnew[[model$output]])
    D$Yhat2[i, ] <- as.numeric(rls_predict(model, Dnewtr))
}

Compare to see the difference between the one step forecasts

D$Yhat1$k1[itest] - D$Yhat2$k1[itest]
##      [1]  0.00000  0.00000  0.00000  0.58676  0.14980  0.24762  0.90428  2.21367
##      [9]  2.21697  0.69412 -0.72583 -1.92028 -2.11041 -2.00326 -0.98773 -0.34746
##     [17] -1.32677 -0.89317  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000
##     [25]  0.00000  0.00000  0.00000  0.34541  0.02355 -0.00373  0.85578  1.44080
##     [33]  0.93197  0.36628 -0.29434 -0.84970 -0.27691 -0.16698 -0.14822 -0.31159
##     [41] -0.38386 -0.20273  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000
##     [49]  0.00000  0.00000  0.00000  0.67913 -0.01621  0.60845  1.01005  1.44436
##     [57]  1.71871  0.83843  0.01285 -0.57329 -0.47918 -0.93496 -1.58311 -2.36884
##     [65] -1.66960 -1.79901  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000
##     [73]  0.00000  0.00000  0.00000  0.44044  0.04515 -0.04908  0.01248  0.43425
##     [81]  0.74141  0.64339 -0.61662 -1.57319 -0.70122 -0.21939 -0.31530 -1.05171
##     [89] -0.67606 -0.10380  0.00000  0.00000  0.00000  0.00000  0.00000  0.00000

The small differences are most likely caused by numerical issues: The fit on all data was done only in c++, whereas the iterative calculation was carried out switching between R and c++. Where and how this exactly happens is still to be investigated.

Improved splines


# Find the start and end of solar each day

# A minimum threshold
minthres <- 0.1
D$y[minthres < 0.1] <- 0

yday <- aslt(D$t-1)$yday
val <- lapply(split(D$y, yday), function(x){
    x[is.na(x)] <- 0
    i <- which(x > 0)
    if(length(i) > 0){
        iseq <- min(i):max(i)
        x[iseq] <- seq(0, 1, len=length(iseq))
        x[-iseq] <- NA
    }
    return(x)
})
D$tdaynormed <- unlist(val)
D$tdaynormed <- lagdf(make_input(D$tdaynormed, 1:36), "-k")

Dtrain <- subset(D, c("2010-01-01", "2010-04-01"))
Dtrain$scoreperiod <- in_range("2010-02-01", Dtrain$t)

D$ynormed <- D$y/max(D$y, na.rm=TRUE)
plot_ts(D, "tdaynormed|ynormed$", c("2010-04-01","2010-04-10"), kseq=c(1,4,12))


model$add_inputs(I = "bspline(tdaynormed, Boundary.knots = c(0,1), degree = 5) %**% I")


# Test the bspline function
spl <- bspline(D$tdaynormed[1:24, 1:3], Boundary.knots = c(0,1), degree = 3, intercept=TRUE)
# See the base splines
plot(spl$bs1$k1, type="l", xlim=c(1,21))
lines(spl$bs2$k1, col=2)
lines(spl$bs3$k1, col=3)
lines(spl$bs4$k1, col=4)

model$kseq <- c(1,18)
# Get the parameters
prm <- model$get_prmbounds("init")
# Fit the model and get the RMSE
rls_fit(prm, model, Dtrain, scorefun = rmse, returnanalysis = FALSE)
##     [1] 1423

Optimize the parameters: first set the horizons to run and the

model$prm <- rls_optim(model, Dtrain, control=list(maxit=2))$par
model$prm
##     lambda 
##      0.993

Now fit with the optimized parameters on the entire period for all horizons

model$kseq <- 1:36
val <- rls_fit(model$prm, model, D, returnanalysis = TRUE)


D$Yhat2 <- val$Yhat
plot_ts(D, c("^y$|^Y"), c("2010-04-01","2010-04-15"), kseq = c(1,18))


plot_ts(D, c("^y$|^Y|I"), c("2010-04-01","2010-04-05"), kseq = c(1,18))