This vignette presents an example of using the onlineforecast package for fitting a model for solar power forecasts.
Data for the forecasting examples is taken from a data set collected in Sønderborg, Denmark.
First Load the package:
library(onlineforecast)
The “Dsolarpower” data is downloaded and loaded. It’s a data.list (see the vignette onlineforecasting.pdf):
path <- paste0(tempdir(),"/Dsolarpower.rda")
download.file("https://onlineforecasting.org/examples/data/Dsolarpower.rda", path)
load(path)
class(Dsolarpower)
## [1] "data.list"
Keep it in D
and see the content:
D <- Dsolarpower
names(D)
## [1] "t" "PVpower" "I" "Ta" "I.obs" "Ta.obs" "cosAoi"
## [8] "sunElevation" "tday"
The solar power (in W) is kept in a data.frame. It was generated by transforming the observed global radiation (in ‘I.obs’) for PV panels with different angles of azimuth (m40 is pointing 40 deg. towards West, 0 is straight South, etc. They were all with tilt of 45 deg.):
head(D$PVpower)
## azimuth.m40 azimuth.m20 azimuth.0 azimuth.20 azimuth.40
## 9505 0 0 0 0 0
## 9506 0 0 0 0 0
## 9507 0 0 0 0 0
## 9508 0 0 0 0 0
## 9509 0 0 0 0 0
## 9510 0 0 0 0 0
The Numerical Weather Predictions (NWPs) of global radiation
head(D$I[ ,1:9])
## k1 k2 k3 k4 k5 k6 k7 k8 k9
## 9505 0 0.000 0.000 0.000 0.000 0.000 0.364 47.8 127.9
## 9506 0 0.000 0.000 0.000 0.000 0.364 47.778 127.9 181.0
## 9507 0 0.000 0.000 0.000 0.364 47.778 127.947 181.0 195.0
## 9508 0 0.000 0.000 0.373 42.316 113.756 145.173 152.6 134.4
## 9509 0 0.000 0.373 42.316 113.756 145.173 152.587 134.4 71.6
## 9510 0 0.373 42.316 113.756 145.173 152.587 134.382 71.6 18.2
Take the solar power (azimuth 20 degrees i.e. panel pointing slightly
west) and keep it as a vector y
:
D$y <- D$PVpower$azimuth.20
A time series plot, see “?plot_ts.data.list”:
plot_ts(D, c("^y","^I$"), kseq=c(1,12))
A shorter period and note how the forecasts I_kxx
are
lagged (12 step forecast is lagged 12 steps) such that they are all
aligned in time:
plot_ts(D, c("^y|I.obs","^I$"), c("2010-07-01","2010-07-15"), kseq=c(1,12))
Take a subset for optimizing parameters on:
# Take a subset for opmizing the parameters
Dtrain <- subset(D, c("2010-01-01", "2010-04-01"))
Set in the training set the index of the values which will be evaluated (when calculating the score with indexes with ‘scoreperiod == FALSE’ are not included):
# In that set which points to include in score calculation (what is not included can be considered a burn-in period)
Dtrain$scoreperiod <- in_range("2010-02-01", Dtrain$t)
Dtrain$scoreperiod[Dtrain$y < 0.1] <- FALSE
Initiate a model:
# Make a new forecasmodel objext
model <- forecastmodel$new()
# Set the name of the model output
model$output <- "y"
Define the transformations forming the inputs. For the solar power model base splines of the time of day multiplied with the global irradiance NWP is a good model:
# Define the input I, which is generated as presented later by the call to bspline
model$add_inputs(I = "bspline(tday, Boundary.knots = c(6,18), degree = 5, intercept=TRUE) %**% I")
Notice the use of the %\*\*%
operator (see
?"%\*\*%"
for info), which is needed since
bspline
returns a list of data.frames with the base
splines:
# Test the bspline function
spl <- bspline(D$tday[1:24, 1:3], Boundary.knots = c(6,18), degree = 3, intercept=TRUE)
## Warning in splines::bs(X[, i], Boundary.knots = Boundary.knots, degree = degree, : some 'x' values beyond boundary
## knots may cause ill-conditioned bases
## Warning in splines::bs(X[, i], Boundary.knots = Boundary.knots, degree = degree, : some 'x' values beyond boundary
## knots may cause ill-conditioned bases
## Warning in splines::bs(X[, i], Boundary.knots = Boundary.knots, degree = degree, : some 'x' values beyond boundary
## knots may cause ill-conditioned bases
# See the base splines
plot(spl$bs1$k1, type="l", xlim=c(1,21))
lines(spl$bs2$k1, col=2)
lines(spl$bs3$k1, col=3)
lines(spl$bs4$k1, col=4)
Notice, that values outside of Boundary.knots
lead to the
warnings, but since the solar power is zero (or close to) outside these,
and since it’s multiplied with the splines, then those values are not
used in the model.
Add the off-line parameters used in the regression function. For recursive least squares the forgetting factor must be added (see ‘?rls’ and ‘?rls_prm’):
# The regression stage parameters
model$add_regprm("rls_prm(lambda=0.9)")
# Add the optimization parameter bounds
model$add_prmbounds(lambda = c(0.9, 0.99, 0.9999))
First try to fit the model and return the value of the “scorefun()”, this must work before optimizing
model$kseq <- c(1,18)
# Get the parameters
prm <- model$get_prmbounds("init")
# Fit the model and get the RMSE
rls_fit(prm, model, Dtrain, scorefun = rmse, returnanalysis = FALSE)
## [1] 1346
Optimize the parameters: first set the horizons to run and the
model$prm <- rls_optim(model, Dtrain, control=list(maxit=2))$par
model$prm
## lambda
## 0.992
Now fit with the optimized parameters on the entire period for all horizons
model$kseq <- 1:36
val <- rls_fit(model$prm, model, D, returnanalysis = TRUE)
Plot the forecasts (Yhat adheres to the forecast matrix format and in plot_ts the forecasts are lagged k steps to sync with the observations)
D$Yhat <- val$Yhat
plot_ts(D, c("^y|^Y"), c("2010-03-01","2010-03-15"), kseq = c(1,18))
plot_ts(D, c("^y|^Y"), c("2010-03-15","2010-03-20"), kseq = c(1,18))
Plot a forecast for a particular time point
i <- 1400
iseq <- i+model$kseq
plot(D$t[iseq], D$y[iseq], type = "b", xlab = "t", ylab = "y", main = pst("t = ", D$t[i]))
lines(D$t[iseq], D$Yhat[i, ], type = "b", col = 2)
legend("topright", c("Observations",pst("Predictions (",min(model$kseq)," to ",max(model$kseq)," steps ahead)")), lty = 1, col = 1:2)
Make a small “movie” stepping through the forecasts
for(i in 1400:1900){
iseq <- i+model$kseq
plot(D$t[iseq], D$y[iseq], type = "b", xlab = "t", ylab = "y", main = pst("t = ", D$t[i]), ylim=c(0,4000))
lines(D$t[iseq], D$Yhat[i, ], type = "b", col = 2)
legend("topright", c("Observations",pst("Predictions (",min(model$kseq)," to ",max(model$kseq)," steps ahead)")), lty = 1, col = 1:2)
Sys.sleep(0.1)
}
First fit on a period
itrain <- which(in_range("2010-07-01",D$t,"2010-08-01"))
itest <- which(in_range("2010-08-01",D$t,"2010-08-05"))
Dfit <- subset(D, itrain)
rls_fit(model$prm, model, Dfit)
## ----------------
## lambda
## 0.992
Now the fits are saved in the model object (its an R6 object, hence passed by reference to the functions and can be changed inside the functions). A list of fits with an entry for each horizon is in Lfits, see the two first
str(model$Lfits[1:2])
## List of 2
## $ k1:List of 4
## ..$ k : num 1
## ..$ theta: num [1:6, 1] 3.528 9.15 -0.975 -1.475 10.384 ...
## ..$ P : num [1:6, 1:6] 3.24e-06 -2.79e-06 6.93e-07 8.54e-07 -7.43e-07 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr [1:6] "I.bs1" "I.bs2" "I.bs3" "I.bs4" ...
## ..$ yhat : num [1:48, 1] 0 0 0 -21.4 411.9 ...
## $ k2:List of 4
## ..$ k : num 2
## ..$ theta: num [1:6, 1] 3.5572 9.6501 -0.0983 -2.5046 10.3786 ...
## ..$ P : num [1:6, 1:6] 3.14e-06 -2.64e-06 5.76e-07 8.74e-07 -7.29e-07 ...
## .. ..- attr(*, "dimnames")=List of 2
## .. .. ..$ : NULL
## .. .. ..$ : chr [1:6] "I.bs1" "I.bs2" "I.bs3" "I.bs4" ...
## ..$ yhat : num [1:48, 1] 0 0 -29.2 125.7 547.8 ...
Now new data arrives, take the point right after the fit period
(i <- itrain[length(itrain)] + 1)
## [1] 5089
Dnew <- subset(D, i)
First we need to transform the new data (This must only be done once for each new data, since some transform functions, e.g. lp(), actually keep states, see the detailed vignette onlineforecasting.pdf)
Dnew_transformed <- model$transform_data(Dnew)
Then we can update the parameters using the transformed data
rls_update(model, Dnew_transformed, Dnew[[model$output]])
Calculate predictions using the new data and the updated fits (rls coefficient estimates in model\(Lfits[[k]]\)theta)
yhat <- rls_predict(model, Dnew_transformed)
Plot to see that it fits the observations
iseq <- i+model$kseq
plot(D$t[iseq], D$y[iseq], type = "b", xlab = "t", ylab = "y")
lines(D$t[iseq], yhat, type = "b", col = 2)
legend("topright", c("observations",pst("predictions (",min(model$kseq)," to ",max(model$kseq)," steps ahead)")), lty = 1, col = 1:2)
Run this for a longer period to verify that the same forecasts are obtained (in one go vs. iteratively)
First in one go
val <- rls_fit(model$prm, model, D, returnanalysis = TRUE)
## ----------------
## lambda
## 0.992
D$Yhat1 <- val$Yhat
and then iteratively
rls_fit(model$prm, model, subset(D, itrain))
D$Yhat2 <- data.frame(matrix(NA, nrow(D$Yhat1), ncol(D$Yhat1)))
names(D$Yhat2) <- names(D$Yhat1)
for(i in itest){
print(i)
Dnew <- subset(D, i)
Dnewtr <- model$transform_data(Dnew)
rls_update(model, Dnewtr, Dnew[[model$output]])
D$Yhat2[i, ] <- as.numeric(rls_predict(model, Dnewtr))
}
## [1] 5089
## [1] 5090
## [1] 5091
## [1] 5092
## [1] 5093
## [1] 5094
## [1] 5095
## [1] 5096
## [1] 5097
## [1] 5098
## [1] 5099
## [1] 5100
## [1] 5101
## [1] 5102
## [1] 5103
## [1] 5104
## [1] 5105
## [1] 5106
## [1] 5107
## [1] 5108
## [1] 5109
## [1] 5110
## [1] 5111
## [1] 5112
## [1] 5113
## [1] 5114
## [1] 5115
## [1] 5116
## [1] 5117
## [1] 5118
## [1] 5119
## [1] 5120
## [1] 5121
## [1] 5122
## [1] 5123
## [1] 5124
## [1] 5125
## [1] 5126
## [1] 5127
## [1] 5128
## [1] 5129
## [1] 5130
## [1] 5131
## [1] 5132
## [1] 5133
## [1] 5134
## [1] 5135
## [1] 5136
## [1] 5137
## [1] 5138
## [1] 5139
## [1] 5140
## [1] 5141
## [1] 5142
## [1] 5143
## [1] 5144
## [1] 5145
## [1] 5146
## [1] 5147
## [1] 5148
## [1] 5149
## [1] 5150
## [1] 5151
## [1] 5152
## [1] 5153
## [1] 5154
## [1] 5155
## [1] 5156
## [1] 5157
## [1] 5158
## [1] 5159
## [1] 5160
## [1] 5161
## [1] 5162
## [1] 5163
## [1] 5164
## [1] 5165
## [1] 5166
## [1] 5167
## [1] 5168
## [1] 5169
## [1] 5170
## [1] 5171
## [1] 5172
## [1] 5173
## [1] 5174
## [1] 5175
## [1] 5176
## [1] 5177
## [1] 5178
## [1] 5179
## [1] 5180
## [1] 5181
## [1] 5182
## [1] 5183
## [1] 5184
Compare to see the difference between the one step forecasts
D$Yhat1$k1[itest] - D$Yhat2$k1[itest]
## [1] 0.00000 0.00000 0.00000 0.58676 0.14980 0.24762 0.90428 2.21367 2.21697 0.69412 -0.72583 -1.92028
## [13] -2.11041 -2.00326 -0.98773 -0.34746 -1.32677 -0.89317 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
## [25] 0.00000 0.00000 0.00000 0.34541 0.02355 -0.00373 0.85578 1.44080 0.93197 0.36628 -0.29434 -0.84970
## [37] -0.27691 -0.16698 -0.14822 -0.31159 -0.38386 -0.20273 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
## [49] 0.00000 0.00000 0.00000 0.67913 -0.01621 0.60845 1.01005 1.44436 1.71871 0.83843 0.01285 -0.57329
## [61] -0.47918 -0.93496 -1.58311 -2.36884 -1.66960 -1.79901 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
## [73] 0.00000 0.00000 0.00000 0.44044 0.04515 -0.04908 0.01248 0.43425 0.74141 0.64339 -0.61662 -1.57319
## [85] -0.70122 -0.21939 -0.31530 -1.05171 -0.67606 -0.10380 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
The small differences are most likely caused by numerical issues: The fit on all data was done only in c++, whereas the iterative calculation was carried out switching between R and c++. Where and how this exactly happens is still to be investigated.
# Find the start and end of solar each day
# A minimum threshold
minthres <- 0.1
D$y[minthres < 0.1] <- 0
yday <- aslt(D$t-1)$yday
val <- lapply(split(D$y, yday), function(x){
x[is.na(x)] <- 0
i <- which(x > 0)
if(length(i) > 0){
iseq <- min(i):max(i)
x[iseq] <- seq(0, 1, len=length(iseq))
x[-iseq] <- NA
}
return(x)
})
D$tdaynormed <- unlist(val)
D$tdaynormed <- lagdf(make_input(D$tdaynormed, 1:36), "-k")
Dtrain <- subset(D, c("2010-01-01", "2010-04-01"))
Dtrain$scoreperiod <- in_range("2010-02-01", Dtrain$t)
D$ynormed <- D$y/max(D$y, na.rm=TRUE)
plot_ts(D, "tdaynormed|ynormed$", c("2010-04-01","2010-04-10"), kseq=c(1,4,12))
model$add_inputs(I = "bspline(tdaynormed, Boundary.knots = c(0,1), degree = 5) %**% I")
# Test the bspline function
spl <- bspline(D$tdaynormed[1:24, 1:3], Boundary.knots = c(0,1), degree = 3, intercept=TRUE)
# See the base splines
plot(spl$bs1$k1, type="l", xlim=c(1,21))
lines(spl$bs2$k1, col=2)
lines(spl$bs3$k1, col=3)
lines(spl$bs4$k1, col=4)
model$kseq <- c(1,18)
# Get the parameters
prm <- model$get_prmbounds("init")
# Fit the model and get the RMSE
rls_fit(prm, model, Dtrain, scorefun = rmse, returnanalysis = FALSE)
## [1] 1423
Optimize the parameters: first set the horizons to run and the
model$prm <- rls_optim(model, Dtrain, control=list(maxit=2))$par
model$prm
## lambda
## 0.993
Now fit with the optimized parameters on the entire period for all horizons
model$kseq <- 1:36
val <- rls_fit(model$prm, model, D, returnanalysis = TRUE)
D$Yhat2 <- val$Yhat
plot_ts(D, c("^y$|^Y"), c("2010-04-01","2010-04-15"), kseq = c(1,18))
plot_ts(D, c("^y$|^Y|I"), c("2010-04-01","2010-04-05"), kseq = c(1,18))