Skip to content

Commit

Permalink
version 0.4.0
Browse files Browse the repository at this point in the history
  • Loading branch information
jobstdavid committed Dec 27, 2023
1 parent 5626d38 commit a0d35c1
Show file tree
Hide file tree
Showing 87 changed files with 1,949 additions and 2,248 deletions.
Binary file modified .DS_Store
Binary file not shown.
1,000 changes: 500 additions & 500 deletions .Rhistory

Large diffs are not rendered by default.

Binary file modified .github/.DS_Store
Binary file not shown.
3 changes: 1 addition & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: eppverification
Type: Package
Title: Verification Tools for the Statistical Postprocessing of Ensemble Forecasts
Version: 0.3.0
Version: 0.4.0
Authors@R: c(
person(given = "David",
family = "Jobst",
Expand All @@ -14,7 +14,6 @@ URL: https://github.com/jobstdavid/eppverification
BugReports: https://github.com/jobstdavid/eppverification/issues
Imports:
ggplot2,
fields,
vegan,
pcaPP,
Rfast,
Expand Down
10 changes: 4 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@

export(ae)
export(bh.test)
export(bias)
export(bs)
export(cov.plot)
export(cpi)
export(cpi.plot)
export(crps)
export(dispersion)
export(dm.test)
export(ds)
export(dss)
export(ee)
export(ent)
export(es)
export(line.plot)
export(logs)
export(m.pit)
export(ment)
export(mri)
export(mrnk)
Expand All @@ -28,7 +28,6 @@ export(rnk)
export(score.plot)
export(se)
export(ss)
export(var.pit)
export(vr.hist)
export(vs)
importFrom(Rcpp,sourceCpp)
Expand All @@ -38,7 +37,6 @@ importFrom(Rfast,rowMedians)
importFrom(Rfast,rowVars)
importFrom(Rfast,rowmeans)
importFrom(Rfast,rowsums)
importFrom(fields,rdist)
importFrom(ggplot2,aes)
importFrom(ggplot2,after_stat)
importFrom(ggplot2,element_text)
Expand All @@ -55,7 +53,6 @@ importFrom(ggplot2,ggtitle)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_color_manual)
importFrom(ggplot2,scale_fill_gradient)
importFrom(ggplot2,scale_linetype_manual)
importFrom(ggplot2,scale_x_continuous)
importFrom(ggplot2,scale_x_discrete)
importFrom(ggplot2,scale_y_continuous)
Expand All @@ -69,8 +66,9 @@ importFrom(stats,acf)
importFrom(stats,complete.cases)
importFrom(stats,median)
importFrom(stats,na.omit)
importFrom(stats,pt)
importFrom(stats,pnorm)
importFrom(stats,setNames)
importFrom(stats,var)
importFrom(utils,tail)
importFrom(vegan,spantree)
useDynLib(eppverification, .registration = TRUE)
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# eppverification 0.4.0 (December 27, 2023)

**Note**: Most of the following changes serve as preparation for the next version with groundbreaking innovations!

- change `na.rm` and `mean` argument in nearly all functions to `na.action` and `aggregate`. This allows a more flexible handling of missing values and score aggregation.
- expand functionality of `bh.test`.
- change test statistic in `dm.test` back to the one in version 0.2.0.
- change argument name `interval.range` to `nominal.coverage` in the functions `cpi`, `cpi.plot` and `cov.plot`.
- change argument name `ri` to `reliability` and `ent` to `entropy` in the functions `vr.hist` and `mvr.hist`.
- changed name `var.pit` and `m.pit` to `dispersion` and `bias`, respectively.
- add weight matrix symmetry check for `vs`.
- remove function `line.plot`.


# eppverification 0.3.0 (September 30, 2023)

- univariate and multivariate score calculation is speeded-up using C++ code by the R-packages `Rfast`, `Rcpp`, `RcppArmadillo`.
Expand Down
172 changes: 172 additions & 0 deletions R/.Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
library(boostCopula)
library(VineCopula)
library(tictoc)
logit <- function(x, inv = FALSE) {
if (!inv) {
log(x/(1-x))
} else {
exp(x)/(1+exp(x))
}
}
link_fun <- function(family, x) {
if (family %in% 1:2) {
sin(pi/2*tanh(x))
} else if (family %in% c(3,13)) {
2*logit(x, inv = T)/(1-logit(x, inv = T))
} else if (family %in% c(23, 33)) {
-2*logit(x, inv = T)/(1-logit(x, inv = T))
} else if (family %in% c(4,14)) {
1/(1-logit(x, inv = T))
} else if (family %in% c(24, 34)) {
-1/(1-logit(x, inv = T))
}
}
# copula families
fams <- c(1, 2, 3, 13, 23, 33, 4, 14, 24, 34)
n <- 100 # number of simulation repetitions
N <- 60 # number of samples for each simulation
p <- 100 # number of variables
d <- 5
coef_mat <- array(0, dim = c(p+1, d, d), dimnames = list(c("intercept", paste("x", sep = "", 1:p)), c(), c()))
positives_lin <- array(NA, dim = c(2, d, d), dimnames = list(c("true", "false"), c(), c()))
par_mae_lin <- array(NA, dim = c(d, d))
fams_lin <- array(0, dim = c(d, d))
# define 5-dimensional R-vine tree structure matrix
Matrix <- c(5, 0, 0, 0, 0,
2, 2, 0, 0, 0,
3, 3, 3, 0, 0,
1, 4, 4, 4, 0,
4, 1, 1, 1, 1)
Matrix <- matrix(Matrix, 5, 5, byrow = T)
# define R-vine pair-copula family matrix
family <- c(0, 0, 0, 0, 0,
1, 0, 0, 0, 0,
24, 2, 0, 0, 0,
4, 33, 13, 0, 0,
23, 34, 14, 3, 0)
family <- matrix(family, 5, 5, byrow = T)
# define R-vine pair-copula parameter matrix
par <- c(0, 0, 0, 0, 0,
0.5, 0, 0, 0, 0,
-2, 0.5, 0, 0, 0,
2, -2, 2, 0, 0,
-2, -2, 2, 2, 0)
par <- matrix(par, 5, 5, byrow = T)
# define second R-vine pair-copula parameter matrix
par2 <- c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0,
0, 4, 0, 0, 0,
0, 0, 0, 0, 0,
0, 0, 0, 0, 0)
par2 <- matrix(par2, 5, 5, byrow = T)
boostRVC <- RVineMatrix(Matrix = Matrix,
family = family,
par = par, par2 = par2,
names = c("U1", "U2", "U3", "U4", "U5"))
# set seed for reproducubility
set.seed(5)
x <- sapply(1:p, function(l) runif(N, -1, 1))
colnames(x) <- paste0("x", 1:p)
u <- matrix(runif(N*d), ncol = d, dimnames = list(c(), paste0("U", 1:d, sep = "")))
# simulate linear model
U <- boostRVineSimStudy(boostRVC, u, x, mode = "linear")
library(boostCopula)
library(VineCopula)
library(tictoc)
logit <- function(x, inv = FALSE) {
if (!inv) {
log(x/(1-x))
} else {
exp(x)/(1+exp(x))
}
}
link_fun <- function(family, x) {
if (family %in% 1:2) {
sin(pi/2*tanh(x))
} else if (family %in% c(3,13)) {
2*logit(x, inv = T)/(1-logit(x, inv = T))
} else if (family %in% c(23, 33)) {
-2*logit(x, inv = T)/(1-logit(x, inv = T))
} else if (family %in% c(4,14)) {
1/(1-logit(x, inv = T))
} else if (family %in% c(24, 34)) {
-1/(1-logit(x, inv = T))
}
}
# copula families
fams <- c(1, 2, 3, 13, 23, 33, 4, 14, 24, 34)
n <- 100 # number of simulation repetitions
N <- 6000 # number of samples for each simulation
p <- 100 # number of variables
d <- 5
coef_mat <- array(0, dim = c(p+1, d, d), dimnames = list(c("intercept", paste("x", sep = "", 1:p)), c(), c()))
positives_lin <- array(NA, dim = c(2, d, d), dimnames = list(c("true", "false"), c(), c()))
par_mae_lin <- array(NA, dim = c(d, d))
fams_lin <- array(0, dim = c(d, d))
# define 5-dimensional R-vine tree structure matrix
Matrix <- c(5, 0, 0, 0, 0,
2, 2, 0, 0, 0,
3, 3, 3, 0, 0,
1, 4, 4, 4, 0,
4, 1, 1, 1, 1)
Matrix <- matrix(Matrix, 5, 5, byrow = T)
# define R-vine pair-copula family matrix
family <- c(0, 0, 0, 0, 0,
1, 0, 0, 0, 0,
24, 2, 0, 0, 0,
4, 33, 13, 0, 0,
23, 34, 14, 3, 0)
family <- matrix(family, 5, 5, byrow = T)
# define R-vine pair-copula parameter matrix
par <- c(0, 0, 0, 0, 0,
0.5, 0, 0, 0, 0,
-2, 0.5, 0, 0, 0,
2, -2, 2, 0, 0,
-2, -2, 2, 2, 0)
par <- matrix(par, 5, 5, byrow = T)
# define second R-vine pair-copula parameter matrix
par2 <- c(0, 0, 0, 0, 0,
0, 0, 0, 0, 0,
0, 4, 0, 0, 0,
0, 0, 0, 0, 0,
0, 0, 0, 0, 0)
par2 <- matrix(par2, 5, 5, byrow = T)
boostRVC <- RVineMatrix(Matrix = Matrix,
family = family,
par = par, par2 = par2,
names = c("U1", "U2", "U3", "U4", "U5"))
# set seed for reproducubility
set.seed(5)
x <- sapply(1:p, function(l) runif(N, -1, 1))
colnames(x) <- paste0("x", 1:p)
u <- matrix(runif(N*d), ncol = d, dimnames = list(c(), paste0("U", 1:d, sep = "")))
# simulate linear model
U <- boostRVineSimStudy(boostRVC, u, x, mode = "linear")
tic()
object <- boostRVineCopSelect(U = U,
X = x,
boostRVM = boostRVineMatrix(Matrix = Matrix,
family = family,
formula = matrix("~.", d, d, byrow = T)),
selectioncrit = "loglik",
familyset = NA,
par = FALSE,
type = "glm",
control = boost_control(mstop = 500),
cval = cval_control(cval = F),
deselect = deselect_control(deselect = T),
center = TRUE,
stabilization = "none",
na.action = na.omit,
cores = 10)
toc()
253.249/60
ll_true <- boostRVineTrueLL(U = U,
X = x,
boostRVM = boostRVineMatrix(Matrix = Matrix,
family = family,
formula = matrix("~.", d, d, byrow = T)),
cores = 10)
sum(unlist(ll_true), na.rm = T)
object$loglik
100/17
8 changes: 4 additions & 4 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,6 @@ crps_sml_cpp <- function(y, x) {
.Call(`_eppverification_crps_sml_cpp`, y, x)
}

crps_mc_cpp <- function(y, x) {
.Call(`_eppverification_crps_mc_cpp`, y, x)
}

euclnorm_cpp <- function(x) {
.Call(`_eppverification_euclnorm_cpp`, x)
}
Expand All @@ -33,3 +29,7 @@ ds_cpp <- function(x) {
.Call(`_eppverification_ds_cpp`, x)
}

rdist_cpp <- function(x) {
.Call(`_eppverification_rdist_cpp`, x)
}

46 changes: 23 additions & 23 deletions R/ae.R
Original file line number Diff line number Diff line change
@@ -1,46 +1,48 @@
#' Absolute Error
#'
#' This function calculates the Absolute Error (AE) given observations of a one-dimensional variable and ensemble forecasts/samples of a predictive distribution.
#' This function calculates the Absolute Error (AE) given observations of a univariate variable and samples of a predictive distribution.
#'
#' @param y vector of observations
#' @param x matrix of ensemble forecasts/samples of a predictive distribution or vector of medians of ensemble forecasts/a predictive distribution (depending on \code{y}; see details)
#' @param mean logical; if \code{TRUE} the mean of the AE values is calculated for output; if \code{FALSE} the single AE values are used as output; default: \code{FALSE}
#' @param na.rm logical; if \code{TRUE} NA are removed after the computation; if \code{FALSE} NA are used in the computation; default: \code{FALSE}
#' @param x matrix of samples of a predictive distribution or vector containing the medians of a predictive distribution (depending on \code{y}; see details)
#' @param na.action function to handle the NA's. Default: \code{na.omit}.
#' @param aggregate logical or function for aggregating the single scores, e.g. \code{sum}, \code{mean}, \code{weighted.mean}, ....
#' Default: \code{FALSE}, i.e. no aggregation function.
#' @param ... further arguments passed to the \code{aggregate} function.
#'
#' @details
#' For a vector \code{y} of length n, \code{x} can be given as matrix of ensemble forecasts/samples of a predictive distribution
#' For a vector \code{y} of length n, \code{x} can be given as matrix of samples of a predictive distribution
#' with n rows, where the i-th entry of \code{y} belongs to the i-th row
#' of \code{x}. The columns of \code{x} represent the samples of a predictive distribution
#' or ensemble forecasts. The row-wise medians are determined by its sample version.
#' of \code{x}. The columns of \code{x} represent the samples of a predictive distribution.
#' The row-wise medians are determined by its sample version.
#'
#' If the medians of ensemble forecasts or of a predictive distribution are directly
#' available, \code{x} can be given as vector of medians, where
#' If the medians of a predictive distribution are directly
#' available, \code{x} can be given as vector containing the medians, where
#' the i-th entry of \code{y} belongs to the i-th entry of \code{x}.
#'
#' In addition, with this function, the Mean Absolute Error (MAE) can be calculated.
#' In addition, with this function, the Mean Absolute Error (MAE) can be calculated by specifying \code{aggregate = mean}.
#'
#' A lower AE indicates a better forecast.
#'
#' @return
#' Vector of score value(s).
#'
#' @examples
#' #simulated data
#' # simulated data
#' n <- 30
#' m <- 50
#' y <- rnorm(n)
#' x1 <- matrix(rnorm(n*m), ncol = m)
#' x2 <- apply(x1, 1, median)
#'
#' #ae calculation
#' ae(y = y, x = x1, mean = FALSE)
#' ae(y = y, x = x1, mean = TRUE)
#' # ae calculation
#' ae(y = y, x = x1)
#' ae(y = y, x = x1, aggregate = mean)
#'
#' ae(y = y, x = x2, mean = FALSE)
#' (mae <- ae(y = y, x = x2, mean = TRUE))
#' ae(y = y, x = x2)
#' (mae <- ae(y = y, x = x2, aggregate = mean))
#'
#' @references
#' Vannitsem, S., Wilks, D. and Messner, J. (2018). Statistical Postprocessing of Ensemble Forecasts. Elsevier. 164.
#' Gneiting, T. (2011). Making and Evaluating Point Forecasts. Journal of the American Statistical Association, 106(494), 746-762.
#'
#' @author David Jobst
#'
Expand All @@ -50,7 +52,7 @@
#' @importFrom Rfast rowMedians
#'
#' @export
ae <- function(y, x, mean = FALSE, na.rm = FALSE) {
ae <- function(y, x, na.action = na.omit, aggregate = FALSE, ...) {
if (!is.vector(y)) {
stop("'y' should be a vector!")
}
Expand All @@ -71,13 +73,11 @@ ae <- function(y, x, mean = FALSE, na.rm = FALSE) {
stop("'x' should be a vector or matrix!")
}

if (na.rm == TRUE) {
ae.value <- as.vector(na.omit(ae.value))
ae.value <- as.vector(na.action(ae.value))
if (!isFALSE(aggregate)) {
ae.value <- do.call(aggregate, list(ae.value, ...))
}

if (mean == TRUE) {
ae.value <- mean(ae.value)
}
return(as.numeric(ae.value))

}
Expand Down
Loading

0 comments on commit a0d35c1

Please sign in to comment.