Skip to content

Commit

Permalink
add trace, messages. some structural changes and bugFix in summary
Browse files Browse the repository at this point in the history
  • Loading branch information
LukaszChrostowski committed Aug 26, 2023
1 parent 25ea616 commit a48e396
Show file tree
Hide file tree
Showing 23 changed files with 221 additions and 107 deletions.
8 changes: 6 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@ Authors@R:
person(given = "Maciej",
family = "Beręsewicz",
role = c("aut", "ctb"),
email = "maciej.beresewicz@ue.poznan.pl"))
email = "maciej.beresewicz@ue.poznan.pl"),
person(given = "Piotr",
family = "Chlebicki",
role = c("aut", "ctb"),

This comment has been minimized.

Copy link
@Kertoo

Kertoo Aug 26, 2023

Member

Just "ctb" would be more accurate I reckon

email = "piochl@st.amu.edu.pl"))
Description: An R package for statistical inference with non-probability samples when auxiliary information from external sources like probability samples or population totals/means are available.
License: file LICENSE
Encoding: UTF-8
Expand All @@ -19,5 +23,5 @@ RdMacros: mathjaxr
RoxygenNote: 7.2.3
Suggests: tinytest, covr
Depends: survey
Imports: maxLik, stats, Matrix, MASS, ncvreg, betareg, RANN, sampling, Rcpp, RcppArmadillo, mathjaxr, nleqslv, doParallel, foreach, parallel
Imports: maxLik, stats, Matrix, MASS, ncvreg, betareg, RANN, sampling, Rcpp, RcppArmadillo, mathjaxr, nleqslv, doParallel, foreach, parallel, HelpersMG
LinkingTo: RcppArmadillo, Rcpp
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export(probit)
import(Rcpp)
import(RcppArmadillo)
import(mathjaxr)
importFrom(HelpersMG,ExtractAIC.glm)
importFrom(MASS,ginv)
importFrom(Matrix,Diagonal)
importFrom(Matrix,Matrix)
Expand Down
14 changes: 8 additions & 6 deletions R/EstimationMethods.R
Original file line number Diff line number Diff line change
Expand Up @@ -163,19 +163,19 @@ gee <- function(...) {
}

make_t <- function(X, ps, psd, b, y_rand, y_nons, h, N, method_selection, weights) {
if (h == "1") {
if (h == 1) {
t <- X %*% t(as.matrix(b)) + y_rand - 1/N * sum(weights * y_nons)
} else if (h == "2") {
} else if (h == 2) {
t <- as.vector(ps) * X %*% t(as.matrix(b)) + y_rand - 1/N * sum(weights * y_nons)
}
t
}

make_var_nonprob <- function(ps, psd, y, y_pred, h_n, X, b, N, h, method_selection, weights, pop_totals) {
if (!is.null(pop_totals)) h <- "1" # perhaps to remove, just check if appropriate var is calculated
if (h == "2") {
if (!is.null(pop_totals)) h <- 1 # perhaps to remove, just check if appropriate var is calculated
if (h == 2) {
var_nonprob <- 1/N^2 * sum((1 - ps) * ((weights*(y - y_pred - h_n)/ps) - b %*% t(X))^2)
} else if (h == "1") {
} else if (h == 1) {
var_nonprob <- 1/N^2 * sum((1 - ps) * ((weights*(y - y_pred - h_n) - b %*% t(X))/ps)^2)
}
as.numeric(var_nonprob)
Expand Down Expand Up @@ -242,7 +242,8 @@ gee <- function(...) {

}

mm <- function(X, y, weights, weights_rand, R, n_nons, n_rand, method_selection, family, boot = FALSE) { # TODO
# bias correction
mm <- function(X, y, weights, weights_rand, R, n_nons, n_rand, method_selection, family, boot = FALSE) {

method <- get_method(method_selection)
inv_link <- method$make_link_inv
Expand Down Expand Up @@ -324,6 +325,7 @@ mm <- function(X, y, weights, weights_rand, R, n_nons, n_rand, method_selection,
grad = multiroot$fvec[1:(p)], # TODO
hess = hess, # TODO
ps_nons = ps_nons,
est_ps_rand = est_ps_rand,
variance_covariance = vcov_selection,
df_residual = df_residual,
log_likelihood = "NULL")
Expand Down
17 changes: 8 additions & 9 deletions R/OutcomeMethods.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' @importFrom stats predict.glm
#' @importFrom stats glm.fit
#' @importFrom stats summary.glm
glm <- function(outcome,
glm_nonprobsvy <- function(outcome,
data,
weights,
family_outcome,
Expand Down Expand Up @@ -39,13 +39,13 @@ glm <- function(outcome,
y_nons_pred <- model_out$glm$fitted.values
} else {
model <- stats::glm.fit(x = X_nons,
y = y_nons,
weights = weights,
family = get_method(family_outcome),
control = list(control$epsilon,
y = y_nons,
weights = weights,
family = get_method(family_outcome),
control = list(control$epsilon,
control$maxit,
control$trace),
intercept = FALSE)
intercept = FALSE)
model_summ <- stats::summary.glm(model)
parameters <- model_summ$coefficients
model_nons_coefs <- model$coefficients
Expand All @@ -69,7 +69,7 @@ glm <- function(outcome,
}


nn <- function(outcome,
nn_nonprobsvy <- function(outcome,
data,
weights,
family_outcome,
Expand All @@ -81,8 +81,7 @@ nn <- function(outcome,
n_rand,
vars_selection,
pop_totals,
model_frame = NULL) {

model_frame = NULL) { # TODO consider add data standardization before modelling
model_nons <- nonprobMI_nn(data = X_nons,
query = X_nons,
k = control$k,
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

cv_nonprobsvy_rcpp <- function(X, R, weights_X, method_selection, h, maxit, eps, lambda_min, nlambda, nfolds, penalty, a, pop_totals, lambda = -1) {
.Call('_nonprobsvy_cv_nonprobsvy_rcpp', PACKAGE = 'nonprobsvy', X, R, weights_X, method_selection, h, maxit, eps, lambda_min, nlambda, nfolds, penalty, a, pop_totals, lambda)
cv_nonprobsvy_rcpp <- function(X, R, weights_X, method_selection, h, maxit, eps, lambda_min, nlambda, nfolds, penalty, a, pop_totals, verbose, lambda = -1) {
.Call('_nonprobsvy_cv_nonprobsvy_rcpp', PACKAGE = 'nonprobsvy', X, R, weights_X, method_selection, h, maxit, eps, lambda_min, nlambda, nfolds, penalty, a, pop_totals, verbose, lambda)
}

56 changes: 51 additions & 5 deletions R/bootstraps.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ bootMI <- function(X_rand,
method,
control,
pop_totals,
verbose,
...
){ # TODO add methods instead of conditional loops

Expand Down Expand Up @@ -65,6 +66,10 @@ bootMI <- function(X_rand,
#mu_hat_boot <- mu_hatMI(ystrap_rand, weights_rand_strap_svy, N_strap)
mu_hat_boot <- weighted.mean(x = y_strap_rand, w = weights_rand_strap_svy)
mu_hats[k] <- mu_hat_boot
if (verbose) {
info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
print(info)
}
k <- k + 1
}
} else if (method == "nn") {
Expand Down Expand Up @@ -95,6 +100,10 @@ bootMI <- function(X_rand,

mu_hat_boot <- weighted.mean(x = y_rand_strap, w = weights_rand_strap)
mu_hats[k] <- mu_hat_boot
if (verbose) {
info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
print(info)
}
k <- k + 1
}
}
Expand All @@ -119,6 +128,10 @@ bootMI <- function(X_rand,
#mu_hat_boot <- mu_hatMI(ystrap_rand, weights_rand_strap_svy, N_strap)
mu_hat_boot <- as.vector(y_strap_rand/N)
mu_hats[k] <- mu_hat_boot
if (verbose) {
info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
print(info)
}
k <- k + 1
}
} else if (method == "nn") {
Expand All @@ -135,6 +148,10 @@ bootMI <- function(X_rand,
searchtype = control$searchtype)
mu_hat_boot <- mean(y_strap[model_rand$nn.idx])
mu_hats[k] <- mu_hat_boot
if (verbose) {
info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
print(info)
}
k <- k + 1
}
}
Expand Down Expand Up @@ -162,6 +179,7 @@ bootIPW <- function(X_rand,
pop_size = NULL,
pop_totals = NULL,
control_selection,
verbose,
...){
mu_hats <- vector(mode = "numeric", length = num_boot)
if (!is.null(weights_rand)) N <- sum(weights_rand)
Expand Down Expand Up @@ -204,6 +222,10 @@ bootIPW <- function(X_rand,
weights_nons = weights_nons,
N = N_est_nons) # IPW estimator
mu_hats[k] <- mu_hat_boot
if (verbose) {
info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
print(info)
}

} else {
strap <- sample.int(replace = TRUE, n = n_nons)
Expand Down Expand Up @@ -231,6 +253,10 @@ bootIPW <- function(X_rand,
weights_nons = weights_nons,
N = N_est_nons) # IPW estimator
mu_hats[k] <- mu_hat_boot
if (verbose) {
info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
print(info)
}
}
k <- k + 1
}
Expand Down Expand Up @@ -259,6 +285,8 @@ bootDR <- function(SelectionModel,
pop_size,
pop_totals,
pop_means,
bias_correction,
verbose,
...) {

mu_hats <- vector(mode = "numeric", length = num_boot)
Expand All @@ -276,7 +304,7 @@ bootDR <- function(SelectionModel,
family <- family()
}

if (est_method == "mm") {
if (bias_correction == TRUE) {
X <- rbind(SelectionModel$X_rand, SelectionModel$X_nons)
p <- ncol(X)
y_rand <- vector(mode = "numeric", length = n_rand)
Expand Down Expand Up @@ -345,6 +373,10 @@ bootDR <- function(SelectionModel,
N_nons = N_est_nons,
N_rand = N_est_rand)
mu_hats[k] <- mu_hat_boot
if (verbose) {
info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
print(info)
}
k <- k + 1
}
} else { # TODO
Expand Down Expand Up @@ -388,6 +420,10 @@ bootDR <- function(SelectionModel,

mu_hat_boot <- 1/N_est * sum(weights_nons_strap * (weights_strap * (OutcomeModel$y[strap] - y_nons_pred))) + 1/pop_size * y_rand_pred
mu_hats[k] <- mu_hat_boot
if (verbose) {
info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
print(info)
}
k <- k + 1
}
}
Expand All @@ -407,7 +443,8 @@ bootDR_sel <- function(X,
n_rand,
num_boot,
par0,
psel) { # TODO function to test
psel,
verbose) { # TODO function to test
mu_hats <- vector(mode = "numeric", length = num_boot)
k <- 1
loc_nons <- which(R == 1)
Expand Down Expand Up @@ -447,6 +484,10 @@ bootDR_sel <- function(X,
N_nons = N_nons,
N_rand = N_rand) #DR estimator
mu_hats[k] <- mu_hat_boot
if (verbose) {
info <- paste("iteration ", k, "/", num_boot, ", estimated mean = ", mu_hat_boot, sep = "")
print(info)
}
k <- k + 1
}
boot_var <- 1/num_boot * sum((mu_hats - mu_hat)^2)
Expand All @@ -473,6 +514,7 @@ bootMI_multicore <- function(X_rand,
control,
pop_totals,
cores,
verbose,
...) {
#mu_hats <- vector(mode = "numeric", length = num_boot)
n_nons <- nrow(X_nons)
Expand Down Expand Up @@ -522,7 +564,6 @@ bootMI_multicore <- function(X_rand,
beta <- model_strap$coefficients
eta <- X_rand %*% beta
y_strap_rand <- family_nonprobsvy$mu(eta)

weighted.mean(x = y_strap_rand, w = weights_rand_strap_svy)
})
} else if (method == "nn") {
Expand Down Expand Up @@ -640,6 +681,7 @@ bootIPW_multicore <- function(X_rand,
pop_totals = NULL,
control_selection,
cores,
verbose,
...) {

if (!is.null(weights_rand)) N <- sum(weights_rand)
Expand Down Expand Up @@ -763,7 +805,9 @@ bootDR_multicore <- function(SelectionModel,
pop_size,
pop_totals,
pop_means,
bias_correction,
cores,
verbose,
...) {

# mu_hats <- vector(mode = "numeric", length = num_boot)
Expand All @@ -781,7 +825,7 @@ bootDR_multicore <- function(SelectionModel,
family <- family()
}

if (est_method == "mm") {
if (bias_correction == TRUE) {
X <- rbind(SelectionModel$X_rand, SelectionModel$X_nons)
p <- ncol(X)
y_rand <- vector(mode = "numeric", length = n_rand)
Expand Down Expand Up @@ -930,7 +974,8 @@ bootDR_sel_multicore <- function(X,
num_boot,
par0,
psel,
cores) { # TODO function to test
cores,
verbose) { # TODO function to test
mu_hats <- vector(mode = "numeric", length = num_boot)
loc_nons <- which(R == 1)
loc_rand <- which(R == 0)
Expand All @@ -953,6 +998,7 @@ bootDR_sel_multicore <- function(X,
X_strap <- rbind(X_rand[strap_rand, ], X_nons[strap_nons, ])
y_strap <- c(y_rand[strap_rand], y_nons[strap_nons])

source("R/EstimationMethods.R")
model_strap <- mm(X = X_strap,
y = y_strap,
weights = prior_weights[loc_nons][strap_nons],
Expand Down
6 changes: 3 additions & 3 deletions R/cloglogModel.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ cloglog <- function(...) {
v_2 <- v_2 + v_2i
}
v_2 <- 1/N^2 * v_2
} else if (est_method == "gee" && h == "1") {
} else if (est_method == "gee" && h == 1) {
if (is.null(N)) {
N <- sum(1/ps)
v11 <- 1/N^2 * sum(((1 - ps)/ps^2 * weights * (y - mu)^2)) # TODO
Expand All @@ -175,7 +175,7 @@ cloglog <- function(...) {
v_2i <- (1 - ps[i])/ps[i] * X[i,] %*% t(X[i,])
v_2 <- v_2 + v_2i
}
} else if (est_method == "gee" && h == "2") {
} else if (est_method == "gee" && h == 2) {
if (is.null(N)) {
N <- sum(1/ps)
v11 <- 1/N^2 * sum(((1 - ps)/ps^2 * weights * (y - mu)^2)) # TODO
Expand Down Expand Up @@ -210,7 +210,7 @@ cloglog <- function(...) {
if (est_method == "mle") {
svydesign$prob <- as.vector(1/log(1 - eps) * svydesign$prob)
} else if (est_method == "gee") {
if (h == "2") svydesign$prob <- as.vector(1/eps * svydesign$prob)
if (h == 2) svydesign$prob <- as.vector(1/eps * svydesign$prob)
}
if (is.null(postStrata)) {
cov <- 1/N^2 * svyrecvar(X/svydesign$prob, svydesign$cluster, stratas = svydesign$strata, fpcs = svydesign$fpc)
Expand Down
Loading

0 comments on commit a48e396

Please sign in to comment.