From 7ed8035221b421337193f6bd0e61debd9f12918a Mon Sep 17 00:00:00 2001 From: Nima Hejazi Date: Mon, 2 Oct 2023 15:00:34 -0400 Subject: [PATCH] truncated censoring --- NEWS.md | 8 ++++++++ R/bound.R | 4 ++-- R/fit_mechanisms.R | 7 ++++++- R/onestep_txshift.R | 5 ++++- R/tmle_txshift.R | 5 ++++- R/txshift.R | 8 ++++---- man/bound_propensity.Rd | 4 ++-- man/est_g_cens.Rd | 7 ++++++- man/onestep_txshift.Rd | 5 ++++- man/tmle_txshift.Rd | 5 ++++- man/txshift.Rd | 8 ++++---- 11 files changed, 48 insertions(+), 18 deletions(-) diff --git a/NEWS.md b/NEWS.md index 83c22cd..4026a91 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,13 @@ # txshift 0.3.9 +As of October 2023: +* A new argument `bound` has been added to `est_g_cens()` to specify the lower + tolerated limit of the censoring mechanism estimates. This is used to allow + for stable inverse probability of censoring weights to be applied. This bound + has been given a default value of 0.02. +* The default value of `bound` in `bound_propensity()` has been changed from + 0.005 to 0.01, as has the default value of `gps_bound` in `txshift()`. + As of May 2023: * A new argument `bound` has been added to `bound_propensity()` to specify the lower tolerated limit of generalized propensity score estimates. Estimates diff --git a/R/bound.R b/R/bound.R index f3f40b9..245b863 100644 --- a/R/bound.R +++ b/R/bound.R @@ -33,14 +33,14 @@ bound_precision <- function(vals) { #' should only be bounded/truncated away from zero. #' @param bound \code{numeric} atomic giving the lower limit of the generalized #' propensity score estimates to be tolerated. Estimates less than this will -#' be truncated to this value (default = 0.005). Note that the default may be +#' be truncated to this value (default = 0.01). Note that the default may be #' internally overwritten by reference to the sample size (n), so the lower #' bound used is the greater of the specified value and 1/n. #' #' @return A \code{numeric} vector of the same length as \code{vals}, where the #' returned values are bounded such that the minimum is no lower than 1/n, for #' the sample size n. -bound_propensity <- function(vals, bound = 0.005) { +bound_propensity <- function(vals, bound = 0.01) { # bound generalized propensity score g(a|w) away from 0 only propensity_bound <- max(1 / length(vals), bound) vals_bounded <- pmax(vals, propensity_bound) diff --git a/R/fit_mechanisms.R b/R/fit_mechanisms.R index a38c8d6..4bcd7c2 100644 --- a/R/fit_mechanisms.R +++ b/R/fit_mechanisms.R @@ -224,6 +224,9 @@ est_g_exp <- function(A, #' for fitting a (generalized) linear model via \code{\link[stats]{glm}}. #' @param sl_learners Object containing a set of instantiated learners from the #' \pkg{sl3}, to be used in fitting an ensemble model. +#' @param bound \code{numeric} giving the lower limit of censoring mechanism +#' estimates to be tolerated (default = 0.02). Estimates below this value are +#' truncated to this or 1/n. See \code{\link{bound_propensity}} for details. #' #' @importFrom stats glm as.formula predict #' @importFrom data.table as.data.table setnames copy set @@ -237,7 +240,8 @@ est_g_cens <- function(C_cens, samp_weights = rep(1, length(C_cens)), fit_type = c("sl", "glm"), glm_formula = "C_cens ~ .", - sl_learners = NULL) { + sl_learners = NULL, + bound = 0.02) { # set defaults and check arguments fit_type <- match.arg(fit_type) if (fit_type == "sl") { @@ -301,6 +305,7 @@ est_g_cens <- function(C_cens, } # generate output + pred_g_cens <- bound_propensity(vals = pred_g_cens, bound = bound) return(pred_g_cens) } diff --git a/R/onestep_txshift.R b/R/onestep_txshift.R index 3068191..91fa80a 100644 --- a/R/onestep_txshift.R +++ b/R/onestep_txshift.R @@ -22,7 +22,10 @@ #' @param samp_estim An object providing the value of the censoring mechanism #' evaluated across the full data. This object is passed in after being #' constructed by a call to the internal function \code{\link{est_samp}}. -#' @param gn_cens_weights TODO: document +#' @param gn_cens_weights An object providing the value of inverse probability +#' of censoring weights, the inverse of the censoring mechanism estimate. The +#' weights are used as part of the IPCW-EIF procedure to implement a joint +#' intervention that removes the contribution of the censoring process. #' @param Qn_estim An object providing the value of the outcome evaluated after #' imposing a shift in the treatment. This object is passed in after being #' constructed by a call to the internal function \code{est_Q}. diff --git a/R/tmle_txshift.R b/R/tmle_txshift.R index 431a618..cb7a898 100644 --- a/R/tmle_txshift.R +++ b/R/tmle_txshift.R @@ -23,7 +23,10 @@ #' @param samp_estim An object providing the value of the sampling mechanism #' evaluated across the full data. This object is passed in after being #' constructed by a call to the internal function \code{\link{est_samp}}. -#' @param gn_cens_weights TODO: document +#' @param gn_cens_weights An object providing the value of inverse probability +#' of censoring weights, the inverse of the censoring mechanism estimate. The +#' weights are used as part of the IPCW-TMLE procedure to implement a joint +#' intervention that removes the contribution of the censoring process. #' @param Qn_estim An object providing the value of the outcome evaluated after #' imposing a shift in the treatment. This object is passed in after being #' constructed by a call to the internal function \code{\link{est_Q}}. diff --git a/R/txshift.R b/R/txshift.R index e70babb..394b99a 100644 --- a/R/txshift.R +++ b/R/txshift.R @@ -38,9 +38,9 @@ #' @param max_iter A \code{numeric} integer giving the maximum number of steps #' to be taken in iterating to a solution of the efficient influence function. #' @param gps_bound \code{numeric} giving the lower limit of the generalized -#' propensity score estimates to be tolerated (default = 0.05). Estimates less -#' than this are truncated to this or 1/n. See \code{\link{bound_propensity}} -#' for details. +#' propensity score estimates to be tolerated (default = 0.01). Estimates +#' falling below this value are truncated to this or 1/n. For details, see +#' \code{\link{bound_propensity}}. #' @param samp_fit_args A \code{list} of arguments, all but one of which are #' passed to \code{\link{est_samp}}. For details, consult the documentation of #' \code{\link{est_samp}}. The first element (i.e., \code{fit_type}) is used @@ -203,7 +203,7 @@ txshift <- function(W, estimator = c("tmle", "onestep"), fluctuation = c("standard", "weighted"), max_iter = 10, - gps_bound = 0.005, + gps_bound = 0.01, samp_fit_args = list( fit_type = c("glm", "sl", "external"), sl_learners = NULL diff --git a/man/bound_propensity.Rd b/man/bound_propensity.Rd index cc09fbc..6804c5b 100644 --- a/man/bound_propensity.Rd +++ b/man/bound_propensity.Rd @@ -4,7 +4,7 @@ \alias{bound_propensity} \title{Bound Generalized Propensity Score} \usage{ -bound_propensity(vals, bound = 0.005) +bound_propensity(vals, bound = 0.01) } \arguments{ \item{vals}{\code{numeric} vector of generalized propensity score estimates. @@ -13,7 +13,7 @@ should only be bounded/truncated away from zero.} \item{bound}{\code{numeric} atomic giving the lower limit of the generalized propensity score estimates to be tolerated. Estimates less than this will -be truncated to this value (default = 0.005). Note that the default may be +be truncated to this value (default = 0.01). Note that the default may be internally overwritten by reference to the sample size (n), so the lower bound used is the greater of the specified value and 1/n.} } diff --git a/man/est_g_cens.Rd b/man/est_g_cens.Rd index eec6535..5e1b39e 100644 --- a/man/est_g_cens.Rd +++ b/man/est_g_cens.Rd @@ -11,7 +11,8 @@ est_g_cens( samp_weights = rep(1, length(C_cens)), fit_type = c("sl", "glm"), glm_formula = "C_cens ~ .", - sl_learners = NULL + sl_learners = NULL, + bound = 0.02 ) } \arguments{ @@ -39,6 +40,10 @@ for fitting a (generalized) linear model via \code{\link[stats]{glm}}.} \item{sl_learners}{Object containing a set of instantiated learners from the \pkg{sl3}, to be used in fitting an ensemble model.} + +\item{bound}{\code{numeric} giving the lower limit of censoring mechanism +estimates to be tolerated (default = 0.02). Estimates below this value are +truncated to this or 1/n. See \code{\link{bound_propensity}} for details.} } \value{ A \code{numeric} vector of the propensity score for censoring. diff --git a/man/onestep_txshift.Rd b/man/onestep_txshift.Rd index d2d0555..3facf27 100644 --- a/man/onestep_txshift.Rd +++ b/man/onestep_txshift.Rd @@ -43,7 +43,10 @@ the scale of the treatment (A).} evaluated across the full data. This object is passed in after being constructed by a call to the internal function \code{\link{est_samp}}.} -\item{gn_cens_weights}{TODO: document} +\item{gn_cens_weights}{An object providing the value of inverse probability +of censoring weights, the inverse of the censoring mechanism estimate. The +weights are used as part of the IPCW-EIF procedure to implement a joint +intervention that removes the contribution of the censoring process.} \item{Qn_estim}{An object providing the value of the outcome evaluated after imposing a shift in the treatment. This object is passed in after being diff --git a/man/tmle_txshift.Rd b/man/tmle_txshift.Rd index a166444..1947930 100644 --- a/man/tmle_txshift.Rd +++ b/man/tmle_txshift.Rd @@ -46,7 +46,10 @@ the scale of the treatment (A).} evaluated across the full data. This object is passed in after being constructed by a call to the internal function \code{\link{est_samp}}.} -\item{gn_cens_weights}{TODO: document} +\item{gn_cens_weights}{An object providing the value of inverse probability +of censoring weights, the inverse of the censoring mechanism estimate. The +weights are used as part of the IPCW-TMLE procedure to implement a joint +intervention that removes the contribution of the censoring process.} \item{Qn_estim}{An object providing the value of the outcome evaluated after imposing a shift in the treatment. This object is passed in after being diff --git a/man/txshift.Rd b/man/txshift.Rd index 2599b36..ce99fe8 100644 --- a/man/txshift.Rd +++ b/man/txshift.Rd @@ -15,7 +15,7 @@ txshift( estimator = c("tmle", "onestep"), fluctuation = c("standard", "weighted"), max_iter = 10, - gps_bound = 0.005, + gps_bound = 0.01, samp_fit_args = list(fit_type = c("glm", "sl", "external"), sl_learners = NULL), g_exp_fit_args = list(fit_type = c("hal", "sl", "external"), lambda_seq = exp(seq(-1, -13, length = 300)), sl_learners_density = NULL), @@ -74,9 +74,9 @@ tilting regression.} to be taken in iterating to a solution of the efficient influence function.} \item{gps_bound}{\code{numeric} giving the lower limit of the generalized -propensity score estimates to be tolerated (default = 0.05). Estimates less -than this are truncated to this or 1/n. See \code{\link{bound_propensity}} -for details.} +propensity score estimates to be tolerated (default = 0.01). Estimates +falling below this value are truncated to this or 1/n. For details, see +\code{\link{bound_propensity}}.} \item{samp_fit_args}{A \code{list} of arguments, all but one of which are passed to \code{\link{est_samp}}. For details, consult the documentation of