From 7ed8035221b421337193f6bd0e61debd9f12918a Mon Sep 17 00:00:00 2001
From: Nima Hejazi <nh@nimahejazi.org>
Date: Mon, 2 Oct 2023 15:00:34 -0400
Subject: [PATCH] truncated censoring

---
 NEWS.md                 | 8 ++++++++
 R/bound.R               | 4 ++--
 R/fit_mechanisms.R      | 7 ++++++-
 R/onestep_txshift.R     | 5 ++++-
 R/tmle_txshift.R        | 5 ++++-
 R/txshift.R             | 8 ++++----
 man/bound_propensity.Rd | 4 ++--
 man/est_g_cens.Rd       | 7 ++++++-
 man/onestep_txshift.Rd  | 5 ++++-
 man/tmle_txshift.Rd     | 5 ++++-
 man/txshift.Rd          | 8 ++++----
 11 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 83c22cd..4026a91 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,13 @@
 # txshift 0.3.9
 
+As of October 2023:
+* A new argument `bound` has been added to `est_g_cens()` to specify the lower
+  tolerated limit of the censoring mechanism estimates. Truncating estimates
+  at this limit stabilizes the inverse probability of censoring weights. The
+  bound defaults to 0.02 (or 1/n, whichever is greater).
+* The default value of `bound` in `bound_propensity()` has been changed from
+  0.005 to 0.01, as has the default value of `gps_bound` in `txshift()`.
+
 As of May 2023:
 * A new argument `bound` has been added to `bound_propensity()` to specify the
   lower tolerated limit of generalized propensity score estimates. Estimates
diff --git a/R/bound.R b/R/bound.R
index f3f40b9..245b863 100644
--- a/R/bound.R
+++ b/R/bound.R
@@ -33,14 +33,14 @@ bound_precision <- function(vals) {
 #'  should only be bounded/truncated away from zero.
 #' @param bound \code{numeric} atomic giving the lower limit of the generalized
 #'  propensity score estimates to be tolerated. Estimates less than this will
-#'  be truncated to this value (default = 0.005). Note that the default may be
+#'  be truncated to this value (default = 0.01). Note that the default may be
 #'  internally overwritten by reference to the sample size (n), so the lower
 #'  bound used is the greater of the specified value and 1/n.
 #'
 #' @return A \code{numeric} vector of the same length as \code{vals}, where the
 #'  returned values are bounded such that the minimum is no lower than 1/n, for
 #'  the sample size n.
-bound_propensity <- function(vals, bound = 0.005) {
+bound_propensity <- function(vals, bound = 0.01) {
   # bound generalized propensity score g(a|w) away from 0 only
   propensity_bound <- max(1 / length(vals), bound)
   vals_bounded <- pmax(vals, propensity_bound)
diff --git a/R/fit_mechanisms.R b/R/fit_mechanisms.R
index a38c8d6..4bcd7c2 100644
--- a/R/fit_mechanisms.R
+++ b/R/fit_mechanisms.R
@@ -224,6 +224,9 @@ est_g_exp <- function(A,
 #'  for fitting a (generalized) linear model via \code{\link[stats]{glm}}.
 #' @param sl_learners Object containing a set of instantiated learners from the
 #'  \pkg{sl3}, to be used in fitting an ensemble model.
+#' @param bound \code{numeric} giving the lower limit of censoring mechanism
+#'  estimates to be tolerated (default = 0.02). Estimates below this value are
+#'  truncated to this or 1/n. See \code{\link{bound_propensity}} for details.
 #'
 #' @importFrom stats glm as.formula predict
 #' @importFrom data.table as.data.table setnames copy set
@@ -237,7 +240,8 @@ est_g_cens <- function(C_cens,
                        samp_weights = rep(1, length(C_cens)),
                        fit_type = c("sl", "glm"),
                        glm_formula = "C_cens ~ .",
-                       sl_learners = NULL) {
+                       sl_learners = NULL,
+                       bound = 0.02) {
   # set defaults and check arguments
   fit_type <- match.arg(fit_type)
   if (fit_type == "sl") {
@@ -301,6 +305,7 @@ est_g_cens <- function(C_cens,
   }
 
   # generate output
+  pred_g_cens <- bound_propensity(vals = pred_g_cens, bound = bound)
   return(pred_g_cens)
 }
 
diff --git a/R/onestep_txshift.R b/R/onestep_txshift.R
index 3068191..91fa80a 100644
--- a/R/onestep_txshift.R
+++ b/R/onestep_txshift.R
@@ -22,7 +22,10 @@
 #' @param samp_estim An object providing the value of the censoring mechanism
 #'  evaluated across the full data. This object is passed in after being
 #'  constructed by a call to the internal function \code{\link{est_samp}}.
-#' @param gn_cens_weights TODO: document
+#' @param gn_cens_weights An object providing the value of inverse probability
+#'  of censoring weights, the inverse of the censoring mechanism estimate. The
+#'  weights are used as part of the IPCW-EIF procedure to implement a joint
+#'  intervention that removes the contribution of the censoring process.
 #' @param Qn_estim An object providing the value of the outcome evaluated after
 #'  imposing a shift in the treatment. This object is passed in after being
 #'  constructed by a call to the internal function \code{est_Q}.
diff --git a/R/tmle_txshift.R b/R/tmle_txshift.R
index 431a618..cb7a898 100644
--- a/R/tmle_txshift.R
+++ b/R/tmle_txshift.R
@@ -23,7 +23,10 @@
 #' @param samp_estim An object providing the value of the sampling mechanism
 #'  evaluated across the full data. This object is passed in after being
 #'  constructed by a call to the internal function \code{\link{est_samp}}.
-#' @param gn_cens_weights TODO: document
+#' @param gn_cens_weights An object providing the value of inverse probability
+#'  of censoring weights, the inverse of the censoring mechanism estimate. The
+#'  weights are used as part of the IPCW-TMLE procedure to implement a joint
+#'  intervention that removes the contribution of the censoring process.
 #' @param Qn_estim An object providing the value of the outcome evaluated after
 #'  imposing a shift in the treatment. This object is passed in after being
 #'  constructed by a call to the internal function \code{\link{est_Q}}.
diff --git a/R/txshift.R b/R/txshift.R
index e70babb..394b99a 100644
--- a/R/txshift.R
+++ b/R/txshift.R
@@ -38,9 +38,9 @@
 #' @param max_iter A \code{numeric} integer giving the maximum number of steps
 #'  to be taken in iterating to a solution of the efficient influence function.
 #' @param gps_bound \code{numeric} giving the lower limit of the generalized
-#'  propensity score estimates to be tolerated (default = 0.05). Estimates less
-#'  than this are truncated to this or 1/n. See \code{\link{bound_propensity}}
-#'  for details.
+#'  propensity score estimates to be tolerated (default = 0.01). Estimates
+#'  falling below this value are truncated to this or 1/n. For details, see
+#'  \code{\link{bound_propensity}}.
 #' @param samp_fit_args A \code{list} of arguments, all but one of which are
 #'  passed to \code{\link{est_samp}}. For details, consult the documentation of
 #'  \code{\link{est_samp}}. The first element (i.e., \code{fit_type}) is used
@@ -203,7 +203,7 @@ txshift <- function(W,
                     estimator = c("tmle", "onestep"),
                     fluctuation = c("standard", "weighted"),
                     max_iter = 10,
-                    gps_bound = 0.005,
+                    gps_bound = 0.01,
                     samp_fit_args = list(
                       fit_type = c("glm", "sl", "external"),
                       sl_learners = NULL
diff --git a/man/bound_propensity.Rd b/man/bound_propensity.Rd
index cc09fbc..6804c5b 100644
--- a/man/bound_propensity.Rd
+++ b/man/bound_propensity.Rd
@@ -4,7 +4,7 @@
 \alias{bound_propensity}
 \title{Bound Generalized Propensity Score}
 \usage{
-bound_propensity(vals, bound = 0.005)
+bound_propensity(vals, bound = 0.01)
 }
 \arguments{
 \item{vals}{\code{numeric} vector of generalized propensity score estimates.
@@ -13,7 +13,7 @@ should only be bounded/truncated away from zero.}
 
 \item{bound}{\code{numeric} atomic giving the lower limit of the generalized
 propensity score estimates to be tolerated. Estimates less than this will
-be truncated to this value (default = 0.005). Note that the default may be
+be truncated to this value (default = 0.01). Note that the default may be
 internally overwritten by reference to the sample size (n), so the lower
 bound used is the greater of the specified value and 1/n.}
 }
diff --git a/man/est_g_cens.Rd b/man/est_g_cens.Rd
index eec6535..5e1b39e 100644
--- a/man/est_g_cens.Rd
+++ b/man/est_g_cens.Rd
@@ -11,7 +11,8 @@ est_g_cens(
   samp_weights = rep(1, length(C_cens)),
   fit_type = c("sl", "glm"),
   glm_formula = "C_cens ~ .",
-  sl_learners = NULL
+  sl_learners = NULL,
+  bound = 0.02
 )
 }
 \arguments{
@@ -39,6 +40,10 @@ for fitting a (generalized) linear model via \code{\link[stats]{glm}}.}
 
 \item{sl_learners}{Object containing a set of instantiated learners from the
 \pkg{sl3}, to be used in fitting an ensemble model.}
+
+\item{bound}{\code{numeric} giving the lower limit of censoring mechanism
+estimates to be tolerated (default = 0.02). Estimates below this value are
+truncated to this or 1/n. See \code{\link{bound_propensity}} for details.}
 }
 \value{
 A \code{numeric} vector of the propensity score for censoring.
diff --git a/man/onestep_txshift.Rd b/man/onestep_txshift.Rd
index d2d0555..3facf27 100644
--- a/man/onestep_txshift.Rd
+++ b/man/onestep_txshift.Rd
@@ -43,7 +43,10 @@ the scale of the treatment (A).}
 evaluated across the full data. This object is passed in after being
 constructed by a call to the internal function \code{\link{est_samp}}.}
 
-\item{gn_cens_weights}{TODO: document}
+\item{gn_cens_weights}{An object providing the value of inverse probability
+of censoring weights, the inverse of the censoring mechanism estimate. The
+weights are used as part of the IPCW-EIF procedure to implement a joint
+intervention that removes the contribution of the censoring process.}
 
 \item{Qn_estim}{An object providing the value of the outcome evaluated after
 imposing a shift in the treatment. This object is passed in after being
diff --git a/man/tmle_txshift.Rd b/man/tmle_txshift.Rd
index a166444..1947930 100644
--- a/man/tmle_txshift.Rd
+++ b/man/tmle_txshift.Rd
@@ -46,7 +46,10 @@ the scale of the treatment (A).}
 evaluated across the full data. This object is passed in after being
 constructed by a call to the internal function \code{\link{est_samp}}.}
 
-\item{gn_cens_weights}{TODO: document}
+\item{gn_cens_weights}{An object providing the value of inverse probability
+of censoring weights, the inverse of the censoring mechanism estimate. The
+weights are used as part of the IPCW-TMLE procedure to implement a joint
+intervention that removes the contribution of the censoring process.}
 
 \item{Qn_estim}{An object providing the value of the outcome evaluated after
 imposing a shift in the treatment. This object is passed in after being
diff --git a/man/txshift.Rd b/man/txshift.Rd
index 2599b36..ce99fe8 100644
--- a/man/txshift.Rd
+++ b/man/txshift.Rd
@@ -15,7 +15,7 @@ txshift(
   estimator = c("tmle", "onestep"),
   fluctuation = c("standard", "weighted"),
   max_iter = 10,
-  gps_bound = 0.005,
+  gps_bound = 0.01,
   samp_fit_args = list(fit_type = c("glm", "sl", "external"), sl_learners = NULL),
   g_exp_fit_args = list(fit_type = c("hal", "sl", "external"), lambda_seq = exp(seq(-1,
     -13, length = 300)), sl_learners_density = NULL),
@@ -74,9 +74,9 @@ tilting regression.}
 to be taken in iterating to a solution of the efficient influence function.}
 
 \item{gps_bound}{\code{numeric} giving the lower limit of the generalized
-propensity score estimates to be tolerated (default = 0.05). Estimates less
-than this are truncated to this or 1/n. See \code{\link{bound_propensity}}
-for details.}
+propensity score estimates to be tolerated (default = 0.01). Estimates
+falling below this value are truncated to this or 1/n. For details, see
+\code{\link{bound_propensity}}.}
 
 \item{samp_fit_args}{A \code{list} of arguments, all but one of which are
 passed to \code{\link{est_samp}}. For details, consult the documentation of