Skip to content

Commit

Permalink
Merge pull request #115 from jdblischak/dt-mb_weight
Browse files Browse the repository at this point in the history
Convert mb_weight() to data.table
  • Loading branch information
nanxstats authored Oct 30, 2023
2 parents 0f06d96 + 7dd8b55 commit 7daf1e3
Show file tree
Hide file tree
Showing 15 changed files with 101 additions and 48 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: simtrial
Type: Package
Title: Clinical Trial Simulation
Version: 0.3.0.1
Version: 0.3.0.2
Authors@R: c(
person("Keaven", "Anderson", email = "keaven_anderson@merck.com", role = c("aut")),
person("Yilong", "Zhang", email = "elong0527@gmail.com", role = c("aut")),
Expand Down Expand Up @@ -48,7 +48,6 @@ Imports:
stats,
survival,
tibble,
tidyr,
utils
Suggests:
Matrix,
Expand All @@ -62,7 +61,8 @@ Suggests:
rmarkdown,
stringr,
survMisc,
testthat
testthat,
tidyr
LinkingTo:
Rcpp
Roxygen: list(markdown = TRUE)
Expand Down
5 changes: 0 additions & 5 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,10 @@ importFrom(data.table,setDF)
importFrom(data.table,setDT)
importFrom(data.table,setorderv)
importFrom(doFuture,"%dofuture%")
importFrom(dplyr,filter)
importFrom(dplyr,full_join)
importFrom(dplyr,group_by)
importFrom(dplyr,mutate)
importFrom(dplyr,right_join)
importFrom(dplyr,select)
importFrom(dplyr,starts_with)
importFrom(dplyr,summarize)
importFrom(future,plan)
importFrom(magrittr,"%>%")
importFrom(methods,is)
Expand All @@ -46,5 +42,4 @@ importFrom(mvtnorm,pmvnorm)
importFrom(survival,Surv)
importFrom(survival,is.Surv)
importFrom(tibble,tibble)
importFrom(tidyr,replace_na)
useDynLib(simtrial, .registration = TRUE)
45 changes: 20 additions & 25 deletions R/mb_weight.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@
#' Set `delay = Inf`, `w_max = 2` to be consistent with recommendation of
#' Magirr (2021).
#'
#' @return A vector with weights for the Magirr-Burman weighted logrank test
#' for the data in `x`.
#' @return A data frame. The column `mb_weight` contains the weights for the
#' Magirr-Burman weighted logrank test for the data in `x`.
#'
#' @details
#' We define \eqn{t^*} to be the input variable `delay`.
Expand All @@ -65,10 +65,6 @@
#' "Non‐proportional hazards in immuno‐oncology: Is an old perspective needed?"
#' _Pharmaceutical Statistics_ 20 (3): 512--527.
#'
#' @importFrom dplyr group_by summarize filter right_join
#' mutate full_join select
#' @importFrom tidyr replace_na
#'
#' @export
#'
#' @examples
Expand Down Expand Up @@ -141,26 +137,25 @@ mb_weight <- function(x, delay = 4, w_max = Inf) {
}

# Compute max weight by stratum
x2 <- x %>% group_by(stratum)
x2 <- as.data.table(x)
# Make sure you don't lose any stratum!
tbl_all_stratum <- x2 %>% summarize()
tbl_all_stratum <- data.table(stratum = unique(x2$stratum))

ans <- x2 %>%
# Look only up to delay time
filter(tte <= delay) %>%
# Weight before delay specified as 1/S
summarize(max_weight = max(1 / s)) %>%
# Get back stratum with no records before delay ends
right_join(tbl_all_stratum, by = "stratum") %>%
# `max_weight` is 1 when there are no records before delay ends
mutate(max_weight = replace_na(max_weight, 1)) %>%
# Cut off weights at w_max
mutate(max_weight = pmin(w_max, max_weight)) %>%
# Now merge max_weight back to stratified dataset
full_join(x2, by = "stratum") %>%
# Weight is min of max_weight and 1/S which will increase up to delay
mutate(mb_weight = pmin(max_weight, 1 / s)) %>%
select(-max_weight)
# Look only up to delay time
ans <- x2[tte <= delay, ]
# Weight before delay specified as 1/S
ans <- ans[, .(max_weight = max(1 / s)), by = "stratum"]
# Get back stratum with no records before delay ends
ans <- ans[tbl_all_stratum, on = "stratum"]
# `max_weight` is 1 when there are no records before delay ends
ans[, max_weight := ifelse(is.na(max_weight), 1, max_weight)]
# Cut off weights at w_max
ans[, max_weight := pmin(w_max, max_weight)]
# Now merge max_weight back to stratified dataset
ans <- merge.data.table(ans, x2, by = "stratum", all = TRUE)
# Weight is min of max_weight and 1/S which will increase up to delay
ans[, mb_weight := pmin(max_weight, 1 / s)]
ans[, max_weight := NULL]

ans
setDF(ans)
}
1 change: 0 additions & 1 deletion R/simfix2simPWSurv.R
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@
#' @export
#'
#' @examples
#' library(tidyr)
#' library(dplyr)
#' library(tibble)
#'
Expand Down
2 changes: 1 addition & 1 deletion data-raw/DATASET.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
## code to prepare `DATASET` dataset goes here
library(tidyr)
library(tibble)
set.seed(6671)
ds <- simPWSurv(
n = 200,
Expand Down
4 changes: 2 additions & 2 deletions man/mb_weight.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion man/simfix2simpwsurv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

31 changes: 31 additions & 0 deletions tests/testthat/generate-backwards-compatible-test-data.R
Original file line number Diff line number Diff line change
Expand Up @@ -269,4 +269,35 @@ generate_test_data <- function(
dropout_rate = dropout_rate
)
saveRDS(ex4, file.path(outdir, "sim_pw_surv_ex4.rds"))

# mb_weight() ----------------------------------------------------------------

# Use default enrollment and event rates at cut at 100 events
# For transparency, may be good to set either `delay` or `w_max` to `Inf`
set.seed(12345)
x <- sim_pw_surv(n = 200)
x <- cut_data_by_event(x, 125)
x <- counting_process(x, arm = "experimental")

# Example 1
# Compute Magirr-Burman weights with `delay = 6`
ZMB <- mb_weight(x, delay = 6, w_max = Inf)
S <- with(ZMB, sum(o_minus_e * mb_weight))
V <- with(ZMB, sum(var_o_minus_e * mb_weight^2))
z <- S / sqrt(V)

# Compute p-value of modestly weighted logrank of Magirr-Burman
ex1 <- pnorm(z)
saveRDS(ex1, file.path(outdir, "mb_weight_ex1.rds"))

# Example 2
# Now compute with maximum weight of 2 as recommended in Magirr, 2021
ZMB2 <- mb_weight(x, delay = Inf, w_max = 2)
S <- with(ZMB2, sum(o_minus_e * mb_weight))
V <- with(ZMB2, sum(var_o_minus_e * mb_weight^2))
z <- S / sqrt(V)

# Compute p-value of modestly weighted logrank of Magirr-Burman
ex2 <- pnorm(z)
saveRDS(ex2, file.path(outdir, "mb_weight_ex2.rds"))
}
34 changes: 34 additions & 0 deletions tests/testthat/test-backwards-compatibility.R
Original file line number Diff line number Diff line change
Expand Up @@ -279,3 +279,37 @@ test_that("sim_fixed_n()", {
expected <- readRDS("fixtures/backwards-compatibility/sim_fixed_n_ex3.rds")
expect_equal(observed, expected)
})

test_that("mb_weight()", {
# Use default enrollment and event rates at cut at 100 events
# For transparency, may be good to set either `delay` or `w_max` to `Inf`
set.seed(12345)
x <- sim_pw_surv(n = 200)
x <- cut_data_by_event(x, 125)
x <- counting_process(x, arm = "experimental")

# Example 1
# Compute Magirr-Burman weights with `delay = 6`
ZMB <- mb_weight(x, delay = 6, w_max = Inf)
S <- with(ZMB, sum(o_minus_e * mb_weight))
V <- with(ZMB, sum(var_o_minus_e * mb_weight^2))
z <- S / sqrt(V)

# Compute p-value of modestly weighted logrank of Magirr-Burman
observed <- pnorm(z)
expected <- readRDS("fixtures/backwards-compatibility/mb_weight_ex1.rds")
expect_equal(observed, expected)

# Example 2
# Now compute with maximum weight of 2 as recommended in Magirr, 2021
ZMB2 <- mb_weight(x, delay = Inf, w_max = 2)
S <- with(ZMB2, sum(o_minus_e * mb_weight))
V <- with(ZMB2, sum(var_o_minus_e * mb_weight^2))
z <- S / sqrt(V)

# Compute p-value of modestly weighted logrank of Magirr-Burman
observed <- pnorm(z)
expected <- readRDS("fixtures/backwards-compatibility/mb_weight_ex2.rds")
expect_equal(observed, expected)
})

4 changes: 2 additions & 2 deletions tests/testthat/test-double_programming_simPWSurv.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ x <- sim_pw_surv(
)

# prepare to test block
block1 <- x %>% filter(stratum == "Low")
block2 <- x %>% filter(stratum == "High")
block1 <- x %>% dplyr::filter(stratum == "Low")
block2 <- x %>% dplyr::filter(stratum == "High")
bktest1 <- c()
j <- 1
for (i in seq(1, floor(nrow(block1) / 4))) {
Expand Down
4 changes: 2 additions & 2 deletions tests/testthat/test-independent_test_cutData.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ test_that("x is a time-to-event data set", {
cut_date <- 10
xcut <- cut_data_by_date(x, cut_date)
test_that("only paitients recorded by cut_data_by_date are included", {
Npts <- dim(filter(x, enroll_time <= cut_date))[1]
Npts <- dim(dplyr::filter(x, enroll_time <= cut_date))[1]
Nptscut <- length(xcut$tte)
testthat::expect_equal(Npts, Nptscut)
})
Expand All @@ -25,6 +25,6 @@ test_that("Time to event (tte) is cut off at the cut_date", {

test_that("the event variable is calculated correctly", {
Nevent <- sum(x$fail * (x$cte <= cut_date))
Neventcut <- dim(filter(xcut, event == 1))[1]
Neventcut <- dim(dplyr::filter(xcut, event == 1))[1]
testthat::expect_equal(Nevent, Neventcut)
})
2 changes: 1 addition & 1 deletion tests/testthat/test-independent_test_getCutDateForCount.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ test_that("get_cut_date_by_event returns the correct cut date", {

x <- y %>%
dplyr::ungroup() %>%
filter(fail == 1) %>%
dplyr::filter(fail == 1) %>%
dplyr::arrange(cte) %>%
dplyr::slice(event)
expect_equal(x$cte, ycutdate)
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-independent_test_pvalue_maxcombo.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ testthat::test_that("the p-values correspond to pvalue_maxcombo", {
tst.rslt2 <- subset(tst.rsltt, grepl("FH", tst.rsltt$W))
cov.tst.rslt11 <- tst.rslt2$Var
d2$V <- cov.tst.rslt11
d1d2 <- full_join(d1, d2, by = c("X1", "X2"))
d1d2 <- dplyr::full_join(d1, d2, by = c("X1", "X2"))
cov.tst.rslt1 <- d1d2$V
cov.tst.1 <- matrix(NA, nrow = length(wt1), ncol = length(wt1))
cov.tst.1[lower.tri(cov.tst.1, diag = FALSE)] <- cov.tst.rslt1
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-independent_test_wlr.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ testthat::test_that("wlr calculated correct correlation value when input a seque
tst.rslt2 <- subset(tst.rsltt, grepl("FH", tst.rsltt$W))
cov.tst.rslt11 <- tst.rslt2$Var
d2$V <- cov.tst.rslt11
d1d2 <- full_join(d1, d2, by = c("X1", "X2"))
d1d2 <- dplyr::full_join(d1, d2, by = c("X1", "X2"))
cov.tst.rslt1 <- d1d2$V
cov.tst.1 <- matrix(NA, nrow = length(wt1), ncol = length(wt1))
cov.tst.1[lower.tri(cov.tst.1, diag = FALSE)] <- cov.tst.rslt1
Expand Down
6 changes: 3 additions & 3 deletions vignettes/modestWLRTVignette.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ Next, we consider the @Magirr2021 extension of the modestly weighted logrank tes
$$w(t, \tau, w_{\max}) = \min\left(w_{\max},\left(\frac{1}{S(\min(t,\tau))}\right)\right).$$

This requires generating weights and then computing the test.
We begin with the default of `w_max=\Inf` which corresponds to the original @MagirrBurman test and set the time until maximum weight $\tau$ with `delay = 6`.
We begin with the default of `w_max=Inf` which corresponds to the original @MagirrBurman test and set the time until maximum weight $\tau$ with `delay = 6`.

```{r}
ZMB <- MBdelay %>%
Expand Down Expand Up @@ -122,7 +122,7 @@ pnorm(Z_modified_FH$z)
### Freidlin and Korn strong null hypothesis example

The underlying survival is worse for the experimental group is uniformly worse for the experimental group until the very end of the study.
This was presented by @FKNPH2019. For this case, we have a hazard ratio of 16 for 1/10 of 1 year (1.2 months), followed by a hazard ratio of
This was presented by @FKNPH2019. For this case, we have a hazard ratio of 16 for 1/10 of 1 year (1.2 months), followed by a hazard ratio of 0.76 thereafter.

First, we specify study duration, sample size and enrollment rates. The enrollment rate is assumed constant during the enrollment period until the targeted sample size is reached.
For failure rates, we consider the delayed treatment effect example of @MagirrBurman.
Expand All @@ -144,7 +144,7 @@ fail_rate <- tibble::tibble(
)
```

Now we generate a single dataset with the above characteristics and cut data for analysis at 36 months post start of enrollment.
Now we generate a single dataset with the above characteristics and cut data for analysis at 5 years post start of enrollment.
Then we plot Kaplan-Meier curves for the resulting dataset (red curve for experimental treatment, black for control):

```{r, message=FALSE,warning=FALSE,fig.width=7.5, fig.height=4}
Expand Down

0 comments on commit 7daf1e3

Please sign in to comment.