From cb23a046beda3eea126555ff7bacc1e119b5be25 Mon Sep 17 00:00:00 2001 From: Alex Keil Date: Wed, 13 Sep 2023 22:22:44 -0400 Subject: [PATCH] added tests Cleaning up docstrings docs lsurvresp issue jackknife for KM jackknife for AJ --- src/LSurvival.jl | 1 + src/bootstrap.jl | 80 +- src/coxmodel.jl | 28 +- src/data_generators.jl | 8 +- src/docstr.jl | 1632 +++++++++++++++++++++------------------- src/jackknife.jl | 183 ++++- src/npsurvival.jl | 111 +-- src/shared_structs.jl | 38 +- test/runtests.jl | 102 ++- 9 files changed, 1229 insertions(+), 954 deletions(-) diff --git a/src/LSurvival.jl b/src/LSurvival.jl index bf1279b..42b3156 100644 --- a/src/LSurvival.jl +++ b/src/LSurvival.jl @@ -37,6 +37,7 @@ import StatsBase: #predict, predict!, response, score, + var, vcov, weights diff --git a/src/bootstrap.jl b/src/bootstrap.jl index 387290b..36e74ec 100644 --- a/src/bootstrap.jl +++ b/src/bootstrap.jl @@ -1,15 +1,5 @@ - """ -``` -id, int, outt, data = -LSurvival.dgm(MersenneTwister(1212), 20, 5; afun = LSurvival.int_0) - -d, X = data[:, 4], data[:, 1:3] -weights = rand(length(d)) - -# survival outcome: -R = LSurvivalResp(int, outt, d, ID.(id)) # specification with ID only -``` +$DOC_BOOTSTRAP_SURVRESP """ function bootstrap(rng::MersenneTwister, R::T) where {T<:LSurvivalResp} uid = unique(R.id) @@ -24,16 +14,9 @@ end bootstrap(R::T) where {T<:LSurvivalResp} = bootstrap(MersenneTwister(), R::T) + """ -``` -z,x,t,d,event,weights = -LSurvival.dgm_comprisk(MersenneTwister(1212), 300) -enter = zeros(length(event)) - -# survival outcome: -R = LSurvivalCompResp(enter, t, event, weights, ID.(collect(1:length(t)))) # specification with ID only -bootstrap(R) # note that entire observations/clusters identified by id are kept -``` +$DOC_BOOTSTRAP_SURVCOMPRESP """ function bootstrap(rng::MersenneTwister, R::T) where {T<:LSurvivalCompResp} uid = unique(R.id) @@ -47,27 +30,9 @@ function bootstrap(rng::MersenneTwister, R::T) where {T<:LSurvivalCompResp} end bootstrap( R::T) where {T<:LSurvivalCompResp} = bootstrap(MersenneTwister(), R::T) -""" -``` -using LSurvival, Random - -id, int, outt, data = -LSurvival.dgm(MersenneTwister(1212), 20, 5; afun = LSurvival.int_0) - -d, X = data[:, 4], data[:, 1:3] -weights = rand(length(d)) - -# survival outcome: -R = LSurvivalResp(int, outt, d, ID.(id)) # specification with ID only -P = PHParms(X) -idx, R2 = bootstrap(R) -P2 = bootstrap(idx, P) - -Mod = PHModel(R2, P2) -LSurvival._fit!(Mod, start=Mod.P._B) - -``` +""" +$DOC_BOOTSTRAP_PHPARMS """ function bootstrap(idx::Vector{Int}, P::PHParms) P2 = PHParms(P.X[idx, :]) @@ -118,6 +83,19 @@ function bootstrap(rng::MersenneTwister, m::M;kwargs...) where{M<:KMSurv} end bootstrap(m::M;kwargs...) where{M<:KMSurv} = bootstrap(MersenneTwister(), m;kwargs...) +function bootstrap(rng::MersenneTwister, m::M, iter::Int;kwargs...) where{M<:KMSurv} + if isnothing(m.R) + throw("Model is missing response matrix, use keepy=true for original fit") + end + res = zeros(iter, 1) + @inbounds for i = 1:iter + mb = bootstrap(rng, m) + LSurvival._fit!(mb; kwargs...) + res[i, :] = mb.surv[end:end] + end + res +end +bootstrap(m::M, iter::Int; kwargs...) where{M<:KMSurv} = bootstrap(MersenneTwister(), m, iter::Int;kwargs...) @@ -132,10 +110,6 @@ function bootstrap(rng::MersenneTwister, m::M;kwargs...) where{M<:AJSurv} end bootstrap(m::M; kwargs...) where{M<:AJSurv} = bootstrap(MersenneTwister(), m;kwargs...) - -""" -$DOC_BOOTSTRAP_AJSURV -""" function bootstrap(rng::MersenneTwister, m::M, iter::Int;kwargs...) where{M<:AJSurv} if isnothing(m.R) throw("Model is missing response matrix, use keepy=true for original fit") @@ -150,21 +124,3 @@ function bootstrap(rng::MersenneTwister, m::M, iter::Int;kwargs...) where{M<:AJS end bootstrap(m::M, iter::Int; kwargs...) where{M<:AJSurv} = bootstrap(MersenneTwister(), m, iter::Int;kwargs...) - - -""" -$DOC_BOOTSTRAP_KMSURV -""" -function bootstrap(rng::MersenneTwister, m::M, iter::Int;kwargs...) where{M<:KMSurv} - if isnothing(m.R) - throw("Model is missing response matrix, use keepy=true for original fit") - end - res = zeros(iter, 1) - @inbounds for i = 1:iter - mb = bootstrap(rng, m) - LSurvival._fit!(mb; kwargs...) - res[i, :] = mb.surv[end:end] - end - res -end -bootstrap(m::M, iter::Int; kwargs...) where{M<:KMSurv} = bootstrap(MersenneTwister(), m, iter::Int;kwargs...) diff --git a/src/coxmodel.jl b/src/coxmodel.jl index 9086566..05617ba 100644 --- a/src/coxmodel.jl +++ b/src/coxmodel.jl @@ -68,9 +68,6 @@ mutable struct PHModel{G<:LSurvivalResp,L<:AbstractLSurvivalParms} <: AbstractPH RL::Union{Nothing,Vector{Matrix{Float64}}} # residual matrix end -""" -$DOC_PHMODEL -""" function PHModel( R::Union{Nothing,G}, P::L, @@ -87,9 +84,6 @@ function PHModel( return PHModel(R, P, formula, ties, fit, bh, nothing) end -""" -$DOC_PHMODEL -""" function PHModel( R::Union{Nothing,G}, P::L, @@ -100,9 +94,6 @@ function PHModel( return PHModel(R, P, formula, ties, fit, zeros(Float64, length(R.eventtimes), 6)) end -""" -$DOC_PHMODEL -""" function PHModel( R::Union{Nothing,G}, P::L, @@ -112,9 +103,6 @@ function PHModel( return PHModel(R, P, formula, ties, false) end -""" -$DOC_PHMODEL -""" function PHModel( R::Union{Nothing,G}, P::L, @@ -123,9 +111,6 @@ function PHModel( return PHModel(R, P, nothing, ties, false) end -""" -$DOC_PHMODEL -""" function PHModel( R::Union{Nothing,G}, P::L, @@ -134,9 +119,6 @@ function PHModel( return PHModel(R, P, formula, "efron", false) end -""" -$DOC_PHMODEL -""" function PHModel(R::Union{Nothing,G}, P::L) where {G<:LSurvivalResp,L<:AbstractLSurvivalParms} return PHModel(R, P, "efron") end @@ -155,9 +137,6 @@ mutable struct PHSurv{G<:Array{T} where {T<:PHModel}} <: AbstractNPSurv fit::Bool end -""" -$DOC_PHSURV -""" function PHSurv(fitlist::Array{T}, eventtypes) where {T<:PHModel} bhlist = [ft.bh for ft in fitlist] bhlist = [hcat(bh, fill(eventtypes[i], size(bh, 1))) for (i, bh) in enumerate(bhlist)] @@ -171,9 +150,6 @@ function PHSurv(fitlist::Array{T}, eventtypes) where {T<:PHModel} PHSurv(fitlist, eventtypes, times, surv, risk, bh[:, 1], event, false) end -""" -$DOC_PHSURV -""" function PHSurv(fitlist::Array{T}) where {T<:PHModel} eventtypes = collect(eachindex(fitlist)) PHSurv(fitlist, eventtypes) @@ -827,7 +803,7 @@ end """ $DOC_FIT_PHSURV """ -function fit(::Type{M}, fitlist::Vector{<:T}, ; fitargs...) where {M<:PHSurv,T<:PHModel} +function fit(::Type{M}, fitlist::Vector{T}, ; fitargs...) where {M<:PHSurv,T<:PHModel} res = M(fitlist) @@ -837,7 +813,7 @@ end """ $DOC_FIT_PHSURV """ -risk_from_coxphmodels(fitlist::Array{T}, args...; kwargs...) where {T<:PHModel} = +risk_from_coxphmodels(fitlist::Vector{T}, args...; kwargs...) where {T<:PHModel} = fit(PHSurv, fitlist, args...; kwargs...) diff --git a/src/data_generators.jl b/src/data_generators.jl index ec91ced..41b65fc 100644 --- a/src/data_generators.jl +++ b/src/data_generators.jl @@ -20,7 +20,7 @@ end """ $DOC_DGM """ -function dgm(rng, n, maxT; afun = int_0, yfun = yprob, lfun = lprob) +function dgm(rng::MersenneTwister, n::Int, maxT::Int; afun = int_0, yfun = yprob, lfun = lprob) V = rand(rng, n) LAY = Array{Float64,2}(undef, n * maxT, 4) keep = ones(Bool, n * maxT) @@ -43,13 +43,13 @@ function dgm(rng, n, maxT; afun = int_0, yfun = yprob, lfun = lprob) end id[findall(keep)], time[findall(keep)] .- 1, time[findall(keep)], LAY[findall(keep), :] end -dgm(n, maxT; kwargs...) = dgm(MersenneTwister(), n, maxT; kwargs...) +dgm(n::Int, maxT::Int; kwargs...) = dgm(MersenneTwister(), n::Int, maxT::Int; kwargs...) """ $DOC_DGM_COMPRISK """ -function dgm_comprisk(rng, n) +function dgm_comprisk(rng::MersenneTwister, n::Int) z = rand(rng, n) .* 5 x = rand(rng, n) .* 5 dt1 = Weibull.(fill(0.75, n), inv.(exp.(-x .- z))) @@ -72,4 +72,4 @@ function dgm_comprisk(rng, n) event, round.(weights, digits = 4) end -dgm_comprisk(n) = dgm_comprisk(MersenneTwister(), n) +dgm_comprisk(n) = dgm_comprisk(MersenneTwister(), n) \ No newline at end of file diff --git a/src/docstr.jl b/src/docstr.jl index 051f819..47c745a 100644 --- a/src/docstr.jl +++ b/src/docstr.jl @@ -1,207 +1,207 @@ ####### module structs DOC_ABSTRACTLSURVRESP = raw""" - AbstractLsurvResp +AbstractLsurvResp - Abstract type representing a model response vector - """ +Abstract type representing a model response vector +""" DOC_ABSTRACTLSURVPARMS = raw""" - AbstractLsurvParms +AbstractLsurvParms - Abstract type representing a model predictors and coefficient parameters - """ +Abstract type representing a model predictors and coefficient parameters +""" DOC_ABSTRACTPH = raw""" - Abstract type for proportional hazards models - """ +Abstract type for proportional hazards models +""" DOC_ABSTRACTNPSURV = raw""" - Abstract type for non-parametric survival models, including Kaplan-Meier, Aalen Johansen, and Cox-model based estimates of survival using an Aalen-Johansen-like estimator - """ +Abstract type for non-parametric survival models, including Kaplan-Meier, Aalen Johansen, and Cox-model based estimates of survival using an Aalen-Johansen-like estimator +""" DOC_LSURVRESP = raw""" - Outcome type for survival outcome subject to left truncation and right censoring. - - Will not generally be needed by users - - Parameters - - `enter`: Time at observation start - - `exit`: Time at observation end - - `y`: event occurrence in observation - - `wts`: observation weights - - `eventtimes`: unique event times - - `origin`: origin on the time scale - - `id`: person level identifier (must be wrapped in ID() function) + Outcome type for survival outcome subject to left truncation and right censoring. + Will not generally be needed by users -```julia - struct LSurvivalResp{ - E<:AbstractVector, - X<:AbstractVector, - Y<:AbstractVector, - W<:AbstractVector, - T<:Real, - I<:AbstractLSurvivalID, - } <: AbstractLSurvivalResp - enter::E - exit::X - y::Y - wts::W - eventtimes::E - origin::T - id::Vector{I} - end - -``` - -```julia - LSurvivalResp( - enter::E, - exit::X, - y::Y, - wts::W, - id::Vector{I}, - ) where { - E<:Vector, - X<:Vector, - Y<:Union{Vector{<:Real},BitVector}, - W<:Vector, - I<:AbstractLSurvivalID, - } -``` + Parameters +- `enter`: Time at observation start +- `exit`: Time at observation end +- `y`: event occurrence in observation +- `wts`: observation weights +- `eventtimes`: unique event times +- `origin`: origin on the time scale +- `id`: person level identifier (must be wrapped in ID() function) + + + ```julia + struct LSurvivalResp{ + E<:AbstractVector, + X<:AbstractVector, + Y<:AbstractVector, + W<:AbstractVector, + T<:Real, + I<:AbstractLSurvivalID, + } <: AbstractLSurvivalResp + enter::E + exit::X + y::Y + wts::W + eventtimes::E + origin::T + id::Vector{I} + end + + ``` + + ```julia + LSurvivalResp( + enter::E, + exit::X, + y::Y, + wts::W, + id::Vector{I}, + ) where { + E<:Vector, + X<:Vector, + Y<:Union{Vector{<:Real},BitVector}, + W<:Vector, + I<:AbstractLSurvivalID, +} + ``` + + ```julia + LSurvivalResp( + enter::E, + exit::X, + y::Y, + id::Vector{I}, + ) + + ``` + + ```julia + LSurvivalResp( + y::Vector{Y}, + wts::W, + id::Vector{I}, + ) where {Y<:AbstractSurvTime,W<:Vector,I<:AbstractLSurvivalID} + ``` -```julia - LSurvivalResp( + ```julia + LSurvivalResp( enter::E, exit::X, y::Y, - id::Vector{I}, - ) - -``` - -```julia - LSurvivalResp( - y::Vector{Y}, - wts::W, - id::Vector{I}, - ) where {Y<:AbstractSurvTime,W<:Vector,I<:AbstractLSurvivalID} -``` + ) where {E<:Vector,X<:Vector,Y<:Union{Vector{<:Real},BitVector}} + ``` -```julia - LSurvivalResp( - enter::E, - exit::X, - y::Y, - ) where {E<:Vector,X<:Vector,Y<:Union{Vector{<:Real},BitVector}} -``` + ```julia + LSurvivalResp(exit::X, y::Y) where {X<:Vector,Y<:Vector} + ``` -```julia - LSurvivalResp(exit::X, y::Y) where {X<:Vector,Y<:Vector} -``` + # Examples - # Examples + ```julia + # no late entry + LSurvivalResp([.5, .6], [1,0]) -```julia - # no late entry - LSurvivalResp([.5, .6], [1,0]) + ``` -``` - - """ +""" DOC_LSURVCOMPRESP = raw""" - Outcome type for competing risk survival outcomes subject to left truncation and right censoring (not generally needed for users) - - Parameters - - `enter` Time at observation start - - `exit` Time at observation end - - `y` event occurrence in observation - - `wts` observation weights - - `eventtimes` unique event times - - `origin` origin on the time scale - - `id` person level identifier (must be wrapped in ID() function) - - `eventtypes` vector of unique event types - - `eventmatrix` matrix of indicators on the observation level - - # Signatures: - -```julia - struct LSurvivalCompResp{ - E<:AbstractVector, - X<:AbstractVector, - Y<:AbstractVector, - W<:AbstractVector, - T<:Real, - I<:AbstractLSurvivalID, - V<:AbstractVector, - M<:AbstractMatrix, - } <: AbstractLSurvivalResp - enter::E - exit::X - y::Y - wts::W - eventtimes::X - origin::T - id::Vector{I} - eventtypes::V - eventmatrix::M - end -``` - -```julia - LSurvivalCompResp( - enter::E, - exit::X, - y::Y, - wts::W, - id::Vector{I} - ) -``` + Outcome type for competing risk survival outcomes subject to left truncation and right censoring (not generally needed for users) -```julia - LSurvivalCompResp( - enter::E, - exit::X, - y::Y, - id::Vector{I} - ) -``` + Parameters +- `enter` Time at observation start +- `exit` Time at observation end +- `y` event occurrence in observation +- `wts` observation weights +- `eventtimes` unique event times +- `origin` origin on the time scale +- `id` person level identifier (must be wrapped in ID() function) +- `eventtypes` vector of unique event types +- `eventmatrix` matrix of indicators on the observation level + + # ## Signatures: + + ```julia + struct LSurvivalCompResp{ + E<:AbstractVector, + X<:AbstractVector, + Y<:AbstractVector, + W<:AbstractVector, + T<:Real, + I<:AbstractLSurvivalID, + V<:AbstractVector, + M<:AbstractMatrix, + } <: AbstractLSurvivalResp + enter::E + exit::X + y::Y + wts::W + eventtimes::X + origin::T + id::Vector{I} + eventtypes::V + eventmatrix::M + end + ``` + + ```julia + LSurvivalCompResp( + enter::E, + exit::X, + y::Y, + wts::W, + id::Vector{I} + ) + ``` + + ```julia + LSurvivalCompResp( + enter::E, + exit::X, + y::Y, + id::Vector{I} + ) + ``` + + ```julia + LSurvivalCompResp( + enter::E, + exit::X, + y::Y, + wts::W, + ) + ``` -```julia - LSurvivalCompResp( - enter::E, - exit::X, - y::Y, - wts::W, - ) -``` + ```julia + LSurvivalCompResp( + enter::E, + exit::X, + y::Y, + ) + ``` -```julia - LSurvivalCompResp( - enter::E, + ```julia + LSurvivalCompResp( exit::X, y::Y, - ) -``` - -```julia - LSurvivalCompResp( - exit::X, - y::Y, - ) where {X<:Vector,Y<:Union{Vector{<:Real},BitVector}} -``` - """ + ) where {X<:Vector,Y<:Union{Vector{<:Real},BitVector}} + ``` +""" DOC_CONFINT = raw""" ```julia using LSurvival dat1= ( - time = [1,1,6,6,8,9], - status = [1,0,1,1,0,1], - x = [1,1,1,0,0,0] + time = [1,1,6,6,8,9], + status = [1,0,1,1,0,1], + x = [1,1,1,0,0,0] ) ft = coxph(@formula(Surv(time, status) ~ x),dat1, keepx=true) @@ -216,11 +216,11 @@ using LSurvival ```julia dat1clust= ( - id = [1,2,3,3,4,4,5,5,6,6], - enter = [0,0,0,1,0,1,0,1,0,1], - exit = [1,1,1,6,1,6,1,8,1,9], - status = [1,0,0,1,0,1,0,0,0,1], - x = [1,1,1,1,0,0,0,0,0,0] + id = [1,2,3,3,4,4,5,5,6,6], + enter = [0,0,0,1,0,1,0,1,0,1], + exit = [1,1,1,6,1,6,1,8,1,9], + status = [1,0,0,1,0,1,0,0,0,1], + x = [1,1,1,1,0,0,0,0,0,0] ) ft2 = coxph(@formula(Surv(enter, exit, status) ~ x),dat1clust, id=ID.(dat1clust.id), keepx=true) @@ -232,119 +232,119 @@ using LSurvival ```` - """ +""" DOC_PHMODEL = raw""" - PHModel: Mutable object type for proportional hazards regression (not generally needed for users) - - Parameters - - `R` Survival response - - `P` # parameters - - `ties` String: "efron" or "breslow" - - `fit` Bool: logical for whether the model has been fitted - - `bh` AbstractMatrix: baseline hazard estimates + PHModel: Mutable object type for proportional hazards regression (not generally needed for users) - # Signatures - -```julia - mutable struct PHModel{G<:LSurvivalResp,L<:AbstractLSurvivalParms} <: AbstractPH - R::G # Survival response - P::L # parameters - ties::String #"efron" or"breslow" - fit::Bool - bh::AbstractMatrix - end - - PHModel( - R::G, - P::L, - ties::String, - fit::Bool, - ) where {G<:LSurvivalResp,L<:AbstractLSurvivalParms} - PHModel(R::G, P::L, ties::String) where {G<:LSurvivalResp,L<:AbstractLSurvivalParms} - PHModel(R::G, P::L) where {G<:LSurvivalResp,L<:AbstractLSurvivalParms} -``` - Methods: fit, coef, confint, std_err, show + Parameters +- `R` Survival response +- `P` # parameters +- `ties` String: "efron" or "breslow" +- `fit` Bool: logical for whether the model has been fitted +- `bh` AbstractMatrix: baseline hazard estimates + + # ## Signatures + + ```julia + mutable struct PHModel{G<:LSurvivalResp,L<:AbstractLSurvivalParms} <: AbstractPH + R::G # Survival response + P::L # parameters + ties::String #"efron" or"breslow" + fit::Bool + bh::AbstractMatrix + end + + PHModel( + R::G, + P::L, + ties::String, + fit::Bool, + ) where {G<:LSurvivalResp,L<:AbstractLSurvivalParms} + PHModel(R::G, P::L, ties::String) where {G<:LSurvivalResp,L<:AbstractLSurvivalParms} + PHModel(R::G, P::L) where {G<:LSurvivalResp,L<:AbstractLSurvivalParms} + ``` + Methods: fit, coef, confint, std_err, show - # Example + # Example -```@example - using LSurvival - using Random - import LSurvival: _stepcox!, dgm_comprisk - - z,x,t,d, event,wt = dgm_comprisk(MersenneTwister(1212), 100); - enter = zeros(length(t)); - X = hcat(x,z); - R = LSurvivalResp(enter, t, Int.(d), wt) - P = PHParms(X) - mf = PHModel(R,P) - LSurvival._fit!(mf) -``` - """ + ```@example + using LSurvival + using Random + import LSurvival: _stepcox!, dgm_comprisk + + z,x,t,d, event,wt = dgm_comprisk(MersenneTwister(1212), 100); + enter = zeros(length(t)); + X = hcat(x,z); + R = LSurvivalResp(enter, t, Int.(d), wt) + P = PHParms(X) + mf = PHModel(R,P) + LSurvival._fit!(mf) + ``` +""" DOC_PHSURV = raw""" - Mutable type for proportional hazards models (not generally needed by users) +Mutable type for proportional hazards models (not generally needed by users) - PHSsurv: Object type for proportional hazards regression +PHSsurv: Object type for proportional hazards regression - surv::Vector{Float64} - risk::Matrix{Float64} - basehaz::Vector{Float64} - event::Vector{Float64} +surv::Vector{Float64} +risk::Matrix{Float64} +basehaz::Vector{Float64} +event::Vector{Float64} - - `fitlist`: vector of PHSurv objects (Cox model fits) - - `eventtypes`: vector of unique event types - - `times`: unique event times - - `surv`: Overall survival at each time - - `risk`: Cause-specific risk at each time (1 for each outcome type) - - `basehaz`: baseline hazard for a specific event type - - `event`: value of event type that occurred at each time +- `fitlist`: vector of PHSurv objects (Cox model fits) +- `eventtypes`: vector of unique event types +- `times`: unique event times +- `surv`: Overall survival at each time +- `risk`: Cause-specific risk at each time (1 for each outcome type) +- `basehaz`: baseline hazard for a specific event type +- `event`: value of event type that occurred at each time - Methods: fit, show +Methods: fit, show ```julia - mutable struct PHSurv{G<:Array{T} where {T<:PHModel}} <: AbstractNPSurv - fitlist::G - eventtypes::AbstractVector - times::AbstractVector - surv::Vector{Float64} - risk::Matrix{Float64} - basehaz::Vector{Float64} - event::Vector{Float64} - end - - PHSurv(fitlist::Array{T}, eventtypes) where {T<:PHModel} - PHSurv(fitlist::Array{T}) where {T<:PHModel} +mutable struct PHSurv{G<:Array{T} where {T<:PHModel}} <: AbstractNPSurv +fitlist::G +eventtypes::AbstractVector +times::AbstractVector +surv::Vector{Float64} +risk::Matrix{Float64} +basehaz::Vector{Float64} +event::Vector{Float64} +end + +PHSurv(fitlist::Array{T}, eventtypes) where {T<:PHModel} +PHSurv(fitlist::Array{T}) where {T<:PHModel} ``` - """ +""" DOC_ID = raw""" - Type for identifying individuals in survival outcomes. +Type for identifying individuals in survival outcomes. - Used for the id argument in - - Outcome types: LSurvivalResp, LSurvivalCompResp - - Model types: PHModel, KMRisk, AJRisk +Used for the id argument in + - Outcome types: LSurvivalResp, LSurvivalCompResp + - Model types: PHModel, KMRisk, AJRisk Accepts any Number or String. There is no significance to having this particular struct, but it enables easier use of multiple dispatch. ```@example - [ID(i) for i in 1:10] +[ID(i) for i in 1:10] ``` - """ +""" DOC_STRATA = raw""" - Type for identifying individuals in survival outcomes. - Used for the strata argument in PHModel (not yet implemented) +Type for identifying individuals in survival outcomes. +Used for the strata argument in PHModel (not yet implemented) - Accepts any Number or String. There is no significance to having this particular struct, but it enables easier use of multiple dispatch. +Accepts any Number or String. There is no significance to having this particular struct, but it enables easier use of multiple dispatch. ```julia - [Strata(i) for i in 1:10] +[Strata(i) for i in 1:10] ``` - """ +""" ####### Primary methods @@ -354,78 +354,78 @@ DOC_COXPH = raw""" Alias for`fit(PHModel, ..., )`. - Also see ?fit section on "Fit method for AbstractPH objects" +Also see ?fit section on "Fit method for AbstractPH objects" - Signatures + ## Signatures ## using matrix/vector interface ```julia - coxph(X::AbstractMatrix, enter::AbstractVector, exit::AbstractVector, y::AbstractVector; ) - coxph(X::AbstractMatrix, exit::AbstractVector, y::AbstractVector; ) # enter assumed to be 0 - coxph(X::AbstractMatrix, exit::AbstractVector; ) # enter assumed to be 0, y assumed to be 1 +coxph(X::AbstractMatrix, enter::AbstractVector, exit::AbstractVector, y::AbstractVector; ) +coxph(X::AbstractMatrix, exit::AbstractVector, y::AbstractVector; ) # enter assumed to be 0 +coxph(X::AbstractMatrix, exit::AbstractVector; ) # enter assumed to be 0, y assumed to be 1 ``` - Parameters - - `X`: a design matrix (matrix of predictors) - - `enter`: Time at observation start - - `exit`: Time at observation end - - `y`: event occurrence in observation - - Keyword parameters - - `ties` method for ties ("efron" or "breslow") - - `wts` vector of observation weights - - `keepx` (boolean) keep the design matrix after fitting the model? (default `true`, `false` can be used to save space) - - `keepy` (boolean) keep the outcome vector after fitting the model? (default `true`, `false` can be used to save space) - - See [https://en.wikipedia.org/wiki/Proportional_hazards_model#Likelihood_when_there_exist_tied_times](for the log-partial liklihoods given for Efron's and Breslow's method) - +Parameters +- `X`: a design matrix (matrix of predictors) +- `enter`: Time at observation start +- `exit`: Time at observation end +- `y`: event occurrence in observation + +Keyword parameters +- `ties` method for ties ("efron" or "breslow") +- `wts` vector of observation weights +- `keepx` (boolean) keep the design matrix after fitting the model? (default `true`, `false` can be used to save space) +- `keepy` (boolean) keep the outcome vector after fitting the model? (default `true`, `false` can be used to save space) + +See [https://en.wikipedia.org/wiki/Proportional_hazards_model#Likelihood_when_there_exist_tied_times](for the log-partial liklihoods given for Efron's and Breslow's method) - ## using StatsAPI `@formula` interface + +## using StatsAPI `@formula` interface ```julia - coxph(f::Formula, dat; ) +coxph(f::Formula, dat; ) ``` - Parameters - - `f`: a `@formula` object - - `dat`: a Tables.jl compatible table +Parameters +- `f`: a `@formula` object +- `dat`: a Tables.jl compatible table - Keyword parameters - - `ties` method for ties ("efron" or "breslow") - - `wts` vector of observation weights - - `keepx` (boolean) keep the design matrix after fitting the model? (default `true`, `false` can be used to save space) - - `keepy` (boolean) keep the outcome vector after fitting the model? (default `true`, `false` can be used to save space) - - `contrasts` an optional Dict used to process the columns in `dat` (CF: See the contrasts argument in GLM.glm, and https://juliastats.org/StatsModels.jl/stable/contrasts/) +Keyword parameters +- `ties` method for ties ("efron" or "breslow") +- `wts` vector of observation weights +- `keepx` (boolean) keep the design matrix after fitting the model? (default `true`, `false` can be used to save space) +- `keepy` (boolean) keep the outcome vector after fitting the model? (default `true`, `false` can be used to save space) +- `contrasts` an optional Dict used to process the columns in `dat` (CF: See the contrasts argument in GLM.glm, and https://juliastats.org/StatsModels.jl/stable/contrasts/) # Example ```julia - using LSurvival - using Random - z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 200); - enter = zeros(length(t)); - X = hcat(x,z); - tab = (enter=enter, t=t, d=d, x=x[:],z=z[:]) - - # three identical ways to fit the model - m1 = fit(PHModel, X, enter, t, d, ties="breslow") - m2 = coxph(X, enter, t, d, ties="breslow", keepx=false, keepy=false) - m3 = coxph(@formula(Surv(enter, t, d)~x+z), tab, ties="breslow", keepx=true, keepy=true) - coeftable(m) - stderror(m3) - stderror(m3, type="robust") - confint(m3, type="robust") +using LSurvival +using Random +z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 200); +enter = zeros(length(t)); +X = hcat(x,z); +tab = (enter=enter, t=t, d=d, x=x[:],z=z[:]) + +# three identical ways to fit the model +m1 = fit(PHModel, X, enter, t, d, ties="breslow") +m2 = coxph(X, enter, t, d, ties="breslow", keepx=false, keepy=false) +m3 = coxph(@formula(Surv(enter, t, d)~x+z), tab, ties="breslow", keepx=true, keepy=true) +coeftable(m) +stderror(m3) +stderror(m3, type="robust") +confint(m3, type="robust") ``` # cluster robust standard errors using the `id` keyword argument ```julia dat1clust= ( - id = [1,2,3,3,4,4,5,5,6,6], - enter = [0,0,0,1,0,1,0,1,0,1], - exit = [1,1,1,6,1,6,1,8,1,9], - status = [1,0,0,1,0,1,0,0,0,1], - x = [1,1,1,1,0,0,0,0,0,0] + id = [1,2,3,3,4,4,5,5,6,6], + enter = [0,0,0,1,0,1,0,1,0,1], + exit = [1,1,1,6,1,6,1,8,1,9], + status = [1,0,0,1,0,1,0,0,0,1], + x = [1,1,1,1,0,0,0,0,0,0] ) ft2 = coxph(@formula(Surv(enter, exit, status) ~ x),dat1clust, id=ID.(dat1clust.id), keepx=true) @@ -440,84 +440,83 @@ DOC_COXPH = raw""" # once robust SE is calculated, coefficient table uses the robust SE for confidence intervals and test statistics ft2 ``` - """ +""" DOC_FIT_PHSURV = raw""" - Competing risk models: +Competing risk models: - Calculate survival curve and cumulative incidence (risk) function, get a set of Cox models (PHModel objects) that are exhaustive for the outcome types +Calculate survival curve and cumulative incidence (risk) function, get a set of Cox models (PHModel objects) that are exhaustive for the outcome types ```julia - fit(::Type{M}, - fitlist::AbstractVector{<:T}, - ; - fitargs...) where {M<:PHSurv, T <: PHModel} +fit(::Type{M}, +fitlist::AbstractVector{<:T}, +; +fitargs...) where {M<:PHSurv, T <: PHModel} ``` - OR +OR ```julia - risk_from_coxphmodels(fitlist::Array{T}, args...; kwargs...) where T <: PHModel +risk_from_coxphmodels(fitlist::Array{T}, args...; kwargs...) where T <: PHModel ``` - - fit for PHSurv objects +fit for PHSurv objects ```@example - using LSurvival - using Random - z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); - enter = zeros(length(t)); - X = hcat(x,rand(length(x))); - - ft1 = coxph(X, enter, t, (event .== 1), ties="breslow"); - nft2 = findall(event .!= 1) - ft2 = coxph(X[nft2,:], enter[nft2], t[nft2], (event .== 2)[nft2], ties="breslow"); - fitlist = [ft1, ft2] - - # Risk at x=0, z=0 (referent values) - # these are equivalent - res = fit(PHSurv, [ft1, ft2]) - res2 = risk_from_coxphmodels([ft1, ft2]) - - # Risk at x=1, z=0.5 - res3 = risk_from_coxphmodels([ft1, ft2], pred_profile=[1.0, 0.5]) - +using LSurvival +using Random + z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); + enter = zeros(length(t)); + X = hcat(x,rand(length(x))); + + ft1 = coxph(X, enter, t, (event .== 1), ties="breslow"); + nft2 = findall(event .!= 1) + ft2 = coxph(X[nft2,:], enter[nft2], t[nft2], (event .== 2)[nft2], ties="breslow"); + fitlist = [ft1, ft2] + +# Risk at x=0, z=0 (referent values) +# these are equivalent + res = fit(PHSurv, [ft1, ft2]) + res2 = risk_from_coxphmodels([ft1, ft2]) + +# Risk at x=1, z=0.5 + res3 = risk_from_coxphmodels([ft1, ft2], pred_profile=[1.0, 0.5]) + ``` - """ +""" DOC_E_YEARSOFLIFELOST = raw""" - # Deprecated function +# Deprecated function - Expected number of years of life lost due to cause k +Expected number of years of life lost due to cause k ```julia - using Distributions, Plots, Random - plotly() - z,x,t,d, event,weights = dgm_comprisk(n=200, rng=MersenneTwister(1232)); - - times_sd, cumhaz, ci_sd = subdistribution_hazard_cuminc(zeros(length(t)), t, event, dvalues=[1.0, 2.0]); - times_aj, S, ajest, riskset, events = aalen_johansen(zeros(length(t)), t, event, dvalues=[1.0, 2.0]); - time0, eyll0 = e_yearsoflifelost(times_aj, 1.0 .- S) - time2, eyll1 = e_yearsoflifelost(times_aj, ajest[:,1]) - time1, eyll2 = e_yearsoflifelost(times_sd, ci_sd) - # CI estimates - plot(times_aj, ajest[:,1], label="AJ", st=:step); - plot!(times_sd, ci_sd, label="SD", st=:step) - # expected years of life lost by time k, given a specific cause or overall - plot(time0, eyll0, label="Overall", st=:step); - plot!(time1, eyll1, label="AJ", st=:step); - plot!(time2, eyll2, label="SD", st=:step) +using Distributions, Plots, Random +plotly() +z,x,t,d, event,weights = dgm_comprisk(n=200, rng=MersenneTwister(1232)); + +times_sd, cumhaz, ci_sd = subdistribution_hazard_cuminc(zeros(length(t)), t, event, dvalues=[1.0, 2.0]); +times_aj, S, ajest, riskset, events = aalen_johansen(zeros(length(t)), t, event, dvalues=[1.0, 2.0]); +time0, eyll0 = e_yearsoflifelost(times_aj, 1.0 .- S) +time2, eyll1 = e_yearsoflifelost(times_aj, ajest[:,1]) +time1, eyll2 = e_yearsoflifelost(times_sd, ci_sd) + # CI estimates +plot(times_aj, ajest[:,1], label="AJ", st=:step); +plot!(times_sd, ci_sd, label="SD", st=:step) + # expected years of life lost by time k, given a specific cause or overall +plot(time0, eyll0, label="Overall", st=:step); +plot!(time1, eyll1, label="AJ", st=:step); +plot!(time2, eyll2, label="SD", st=:step) ``` - """ +""" ####### generic methods DOC_FIT_ABSTRACPH = raw""" Fit method for AbstractPH objects (Cox models) -Keyword arguments (used here, and passed on to internal structs) + Keyword arguments (used here, and passed on to internal structs) - `ties` "breslow" or "efron" (default) - `wts` observation weights @@ -527,7 +526,8 @@ Keyword arguments (used here, and passed on to internal structs) - `id` cluster or individual level ID (defaults to a unique value for each row of data) see note below on ID - `contrasts` StatsModel style contrasts (dicts) that can be used for variable transformations/indicator variable creation (e.g. https://juliastats.org/StatsModels.jl/stable/contrasts/) -```julia + ## Signatures + ```julia fit(::Type{M}, X::AbstractMatrix,#{<:FP}, enter::AbstractVector{<:Real}, @@ -538,16 +538,17 @@ Keyword arguments (used here, and passed on to internal structs) wts::AbstractVector{<:Real} = similar(y, 0), offset::AbstractVector{<:Real} = similar(y, 0), fitargs...) where {M<:AbstractPH} -``` + ``` -``` + ``` coxph(f::FormulaTerm, data; kwargs...) -``` -``` -coxph(X, enter, exit, y, args...; kwargs...) -``` + ``` -```julia + ``` + coxph(X, enter, exit, y, args...; kwargs...) + ``` + + ```julia using LSurvival, Random z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); enter = zeros(length(t)); @@ -555,14 +556,13 @@ coxph(X, enter, exit, y, args...; kwargs...) m = fit(PHModel, X, enter, t, d, ties="efron") m2 = fit(PHModel, X, enter, t, d, ties="breslow") coeftable(m) -``` + ``` ## Note on use of `id` keyword + `id` is not needed in person-period structure data for standard estimates or confidence intervals -`id` is not needed in person-period structure data for standard estimates or confidence intervals - -```@example + ```@example using Random, LSurvival id, int, outt, dat = LSurvival.dgm(MersenneTwister(123123), 100, 100; afun = LSurvival.int_0) @@ -576,28 +576,28 @@ coxph(X, enter, exit, y, args...; kwargs...) f = @formula(Surv(int, outt,d)~x+z) coxph(f, data) -``` + ``` # BUT, you must specify `id` to get appropriate robust variance and some other statistics. Here is an example where the same data are presented in two different ways, which should yield identical statistics when used in Cox model. -```@example + ```@example dat1 = ( time = [1,1,6,6,8,9], status = [1,0,1,1,0,1], x = [1,1,1,0,0,0] -) -ft = coxph(@formula(Surv(time,status)~x),dat1) -bic(ft) -nobs(ft) -dof_residual(ft) -# lrtest is another one - -stderror(ft) # model based -stderror(ft, type="robust") # robust standard error, based on dfbeta residuals -ft + ) + ft = coxph(@formula(Surv(time,status)~x),dat1) + bic(ft) + nobs(ft) + dof_residual(ft) + # lrtest is another one -# now using "clustered" data with multiple observations per individual + stderror(ft) # model based + stderror(ft, type="robust") # robust standard error, based on dfbeta residuals + ft + + # now using "clustered" data with multiple observations per individual dat1clust= ( id = [1,2,3,3,4,4,5,5,6,6], enter = [0,0,0,1,0,1,0,1,0,1], @@ -616,11 +616,11 @@ ft stderror(ft2, type="robust") # robust standard error, based on `id` level dfbeta residuals (CORRECT) # once robust SE is calculated, coefficient table uses the robust SE for confidence intervals and test statistics ft2 # CORRECT (compare to `ft` object) -``` + ``` ## NOTE THE FOLLOWING IS INCORRECT because the `id` keyword is omitted -```@example + ```@example ft2w = coxph(@formula(Surv(enter, exit, status) ~ x),dat1clust) bic(ft2w) # INCORRECT nobs(ft2w) # INCORRECT @@ -631,108 +631,140 @@ ft ft2w # the coefficient table now shows incorrect confidence intervals and test statistics -``` - """ + ``` +""" DOC_FIT_KMSURV = raw""" Kaplan-Meier estimator for cumulative conditional risk - Signatures + ## Signatures ```julia - StatsBase.fit!(m::T; kwargs...) where {T<:AbstractNPSurv} +StatsBase.fit!(m::T; kwargs...) where {T<:AbstractNPSurv} - kaplan_meier(enter::AbstractVector, exit::AbstractVector, y::AbstractVector, - ; ) +kaplan_meier(enter::AbstractVector, exit::AbstractVector, y::AbstractVector, + ; kwargs...) ``` +## Keyword arguments +- wts::Vector{<:Real} = similar(enter, 0); vector of case weights (or zero length vector) for each observation +- id::Vector{<:AbstractLSurvivalID} = [ID(i) for i in eachindex(y)]; Vector of AbstractSurvID objects denoting observations that form a single unit (used in bootstrap and jackknife methods) +- atol = 0.00000001; absolute tolerance for defining tied event times +- censval = 0; value of the outcome to be considered a censored event +- keepy = true; keep the outcome vector after fitting (may save memory with large datasets) +- eps = 0.00000001; deprecated (replaced by atol) + + + ```@example - using LSurvival - using Random - z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); - enter = zeros(length(t)); - m = fit(KMSurv, enter, t, d) - mw = fit(KMSurv, enter, t, d, wts=wt) +using LSurvival +using Random +z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); +enter = zeros(length(t)); +m = fit(KMSurv, enter, t, d) +mw = fit(KMSurv, enter, t, d, wts=wt) ``` - or, equivalently: +or, equivalently: ```julia - kaplan_meier(enter, t, d, wts=wt) +kaplan_meier(enter, t, d, wts=wt) ``` - """ +""" DOC_FIT_AJSURV = raw""" - Aalen-Johansen estimator for cumulative cause-specific risk (in the presence of competing events) +Aalen-Johansen estimator for cumulative cause-specific risk (in the presence of competing events) - Signatures +## Signatures ```julia - StatsBase.fit!(m::T; kwargs...) where {T<:AbstractNPSurv} - - aalen_johansen(enter::AbstractVector, exit::AbstractVector, y::AbstractVector, - ; ) - + StatsBase.fit!(m::T; kwargs...) where {T<:AbstractNPSurv} + + aalen_johansen(enter::AbstractVector, exit::AbstractVector, y::AbstractVector, + ; kwargs...) + ``` - + +## Keyword arguments +- wts::Vector{<:Real} = similar(enter, 0); vector of case weights (or zero length vector) for each observation +- id::Vector{<:AbstractLSurvivalID} = [ID(i) for i in eachindex(y)]; Vector of AbstractSurvID objects denoting observations that form a single unit (used in bootstrap and jackknife methods) +- atol = 0.00000001; absolute tolerance for defining tied event times +- keepy = true; keep the outcome vector after fitting (may save memory with large datasets) +- eps = 0.00000001; deprecated (replaced by atol) + ```@example - using LSurvival - using Random - z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); - enter = zeros(length(t)); - # event variable is coded 0[referent],1,2 - m = fit(AJSurv, enter, t, event) - mw = fit(AJSurv, enter, t, event, wts=wt) +using LSurvival +using Random +z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); +enter = zeros(length(t)); + # event variable is coded 0[referent],1,2 +m = fit(AJSurv, enter, t, event) +mw = fit(AJSurv, enter, t, event, wts=wt) ``` - or, equivalently: +or, equivalently: ```julia - aalen_johansen(enter, t, event, wts=wt) +aalen_johansen(enter, t, event, wts=wt) ``` - """ +""" DOC_VARIANCE_KMSURV = raw""" - Greenwood's formula for variance and confidence intervals of a Kaplan-Meier survival curve +Greenwood's formula for variance and confidence intervals of a Kaplan-Meier survival curve - Signatures: + ## Signatures: ```julia - StatsBase.stderror(m::KMSurv) -``` +StatsBase.stderror(m::KMSurv) -```julia - StatsBase.confint(m:KMSurv; level=0.95, method="normal") +StatsBase.confint(m:KMSurv; level=0.95, method="normal") ``` - method: - - "normal" normality-based confidence intervals - - "lognlog" log(-log(S(t))) based confidence intervals + +## Keyword arguments + +method: + - "normal" normality-based confidence intervals + - "lognlog" log(-log(S(t))) based confidence intervals ```@example - using LSurvival - using Random - z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); - enter = zeros(length(t)); - m = fit(KMSurv, enter, t, d) - mw = fit(KMSurv, enter, t, d, wts=wt) - stderror(m) - confint(m, method="normal") - confint(m, method="lognlog") # log-log transformation +using LSurvival +using Random +z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); +enter = zeros(length(t)); +m = fit(KMSurv, enter, t, d) +mw = fit(KMSurv, enter, t, d, wts=wt) +stderror(m) +confint(m, method="normal") +confint(m, method="lognlog") # log-log transformation ``` - """ +""" DOC_VARIANCE_AJSURV = raw""" - Greenwood's formula for variance and confidence intervals of a Aalen-Johansen risk function +Greenwood's formula for variance and confidence intervals of a Aalen-Johansen risk function + +## Signatures: + +```julia +StatsBase.stderror(m::AJSurv) + +StatsBase.confint(m:AJSurv; level=0.95, method="normal") +``` + +## Keyword arguments +- method + - "normal" normality-based confidence intervals + - "lognlog" log(-log(S(t))) based confidence intervals + ```@example - res = z, x, outt, d, event, wts = LSurvival.dgm_comprisk(MersenneTwister(123123), 100) - int = zeros(length(d)) # no late entry - m = fit(AJSurv, int, outt, event) - stderror(m) - confint(m, level=0.95) +res = z, x, outt, d, event, wts = LSurvival.dgm_comprisk(MersenneTwister(123123), 100) +int = zeros(length(d)) # no late entry +m = fit(AJSurv, int, outt, event) +stderror(m) +confint(m, level=0.95) ``` - """ +""" @@ -746,13 +778,13 @@ DOC_LGH_BRESLOW = raw""" Updates over all observations - Signature + ## Signature ```julia - lgh_breslow!(m::M, j, caseidx, risksetidx) where {M<:AbstractPH} +lgh_breslow!(m::M, j, caseidx, risksetidx) where {M<:AbstractPH} ``` - - """ + +""" DOC_LGH_EFRON = raw""" Update the partial likelihood, gradient and Hessian values from a Cox model fit (used during fitting, not generally useful for users). @@ -761,12 +793,12 @@ DOC_LGH_EFRON = raw""" Updates over all observations - Signature + ## Signature ```julia - lgh_efron!(m::M, j, caseidx, risksetidx) where {M<:AbstractPH} +lgh_efron!(m::M, j, caseidx, risksetidx) where {M<:AbstractPH} ``` - """ +""" DOC_LGH = raw""" Update the partial likelihood, gradient and Hessian values from a Cox model fit (used during fitting, not generally useful for users). @@ -775,12 +807,12 @@ DOC_LGH = raw""" Updates over all a single observation. This is just a simple wrapper that calls `lgh_breslow!` or `lgh_efron!` - Signature + ## Signature ```julia - lgh!(m::M, j, caseidx, risksetidx) where {M<:AbstractPH} +lgh!(m::M, j, caseidx, risksetidx) where {M<:AbstractPH} ``` - """ +""" DOC__UPDATE_PHPARMS = raw""" @@ -790,280 +822,360 @@ DOC__UPDATE_PHPARMS = raw""" Updates over all observations +## Signature + ```julia - function _update_PHParms!( - m::M, - # big indexes - ne::I, - caseidxs::Vector{Vector{T}}, - risksetidxs::Vector{Vector{T}}, +_update_PHParms!( + m::M, + # big indexes + ne::I, + caseidxs::Vector{Vector{T}}, + risksetidxs::Vector{Vector{T}}, ) where {M<:AbstractPH,I<:Int,T<:Int} ``` - """ + +""" DOC_FIT_PHSURV = raw""" - Survival curve estimation using multiple cox models + Survival curve estimation using multiple cox models - ## Function Signatures + ## Signatures -```julia - risk_from_coxphmodels(fitlist::Array{T}, args...; kwargs...) where {T<:PHModel} -``` + ```julia + risk_from_coxphmodels(fitlist::Vector{T}, args...; kwargs...) where {T<:PHModel} -```julia - fit(::Type{M}, fitlist::Vector{<:T}, ; fitargs...) where {M<:PHSurv,T<:PHModel} -``` + fit(::Type{M}, fitlist::Vector{T}, ; fitargs...) where {M<:PHSurv,T<:PHModel} + ``` - ## Optional keywords - - `coef_vectors` = nothing(default) or vector of coefficient vectors from the cox models [will default to the coefficients from fitlist models] - - `pred_profile` = nothing(default) or vector of specific predictor values of the same length as the coef_vectors[1] + ## Optional keywords +- `coef_vectors` = nothing(default) or vector of coefficient vectors from the cox models [will default to the coefficients from fitlist models] +- `pred_profile` = nothing(default) or vector of specific predictor values of the same length as the coef_vectors[1] -```@example - using LSurvival - using Random - # event variable is coded 0[referent],1,2 - z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); - enter = zeros(length(t)); - - ft1 = coxph(hcat(x,z), enter, t, (event .== 1)) - nidx = findall(event .!= 1) - ft2 = coxph(hcat(x,z)[nidx,:], enter[nidx], t[nidx], (event[nidx] .== 2)) - - # risk at referent levels of `x` and `z` - risk_from_coxphmodels([ft1,ft2]) - - # risk at average levels of `x` and `z` - mnx = sum(x)/length(x) - mnz = sum(z)/length(z) - risk_from_coxphmodels([ft1,ft2], pred_profile=[mnx,mnz]) -``` - """ + ```@example + using LSurvival + using Random + # event variable is coded 0[referent],1,2 + z,x,t,d, event,wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000); + enter = zeros(length(t)); + + ft1 = coxph(hcat(x,z), enter, t, (event .== 1)) + nidx = findall(event .!= 1) + ft2 = coxph(hcat(x,z)[nidx,:], enter[nidx], t[nidx], (event[nidx] .== 2)) + + # risk at referent levels of `x` and `z` + risk_from_coxphmodels([ft1,ft2]) + + # risk at average levels of `x` and `z` + mnx = sum(x)/length(x) + mnz = sum(z)/length(z) + # equivalent + fit(PHSurv, [ft1,ft2], pred_profile=[mnx,mnz]) + risk_from_coxphmodels([ft1,ft2], pred_profile=[mnx,mnz]) + ``` +""" ####### data generation functions DOC_DGM = raw""" - Generating discrete survival data without competing risks +Generating discrete survival data without competing risks - Usage: dgm(rng, n, maxT;afun=int_0, yfun=yprob, lfun=lprob) - dgm(n, maxT;afun=int_0, yfun=yprob, lfun=lprob) +## Signatures +``` +dgm(rng::MersenneTwister, n::Int, maxT:Int; afun = int_0, yfun = yprob, lfun = lprob) + +dgm(n::Int, maxT::Int; kwargs...) +``` - Where afun, yfun, and lfun are all functions that take arguments v,l,a and output time-specific values of a, y, and l respectively - Example: +Usage: dgm(rng, n, maxT;afun=int_0, yfun=yprob, lfun=lprob) +dgm(n, maxT;afun=int_0, yfun=yprob, lfun=lprob) + +Where afun, yfun, and lfun are all functions that take arguments v,l,a and output time-specific values of a, y, and l respectively +Example: ```julia - expit(mu) = inv(1.0+exp(-mu)) +expit(mu) = inv(1.0+exp(-mu)) - function aprob(v,l,a) - expit(-1.0 + 3*v + 2*l) - end - - function lprob(v,l,a) - expit(-3 + 2*v + 0*l + 0*a) - end - - function yprob(v,l,a) - expit(-3 + 2*v + 0*l + 2*a) - end - # 10 individuals followed for up to 5 times - LSurvival.dgm(10, 5;afun=aprob, yfun=yprob, lfun=lprob) +function aprob(v,l,a) +expit(-1.0 + 3*v + 2*l) +end + +function lprob(v,l,a) +expit(-3 + 2*v + 0*l + 0*a) +end + +function yprob(v,l,a) +expit(-3 + 2*v + 0*l + 2*a) +end + # 10 individuals followed for up to 5 times +LSurvival.dgm(10, 5;afun=aprob, yfun=yprob, lfun=lprob) ``` - """ +""" DOC_DGM_COMPRISK = raw""" - Generating continuous survival data with competing risks +Generating continuous survival data with competing risks + +## Signatures - Usage: dgm_comprisk(rng, n) - dgm_comprisk(n) +``` +dgm_comprisk(rng::MersenneTwister, n::Int) + +dgm_comprisk(n::Int) +``` - - rng = random number generator - - n = sample size + - rng = random number generator + - n = sample size - Example: +Example: ```julia - using LSurvival - # 100 individuals with two competing events - z,x,t,d,event,weights = LSurvival.dgm_comprisk(100) - +using LSurvival +# 100 individuals with two competing events +z,x,t,d,event,weights = LSurvival.dgm_comprisk(100) + ``` - """ +""" DOC_BOOTSTRAP_PHMODEL = raw""" - Bootstrapping coefficients of a proportional hazards model +Bootstrapping coefficients of a proportional hazards model - Signatures +## Signatures ``` - # single bootstrap draw, keeping the entire object - bootstrap(rng::MersenneTwister, m::PHModel) - bootstrap(m::PHModel) - # muliple bootstrap draws, keeping only coefficient estimates - bootstrap(rng::MersenneTwister, m::PHModel, iter::Int; kwargs...) - bootstrap(m::PHModel, iter::Int; kwargs...) +# single bootstrap draw, keeping the entire object +bootstrap(rng::MersenneTwister, m::PHModel) +bootstrap(m::PHModel) +# muliple bootstrap draws, keeping only coefficient estimates +bootstrap(rng::MersenneTwister, m::PHModel, iter::Int; kwargs...) +bootstrap(m::PHModel, iter::Int; kwargs...) ``` - Returns: - - If using `bootstrap(m)`: a single bootstrap draw - - If using `bootstrap(m, 10)` (e.g.): 10 bootstrap draws of the cumulative cause-specific risks at the end of follow up + +Returns: +- If using `bootstrap(m)`: a single bootstrap draw +- If using `bootstrap(m, 10)` (e.g.): 10 bootstrap draws of the cumulative cause-specific risks at the end of follow up ```julia - using LSurvival, Random +using LSurvival, Random - id, int, outt, data = - LSurvival.dgm(MersenneTwister(1212), 500, 5; afun = LSurvival.int_0) +id, int, outt, data = +LSurvival.dgm(MersenneTwister(1212), 500, 5; afun = LSurvival.int_0) - d, X = data[:, 4], data[:, 1:3] - weights = rand(length(d)) +d, X = data[:, 4], data[:, 1:3] +weights = rand(length(d)) - # survival outcome: - R = LSurvivalResp(int, outt, d, ID.(id)) # specification with ID only - P = PHParms(X) +# survival outcome: +R = LSurvivalResp(int, outt, d, ID.(id)) # specification with ID only +P = PHParms(X) - Mod = PHModel(R, P) - LSurvival._fit!(Mod, start=Mod.P._B, keepx=true, keepy=true) +Mod = PHModel(R, P) +LSurvival._fit!(Mod, start=Mod.P._B, keepx=true, keepy=true) - # careful propogation of bootstrap sampling - idx, R2 = bootstrap(R) - P2 = bootstrap(idx, P) - Modb = PHModel(R2, P2) - LSurvival._fit!(Mod, start=Mod.P._B, keepx=true, keepy=true) +# careful propogation of bootstrap sampling +idx, R2 = bootstrap(R) +P2 = bootstrap(idx, P) +Modb = PHModel(R2, P2) +LSurvival._fit!(Mod, start=Mod.P._B, keepx=true, keepy=true) - # convenience function for bootstrapping a model - Modc = bootstrap(Mod) - LSurvival._fit!(Modc, start=Modc.P._B); - Modc - Modc.P.X == nothing - Modc.R == nothing +# convenience function for bootstrapping a model +Modc = bootstrap(Mod) +LSurvival._fit!(Modc, start=Modc.P._B); +Modc +Modc.P.X == nothing +Modc.R == nothing ``` - Bootstrap Cox model coefficients +Bootstrap Cox model coefficients ``` - LSurvival._fit!(mb, keepx=true, keepy=true, start=[0.0, 0.0]) +LSurvival._fit!(mb, keepx=true, keepy=true, start=[0.0, 0.0]) ``` ```@example - using LSurvival, Random - res = z, x, outt, d, event, wts = LSurvival.dgm_comprisk(MersenneTwister(123123), 200) - int = zeros(length(d)) # no late entry - X = hcat(z, x) - - mainfit = fit(PHModel, X, int, outt, d .* (event .== 1), keepx=true, keepy=true) - - function stddev_finite(x) - n = length(x) - mnx = sum(x)/n - ret = sum((x .- mnx) .^ 2) - ret /= n-1 - sqrt(ret) - end - - # bootstrap standard error versus asymptotic - mb = bootstrap(MersenneTwister(123123), mainfit, 200) - ## bootstrap standard error - [stddev_finite(mb[:,i]) for i in 1:2] - ## asymptotic standard error - stderror(mainfit) - +using LSurvival, Random +res = z, x, outt, d, event, wts = LSurvival.dgm_comprisk(MersenneTwister(123123), 200) +int = zeros(length(d)) # no late entry +X = hcat(z, x) + +mainfit = fit(PHModel, X, int, outt, d .* (event .== 1), keepx=true, keepy=true) + +function stddev_finite(x) + n = length(x) + mnx = sum(x)/n + ret = sum((x .- mnx) .^ 2) + ret /= n-1 + sqrt(ret) +end + +# bootstrap standard error versus asymptotic +mb = bootstrap(MersenneTwister(123123), mainfit, 200) +## bootstrap standard error +[stddev_finite(mb[:,i]) for i in 1:2] +## asymptotic standard error +stderror(mainfit) + ``` - """ +""" DOC_BOOTSTRAP_KMSURV = raw""" - Bootstrap methods for Kaplan-Meier survival curve estimator +Bootstrap methods for Kaplan-Meier survival curve estimator - Signatures + ## Signatures ``` - # single bootstrap draw, keeping the entire object - bootstrap(rng::MersenneTwister, m::KMSurv) - bootstrap(m::KMSurv) - # muliple bootstrap draws, keeping only coefficient estimates - bootstrap(rng::MersenneTwister, m::KMSurv, iter::Int; kwargs...) - bootstrap(m::KMSurv, iter::Int; kwargs...) + # single bootstrap draw, keeping the entire object + bootstrap(rng::MersenneTwister, m::KMSurv) + bootstrap(m::KMSurv) + + # muliple bootstrap draws, keeping only coefficient estimates + bootstrap(rng::MersenneTwister, m::KMSurv, iter::Int; kwargs...) + bootstrap(m::KMSurv, iter::Int; kwargs...) ``` - Returns: - - If using `bootstrap(m)`: a single bootstrap draw - - If using `bootstrap(m, 10)` (e.g.): 10 bootstrap draws of the survival probability at the end of follow up - - + Returns: + - If using `bootstrap(m)`: a single bootstrap draw + - If using `bootstrap(m, 10)` (e.g.): 10 bootstrap draws of the survival probability at the end of follow up + + ```@example - using LSurvival - using Random +using LSurvival +using Random - id, int, outt, data = - LSurvival.dgm(MersenneTwister(1212), 20, 5; afun = LSurvival.int_0) +id, int, outt, data = +LSurvival.dgm(MersenneTwister(1212), 20, 5; afun = LSurvival.int_0) - d, X = data[:, 4], data[:, 1:3] - wts = rand(length(d)) +d, X = data[:, 4], data[:, 1:3] +wts = rand(length(d)) - km1 = kaplan_meier(int, outt, d, id=ID.(id), wts=wts) - km2 = bootstrap(km1, keepy=false) - km3 = bootstrap(km1, 10, keepy=false) - km1 +km1 = kaplan_meier(int, outt, d, id=ID.(id), wts=wts) +km2 = bootstrap(km1, keepy=false) +km3 = bootstrap(km1, 10, keepy=false) +km1 - km1.R - km2.R +km1.R +km2.R ``` - """ +""" DOC_BOOTSTRAP_AJSURV = raw""" - Bootstrap methods for Aalen-Johansen cumulative risk estimator - - Signatures +Bootstrap methods for Aalen-Johansen cumulative risk estimator + + ## Signatures ``` - # single bootstrap draw, keeping the entire object - bootstrap(rng::MersenneTwister, m::AJSurv) - bootstrap(m::AJSurv) - # muliple bootstrap draws, keeping only coefficient estimates - bootstrap(rng::MersenneTwister, m::AJSurv, iter::Int; kwargs...) - bootstrap(m::AJSurv, iter::Int; kwargs...) + # single bootstrap draw, keeping the entire object + bootstrap(rng::MersenneTwister, m::AJSurv) + bootstrap(m::AJSurv) + + # muliple bootstrap draws, keeping only coefficient estimates + bootstrap(rng::MersenneTwister, m::AJSurv, iter::Int; kwargs...) + bootstrap(m::AJSurv, iter::Int; kwargs...) ``` - Returns: - - If using `bootstrap(m)`: a single bootstrap draw - - If using `bootstrap(m, 10)` (e.g.): 10 bootstrap draws of the cumulative cause-specific risks at the end of follow up +Returns: +- If using `bootstrap(m)`: a single bootstrap draw +- If using `bootstrap(m, 10)` (e.g.): 10 bootstrap draws of the cumulative cause-specific risks at the end of follow up ```@example - using LSurvival - using Random +using LSurvival +using Random - z, x, t, d, event, wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 100) - id = 1:length(x) - enter = zeros(length(t)) +z, x, t, d, event, wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 100) +id = 1:length(x) +enter = zeros(length(t)) - aj1 = aalen_johansen(enter, t, event, id=ID.(id), wts=wt) - aj2 = bootstrap(aj1, keepy=false); - ajboot = bootstrap(aj1, 10, keepy=false); - aj1 +aj1 = aalen_johansen(enter, t, event, id=ID.(id), wts=wt) +aj2 = bootstrap(aj1, keepy=false); +ajboot = bootstrap(aj1, 10, keepy=false); +aj1 - aj1.R - aj2.R +aj1.R +aj2.R ``` - """ +""" + +DOC_BOOTSTRAP_SURVRESP = raw""" +Bootstrapping sampling of a survival response + + +``` +id, int, outt, data = +LSurvival.dgm(MersenneTwister(1212), 20, 5; afun = LSurvival.int_0) + +d, X = data[:, 4], data[:, 1:3] +weights = rand(length(d)) + +# survival outcome: +R = LSurvivalResp(int, outt, d, ID.(id)) # specification with ID only +``` +""" + +DOC_BOOTSTRAP_SURVCOMPRESP = raw""" +Bootstrapping sampling of a competing risk survival response + +## Signatures + +``` +bootstrap(rng::MersenneTwister, R::T) where {T<:LSurvivalCompResp} +bootstrap(R::T) where {T<:LSurvivalCompResp} +``` + +``` +z,x,t,d,event,weights = +LSurvival.dgm_comprisk(MersenneTwister(1212), 300) +enter = zeros(length(event)) + +# survival outcome: +R = LSurvivalCompResp(enter, t, event, weights, ID.(collect(1:length(t)))) # specification with ID only +bootstrap(R) # note that entire observations/clusters identified by id are kept +``` +""" + +DOC_BOOTSTRAP_PHPARMS = raw""" +Bootstrap sampling of a proportional hazards predictor object + +``` +using LSurvival, Random + +id, int, outt, data = +LSurvival.dgm(MersenneTwister(1212), 20, 5; afun = LSurvival.int_0) + +d, X = data[:, 4], data[:, 1:3] +weights = rand(length(d)) + +# survival outcome: +R = LSurvivalResp(int, outt, d, ID.(id)) # specification with ID only +P = PHParms(X) +idx, R2 = bootstrap(R) +P2 = bootstrap(idx, P) + +Mod = PHModel(R2, P2) +LSurvival._fit!(Mod, start=Mod.P._B) + +``` + +""" DOC_ROBUST_VCOV = raw""" ```@example using LSurvival dat1 = ( - time = [1,1,6,6,8,9], - status = [1,0,1,1,0,1], - x = [1,1,1,0,0,0] + time = [1,1,6,6,8,9], + status = [1,0,1,1,0,1], + x = [1,1,1,0,0,0] ) ft = coxph(@formula(Surv(time,status)~x),dat1, id=ID.(collect(1:6))) @@ -1077,11 +1189,11 @@ ft ```@example dat1clust= ( - id = [1,2,3,3,4,4,5,5,6,6], - enter = [0,0,0,1,0,1,0,1,0,1], - exit = [1,1,1,6,1,6,1,8,1,9], - status = [1,0,0,1,0,1,0,0,0,1], - x = [1,1,1,1,0,0,0,0,0,0] + id = [1,2,3,3,4,4,5,5,6,6], + enter = [0,0,0,1,0,1,0,1,0,1], + exit = [1,1,1,6,1,6,1,8,1,9], + status = [1,0,0,1,0,1,0,0,0,1], + x = [1,1,1,1,0,0,0,0,0,0] ) ft2 = coxph(@formula(Surv(enter, exit, status) ~ x),dat1clust, id=ID.(dat1clust.id)) @@ -1109,7 +1221,7 @@ ft2w """ -DOC_JACKKNIFE = """ +DOC_JACKKNIFE = raw""" Obtain jackknife (leave-one-out) estimates from a Cox model by refitting the model n times ```@example @@ -1139,11 +1251,11 @@ hcat(semb, sebs[1,:], sejk, sejk_manual[1,:], sero) dat1 = (time = [1, 1, 6, 6, 8, 9], status = [1, 0, 1, 1, 0, 1], x = [1, 1, 1, 0, 0, 0]) dat1clust = ( - id = [1, 2, 3, 3, 4, 4, 5, 5, 6, 6], - enter = [0, 0, 0, 1, 0, 1, 0, 1, 0, 1], - exit = [1, 1, 1, 6, 1, 6, 1, 8, 1, 9], - status = [1, 0, 0, 1, 0, 1, 0, 0, 0, 1], - x = [1, 1, 1, 1, 0, 0, 0, 0, 0, 0], + id = [1, 2, 3, 3, 4, 4, 5, 5, 6, 6], + enter = [0, 0, 0, 1, 0, 1, 0, 1, 0, 1], + exit = [1, 1, 1, 6, 1, 6, 1, 8, 1, 9], + status = [1, 0, 0, 1, 0, 1, 0, 0, 0, 1], + x = [1, 1, 1, 1, 0, 0, 0, 0, 0, 0], ) m = coxph(@formula(Surv(time, status)~x),dat1) @@ -1160,169 +1272,169 @@ stderror(mc, type="jackknife") DOC_RESIDUALS = raw""" -#################################################################### -Cox proportional hazards model residuals: + #################################################################### + Cox proportional hazards model residuals: -Signature + Signature -```julia -residuals(m::M; type = "martingale") where {M<:PHModel} -``` -where type is one of + ```julia + residuals(m::M; type = "martingale") where {M<:PHModel} + ``` + where type is one of - `martingale` - `schoenfeld` - `score` - `dfbeta` - `scaled_schoenfeld` -Residuals from the residuals function are designed to exactly emulate those from the `survival` package in R. Currently, they are validated for single observation data (e.g. one data row per individual). + Residuals from the residuals function are designed to exactly emulate those from the `survival` package in R. Currently, they are validated for single observation data (e.g. one data row per individual). #################################################################### ## Martingale residuals: Observed versus expected -```@example -# example from https://cran.r-project.org/web/packages/survival/vignettes/validate.pdf -# by Terry Therneau + ```@example + # example from https://cran.r-project.org/web/packages/survival/vignettes/validate.pdf + # by Terry Therneau -dat1 = ( + dat1 = ( time = [1,1,6,6,8,9], status = [1,0,1,1,0,1], x = [1,1,1,0,0,0] -) + ) -# Nelson-Aalen type estimator for Breslow partial likelihood -ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="breslow") -residuals(ft, type="martingale") -``` + # Nelson-Aalen type estimator for Breslow partial likelihood + ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="breslow") + residuals(ft, type="martingale") + ``` -```@example -dat1 = ( + ```@example + dat1 = ( time = [1,1,6,6,8,9], status = [1,0,1,1,0,1], x = [1,1,1,0,0,0] -) + ) -# Fleming-Harrington type estimator for Efron partial likelihood -ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="efron") -residuals(ft, type="martingale") + # Fleming-Harrington type estimator for Efron partial likelihood + ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="efron") + residuals(ft, type="martingale") -``` -#################################################################### -## Score residuals: Per observation contribution to score function + ``` + #################################################################### + ## Score residuals: Per observation contribution to score function -```julia -using LSurvival -dat1 = ( + ```julia + using LSurvival + dat1 = ( time = [1,1,6,6,8,9], status = [1,0,1,1,0,1], x = [1,1,1,0,0,0] -) -ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="breslow") -S = residuals(ft, type="score")[:] -ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="efron", maxiter=0) -S = residuals(ft, type="score")[:] -``` + ) + ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="breslow") + S = residuals(ft, type="score")[:] + ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="efron", maxiter=0) + S = residuals(ft, type="score")[:] + ``` -#################################################################### -## Schoenfeld residuals: Per time contribution to score function -```julia -using LSurvival -dat1 = ( + #################################################################### + ## Schoenfeld residuals: Per time contribution to score function + ```julia + using LSurvival + dat1 = ( time = [1,1,6,6,8,9], status = [1,0,1,1,0,1], x = [1,1,1,0,0,0] -) -ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="breslow", maxiter=0) + ) + ft = coxph(@formula(Surv(time,status)~x),dat1, keepx=true, keepy=true, ties="breslow", maxiter=0) -X = ft.P.X -M = residuals(ft, type="martingale") -S = residuals(ft, type="schoenfeld")[:] -``` + X = ft.P.X + M = residuals(ft, type="martingale") + S = residuals(ft, type="schoenfeld")[:] + ``` -#################################################################### -## dfbeta residuals: influence of individual observations on each parameter + #################################################################### + ## dfbeta residuals: influence of individual observations on each parameter -```@example -using LSurvival -dat1 = ( + ```@example + using LSurvival + dat1 = ( time = [1,1,6,6,8,9], status = [1,0,1,1,0,1], x = [1,1,1,0,0,0] -) + ) -ft = coxph(@formula(Surv(time,status)~x),dat1, ties="breslow") -residuals(ft, type="dfbeta") + ft = coxph(@formula(Surv(time,status)~x),dat1, ties="breslow") + residuals(ft, type="dfbeta") -# can also calculate from score residuals and Hessian matrix -L = residuals(ft, type="score") # n X p -H = ft.P._hess # p X p -dfbeta = L*inv(H) -robVar = dfbeta'dfbeta -sqrt(robVar) + # can also calculate from score residuals and Hessian matrix + L = residuals(ft, type="score") # n X p + H = ft.P._hess # p X p + dfbeta = L*inv(H) + robVar = dfbeta'dfbeta + sqrt(robVar) -``` + ``` -# using the `id` keyword argument -# see help for LSurvival.vcov for what happens when `id` keyword is not used + # using the `id` keyword argument + # see help for LSurvival.vcov for what happens when `id` keyword is not used -```@example -dat1clust= ( + ```@example + dat1clust= ( id = [1,2,3,3,4,4,5,5,6,6], enter = [0,0,0,1,0,1,0,1,0,1], exit = [1,1,1,6,1,6,1,8,1,9], status = [1,0,0,1,0,1,0,0,0,1], x = [1,1,1,1,0,0,0,0,0,0] -) + ) -ft2 = coxph(@formula(Surv(enter, exit, status) ~ x),dat1clust, id=ID.(dat1clust.id), ties="breslow") + ft2 = coxph(@formula(Surv(enter, exit, status) ~ x),dat1clust, id=ID.(dat1clust.id), ties="breslow") -# note these are still on the observation level (not the id level)! -residuals(ft2, type="dfbeta") + # note these are still on the observation level (not the id level)! + residuals(ft2, type="dfbeta") -# getting id level dfbeta residuals -dfbeta = residuals(ft2, type="dfbeta") -id = values(ft2.R.id) -D = reduce(vcat, [sum(dfbeta[findall(id .== i),:], dims=1) for i in unique(id)]) -D'D -vcov(ft, type="robust") -vcov(ft2, type="robust") -``` + # getting id level dfbeta residuals + dfbeta = residuals(ft2, type="dfbeta") + id = values(ft2.R.id) + D = reduce(vcat, [sum(dfbeta[findall(id .== i),:], dims=1) for i in unique(id)]) + D'D + vcov(ft, type="robust") + vcov(ft2, type="robust") + ``` -#################################################################### -## jackknife residuals: influence of individual observations on each parameter using leave-one-out estimates + #################################################################### + ## jackknife residuals: influence of individual observations on each parameter using leave-one-out estimates note there are other definitions of jackknife residuals See Chapter 7.1 of "Extending the Cox Model" by Therneau and Grambsch for an example of the type of jackknife residuals used here -Jackknife residuals $r_i$ for $i \in 1:n$ are given as the difference between the maximum partial likelihood estimate and the jackknife estimates for each observation + Jackknife residuals $r_i$ for $i \in 1:n$ are given as the difference between the maximum partial likelihood estimate and the jackknife estimates for each observation $$r_i = \hat\beta - \hat\beta_{(-i)}$$ -where $\beta_{(-i)}$ is the maximum partial likelihood estimate of the log-hazard ratio vector obtained from a dataset in which observations belonging to individual `i` are removed + where $\beta_{(-i)}$ is the maximum partial likelihood estimate of the log-hazard ratio vector obtained from a dataset in which observations belonging to individual `i` are removed -```@example -using LSurvival -dat1 = ( + ```@example + using LSurvival + dat1 = ( time = [1,1,6,6,8,9], status = [1,0,1,1,0,1], x = [1,1,1,0,0,0] -) + ) -ft = coxph(@formula(Surv(time,status)~x),dat1, ties="breslow") -#jackknife(ft) -residuals(ft, type="jackknife") + ft = coxph(@formula(Surv(time,status)~x),dat1, ties="breslow") + #jackknife(ft) + residuals(ft, type="jackknife") -``` + ``` """ DOC_VCOV = raw""" -Covariance matrix for Cox proportional hazards models + Covariance matrix for Cox proportional hazards models -Keyword arguments + Keyword arguments - `type` nothing or "robust": determines whether model based or robust (dfbeta based) variance is returned. -See ?residuals for info on `dfbeta` residuals + See ?residuals for info on `dfbeta` residuals $DOC_ROBUST_VCOV """ @@ -1333,26 +1445,26 @@ $DOC_ROBUST_VCOV """ *Deprecated function* -Estimate parameters of an extended Cox model + Estimate parameters of an extended Cox model -Using: Newton raphson algorithm with modified/adaptive step sizes + Using: Newton raphson algorithm with modified/adaptive step sizes -Keyword inputs: -method="efron", -inits=nothing , # initial parameter values, set to zero if this is nothing -tol=10e-9, # convergence tolerance based on log likelihod: likrat = abs(lastLL/_LL[1]), absdiff = abs(lastLL-_LL[1]), reldiff = max(likrat, inv(likrat)) -1.0 -maxiter=500 # maximum number of iterations for Newton Raphson algorithm (set to zero to calculate likelihood, gradient, Hessian at the initial parameter values) + Keyword inputs: + method="efron", + inits=nothing , # initial parameter values, set to zero if this is nothing + tol=10e-9, # convergence tolerance based on log likelihod: likrat = abs(lastLL/_LL[1]), absdiff = abs(lastLL-_LL[1]), reldiff = max(likrat, inv(likrat)) -1.0 + maxiter=500 # maximum number of iterations for Newton Raphson algorithm (set to zero to calculate likelihood, gradient, Hessian at the initial parameter values) -Outputs: -beta: coefficients -ll: log partial likelihood history (all iterations), with final value being the (log) maximum partial likelihood (log-MPL) -g: gradient vector (first derivative of log partial likelihood) at log-MPL -h: hessian matrix (second derivative of log partial likelihood) at log-MPL -basehaz: Matrix: baseline hazard at referent level of all covariates, weighted risk set size, weighted # of cases, time + Outputs: + beta: coefficients + ll: log partial likelihood history (all iterations), with final value being the (log) maximum partial likelihood (log-MPL) + g: gradient vector (first derivative of log partial likelihood) at log-MPL + h: hessian matrix (second derivative of log partial likelihood) at log-MPL + basehaz: Matrix: baseline hazard at referent level of all covariates, weighted risk set size, weighted # of cases, time -Examples: -```julia-repl + Examples: + ```julia-repl using LSurvival # simulating discrete survival data for 20 individuals at 10 time points id, int, outt, data = LSurvival.dgm(20, 5;afun=LSurvival.int_0); @@ -1370,7 +1482,7 @@ Examples: res = coxmodel(args..., method="efron"); coxsum = cox_summary(res, alpha=0.05, verbose=true); -``` + ``` """ """ @@ -1392,14 +1504,14 @@ Examples: covarmat = sum(X, dims=1) ./ size(X,1) ci, surv = ci_from_coxmodels(bhlist;eventtypes=[1,2], coeflist=coeflist, covarmat=covarmat) ci, surv = ci_from_coxmodels(bhlist;eventtypes=[1,2]) - """ +""" """ *Deprecated function* -Kaplan Meier with late entry, possibly multiple observations per unit + Kaplan Meier with late entry, possibly multiple observations per unit -Usage: kaplan_meier(in,out,d; weights=nothing, eps = 0.00000001) + Usage: kaplan_meier(in,out,d; weights=nothing, eps = 0.00000001) - in = time at entry (numeric vector) - out = time at exit (numeric vector) @@ -1420,8 +1532,8 @@ Usage: kaplan_meier(in,out,d; weights=nothing, eps = 0.00000001) """ *Deprecated function* -Aalen-Johansen (cumulative incidence) with late entry, possibly multiple observations per unit, non-repeatable events -Usage: aalen_johansen(in,out,d;dvalues=[1.0, 2.0], weights=nothing, eps = 0.00000001) + Aalen-Johansen (cumulative incidence) with late entry, possibly multiple observations per unit, non-repeatable events + Usage: aalen_johansen(in,out,d;dvalues=[1.0, 2.0], weights=nothing, eps = 0.00000001) - in = time at entry (numeric vector) - out = time at exit (numeric vector) @@ -1449,7 +1561,7 @@ Usage: aalen_johansen(in,out,d;dvalues=[1.0, 2.0], weights=nothing, eps = 0.0000 Non-parametric sub-distribution hazard estimator estimating cumulative incidence via the subdistribution hazard function -Usage: subdistribution_hazard_cuminc(in,out,d;dvalues=[1.0, 2.0], weights=nothing, eps = 0.00000001) + Usage: subdistribution_hazard_cuminc(in,out,d;dvalues=[1.0, 2.0], weights=nothing, eps = 0.00000001) - in = time at entry (numeric vector) - out = time at exit (numeric vector) @@ -1467,13 +1579,13 @@ Usage: subdistribution_hazard_cuminc(in,out,d;dvalues=[1.0, 2.0], weights=nothin - events: number of events of each type used in calculating survival and cumulative incidence at each event time - names: vector of symbols [:times, :cumhaz, :ci] used as a mnemonic for the function output -Note: + Note: For time specific subdistribution hazard given by 'sdhaz(t)', the cumulative incidence for a specific event type calculated over time is 1.0 .- exp.(.-cumsum(sdhaz(t))) -Examples: -```julia-repl + Examples: + ```julia-repl using LSurvival, Random z,x,t,d, event,weights = LSurvival.dgm_comprisk(1000); @@ -1482,7 +1594,7 @@ Examples: times_sd, cumhaz, ci_sd = subdistribution_hazard_cuminc(zeros(length(t)), t, event, dvalues=[1.0, 2.0]); times_aj, surv, ajest, riskset, events = aalen_johansen(zeros(length(t)), t, event, dvalues=[1.0, 2.0]); -``` + ``` """ """ diff --git a/src/jackknife.jl b/src/jackknife.jl index 6eda234..b1f7d33 100644 --- a/src/jackknife.jl +++ b/src/jackknife.jl @@ -1,34 +1,90 @@ function pop(R::T) where {T<:LSurvivalResp} uid = unique(R.id)[end] idx = findall(getfield.(R.id, :value) .== uid.value) - nidx = setdiff(collect(eachindex(R.id)),idx) - Ri = LSurvivalResp(R.enter[idx], R.exit[idx], R.y[idx], R.wts[idx], R.id[idx]; origintime= R.origin) - Rj = LSurvivalResp(R.enter[nidx], R.exit[nidx], R.y[nidx], R.wts[nidx], R.id[nidx],origintime= R.origin) + nidx = setdiff(collect(eachindex(R.id)), idx) + Ri = LSurvivalResp( + R.enter[idx], + R.exit[idx], + R.y[idx], + R.wts[idx], + R.id[idx]; + origintime = R.origin, + ) + Rj = LSurvivalResp( + R.enter[nidx], + R.exit[nidx], + R.y[nidx], + R.wts[nidx], + R.id[nidx], + origintime = R.origin, + ) + Ri, Rj, idx, nidx +end + +function pop(R::T) where {T<:LSurvivalCompResp} + uid = unique(R.id)[end] + idx = findall(getfield.(R.id, :value) .== uid.value) + nidx = setdiff(collect(eachindex(R.id)), idx) + Ri = LSurvivalCompResp( + R.enter[idx], + R.exit[idx], + R.y[idx], + R.wts[idx], + R.id[idx]; + origintime = R.origin, + etypes = R.eventtypes, + ematrix = R.eventmatrix[idx,:] + ) + Rj = LSurvivalCompResp( + R.enter[nidx], + R.exit[nidx], + R.y[nidx], + R.wts[nidx], + R.id[nidx], + origintime = R.origin, + etypes = R.eventtypes, + ematrix = R.eventmatrix[nidx,:] + ) Ri, Rj, idx, nidx end + function popat!(P::T, idxi, idxj) where {T<:PHParms} - Pi = PHParms(P.X[idxi,:], P._B, P._r[idxi], P._LL, P._grad, P._hess, 1, P.p) - P.X = P.X[idxj,:] + Pi = PHParms(P.X[idxi, :], P._B, P._r[idxi], P._LL, P._grad, P._hess, 1, P.p) + P.X = P.X[idxj, :] P._r = P._r[idxj] P.n -= length(idxi) Pi end function push(Ri::T, Rj::T) where {T<:LSurvivalResp} - Ri = LSurvivalResp( - vcat(Ri.enter, Rj.enter), - vcat(Ri.exit, Rj.exit), - vcat(Ri.y, Rj.y), - vcat(Ri.wts, Rj.wts), - vcat(Ri.id, Rj.id); - origintime = min(Ri.origin, Rj.origin)) + LSurvivalResp( + vcat(Ri.enter, Rj.enter), + vcat(Ri.exit, Rj.exit), + vcat(Ri.y, Rj.y), + vcat(Ri.wts, Rj.wts), + vcat(Ri.id, Rj.id); + origintime = min(Ri.origin, Rj.origin), + ) +end + +function push(Ri::T, Rj::T) where {T<:LSurvivalCompResp} + LSurvivalCompResp( + vcat(Ri.enter, Rj.enter), + vcat(Ri.exit, Rj.exit), + vcat(Ri.y, Rj.y), + vcat(Ri.wts, Rj.wts), + vcat(Ri.id, Rj.id); + origintime = min(Ri.origin, Rj.origin), + etypes = sort(unique(vcat(Ri.eventtypes, Rj.eventtypes))), + ematrix = vcat(Ri.eventmatrix, Rj.eventmatrix) + ) end function push!(Pi::T, Pj::T) where {T<:PHParms} - Pj.X = vcat(Pi.X, Pj.X) + Pj.X = vcat(Pi.X, Pj.X) Pj._r = vcat(Pi._r, Pj._r) - Pj.n +=1 + Pj.n += 1 nothing end @@ -36,17 +92,25 @@ end """ $DOC_JACKKNIFE """ -function jackknife(m::M) where {M<:PHModel} +function jackknife(m::M; kwargs...) where {M<:PHModel} uid = unique(m.R.id) coefs = zeros(length(uid), length(coef(m))) R = deepcopy(m.R) P = deepcopy(m.P) for i in eachindex(uid) - Ri, Rj, idxi, idxj = pop(m.R); + Ri, Rj, idxi, idxj = pop(m.R) Pi = popat!(m.P, idxi, idxj) - mi= PHModel(Rj, m.P, m.formula, m.ties, false, m.bh[1:length(Rj.eventtimes),:], nothing) - fit!(mi, getbasehaz=false) - coefs[i,:] = coef(mi) + mi = PHModel( + Rj, + m.P, + m.formula, + m.ties, + false, + m.bh[1:length(Rj.eventtimes), :], + nothing, + ) + fit!(mi, getbasehaz = false; kwargs...) + coefs[i, :] = coef(mi) m.R = push(Ri, Rj) push!(Pi, m.P) end @@ -58,8 +122,85 @@ end function jackknife_vcov(m::M) where {M<:PHModel} N = nobs(m) #comparing estimate with jackknife estimate with bootstrap mean - jk = jackknife(m); - covjk = cov(jk, corrected=false) .* (N-1) + jk = jackknife(m) + covjk = cov(jk, corrected = false) .* (N - 1) covjk end + +""" +Obtain jackknife (leave-one-out) estimates from a Kaplan-Meier survival curve (survival at end of follow-up) by refitting the model n times + + +## Signatures + +```julia +jackknife(m::M;kwargs...) where {M<:KMSurv} +``` + +```@example +using LSurvival, Random, StatsBase + +dat1 = (time = [1, 1, 6, 6, 8, 9], status = [1, 0, 1, 1, 0, 1], x = [1, 1, 1, 0, 0, 0]) + +dat1clust = ( + id = [1, 2, 3, 3, 4, 4, 5, 5, 6, 6], + enter = [0, 0, 0, 1, 0, 1, 0, 1, 0, 1], + exit = [1, 1, 1, 6, 1, 6, 1, 8, 1, 9], + status = [1, 0, 0, 1, 0, 1, 0, 0, 0, 1], + x = [1, 1, 1, 1, 0, 0, 0, 0, 0, 0], +) + +m = kaplan_meier(dat1.time, dat1.status) +a = aalen_johansen(dat1.time, dat1.status) +mc = kaplan_meier(dat1clust.enter, dat1clust.exit, dat1clust.status, id=ID.(dat1clust.id)) +ac = aalen_johansen(dat1clust.enter, dat1clust.exit, dat1clust.status, id=ID.(dat1clust.id)) +jk = jackknife(m); +jkc = jackknife(mc); +jka = jackknife(a); +bs = bootstrap(mc, 100); +std(bs[:,1]) +stderror(m, type="jackknife") +stderror(mc, type="jackknife") +@assert jk == jkc +``` +""" +function jackknife(m::M; kwargs...) where {M<:KMSurv} + uid = unique(m.R.id) + res = zeros(length(uid)) + R = deepcopy(m.R) + for i in eachindex(uid) + Ri, Rj, _, _ = pop(m.R) + mi = KMSurv(Rj) + fit!(mi; kwargs...) + res[i, :] = mi.surv[end:end] + m.R = push(Ri, Rj) + end + m.R = R + res +end + +""" +Obtain jackknife (leave-one-out) estimates from a Aalen-Johansen risk curve (risk at end of follow-up) by refitting the model n times + +## Signatures + +```julia +jackknife(m::M;kwargs...) where {M<:AJSurv} +``` + +""" +function jackknife(m::M; kwargs...) where {M<:AJSurv} + uid = unique(m.R.id) + res = zeros(length(uid)) + R = deepcopy(m.R) + for i in eachindex(uid) + Ri, Rj, _, _ = pop(m.R) + mi = AJSurv(Rj) + fit!(mi; kwargs...) + res[i, :] = mi.surv[end:end] + m.R = push(Ri, Rj) + end + m.R = R + res +end diff --git a/src/npsurvival.jl b/src/npsurvival.jl index 265b7a6..3abac8f 100644 --- a/src/npsurvival.jl +++ b/src/npsurvival.jl @@ -47,11 +47,19 @@ end # Fitting functions for non-parametric survival models ##################################################################################################################### -function _fit!(m::KMSurv; eps = 0.00000001, censval = 0, keepy = true, kwargs...) +function _fit!( + m::KMSurv; + eps = 0.00000001, + censval = 0, + keepy = true, + atol = 0.00000001, + kwargs..., +) # there is some bad floating point issue with epsilon that should be tracked # R handles this gracefully # ties allowed #_dt = zeros(length(orderedtimes)) + eps = atol _1mdovern = ones(length(m.times)) for (_i, tt) in enumerate(m.times) R = findall((m.R.exit .>= tt) .& (m.R.enter .< (tt - eps))) # risk set index (if in times are very close to other out-times, not using epsilon will make risk sets too big) @@ -67,7 +75,8 @@ function _fit!(m::KMSurv; eps = 0.00000001, censval = 0, keepy = true, kwargs... m end -function _fit!(m::AJSurv; keepy = true, eps = 0.00000001) +function _fit!(m::AJSurv; keepy = true, eps = 0.00000001, atol = 0.00000001) + eps = atol dvalues = m.R.eventtypes[2:end] nvals = length(dvalues) kmfit = fit(KMSurv, m.R.enter, m.R.exit, m.R.y, wts = m.R.wts) @@ -126,7 +135,17 @@ function fit( return fit!(res; fitargs...) end +fit(::Type{M}, exit, y; kwargs...) where {M<:KMSurv} = + fit(M, zeros(eltype(exit), length(y)), exit, y; kwargs...) +fit(::Type{M}, exit; kwargs...) where {M<:KMSurv} = + fit(M, exit, ones(Int, length(exit)); kwargs...) +""" +$DOC_FIT_KMSURV +""" +kaplan_meier(enter, exit, y; kwargs...) = fit(KMSurv, enter, exit, y; kwargs...) +kaplan_meier(exit, y; kwargs...) = fit(KMSurv, exit, y; kwargs...) +kaplan_meier(exit; kwargs...) = fit(KMSurv, exit; kwargs...) """ @@ -148,44 +167,45 @@ function fit( return fit!(res; fitargs...) end +fit(::Type{M}, exit, y; kwargs...) where {M<:AJSurv} = + fit(M, zeros(eltype(exit), length(y)), exit, y; kwargs...) +fit(::Type{M}, exit; kwargs...) where {M<:AJSurv} = + fit(M, exit, ones(Int, length(exit)); kwargs...) -""" -$DOC_FIT_KMSURV -""" -kaplan_meier(enter, exit, y; kwargs...) = - fit(KMSurv, enter, exit, y; kwargs...) - -kaplan_meier(exit, y; kwargs...) = - kaplan_meier(zeros(length(y)), exit, y; kwargs...) - -kaplan_meier(exit; kwargs...) = - kaplan_meier(exit, ones(length(exit)); kwargs...) """ $DOC_FIT_AJSURV """ -aalen_johansen(enter, exit, y; kwargs...) = - fit(AJSurv, enter, exit, y; kwargs...) - -aalen_johansen(exit, y; kwargs...) = - aalen_johansen(zeros(length(y)), exit, y; kwargs...) +aalen_johansen(enter, exit, y; kwargs...) = fit(AJSurv, enter, exit, y; kwargs...) +aalen_johansen(exit, y; kwargs...) = fit(AJSurv, exit, y; kwargs...) ##################################################################################################################### # Summary functions for non-parametric survival models ##################################################################################################################### -function StatsBase.isfitted(m::M) where {M<:KMSurv} + +function StatsBase.nobs(m::M) where {M<:AbstractNPSurv} + mwarn(m) + length(unique(m.R.id)) +end + +function StatsBase.isfitted(m::M) where {M<:AbstractNPSurv} m.fit end """ $DOC_VARIANCE_KMSURV """ -function StatsBase.stderror(m::KMSurv) - var = m.surv .* m.surv .* cumsum(m.events ./ (m.riskset .* (m.riskset .- m.events))) - sqrt.(var) +function StatsBase.stderror(m::KMSurv; type = nothing) + if type == "jackknife" + N = nobs(m) + jk = jackknife(m) + variance = var(jk, corrected = false) .* (N - 1) + else + variance = m.surv .* m.surv .* cumsum(m.events ./ (m.riskset .* (m.riskset .- m.events))) + end + sqrt.(variance) end - function confint_normal(m::KMSurv; level = 0.95) se = stderror(m) halfalpha = (1.0 - level) / 2.0 @@ -212,26 +232,31 @@ end """ $DOC_VARIANCE_AJSURV """ -function StatsBase.stderror(m::AJSurv) - riski = m.risk[:, 1] - d = sum(m.events, dims = 2) - sm1 = vcat(1.0, m.surv[1:end-1]) - vv = [ - ( - cumsum( - (m.surv .* m.risk[:, j]) .* (m.surv .* m.risk[:, j]) .* - (m.riskset .- 1.0) .* m.riskset .^ (-3.0) .* d, - dims = 1, - ) + cumsum( - (sm1) .* (sm1) .* (1.0 .- 2.0 .* m.risk[:, j]) .* (m.riskset .- 1.0) .* - m.riskset .^ (-3.0) .* m.events[:, j], - dims = 1, - ) - ) for j = 1:size(m.risk, 2) - ] - - var = reduce(hcat, vv) - sqrt.(var) +function StatsBase.stderror(m::AJSurv; type = nothing) + if type == "jackknife" + N = nobs(m) + jk = jackknife(m) + variance = var(jk, corrected = false) .* (N - 1) + else + riski = m.risk[:, 1] + d = sum(m.events, dims = 2) + sm1 = vcat(1.0, m.surv[1:end-1]) + vv = [ + ( + cumsum( + (m.surv .* m.risk[:, j]) .* (m.surv .* m.risk[:, j]) .* + (m.riskset .- 1.0) .* m.riskset .^ (-3.0) .* d, + dims = 1, + ) + cumsum( + (sm1) .* (sm1) .* (1.0 .- 2.0 .* m.risk[:, j]) .* (m.riskset .- 1.0) .* m.riskset .^ (-3.0) .* m.events[:, j], + dims = 1, + ) + ) for j = 1:size(m.risk, 2) + ] + + variance = reduce(hcat, vv) + end + sqrt.(variance) end diff --git a/src/shared_structs.jl b/src/shared_structs.jl index 3352f03..ed6cb18 100644 --- a/src/shared_structs.jl +++ b/src/shared_structs.jl @@ -44,17 +44,17 @@ struct Surv{E<:Real,X<:Real,Y<:Real,O<:Real} <: AbstractSurvTime origin::O end -function Surv(enter::E, exit::X, y::Y, origintime=nothing) where {E<:Real,X<:Real,Y<:Real} +function Surv(enter::E, exit::X, y::Y, origintime = nothing) where {E<:Real,X<:Real,Y<:Real} origin = isnothing(origintime) ? zero(E) : origintime return Surv(enter, exit, y, origin) end -function Surv(exit::X, y::Y;kwargs...) where {X<:Real,Y<:Real} - return Surv(zero(X), exit, y;kwargs...) +function Surv(exit::X, y::Y; kwargs...) where {X<:Real,Y<:Real} + return Surv(zero(X), exit, y; kwargs...) end -function Surv(exit::X;kwargs...) where {X<:Real} - return Surv(zero(X), exit, 1;kwargs...) +function Surv(exit::X; kwargs...) where {X<:Real} + return Surv(zero(X), exit, 1; kwargs...) end @@ -78,7 +78,7 @@ struct LSurvivalResp{ "`wts`: observation weights" wts::W "`eventtimes`: unique event times" - eventtimes::E + eventtimes::X "`origin`: origin on the time scale" origin::T "`id`: person level identifier (must be wrapped in ID() function)" @@ -91,7 +91,7 @@ function LSurvivalResp( y::Y, wts::W, id::Vector{I}; - origintime = nothing + origintime = nothing, ) where { E<:Vector, X<:Vector, @@ -228,7 +228,9 @@ function LSurvivalCompResp( y::Y, wts::W, id::Vector{I}; - origintime = nothing + origintime = nothing, + etypes = ones(0), + ematrix = ones(0,0), ) where { E<:Vector, X<:Vector, @@ -251,12 +253,15 @@ function LSurvivalCompResp( throw(DimensionMismatch("wts must have length $ny or length 0 but was $lw")) end eventtimes = sort(unique(exit[findall(y .> 0)])) + origin = isnothing(origintime) ? minimum(enter) : origintime if lw == 0 wts = ones(Int, ny) end - eventtypes = sort(unique(y)) - eventmatrix = reduce(hcat, [y .== e for e in eventtypes[2:end]]) + eventtypes = length(etypes) == 0 ? sort(unique(y)) : etypes + length(eventtypes) < length(sort(unique(y))) && + throw(DimensionMismatch("etypes cannot be have fewer unique values than y")) + eventmatrix = size(ematrix,1) == 0 ? reduce(hcat, [y .== e for e in eventtypes[2:end]]) : ematrix return LSurvivalCompResp( enter, @@ -276,7 +281,7 @@ function LSurvivalCompResp( exit::X, y::Y, id::Vector{I}; - kwargs... + kwargs..., ) where {E<:Vector,X<:Vector,Y<:Union{Vector{<:Real},BitVector},I<:AbstractLSurvivalID} wts = ones(Int, length(y)) return LSurvivalCompResp(enter, exit, y, wts, id; kwargs...) @@ -287,7 +292,7 @@ function LSurvivalCompResp( exit::X, y::Y, wts::W; - kwargs... + kwargs..., ) where {E<:Vector,X<:Vector,Y<:Union{Vector{<:Real},BitVector},W<:Vector} id = [ID(i) for i in eachindex(y)] return LSurvivalCompResp(enter, exit, y, wts, id; kwargs...) @@ -297,7 +302,7 @@ function LSurvivalCompResp( enter::E, exit::X, y::Y; - kwargs... + kwargs..., ) where {E<:Vector,X<:Vector,Y<:Union{Vector{<:Real},BitVector}} id = [ID(i) for i in eachindex(y)] return LSurvivalCompResp(enter, exit, y, id; kwargs...) @@ -306,7 +311,7 @@ end function LSurvivalCompResp( exit::X, y::Y; - kwargs... + kwargs..., ) where {X<:Vector,Y<:Union{Vector{<:Real},BitVector}} return LSurvivalCompResp(zeros(length(exit)), exit, y; kwargs...) end @@ -326,7 +331,7 @@ function Base.show(io::IO, x::T; maxrows::Int = 10) where {T<:AbstractLSurvivalR println("Max time: $(maximum(x.exit))") iob = IOBuffer() op = reduce(vcat, pr) - nr = typeof(op) == String ? 1 : size(op, 1) + nr = typeof(op) == String ? 1 : size(op, 1) if nr == 1 println(iob, "$(x.id[1]). $(op)") elseif nr < maxrows @@ -344,7 +349,8 @@ function Base.show(io::IO, x::T; maxrows::Int = 10) where {T<:AbstractLSurvivalR println(io, str) end -Base.show(x::T; kwargs...) where {T<:AbstractLSurvivalResp} = Base.show(stdout, x; kwargs...) +Base.show(x::T; kwargs...) where {T<:AbstractLSurvivalResp} = + Base.show(stdout, x; kwargs...) function Base.show(io::IO, x::T) where {T<:AbstractSurvTime} lefttruncate = x.enter == x.origin ? "[" : "(" diff --git a/test/runtests.jl b/test/runtests.jl index 904eec9..3fe1903 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -5,6 +5,13 @@ using Random, Tables #using BenchmarkTools # add during testing @testset "LSurvival.jl" begin + + + + + ################################################ + ###### start of tests ############ + ################################################ println("Creating test datasets") dat1 = (time = [1, 1, 6, 6, 8, 9], status = [1, 0, 1, 1, 0, 1], x = [1, 1, 1, 0, 0, 0]) dat2 = ( @@ -82,8 +89,9 @@ using Random, Tables ) res2 = coxph(X, int, outt, d, wts = wt, ties = "breslow", gtol = 1e-9) - resnobh = coxph(X, int, outt, d, wts = wt, ties = "breslow", gtol = 1e-9, getbasehaz=false) - @test all(isapprox.(resnobh.bh[end,:],zeros(6))) + resnobh = + coxph(X, int, outt, d, wts = wt, ties = "breslow", gtol = 1e-9, getbasehaz = false) + @test all(isapprox.(resnobh.bh[end, :], zeros(6))) rfromc = risk_from_coxphmodels([res, res2]) @@ -150,10 +158,13 @@ using Random, Tables ft = coxph( @formula(Surv(entertime, exittime, death) ~ x + z1 + z2 + z1 * x), tab, - contrasts = Dict(:z1 => CategoricalTerm), maxIter = 100, convTol = 1e-9, tol=1e-9 + contrasts = Dict(:z1 => CategoricalTerm), + maxIter = 100, + convTol = 1e-9, + tol = 1e-9, ) println(ft) - stderror(ft, type="robust") + stderror(ft, type = "robust") println(ft) @test all(isapprox.(fitted(ft), log.(ft.P._r))) @@ -166,11 +177,11 @@ using Random, Tables @test typeof(model_response(ft)) <: LSurvivalResp @test maximum(abs.(score(ft))) < 0.00000001 @test weights(ft) == ft.R.wts - - show(ft) + + show(ft) ft.fit = false print(ft) - + (coxph( @formula(Surv(entertime, exittime, death) ~ x + z1 + z2 + z1 * x), @@ -215,7 +226,8 @@ using Random, Tables @test M.ties == "efron" # this errors in Ubuntu - try LSurvival._fit!(M, start = [0.0, 0.0, 0.0]) + try + LSurvival._fit!(M, start = [0.0, 0.0, 0.0]) catch e end @@ -231,20 +243,20 @@ using Random, Tables println(ajfit) # TESTs: do bootstrap functions work? - @test size(bootstrap(kmfit, 3)) == (3,1) + @test size(bootstrap(kmfit, 3)) == (3, 1) #trivial case of non-competing events with late entry - @test size(bootstrap(ajfit, 3)) == (3,1) + @test size(bootstrap(ajfit, 3)) == (3, 1) z, x, t, d, event, wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 1000) - print(kaplan_meier(t,d)) - print(aalen_johansen(t,event)) + print(kaplan_meier(t, d)) + print(aalen_johansen(t, event)) # running through some deprecated functions - kmdep = LSurvival.km(t,d) - ajdep = LSurvival.aj(zeros(length(t)), t,event) - timessub, _, cisub = LSurvival.subdistribution_hazard_cuminc(zeros(length(t)),t,event) + kmdep = LSurvival.km(t, d) + ajdep = LSurvival.aj(zeros(length(t)), t, event) + timessub, _, cisub = LSurvival.subdistribution_hazard_cuminc(zeros(length(t)), t, event) - LSurvival.e_yearsoflifelost(timessub,cisub) + LSurvival.e_yearsoflifelost(timessub, cisub) z, x, t, d, event, wt = LSurvival.dgm_comprisk(MersenneTwister(1212), 100) @@ -318,10 +330,18 @@ using Random, Tables _, _, _, _, bh2 = coxmodel(enter, t, Int.(event .== 2), X) _, _, _, _, bh1 = coxmodel(enter, t, Int.(event .== 1), X) - rfromc = risk_from_coxphmodels([ft1, ft2], coef_vectors=[coef(res), coef(res2)], pred_profile=[ 0 0 -1]) + rfromc = risk_from_coxphmodels( + [ft1, ft2], + coef_vectors = [coef(res), coef(res2)], + pred_profile = [0 0 -1], + ) println(rfromc) refrisk = rfromc.risk - oldrisk, _ = ci_from_coxmodels([bh1, bh2]; coeflist=[coef(res), coef(res2)], covarmat=[ 0 0 -1;]) + oldrisk, _ = ci_from_coxmodels( + [bh1, bh2]; + coeflist = [coef(res), coef(res2)], + covarmat = [0 0 -1;], + ) @@ -872,7 +892,7 @@ using Random, Tables # TEST: bootstrapping a cox model without a seed @test !bootstrap(ft2).fit - println(Surv(1,2,1)) + println(Surv(1, 2, 1)) # TEST: bootstrapping a kaplan meier without a seed zz = zeros(length(dat1.time)) @@ -905,7 +925,7 @@ using Random, Tables #@test # fails as a test on Ubuntu (unique(op.R.wts) == unique(bootstrap(MersenneTwister(345), op).R.wts)) && - (op.R.wts != bootstrap(MersenneTwister(345), op).R.wts) + (op.R.wts != bootstrap(MersenneTwister(345), op).R.wts) op = aalen_johansen( zeros(100), rand(MersenneTwister(345), 100), @@ -914,8 +934,7 @@ using Random, Tables ) #@test # fails as a test on Ubuntu - (unique(op.R.wts) == unique(bootstrap(op).R.wts)) && - (op.R.wts != bootstrap(op).R.wts) + (unique(op.R.wts) == unique(bootstrap(op).R.wts)) && (op.R.wts != bootstrap(op).R.wts) # this is a need for re-factoring # op = aalen_johansen(zeros(100), rand(MersenneTwister(345), 100), rand(MersenneTwister(345), [0,1,2], 100), keepy=false) @@ -983,4 +1002,43 @@ using Random, Tables args = coxmodel(dat1clust.enter, dat1clust.exit, dat1clust.status, dat1clust.x[1:end, :]) @test cox_summary(args)[1] == args[1][1] + + # TEST: jackknife functions of KM/AJ + m = kaplan_meier(dat1.time, dat1.status) + mc = kaplan_meier( + dat1clust.enter, + dat1clust.exit, + dat1clust.status, + id = ID.(dat1clust.id), + ) + a = aalen_johansen(dat1.time, dat1.status) + ac = aalen_johansen( + dat1clust.enter, + dat1clust.exit, + dat1clust.status, + id = ID.(dat1clust.id), + ) + jk = jackknife(m) + jkc = jackknife(mc) + jka = jackknife(a) + jkac = jackknife(ac) + @test jk == jkc + @test jka == jkac + @test stderror(m, type = "jackknife") == stderror(mc, type = "jackknife") + @test stderror(a, type = "jackknife") == stderror(ac, type = "jackknife") + + + # + a = aalen_johansen(dat1.time, dat1.status) + ac = aalen_johansen( + dat1clust.enter, + dat1clust.exit, + dat1clust.status, + id = ID.(dat1clust.id), + ) + jkc = jackknife(mc) + jka = jackknife(a) + stderror(a, type = "jackknife") + stderror(ac, type = "jackknife") + end