Skip to content

Commit

Permalink
changes to plots and vignettes
Browse files Browse the repository at this point in the history
  • Loading branch information
gtonkinhill committed Sep 9, 2022
1 parent 1f066e0 commit 492a1cc
Show file tree
Hide file tree
Showing 32 changed files with 459 additions and 309 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ Imports:
boot,
dglm,
statmod,
tweedie
tweedie,
glmmTMB,
scales
LinkingTo: Rcpp
Suggests:
knitr,
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ export(panstripe)
export(plot_acc)
export(plot_gain_loss)
export(plot_pairwise)
export(plot_pangenome_branches)
export(plot_pangenome_cumulative)
export(plot_pangenome_fits)
export(plot_pangenome_curve)
export(plot_pangenome_params)
export(plot_residuals)
export(plot_tree_pa)
Expand Down
48 changes: 31 additions & 17 deletions R/compare_pangenomes.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
#' @param fitA a `panfit` object generated by the `panstripe` function
#' @param fitB a `panfit` object generated by the `panstripe` function
#' @param family the family used by glm. One of 'Tweedie', 'Poisson', 'Gamma' or 'Gaussian'. (default='Tweedie')
#' @param intercept whether or not to include an intercept term in the GLM (default=FALSE). Adding an intercept can increase the robustness of the algorithm to errors at higher branches of the phylogeny at the expense of less sensitivity.
#' @param modeldisp whether or not to model the dispersion as a function of the covariates of interest if using a Tweedie family (default=TRUE)
#' @param ci_type the method used to calculate the bootstrap CI (default='norm'). See \link[boot]{boot.ci} for more details.
#' @param ci_type the method used to calculate the bootstrap CI (default='perc'). See \link[boot]{boot.ci} for more details.
#' @param conf A scalar indicating the confidence level of the required intervals (default=0.95)
#' @param nboot the number of bootstrap replicates to perform (default=100)
#' @param boot_pvalue whether or not to calculate bootstrap p-values (default=FALSE)
Expand All @@ -26,8 +27,8 @@
#' comp$summary
#'
#' @export
compare_pangenomes <- function(fitA, fitB, family="Tweedie", modeldisp=TRUE, ci_type='norm',
conf=0.95, nboot=100, boot_pvalue=FALSE){
compare_pangenomes <- function(fitA, fitB, family="Tweedie", intercept=FALSE, modeldisp=TRUE,
ci_type='perc', conf=0.95, nboot=100, boot_pvalue=FALSE){

# input checks
if (class(fitA) != 'panfit') stop('fitA is not of class `panfit`!')
Expand All @@ -46,25 +47,34 @@ compare_pangenomes <- function(fitA, fitB, family="Tweedie", modeldisp=TRUE, ci_
warning("No gene gains/losses identified at all at internal branches or tips in one or both pangenomes!")
}

# model
model <- stats::as.formula("acc ~ 0 + istip + core + depth + istip:core + depth:pangenome + istip:pangenome + core:pangenome + istip:core:pangenome")
# set model
if (intercept){
model <- stats::as.formula("acc ~ 0 + pangenome + istip + core + depth + istip:core + depth:pangenome + istip:pangenome + core:pangenome + istip:core:pangenome")
} else {
model <- stats::as.formula("acc ~ 0 + istip + core + depth + istip:core + depth:pangenome + istip:pangenome + core:pangenome + istip:core:pangenome")
}

# set dispersion model
if (modeldisp){
dmodel <- stats::as.formula("acc ~ pangenome")
} else {
dmodel <- stats::as.formula("acc ~ 1")
}


# fit model
if (family=='Tweedie'){
m <- fit_double_tweedie(model, dmodel=dmodel, data = dat)
a <- anova.dglm.basic(m, tweedie.power = m$p)
m <- panstripe:::fit_double_tweedie(model, dmodel=dmodel, data = dat)
a <- panstripe:::anova.dglm.basic(m, tweedie.power = m$p)
} else {
m <- stats::glm(model, dat, family = family)
}

s <- summary(m)$coefficients %>%
tibble::as_tibble(rownames = 'term')
s <- s[s$term %in% c('istip:pangenome', 'core:pangenome', 'depth:pangenome'), , drop=FALSE]
s$term <- gsub("[T:].*", "", s$term)
s <- s[s$term %in% c('istip:pangenome', 'core:pangenome', 'depth:pangenome',
'pangenome:istip', 'pangenome:core', 'pangenome:depth'), , drop=FALSE]
s$term <- gsub("pangenome:", "", s$term)
s$term <- gsub(":pangenome", "", s$term)
colnames(s) <- c('term','estimate','std.error','statistic','p.value')

if ((family=='Tweedie') & modeldisp){
Expand All @@ -79,7 +89,7 @@ compare_pangenomes <- function(fitA, fitB, family="Tweedie", modeldisp=TRUE, ci_
# run bootstrap
if (nboot>1){
if (family=='Tweedie'){
boot_reps <- boot::boot(dat, fit_double_model,
boot_reps <- boot::boot(dat, panstripe:::fit_double_model,
R = nboot,
stype='i',
strata=factor(dat$pangenome),
Expand All @@ -89,11 +99,12 @@ compare_pangenomes <- function(fitA, fitB, family="Tweedie", modeldisp=TRUE, ci_
R = nboot,
stype='i',
strata=factor(dat$pangenome),
model=model, family=family, boot_type='branch')
model=model, family=family, boot_type='branch', fit_method='glm')
}

ci <- purrr::map_dfr(which(grepl('^[a-z]+:pangenome$', names(m$coefficients))), ~{
df <- as.data.frame(t(boot_ci_pval(boot_reps, index=.x, type=ci_type,
ci <- purrr::map_dfr(which(grepl('^[a-z]+:pangenome$', names(m$coefficients)) |
grepl('^pangenome:[a-z]+', names(m$coefficients))), ~{
df <- as.data.frame(t(panstripe:::boot_ci_pval(boot_reps, index=.x, type=ci_type,
theta_null=0, ci_conf=conf,
transformation='identity', calc_pval = boot_pvalue)))
df$term <- gsub("[T:].*", "", names(m$coefficients)[[.x]])
Expand Down Expand Up @@ -133,14 +144,17 @@ double_tweedie_llk <- function(p, model, dmodel, data){
fit_double_tweedie <- function(model, dmodel, data){
fm <- tryCatch(
{
op <- stats::optimise(double_tweedie_llk, lower = 1.01, upper = 1.99, model=model, dmodel=dmodel, data=data)
tm <- dglm_mod(formula = model,
op <- stats::optimise(panstripe:::double_tweedie_llk, lower = 1.01, upper = 1.99, model=model, dmodel=dmodel, data=data)
tm <- panstripe:::dglm_mod(formula = model,
dformula = dmodel,
data = data,
family = statmod::tweedie(var.power = op$minimum, link.power = 0),
tweedie.var.power=op$minimum)
# check convergence
ftm <- stats::glm(model, data, family = statmod::tweedie(var.power = op$minimum, link.power = 0))
tm$p <- op$minimum
return(tm)
tm$converged <- ftm$converged
tm
},
error=function(cond) {
stop(
Expand Down
2 changes: 1 addition & 1 deletion R/dglm_mod.R
Original file line number Diff line number Diff line change
Expand Up @@ -330,5 +330,5 @@ anova.dglm.basic <- function(object, tweedie.power){
Chisq <- reduced$m2loglik - object$m2loglik
return(tibble::tibble(
Chisq = Chisq,
p.value = 1 - stats::pchisq(Chisq, 1)))
p.value = stats::pchisq(Chisq, 1, lower.tail = FALSE)))
}
11 changes: 6 additions & 5 deletions R/panfit.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ new_panfit <- function(summary, model, data, boot_samples,
tree, pa){

stopifnot(any(class(summary) %in% c('NULL','tbl','data.frame')))
stopifnot(any(class(model) %in% c('NULL','glm')))
stopifnot(any(class(model) %in% c('NULL','glm','glmmTMB')))
stopifnot(any(class(data) %in% c('NULL','tbl','data.frame')))
stopifnot(any(class(boot_samples) %in% c('NULL','boot')))
stopifnot(any(class(tree) %in% c('NULL','phylo')))
Expand Down Expand Up @@ -59,9 +59,9 @@ validate_panfit <- function(x) {
# check summary
if(!any(class(x$summary) %in% c('NULL','tbl','data.frame'))) stop("Invalid class for `summary`", call. = FALSE)
if (!is.null(x$summary)){
if(ncol(x$summary)!=7) {
if(!ncol(x$summary) %in% c(7,8)) {
stop(
"There must be 7 columns in `summary` data.frame",
"There must be 7 or 8 columns in `summary` data.frame",
call. = FALSE
)
}
Expand All @@ -76,7 +76,7 @@ validate_panfit <- function(x) {
}

# check model
if(!any(class(x$model) %in% c('NULL','glm'))) stop("Invalid class for `model`", call. = FALSE)
if(!any(class(x$model) %in% c('NULL','glm','glmmTMB'))) stop("Invalid class for `model`", call. = FALSE)

# check data
if(!any(class(x$data) %in% c('NULL','tbl','data.frame'))) stop("Invalid class for `data`", call. = FALSE)
Expand All @@ -98,4 +98,5 @@ validate_panfit <- function(x) {
# check ci samplles
if(!class(x$boot_samples) %in% c('NULL','boot')) stop("Invalid class for `boot_samples`", call. = FALSE)

}
}

Loading

0 comments on commit 492a1cc

Please sign in to comment.