-
Notifications
You must be signed in to change notification settings - Fork 0
/
Model_activity_test.R
315 lines (237 loc) · 13 KB
/
Model_activity_test.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
################################################################################
# Script to fit phylogenetic multilevel model using brms: Part 2
# Title: Activity effect on metabolic scaling of ectothermic vertebrates
# Authors: Guillermo García-Gómez (guillegar.gz@gmail.com) & Matthew Spencer
# Date: 07112023
# Operating System: Windows 10 Pro 21H2
# ------------------------------------------------------------------------------
# Cite as:
# García-Gómez, G., Hirst, A.G., Spencer, M., & Atkinson, D. (2024).
# Responses of intraspecific metabolic scaling to temperature and activity differ between water- and air-breathing ectothermic vertebrates.
# OSF (DOI 10.17605/OSF.IO/N6M5J)
# ------------------------------------------------------------------------------
rm(list=ls())# clear the work environment
# ------------------------------------------------------------------------------
# It needs to be set to Project directory
getwd()# to check
# ------------------------------------------------------------------------------
# Packages ####
#
# Bayesian analysis
library(rstan)
library(rstantools)
library(brms)
library(bayestestR)
library(loo)
# visual-checking model performance
library(ggmcmc)
library(bayesplot)
library(tidybayes)
# phylogenetic tree
library(rotl)
library(ape)
## plotting
library(ggplot2)
library(ggtree)
library(cowplot)
# data sorting
library(dplyr)
fitt <- TRUE #fit model with student-t errors?
fitgaussian <- FALSE #fit model with Gaussian errors?
# ------------------------------------------------------------------------------
set.seed(1234)# to replicate the results
# General STAN specifications
rstan::rstan_options(auto_write = TRUE) # write objects to the hard disk to avoid recompilation
options(mc.cores = parallel::detectCores())
#### Load data ####
all_act <- read.csv("table_S2.csv") # Table S2
# Histograms:
# response variable: slopes b
hist(all_act$b)
# predictor variable: log10(Metabolic level, L)
hist(log10(all_act$L))
#### Models for all species (water + air-breathers) ####
# Building the phylogenetic tree for all species (needs internet connection)
all_act.taxa <- tnrs_match_names(as.character(all_act$species_phylo)) # match species from dataset to those included in Tree of Life
all_act.tree <- tol_induced_subtree(ott_ids = ott_id(all_act.taxa), label = "name") # building a tree
# Check phylogenetic tree
ggtree(all_act.tree, layout = "fan", open.angle=0, ladderize = TRUE, size=0.75) +
geom_tiplab(size=2.1, hjust = -.1, color='blue')
# group species colour-coded according to respiration mode by selecting nodes in the tree
(tree_plot <- groupClade(all_act.tree, c(59, 50)))
# plot species colour-coded by respiration mode
(p <- ggtree(tree_plot, aes(color=group), layout = "fan", ladderize = TRUE, size=0.5) +
xlim(0,30) +
scale_color_manual(values=c("cornflowerblue", "cornflowerblue", "chartreuse3"))+
#scale_x_reverse() +
geom_tiplab(size=2) +
theme(legend.position = "none"))
# The tree object contains information on the relationship between species.
# Using this information, we can construct a covariance matrix of species.
all_act$phylo <- gsub(' ','_',all_act$species_phylo)# On the tree, names have underscores instead of spaces.
## to compute branch length
all_act.tree # inferred from taxonomic relatedness, no branch-lengths
is.binary(all_act.tree) # check for polytomies
all_act.tree_p <- multi2di(all_act.tree) # resolve polytomies before calculating branch length
is.binary(all_act.tree_p)
# plot new tree without polytomies
(all_act.tree_plot2 <- groupClade(all_act.tree, c(59, 50)))
(p2 <- ggtree(all_act.tree_plot2, aes(color=group), layout = "fan", ladderize = TRUE, size=0.5) +
xlim(0,30) +
scale_color_manual(values=c("cornflowerblue", "cornflowerblue", "chartreuse3"))+
#scale_x_reverse() +
geom_tiplab(size=2) +
theme(legend.position = "none"))
# compute branch length
all_act.tree.bl <- compute.brlen(all_act.tree_p, method="Grafen", power = 1) # Grafen method used to calculate branch-length from a non-ultrametric tree
# preliminary analyses showed that different tree transformation (rho) resulted in only slightly differences in model performance, so default is used (rho, 'power' = 1).
# This is how a transformation of rho = 1 looks like:
ggtree(all_act.tree.bl, layout = "fan", open.angle=0, ladderize = TRUE, size=0.75) +
geom_tiplab(size=2.1, hjust = -.1, color='blue')
B <- vcv.phylo(all_act.tree.bl) # covariance matrix from tree
# explore whether our tree covers all the species we wanted
# it to include, and making sure that the species names in our
# database match those in the tree. We use the following code.
setdiff(all_act$phylo, rownames(B)) # check species names in dataset match those in A (0 is OK)
setdiff(rownames(B), all_act$phylo) # check species names in A match those in dataset (0 is OK)
#-------------------------------------------------------------------------------
# Define priors
# mix of weakly informative and informative priors
priors =c(
## Informative priors:
# Intercept:
# Following MLBH (Glazier 2005, 2010), slope b approaches 2/3 with temperature-increased
# metabolic levels (log10 L = 0 (intercept), i.e. L of 1 mg O2/g/h, which is high):
prior(normal(1, 0.5), "Intercept"),
# Coefficient of log-transformed Metabolic level (L):
# Following empirical estimate of slope b vs. L between species
# at different temperatures from Killen et al. (2010) in Eco. Lett. (estimate = -0.145),
# we apply the inverse of this value for activity:
prior(normal(0.1, 0.5), "b", coef = "log10_L"),
## weakly informative priors
prior(normal(0, 1), "b"),
prior(student_t(3, 0, 2.5), "sd"),
prior(student_t(3, 0, 2.5), "sigma")
)
#-------------------------------------------------------------------------------
#### Model fitting ####
if(fitt){
# model
m_all.act_complex.info <- brm(
b ~ log10_L * group + temp + (log10_L|experiment) + (1|gr(phylo, cov = B)),
data = all_act,
family = student(),
control = list(adapt_delta = 0.999, max_treedepth = 15),
data2 = list(B = B),
prior = priors,
sample_prior = TRUE,
save_pars = save_pars(all = TRUE),
iter = 7000, warmup = 3000, chains = 4, cores = 4)
# total running time: 15 min # Bulk_ESS seems fine for all parameters (i.e. > 1000)
# Model summary ####
summary(m_all.act_complex.info, prob = 0.95) %>% print(digits = 3)
# Family: student
# Links: mu = identity; sigma = identity; nu = identity
# Formula: b ~ log10_L * group + temp + (log10_L | experiment) + (1 | gr(phylo, cov = B))
# Data: all_act (Number of observations: 281)
# Draws: 4 chains, each with iter = 7000; warmup = 3000; thin = 1;
# total post-warmup draws = 16000
# Group-Level Effects:
# ~experiment (Number of levels: 56)
# Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
# sd(Intercept) 0.103 0.028 0.044 0.157 1.003 1804 2244
# sd(log10_L) 0.141 0.026 0.092 0.194 1.001 3438 4153
# cor(Intercept,log10_L) 0.827 0.127 0.567 0.966 1.001 2387 3009
# ~phylo (Number of levels: 47)
# Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
# sd(Intercept) 0.136 0.053 0.041 0.247 1.003 1367 2949
# Population-Level Effects:
# Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
# Intercept 0.978 0.145 0.679 1.273 1.000 10953 8947
# log10_L 0.194 0.055 0.085 0.302 1.001 6758 8638
# groupwaterMbreather -0.111 0.162 -0.438 0.225 1.000 8481 7272
# temp -0.002 0.001 -0.004 0.001 1.001 11530 12005
# log10_L:groupwaterMbreather -0.152 0.062 -0.274 -0.029 1.000 6639 9717
# Family Specific Parameters:
# Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
# sigma 0.059 0.007 0.046 0.073 1.001 5281 8858
# nu 4.777 2.318 2.358 10.667 1.000 9518 10820
# Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
# and Tail_ESS are effective sample size measures, and Rhat is the potential
# scale reduction factor on split chains (at convergence, Rhat = 1).
# create a folder named 'Model_outputs', and save model (so that they don't need to be rerun for every session) ####
# note: saved models are too large to be pushed in git standard repositories, save them elsewhere
saveRDS(m_all.act_complex.info,"../Model_outputs/m_all.act.rds")
m_all.act <- readRDS("../Model_outputs/m_all.act.rds") # rename saved model
summary(m_all.act, prob = 0.95) %>% print(digits = 3)
# write stan code for this model:
stancode(m_all.act)
# Posterior predictive checks ####
plot(m_all.act, N = 6, ask = FALSE) # check convergence of chains / looks fine (but note sd_phylo_intercept)
pp_check(m_all.act, type="scatter_avg", ndraws = 100) # some outliers at the x end
pp_check(m_all.act, type = "loo_pit_overlay", nsamples = NULL) # looks fine
pp_check(m_all.act, type = "dens_overlay", nsamples = 99) # looks fine
plot(conditional_effects(m_all.act), points = TRUE) # see data from the same experiments (Fig. 2)
## LOO
loo_model_all_act <- loo(m_all.act, save_psis = TRUE)
loo_model_all_act # 3 problematic observations (1.1% of total data) based on pareto-k-diagnostic (k > 0.7)
plot(loo_model_all_act) # plot of the pareto-k values
## RELOO without problematic observations, using one more chain:
# (takes about 20 minutes to run, and results are almost identical)
# This is a necessary step to calculate Monte Carlo SE of elpd_loo
reloo_model_all_act <- reloo(loo_model_all_act, m_all.act, chains = 1)
reloo_model_all_act # almost identical LOOIC (-560.4 vs. -560.6).
# Computed from 16000 by 281 log-likelihood matrix
# Estimate SE
# elpd_loo 280.3 16.3
# p_loo 77.3 5.6
# looic -560.6 32.6
# ------
# Monte Carlo SE of elpd_loo is 0.2.
# Pareto k diagnostic values:
# Count Pct. Min. n_eff
# (-Inf, 0.5] (good) 267 95.0% 242
# (0.5, 0.7] (ok) 14 5.0% 252
# (0.7, 1] (bad) 0 0.0% <NA>
# (1, Inf) (very bad) 0 0.0% <NA>
# All Pareto k estimates are ok (k < 0.7).
# See help('pareto-k-diagnostic') for details.
# Calculate phylogenetic signal (equivalent lambda):
hyp <- "sd_phylo__Intercept^2 / (sd_phylo__Intercept^2 + sigma^2) = 0"
(hyp_act <- hypothesis(m_all.act, hyp, class = NULL) %>% print(digits = 3))
# Hypothesis Tests for class :
# Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob Star
# 1 (sd_phylo__Interc... = 0 0.789 0.159 0.324 0.951 0.017 0.017 *
# ---
# 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
# '*': For one-sided hypotheses, the posterior probability exceeds 95%;
# for two-sided hypotheses, the value tested against lies outside the 95%-CI.
# Posterior probabilities of point hypotheses assume equal prior probabilities.
}
if(fitgaussian){
# Student-t distribution vs. Gaussian distribution ####
# Check that model performance is better using a Student-t distribution than a Gaussian distribution:
m_all.act_gaussian <- update(m_all.act, family = gaussian())
# total running time: 8 min # Bulk_ESS are fine (i.e.> 1000)
# save model (so that they don't need to be rerun for every session)
# saveRDS(m_all.act_gaussian,"../Model_outputs/m_all.act_gaussian.rds")
# m_all.act_gaussian <- readRDS("../Model_outputs/m_all.act_gaussian.rds") # call saved model
# compare models through LOOIC
loo(m_all.act, m_all.act_gaussian)
# Model comparisons:
# elpd_diff se_diff
# m_all.act 0.0 0.0
# m_all.act_gaussian -15.9 7.0
}
# References:
# Glazier, D.S. (2005). Beyond the ‘3/4-power law’: variation in the intra- and interspecific scaling of metabolic rate in animals. Biol. Rev., 80, 611-662.
# Glazier, D.S. (2010). A unifying explanation for diverse metabolic scaling in animals and plants. Biol. Rev., 85, 111-138.
# Killen, S.S., Atkinson, D. & Glazier, D.S. (2010). The intraspecific scaling of metabolic rate with body mass in fishes depends on lifestyle and temperature. Ecol. Lett., 13, 184-193.
#-------------------------------------------------------------------------------
# Save data on the R session and packages versions for reproducibility shake ####
sink("../R_session/Bayesian_models_act_R_session.txt") # Create folder named 'R_session'
sessionInfo()
sink()
################################################################################
############################ END OF SCRIPT #####################################
################################################################################