-
Notifications
You must be signed in to change notification settings - Fork 2
/
Jenior_Metatransciptomics_mSphere_2018.Rmd
1578 lines (1383 loc) · 128 KB
/
Jenior_Metatransciptomics_mSphere_2018.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
---
csl: msphere.csl
output:
word_document:
keep_md: true
reference_docx: manuscript_format.docx
bibliography: references.bib
---
#### Title:
*Clostridium difficile* alters the structure and metabolism of distinct cecal microbiomes during initial infection to promote sustained colonization
##### Authors:
Matthew L. Jenior^1^, Jhansi L. Leslie^1^, Vincent B. Young^1,2^, and Patrick D. Schloss^1\*^
1. Department of Microbiology and Immunology, University of Michigan, Ann Arbor, Michigan
2. Department of Internal Medicine/Infectious Diseases Division, University of Michigan Medical Center, Ann Arbor, Michigan
<br>
<br>
\* Correspondence to Patrick D. Schloss at pschloss@umich.edu
### Abstract
Susceptibility to *Clostridium difficile* infection is primarily associated with previous exposure to antibiotics, which compromise the structure and function of the gut bacterial community. Specific antibiotic classes correlate more strongly with recurrent or persistent *C. difficile* infection. As such, we utilized a mouse model of infection to explore the effect of distinct antibiotic classes on the impact that infection has on community-level transcription and metabolic signatures shortly following pathogen colonization and how those changes may associate with persistence of *C. difficile*. Untargeted metabolomic analysis revealed that *C. difficile* infection had significantly larger impacts on the metabolic environment across cefoperazone and streptomycin-pretreated mice, which become persistently colonized compared to clindamycin-pretreated mice where infection quickly became undetectable. Through metagenome-enabled metatranscriptomics we observed that transcripts for genes associated with carbon and energy acquisition were greatly reduced in infected animals, suggesting those niches were instead occupied by *C. difficile*. Furthermore, the largest changes in transcription were seen in the least abundant species indicating that *C. difficile* may "attack the loser" in gut environments where sustained infection occurs more readily. Overall, our results suggest that *C. difficile* is able to restructure the nutrient-niche landscape in the gut to promote persistent infection.
#### Importance
*Clostridium difficile* has become the most common single cause of hospital-acquired infection over the last decade in the United States. Colonization resistance to the nosocomial pathogen is primarily provided by the gut microbiota, which is also involved in clearing the infection as the community recovers from perturbation. As distinct antibiotics are associated with different risk levels for CDI, we utilized a mouse model of infection with 3 separate antibiotic pretreatment regimes to generate alternative gut microbiomes that each allowed for *C. difficile* colonization but varied in clearance rate. To assess community-level dynamics, we implemented an integrative multi-omic approach that revealed that infection significantly changed many aspects of the gut community. The degree to which the community changed was inversely correlated with clearance during the first six days of infection, suggesting that *C. difficile* differentially modifies the gut environment to promote persistence. This is the first time metagenome-enabled metatranscriptomics have been employed to study the behavior of a host-associated microbiota in response to an infection. Our results allow for a previously unseen understanding of the ecology associated with *C. difficile* infection and provides groundwork for identification of context-specific probiotic therapies.
### Introduction
One of the many beneficial functions provided by the indigenous gut bacterial community is its ability to protect the host from infection by pathogens [@Vollaard1994]. This attribute, termed colonization resistance, is one of the main mechanisms that protect healthy individuals from the gastrointestinal pathogen *Clostridium difficile* [@Freter1955;@Fekety1979;@Britton2012]. *C. difficile* infection (CDI) is responsible for most cases of antibiotic-associated colitis, a toxin-mediated diarrheal disease that has dramatically increased in prevalence over the last 10 years. There are an estimated 453,000 cases of CDI resulting in 29,000 deaths in the United States annually [@Lessa2015]. Antibiotics are a major risk factor for CDI and are thought to increase susceptibility by disrupting the gut bacterial community structure; however, it is still unclear what specific changes to the microbiota contribute to this susceptibility [@Antonopoulos2009;@Buffie2012]. Although most classes of antibiotics have been associated with initial susceptibility to CDI, fluoroquinolones, clindamycin, and cephalosporins are linked to increased risk of recurrent or persistent infection [@Thomas2003;@Brown2013;@Bignardi1998]. This raises questions about the groups of bacteria that are differentially impacted by certain therapies and how these changes effect duration or severity of the infection.
Associations between the membership and functional capacity of the microbiota as measured by the metabolic output suggest that antibiotics increase susceptibility by altering the nutrient milieu in the gut to one that favors *C. difficile* metabolism [@Antunes2011;@Jump2014;@Theriot2014]. One hypothesis is that *C. difficile* colonization resistance is driven by competition for growth substrates by an intact community of metabolic specialists. This has been supported by animal model experiments over the past several decades [@Wilson1988;@Sambol2002;@Perez-Cobas2014]. This line of reasoning has been carried through to the downstream restoration of colonization resistance with the application of fecal microbiota transplant (FMT). Although an individual's microbiota may not return to its precise original state following FMT, it is hypothesized that the functional capacity of the new microbiota is able to outcompete *C. difficile* for resources and clear the infection [@Zaura2015;@Theriot2014].
Leveraging distinct antibiotic treatment regimens in a murine model of CDI [@Schubert2015], we and others have shown that *C. difficile* adapts its physiology to the distinct cecal microbiomes that resulted from exposure to antibiotics [@Schubert2015;@Jenior2017]. We went on to show that *C. difficile* appears to adapt portions of its metabolism to fit alternative nutrient niche landscapes. As the diet of the mice remained unchanged, changes in the cecal metabolome are likely driven by the intestinal microbiota. Although it has been established that *C. difficile* colonizes these communities effectively, it is unknown whether the differences in the metabolic activity of communities following antibiotic treatment are impacted by *C. difficile* colonization or if they correlate with prolonged infection. Historically, it has been difficult to ascribe specific metabolic contributions to individual taxa within the microbiota during perturbations, especially within the context of a host. To address this limited understanding, we employed an integrative untargeted metabolomic and metagenome-enabled metatransciptomic approach to investigate specific responses to infection of the gut microbiota in a murine model of CDI. This high-dimensional analysis allowed us to not only characterize the metabolic output of the community, but to also identify which subgroups of bacteria were differentially active during mock infection and CDI. Our results supported the hypothesis that CDI was indeed associated with altered community-level gene transcription and metabolomic profile of susceptible environments. This effect was significantly more pronounced in communities where *C. difficile* was able to maintain colonization. This work highlights the need for increased appreciation of the differential, combined effects of antibiotics and CDI on the gut microbiota to develop more successful targeted therapies that eliminate *C. difficile* colonization.
```{r startup, echo=F, message=F, warning=F, cache=F}
# Load dependencies
deps <- c('vegan', 'shape', 'plotrix', 'reshape2', 'GMD', 'randomForest', 'RColorBrewer', 'gplots','viridis', 'scales','Hmisc','VennDiagram','AUCRF')
for (dep in deps){
if (dep %in% installed.packages()[,"Package"] == FALSE){
install.packages(as.character(dep), quiet=TRUE);
}
library(dep, verbose=FALSE, character.only=TRUE)
}
rm(dep, deps)
# Filter out columns that have values in at least 3 samples (ignores first column if needed)
filter_table <- function(data) {
drop <- c()
if (class(data[,1]) != 'character') {
if (sum(data[,1] != 0) < 3) {
drop <- c(drop, colnames(data)[1])
}
}
for (index in 2:ncol(data)) {
if (sum(data[,index] != 0) < 3) {
drop <- c(drop, colnames(data)[index])
}
}
filtered_data <- data[,!(colnames(data) %in% drop)]
return(filtered_data)
}
# Neatly merge 2 matices with shared row names
clean_merge <- function(data_1, data_2){
clean_merged <- merge(data_1, data_2, by='row.names', all.y=TRUE)
rownames(clean_merged) <- clean_merged$Row.names
clean_merged$Row.names <- NULL
return(clean_merged)
}
# Significant feature filter with random forest (2 classes only)
susceptibilityRF <- function(feature_data){
# Breiman (2001). Random Forests. Machine Learning.
lvls <- as.vector(unique(feature_data$susceptibility))
x <- round(length(rownames(feature_data[which(feature_data$susceptibility==lvls[1]),])) * 0.623)
y <- round(length(rownames(feature_data[which(feature_data$susceptibility==lvls[2]),])) * 0.623)
z <- max(c(round(x / y), round(y / x))) * 3
nTrees <- round(ncol(feature_data) - 1) * z
mTries <- round(sqrt(ncol(feature_data) - 1))
rm(lvls, x, y, z)
# Finish reformatting data
training_data <- feature_data
susceptibility <- training_data$susceptibility
susceptibility <- as.factor(susceptibility)
training_data$susceptibility <- NULL
names(training_data) <- make.names(names(training_data))
training_data <- data.matrix(training_data)
training_data[is.na(training_data)] <- 0
# Run random forest and get MDA values
set.seed(906801)
modelRF <- randomForest(susceptibility ~ ., data=training_data,
importance=TRUE, replace=FALSE, err.rate=TRUE,
ntree=nTrees, mtry=mTries)
featRF <- importance(modelRF, type=1)
featRF <- as.data.frame(featRF)
featRF$features <- rownames(featRF)
rm(modelRF)
# Filter to significant features (Strobl 2002) and sort
sigFeatRF <- as.data.frame(subset(featRF, featRF$MeanDecreaseAccuracy > (abs(min(featRF$MeanDecreaseAccuracy)))))
# Subset trainng data to significant features
finalFeat <- training_data[,which(rownames(sigFeatRF) %in% colnames(training_data))]
rm(training_data, sigFeatRF)
return(finalFeat)
}
# Significant feature filter with random forest (2 classes only)
infectionRF <- function(feature_data){
# Breiman (2001). Random Forests. Machine Learning.
lvls <- as.vector(unique(feature_data$infection))
x <- round(length(rownames(feature_data[which(feature_data$infection==lvls[1]),])) * 0.623)
y <- round(length(rownames(feature_data[which(feature_data$infection==lvls[2]),])) * 0.623)
z <- max(c(round(x / y), round(y / x))) * 3
nTrees <- round(ncol(feature_data) - 1) * z
mTries <- round(sqrt(ncol(feature_data) - 1))
rm(lvls, x, y, z)
# Finish reformatting data
training_data <- feature_data
infection <- training_data$infection
infection <- as.factor(infection)
training_data$infection <- NULL
names(training_data) <- make.names(names(training_data))
training_data <- data.matrix(training_data)
training_data[is.na(training_data)] <- 0
# Run random forest and get MDA values
set.seed(906801)
modelRF <- randomForest(infection ~ ., data=training_data,
importance=TRUE, replace=FALSE, err.rate=TRUE,
ntree=nTrees, mtry=mTries)
featRF <- importance(modelRF, type=1)
featRF <- as.data.frame(featRF)
featRF$features <- rownames(featRF)
rm(modelRF)
# Filter to significant features (Strobl 2002) and sort
sigFeatRF <- as.data.frame(subset(featRF, featRF$MeanDecreaseAccuracy > (abs(min(featRF$MeanDecreaseAccuracy)))))
sigFeatRF <- sigFeatRF[order(-sigFeatRF$MeanDecreaseAccuracy),]
return(sigFeatRF)
}
# Significant feature filter with random forest (2 or more classes)
abxRF <- function(feature_data){
mTries <- round(sqrt(ncol(feature_data) - 1))
# Finish reformatting data
training_data <- feature_data
abx <- training_data$abx
abx <- as.factor(abx)
training_data$abx <- NULL
names(training_data) <- make.names(names(training_data))
training_data <- data.matrix(training_data)
training_data[is.na(training_data)] <- 0
# Run random forest and get MDA values
set.seed(906801)
modelRF <- randomForest(abx ~ ., data=training_data,
importance=TRUE, replace=FALSE, err.rate=TRUE, mtry=mTries)
featRF <- importance(modelRF, type=1)
featRF <- as.data.frame(featRF)
featRF$features <- rownames(featRF)
rm(modelRF)
# Filter to significant features (Strobl 2002) and sort
sigFeatRF <- as.data.frame(subset(featRF, featRF$MeanDecreaseAccuracy > (abs(min(featRF$MeanDecreaseAccuracy)))))
# Subset trainng data to significant features
finalFeat <- training_data[,which(rownames(sigFeatRF) %in% colnames(training_data))]
rm(training_data, sigFeatRF)
return(finalFeat)
}
# Calculates distance of a point from x=y in 2-d space
dist_xy <- function(x) {
v1 <- c(20,20) - x
v2 <- c(0,0) - c(20,20)
m <- cbind(v1, v2)
distance <- abs(det(m)) / sqrt(sum(v1 * v1))
return(distance)
}
```
### Results
```{r figure_1, echo=F, warning=F, message=F, results='hide', cache=F}
# Load in data
metadata <- read.delim('data/metadata.tsv', sep='\t', header=T, row.names=1)
metadata <- subset(metadata, type == 'conventional') # remove germfree
metadata$type <- NULL
metadata$cage <- NULL
metadata$mouse <- NULL
metadata$gender <- NULL
wetlab <- read.delim('data/wetlab_assays.tsv', sep='\t', header=T, row.names=1)
cfu <- read.delim('data/cfu_time.tsv', sep='\t', header=TRUE)
taxonomy_family <- read.delim('data/16S_analysis/all_treatments.family.cons.format.taxonomy', sep='\t', header=T)
shared_family <- read.delim('data/16S_analysis/all_treatments.family.subsample.shared', sep='\t', header=T, row.names=2)
shared_family <- shared_family[!rownames(shared_family) %in% c('CefC5M2'), ]
shared_family <- shared_family[ ,!(names(shared_family) == 'Otu008')]
shared_family$numOtus <- NULL
shared_family$label <- NULL
# Format wetlab assay data
wetlab <- subset(wetlab, infection == '630') # Remove uninfected controls
wetlab <- subset(wetlab, treatment != 'germfree') # Remove germfree
wetlab$infection <- NULL
wetlab$cfu_spore <- NULL
wetlab$toxin_titer <- NULL
wetlab$cfu_vegetative <- as.numeric(wetlab$cfu_vegetative)
wetlab$cfu_vegetative[wetlab$cfu_vegetative == 0] <- 100
wetlab$cfu_vegetative <- log10(wetlab$cfu_vegetative)
wetlab$treatment <- droplevels(wetlab$treatment)
wetlab$treatment <- factor(wetlab$treatment, levels=c('conventional', 'streptomycin', 'cefoperazone', 'clindamycin'))
wetlab$cfu_vegetative[wetlab$cfu_vegetative <= 2.0] <- 1.7 # Undetectable points below LOD
noabx_veg <- as.numeric(subset(wetlab, treatment == 'conventional')$cfu_vegetative)
strep_veg <- as.numeric(subset(wetlab, treatment == 'streptomycin')$cfu_vegetative)
cef_veg <- as.numeric(subset(wetlab, treatment == 'cefoperazone')$cfu_vegetative)
clinda_veg <- as.numeric(subset(wetlab, treatment == 'clindamycin')$cfu_vegetative)
rm(wetlab)
# Significance testing
p_val <- p.adjust(c(wilcox.test(noabx_veg, strep_veg, exact=FALSE)$p.value,
wilcox.test(noabx_veg, cef_veg, exact=FALSE)$p.value,
wilcox.test(noabx_veg, clinda_veg, exact=FALSE)$p.value),
method='BH')
veg_pval <- as.character(max(round(p_val, 3)))
if (veg_pval == '0') {
veg_pval = '0.001'
}
rm(noabx_veg, strep_veg, cef_veg, clinda_veg, p_val)
# Format CFU over time
cfu <- cfu[,c(1,11)]
strep_stoolCFU <- subset(cfu, abx == 'streptomycin')[,2]
cef_stoolCFU <- subset(cfu, abx == 'cefoperazone')[,2]
clinda_stoolCFU <- subset(cfu, abx == 'clindamycin')[,2]
rm(cfu)
# Significance testing
p_val <- p.adjust(c(wilcox.test(strep_stoolCFU, clinda_stoolCFU, exact=FALSE)$p.value,
wilcox.test(cef_stoolCFU, clinda_stoolCFU, exact=FALSE)$p.value),
method='BH')
stoolCFU_pval <- as.character(max(round(p_val, 3)))
rm(strep_stoolCFU, cef_stoolCFU, clinda_stoolCFU, p_val)
# Convert to relative abundance
relabund_family <- (shared_family / rowSums(shared_family)) * 100
rm(shared_family)
# Rename OTUs to families
relabund_family <- t(relabund_family)
relabund_family <- merge(taxonomy_family, relabund_family, by.x='otu', by.y='row.names')
rownames(relabund_family) <- relabund_family$family
relabund_family$otu <- NULL
relabund_family$phylum <- NULL
relabund_family$family <- NULL
relabund_family <- t(relabund_family)
# Combine with metadata and subset to pretreatments
relabund_family <- clean_merge(metadata, relabund_family)
relabund_family$susceptibility <- NULL
relabund_family <- subset(relabund_family, infection == '630')
relabund_family$infection <- NULL
relabund_family_cef <- subset(relabund_family, abx == 'cefoperazone')
relabund_family_cef <- subset(relabund_family_cef, rownames(relabund_family_cef) != 'CefC3M2')
relabund_family_cef$abx <- NULL
relabund_family_strep <- subset(relabund_family, abx == 'streptomycin')
relabund_family_strep$abx <- NULL
relabund_family_clinda <- subset(relabund_family, abx == 'clindamycin')
relabund_family_clinda$abx <- NULL
rm(relabund_family)
# Get % of community for specific groups
cef_bacter <- median(relabund_family_cef$Bacteroidaceae)
cef_bacter <- as.character(round(cef_bacter, 2))
rm(relabund_family_cef)
strep_lacto <- median(relabund_family_strep$Lactobacillaceae)
strep_lacto <- as.character(round(strep_lacto, 2))
rm(relabund_family_strep)
clinda_lacto <- median(relabund_family_clinda$Lactobacillaceae)
clinda_lacto <- as.character(round(clinda_lacto, 2))
rm(relabund_family_clinda)
```
#### Distinct antibiotic pretreatments are associated with alternative community structures that are equally susceptible to initial *C. difficile* colonization, but differ in patterns of clearance.
We have previously shown that when conventionally-reared SPF mice were pretreated with one of three different antibiotics (streptomycin, cefoperazone, and clindamycin; Table S1), each pretreatment was associated with altered patterns of *C. difficile* virulence factor expression [@Jenior2017]. These antibiotics were chosen for not only the ability to reduce *C. difficile* colonization resistance in a mouse model [@Schubert2015], but also for distinct and significant impacts on the structure and diversity of the cecal microbiota (Fig. 1A) [@Jenior2017]. In each antibiotic pretreatment, we observed equally high levels of *C. difficile* colonization on the day after infection, however, over the subsequent 9 days the amount of *C. difficile* in the feces of clindamycin-pretreated mice were the only mice to fall below the limit of detection, while mice receiving the other pretreatments remained highly colonized (*p* = `r stoolCFU_pval`; Fig. 1A). We hypothesized that this occurred in the clindamycin-pretreated mice because the perturbed intestinal community occupied niche space that overlapped with that of *C. difficile*.
We chose to focus our remaining experiments on cecal samples collected 18 hours after infection to the assess behavior of *C. difficile* directly prior to the reduction in detectable *C. difficile*. This end point corresponded with a previous study where *C. difficile* reached maximum cecal vegetative cell load with few detectable spores [@Koenigsknecht2015]. We also elected to examine cecal content because it was more likely to be a site of active bacterial metabolism compared to stool and would allow for an assessment of functional differences in the microbiota. At 18 hours after infection, we found that the communities remained highly differentiated from untreated controls as measured by 16S rRNA gene sequencing of the V4 region (Fig. 1B). The composition of streptomycin-pretreated communities was more variable between cages, but was generally enriched for members of the *Bacteroidetes* phylum. Cefoperazone and clindamycin-pretreated cecal communities were consistently dominated by members of the *Lactobacillaceae* and *Enterobacteriaceae* families, respectively. Despite variation in the community structures, there were no significant differences in the number of vegetative cells between any antibiotic-pretreatment group (Fig. 1C). All susceptible mice were colonized with ~1x10^8^ vegetative colony forming units (CFU) per gram of cecal content and untreated mice maintained *C. difficile* colonization resistance.
```{r figure_2, echo=F, warning=F, message=F, results='hide', cache=F}
aucrfClearance <- function(training_data){
# Format levels of susceptibility for AUCRF
colnames(training_data) <- make.names(colnames(training_data))
levels <- as.vector(unique(training_data$clearance))
training_data$clearance <- as.character(training_data$clearance)
training_data$clearance[which(training_data$clearance==levels[1])] <- 0
training_data$clearance[which(training_data$clearance==levels[2])] <- 1
training_data$clearance <- as.factor(as.numeric(training_data$clearance))
rm(levels)
# Run AUCRF with reproduceable parameters
set.seed(906801)
data_RF <- AUCRF(clearance ~ ., data=training_data, pdel=0.05, k0=5, ranking='MDA')
return(data_RF)
}
#----------------#
# Read in data
# Metabolomes
metabolome <- read.delim('data/metabolome/scaled_intensities.log10.tsv', sep='\t', header=TRUE)
# 16S
shared_otu <- read.delim('data/16S_analysis/all_treatments.0.03.unique_list.conventional.shared', sep='\t', header=T, row.names=2)
shared_otu <- shared_otu[!rownames(shared_otu) %in% c('CefC5M2'), ] # Remove possible contaminated sample
cdf_otu <- shared_otu[,(names(shared_otu) %in% c('Otu0004','Otu0308'))]
shared_otu <- shared_otu[,!(names(shared_otu) %in% c('Otu0004','Otu0308'))] # Remove residual C. difficile OTUs
shared_noncdf <- subset(shared_otu, rowSums(cdf_otu) != 0)
cdf_otu <- subset(cdf_otu, rowSums(cdf_otu) != 0)
cdf_percent <- as.character(round(mean(rowSums(cdf_otu) / rowSums(shared_noncdf)) * 100, 3))
rm(cdf_otu, shared_noncdf)
shared_otu$numOtus <- NULL
shared_otu$label <- NULL
# Metadata
metadata <- read.delim('data/metadata.tsv', sep='\t', header=T, row.names=1)
# Metadata
metadata$type <- NULL
metadata$cage <- NULL
metadata$mouse <- NULL
metadata$gender <- NULL
metadata$clearance <- c(rep('colonized',18), rep('cleared',18), rep('colonized',18),
rep('mock',12), rep('colonized',18))
# Metabolomes
metabolome$BIOCHEMICAL <- gsub('_', ' ', metabolome$BIOCHEMICAL)
rownames(metabolome) <- metabolome$BIOCHEMICAL
metabolome$BIOCHEMICAL <- NULL
metabolome$PUBCHEM <- NULL
metabolome$KEGG <- NULL
metabolome$SUB_PATHWAY <- NULL
metabolome$SUPER_PATHWAY <- NULL
metabolome <- as.data.frame(t(metabolome))
metabolome <- clean_merge(metadata, metabolome)
metabolome <- subset(metabolome, abx != 'germfree')
metabolome <- subset(metabolome, abx != 'none')
metabolome <- subset(metabolome, infection == 'mock')
metabolome$abx <- NULL
metabolome$susceptibility <- NULL
metabolome$infection <- NULL
# 16S
shared_otu <- clean_merge(metadata, shared_otu)
shared_otu <- subset(shared_otu, abx != 'germfree')
shared_otu <- subset(shared_otu, abx != 'none')
shared_otu <- subset(shared_otu, infection == 'mock')
shared_otu$abx <- NULL
shared_otu$susceptibility <- NULL
shared_otu$infection <- NULL
# Ordination analysis
# Metabolome - 728 metabolites
metabolome_dist <- vegdist(metabolome[,2:ncol(metabolome)], method='bray') # Bray-Curtis
# permANOVA
metabolome_permANOVA_pval <- adonis(metabolome_dist ~ metabolome$clearance, metabolome, perm=999)$aov.tab[[6]][1]
metabolome_permANOVA_pval <- as.character(round(metabolome_permANOVA_pval, 3))
rm(metabolome_dist)
# 16S - 810 OTUs
otu_dist <- vegdist(shared_otu[,2:ncol(shared_otu)], method='bray') # Bray-Curtis
# permANOVA
otu_permANOVA_pval <- adonis(otu_dist ~ shared_otu$clearance, shared_otu, perm=999)$aov.tab[[6]][1]
otu_permANOVA_pval <- as.character(round(otu_permANOVA_pval, 3))
rm(otu_dist)
rm(metadata)
# Feature selection
# Metabolome
colnames(metabolome) <- make.names(colnames(metabolome))
metabolome_aucrf <- aucrfClearance(metabolome)
# Get OOB
metabolome_aucrf_oob <- metabolome_aucrf$RFopt
metabolome_aucrf_oob <- metabolome_aucrf_oob$err.rate
metabolome_aucrf_oob <- as.character(round(median(metabolome_aucrf_oob[,1]) * 100, 3))
rm(metabolome_aucrf, metabolome)
# 16S
# AUCRF feature selection/reduction (to 0% OOB)
colnames(shared_otu) <- make.names(colnames(shared_otu))
shared_otu_aucrf <- aucrfClearance(shared_otu)
# Get OOB
shared_otu_aucrf_oob <- shared_otu_aucrf$RFopt
shared_otu_aucrf_oob <- shared_otu_aucrf_oob$err.rate
shared_otu_aucrf_oob <- as.character(round(median(shared_otu_aucrf_oob[,1]) * 100, 3))
rm(shared_otu, shared_otu_aucrf)
```
#### Multiple biological signatures in the bacterial community and metabolome differentiated cecal microbiomes that remained colonized by *C. difficile* from those that did not.
Pretreatment with antibiotics not only alters the structure of the resident microbiota, but also has a dramatic impact on the intestinal metabolome [@Antunes2011; @Jump2014; @Theriot2014]. To understand the ramifications each antibiotic had on the cecal metabolomic environment, we performed untargeted metabolomic analysis on the cecal contents that were also utilized in the 16S rRNA gene sequencing. We identified a total of 727 distinct metabolites. In combination with our 16S rRNA gene sequencing results, we first characterized the differences between the microbiomes (i.e. the microbiota, plus the associated metabolome) of the mock-infected animals to quantify possible drivers of communities that cleared the infection. To focus our analysis on ascertaining changes in discrete populations within the microbiota, we generated operational taxonomic units (OTUs) clustered at 97% similarity. We also removed all *C. difficile* 16S rRNA gene sequences, which represented an average of `r cdf_percent`% sequencing reads across infection groups to eliminate its direct impact in downstream calculation. Using these methods we discovered that the Bray-Curtis dissimilarity of both the community structure (*p* < `r otu_permANOVA_pval`) and metabolome (*p* < `r metabolome_permANOVA_pval`) were significantly different between cleared and colonized groups during the early stages of infection (Fig. 2A & 2C). These results supported the hypothesis that the cecal environment created by clindamycin pretreatment was highly divergent from the other groups, and likely contributed to the clearance seen in the subsequent days.
To identify the populations and metabolites that were associated with sustained colonization, we utilized Random Forest machine learning with cross validation to identify the smallest optimal subset of features that could successfully differentiate microbiomes associated with infection clearance and those that remain colonized [@Calle2011]. We identified a model with 5 OTUs that correctly classified all samples to their corresponding groups (Fig. 2B; Out-of-bag error=`r shared_otu_aucrf_oob`%). Interestingly, these OTUs were not consistently abundant in antibiotic-pretreated communities. Similarly, when we used the same approach with the metabolomic data, we identified a model that used 5 metabolites that correctly differentiated the groups (Fig. 2D; Out-of-bag error=`r metabolome_aucrf_oob`%). Together these results further supported the hypothesis that the environment of the cecum, even early during infection, is distinct between groups that clear the infection and those that maintain *C. difficile* at high levels. Furthermore, results from machine learning analysis suggest that rare members of the communities had a disproportionate influence on the clearance patterns observed between pretreatment regimes and that changes in community structure may be less consistent than changes in the metatransciptome or metabolome.
```{r figure_s1, echo=F, warning=F, message=F, results='hide', cache=F}
aucrfSusceptibility <- function(training_data){
# Format levels of susceptibility for AUCRF
colnames(training_data) <- make.names(colnames(training_data))
levels <- as.vector(unique(training_data$susceptibility))
training_data$susceptibility <- as.character(training_data$susceptibility)
training_data$susceptibility[which(training_data$susceptibility==levels[1])] <- 0
training_data$susceptibility[which(training_data$susceptibility==levels[2])] <- 1
training_data$susceptibility <- as.factor(as.numeric(training_data$susceptibility))
rm(levels)
# Run AUCRF with reproduceable parameters
set.seed(906801)
data_RF <- AUCRF(susceptibility ~ ., data=training_data, pdel=0.05, k0=5, ranking='MDA')
return(data_RF)
}
# Metadata
metadata <- read.delim('data/metadata.tsv', sep='\t', header=T, row.names=1)
metadata$type <- NULL
metadata$cage <- NULL
metadata$mouse <- NULL
metadata$gender <- NULL
# Metabolomes
metabolome <- read.delim('data/metabolome/scaled_intensities.log10.tsv', sep='\t', header=TRUE)
metabolome$BIOCHEMICAL <- gsub('_', ' ', metabolome$BIOCHEMICAL)
rownames(metabolome) <- metabolome$BIOCHEMICAL
metabolome$BIOCHEMICAL <- NULL
metabolome$PUBCHEM <- NULL
metabolome$KEGG <- NULL
metabolome$SUB_PATHWAY <- NULL
metabolome$SUPER_PATHWAY <- NULL
metabolome <- as.data.frame(t(metabolome))
metabolome <- clean_merge(metadata, metabolome)
metabolome <- subset(metabolome, abx != 'germfree')
metabolome <- subset(metabolome, infection == 'mock')
metabolome$abx <- NULL
metabolome$infection <- NULL
# 16S
shared_otu <- read.delim('data/16S_analysis/all_treatments.0.03.unique_list.conventional.shared', sep='\t', header=T, row.names=2)
shared_otu <- shared_otu[!rownames(shared_otu) %in% c('CefC5M2'), ] # Remove possible contaminated sample
shared_otu <- shared_otu[,!(names(shared_otu) %in% c('Otu0004','Otu0308'))]
shared_otu$numOtus <- NULL
shared_otu$label <- NULL
shared_otu <- clean_merge(metadata, shared_otu)
shared_otu <- subset(shared_otu, abx != 'germfree')
shared_otu <- subset(shared_otu, infection == 'mock')
shared_otu$abx <- NULL
shared_otu$infection <- NULL
rm(metadata)
# Metabolome
# AUCRF feature selection/reduction (to 0% OOB)
colnames(metabolome) <- make.names(colnames(metabolome))
metabolome_aucrf <- aucrfSusceptibility(metabolome)
# Get OOB
metabolome_aucrf_oob <- metabolome_aucrf$RFopt
metabolome_aucrf_oob <- metabolome_aucrf_oob$err.rate
metabolome_aucrf_oob <- as.character(round(median(metabolome_aucrf_oob[,1]) * 100, 3))
# Get features
metabolome_aucrf <- as.character(OptimalSet(metabolome_aucrf)$Name)
res_metabolome <- subset(metabolome, susceptibility == 'resistant')[, metabolome_aucrf]
res_metabolome$susceptibility <- NULL
sus_metabolome <- subset(metabolome, susceptibility == 'susceptible')[, metabolome_aucrf]
sus_metabolome$susceptibility <- NULL
rm(metabolome, metabolome_aucrf)
# Find significant differences
metabolome_pval <- c()
for (i in 1:ncol(res_metabolome)){metabolome_pval[i] <- wilcox.test(res_metabolome[,i], sus_metabolome[,i], exact=FALSE)$p.value}
metabolome_pval <- round(p.adjust(metabolome_pval, method='BH'), 3)
metabolome_sus_pval <- as.character(max(metabolome_pval))
if (metabolome_sus_pval == '0') {
metabolome_sus_pval = '0.001'
}
rm(res_metabolome, sus_metabolome, metabolome_pval)
# 16S
# AUCRF feature selection/reduction (to 0% OOB)
colnames(shared_otu) <- make.names(colnames(shared_otu))
shared_otu_aucrf <- aucrfSusceptibility(shared_otu)
# Get OOB
shared_otu_aucrf_oob <- shared_otu_aucrf$RFopt
shared_otu_aucrf_oob <- shared_otu_aucrf_oob$err.rate
shared_otu_aucrf_oob <- as.character(round(median(shared_otu_aucrf_oob[,1]) * 100, 3))
# Get features
shared_otu_aucrf <- as.character(OptimalSet(shared_otu_aucrf)$Name)
res_shared_otu <- subset(shared_otu, susceptibility == 'resistant')[, shared_otu_aucrf]
res_shared_otu$susceptibility <- NULL
sus_shared_otu <- subset(shared_otu, susceptibility == 'susceptible')[, shared_otu_aucrf]
sus_shared_otu$susceptibility <- NULL
rm(shared_otu, shared_otu_aucrf)
# Find significant differences
otu_pval <- c()
for (i in 1:ncol(res_shared_otu)){otu_pval[i] <- wilcox.test(res_shared_otu[,i], sus_shared_otu[,i], exact=FALSE)$p.value}
otu_pval <- round(p.adjust(otu_pval, method='BH'), 3)
otu_sus_pval <- as.character(max(otu_pval))
if (otu_sus_pval == '0') {
otu_sus_pval = '0.001'
}
rm(res_shared_otu, sus_shared_otu, otu_pval)
```
```{r figure_s2, echo=F, warning=F, message=F, results='hide', cache=F}
# Metadata
metadata <- read.delim('data/metadata.tsv', sep='\t', header=T, row.names=1)
metadata$type <- NULL
metadata$cage <- NULL
metadata$mouse <- NULL
metadata$gender <- NULL
# Metabolomes
metabolome <- read.delim('data/metabolome/scaled_intensities.log10.tsv', sep='\t', header=TRUE)
metabolome$BIOCHEMICAL <- gsub('_', ' ', metabolome$BIOCHEMICAL)
rownames(metabolome) <- metabolome$BIOCHEMICAL
metabolome$BIOCHEMICAL <- NULL
metabolome$PUBCHEM <- NULL
metabolome$KEGG <- NULL
metabolome$SUB_PATHWAY <- NULL
metabolome$SUPER_PATHWAY <- NULL
metabolome <- as.data.frame(t(metabolome))
metabolome <- clean_merge(metadata, metabolome)
metabolome <- subset(metabolome, abx != 'germfree')
metabolome <- subset(metabolome, susceptibility == 'susceptible')
metabolome$abx <- NULL
metabolome$susceptibility <- NULL
# 16S
shared_otu <- read.delim('data/16S_analysis/all_treatments.0.03.unique_list.conventional.shared', sep='\t', header=T, row.names=2)
shared_otu <- shared_otu[!rownames(shared_otu) %in% c('CefC5M2'), ] # Remove possible contaminated sample
shared_otu <- shared_otu[,!(names(shared_otu) %in% c('Otu0004','Otu0308'))] # Remove residual C. difficile OTUs
shared_otu$numOtus <- NULL
shared_otu$label <- NULL
shared_otu <- clean_merge(metadata, shared_otu)
shared_otu <- subset(shared_otu, abx != 'germfree')
shared_otu <- subset(shared_otu, susceptibility == 'susceptible')
shared_otu$abx <- NULL
shared_otu$susceptibility <- NULL
rm(metadata)
# Metabolome
metabolome_dist <- vegdist(metabolome[,2:ncol(metabolome)], method='bray') # Bray-Curtis
metabolome_permANOVA_pval <- adonis(metabolome_dist ~ metabolome$infection, metabolome, perm=999)$aov.tab[[6]][1]
metabolome_permANOVA_pval <- as.character(round(metabolome_permANOVA_pval, 3))
rm(metabolome_dist, metabolome)
# 16S
otu_dist <- vegdist(shared_otu[,2:ncol(shared_otu)], method='bray') # Bray-Curtis
otu_permANOVA_pval <- adonis(otu_dist ~ shared_otu$infection, shared_otu, perm=999)$aov.tab[[6]][1]
otu_permANOVA_pval <- as.character(round(otu_permANOVA_pval, 3))
rm(otu_dist, shared_otu)
```
#### Amino-acid metabolism by *C. difficile* appears important for sustained colonization across susceptible environments.
*C. difficile*'s ability to metabolize amino acids via Stickland fermentation may be a critical nutrient niche that enables it to colonize some perturbed communities [@Fletcher2018]. We were curious whether this behavior was conserved across multiple distinct gut environments where *C. difficile* was able to colonize. We assessed the changes between the antibiotic-pretreated, mock-infected microbiomes and those of untreated, *C. difficile*-resistant animals. Not only were the relative abundances of Stickland fermentation substrates increased across susceptible environments, but several secondary bile acids, which have been shown to be negatively correlated with *C. difficile* susceptibility were significantly decreased (Fig. S1D; *p* < `r metabolome_sus_pval`). Additionally, when we constructed a Random Forest classification model to differentiate the groups, we identified multiple members of the *Clostridia* which are capable of metabolizing amino acids for growth [@Dai2015]. The relative abundances of these populations were significantly lower in susceptible animals (Fig. S1B; *p* < `r otu_sus_pval`). We also performed a similar analysis to investigate changes induced by *C. difficile* colonization itself in these susceptible conditions. Although CDI alone did not induce significant shifts in the global community structure or metabolome (Fig. S2 A & C; *p* = `r otu_permANOVA_pval`, `r metabolome_permANOVA_pval`), several features were able to discriminate infected and uninfected microbiomes with high accuracy. This analysis highlighted numerous growth substrates that are known for *C. difficile* in all pretreated mice including 6 Stickland substrates, 4 of which were proline conjugates, along with arabonate/xlyonate (Fig. S2D). Furthermore 5-aminovalerate, the most common end product of Stickland fermentation, was significantly increased during infection in almost all of the metabolomes. Inspection of these specific metabolites revealed that clindamycin pretreatment was only condition where both the inputs and outputs of Stickland fermentation were less abundant relative to the untreated mice (Fig. S3).
```{r figure_3, echo=F, warning=F, message=F, results='hide', cache=F}
# Read in data
# Normalized Metatranscriptomes
cef_normalized_reads <- read.delim('data/read_mapping/cef_normalized_metaT.tsv', sep='\t', header=TRUE, row.names=7)
clinda_normalized_reads <- read.delim('data/read_mapping/clinda_normalized_metaT.tsv', sep='\t', header=TRUE, row.names=7)
strep_normalized_reads <- read.delim('data/read_mapping/strep_normalized_metaT.tsv', sep='\t', header=TRUE, row.names=7)
# KEGG organism file
kegg_tax <- read.delim('data/kegg/kegg_taxonomy.tsv', sep='\t', header=TRUE)
kegg_tax[] <- lapply(kegg_tax, as.character)
# 16S data
genus_shared <- read.delim('data/16S_analysis/all_treatments.0.03.unique_list.0.03.filter.0.03.subsample.shared', sep='\t', header=TRUE, row.names=2)
genus_shared$label <- NULL
genus_shared$numOtus <- NULL
genus_tax <- read.delim('data/16S_analysis/all_treatments.genus.final.taxonomy', sep='\t', header=TRUE)
rownames(genus_tax) <- genus_tax$OTU
genus_tax$OTU <- NULL
# Metadata
metadata <- read.delim('data/metadata.tsv', sep='\t', header=TRUE, row.names=1)
# Format transcription
cef_normalized_reads$gene <- NULL
cef_normalized_reads[,c(1:2)] <- log2(cef_normalized_reads[,c(1:2)] + 1)
clinda_normalized_reads$gene <- NULL
clinda_normalized_reads[,c(1:2)] <- log2(clinda_normalized_reads[,c(1:2)] + 1)
strep_normalized_reads$gene <- NULL
strep_normalized_reads[,c(1:2)] <- log2(strep_normalized_reads[,c(1:2)] + 1)
# Remove those genes without an organism annotation
cef_annotated <- cef_normalized_reads[!is.na(cef_normalized_reads$organism),]
clinda_annotated <- clinda_normalized_reads[!is.na(clinda_normalized_reads$organism),]
strep_annotated <- strep_normalized_reads[!is.na(strep_normalized_reads$organism),]
rm(cef_normalized_reads, clinda_normalized_reads, strep_normalized_reads)
# Remove any C. difficile genes with transcription only in infected
cdiff_omit <- c('cdf','pdc','cdc','cdl','pdf')
cef_annotated <- subset(cef_annotated, !(cef_annotated$organism %in% cdiff_omit))
clinda_annotated <- subset(clinda_annotated, !(clinda_annotated$organism %in% cdiff_omit))
strep_annotated <- subset(strep_annotated, !(strep_annotated$organism %in% cdiff_omit))
rm(cdiff_omit)
# Remove all possible mammalian genes
mamm_omit <- c('fab','cfa','ggo','hgl','hsa','mcc','mdo','pon','aml',
'ptr','rno','shr','ssc','aml','bta','cge','ecb',
'pps','fca','mmu','oaa','gga','ola','acs','aga')
strep_annotated <- subset(strep_annotated, !(strep_annotated$organism %in% mamm_omit))
cef_annotated <- subset(cef_annotated, !(cef_annotated$organism %in% mamm_omit))
clinda_annotated <- subset(clinda_annotated, !(clinda_annotated$organism %in% mamm_omit))
rm(mamm_omit)
# Calculate correlation coefficients
strep_corr <- as.character(round(cor.test(strep_annotated[,2], strep_annotated[,1], method='spearman', exact=FALSE)$estimate, digits=3))
cef_corr <- as.character(round(cor.test(cef_annotated[,2], cef_annotated[,1], method='spearman', exact=FALSE)$estimate, digits=3))
clinda_corr <- as.character(round(cor.test(clinda_annotated[,2], clinda_annotated[,1], method='spearman', exact=FALSE)$estimate, digits=3))
# Collate transcript abundances at genus-level
genus_shared <- t(genus_shared)
genus_tax_shared <- merge(genus_tax, genus_shared, by='row.names')
genus_tax_shared$Row.names <- NULL
genus_tax_agg_shared <- aggregate(. ~ Genus, data=genus_tax_shared, FUN=sum)
rownames(genus_tax_agg_shared) <- genus_tax_agg_shared$Genus
genus_tax_agg_shared$Genus <- NULL
genus_tax_agg_shared <- t(genus_tax_agg_shared)
rm(genus_tax, genus_shared, genus_tax_shared)
# Combine with metadata and collate within treatment
genus_tax_agg_metadata_shared <- merge(metadata, genus_tax_agg_shared, by='row.names')
genus_tax_agg_metadata_shared$Row.names <- NULL
genus_tax_agg_metadata_shared$cage <- NULL
genus_tax_agg_metadata_shared$mouse <- NULL
genus_tax_agg_metadata_shared$gender <- NULL
genus_tax_agg_metadata_shared$type <- NULL
genus_tax_agg_metadata_shared$susceptibility <- NULL
genus_tax_agg_metadata_shared$infection <- NULL
genus_tax_agg_metadata_shared <- subset(genus_tax_agg_metadata_shared, genus_tax_agg_metadata_shared$abx %in% c('cefoperazone','clindamycin','streptomycin'))
genus_tax_agg_metadata_shared <- aggregate(. ~ abx, data=genus_tax_agg_metadata_shared, FUN=median)
rownames(genus_tax_agg_metadata_shared) <- genus_tax_agg_metadata_shared$abx
genus_tax_agg_metadata_shared$abx <- NULL
genus_tax_agg_metadata_shared <- (genus_tax_agg_metadata_shared / rowSums(genus_tax_agg_metadata_shared)) * 100
genus_tax_agg_metadata_shared <- as.data.frame(t(genus_tax_agg_metadata_shared))
cef_genus <- genus_tax_agg_metadata_shared
cef_genus$clindamycin <- NULL
cef_genus$streptomycin <- NULL
clinda_genus <- genus_tax_agg_metadata_shared
clinda_genus$cefoperazone <- NULL
clinda_genus$streptomycin <- NULL
strep_genus <- genus_tax_agg_metadata_shared
strep_genus$clindamycin <- NULL
strep_genus$cefoperazone <- NULL
rm(genus_tax_agg_shared, genus_tax_agg_metadata_shared, metadata)
# Using previously defined lines, find outliers to y = x
strep_630_outliers <- subset(strep_annotated, strep_annotated$strep_630_metaT_reads > strep_annotated$strep_mock_metaT_reads + 2)
strep_mock_outliers <- subset(strep_annotated, strep_annotated$strep_mock_metaT_reads > strep_annotated$strep_630_metaT_reads + 2)
cef_630_outliers <- subset(cef_annotated, cef_annotated$cef_630_metaT_reads > cef_annotated$cef_mock_metaT_reads + 2)
cef_mock_outliers <- subset(cef_annotated, cef_annotated$cef_mock_metaT_reads > cef_annotated$cef_630_metaT_reads + 2)
clinda_630_outliers <- clinda_annotated[(clinda_annotated$clinda_630_metaT_reads > clinda_annotated$clinda_mock_metaT_reads + 2),]
clinda_mock_outliers <- clinda_annotated[(clinda_annotated$clinda_mock_metaT_reads > clinda_annotated$clinda_630_metaT_reads + 2),]
rm(strep_annotated, cef_annotated, clinda_annotated)
# Calculate mean distance of outliers to x=y
dists_630 <- c()
for (i in 1:nrow(strep_630_outliers)){
dists_630[i] <- dist_xy(c(strep_630_outliers$strep_630_metaT_reads[i], strep_630_outliers$strep_mock_metaT_reads[i]))
}
dists_mock <- c()
for (i in 1:nrow(strep_mock_outliers)){
dists_mock[i] <- dist_xy(c(strep_mock_outliers$strep_630_metaT_reads[i], strep_mock_outliers$strep_mock_metaT_reads[i]))
}
strep_mean_dist <- as.character(round(mean(c(dists_630, dists_mock)), 3))
strep_outs <- as.character(as.numeric(length(c(dists_630, dists_mock))))
dists_630 <- c()
for (i in 1:nrow(cef_630_outliers)){
dists_630[i] <- dist_xy(c(cef_630_outliers$cef_630_metaT_reads[i], cef_630_outliers$cef_mock_metaT_reads[i]))
}
dists_mock <- c()
for (i in 1:nrow(cef_mock_outliers)){
dists_mock[i] <- dist_xy(c(cef_mock_outliers$cef_630_metaT_reads[i], cef_mock_outliers$cef_mock_metaT_reads[i]))
}
cef_mean_dist <- as.character(round(mean(c(dists_630, dists_mock)), 3))
cef_outs <- as.character(as.numeric(length(c(dists_630, dists_mock))))
dists_630 <- c()
for (i in 1:nrow(clinda_630_outliers)){
dists_630[i] <- dist_xy(c(clinda_630_outliers$clinda_630_metaT_reads[i], clinda_630_outliers$clinda_mock_metaT_reads[i]))
}
dists_mock <- c()
for (i in 1:nrow(clinda_mock_outliers)){
dists_mock[i] <- dist_xy(c(clinda_mock_outliers$clinda_630_metaT_reads[i], clinda_mock_outliers$clinda_mock_metaT_reads[i]))
}
clinda_mean_dist <- as.character(round(mean(c(dists_630, dists_mock)), 3))
clinda_outs <- as.character(as.numeric(length(c(dists_630, dists_mock))))
rm(dists_630, dists_mock, i)
# Drop levels
strep_630_outliers[] <- lapply(strep_630_outliers, as.character)
strep_mock_outliers[] <- lapply(strep_mock_outliers, as.character)
cef_630_outliers[] <- lapply(cef_630_outliers, as.character)
cef_mock_outliers[] <- lapply(cef_mock_outliers, as.character)
clinda_630_outliers[] <- lapply(clinda_630_outliers, as.character)
clinda_mock_outliers[] <- lapply(clinda_mock_outliers, as.character)
# Save KEGG ID names
strep_630_outliers$kegg_id <- rownames(strep_630_outliers)
strep_mock_outliers$kegg_id <- rownames(strep_mock_outliers)
cef_630_outliers$kegg_id <- rownames(cef_630_outliers)
cef_mock_outliers$kegg_id <- rownames(cef_mock_outliers)
clinda_630_outliers$kegg_id <- rownames(clinda_630_outliers)
clinda_mock_outliers$kegg_id <- rownames(clinda_mock_outliers)
# Merge with KEGG taxonomy
strep_630_outliers <- merge(x=strep_630_outliers, y=kegg_tax, by.x='organism', by.y='org_code', all.x=TRUE)
strep_630_outliers$org_code <- NULL
strep_630_outliers$strep_630_metaT_reads <- as.numeric(strep_630_outliers$strep_630_metaT_reads)
strep_630_outliers$strep_mock_metaT_reads <- as.numeric(strep_630_outliers$strep_mock_metaT_reads)
strep_mock_outliers <- merge(x=strep_mock_outliers, y=kegg_tax, by.x='organism', by.y='org_code', all.x=TRUE)
strep_mock_outliers$org_code <- NULL
strep_mock_outliers$strep_630_metaT_reads <- as.numeric(strep_mock_outliers$strep_630_metaT_reads)
strep_mock_outliers$strep_mock_metaT_reads <- as.numeric(strep_mock_outliers$strep_mock_metaT_reads)
cef_630_outliers <- merge(x=cef_630_outliers, y=kegg_tax, by.x='organism', by.y='org_code', all.x=TRUE)
cef_630_outliers$org_code <- NULL
cef_630_outliers$cef_630_metaT_reads <- as.numeric(cef_630_outliers$cef_630_metaT_reads)
cef_630_outliers$cef_mock_metaT_reads <- as.numeric(cef_630_outliers$cef_mock_metaT_reads)
cef_mock_outliers <- merge(x=cef_mock_outliers, y=kegg_tax, by.x='organism', by.y='org_code', all.x=TRUE)
cef_mock_outliers$org_code <- NULL
cef_mock_outliers$cef_630_metaT_reads <- as.numeric(cef_mock_outliers$cef_630_metaT_reads)
cef_mock_outliers$cef_mock_metaT_reads <- as.numeric(cef_mock_outliers$cef_mock_metaT_reads)
clinda_630_outliers <- merge(x=clinda_630_outliers, y=kegg_tax, by.x='organism', by.y='org_code', all.x=TRUE)
clinda_630_outliers$org_code <- NULL
clinda_630_outliers$clinda_630_metaT_reads <- as.numeric(clinda_630_outliers$clinda_630_metaT_reads)
clinda_630_outliers$clinda_mock_metaT_reads <- as.numeric(clinda_630_outliers$clinda_mock_metaT_reads)
clinda_mock_outliers <- merge(x=clinda_mock_outliers, y=kegg_tax, by.x='organism', by.y='org_code', all.x=TRUE)
clinda_mock_outliers$org_code <- NULL
clinda_mock_outliers$clinda_630_metaT_reads <- as.numeric(clinda_mock_outliers$clinda_630_metaT_reads)
clinda_mock_outliers$clinda_mock_metaT_reads <- as.numeric(clinda_mock_outliers$clinda_mock_metaT_reads)
rm(kegg_tax)
# Find difference in expression for outliers
colnames(strep_630_outliers) <- colnames(strep_mock_outliers)
strep_outliers <- rbind(strep_630_outliers, strep_mock_outliers)
strep_difference <- abs(((2^strep_outliers$strep_mock_metaT_reads) - 1) - ((2^strep_outliers$strep_630_metaT_reads) - 1))
strep_difference <- as.data.frame(cbind(strep_outliers$genus, strep_difference))
colnames(strep_difference) <- c('genus', 'difference')
strep_gene_count <- as.data.frame(table(strep_difference$genus))
colnames(strep_gene_count) <- c('genus', 'count')
strep_difference <- aggregate(. ~ genus, data=strep_difference, FUN=sum)
strep_difference <- merge(strep_difference, strep_gene_count, by='genus')
strep_difference$difference <- strep_difference$difference / strep_difference$count
strep_difference$count <- NULL
rm(strep_gene_count, strep_630_outliers, strep_mock_outliers)
strep_difference$difference <- log2(as.numeric(as.character(strep_difference$difference)) + 1)
strep_genus_diff <- merge(strep_genus, strep_difference, by.x='row.names', by.y='genus')
rownames(strep_genus_diff) <- strep_genus_diff$Row.names
strep_genus_diff$Row.names <- NULL
colnames(strep_genus_diff) <- c('relAbund','transcriptChange')
rm(strep_difference, strep_genus)
colnames(cef_630_outliers) <- colnames(cef_mock_outliers)
cef_outliers <- rbind(cef_630_outliers, cef_mock_outliers)
cef_difference <- abs(((2^cef_outliers$cef_mock_metaT_reads) - 1) - ((2^cef_outliers$cef_630_metaT_reads) - 1))
cef_difference <- as.data.frame(cbind(cef_outliers$genus, cef_difference))
colnames(cef_difference) <- c('genus', 'difference')
cef_gene_count <- as.data.frame(table(cef_difference$genus))
colnames(cef_gene_count) <- c('genus', 'count')
cef_difference <- aggregate(. ~ genus, data=cef_difference, FUN=sum)
cef_difference <- merge(cef_difference, cef_gene_count, by='genus')
cef_difference$difference <- cef_difference$difference / cef_difference$count
cef_difference$count <- NULL
rm(cef_gene_count, cef_630_outliers, cef_mock_outliers)
cef_difference$difference <- log2(as.numeric(as.character(cef_difference$difference)) + 1)
cef_genus_diff <- merge(cef_genus, cef_difference, by.x='row.names', by.y='genus')
rownames(cef_genus_diff) <- cef_genus_diff$Row.names
cef_genus_diff$Row.names <- NULL
colnames(cef_genus_diff) <- c('relAbund','transcriptChange')
rm(cef_difference, cef_genus)
colnames(clinda_630_outliers) <- colnames(clinda_mock_outliers)
clinda_outliers <- rbind(clinda_630_outliers, clinda_mock_outliers)
clinda_difference <- abs(((2^clinda_outliers$clinda_mock_metaT_reads) - 1) - ((2^clinda_outliers$clinda_630_metaT_reads) - 1))
clinda_difference <- as.data.frame(cbind(clinda_outliers$genus, clinda_difference))
colnames(clinda_difference) <- c('genus', 'difference')
clinda_gene_count <- as.data.frame(table(clinda_difference$genus))
colnames(clinda_gene_count) <- c('genus', 'count')
clinda_difference <- aggregate(. ~ genus, data=clinda_difference, FUN=sum)
clinda_difference <- merge(clinda_difference, clinda_gene_count, by='genus')
clinda_difference$difference <- clinda_difference$difference / clinda_difference$count
clinda_difference$count <- NULL
rm(clinda_gene_count, clinda_630_outliers, clinda_mock_outliers)
clinda_difference$difference <- log2(as.numeric(as.character(clinda_difference$difference)) + 1)
clinda_genus_diff <- merge(clinda_genus, clinda_difference, by.x='row.names', by.y='genus')
rownames(clinda_genus_diff) <- clinda_genus_diff$Row.names
clinda_genus_diff$Row.names <- NULL
colnames(clinda_genus_diff) <- c('relAbund','transcriptChange')
rm(clinda_difference, clinda_genus)
rm(strep_outliers, cef_outliers, clinda_outliers)
# Spearman correlation of 16S and metatransctiptomic change
strep_16S_r <- as.character(round(cor.test(strep_genus_diff$transcriptChange, strep_genus_diff$relAbund, method='spearman', exact=FALSE)$estimate, 3))
strep_16S_p <- as.character(round(cor.test(strep_genus_diff$transcriptChange, strep_genus_diff$relAbund, method='spearman', exact=FALSE)$p.value, 3))
cef_16S_r <- as.character(round(cor.test(cef_genus_diff$transcriptChange, cef_genus_diff$relAbund, method='spearman', exact=FALSE)$estimate, 3))
cef_16S_p <- as.character(round(cor.test(cef_genus_diff$transcriptChange, cef_genus_diff$relAbund, method='spearman', exact=FALSE)$p.value, 3))
clinda_16S_r <- as.character(round(cor.test(clinda_genus_diff$transcriptChange, clinda_genus_diff$relAbund, method='spearman', exact=FALSE)$estimate, 3))
clinda_16S_p <- as.character(round(cor.test(clinda_genus_diff$transcriptChange, clinda_genus_diff$relAbund, method='spearman', exact=FALSE)$p.value, 3))
rm(strep_genus_diff, cef_genus_diff, clinda_genus_diff)
```
#### Infection corresponded with larger shifts in the metatranscriptomes of communities that allowed sustained *C. difficile* colonization.
Despite the strong associations between bacterial community structure and the metabolome with colonization resistance, it was difficult to associate specific populations with changes in those metabolites that were associated with the duration of infection. To gain a more specific understanding of how the microbiota or *C. difficile* shaped the metabolic environment, we employed parallel metagenomic and metatranscriptomic shotgun sequencing of the samples collected from the cecal content of the mice used in the previous analyses. To achieve usable concentrations of bacterial mRNA after rRNA depletion, we had to pool the samples within each treatment and infection group. To establish confidence in the results of a pooled analysis, we calculated within-group sample variance among replicates using CFU, OTU relative abundance, and metabolomic relative abundance data (Table S3). These analyses revealed low levels of variance within control and experimental groups. Following sequencing, metagenomic reads from mock-infected cecal communities were assembled *de novo* into contigs and putative genes were identified resulting in 234,868 (streptomycin), 83,534 (cefoperazone), and 35,681 (clindamycin) open reading frames in each metagenome. Of these putative genes, 28.5% could be annotated to a known function based on the KEGG database, and many of these annotations were homologs to genes in species that were found in our dataset. Streptomycin pretreatment resulted in a significantly more diverse community than other groups based on 16S rRNA gene sequence data, so a more diverse metagenome was expected (Table S1). Supporting this prediction, 2408 unique functionally annotated genes were detected in the streptomycin pretreatment metagenome, at least 1163 more genes than were found in either the cefoperazone or clindamycin metagenomes (Fig. S4A-D). Metagenome-enabled mapping of the metatranscriptomic reads revealed that we were able to obtain informative depths of sequencing from across the metagenomic libraries (Fig. S4E-F). As expected, genes with any detectable transcript in any metatranscriptome were a subset of their corresponding metagenome. Metatranscriptomic read abundances were normalized to corresponding metagenomic coverage per gene to normalize for the abundance of the contributing bacterial taxa. This step was followed by a final subsampling of reads from each conditions to control for uneven sequencing effort and to identify genes with the largest changes in transcription relative to uninfected animals.
We hypothesized that the degree of change in the metatranscriptome corresponding with *C. difficile* colonization would reflect the shifts seen at in the metabolome. As disparate bacterial taxa possess vastly different metabolic capabilities and the antibiotic pretreatments induced distinct species profiles in each community, we tested our hypothesis by delineating the transcriptomic contributions of separate bacterial taxa within each metatranscriptome. Since many genes lack a specific functional annotation in KEGG but retain general taxonomic information, we continued the analysis at the genus level of classification for all genes contributed to each metagenome. Using this approach, we directly compared the normalized transcript abundances for each gene between infected and uninfected states for each antibiotic pretreatment and calculated the Spearman correlation to identify distinct patterns of transcription (Fig. 3). This resulted in `r strep_outs` genes that had an average distance from the center of `r strep_mean_dist` associated with streptomycin-pretreatment, `r cef_outs` genes at an average distance of `r cef_mean_dist` in cefoperazone-pretreatment, and only `r clinda_outs` genes at an average distance of `r clinda_mean_dist` with clindamycin-pretreatment. Overall, the clindamycin pretreatment was associated with the fewest transcription outliers between uninfected and infection conditions compared with those of the other antibiotic groups. This suggested that the degree to which the metatranscriptome was altered by infection corresponded to prolonged colonization.
This analysis also revealed that outlier genes originated in underrepresented genera. In streptomycin-pretreated mice, 937 genes belonging to *Lactobacillus* were increased in transcription during *C. difficile* infection; where *Lactobacillus* accounted for `r strep_lacto`% of the 16S rRNA gene sequences (Fig. 3A). In cefoperazone-pretreated mice, 2290 genes belonging to *Bacteroides* had lower transcription during *C. difficile* infection; *Bacteroides* accounted for `r cef_bacter`% of the 16S rRNA gene sequences (Fig. 3B). A consistent trend in streptomycin and cefoperazone-pretreated mice was an overrepresentation of highly transcribed genes from genera belonging to *Bacteroidetes* during mock infection. The metatransciptomes among mice from both of these pretreatment conditions poorly correlated between mock and infected conditions, indicating a high degree of change induced by *C. difficile* colonization (*R* = `r strep_corr` & *R* = `r cef_corr`). In clindamycin-pretreated mice the largest difference in transcription was for 510 *Lactobacillus* genes with increased transcription during CDI; *Lactobacillus* accounted for `r clinda_lacto`% of the 16S rRNA gene sequences (Fig. 3C). Infected and uninfected metatranscriptomes from mice pretreated with clindamycin were more strongly correlated with each other than either of the other pretreatments (*R* = `r clinda_corr`). This suggests that although *C. difficile* altered the streptomycin and cefoperazone-pretreated communities in which it was able to remain stably colonized, it had minimal impact on the clindamycin-pretreated community in which it was not able to remain colonized.
```{r figure_4, echo=F, warning=F, message=F, results='hide', cache=F}
# Metadata
metadata <- read.delim('data/metadata.tsv', sep='\t', header=TRUE, row.names=1)
# Normalized Metatranscriptomes
cef_normalized_reads <- read.delim('data/read_mapping/cef_normalized_metaT.tsv', sep='\t', header=TRUE, row.names=7)
clinda_normalized_reads <- read.delim('data/read_mapping/clinda_normalized_metaT.tsv', sep='\t', header=TRUE, row.names=7)
strep_normalized_reads <- read.delim('data/read_mapping/strep_normalized_metaT.tsv', sep='\t', header=TRUE, row.names=7)
# KEGG organism file
kegg_tax <- read.delim('data/kegg/kegg_taxonomy.tsv', sep='\t', header=TRUE)
kegg_tax[] <- lapply(kegg_tax, as.character)
# 16S data
genus_shared <- read.delim('data/16S_analysis/all_treatments.0.03.unique_list.0.03.filter.0.03.subsample.shared', sep='\t', header=TRUE, row.names=2)
genus_shared$label <- NULL
genus_shared$numOtus <- NULL
genus_tax <- read.delim('data/16S_analysis/all_treatments.genus.final.taxonomy', sep='\t', header=TRUE)
rownames(genus_tax) <- genus_tax$OTU
genus_tax$OTU <- NULL
# Taxonomy colors
tax_colors <- read.delim('data/taxonomy_color.tsv', sep='\t', header=TRUE)
tax_colors[] <- lapply(tax_colors, as.character)
# Format transcription
cef_normalized_reads$gene <- NULL
cef_normalized_reads[,c(1:2)] <- log2(cef_normalized_reads[,c(1:2)] + 1)
clinda_normalized_reads$gene <- NULL
clinda_normalized_reads[,c(1:2)] <- log2(clinda_normalized_reads[,c(1:2)] + 1)
strep_normalized_reads$gene <- NULL
strep_normalized_reads[,c(1:2)] <- log2(strep_normalized_reads[,c(1:2)] + 1)
# Remove those genes without an organism annotation
cef_annotated <- cef_normalized_reads[!is.na(cef_normalized_reads$organism),]
clinda_annotated <- clinda_normalized_reads[!is.na(clinda_normalized_reads$organism),]
strep_annotated <- strep_normalized_reads[!is.na(strep_normalized_reads$organism),]
# Remove any C. difficile genes with transcription only in infected
cdiff_omit <- c('cdf','pdc','cdc','cdl','pdf')
cef_annotated <- subset(cef_annotated, !(cef_annotated$organism %in% cdiff_omit))
clinda_annotated <- subset(clinda_annotated, !(clinda_annotated$organism %in% cdiff_omit))
strep_annotated <- subset(strep_annotated, !(strep_annotated$organism %in% cdiff_omit))
rm(cdiff_omit)
# Remove all possible mammalian genes
mamm_omit <- c('fab','cfa','ggo','hgl','hsa','mcc','mdo','pon','aml',
'ptr','rno','shr','ssc','aml','bta','cge','ecb',
'pps','fca','mmu','oaa','gga','ola','acs','aga')
strep_annotated <- subset(strep_annotated, !(strep_annotated$organism %in% mamm_omit))
cef_annotated <- subset(cef_annotated, !(cef_annotated$organism %in% mamm_omit))
clinda_annotated <- subset(clinda_annotated, !(clinda_annotated$organism %in% mamm_omit))
rm(mamm_omit)
# Calculate correlation coefficients
strep_corr <- as.character(round(cor.test(strep_annotated[,2], strep_annotated[,1], method='spearman', exact=FALSE)$estimate, digits=3))
cef_corr <- as.character(round(cor.test(cef_annotated[,2], cef_annotated[,1], method='spearman', exact=FALSE)$estimate, digits=3))
clinda_corr <- as.character(round(cor.test(clinda_annotated[,2], clinda_annotated[,1], method='spearman', exact=FALSE)$estimate, digits=3))
# Collate transcript abundances at genus-level
genus_shared <- t(genus_shared)
genus_tax_shared <- merge(genus_tax, genus_shared, by='row.names')
genus_tax_shared$Row.names <- NULL
genus_tax_agg_shared <- aggregate(. ~ Genus, data=genus_tax_shared, FUN=sum)
rownames(genus_tax_agg_shared) <- genus_tax_agg_shared$Genus
genus_tax_agg_shared$Genus <- NULL
genus_tax_agg_shared <- t(genus_tax_agg_shared)
rm(genus_tax, genus_shared, genus_tax_shared)
# Combine with metadata and collate within treatment
genus_tax_agg_metadata_shared <- merge(metadata, genus_tax_agg_shared, by='row.names')
genus_tax_agg_metadata_shared$Row.names <- NULL
genus_tax_agg_metadata_shared$cage <- NULL
genus_tax_agg_metadata_shared$mouse <- NULL
genus_tax_agg_metadata_shared$gender <- NULL
genus_tax_agg_metadata_shared$type <- NULL
genus_tax_agg_metadata_shared$susceptibility <- NULL
genus_tax_agg_metadata_shared$infection <- NULL
genus_tax_agg_metadata_shared <- subset(genus_tax_agg_metadata_shared, genus_tax_agg_metadata_shared$abx %in% c('cefoperazone','clindamycin','streptomycin'))
genus_tax_agg_metadata_shared <- aggregate(. ~ abx, data=genus_tax_agg_metadata_shared, FUN=median)
rownames(genus_tax_agg_metadata_shared) <- genus_tax_agg_metadata_shared$abx
genus_tax_agg_metadata_shared$abx <- NULL
genus_tax_agg_metadata_shared <- (genus_tax_agg_metadata_shared / rowSums(genus_tax_agg_metadata_shared)) * 100
genus_tax_agg_metadata_shared <- as.data.frame(t(genus_tax_agg_metadata_shared))
cef_genus <- genus_tax_agg_metadata_shared
cef_genus$clindamycin <- NULL
cef_genus$streptomycin <- NULL
clinda_genus <- genus_tax_agg_metadata_shared
clinda_genus$cefoperazone <- NULL
clinda_genus$streptomycin <- NULL
strep_genus <- genus_tax_agg_metadata_shared
strep_genus$clindamycin <- NULL
strep_genus$cefoperazone <- NULL
rm(genus_tax_agg_shared, genus_tax_agg_metadata_shared)
# Using previously defined lines, find outliers to y = x
strep_630_outliers <- subset(strep_annotated, strep_annotated$strep_630_metaT_reads > strep_annotated$strep_mock_metaT_reads + 2)
strep_mock_outliers <- subset(strep_annotated, strep_annotated$strep_mock_metaT_reads > strep_annotated$strep_630_metaT_reads + 2)
cef_630_outliers <- subset(cef_annotated, cef_annotated$cef_630_metaT_reads > cef_annotated$cef_mock_metaT_reads + 2)
cef_mock_outliers <- subset(cef_annotated, cef_annotated$cef_mock_metaT_reads > cef_annotated$cef_630_metaT_reads + 2)
clinda_630_outliers <- clinda_annotated[(clinda_annotated$clinda_630_metaT_reads > clinda_annotated$clinda_mock_metaT_reads + 2),]
clinda_mock_outliers <- clinda_annotated[(clinda_annotated$clinda_mock_metaT_reads > clinda_annotated$clinda_630_metaT_reads + 2),]
# Remove outliers from the rest of the genes
strep_annotated <- strep_annotated[!row.names(strep_annotated) %in% row.names(strep_630_outliers), ]
strep_annotated <- strep_annotated[!row.names(strep_annotated) %in% row.names(strep_mock_outliers), ]
cef_annotated <- cef_annotated[!row.names(cef_annotated) %in% row.names(cef_630_outliers), ]
cef_annotated <- cef_annotated[!row.names(cef_annotated) %in% row.names(cef_mock_outliers), ]
clinda_annotated <- clinda_annotated[!row.names(clinda_annotated) %in% row.names(clinda_630_outliers), ]
clinda_annotated <- clinda_annotated[!row.names(clinda_annotated) %in% row.names(clinda_mock_outliers), ]
# Calculate mean distance of outliers to x=y
dists_630 <- c()
for (i in 1:nrow(strep_630_outliers)){
dists_630[i] <- dist_xy(c(strep_630_outliers$strep_630_metaT_reads[i], strep_630_outliers$strep_mock_metaT_reads[i]))
}
dists_mock <- c()
for (i in 1:nrow(strep_mock_outliers)){
dists_mock[i] <- dist_xy(c(strep_mock_outliers$strep_630_metaT_reads[i], strep_mock_outliers$strep_mock_metaT_reads[i]))
}
round(mean(c(dists_630, dists_mock)), 3)
as.numeric(length(c(dists_630, dists_mock)))
dists_630 <- c()
for (i in 1:nrow(cef_630_outliers)){
dists_630[i] <- dist_xy(c(cef_630_outliers$cef_630_metaT_reads[i], cef_630_outliers$cef_mock_metaT_reads[i]))
}
dists_mock <- c()
for (i in 1:nrow(cef_mock_outliers)){
dists_mock[i] <- dist_xy(c(cef_mock_outliers$cef_630_metaT_reads[i], cef_mock_outliers$cef_mock_metaT_reads[i]))
}
round(mean(c(dists_630, dists_mock)), 3)
as.numeric(length(c(dists_630, dists_mock)))
dists_630 <- c()
for (i in 1:nrow(clinda_630_outliers)){
dists_630[i] <- dist_xy(c(clinda_630_outliers$clinda_630_metaT_reads[i], clinda_630_outliers$clinda_mock_metaT_reads[i]))
}
dists_mock <- c()
for (i in 1:nrow(clinda_mock_outliers)){
dists_mock[i] <- dist_xy(c(clinda_mock_outliers$clinda_630_metaT_reads[i], clinda_mock_outliers$clinda_mock_metaT_reads[i]))
}
round(mean(c(dists_630, dists_mock)), 3)
as.numeric(length(c(dists_630, dists_mock)))
rm(dists_630, dists_mock)
# Break down outliers into taxonomic groups
# Drop levels
strep_630_outliers[] <- lapply(strep_630_outliers, as.character)
strep_mock_outliers[] <- lapply(strep_mock_outliers, as.character)
cef_630_outliers[] <- lapply(cef_630_outliers, as.character)
cef_mock_outliers[] <- lapply(cef_mock_outliers, as.character)
clinda_630_outliers[] <- lapply(clinda_630_outliers, as.character)
clinda_mock_outliers[] <- lapply(clinda_mock_outliers, as.character)
strep_annotated[] <- lapply(strep_annotated, as.character)
cef_annotated[] <- lapply(cef_annotated, as.character)
clinda_annotated[] <- lapply(clinda_annotated, as.character)
# Save KEGG ID names
strep_630_outliers$kegg_id <- rownames(strep_630_outliers)
strep_mock_outliers$kegg_id <- rownames(strep_mock_outliers)
cef_630_outliers$kegg_id <- rownames(cef_630_outliers)
cef_mock_outliers$kegg_id <- rownames(cef_mock_outliers)
clinda_630_outliers$kegg_id <- rownames(clinda_630_outliers)
clinda_mock_outliers$kegg_id <- rownames(clinda_mock_outliers)
strep_annotated$kegg_id <- rownames(strep_annotated)