-
Notifications
You must be signed in to change notification settings - Fork 3
/
02a_i_caseControl_regionalSpecific_probe_differences.R
213 lines (183 loc) · 9.37 KB
/
02a_i_caseControl_regionalSpecific_probe_differences.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
#### Modeling methylation changes
library(minfi)
library(limma)
setwd('/dcl01/lieber/ajaffe/Steve/Alz/Paper')
load('rdas/cleanSamples_n377_processed_data_postfiltered.rda')
### drop probes that do not map to hg38
load('/dcl01/lieber/ajaffe/Steve/meth450k_annotation_hg38/hg38_out/rdas/goldset_GencodeAnnotation_subset.rda') #load hg38 position annotation
drop_hg38_unmappable = which(!rownames(bVals) %in% goldset$Name)
#7966
length(drop_hg38_unmappable)
###
bVals <- bVals[-drop_hg38_unmappable, ]
goldsetSub <- goldset[match(rownames(bVals),goldset$Name), ]
pd$DxOrdinal= as.numeric(factor(pd$DxOrdinal, levels=c("Control", "Alz Drop", "Alz Keep") ))
###### All region analysis: Main models ######
###### Keep within region analysis ######
####---DLPFC---####
regionIndex=which(pd$Region=="DLPFC" & pd$keepList)
mod <- model.matrix(~Dx+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
subStats_DLPFC <- topTable(fitEb, num=Inf, coef=2,genelist=goldsetSub, confint=TRUE)
table(subStats_DLPFC$adj.P.Val<0.10)
colnames(subStats_DLPFC)[40:47] <- paste0("DLPFC_subset_NoAdj_",colnames(subStats_DLPFC)[40:47])
####---Hippo---####
regionIndex=which(pd$Region=="HIPPO" & pd$keepList )
mod <- model.matrix(~Dx+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
subStats_HIPPO <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(subStats_HIPPO$adj.P.Val<0.10)
colnames(subStats_HIPPO) <- paste0("HIPPO_subset_NoAdj_",colnames(subStats_HIPPO))
####---ERC---####
regionIndex=which(pd$Region=="ERC" & pd$keepList)
mod <- model.matrix(~Dx+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
subStats_ERC <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(subStats_ERC$adj.P.Val<0.10)
colnames(subStats_ERC) <- paste0("ERC_subset_NoAdj_",colnames(subStats_ERC))
####---CRB---####
regionIndex=which(pd$Region=="CRB" & pd$keepList)
mod <- model.matrix(~Dx+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
subStats_CRB <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(subStats_CRB$adj.P.Val<0.10)
colnames(subStats_CRB) <- paste0("CRB_subset_NoAdj_",colnames(subStats_CRB))
###### Ordinal model region analysis ######
####---DLPFC---####
regionIndex=which(pd$Region=="DLPFC")
mod <- model.matrix(~DxOrdinal+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
ordStats_DLPFC <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(ordStats_DLPFC$adj.P.Val<0.10)
colnames(ordStats_DLPFC) <- paste0("DLPFC_ordinal_NoAdj_",colnames(ordStats_DLPFC))
####---Hippo---####
regionIndex=which(pd$Region=="HIPPO")
mod <- model.matrix(~DxOrdinal+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
ordStats_HIPPO <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(ordStats_HIPPO$adj.P.Val<0.10)
colnames(ordStats_HIPPO) <- paste0("HIPPO_ordinal_NoAdj_",colnames(ordStats_HIPPO))
####---ERC---####
regionIndex=which(pd$Region=="ERC")
mod <- model.matrix(~DxOrdinal+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
ordStats_ERC <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(ordStats_ERC$adj.P.Val<0.10)
colnames(ordStats_ERC) <- paste0("ERC_ordinal_NoAdj_",colnames(ordStats_ERC))
####---CRB---####
regionIndex=which(pd$Region=="CRB")
mod <- model.matrix(~DxOrdinal+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
ordStats_CRB <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(ordStats_CRB$adj.P.Val<0.10)
colnames(ordStats_CRB) <- paste0("CRB_ordinal_NoAdj_",colnames(ordStats_CRB))
####---Subset Sensitivity---####
####---DLPFC---####
regionIndex=which(pd$Region=="DLPFC" & pd$keepList)
mod <- model.matrix(~Dx+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1+NeuN_pos, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
subStats_DLPFC_adj <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(subStats_DLPFC_adj$adj.P.Val<0.10)
colnames(subStats_DLPFC_adj) <- paste0("DLPFC_subset_cellTypeAdj_",colnames(subStats_DLPFC_adj))
####---Hippo---####
regionIndex=which(pd$Region=="HIPPO" & pd$keepList )
mod <- model.matrix(~Dx+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1+NeuN_pos, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
subStats_HIPPO_adj <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(subStats_HIPPO_adj$adj.P.Val<0.10)
colnames(subStats_HIPPO_adj) <- paste0("HIPPO_subset_cellTypeAdj_",colnames(subStats_HIPPO_adj))
####---ERC---####
regionIndex=which(pd$Region=="ERC" & pd$keepList)
mod <- model.matrix(~Dx+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1 + NeuN_pos, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
subStats_ERC_adj <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(subStats_ERC_adj$adj.P.Val<0.10)
colnames(subStats_ERC_adj) <- paste0("ERC_subset_cellTypeAdj_",colnames(subStats_ERC_adj))
####---CRB---####
regionIndex=which(pd$Region=="CRB" & pd$keepList)
mod <- model.matrix(~Dx+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1 + NeuN_pos, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
subStats_CRB_adj <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(subStats_CRB_adj$adj.P.Val<0.10)
colnames(subStats_CRB_adj) <- paste0("CRB_subset_cellTypeAdj_",colnames(subStats_CRB_adj))
###### Ordinal model region analysis ######
####---DLPFC---####
regionIndex=which(pd$Region=="DLPFC")
mod <- model.matrix(~DxOrdinal+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1 + NeuN_pos, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
ordStats_DLPFC_adj <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(ordStats_DLPFC_adj$adj.P.Val<0.10)
colnames(ordStats_DLPFC_adj) <- paste0("DLPFC_ordinal_cellTypeAdj_",colnames(ordStats_DLPFC_adj))
####---Hippo---####
regionIndex=which(pd$Region=="HIPPO")
mod <- model.matrix(~DxOrdinal+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1 + NeuN_pos, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
ordStats_HIPPO_adj <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(ordStats_HIPPO_adj$adj.P.Val<0.10)
colnames(ordStats_HIPPO_adj) <- paste0("HIPPO_ordinal_cellTypeAdj_",colnames(ordStats_HIPPO_adj))
####---ERC---####
regionIndex=which(pd$Region=="ERC")
mod <- model.matrix(~DxOrdinal+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1 + NeuN_pos, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
ordStats_ERC_adj <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(ordStats_ERC_adj$adj.P.Val<0.10)
colnames(ordStats_ERC_adj) <- paste0("ERC_ordinal_cellTypeAdj_",colnames(ordStats_ERC_adj))
####---CRB---####
regionIndex=which(pd$Region=="CRB")
mod <- model.matrix(~DxOrdinal+ negControl_PC1 + negControl_PC2 + Age+Sex + snpPC1 + NeuN_pos, data = pd[regionIndex,])
fit <- lmFit(bVals[,regionIndex], mod)
fitEb <- eBayes(fit)
ordStats_CRB_adj <- topTable(fitEb, num=Inf, coef=2, confint=TRUE)
table(ordStats_CRB_adj$adj.P.Val<0.10)
colnames(ordStats_CRB_adj) <- paste0("CRB_ordinal_cellTypeAdj_",colnames(ordStats_CRB_adj))
############## Merge all these tests ############
baseRownames = rownames(subStats_DLPFC)
regionSpecific_mergedStats=
cbind(subStats_DLPFC[baseRownames, ],
subStats_HIPPO[baseRownames, ],
subStats_ERC[baseRownames, ],
subStats_CRB[baseRownames, ],
subStats_DLPFC_adj[baseRownames, ],
subStats_HIPPO_adj[baseRownames, ],
subStats_ERC_adj[baseRownames, ],
subStats_CRB_adj[baseRownames, ],
ordStats_DLPFC[baseRownames, ],
ordStats_HIPPO[baseRownames, ],
ordStats_ERC[baseRownames, ],
ordStats_CRB[baseRownames, ],
ordStats_DLPFC_adj[baseRownames, ],
ordStats_HIPPO_adj[baseRownames, ],
ordStats_ERC_adj[baseRownames, ],
ordStats_CRB_adj[baseRownames, ])
### Reorder columns
col_order = c(colnames(regionSpecific_mergedStats)[1:39],
grep("_adj.P.Val",colnames(regionSpecific_mergedStats),value=T),
grep("_logFC",colnames(regionSpecific_mergedStats),value=T),
grep("_P.Value",colnames(regionSpecific_mergedStats),value=T),
grep("_t$",colnames(regionSpecific_mergedStats),value=T,fixed=F),
grep("_AveExpr$",colnames(regionSpecific_mergedStats),value=T,fixed=F),
grep("_B$",colnames(regionSpecific_mergedStats),value=T,fixed=F),
grep("_CI",colnames(regionSpecific_mergedStats),value=T,fixed=F) )
regionSpecific_mergedStats=regionSpecific_mergedStats[,col_order]
#### define standard error as (R.CI-L.CI)/3.92 <- 1.96*2
ci = regionSpecific_mergedStats[,grep("CI",colnames(regionSpecific_mergedStats),v=T)]
split_col<-seq(1,ncol(ci),by=2)
se = mapply(function(x,y) { (y-x) / (qnorm(.975, mean = 0, sd = 1, log = FALSE) *2) } ,ci[,split_col],ci[,-split_col] )
colnames(se) <- gsub("CI.L","SE",colnames( se) )
regionSpecific_mergedStats= cbind(regionSpecific_mergedStats, se)
save(regionSpecific_mergedStats, file='rdas/merged_DMP_regionSpecific_caseControl_stats.rda')