-
Notifications
You must be signed in to change notification settings - Fork 0
/
Spleen_DESeq2
97 lines (74 loc) · 2.99 KB
/
Spleen_DESeq2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
library(DESeq2)
library(EnhancedVolcano)
library(ggplot2)
library(tidyverse)
intCounts <- read.table("GSE197868_Spleen_dataset.txt",header=TRUE,sep="\t")
head(intCounts)
rnaseqMatrix <- intCounts[,c(2:7)]
rownames(rnaseqMatrix) <- intCounts[,1]
colnames(rnaseqMatrix) <- c("Control_int1", "Control_int2", "Control_int3",
"RSV_int1", "RSV_int2", "RSV_int3")
head(rnaseqMatrix)
samples <- data.frame(matrix(c("Control_int1", "Control_int2", "Control_int3",
"RSV_int1", "RSV_int2", "RSV_int3", "Control",
"Control", "Control", "RSV", "RSV", "RSV"),ncol=2))
names(samples) <- c("ID","Treatment")
rownames(samples) <- samples[,1]
samples$Treatment <- factor(c("CTRL","CTRL","CTRL",
"RSV","RSV","RSV"))
deseq2Data <- DESeqDataSetFromMatrix(countData = rnaseqMatrix,
colData = samples,
design = ~ Treatment)
# Filter lowly expressed genes
dim(deseq2Data)
dim(deseq2Data[rowSums(counts(deseq2Data)) > 10, ])
# Perform pre-filtering of the data
deseq2Data <- deseq2Data[rowSums(counts(deseq2Data)) > 10, ]
# Run pipeline for differential expression steps
deseq2Data <- DESeq(deseq2Data)
res <- results(deseq2Data)
# rlog transform counts
rld <- rlog(deseq2Data, blind=FALSE)
rlogcounts <- data.frame(assay(deseq2Data))
rownames(rlogcounts) <- rownames(deseq2Data)
# PCA plot
p <- plotPCA(rld, intgroup=c("Treatment"), returnData=TRUE)
p
pdf("PCA_plot_spleen_samples.pdf")
plotPCA(rld, intgroup=c("Treatment"))
dev.off()
# Pairwise contrast
res_Control_RSV <- results(deseq2Data)
resOrdered_Control_RSV <- res_Control_RSV[order(res_Control_RSV$pvalue),]
# Get number of differentially expressed data at different thresholds
summary(res_Control_RSV$log2FoldChange)
sum(res_Control_RSV$padj < 0.05, na.rm=TRUE)
sum(res_Control_RSV$padj < 0.000000000000005, na.rm=TRUE)
sum(res_Control_RSV$pvalue < 0.05, na.rm=TRUE)
# MA plot
pdf("RSV_Spleen_MA_plot.pdf")
plotMA(res_Control_RSV)
dev.off()
# Plot the normalized expression data for the top gene
plotCounts(deseq2Data, gene=which.min(res_Control_RSV$padj), intgroup="Treatment")
# Volcano plot
plot(res_Control_RSV$log2FoldChange,-1*log2(res_Control_RSV$padj),pch=19)
# Save the results as a data frame
results_spl <- data.frame(res_Control_RSV) %>%
rownames_to_column(var="Gene") #Adds Gene to label rows
write.table(results_spl,"Spleen_Control_RSV_DESeq2.txt",sep="\t",row.names=FALSE)
#Creating a table of the 10 genes with highest padj
top_10_genes <- results_spl %>%
arrange(padj) %>%
pull(padj, Gene) %>%
head(n=10)
#Creating a dataframe w/ the top ten Padj genes
top_10_genes <- data.frame(top_10_genes)
colnames(top_10_genes) <- c("Padj")
rownames_to_column(top_10_genes, "Gene")
write.csv(top_10_genes, file = "Top_10_genes_padj.csv")
#Creating a table of the 10 genes w/ lowest padj
bottom_10_genes <- results_spl %>%
arrange(padj) %>%
pull(Gene, padj) %>%
tail(n=10)