Skip to content

Commit

Permalink
Merge pull request #1 from dalmolingroup/upset-plot
Browse files Browse the repository at this point in the history
Add upset plot and network analysis
  • Loading branch information
jvfe authored Aug 23, 2023
2 parents 63030b3 + 375a0f4 commit c875ccc
Show file tree
Hide file tree
Showing 17 changed files with 5,423 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,4 @@ vignettes/*.pdf
# dataset
todo.md
functional_enrichment_files/
data/Homo_sapiens.GRCh38.97.chr_patch_hapl_scaff.gtf.gz
59 changes: 59 additions & 0 deletions R/stringdb.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Get interaction network from STRINGdb
get_string_network <-
function(ids,
species = "9606",
required_score = 0) {
ids_collapsed <- paste0(ids, collapse = "%0d")

jsonlite::fromJSON(
RCurl::postForm(
"https://string-db.org/api/json/network",
identifiers = ids_collapsed,
echo_query = "1",
required_score = as.character(required_score),
species = species
),
)
}

# Get identifiers from STRINGdb
get_string_ids <- function(ids, species = "9606") {
ids_collapsed <- paste0(ids, collapse = "%0d")

jsonlite::fromJSON(
RCurl::postForm(
"https://string-db.org/api/json/get_string_ids",
identifiers = ids_collapsed,
echo_query = "1",
species = species
),
)
}

# Function to combine scores according to the STRINGdb algorithm
combinescores <- function(dat,
evidences = "all",
confLevel = 0.4) {
if (evidences[1] == "all") {
edat <- dat[, -c(1, 2, ncol(dat))]
} else {
if (!all(evidences %in% colnames(dat))) {
stop("NOTE: one or more 'evidences' not listed in 'dat' colnames!")
}
edat <- dat[, evidences]
}
if (any(edat > 1)) {
edat <- edat / 1000
}
edat <- 1 - edat
sc <- apply(
X = edat,
MARGIN = 1,
FUN = function(x)
1 - prod(x)
)
dat <- cbind(dat[, c(1, 2)], combined_score = sc)
idx <- dat$combined_score >= confLevel
dat <- dat[idx, ]
return(dat)
}
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ The results of the different DGEs were then used for a meta-analysis.
- [RNA-Seq](./reports/RNASeq.html) - [Source](./analysis/RNASeq.Rmd)
- [GSE107385](./reports/GSE107385.html) - [Source](./analysis/GSE107385.Rmd)
- [Select Genes](./reports/select_genes.html) - [Source](./analysis/select_genes.Rmd)
- [Assemble Network](./reports/assemble_network.html) - [Source](./analysis/assemble_network.Rmd)

## Repository Structure

Expand All @@ -28,6 +29,10 @@ The results of the different DGEs were then used for a meta-analysis.
- DGE for the other two RNA-Seq datasets
- select_genes.Rmd
- Select genes associated to mitophagy, autophagy and mitochondrial biogenesis
- make_upset.R
- Script to generate UpSet plot with intersections between grouped processes.
- assemble_network.Rmd
- Assemble PPI network between metanalysis genes.
- `data`
- `Plataformas.csv`
- Original metadata for the three studies
Expand Down
184 changes: 184 additions & 0 deletions analysis/assemble_network.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
---
title: "Assemble Network"
author: "João Vitor F. Cavalcante"
date: "`r Sys.setlocale('LC_TIME', 'C'); format(Sys.time(), '%d %B, %Y')`"
knit: (function(inputFile, encoding) {
rmarkdown::render(inputFile, encoding = encoding, output_dir = "../reports/") })
output:
html_document:
toc: true
toc_float: true
toc_collapsed: false
theme:
bslib: true
bootswatch: minty
---

```{r knitr, include=FALSE}
knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE
)
```

## PPI Network assembly from Metanalysis genes

We took all genes resulting from the metanalysis and assembled a PPI network for them by querying
STRINGdb v12.0.

Evidence channels used to gather the interactions were experimental, co-expression and databases. Said interactions were also filtered for a confidence level of 0.5.

### Load libraries and data

```{r setup}
# remotes::install_github("daniloimparato/easylayout", ref = "dadamorais")
library(easylayout)
library(tidygraph)
library(igraph)
library(ggraph)
library(dplyr)
library(tidyr)
library(vroom)
library(here)
source(here("R/stringdb.R"))
meta <- vroom(here("data/meta_analysis_genes.csv")) %>%
select(gene_symbol, MD, pvalor, `Grouped process`)
```

### Assemble Network

```{r assemble, eval=FALSE}
string_ids <- get_string_ids(meta$gene_symbol)
meta_merged <- meta %>%
left_join(string_ids %>% select(queryItem, stringId),
by = c("gene_symbol" = "queryItem")) %>%
mutate(stringId = stringr::str_remove(stringId, "9606.")) %>%
separate_rows(`Grouped process`, sep = " \\| ")
encoded_source <- meta_merged %>%
mutate(n = 1) %>%
pivot_wider(
id_cols = stringId,
names_from = `Grouped process`,
values_from = n,
values_fn = list(n = length),
values_fill = list(n = 0),
names_prefix = "From "
) %>%
mutate(source_count = starts_with("From ") %>% across %>% rowSums)
network <- get_string_network(string_ids$stringId)
network_separated <- network %>%
separate(stringId_A,
into = c("ncbi_taxon_id", "stringId_A"),
sep = "\\.") %>%
separate(stringId_B,
into = c("ncbi_taxon_id", "stringId_B"),
sep = "\\.")
nodelist <-
data.frame(node = unique(c(network_separated$stringId_A, network_separated$stringId_B))) %>%
left_join(meta_merged, by = c("node" = "stringId")) %>%
left_join(encoded_source, by = c("node" = "stringId")) %>%
distinct(node, gene_symbol, .keep_all = TRUE)
network_filtered <- network %>%
combinescores(.,
evidences = c("ascore", "escore", "dscore"),
confLevel = 0.5) %>%
separate(stringId_A,
into = c("ncbi_taxon_id", "stringId_A"),
sep = "\\.") %>%
separate(stringId_B,
into = c("ncbi_taxon_id", "stringId_B"),
sep = "\\.") %>%
dplyr::select(stringId_A, stringId_B)
```

### Compute Network Layout

```{r layout, eval=FALSE}
graph <-
graph_from_data_frame(network_filtered, directed = FALSE, vertices = nodelist)
layout <- easylayout::vivagraph(graph)
layout <- easylayout::vivagraph(graph, layout = layout, pin_nodes = TRUE, lcc_margin_left = 10)
V(graph)$x <- layout[, 1]
V(graph)$y <- layout[, 2]
save(graph, nodelist, file = here("results/full_network.rda"))
```

### Network Plots

#### By significance

```{r}
load(here("results/full_network.rda"))
graph_colored <- graph %>%
as_tbl_graph() %>%
activate(nodes) %>%
mutate(signif_md = ifelse(pvalor < 0.05, MD, NA))
ggraph(graph_colored,
"manual",
x = V(graph)$x,
y = V(graph)$y) +
geom_edge_link0(edge_width = 0.2, color = "#90909020") +
geom_node_point(aes(color = signif_md)) +
scale_colour_gradientn(colours = RColorBrewer::brewer.pal(9, "OrRd")) +
coord_fixed() +
theme_void() +
theme(
legend.key.size = unit(0.5, 'cm'),
legend.key.height = unit(0.5, 'cm'),
legend.key.width = unit(0.5, 'cm'),
legend.title = element_text(size=6),
legend.text = element_text(size=6),
plot.title = element_text(size = 4, face = "bold")
) +
labs(
color = "Expression"
)
ggsave(here("results/full_network.pdf"), width = 8, height = 8)
ggsave(here("results/full_network.png"), width = 8, height = 8, bg = "white")
```

#### By grouped process

```{r}
ggraph(graph, "manual", x = V(graph)$x, y = V(graph)$y) +
geom_edge_link0(color = "#90909020") +
scatterpie::geom_scatterpie(
cols = colnames(nodelist)[startsWith(colnames(nodelist), "From ")],
data = igraph::as_data_frame(graph, "vertices"),
colour = NA,
pie_scale = 0.2
) +
coord_fixed() +
theme_void() +
theme(
legend.key.size = unit(0.5, 'cm'),
legend.key.height = unit(0.5, 'cm'),
legend.key.width = unit(0.5, 'cm'),
legend.title = element_text(size=6),
legend.text = element_text(size=6),
legend.position = "bottom",
plot.title = element_text(size = 4, face = "bold")
) +
labs(
fill = "Source:"
)
ggsave(here("results/network_processes.pdf"), width = 8, height = 8)
ggsave(here("results/network_processes.png"), width = 8, height = 8, bg = "white")
```



73 changes: 73 additions & 0 deletions analysis/make_upset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
library(here)
library(readr)
library(dplyr)
library(tidyr)
library(UpSetR)

data <- read_csv(here("data/selected_genes_grouped.csv"), skip = 1) %>%
janitor::clean_names() %>%
dplyr::select(gene_symbol, grouped_process) %>%
distinct()

data_encoded_source <- data %>%
filter(!is.na(grouped_process)) %>%
mutate(n = 1) %>%
pivot_wider(
id_cols = gene_symbol,
names_from = grouped_process,
values_from = n,
values_fn = list(n = length),
values_fill = list(n = 0)
) %>%
as.data.frame()

upset_p <- upset(
nsets = 4,
data_encoded_source,
order.by = "freq",
empty.intersections = "on"
)
upset_p

pdf(file=here("results/upset_plot.pdf"), width = 8, height = 5.5)
upset_p
dev.off()

png(file=here("results/upset_plot.png"), width = 8, height = 5.5, units = "in", res=500)
upset_p
dev.off()

data <- read_csv(here("data/meta_analysis_genes.csv")) %>%
janitor::clean_names() %>%
dplyr::select(gene_symbol, grouped_process) %>%
separate_rows(grouped_process, sep = "\\|") %>%
mutate(grouped_process = stringr::str_trim(grouped_process)) %>%
distinct()

data_encoded_source <- data %>%
filter(!is.na(grouped_process)) %>%
mutate(n = 1) %>%
pivot_wider(
id_cols = gene_symbol,
names_from = grouped_process,
values_from = n,
values_fn = list(n = length),
values_fill = list(n = 0)
) %>%
as.data.frame()

upset_p <- upset(
nsets = 4,
data_encoded_source,
order.by = "freq",
empty.intersections = "on"
)
upset_p

pdf(file=here("results/upset_plot_figureb.pdf"), width = 8, height = 5.5)
upset_p
dev.off()

png(file=here("results/upset_plot_figureb.png"), width = 8, height = 5.5, units = "in", res=500)
upset_p
dev.off()
Loading

0 comments on commit c875ccc

Please sign in to comment.