-
Notifications
You must be signed in to change notification settings - Fork 9
/
simplify_expression.R
74 lines (64 loc) · 1.9 KB
/
simplify_expression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
suppressPackageStartupMessages({
library(dplyr)
library(readr)
})
#' Collapse several direction labels into a single display value.
#'
#' @param direction Vector of direction labels, one per contributing row
#'   (the script passes "Positive"/"Negative" strings).
#' @return A length-one value:
#'   * the label itself when there is exactly one;
#'   * the shared label suffixed with "*" when several labels all agree;
#'   * all labels joined with "," when they disagree;
#'   * `NA_character_` when `direction` is empty.
collapse_conditional_direction <- function(direction) {
  # An empty vector has no first element to compare against. Such a slice
  # can presumably reach this function when summarising a table with no
  # rows (TODO confirm against the installed dplyr version).
  if (length(direction) == 0L) {
    return(NA_character_)
  }
  if (length(direction) == 1L) {
    return(direction)
  }
  # unique() treats NA as an ordinary value, so a missing label can no
  # longer turn the condition into NA and abort the `if`.
  if (length(unique(direction)) == 1L) {
    return(paste0(direction[[1]], "*"))
  }
  paste(direction, collapse = ",")
}
# Command-line arguments, in order:
#   1. TSV of per-feature rows
#   2. TSV of selected hits
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  # Fail with a readable message instead of "subscript out of bounds".
  stop(
    "expected 2 arguments: <features_tsv> <selected_hits_tsv>",
    call. = FALSE
  )
}

# Selected hits; `analysis` is renamed to `what` so that its name matches
# the join keys used against the feature table below.
selected_hits <- read_tsv(args[[2]], col_types = cols()) %>%
  select(cancer, what = analysis, versus, features, how)

# Keep only the feature rows that belong to one of the selected hits.
features <- read_tsv(args[[1]], col_types = cols()) %>%
  semi_join(
    selected_hits,
    by = c("cancer", "what", "versus", "features", "how")
  )

# Annotation table; later joined on its `ID_REF` column to obtain `Symbol`.
id_to_sym <- read_tsv(
  "data/gencode_v22_ensg_v98_annots.tsv",
  col_types = cols()
)
# Number of rows per (cancer, what, versus, features, genera) combination.
# A combination is kept when it has at least two rows, or unconditionally
# when `what` is "surv".
hits <- features %>%
  count(cancer, what, versus, features, genera, name = "method_count") %>%
  filter(method_count >= 2 | what == "surv")

# Restrict the feature rows to the kept combinations, label each row by
# which side of 0.5 its `p_greater` falls on, and keep only the columns
# needed downstream.
features <- features %>%
  semi_join(
    hits,
    by = c("cancer", "what", "versus", "features", "genera")
  ) %>%
  mutate(
    conditional_direction = ifelse(p_greater > 0.5, "Negative", "Positive")
  ) %>%
  select(cancer, versus, how, genera, seen, conditional_direction)
# Collapse to one row per cancer / versus / feature.
features <- features %>%
  group_by(cancer, versus, genera) %>%
  summarize(
    # Numeric total kept next to the display string so the final ordering
    # can be done by value rather than by text.
    seen_count = sum(seen),
    # The denominator is 100 per collapsed row (presumably 100 models per
    # method; confirm against the upstream pipeline).
    seen = paste0(seen_count, "/", n() * 100),
    how = paste(how, collapse = ","),
    conditional_direction = collapse_conditional_direction(
      conditional_direction
    ),
    .groups = "drop"
  )

# Attach gene symbols and switch to the presentation column names.
features <- features %>%
  left_join(id_to_sym, by = c(genera = "ID_REF")) %>%
  select(
    Cancer = cancer,
    Versus = versus,
    `Gene Symbol` = Symbol,
    ENSEMBL = genera,
    How = how,
    `Models Present` = seen,
    `Conditional Direction` = conditional_direction,
    seen_count
  ) %>%
  # Features without a usable symbol are shown as ".".
  mutate(`Gene Symbol` = ifelse(
    is.na(`Gene Symbol`) | `Gene Symbol` == "", ".", `Gene Symbol`
  ))

# Write the table to stdout as TSV. `Models Present` is a "seen/total"
# string, so sorting on it directly would be lexicographic ("95/100" ahead
# of "150/200"); order by the numeric count and drop the helper column
# before printing.
features %>%
  arrange(Cancer, Versus, desc(seen_count)) %>%
  select(-seen_count) %>%
  format_tsv() %>%
  cat()