-
Notifications
You must be signed in to change notification settings - Fork 9
/
melt_covariates.R
55 lines (50 loc) · 1.6 KB
/
melt_covariates.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
suppressPackageStartupMessages({
library(dplyr)
library(readr)
library(reshape2)
library(tibble)
})
# Lookup table from the filename prefix (the text before the first underscore)
# to the label written in the `how` output column. Several prefixes share one
# label (svm/rfe -> "RFE", cnet/cox -> "CNET").
methods <- c(
  lgr = "LGR", svm = "RFE", rfe = "RFE", cnet = "CNET", cox = "CNET",
  limma = "LIMMA", edger = "EDGER"
)

# Map result file paths to their method labels.
#
# Args:
#   filename: character vector of file paths. Only the basename is inspected,
#     and only the part before its first underscore.
#
# Returns:
#   An unnamed character vector, the same length as `filename`, holding the
#   matching label from `methods`; NA where the prefix is not in the table.
method_from_filename <- function(filename) {
  pieces <- strsplit(basename(filename), "_", fixed = TRUE)
  # vapply() keeps the result a character vector for any input length,
  # including zero filenames.
  prefix <- vapply(pieces, function(x) x[1], character(1))
  # Indexing a named vector returns a named result; drop the names so the
  # lookup key (e.g. "lgr") does not ride along into downstream columns.
  unname(methods[prefix])
}
# Split underscore-delimited test-set identifiers into descriptive columns.
#
# Args:
#   testset: vector of identifiers (character or factor; coerced with
#     as.character()). Fields 2 to 5 of each identifier are read as cancer,
#     analysis, versus and features respectively.
#
# Returns:
#   A tibble with one row per identifier and character columns `cancer`
#   (upper-cased), `analysis`, `versus` and `features`. `versus` is upper-cased
#   when `analysis` is "surv" and title-cased otherwise. A field that is
#   missing from an identifier comes back as NA.
parse_testset <- function(testset) {
  parts <- strsplit(as.character(testset), "_")
  # vapply() pins every column to character. sapply() would hand back an empty
  # list() for zero-length input, producing list columns that cannot be
  # row-bound with the character columns of other result files.
  field <- function(position) {
    vapply(parts, function(x) x[position], character(1))
  }
  # The first element of parts is the string 'tcga'
  cancer <- toupper(field(2))
  analysis <- field(3)
  versus <- field(4)
  features <- field(5)
  # ifelse() takes its type from the test when no branch is evaluated (empty
  # or all-NA `analysis`), so coerce to keep `versus` a character column.
  versus <- as.character(ifelse(
    analysis == "surv",
    toupper(versus),
    tools::toTitleCase(versus) # Drugs are title case
  ))
  tibble(
    cancer = cancer,
    analysis = analysis,
    versus = versus,
    features = features
  )
}
# Driver: every trailing command-line argument is the path to an RDS file of
# model scores. Each file is reshaped to long format, annotated with the
# parsed test-set fields and the method label, and the combined table is
# written to stdout as TSV.
filenames <- commandArgs(trailingOnly = TRUE)

# Read one RDS score file and return its long-format rows.
melt_score_file <- function(path) {
  scores <- as_tibble(readRDS(path), rownames = "index")
  # Models come in 0 indexed, but R doesn't appreciate that, so shift.
  scores <- mutate(scores, index = as.numeric(index) + 1)
  long <- melt(
    scores,
    "index",
    variable.name = "testset",
    value.name = "goodness"
  )
  design <- parse_testset(long$testset)
  goodness <- long$goodness
  annotations <- tibble(
    how = method_from_filename(path),
    index = long$index,
    # Sometimes NaNs get embedded, where NA is the correct meaning.
    # Let's fix that.
    goodness = ifelse(is.nan(goodness), NA, goodness)
  )
  cbind(design, annotations)
}

results <- lapply(filenames, melt_score_file)
cat(format_tsv(bind_rows(results)))