From f8d6f03e9be901c5ea36739123472d76abdc0a50 Mon Sep 17 00:00:00 2001 From: whelena Date: Sat, 23 Nov 2024 17:51:49 -0800 Subject: [PATCH] generalize density calculation --- R/calculate.density.R | 19 ++++ R/calculate.density.each.clone.R | 17 --- ...te.clone.genome.distribution.densityplot.R | 16 +-- R/create.clone.genome.distribution.plot.R | 5 +- R/plot.clone.densityplot.R | 103 ------------------ R/plot.clones.densityplot.histogram.R | 49 --------- R/plot.snv.histogram.R | 80 -------------- 7 files changed, 23 insertions(+), 266 deletions(-) create mode 100644 R/calculate.density.R delete mode 100644 R/calculate.density.each.clone.R delete mode 100644 R/plot.clone.densityplot.R delete mode 100644 R/plot.clones.densityplot.histogram.R delete mode 100644 R/plot.snv.histogram.R diff --git a/R/calculate.density.R b/R/calculate.density.R new file mode 100644 index 0000000..185393c --- /dev/null +++ b/R/calculate.density.R @@ -0,0 +1,19 @@ +calculate.density <- function( + x, + value = 'genome.pos', + group = 'clone.id', + scale = TRUE, + ... + ) { + + if (nrow(x) <= 1) { + return(NULL); + } + density <- density(x = x[[value]], bw = 'nrd', na.rm = TRUE, ...); + density.df <- as.data.frame(density[c('x', 'y')]); + density.df$clone.id <- unique(x[[group]]); + if (scale) { + density.df$y <- nrow(x) / sum(density.df$y) * density.df$y; + } + return(density.df) + } \ No newline at end of file diff --git a/R/calculate.density.each.clone.R b/R/calculate.density.each.clone.R deleted file mode 100644 index dbb1d04..0000000 --- a/R/calculate.density.each.clone.R +++ /dev/null @@ -1,17 +0,0 @@ -## Function: calculate.density.each.clone --------------------------------------------------------- -calculate.density.each.clone <- function(cluster.df, cloneID) { - # Skip clusters with only one SNV - if (nrow(cluster.df) <= 1) { - return(NULL); - } - density <- density( - x = cluster.df$CCF, - bw = 'nrd', - adjust = 1.2, - na.rm = TRUE - ); - density.df <- as.data.frame(density[c('x','y')]); - density.df$count <- nrow(cluster.df) / sum(density.df$y) * density.df$y; - density.df$clone.id <- cloneID; - return(density.df) - } diff --git a/R/create.clone.genome.distribution.densityplot.R b/R/create.clone.genome.distribution.densityplot.R index c301fdd..3bf4c3f 100644 --- a/R/create.clone.genome.distribution.densityplot.R +++ b/R/create.clone.genome.distribution.densityplot.R @@ -8,7 +8,7 @@ create.clone.genome.distribution.densityplot <- function( return(BoutrosLab.plotting.general::create.scatterplot( filename = save.plt, - formula = count ~ x, + formula = y ~ x, data = density.df, groups = density.df$clone.id, xlab.label = 'Chromosome', @@ -24,17 +24,3 @@ create.clone.genome.distribution.densityplot <- function( ... )); } - -calculate.density.and.scale <- function( - x, - value = 'genome.pos', - group = 'clone.id' - ) { - - density <- density(x = x[[value]], bw = 'nrd', adjust = 0.05, na.rm = TRUE); - density.df <- as.data.frame(density[c('x', 'y')]); - density.df$clone.id <- unique(x[[group]]); - density.df$count <- nrow(x) / sum(density.df$y) * density.df$y; - - return(density.df) - } diff --git a/R/create.clone.genome.distribution.plot.R b/R/create.clone.genome.distribution.plot.R index e12596a..daa19f4 100644 --- a/R/create.clone.genome.distribution.plot.R +++ b/R/create.clone.genome.distribution.plot.R @@ -104,8 +104,9 @@ create.clone.genome.distribution.plot.per.sample <- function( warning(paste('Skipping clone', k, 'in sample', unique(sample.df$ID), 'since there is only one SNV')); next; } - density.list[[k]] <- calculate.density.and.scale( - x = sample.df[sample.df$clone.id == k, ] + density.list[[k]] <- calculate.density( + x = sample.df[sample.df$clone.id == k, ], + adjust = 0.05 ); } density.df <- do.call(rbind, density.list); diff --git a/R/plot.clone.densityplot.R b/R/plot.clone.densityplot.R deleted file mode 100644 index 3b1cfb2..0000000 --- a/R/plot.clone.densityplot.R +++ /dev/null @@ -1,103 +0,0 @@ -## Function: plot.clone.densityplot ---------------------------------------------------------------- -plot.clone.densityplot <- function( - dpclust.df, - sampleID, - src.tool, - output.dir - ) { - # Create table of densities for plotting each cluster - density.list <- list(); - for (clone in unique(dpclust.df$clone.id)) { - clone.df <- calculate.density.each.clone( - cluster.df = dpclust.df[dpclust.df$clone.id == clone, ], - cloneID = clone - ); - density.list[[clone]] <- clone.df; - } - density.df <- do.call(rbind, density.list); - - # Calculate average CCF per cluster - cluster.meanCCFs <- unique(dpclust.df$location); - - # Get plot legend - clone.IDs <- unique(density.df$clone.id); - colors.qual <- BoutrosLab.plotting.general::default.colours(12); - cluster.colours <- setNames(colors.qual[1:length(clone.IDs)], clone.IDs); - cluster.legend <- BoutrosLab.plotting.general::legend.grob( - list( - legend = list( - title = 'Clone', - labels = as.character(names(cluster.colours)), - colours = c(cluster.colours), - border = 'black' - ) - ), - size = 1, - title.cex = 1, - label.cex = 1 - ); - - # Set y-axis max limit and y-increments - ymax <- ceiling(max(density.df$count) / 5) * 5; - yseq <- if (ymax > 1000) { - 100; - } else if (ymax > 100) { - 20; - } else if (ymax > 20) { - 10; - } else { - 5; - } - - save.plt <- file.path( - output.dir, - paste(sampleID, src.tool, 'clone_densityplot.png', sep = '_') - ); - return( - BoutrosLab.plotting.general::create.scatterplot( - filename = save.plt, - formula = count ~ x, - data = density.df, - groups = density.df$clone.id, - xlab.label = 'CCF', - ylab.label = 'SNV Count', - xlab.top.label = paste('Total SNVs Clustered:', nrow(dpclust.df)), - xlab.top.cex = 1.5, - xlab.top.x = 0.5, - xlab.top.y = 1, - xlab.cex = 1.5, - ylab.cex = 1.5, - xaxis.cex = 1.2, - yaxis.cex = 1.2, - xlimits = c(0, 1.5), - ylimits = c(0, ymax), - xat = seq(0, 1.5, 0.25), - yat = seq(0, ymax, yseq), - xaxis.tck = c(1, 0), - yaxis.tck = c(1, 0), - col = cluster.colours, - type = 'l', - lwd = 3, - abline.v = cluster.meanCCFs, - abline.lwd = 0.5, - abline.lty = 'longdash', - # add.text = TRUE, - text.labels = lapply(cluster.meanCCFs, round, 2), - text.x = cluster.meanCCFs, - text.y = ymax - 1, - text.fontface = 'plain', - text.cex = 1, - legend = list( - right = list( - fun = cluster.legend - ) - ), - top.padding = 2, - right.padding = 1, - left.padding = 1, - height = 6, - width = 6.5, - resolution = 800 - ) - ); - } diff --git a/R/plot.clones.densityplot.histogram.R b/R/plot.clones.densityplot.histogram.R deleted file mode 100644 index df25f68..0000000 --- a/R/plot.clones.densityplot.histogram.R +++ /dev/null @@ -1,49 +0,0 @@ -# Description: Plot density curves of clones from SRC tool results and histograms of SNVs per clone - -### PREAMBLE ####################################################################################### -library(BoutrosLab.utilities); -library(BoutrosLab.plotting.general); - -# Source functions -dir.functions <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/project-disease-KidneyTumor-GHLR-000108-SubclonalArchitecture/sSRC-prep-and-analysis/plot_DPClust_clones/functions'; -files.functions <- list.files(dir.functions, full.names = TRUE); -lapply(files.functions, source); - -dir.dpclust.parsed <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/call-SRC/DPClust_results_parsed'; -dir.plot.output <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/project-disease-KidneyTumor-GHLR-000108-SubclonalArchitecture/sSRC-prep-and-analysis/plot_DPClust_clones/plot'; - - -### LOAD DATA ###################################################################################### -# Read in clustered SNVs data -files.dpclust <- list.files(dir.dpclust.parsed, pattern = 'DPClust_data.tsv', full.names = TRUE); -names(files.dpclust) <- regmatches( - files.dpclust, - regexpr('STGHKGFH\\d{6}-\\S{4}-\\S{3}-\\w|TCGA-\\S{2}-\\S{4}-\\S{3}', files.dpclust) - ); -list.dpclust.dfs <- lapply(files.dpclust, read.table, sep = '\t', header = TRUE); - - -### PLOTTING ####################################################################################### -# Plot histograms -for (i in 1:length(list.dpclust.dfs)) { - dpclust.df <- list.dpclust.dfs[[i]]; - sampleID <- names(list.dpclust.dfs)[i]; - plot.snv.histogram( - dpclust.df, - sampleID, - 'DPClust', - dir.plot.output - ); - } - -# Plot density plots -for (i in 1:length(list.dpclust.dfs)) { - dpclust.df <- list.dpclust.dfs[[i]]; - sampleID <- names(list.dpclust.dfs)[i]; - plot.clone.densityplot( - dpclust.df, - sampleID, - 'DPClust', - dir.plot.output - ); - } diff --git a/R/plot.snv.histogram.R b/R/plot.snv.histogram.R deleted file mode 100644 index 50f53ea..0000000 --- a/R/plot.snv.histogram.R +++ /dev/null @@ -1,80 +0,0 @@ -## Function: plot.snv.histogram -------------------------------------------------------------------- -# How to automate n.breaks? - # Use 100 if > 1000 SNVs, 50 if > 500 SNVs, 25 if > 100 SNVs, 10 if > 50 SNVs? -# How to automate ymax? - -plot.snv.histogram <- function(dpclust.df, sampleID, src.tool, output.dir) { - num.snv <- nrow(dpclust.df); - - # Automate number of breaks - # # Freedman-Diaconis Rule method - # iqr <- IQR(dpclust.df$subclonal.fraction); - # binwidth <- 2 * iqr / (num.snv^(1 / 3)); - # n.breaks <- ceiling((max(dpclust.df$subclonal.fraction) - min(dpclust.df$subclonal.fraction)) / binwidth); - - # Piecewise method: - n.breaks <- if (num.snv > 1000) { - 100; - } else if (num.snv > 500) { - 50; - } else if (num.snv > 100) { - 25; - } else if (num.snv > 40) { - 20; - } else { - 10; - } - - # Set y-axis max limit - hist.data <- hist(dpclust.df$CCF, breaks = n.breaks, plot = FALSE); - ymax <- ceiling(max(hist.data$counts) / 10) * 10; - # Set y-axis increments - yseq <- if (ymax > 1000) { - 100; - } else if (ymax > 200) { - 50; - } else if (ymax > 40) { - 20; - } else { - 10; - } - - save.plt <- file.path( - output.dir, - paste(sampleID, src.tool, 'clustered_SNVs_histogram.png', sep = '_') - ); - - return( - BoutrosLab.plotting.general::create.histogram( - filename = save.plt, - x = dpclust.df$CCF, - xlab.label = 'CCF', - ylab.label = 'SNV Count', - xlab.top.label = paste('Total SNVs Clustered:', num.snv), - xlab.top.cex = 1.5, - xlab.top.x = 0.5, - xlab.top.y = 1, - xlab.cex = 1.5, - ylab.cex = 1.5, - xaxis.cex = 1.2, - yaxis.cex = 1.2, - xlimits = c(0, 1.5), - ylimits = c(0, ymax), - xat = seq(0, 1.5, 0.25), - yat = seq(0, ymax, yseq), - xaxis.tck = c(1, 0), - yaxis.tck = c(1, 0), - breaks = n.breaks, - type = 'count', - col = 'gray80', - border.col = 'black', - lwd = 0.1, - top.padding = 1, - right.padding = 1, - left.padding = 1, - height = 6, - width = 6, - resolution = 800 - ) - ); - }