From f8d6f03e9be901c5ea36739123472d76abdc0a50 Mon Sep 17 00:00:00 2001
From: whelena <hwinata@mednet.ucla.edu>
Date: Sat, 23 Nov 2024 17:51:49 -0800
Subject: [PATCH] generalize density calculation

---
 R/calculate.density.R                         |  19 ++++
 R/calculate.density.each.clone.R              |  17 ---
 ...te.clone.genome.distribution.densityplot.R |  16 +--
 R/create.clone.genome.distribution.plot.R     |   5 +-
 R/plot.clone.densityplot.R                    | 103 ------------------
 R/plot.clones.densityplot.histogram.R         |  49 ---------
 R/plot.snv.histogram.R                        |  80 --------------
 7 files changed, 23 insertions(+), 266 deletions(-)
 create mode 100644 R/calculate.density.R
 delete mode 100644 R/calculate.density.each.clone.R
 delete mode 100644 R/plot.clone.densityplot.R
 delete mode 100644 R/plot.clones.densityplot.histogram.R
 delete mode 100644 R/plot.snv.histogram.R

diff --git a/R/calculate.density.R b/R/calculate.density.R
new file mode 100644
index 0000000..185393c
--- /dev/null
+++ b/R/calculate.density.R
@@ -0,0 +1,19 @@
+calculate.density <- function(
+    x,
+    value = 'genome.pos',
+    group = 'clone.id',
+    scale = TRUE,
+    ...
+    ) {
+
+    if (nrow(x) <= 1) {
+        return(NULL);
+        }
+    density <- density(x = x[[value]], bw = 'nrd', na.rm = TRUE, ...);
+    density.df <- as.data.frame(density[c('x', 'y')]);
+    density.df$clone.id <- unique(x[[group]]);
+    if (scale) {
+        density.df$y <- nrow(x) / sum(density.df$y) * density.df$y;
+        }
+    return(density.df)
+    }
\ No newline at end of file
diff --git a/R/calculate.density.each.clone.R b/R/calculate.density.each.clone.R
deleted file mode 100644
index dbb1d04..0000000
--- a/R/calculate.density.each.clone.R
+++ /dev/null
@@ -1,17 +0,0 @@
-## Function: calculate.density.each.clone  ---------------------------------------------------------
-calculate.density.each.clone <- function(cluster.df, cloneID) {
-    # Skip clusters with only one SNV
-    if (nrow(cluster.df) <= 1) {
-        return(NULL);
-        }
-    density <- density(
-        x = cluster.df$CCF,
-        bw = 'nrd',
-        adjust = 1.2,
-        na.rm = TRUE
-        );
-    density.df <- as.data.frame(density[c('x','y')]);
-    density.df$count <- nrow(cluster.df) / sum(density.df$y) * density.df$y;
-    density.df$clone.id <- cloneID;
-    return(density.df)
-    }
diff --git a/R/create.clone.genome.distribution.densityplot.R b/R/create.clone.genome.distribution.densityplot.R
index c301fdd..3bf4c3f 100644
--- a/R/create.clone.genome.distribution.densityplot.R
+++ b/R/create.clone.genome.distribution.densityplot.R
@@ -8,7 +8,7 @@ create.clone.genome.distribution.densityplot <- function(
 
     return(BoutrosLab.plotting.general::create.scatterplot(
         filename = save.plt,
-        formula = count ~ x,
+        formula = y ~ x,
         data = density.df,
         groups = density.df$clone.id,
         xlab.label = 'Chromosome',
@@ -24,17 +24,3 @@ create.clone.genome.distribution.densityplot <- function(
         ...
         ));
     }
-
-calculate.density.and.scale <- function(
-    x,
-    value = 'genome.pos',
-    group = 'clone.id'
-    ) {
-
-    density <- density(x = x[[value]], bw = 'nrd', adjust = 0.05, na.rm = TRUE);
-    density.df <- as.data.frame(density[c('x', 'y')]);
-    density.df$clone.id <- unique(x[[group]]);
-    density.df$count <- nrow(x) / sum(density.df$y) * density.df$y;
-
-    return(density.df)
-    }
diff --git a/R/create.clone.genome.distribution.plot.R b/R/create.clone.genome.distribution.plot.R
index e12596a..daa19f4 100644
--- a/R/create.clone.genome.distribution.plot.R
+++ b/R/create.clone.genome.distribution.plot.R
@@ -104,8 +104,9 @@ create.clone.genome.distribution.plot.per.sample <- function(
             warning(paste('Skipping clone', k, 'in sample', unique(sample.df$ID), 'since there is only one SNV'));
             next;
         }
-        density.list[[k]] <- calculate.density.and.scale(
-            x = sample.df[sample.df$clone.id == k, ]
+        density.list[[k]] <- calculate.density(
+            x = sample.df[sample.df$clone.id == k, ],
+            adjust = 0.05
             );
         }
     density.df <- do.call(rbind, density.list);
diff --git a/R/plot.clone.densityplot.R b/R/plot.clone.densityplot.R
deleted file mode 100644
index 3b1cfb2..0000000
--- a/R/plot.clone.densityplot.R
+++ /dev/null
@@ -1,103 +0,0 @@
-## Function: plot.clone.densityplot ----------------------------------------------------------------
-plot.clone.densityplot <- function(
-    dpclust.df,
-    sampleID,
-    src.tool,
-    output.dir
-    ) {
-    # Create table of densities for plotting each cluster
-    density.list <- list();
-    for (clone in unique(dpclust.df$clone.id)) {
-        clone.df <- calculate.density.each.clone(
-            cluster.df = dpclust.df[dpclust.df$clone.id == clone, ],
-            cloneID = clone
-            );
-        density.list[[clone]] <- clone.df;
-        }
-    density.df <- do.call(rbind, density.list);
-
-    # Calculate average CCF per cluster
-    cluster.meanCCFs <- unique(dpclust.df$location);
-
-    # Get plot legend
-    clone.IDs <- unique(density.df$clone.id);
-    colors.qual <- BoutrosLab.plotting.general::default.colours(12);
-    cluster.colours <- setNames(colors.qual[1:length(clone.IDs)], clone.IDs);
-    cluster.legend <- BoutrosLab.plotting.general::legend.grob(
-        list(
-            legend = list(
-                title = 'Clone',
-                labels = as.character(names(cluster.colours)),
-                colours = c(cluster.colours),
-                border = 'black'
-                )
-            ),
-        size = 1,
-        title.cex = 1,
-        label.cex = 1
-        );
-
-    # Set y-axis max limit and y-increments
-    ymax <- ceiling(max(density.df$count) / 5) * 5;
-    yseq <- if (ymax > 1000) {
-        100;
-    } else if (ymax > 100) {
-        20;
-    } else if (ymax > 20) {
-        10;
-    } else {
-        5;
-    }
-
-    save.plt <- file.path(
-        output.dir,
-        paste(sampleID, src.tool, 'clone_densityplot.png', sep = '_')
-        );
-    return(
-        BoutrosLab.plotting.general::create.scatterplot(
-            filename = save.plt,
-            formula = count ~ x,
-            data = density.df,
-            groups = density.df$clone.id,
-            xlab.label = 'CCF',
-            ylab.label = 'SNV Count',
-            xlab.top.label = paste('Total SNVs Clustered:', nrow(dpclust.df)),
-            xlab.top.cex = 1.5,
-            xlab.top.x = 0.5,
-            xlab.top.y = 1,
-            xlab.cex = 1.5,
-            ylab.cex = 1.5,
-            xaxis.cex = 1.2,
-            yaxis.cex = 1.2,
-            xlimits = c(0, 1.5),
-            ylimits = c(0, ymax),
-            xat = seq(0, 1.5, 0.25),
-            yat = seq(0, ymax, yseq),
-            xaxis.tck = c(1, 0),
-            yaxis.tck = c(1, 0),
-            col = cluster.colours,
-            type = 'l',
-            lwd = 3,
-            abline.v = cluster.meanCCFs,
-            abline.lwd = 0.5,
-            abline.lty = 'longdash',
-            # add.text = TRUE,
-            text.labels = lapply(cluster.meanCCFs, round, 2),
-            text.x = cluster.meanCCFs,
-            text.y = ymax - 1,
-            text.fontface = 'plain',
-            text.cex = 1,
-            legend = list(
-                right = list(
-                    fun = cluster.legend
-                    )
-                ),
-            top.padding = 2,
-            right.padding = 1,
-            left.padding = 1,
-            height = 6,
-            width = 6.5,
-            resolution = 800
-            )
-        );
-    }
diff --git a/R/plot.clones.densityplot.histogram.R b/R/plot.clones.densityplot.histogram.R
deleted file mode 100644
index df25f68..0000000
--- a/R/plot.clones.densityplot.histogram.R
+++ /dev/null
@@ -1,49 +0,0 @@
-# Description: Plot density curves of clones from SRC tool results and histograms of SNVs per clone
-
-### PREAMBLE #######################################################################################
-library(BoutrosLab.utilities);
-library(BoutrosLab.plotting.general);
-
-# Source functions
-dir.functions <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/project-disease-KidneyTumor-GHLR-000108-SubclonalArchitecture/sSRC-prep-and-analysis/plot_DPClust_clones/functions';
-files.functions <- list.files(dir.functions, full.names = TRUE);
-lapply(files.functions, source);
-
-dir.dpclust.parsed <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/call-SRC/DPClust_results_parsed';
-dir.plot.output <- '/hot/project/disease/KidneyTumor/GHLR-000108-SubclonalArchitecture/project-disease-KidneyTumor-GHLR-000108-SubclonalArchitecture/sSRC-prep-and-analysis/plot_DPClust_clones/plot';
-
-
-### LOAD DATA ######################################################################################
-# Read in clustered SNVs data
-files.dpclust <- list.files(dir.dpclust.parsed, pattern = 'DPClust_data.tsv', full.names = TRUE);
-names(files.dpclust) <- regmatches(
-    files.dpclust,
-    regexpr('STGHKGFH\\d{6}-\\S{4}-\\S{3}-\\w|TCGA-\\S{2}-\\S{4}-\\S{3}', files.dpclust)
-    );
-list.dpclust.dfs <- lapply(files.dpclust, read.table, sep = '\t', header = TRUE);
-
-
-### PLOTTING #######################################################################################
-# Plot histograms
-for (i in 1:length(list.dpclust.dfs)) {
-    dpclust.df <- list.dpclust.dfs[[i]];
-    sampleID <- names(list.dpclust.dfs)[i];
-    plot.snv.histogram(
-        dpclust.df,
-        sampleID,
-        'DPClust',
-        dir.plot.output
-        );
-    }
-
-# Plot density plots
-for (i in 1:length(list.dpclust.dfs)) {
-    dpclust.df <- list.dpclust.dfs[[i]];
-    sampleID <- names(list.dpclust.dfs)[i];
-    plot.clone.densityplot(
-        dpclust.df,
-        sampleID,
-        'DPClust',
-        dir.plot.output
-        );
-    }
diff --git a/R/plot.snv.histogram.R b/R/plot.snv.histogram.R
deleted file mode 100644
index 50f53ea..0000000
--- a/R/plot.snv.histogram.R
+++ /dev/null
@@ -1,80 +0,0 @@
-## Function: plot.snv.histogram --------------------------------------------------------------------
-# How to automate n.breaks?
-    # Use 100 if > 1000 SNVs, 50 if > 500 SNVs, 25 if > 100 SNVs, 10 if > 50 SNVs?
-# How to automate ymax?
-
-plot.snv.histogram <- function(dpclust.df, sampleID, src.tool, output.dir) {
-    num.snv <- nrow(dpclust.df);
-
-    # Automate number of breaks
-    # # Freedman-Diaconis Rule method
-    # iqr <- IQR(dpclust.df$subclonal.fraction);
-    # binwidth <- 2 * iqr / (num.snv^(1 / 3));
-    # n.breaks <- ceiling((max(dpclust.df$subclonal.fraction) - min(dpclust.df$subclonal.fraction)) / binwidth);
-
-    # Piecewise method:
-    n.breaks <- if (num.snv > 1000) {
-        100;
-    } else if (num.snv > 500) {
-        50;
-    } else if (num.snv > 100) {
-        25;
-    } else if (num.snv > 40) {
-        20;
-    } else {
-        10;
-    }
-
-    # Set y-axis max limit
-    hist.data <- hist(dpclust.df$CCF, breaks = n.breaks, plot = FALSE);
-    ymax <- ceiling(max(hist.data$counts) / 10) * 10;
-    # Set y-axis increments
-    yseq <- if (ymax > 1000) {
-        100;
-    } else if (ymax > 200) {
-        50;
-    } else if (ymax > 40) {
-        20;
-    } else {
-        10;
-    }
-
-    save.plt <- file.path(
-        output.dir,
-        paste(sampleID, src.tool, 'clustered_SNVs_histogram.png', sep = '_')
-        );
-
-    return(
-        BoutrosLab.plotting.general::create.histogram(
-            filename = save.plt,
-            x = dpclust.df$CCF,
-            xlab.label = 'CCF',
-            ylab.label = 'SNV Count',
-            xlab.top.label = paste('Total SNVs Clustered:', num.snv),
-            xlab.top.cex = 1.5,
-            xlab.top.x = 0.5,
-            xlab.top.y = 1,
-            xlab.cex = 1.5,
-            ylab.cex = 1.5,
-            xaxis.cex = 1.2,
-            yaxis.cex = 1.2,
-            xlimits = c(0, 1.5),
-            ylimits = c(0, ymax),
-            xat = seq(0, 1.5, 0.25),
-            yat = seq(0, ymax, yseq),
-            xaxis.tck = c(1, 0),
-            yaxis.tck = c(1, 0),
-            breaks = n.breaks,
-            type = 'count',
-            col = 'gray80',
-            border.col = 'black',
-            lwd = 0.1,
-            top.padding = 1,
-            right.padding = 1,
-            left.padding = 1,
-            height = 6,
-            width = 6,
-            resolution = 800
-            )
-        );
-    }