Skip to content

Commit

Permalink
update sample outputs and code to reproduce manuscript fig1
Browse files Browse the repository at this point in the history
  • Loading branch information
pre-mRNA committed Jul 2, 2024
1 parent c8d064a commit 0f274bd
Show file tree
Hide file tree
Showing 8 changed files with 411 additions and 128 deletions.
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ bio = "1.6.0"
clap = "4.5.4"
multimap = "0.9.1"
rayon = "1.5.0"
path-absolutize = "3.0.14"

[dev-dependencies]
env_logger = "0.11.3"
tempfile = "3.2"
tempfile = "3.2"
assert_cmd = "2.0"
Binary file modified assets/metajunction_m6A.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified assets/metatranscript_m6A.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
89 changes: 52 additions & 37 deletions scripts/R2_plotMetaCodon.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ suppressMessages({
library(binom)
})

cat("Arguments received:\n")
cat(paste(commandArgs(trailingOnly = TRUE), collapse = " "), "\n")

help_message <- function() {
cat("\nUsage: Rscript script.R '/path/to/annotated.bed' '/path/to/output.png' '<probability field>' '<cutoff>' '<upper/lower>' [-c 'loess'/'binom'] [-o '/path/to/output.tsv'] [-l] (-s | -e)\n")
cat("Options:\n")
Expand All @@ -17,55 +20,65 @@ help_message <- function() {
}

args <- commandArgs(trailingOnly = TRUE)
options <- list(ci_method = "loess", output_path = NULL, add_labels = FALSE, plot_from = NULL)
required_args <- args[1:5]
optional_args <- args[6:length(args)]

# help message
if ("-h" %in% args || length(args) == 0) {
if (length(required_args) != 5) {
cat("Error: Incorrect number of required arguments.\n")
cat("Received arguments:", paste(args, collapse = " "), "\n")
help_message()
quit(status = 0)
quit(status = 1)
}

remaining_args <- list()
plot_from_specified <- FALSE
input_file <- required_args[1]
output_file <- required_args[2]
col_name <- required_args[3]
cutoff <- as.numeric(required_args[4])
direction <- required_args[5]

options <- list(ci_method = "loess", output_path = NULL, add_labels = FALSE, plot_from = NULL)

for (arg in args) {
if (arg %in% c("-s", "-e")) {
if (!is.null(options$plot_from)) {
# parse optional arguments
i <- 1
while (i <= length(optional_args)) {
flag <- optional_args[i]

if (flag == "-c" && i + 1 <= length(optional_args)) {
options$ci_method <- optional_args[i + 1]
i <- i + 2
} else if (flag == "-o" && i + 1 <= length(optional_args)) {
options$output_path <- optional_args[i + 1]
i <- i + 2
} else if (flag == "-l") {
options$add_labels <- TRUE
i <- i + 1
} else if (flag == "-s") {
if (is.null(options$plot_from)) {
options$plot_from <- "abs_cds_start"
} else {
stop("Error: Both -s and -e flags cannot be used simultaneously.")
}
options$plot_from <- ifelse(arg == "-s", "abs_cds_start", "abs_cds_end")
plot_from_specified <- TRUE
} else if (arg %in% c("-c", "-o")) {
next_index <- which(args == arg) + 1
if (next_index > length(args)) {
stop(paste("Error: The", arg, "flag requires a following argument."))
i <- i + 1
} else if (flag == "-e") {
if (is.null(options$plot_from)) {
options$plot_from <- "abs_cds_end"
} else {
stop("Error: Both -s and -e flags cannot be used simultaneously.")
}
options[[sub("-", "", arg)]] <- args[next_index]
} else if (arg == "-l") {
options$add_labels <- TRUE
i <- i + 1
} else {
remaining_args <- c(remaining_args, arg)
warning(paste("Unrecognized flag:", flag))
i <- i + 1
}
}

# check for codon flag
if (!plot_from_specified) {
# check for start or end flag
if (is.null(options$plot_from)) {
cat("Error: Either -s or -e flag must be specified.\n")
help_message()
stop("Error: Either -s or -e flag must be specified.")
quit(status = 1)
}

# check remaining positional arguments
if (length(remaining_args) != 5) {
help_message()
stop("Error: Incorrect number of positional arguments.")
}

input_file <- as.character(remaining_args[1])
output_file <- as.character(remaining_args[2])
col_name <- as.character(remaining_args[3])
cutoff <- as.numeric(remaining_args[4])
direction <- as.character(remaining_args[5])

if (!file.exists(input_file)) {
stop("Input file does not exist")
}
Expand Down Expand Up @@ -93,11 +106,11 @@ filter_calls <- function(file, col, cutoff, direction) {

# calculate ratio of significant sites
compute_ratio <- function(calls, interval) {
# Filter calls to include only those within the specified range


calls <- calls %>%
filter(.data[[interval]] >= -100 & .data[[interval]] <= 100)

# Calculate the count of 'sig' and 'ns' at each position
out_ratio <- calls %>%
group_by(.data[[interval]], filter) %>%
summarise(n = n(), .groups = 'drop') %>%
Expand All @@ -107,7 +120,7 @@ compute_ratio <- function(calls, interval) {
ratio = sig / (sig + ns + 1e-9)) %>%
rename(interval = 1)

# Calculate confidence intervals if the binom method is chosen

if (options$ci_method == "binom") {
conf_int <- binom.confint(out_ratio$sig, out_ratio$sig + out_ratio$ns, methods = "wilson")
out_ratio <- mutate(out_ratio, lower = conf_int$lower, upper = conf_int$upper)
Expand Down Expand Up @@ -157,6 +170,8 @@ calls <- filter_calls(input_file, col_name, cutoff, direction)
out_ratio <- compute_ratio(calls, options$plot_from)

# optionally, write the data shown in the plot to a file
print(options$output_path)

if (!is.null(options$output_path)) {
write_tsv(out_ratio, options$output_path)
}
Expand Down
78 changes: 78 additions & 0 deletions scripts/manuscript_demo/process_data_for_manuscript.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
#!/bin/bash

# using minimap v2.1 and samtools v1.19
export PATH="$PATH:/g/data/lf10/as7425/apps/minimap2/"
module load samtools

# map hela DRS reads to GRCh38 cDNA transcriptome
export wd="/g/data/lf10/as7425/R2DTool_demo"; mkdir -p ${wd} 2>/dev/null
export reads="/g/data/lf10/as7425/2023_mrna-biogenesis-maps/analysis/2024-03-28_ASR012_HeLa-total-RNA004-rep1/ASR012_HeLa-total-RNA004-rep1_dorado_polyA_m6A_all_reads.fastq"
export genome="/g/data/lf10/as7425/genomes/human_genome/ensembl_release_110/Homo_sapiens.GRCh38.cdna.all.fa"

minimap2 -ax map-ont -y -k 14 -N 20 ${genome} ${reads} -t 104 | samtools view -bh -@ 104 > ${wd}/aligned_reads.bam

# filter for plus strand primary alignments with nonzero mapq
samtools view -b -F 2308 -@ 104 -q 1 ${wd}/aligned_reads.bam | samtools sort > ${wd}/primary.bam
samtools view -@ 104 -c ${wd}/primary.bam
samtools index ${wd}/primary.bam

# pileup the modification calls using modkit
export PATH="$PATH:/g/data/lf10/as7425/apps/dist"
modkit pileup -t 104 ${wd}/primary.bam "${wd}/pileup.bed" --log-filepath "${wd}/pileup.bed.log"

# convert to table with header for R2Dtool
# filter for coverage >=10
printf "transcript\tstart\tend\tmod\tcov\tstrand\tstart\tend\tcolor\tX1\tX2\tcov\tfrac_mod\tn_mod\tn_canonical\tn_other_mod\tn_delete\tn_fail\tn_diff\tn_no_call\n" > ${wd}/R2D_input.bed
awk 'BEGIN {FS=OFS="\t"} {gsub(",", OFS, $0); $9="."; print}' "${wd}/pileup.bed" | tr " " "\t" | awk '($12 > 9)' >> ${wd}/R2D_input.bed

# if not filtering for coverage
# printf "transcript\tstart\tend\tmod\tcov\tstrand\tstart\tend\tcolor\tX1\tX2\tcov\tfrac_mod\tn_mod\tn_canonical\tn_other_mod\tn_delete\tn_fail\tn_diff\tn_no_call\n" > ${wd}/R2D_input.bed
# awk 'BEGIN {FS=OFS="\t"} {gsub(",", OFS, $0); $9="."; print}' "${wd}/pileup.bed" | tr " " "\t" >> ${wd}/R2D_input.bed


# verify that all sites mapped to 'A' nucleotide as expected
export wd="/g/data/lf10/as7425/R2DTool_demo"; mkdir -p ${wd} 2>/dev/null
export genome="/g/data/lf10/as7425/genomes/human_genome/ensembl_release_110/Homo_sapiens.GRCh38.cdna.all.fa"
tail -n +2 ${wd}/R2D_input.bed > ${wd}/R2D_input_noheader.bed
bedtools getfasta -s -fi ${genome} -bed ${wd}/R2D_input_noheader.bed | tail -n +2 | awk 'NR%2==1' | sort | uniq -c | sort -nr > ${wd}/input_sequence_context.txt
cat ${wd}/input_sequence_context.txt

# 336160 A
# 782 T
# 333 G
# 300 C


##################################################

# run R2Dtool

export wd="/g/data/lf10/as7425/R2DTool_demo";
export sites="${wd}/R2D_input.bed"
export anno="/g/data/lf10/as7425/genomes/human_genome/ensembl_release_110/Homo_sapiens.GRCh38.110.chr_patch_hapl_scaff.gtf"

# R2Dtool rust implementation
cd ~/R2Dtool && rm -rf target; cargo build --release
export PATH="${PATH}:/home/150/as7425/R2Dtool/target/release/"

time r2d annotate -i "${sites}" -g ${anno} -H > "${wd}/methylation_calls_annotated.bed"
time r2d liftover -i "${sites}" -g ${anno} -H > "${wd}/methylation_calls_annotated_liftover.bed"

##################################################

# check sequence context in the liftover

export genome="/g/data/lf10/as7425/genomes/human_genome/ensembl_release_110/Homo_sapiens.GRCh38.dna_sm.toplevel.fa"
tail -n +2 "${wd}/methylation_calls_annotated_liftover.bed" | cut -f1-6 | awk -F "\t" '($6 == "-")' > "${wd}/methylation_calls_annotated_liftover_noheader.bed"
bedtools getfasta -s -fi ${genome} -bed "${wd}/methylation_calls_annotated_liftover_noheader.bed" | tail -n +2 | awk 'NR%2==1' | sort | uniq -c | sort -nr > ${wd}/liftover_sequence_context.txt
cat ${wd}/liftover_sequence_context.txt

##################################################

# make R2Dtool plots

time Rscript ~/R2Dtool/scripts/R2_plotMetaTranscript.R "${wd}/methylation_calls_annotated.bed" "${wd}/metatranscript_out_Rust.svg" "frac_mod" "9" "upper" -c loess -l -o "${wd}/transcript_data_Rust.tsv"
time Rscript ~/R2Dtool/scripts/R2_plotMetaJunction.R "${wd}/methylation_calls_annotated.bed" "${wd}/metajunction_out_binom_rust.svg" "frac_mod" "9" "upper" -c loess -o "$wd/table"


##################################################
74 changes: 42 additions & 32 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,14 @@
use clap::{Arg,Command};
use std::process::Command as ProcessCommand;
use std::env;
use std::path::{Path,PathBuf};
use path_absolutize::Absolutize;

// modules
pub mod parse_annotation;
pub mod annotate;
pub mod liftover;
pub mod parse_gtf;
use std::path::Path;

#[cfg(test)]
mod tests;

const RELATIVE_SCRIPT_PATH: &str = "../../scripts/";

Expand Down Expand Up @@ -376,7 +374,7 @@ fn main() {
println!("Arguments being passed to R2_plotMetaTranscript.R:");
println!("Rscript R2_plotMetaTranscript.R {}", args.join(" "));

match generate_r_plots("R2_plotMetaTranscript.R", &args) {
match generate_r_plots("R2_plotMetaTranscript.R", &args, None) {
Ok(_) => println!("PlotMetaTranscript generated successfully."),
Err(e) => {
eprintln!("Error: {}", e);
Expand Down Expand Up @@ -406,7 +404,7 @@ fn main() {
println!("Arguments being passed to R2_plotMetaJunction.R:");
println!("Rscript R2_plotMetaJunction.R {}", args.join(" "));

match generate_r_plots("R2_plotMetaJunction.R", &args) {
match generate_r_plots("R2_plotMetaJunction.R", &args, None) {
Ok(_) => println!("PlotMetaJunction generated successfully."),
Err(e) => {
eprintln!("Error: {}", e);
Expand All @@ -416,37 +414,49 @@ fn main() {
}
}

// plotMetaCodon
// plotMetaCodon
if let Some(matches) = matches.subcommand_matches("plotMetaCodon") {
let codon_flag = if matches.get_flag("start_codon") { "-s" } else { "-e" };
let codon_flag = if matches.get_flag("start_codon") { "-s" } else { "-e" };

let mut args: Vec<String> = vec![
matches.get_one::<String>("input_file").unwrap().clone(),
matches.get_one::<String>("output_file").unwrap().clone(),
matches.get_one::<String>("field_name").unwrap().clone(),
matches.get_one::<String>("cutoff_value").unwrap().clone(),
matches.get_one::<String>("cutoff_type").unwrap().clone(),
codon_flag.to_string(),
];
let current_dir = std::env::current_dir().expect("Failed to get current directory");
let input_file = current_dir.join(matches.get_one::<String>("input_file").unwrap())
.canonicalize()
.expect("Failed to canonicalize input file path");
let output_file_path = PathBuf::from(matches.get_one::<String>("output_file").unwrap());
let output_file = output_file_path.absolutize()
.expect("Failed to absolutize output file path");

if let Some(method) = matches.get_one::<String>("confidence_method") {
args.extend_from_slice(&["-c".to_string(), method.clone()]);
}
let mut args: Vec<String> = vec![
input_file.to_string_lossy().into_owned(),
output_file.to_string_lossy().into_owned(),
matches.get_one::<String>("field_name").unwrap().clone(),
matches.get_one::<String>("cutoff_value").unwrap().clone(),
matches.get_one::<String>("cutoff_type").unwrap().clone(),
];

if let Some(table) = matches.get_one::<String>("save_table") {
args.extend_from_slice(&["-o".to_string(), table.clone()]);
}
args.push(codon_flag.to_string());

println!("Arguments being passed to R2_plotMetaCodon.R:");
println!("Rscript R2_plotMetaCodon.R {}", args.join(" "));

match generate_r_plots("R2_plotMetaCodon.R", &args) {
Ok(_) => println!("PlotMetaCodon generated successfully."),
Err(e) => {
eprintln!("Error: {}", e);
eprintln!("PlotMetaCodon generation failed. Please check the error messages above.");
std::process::exit(1);
}
if let Some(method) = matches.get_one::<String>("confidence_method") {
args.extend_from_slice(&["-c".to_string(), method.clone()]);
}

if let Some(table) = matches.get_one::<String>("save_table") {
let save_table_path_buf = PathBuf::from(table);
let save_table_path = save_table_path_buf.absolutize()
.expect("Failed to absolutize save table path");
args.extend_from_slice(&["-o".to_string(), save_table_path.to_string_lossy().into_owned()]);
}

println!("Arguments being passed to R2_plotMetaCodon.R:");
println!("Rscript R2_plotMetaCodon.R {}", args.join(" "));

match generate_r_plots("R2_plotMetaCodon.R", &args, None) {
Ok(_) => println!("PlotMetaCodon generated successfully."),
Err(e) => {
eprintln!("Error: {}", e);
eprintln!("PlotMetaCodon generation failed. Please check the error messages above.");
std::process::exit(1);
}
}
}
}
1 change: 1 addition & 0 deletions src/tests/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
mod plot_integration_tests;
Loading

0 comments on commit 0f274bd

Please sign in to comment.