Skip to content

Commit

Permalink
multithreading was buggy + downstream issues with diff splicing analysis
Browse files Browse the repository at this point in the history
  • Loading branch information
Anoushka committed Oct 13, 2021
1 parent 18e8caa commit f5f023d
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 10 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: scisorseqr
Type: Package
Title: Processes long reads for differential isoform analysis
Version: 0.1.3
Version: 0.1.4
Author: Anoushka Joglekar
Maintainer: Anoushka Joglekar <anj2026@med.cornell.edu>
Description: For any comparative (e.g. case-control) studies of alternative splicing,
Expand Down
6 changes: 6 additions & 0 deletions R/ExonQuant.R
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,10 @@ ExonQuant <- function(allInfoFile = 'LongReadInfo/AllInfo_IncompleteReads.gz',
countExons <- paste("Rscript", R_file, allInfoFile, groupingFactor, numThreads, threshold)
system(countExons)

concatComm <- "awk 'FNR==1 && NR!=1{next;}{print}' ExonQuantOutput/PID*.tsv > ExonQuantOutput/InclusionExclusionCounts.tsv"
system(concatComm)

rmComm = "rm ExonQuantOutput/PID*.tsv"
system(rmComm)

}
15 changes: 9 additions & 6 deletions inst/RScript/ExonCounting.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,14 @@ internalExons = allInfo_SE %>% tidyr::separate_rows(Exons,sep = ";%;") %>% dplyr
uniqExons <- internalExons %>% dplyr::ungroup() %>% dplyr::select(Exons, Gene) %>% dplyr::distinct()

inclusionCounts <- internalExons %>% dplyr::ungroup() %>% dplyr::select(Exons,Gene,all_of(groupingFactor)) %>%
dplyr::group_by(Exons,Gene,.dots=groupingFactor) %>% dplyr::add_count(name = "Inclusion") %>%
dplyr::group_by(Exons,Gene,.dots=groupingFactor) %>% dplyr::add_count(name = "inclusion") %>%
dplyr::distinct() %>% as.data.frame()

readSE <- internalExons %>% dplyr::select(Read,Gene,all_of(groupingFactor),start,end) %>% dplyr::distinct() %>% as.data.frame()
readSE <- internalExons %>% dplyr::select(Read,Gene,all_of(groupingFactor),start,end) %>%
dplyr::distinct() %>% as.data.frame()

checkSpanningReads <- function(gene){
tid <- as.character(Sys.getpid())
exons <- uniqExons %>% dplyr::filter(Gene == gene) %>%
tidyr::separate(Exons,into=c("chr","s","e","strand"),sep = "_",remove=FALSE)
reads <- readSE %>% dplyr::filter(Gene == gene)
Expand All @@ -48,13 +50,14 @@ checkSpanningReads <- function(gene){
inclusionReads <- inclusionCounts %>% dplyr::filter(Gene == gene)
inc_tot <- dplyr::right_join(inclusionReads,spanningReads,by = c('Exons',"Gene",groupingFactor)) %>%
replace(is.na(.),0) %>%
dplyr::mutate(PSI = Inclusion/Total, Exclusion = Total-Inclusion) %>% as.data.frame()
fName <- file.path(outDir,"InclusionExclusionCounts.tsv")
dplyr::mutate(PSI = inclusion/Total, exclusion = Total-inclusion) %>% as.data.frame()
inc_tot <- inc_tot[,c(1:4,7,5,6)]
fName <- file.path(outDir,paste0("PID_",tid,"_InclusionExclusionCounts.tsv"))
if(file.exists(fName)){
write.table(inc_tot,file.path(outDir,"InclusionExclusionCounts.tsv"),sep ="\t",
write.table(inc_tot,fName,sep ="\t",
append= T, quote = F, row.names = F, col.names = FALSE)
} else{
write.table(inc_tot,file.path(outDir,"InclusionExclusionCounts.tsv"),sep ="\t",
write.table(inc_tot,fName,sep ="\t",
quote = F, row.names = F, col.names = TRUE)
}
return
Expand Down
2 changes: 1 addition & 1 deletion inst/RScript/ExonTest.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
args <- commandArgs(trailingOnly=TRUE)


inclusionFile <- data.table::fread(args[1])[,-6]
inclusionFile <- data.table::fread(args[1])

comps <- c(args[2],args[4])

Expand Down
3 changes: 1 addition & 2 deletions inst/bash/minimap2comm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,13 @@ n=`cat Misc/fastqGuide | wc -l` ; for i in `seq 1 $n` ; do name=`cat Misc/fastqG
echo "### treating "$name >> $mmOut/REPORT.minimap2 ; \
file=`cat Misc/fastqGuide | head -$i | tail -1 | awk '{print $2}'` ; \
$progPath -t $numThreads -ax splice --secondary=no $genomeFa \
$file > $mmOut/$name.sam ; done &>> $mmOut/REPORT.minimap2
$file > $mmOut/$name.sam ; done >> $mmOut/REPORT.minimap2 2>&1

for i in $(ls $mmOut/*.sam) ; \
do name=`echo $i | awk '{n=split($1,a,/\/|.sam/); print a[n-1]}'`; \
samtools view -bh $i -o $mmOut/$name.bam ; \
rm $mmOut/$name.sam; \
done


## Create a bam guide for further processing
for i in $(ls $mmOut/*.bam) ; do echo $i | awk -v path=$(pwd) '{print path"/"$1}' ; done > Misc/bamGuide

0 comments on commit f5f023d

Please sign in to comment.