-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* add merge of external counts functionality * add the citation to nature communications * fix rd man page linking * fix minor warnings in BiocCheck/RCheck * fix/catch biomaRt specific issues
- Loading branch information
Showing
18 changed files
with
277 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,163 @@ | ||
#' | ||
#' Merge external data | ||
#' | ||
#' To boost its own sequencing data, one can download existing and precounted | ||
#' data. This function merges the existing \code{FraserDataSet} with | ||
#' external count data. | ||
#' | ||
#' For more details on existing datasets have a look at: | ||
#' <https://github.com/gagneurlab/drop#datasets> | ||
#' | ||
#' Since FRASER can not hand NA values, the merge will return only the | ||
#' intersecting regions and will drop any non overlapping features. This has to | ||
#' be kept in mind when analysing rare disease samples. | ||
#' | ||
#' @param fds A \code{FraserDataSet} | ||
#' @param countFiles A character vector of file names pointing to the external | ||
#' count data. The vector has to be names or the files have to start | ||
#' with \code{k_j}, \code{k_theta}, \code{n_psi3}, \code{n_psi5}, | ||
#' \code{n_theta}. | ||
#' @param sampleIDs The samples to be merged from the external data. | ||
#' @param annotation A sample annotation of the external data (optional). | ||
#' | ||
#' @return Merged \code{FraserDataSet} object. | ||
#' | ||
#' @examples | ||
#' anno <- fread(system.file("extdata", "externalCounts", | ||
#' "annotation.tsv.gz", package="FRASER")) | ||
#' ctsFiles <- list.files(full.names = TRUE, pattern="counts", | ||
#' system.file("extdata", "externalCounts", package="FRASER")) | ||
#' | ||
#' fds <- createTestFraserDataSet() | ||
#' fds_merged <- mergeExternalData(fds, ctsFiles, anno[,sampleID], anno) | ||
#' | ||
#' K(fds, "psi5") | ||
#' K(fds_merged, "psi5") | ||
#' | ||
#' @export | ||
mergeExternalData <- function(fds, countFiles, sampleIDs, annotation=NULL){ | ||
|
||
# check input | ||
checkFraserDataSet(fds) | ||
checkCountData(fds) | ||
|
||
if(length(countFiles) != 5){ | ||
stop("You have to provide 5 files, but only ", length(countFiles), | ||
" were provided.") | ||
} | ||
if(is.null(names(countFiles))){ | ||
names(countFiles) <- gsub("(_counts)?.tsv.gz", "", basename(countFiles)) | ||
} | ||
reqNames <- c("k_j", "k_theta", "n_psi3", "n_psi5", "n_theta") | ||
if(any(!reqNames %in% unique(names(countFiles)))){ | ||
stop("Please name the input or the files correctly. We are missing: ", | ||
paste(collapse=", ", | ||
reqNames[!reqNames %in% names(countFiles)])) | ||
} | ||
if(any(!file.exists(countFiles))){ | ||
stop("Provided files are missing! We are missing: ", | ||
paste(collapse=", ", countFiles[!file.exists(countFiles)])) | ||
} | ||
if(any(unique(sampleIDs) != sampleIDs)){ | ||
stop("Provided sampleIDs have to be unique!") | ||
} | ||
sampleIDs <- as.character(sampleIDs) | ||
|
||
# load external counts | ||
message("Loading provided counts") | ||
names(reqNames) <- reqNames | ||
extCts <- lapply(reqNames, function(id){ | ||
gr <- makeGRangesFromDataFrame(fread(countFiles[id]), | ||
keep.extra.columns=TRUE) | ||
if(any(!sampleIDs %in% colnames(mcols(gr)))){ | ||
stop("Can not find provided sampleID in count data. Missing IDs: ", | ||
paste(collapse=", ", | ||
sampleIDs[!sampleIDs %in% colnames(mcols(gr))])) | ||
} | ||
gr[,sampleIDs] | ||
}) | ||
stopifnot(all(granges(extCts[['k_j']]) == granges(extCts[['n_psi3']]))) | ||
stopifnot(all(granges(extCts[['k_j']]) == granges(extCts[['n_psi5']]))) | ||
stopifnot(all(granges(extCts[['k_theta']]) == granges(extCts[['n_theta']]))) | ||
|
||
# | ||
# merging colData | ||
# | ||
message(date(), ": Merging data ...") | ||
if(!is.null(annotation)){ | ||
annotation <- DataFrame(annotation) | ||
} else { | ||
annotation <- DataFrame(sampleID=as.character(sampleIDs)) | ||
} | ||
rownames(annotation) <- annotation[,"sampleID"] | ||
newColData <- DataFrame(rbind(fill=TRUE, | ||
as.data.table(colData(fds)), | ||
as.data.table(annotation[sampleIDs,]))) | ||
rownames(newColData) <- newColData[,"sampleID"] | ||
|
||
# | ||
# merge psi5/psi3 data | ||
# | ||
extractExtData <- function(fds, countFun, type, ov, extData, extName){ | ||
ans <- as.matrix(cbind(countFun(fds, type=type)[from(ov),], | ||
mcols(extData[[extName]])[to(ov),])) | ||
mode(ans) <- "integer" | ||
ans | ||
} | ||
|
||
# find overlap | ||
if(all(strand(rowRanges(fds, type="j")) == "*")){ | ||
for(id in reqNames){ | ||
strand(extCts[[id]]) <- "*" | ||
} | ||
} | ||
ov <- findOverlaps(rowRanges(fds, type="j"), extCts[['k_j']], type="equal") | ||
|
||
newCtsK_J <- extractExtData(fds, K, "j", ov, extCts, "k_j") | ||
newCtsN_psi5 <- extractExtData(fds, N, "psi5", ov, extCts, "n_psi5") | ||
newCtsN_psi3 <- extractExtData(fds, N, "psi3", ov, extCts, "n_psi3") | ||
|
||
|
||
# | ||
# merge theta data | ||
# | ||
# find overlap | ||
ovss <- findOverlaps(rowRanges(fds, type="theta"), | ||
extCts[['k_theta']], type="equal") | ||
|
||
newCtsK_theta <- extractExtData(fds, K, "theta", ovss, extCts, "k_theta") | ||
newCtsN_theta <- extractExtData(fds, N, "theta", ovss, extCts, "n_theta") | ||
|
||
|
||
# | ||
# finalize merged FraserDataObject | ||
# | ||
nsr <- SummarizedExperiment( | ||
colData=newColData, | ||
assays=SimpleList( | ||
rawCountsSS=newCtsK_theta, | ||
rawOtherCounts_theta=newCtsN_theta - newCtsK_theta), | ||
rowRanges=rowRanges(fds, type="theta")[ | ||
from(ovss),c("spliceSiteID", "type")]) | ||
|
||
ans <- new("FraserDataSet", | ||
name = name(fds), | ||
bamParam = scanBamParam(fds), | ||
strandSpecific = strandSpecific(fds), | ||
workingDir = workingDir(fds), | ||
colData = newColData, | ||
assays=Assays(SimpleList( | ||
rawCountsJ=newCtsK_J, | ||
rawOtherCounts_psi5=newCtsN_psi5 - newCtsK_J, | ||
rawOtherCounts_psi3=newCtsN_psi3 - newCtsK_J)), | ||
nonSplicedReads = nsr, | ||
rowRanges = rowRanges(fds)[from(ov),c("startID", "endID")], | ||
elementMetadata = DataFrame(newCtsK_J[,integer(0)])) | ||
|
||
# | ||
# compute new psi values | ||
# | ||
ans <- calculatePSIValues(ans) | ||
|
||
ans | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,24 +1,24 @@ | ||
citHeader("To cite FRASER in publications please use:") | ||
citEntry(entry = "article", | ||
title = "Detection of aberrant splicing events in RNA-seq data with FRASER", | ||
title = "Detection of aberrant splicing events in RNA-seq data using FRASER", | ||
author = personList( | ||
person("Christian", "Mertes"), | ||
person("Ines", "Scheller"), | ||
person(c("Ines", "F"), "Scheller"), | ||
person(c("Vicente", "A"), "Y\\'{e}pez"), | ||
person(c("Muhammed", "H"), "\\c{C}elik"), | ||
person("Yingjiqiong", "Liang"), | ||
person(c("Laura", "S"), "Kremer"), | ||
person("Mirjana", "Gusic"), | ||
person("Holger", "Prokisch"), | ||
person("Julien", "Gagneur")), | ||
journal = "biorxiv", | ||
volume = "", | ||
number = "", | ||
pages = "", | ||
year = "2019", | ||
issn = "", | ||
doi = "10.1101/2019.12.18.866830", | ||
journal = "Nature Communications", | ||
volume = "12", | ||
issue = "1", | ||
pages = "529", | ||
year = "2021", | ||
issn = "2041-1723", | ||
doi = "10.1038/s41467-020-20573-7", | ||
textVersion = paste( | ||
"Mertes C, Scheller I et al.,", | ||
"Detection of aberrant splicing events in RNA-seq data with FRASER.", | ||
"biorxiv, 2019, 10.1101/2019.12.18.866830.") ) | ||
"Mertes, C., Scheller, I.F., Yepez, V.A. et al.", | ||
"Detection of aberrant splicing events in RNA-seq data using FRASER.", | ||
"Nat Commun 12, 529 2021. https://doi.org/10.1038/s41467-020-20573-7.") ) |
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Oops, something went wrong.