forked from yhwu/makeUCSCHubFromBedFiles
-
Notifications
You must be signed in to change notification settings - Fork 0
/
GTSP2BED_fromFolder.R
110 lines (85 loc) · 3.7 KB
/
GTSP2BED_fromFolder.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#' Generate bed file from a GTSP sample folder
#' Rscript ~/makeUCSCHubFromBedFiles/GTSPFolder2BED.R GTSP0515-10
#' To generate all bed files, run
#' for i in $(ls -d GTSP*/); do echo ${i%%//}; Rscript ~/makeUCSCHubFromBedFiles/GTSP2BED_fromFolder.R ${i%%//}; done
options(stringsAsFactors=FALSE)
#' set all argumentgs for the script
#' @return list of argumentgs
#' @example set_args()
#' set_args(c("~", "-g=test"))
#' Rscript ~/intSiteUploader/intSiteUploader.R
set_args <- function(...) {
## arguments from command line
suppressMessages(library(argparse))
parser <- ArgumentParser(description="generate bed file from a sample folder")
parser$add_argument("workDir", nargs='?',
default='.',
help="Result sample folder from intSiteCaller")
parser$add_argument("-t", "--type", nargs=1,
default='all',
help="Type of sites: uniq, multi, all(default)")
args <- parser$parse_args(...)
args$workDir <- normalizePath(args$workDir, mustWork=TRUE)
stopifnot(args$type %in% c("uniq", "multi", "all"))
return(args)
}
##set_args()
args <- set_args()
write.table(t(as.data.frame(args)), col.names=FALSE, quote=FALSE, sep="\t")
libs <- c("dplyr", "RMySQL", "stats", "GenomicRanges",
"BiocGenerics", "parallel", "IRanges", "GenomeInfoDb")
null <- suppressMessages(sapply(libs, library, character.only=TRUE))
## load uniq hits sites
load(file.path(args$workDir, "sites.final.RData"))
load(file.path(args$workDir, "allSites.RData"))
sites <- data.frame(
"sampleID"=sites.final$sampleName,
"chr"=as.character(seqnames(sites.final)),
"strand"=as.character(strand(sites.final)),
"position"=start(flank(sites.final, -1, start=T)),
"idx"=seq(sites.final))
gtspid <- unique(sites.final$sampleName)
stopifnot(length(gtspid)==1)
stopifnot(grepl(gtspid, args$wordDir))
revmap <- sites.final$revmap
allBreakpoint <- start(flank(allSites, -1, start=F))
bp <- plyr::ldply(seq(revmap), function(i)
data.frame(
"idx"=i,
"breakpoint"=allBreakpoint[revmap[[i]]] ) )
bp <- bp %>% dplyr::group_by(idx, breakpoint) %>%
dplyr::mutate(count=n()) %>%
dplyr::distinct()
sites.uniq <- merge(sites, bp, by="idx")
## load multi hits sites
load(file.path(args$workDir, "multihitData.RData"))
sites.multi <- data.frame(
"chr"=as.character(seqnames(multihitData[[1]])),
"strand"=as.character(strand(multihitData[[1]])),
"position"=start(flank(multihitData[[1]], -1, start=T)),
"breakpoint"=start(flank(multihitData[[1]], -1, start=FALSE)))
sites.multi <- dplyr::distinct(sites.multi)
## write uniq, multi, all sites to bed file
write_bed <- function(allsites, fileName) {
allsites <- dplyr::select(allsites, chr, strand, position, breakpoint)
## write to file
bed <- plyr::arrange(allsites, chr, position, strand, breakpoint)
bed <- data.frame(chrom=as.character(bed$chr),
chromStart=as.integer(pmin(bed$position, bed$breakpoint)),
chromEnd=as.integer(pmax(bed$position, bed$breakpoint)),
name=args$workDir,
score=500,
strand=as.character(bed$strand))
write.table(bed, file=fileName,
quote=FALSE, sep="\t", row.names=FALSE, col.names=FALSE)
message("File written to ", fileName)
}
allsites <- sites.uniq
fileName <- sprintf("%s%s.bed", gtspid, "uniq")
write_bed(allsites, fileName)
allsites <- sites.multi
fileName <- sprintf("%s%s.bed", gtspid, "multi")
write_bed(allsites, fileName)
allsites <- plyr::rbind.fill(sites.uniq, sites.multi)
fileName <- sprintf("%s%s.bed", gtspid, "all")
write_bed(allsites, fileName)