-
Notifications
You must be signed in to change notification settings - Fork 2
/
bed2binCoverage
executable file
·103 lines (92 loc) · 4.02 KB
/
bed2binCoverage
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/bin/bash
#PBS -l nodes=1:ppn=4
## default settings; each one can be overridden from the command line
GENOME='mm9'    # genome assembly (-g)
WINDOW='500'    # window size (-w)
BIN='20'        # bin size within the window (-n)
#### usage ####
# Print the help text on stdout and terminate the script with status 0.
# Takes no arguments; never returns to the caller.
usage() {
    # Quoted delimiter: the text is emitted literally, no expansion takes place.
    cat <<'EOF'

Program: bed2binCoverage (compute expression for input BED coordinates divided into bins)
Author: BRIC, University of Copenhagen, Denmark
Version: 1.0
Contact: pundhir@binf.ku.dk
Usage: bed2binCoverage -i <file> -j <file> [OPTIONS]
Options:
 -i <coor> [input file in BED format (can be stdin)]
 -j <file> [input mapped reads in BAM format]
 [if multiple separate by a comma]
[OPTIONS]
 -g <string> [genome (default: mm9)]
 -w <int> [window size (default: 500)]
 -n <int> [bin size within the window (default: 20)]
 -r [require same strandedness (upstream: -; downstream: +)]
 -h [help]

EOF
    exit 0
}
#### parse options ####
while getopts i:j:g:w:n:rh ARG; do
    case "$ARG" in
        i) BEDFILE=$OPTARG ;;
        j) BAMFILES=$OPTARG ;;
        g) GENOME=$OPTARG ;;
        w) WINDOW=$OPTARG ;;
        n) BIN=$OPTARG ;;
        r) SAME_STRAND=1 ;;
        h) HELP=1 ;;
        # Unknown option or missing option argument: getopts has already
        # reported it on stderr, so show the help instead of running the
        # analysis with a partially understood command line.
        *) usage ;;
    esac
done

## show usage and stop if a mandatory input (-i, -j) is missing or help was requested
# [[ ]] with || avoids the deprecated, ambiguous "-o" operator of [ ].
if [[ -z "$BEDFILE" || -z "$BAMFILES" || -n "$HELP" ]]; then
    usage
fi
###################
# helper function
# Block until every background job of the current shell has finished.
# Arguments: $1 - message printed once all jobs are done
# Outputs:   the PID of each job before waiting on it, then the message
function wait_for_jobs_to_finish {
    local job
    # Word-splitting the PID list is intentional: one PID per iteration.
    for job in $(jobs -p); do
        echo "$job"
        wait "$job"
    done
    # Quoted so the message is printed verbatim (no whitespace collapsing,
    # no pathname expansion); printf so a leading "-n"/"-e" is not eaten.
    printf '%s\n' "${1-}"
}
###############
## select the chromosome-size file that belongs to the requested genome
case "$GENOME" in
    mm9)
        GENOME_FILE="/home/pundhir/project/genome_annotations/mouse.mm9.genome"
        ;;
    hg19)
        GENOME_FILE="/home/pundhir/project/genome_annotations/human.hg19.genome"
        ;;
    *)
        # any other genome is rejected: explain, then show the help text
        echo "Presently the program only support analysis for mm9 or hg19"
        echo
        usage
        ;;
esac
## create temporary BED file if input is from stdin
if [ "$BEDFILE" == "stdin" ]; then
    # mktemp creates a uniquely named file atomically; abort if that fails
    # rather than redirecting into an empty file name.
    TMP=$(mktemp) || { echo "Error: cannot create temporary file" >&2; exit 1; }
    # Remove the temporary file on every exit path, including early aborts.
    trap 'rm -f -- "$TMP"' EXIT
    # Copy stdin into the file, rewriting each line as its whitespace-separated
    # fields joined by single tabs. perl reads stdin directly, so field
    # contents (backslashes, glob characters) pass through unchanged and a
    # final line without a trailing newline is kept.
    perl -lane 'print join("\t", @F);' > "$TMP"
    BEDFILE=$TMP
fi
## parse input bam files in an array
# read -a splits on the comma for this one command only: the global IFS is
# left alone and the file names are not subject to pathname expansion.
IFS=',' read -r -a BAMFILES_ARR <<< "$BAMFILES"
BAMFILES_COUNT=${#BAMFILES_ARR[@]}

## window and bin size must be positive integers
# A zero or non-numeric bin size would otherwise abort the division below
# with an arithmetic error and let the script continue without BIN_COUNT.
if ! [[ "$WINDOW" =~ ^[1-9][0-9]*$ && "$BIN" =~ ^[1-9][0-9]*$ ]]; then
    echo "Error: window size (-w) and bin size (-n) must be positive integers" >&2
    # do not leave the stdin temporary file behind on this early exit
    if [[ -n "$TMP" ]]; then
        rm -f -- "$TMP"
    fi
    exit 1
fi

## number of bins spanning the whole window (WINDOW on each side of the anchor)
BIN_COUNT=$(( (WINDOW * 2) / BIN ))
## compute coverage and print it as extra columns next to each input BED line

# Print the last $BAMFILES_COUNT whitespace-separated fields of every stdin
# line, joined by tabs.
# NOTE(review): presumably bed2coverage appends one column per BAM file - confirm.
_last_bam_columns() {
    # The count is handed to perl as an argument (consumed in BEGIN, so perl
    # still reads stdin) instead of being spliced into the perl source.
    perl -lane 'BEGIN { $n = shift @ARGV } print join("\t", @F[scalar(@F)-$n .. $#F]);' "$BAMFILES_COUNT"
}

# Coverage columns for one flank of every region in $BEDFILE.
# Arguments: $1 - flank selector handed to bed2window (-l or -r)
# Globals:   BEDFILE, WINDOW, BAMFILES, GENOME, SAME_STRAND (all read)
# NOTE(review): the meaning of the bed2window/bed2coverage flags is defined by
# those external tools; they are passed exactly as before.
_flank_coverage() {
    local side=$1
    local -a window_opts=() coverage_opts=()
    # strand-aware mode (-r on the command line) adds one flag to each tool
    if [ -n "$SAME_STRAND" ]; then
        window_opts=(-m)
        coverage_opts=(-r)
    fi
    bed2window -i stdin -w 0 -s < "$BEDFILE" \
        | bed2window -i stdin -w "$WINDOW" "$side" -s "${window_opts[@]}" \
        | bed2coverage -i stdin -j "$BAMFILES" -m -d -g "$GENOME" "${coverage_opts[@]}" \
        | _last_bam_columns
}

if [ "$WINDOW" -eq "$BIN" ]; then
    # bin size equals window size: one value set per flank (-l, then -r)
    paste "$BEDFILE" <(paste <(_flank_coverage -l) <(_flank_coverage -r))
else
    # One output row per input line: the window is cut into BIN_COUNT bins and
    # the coverage columns of all bins are concatenated on a single line.
    # NOTE(review): SAME_STRAND is not applied in this branch (unchanged).
    # "|| [ -n ... ]" keeps a final line that lacks a trailing newline, so the
    # rows stay aligned with the BED file in paste.
    paste "$BEDFILE" <(while read -r LINE || [ -n "$LINE" ]; do
        printf '%s\n' "$LINE" \
            | bed2window -i stdin -w "$WINDOW" -s \
            | bedtools makewindows -b stdin -n "$BIN_COUNT" \
            | bed2coverage -i stdin -j "$BAMFILES" -m -d -g "$GENOME" \
            | perl -ane 'BEGIN { $n = shift @ARGV } push(@expr, @F[scalar(@F)-$n .. $#F]); END { print join("\t", @expr), "\n"; }' "$BAMFILES_COUNT"
    done < "$BEDFILE")
fi
## remove the temporary BED file that was created for stdin input, if any
if [[ -n "$TMP" ]]; then
    # quoted and guarded with "--" so the name is never split or read as an
    # option; -f keeps this silent when the file is already gone
    rm -f -- "$TMP"
fi