-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from hackseq/master
Update fork from original
- Loading branch information
Showing
4 changed files
with
137 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,51 @@ | ||
runAbyss<-function(input, k) { | ||
name = paste("test_k", k, sep="") | ||
cmd = paste("abyss-pe", | ||
" k=", k, | ||
#" np=8" | ||
" name=", name, | ||
" in='", input, "'", | ||
sep = "") | ||
library("testthat") | ||
|
||
#' Runs Abyss | ||
#' | ||
#' Runs abyss with the specified parameters. | ||
#' @param input Path to the input fastq files separated by space | ||
#' @param name The name of this assembly | ||
#' @param k size of a single k-mer in a k-mer pair (bp) | ||
#' @export | ||
runAbyss<-function(input, name, k) { | ||
outdir = paste(name, "_abyss_k", k, sep="") | ||
dir.create(file.path(".", "runs"), showWarnings = FALSE) | ||
dir.create(file.path("runs", outdir), showWarnings = FALSE) | ||
outdir <- paste("runs/", outdir, sep="") | ||
|
||
cmd <- paste("abyss-pe", | ||
" -C ", outdir, | ||
" k=", k, | ||
" name=", name, | ||
" in=\"", input, "\"", | ||
sep = "") | ||
print("Running:") | ||
print(cmd) | ||
|
||
t1 <- try(system(cmd, | ||
intern = TRUE, | ||
ignore.stderr = TRUE, | ||
ignore.stdout = TRUE), | ||
#ignore.stderr = TRUE, | ||
#ignore.stdout = TRUE | ||
), | ||
silent = TRUE) | ||
|
||
if (inherits(t1, "try-error")) { | ||
print("[FAILED]") | ||
return() | ||
} | ||
print("[DONE]") | ||
} | ||
|
||
|
||
testRunAbyss <- function() { | ||
#setwd("Hackseq2016/abyss") | ||
test_input = "../../../data/test-data/reads1.fastq ../../../data/test-data/reads2.fastq" | ||
runAbyss(test_input, 22) | ||
#' Runs Abyss for the test data | ||
#' | ||
#' @param k size of a single k-mer in a k-mer pair (bp) | ||
#' @export | ||
runAbyssTest <- function(k) { | ||
runAbyss("$PWD/data/test-data/reads1.fastq $PWD/data/test-data/reads2.fastq", | ||
"test", | ||
k) | ||
} | ||
|
||
testRunAbyss() | ||
runAbyssTest(k=22) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
k=32 | ||
|
||
# Report run time and memory usage | ||
export SHELL=zsh -opipefail | ||
export REPORTTIME=1 | ||
export TIMEFMT=time user=%U system=%S elapsed=%E cpu=%P memory=%M job=%J | ||
|
||
all: \ | ||
results/200k/k$k/hsapiens-scaffolds.fac.tsv \ | ||
results/200k.fac.tsv | ||
|
||
.DELETE_ON_ERROR: | ||
.SECONDARY: | ||
|
||
# Download the complete data. | ||
data/30CJCAAXX_4.fq.gz: | ||
mkdir -p $(@D) | ||
curl -o $@ ftp://ftp.bcgsc.ca/public/sjackman/$(@F) | ||
|
||
# Download a subset of the data. | ||
data/200k.fq.gz: | ||
mkdir -p $(@D) | ||
curl -o $@ ftp://ftp.bcgsc.ca/public/sjackman/$(@F) | ||
|
||
# Take a subset of the data. | ||
data/400k.fq.gz: data/30CJCAAXX_4.fq.gz | ||
gunzip -c $< | head -n1600000 | gzip >$@ | ||
|
||
# Unzip the data. | ||
%.fq: %.fq.gz | ||
gunzip -c $< >$@ | ||
|
||
# Assemble the data with ABySS. | ||
results/200k/k$k/hsapiens-scaffolds.fa: data/200k.fq | ||
mkdir -p $(@D) | ||
abyss-pe -C $(@D) name=hsapiens k=$k in=$(realpath $<) | ||
|
||
# Calculate the assembly contiguity metrics. | ||
%.fac.tsv: %.fa | ||
abyss-fac $< >$@ | ||
|
||
# Concatenate multiple TSV files. | ||
results/200k.fac.tsv: \ | ||
results/200k/k*/hsapiens-scaffolds.fac.tsv \ | ||
results/200k/k$k/hsapiens-scaffolds.fac.tsv | ||
mlr --tsvlite cat $^ >$@ | ||
datamash -H max N50 <$@ | ||
head -n1 $@; grep $$(datamash -H max N50 <$@ | tail -n1) $@ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
# Manual Optimization | ||
|
||
Run ABySS multiple times, manually, for multiple values of *k*, and determine which assembly has the largest N50. | ||
|
||
# Usage | ||
|
||
```sh | ||
make k=24 | ||
make k=28 | ||
make k=32 | ||
make | ||
datamash -H max N50 <results/200k.fac.tsv | ||
head -n1 results/200k.fac.tsv; grep $(datamash -H max N50 <results/200k.fac.tsv | tail -n1) results/200k.fac.tsv | ||
``` |