Skip to content

Commit

Permalink
Merge pull request #1747 from milaboratory/sample-sheet-with-mitool
Browse files Browse the repository at this point in the history
integrate mitool with sample-sheet
  • Loading branch information
gnefedev authored Aug 13, 2024
2 parents 9eb9142 + 64ea597 commit b7ea937
Show file tree
Hide file tree
Showing 5 changed files with 86 additions and 38 deletions.
42 changes: 42 additions & 0 deletions itests/case029-two-level-split.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env bash

# Sample barcode integration test

# assert COMMAND [EXPECTED]
#
# Evaluates COMMAND (its stderr is discarded, its exit status is ignored),
# strips leading and trailing spaces from the captured stdout and compares it
# with EXPECTED. Backslash escapes in EXPECTED are interpreted via `echo -e`.
# When EXPECTED is omitted, the command is expected to print nothing.
#
# On a match the function returns silently; on a mismatch it prints a one-line
# diagnostic (newlines in the actual output are shown as a literal "\n") and
# terminates the script with status 1.
assert() {
  # Keep scratch variables out of the caller's namespace.
  local expected result

  # `${2:-}` keeps the one-argument form usable under `set -u`.
  expected=$(echo -ne "${2:-}")
  result="$(eval 2>/dev/null $1)" || true
  result="$(sed -e 's/ *$//' -e 's/^ *//' <<<"$result")"
  if [[ "$result" == "$expected" ]]; then
    return
  fi

  # Collapse a multi-line result into a single line for the report.
  result="$(sed -e :a -e '$!N;s/\n/\\n/;ta' <<<"$result")"
  [[ -z "$result" ]] && result="nothing" || result="\"$result\""
  # Same defaulting as above: a bare "$2" aborts with "unbound variable"
  # under `set -u` before the diagnostic can be printed.
  [[ -z "${2:-}" ]] && expected="nothing" || expected="\"${2:-}\""
  echo "expected $expected got $result for" "$1"
  exit 1
}

# Abort on any error, unset variable or failed pipeline stage; trace commands.
set -euxo pipefail

# Duplicate the paired-end subset under two replica ids (1 and 2).
for replica in 1 2; do
  for read in R1 R2; do
    cp "single_cell_vdj_t_subset_${read}.fastq.gz" "single_cell_vdj_t_subset_${replica}_${read}.fastq.gz"
  done
done

# Sample sheet: header plus three rows (two barcodes for sample A, one for B);
# the TagPattern column is left empty in every row.
{
  echo -e 'Sample\tTagPattern\tCELL*'
  echo -e 'A\t\tGGATTACTCATTGCCC'
  echo -e 'A\t\tCTGAAGTTCAAGGTAA'
  echo -e 'B\t\tAACTCCCAGATCCTGT'
} > 10x-samplesheet.tsv

# The {{SREPLICA}} / {{R}} placeholders are passed to mixcr literally
# (presumably expanded by mixcr into the replica id and read number - confirm).
mixcr_args=(
  analyze 10x-sc-xcr-vdj
  --species hs
  --sample-sheet 10x-samplesheet.tsv
  'single_cell_vdj_t_subset_{{SREPLICA}}_{{R}}.fastq.gz'
  output_with_two_level_split/
)
mixcr "${mixcr_args[@]}"

# One clones table is expected for every replica/sample combination.
for split_name in 1.A 1.B 2.A 2.B; do
  [[ -f "output_with_two_level_split/${split_name}.clones.tsv" ]] || exit 1
done

5 changes: 3 additions & 2 deletions regression/reports/baseSingleCell.raw.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
align:
type: alignerReport
commandLine: align -f --report baseSingleCell.raw.align.report.txt --json-report
baseSingleCell.raw.align.report.json --preset 10x-vdj-bcr --assemble-contigs-by-cells
--species hs baseSingleCell.raw.consensus.4.mic baseSingleCell.raw.alignments.vdjca
baseSingleCell.raw.align.report.json --preset 10x-vdj-bcr --save-output-file-names
baseSingleCell.raw.align.list.tsv --assemble-contigs-by-cells --species hs baseSingleCell.raw.consensus.4.mic
baseSingleCell.raw.alignments.vdjca
inputFiles:
- baseSingleCell.raw.consensus.4.mic
outputFiles:
Expand Down
5 changes: 3 additions & 2 deletions regression/reports/baseSingleCell.vdjcontigs.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
align:
type: alignerReport
commandLine: align -f --report baseSingleCell.vdjcontigs.align.report.txt --json-report
baseSingleCell.vdjcontigs.align.report.json --preset 10x-vdj-bcr --species hs
--assemble-contigs-by VDJRegion baseSingleCell.vdjcontigs.consensus.4.mic baseSingleCell.vdjcontigs.alignments.vdjca
baseSingleCell.vdjcontigs.align.report.json --preset 10x-vdj-bcr --save-output-file-names
baseSingleCell.vdjcontigs.align.list.tsv --species hs --assemble-contigs-by VDJRegion
baseSingleCell.vdjcontigs.consensus.4.mic baseSingleCell.vdjcontigs.alignments.vdjca
inputFiles:
- baseSingleCell.vdjcontigs.consensus.4.mic
outputFiles:
Expand Down
69 changes: 38 additions & 31 deletions src/main/kotlin/com/milaboratory/mixcr/cli/CommandAnalyze.kt
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ object CommandAnalyze {
}

planBuilder.executeSteps(dryRun)
// Taking into account that there are multiple outputs from the mitool parse command.
planBuilder.setActualOutputs(sampleFileList.toPath())

pipeline
Expand All @@ -409,31 +410,29 @@ object CommandAnalyze {
}

// TODO: once MiTool supports sample tags from reads, recombine all mitool inputs into one align command
val sampleFileList = if (pipeline.first() != parse && bundle.align!!.splitBySample && !dryRun) {
// Adding an option to save output files by align
outputFolder
.resolve("${outputNamePrefix.dotAfterIfNotBlank()}align.list.tsv")
.also { it.deleteIfExists() }
.toFile().also { it.deleteOnExit() }
} else {
null
}
val extraAlignArgs: List<String> = buildList {
sampleFileList?.let { sampleFileList ->
this += listOf(SAVE_OUTPUT_FILE_NAMES_OPTION, sampleFileList.toString())
}
if (strictMatching) {
this += STRICT_SAMPLE_NAME_MATCHING_OPTION
}
referenceForCram?.let { referenceForCram ->
this += listOf(BAMReader.referenceForCramOption, referenceForCram.toString())
}
}

planBuilder.addStep(align) { _, _, _ ->
val sampleFileListFiles = mutableMapOf<String, Path>()

planBuilder.addStep(align) { _, _, sampleName ->
buildList {
this += listOf("--preset", presetName)
this += extraAlignArgs
if (bundle.align!!.splitBySample && !dryRun) {
// Adding an option to save output files by align
val sampleFileList = outputFolder
.resolve("${outputNamePrefix.dotAfterIfNotBlank()}${sampleName.dotAfterIfNotBlank()}align.list.tsv")
.also { it.deleteIfExists() }
.toFile().also { it.deleteOnExit() }
sampleFileListFiles[sampleName] = sampleFileList.toPath()
this += listOf(SAVE_OUTPUT_FILE_NAMES_OPTION, sampleFileList.toString())
}

if (strictMatching) {
this += STRICT_SAMPLE_NAME_MATCHING_OPTION
}
referenceForCram?.let { referenceForCram ->
this += listOf(BAMReader.referenceForCramOption, referenceForCram.toString())
}

this += mixins.flatMap { it.cmdArgs }
this += pathsForNotAligned.argsOfNotAlignedForAlign()
if (pipeline.first() != parse)
Expand All @@ -443,9 +442,10 @@ object CommandAnalyze {

planBuilder.executeSteps(dryRun)

// Taking into account that there are multiple outputs from the align command
if (sampleFileList != null) {
planBuilder.setActualOutputs(sampleFileList.toPath())
// Take into account that the align command may produce multiple outputs.
// Moreover, mitool may already have split the input into several files, and align may split each of them further
if (sampleFileListFiles.isNotEmpty()) {
planBuilder.setActualOutputs(sampleFileListFiles)
}

// Adding all steps with calculations
Expand Down Expand Up @@ -519,12 +519,19 @@ object CommandAnalyze {
private var nextInputs: List<InputFileSet> = listOf(InputFileSet("", initialInputs.map { it.toString() }))
private val outputsForCommands = mutableListOf<Pair<AnalyzeCommandDescriptor<*, *>, List<InputFileSet>>>()

fun setActualOutputs(fileNamesList: Path) {
val lines = fileNamesList.readLines().drop(1).map { it.split("\t") }
nextInputs = when {
lines.isEmpty() -> emptyList()
else -> lines.map { line ->
InputFileSet(listToSampleName(line.drop(2)), listOf(outputFolder.resolve(line[0]).toString()))
/**
 * Single-listing convenience overload.
 *
 * Delegates to the map-based overload, registering [outputFilesList] under an
 * empty prefix.
 *
 * @param outputFilesList listing file describing the files actually produced by the previous step
 */
fun setActualOutputs(outputFilesList: Path) {
setActualOutputs(mapOf("" to outputFilesList))
}

/**
 * Replaces `nextInputs` with the file sets described by the given listing files.
 *
 * Each listing is read line by line: the first line is skipped as a header and
 * every remaining line is split on tabs. The first column is resolved against
 * `outputFolder` and becomes the only file of the resulting [InputFileSet]; the
 * columns from the third one onward are passed to `listToSampleName`. The second
 * column is not used here.
 *
 * @param outputFilesList listing files keyed by a prefix that is prepended to every
 * sample name read from that listing
 */
fun setActualOutputs(outputFilesList: Map<String, Path>) {
nextInputs = outputFilesList.flatMap { (prefix, file) ->
// Drop the first line of the listing (treated as a header row).
val withoutHeader = file.readLines().drop(1)
withoutHeader.map { it.split("\t") }.map { line ->
val sampleName = listToSampleName(line.drop(2))
InputFileSet(
// NOTE(review): dotAfterIfNotBlank presumably appends "." only to a non-blank prefix - confirm.
"${prefix.dotAfterIfNotBlank()}$sampleName",
listOf(outputFolder.resolve(line[0]).toString())
)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,6 @@ abstract class MiXCRParamsResolver<P : Any>(
ValidationException.require(alignParams.headerExtractors.isEmpty()) {
"`headerExtractors` are not supported with mitool commands in pipeline"
}
ValidationException.require(alignParams.tagTransformationSteps.isEmpty()) {
"`tagTransformationSteps` are not supported with mitool commands in pipeline"
}
if (alignParams.parameters.isSaveOriginalReads) {
logger.warn { "Saving original reads with mitool commands in pipeline will lead to saving reads after mitool processing, not original ones" }
}
Expand Down

0 comments on commit b7ea937

Please sign in to comment.