From 467d6d918e6967202f6eb728b506a11585dbc978 Mon Sep 17 00:00:00 2001 From: Bolotin Dmitriy Date: Mon, 20 May 2019 01:56:21 +0300 Subject: [PATCH 01/13] Next development cycle v3.0.8 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 68a6a5f70..0065c4cb2 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ com.milaboratory mixcr - 3.0.7 + 3.0.8-SNAPSHOT jar MiXCR From 160052ca2ff7477ce929d05e66c57af991fd7121 Mon Sep 17 00:00:00 2001 From: Bolotin Dmitriy Date: Fri, 31 May 2019 01:17:10 +0300 Subject: [PATCH 02/13] (from milib) Fixes exception in `-mutationsDetailed` and similar export options --- CHANGELOG_CURRENT | 1 + milib | 2 +- pom.xml | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG_CURRENT b/CHANGELOG_CURRENT index e69de29bb..264ebd6e3 100644 --- a/CHANGELOG_CURRENT +++ b/CHANGELOG_CURRENT @@ -0,0 +1 @@ +Fixes exception in `-mutationsDetailed` and similar export options \ No newline at end of file diff --git a/milib b/milib index cd564d4bf..d6221957b 160000 --- a/milib +++ b/milib @@ -1 +1 @@ -Subproject commit cd564d4bf5c81be66d0bf168ad4e520e5c00a7c1 +Subproject commit d6221957b45a6c57d74744177feef5b68c86829a diff --git a/pom.xml b/pom.xml index 0065c4cb2..73900eeb9 100644 --- a/pom.xml +++ b/pom.xml @@ -39,7 +39,7 @@ UTF-8 - 1.10 + 1.11-SNAPSHOT From 3ee19559f976e1d5b9dfd44c5d93eff9819689a8 Mon Sep 17 00:00:00 2001 From: Bolotin Dmitriy Date: Fri, 31 May 2019 02:02:45 +0300 Subject: [PATCH 03/13] MiLib upgrade (to prev) --- milib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milib b/milib index d6221957b..f71dc703a 160000 --- a/milib +++ b/milib @@ -1 +1 @@ -Subproject commit d6221957b45a6c57d74744177feef5b68c86829a +Subproject commit f71dc703a4b61a3f16f31cc55a09f4e4a6075d65 From be747119c3bd24e865d10d61389345c60812ee3b Mon Sep 17 00:00:00 2001 From: Stanislav Poslavsky Date: Wed, 19 Jun 2019 19:00:29 +0300 Subject: [PATCH 04/13] fix command filterAlignments --- pom.xml | 2 +- .../com/milaboratory/mixcr/cli/CommandFilterAlignments.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pom.xml b/pom.xml index 73900eeb9..91d35006c 100644 --- a/pom.xml +++ b/pom.xml @@ -46,7 +46,7 @@ io.repseq repseqio - 1.3.1 + 1.3.2-SNAPSHOT com.milaboratory diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandFilterAlignments.java b/src/main/java/com/milaboratory/mixcr/cli/CommandFilterAlignments.java index b51a5bad3..890e54a77 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CommandFilterAlignments.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandFilterAlignments.java @@ -132,7 +132,7 @@ public AlignmentsFilter getFilter() { public ActionConfiguration getConfiguration() { return new FilterConfiguration(getChains(), chimerasOnly, - limit, getReadIds().toArray(), getContainFeature(), getCdr3Equals()); + limit, getReadIds() == null ? null : getReadIds().toArray(), getContainFeature(), getCdr3Equals()); } @Override @@ -145,7 +145,7 @@ public void run1() throws Exception { sReads = new CountLimitingOutputPort<>(sReads, limit); progress = SmartProgressReporter.extractProgress((CountLimitingOutputPort) sReads); } - writer.header(reader.getParameters(), reader.getUsedGenes(), null); + writer.header(reader.getParameters(), reader.getUsedGenes(), getFullPipelineConfiguration()); SmartProgressReporter.startProgressReport("Filtering", progress); int total = 0, passed = 0; final AlignmentsFilter filter = getFilter(); From 235c7fd103b4e223b3ebfce1b00dd1ba26ea9473 Mon Sep 17 00:00:00 2001 From: Bolotin Dmitriy Date: Mon, 8 Jul 2019 18:51:53 +0300 Subject: [PATCH 05/13] This fixes null-value override in JsonOverrider. Fixes `failed to override some parameter: {subCloningRegion=VRegion}` in `assembleContigs` --- src/main/java/com/milaboratory/mixcr/cli/JsonOverrider.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/main/java/com/milaboratory/mixcr/cli/JsonOverrider.java b/src/main/java/com/milaboratory/mixcr/cli/JsonOverrider.java index b07bc8738..410f12680 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/JsonOverrider.java +++ b/src/main/java/com/milaboratory/mixcr/cli/JsonOverrider.java @@ -205,6 +205,9 @@ else if (value.equalsIgnoreCase("false")) overrideWarn(fieldName, value); oNode.set(fieldName, NullNode.getInstance()); return true; + } else if (valueNode.isNull()) { + oNode.put(fieldName, value); + return true; } return false; } From 408e000773a6f7413bbd16c33c8904ee2a9cade0 Mon Sep 17 00:00:00 2001 From: Dmitry Bolotin Date: Thu, 11 Jul 2019 12:24:34 +0300 Subject: [PATCH 06/13] No C gene split in shotgun analyze pipeine. (#532) --- src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java b/src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java index ca2b1187c..a3607ceef 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java @@ -847,8 +847,7 @@ CommandAlign mkAlign() { Collection pipelineSpecificAssembleParameters() { return Arrays.asList( "-OseparateByV=true", - "-OseparateByJ=true", - "-OseparateByC=true" + "-OseparateByJ=true" ); } From dc6156a9a4c078845c293de1c3cf297dbde33c13 Mon Sep 17 00:00:00 2001 From: Stanislav Poslavsky Date: Tue, 16 Jul 2019 15:14:22 +0300 Subject: [PATCH 07/13] fix align pipeline configuration with correcting feature to avoid reanalyzing each time --- .../milaboratory/mixcr/cli/CommandAlign.java | 67 +++++++++++-------- 1 file changed, 38 insertions(+), 29 deletions(-) diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java b/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java index 8c637b39c..b13ef7637 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java @@ -37,7 +37,10 @@ import cc.redberry.pipe.util.CountLimitingOutputPort; import cc.redberry.pipe.util.OrderedOutputPort; import cc.redberry.pipe.util.StatusReporter; -import com.fasterxml.jackson.annotation.*; +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.milaboratory.cli.ActionConfiguration; import com.milaboratory.cli.PipelineConfiguration; import com.milaboratory.core.PairedEndReadsLayout; @@ -52,7 +55,10 @@ import com.milaboratory.core.io.sequence.fastq.SingleFastqReader; import com.milaboratory.core.io.sequence.fastq.SingleFastqWriter; import com.milaboratory.core.sequence.NucleotideSequence; -import com.milaboratory.mixcr.basictypes.*; +import com.milaboratory.mixcr.basictypes.SequenceHistory; +import com.milaboratory.mixcr.basictypes.VDJCAlignments; +import com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter; +import com.milaboratory.mixcr.basictypes.VDJCHit; import com.milaboratory.mixcr.util.MiXCRVersionInfo; import com.milaboratory.mixcr.vdjaligners.VDJCAligner; import com.milaboratory.mixcr.vdjaligners.VDJCAlignerParameters; @@ -209,6 +215,33 @@ public VDJCAlignerParameters getAlignerParameters() { throwValidationException("Failed to override some parameter: " + overrides); } + // Detect if automatic featureToAlign correction is required + VDJCLibrary library = getLibrary(); + + int totalV = 0, totalVErrors = 0, hasVRegion = 0; + GeneFeature correctingFeature = alignerParameters.getVAlignerParameters().getGeneFeatureToAlign().hasReversedRegions() ? + GeneFeature.VRegionWithP : + GeneFeature.VRegion; + + for (VDJCGene gene : library.getGenes(getChains())) { + if (gene.getGeneType() == GeneType.Variable) { + totalV++; + if (!alignerParameters.containsRequiredFeature(gene)) { + totalVErrors++; + if (gene.getPartitioning().isAvailable(correctingFeature)) + hasVRegion++; + } + } + } + + // Performing V featureToAlign correction if needed + if (totalVErrors > totalV * 0.9 && hasVRegion > totalVErrors * 0.8) { + warn("WARNING: forcing -OvParameters.geneFeatureToAlign=" + GeneFeature.encode(correctingFeature) + + " since current gene feature (" + GeneFeature.encode(alignerParameters.getVAlignerParameters().getGeneFeatureToAlign()) + ") is absent in " + + Util.PERCENT_FORMAT.format(100.0 * totalVErrors / totalV) + "% of V genes."); + alignerParameters.getVAlignerParameters().setGeneFeatureToAlign(correctingFeature); + } + return vdjcAlignerParameters = alignerParameters; } @@ -354,16 +387,7 @@ public void run1() throws Exception { // Getting aligner parameters VDJCAlignerParameters alignerParameters = getAlignerParameters(); - // Creating aligner - VDJCAligner aligner = VDJCAligner.createAligner(alignerParameters, - isInputPaired(), !noMerge); - // Detect if automatic featureToAlign correction is required - int totalV = 0, totalVErrors = 0, hasVRegion = 0; - GeneFeature correctingFeature = alignerParameters.getVAlignerParameters().getGeneFeatureToAlign().hasReversedRegions() ? - GeneFeature.VRegionWithP : - GeneFeature.VRegion; - VDJCLibrary library = getLibrary(); // Printing library level warnings, if specified for the library @@ -380,24 +404,9 @@ public void run1() throws Exception { warn(l); } - for (VDJCGene gene : library.getGenes(getChains())) { - if (gene.getGeneType() == GeneType.Variable) { - totalV++; - if (!alignerParameters.containsRequiredFeature(gene)) { - totalVErrors++; - if (gene.getPartitioning().isAvailable(correctingFeature)) - hasVRegion++; - } - } - } - - // Performing V featureToAlign correction if needed - if (totalVErrors > totalV * 0.9 && hasVRegion > totalVErrors * 0.8) { - warn("WARNING: forcing -OvParameters.geneFeatureToAlign=" + GeneFeature.encode(correctingFeature) + - " since current gene feature (" + GeneFeature.encode(alignerParameters.getVAlignerParameters().getGeneFeatureToAlign()) + ") is absent in " + - Util.PERCENT_FORMAT.format(100.0 * totalVErrors / totalV) + "% of V genes."); - alignerParameters.getVAlignerParameters().setGeneFeatureToAlign(correctingFeature); - } + // Creating aligner + VDJCAligner aligner = VDJCAligner.createAligner(alignerParameters, + isInputPaired(), !noMerge); int numberOfExcludedNFGenes = 0; int numberOfExcludedFGenes = 0; From ecf1aa04030d37ee82d6cd7d6672c34c09d0a6a2 Mon Sep 17 00:00:00 2001 From: Stanislav Poslavsky Date: Tue, 16 Jul 2019 21:40:45 +0300 Subject: [PATCH 08/13] Error message for incorrect --region-of-interest in analyze (#534) --- src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java b/src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java index a3607ceef..007a986f3 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandAnalyze.java @@ -727,6 +727,8 @@ private void setRegionOfInterest(String v) { } catch (Exception e) { throwValidationException("Illegal gene feature: " + v); } + if (!assemblingFeature.contains(GeneFeature.ShortCDR3)) + throwValidationException("--region-of-interest must cover CDR3"); } @Override From fc53271e5c74591bf0d570c0194172cbfa8b5037 Mon Sep 17 00:00:00 2001 From: Stanislav Poslavsky Date: Tue, 16 Jul 2019 21:41:08 +0300 Subject: [PATCH 09/13] This fixes #509 (#535) --- .../com/milaboratory/mixcr/cli/CloneAssemblerReport.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/com/milaboratory/mixcr/cli/CloneAssemblerReport.java b/src/main/java/com/milaboratory/mixcr/cli/CloneAssemblerReport.java index 6f2668f27..1a053856d 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CloneAssemblerReport.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CloneAssemblerReport.java @@ -239,13 +239,13 @@ public void writeReport(ReportHelper helper) { .writePercentAndAbsoluteField("Mapped low quality reads, percent of used", deferredAlignmentsMapped.get(), clusterizationBase) .writePercentAndAbsoluteField("Reads clustered in PCR error correction, percent of used", readsClustered.get(), clusterizationBase) .writePercentAndAbsoluteField("Reads pre-clustered due to the similar VJC-lists, percent of used", readsPreClustered.get(), alignmentsInClones) - .writePercentAndAbsoluteField("Reads dropped due to the lack of a clone sequence", + .writePercentAndAbsoluteField("Reads dropped due to the lack of a clone sequence, percent of total", failedToExtractTarget.get(), totalReads) - .writePercentAndAbsoluteField("Reads dropped due to low quality", + .writePercentAndAbsoluteField("Reads dropped due to low quality, percent of total", droppedAsLowQuality.get(), totalReads) - .writePercentAndAbsoluteField("Reads dropped due to failed mapping", + .writePercentAndAbsoluteField("Reads dropped due to failed mapping, percent of total", deferredAlignmentsDropped.get(), totalReads) - .writePercentAndAbsoluteField("Reads dropped with low quality clones", readsDroppedWithClones.get(), alignmentsInClones) + .writePercentAndAbsoluteField("Reads dropped with low quality clones, percent of total", readsDroppedWithClones.get(), totalReads) .writeField("Clonotypes eliminated by PCR error correction", clonesClustered.get()) .writeField("Clonotypes dropped as low quality", clonesDropped.get()) .writeField("Clonotypes pre-clustered due to the similar VJC-lists", clonesPreClustered.get()); From 94b72fcfa41e313b6701b4cdc64a3837d71ca123 Mon Sep 17 00:00:00 2001 From: Stanislav Poslavsky Date: Sun, 21 Jul 2019 16:22:49 +0300 Subject: [PATCH 10/13] Smart floating bounds in VDJAligner PV-first (#525) * Range island detection done. * Correction for default trimming quality threshold in contig assembly. * milib submodule upgrade * ReadTrimming option on alignment stage. * MiLib submodule upgrade. * Fixes error in FullSeqAssembleTest. * Fix for null listener in PVFirst. * fix bug in PVfirst regarding kAligner2 finest resutls * temp fix bug due to indels in homopolymers on the boundaries of CDR3 * chery pick * Added `exportAlignmentsForClones` action * Fix tricky bug in contig aligners * update submodule repseqio * shift indels at homopolymers by default * FullSeqAssembler: Indices mapping problem + Assertion exception (#536) * First attempt. * This fixes assertion exception appearing when the sequence assembled for the assembing region is not equal to the clonal sequence. --- CHANGELOG_CURRENT | 3 +- itests.sh | 5 + milib | 2 +- repseqio | 2 +- .../mixcr/assembler/CloneAssembler.java | 33 +- .../fullseq/CoverageAccumulator.java | 81 ++++ .../assembler/fullseq/FullSeqAssembler.java | 421 ++++++++++++++---- .../fullseq/FullSeqAssemblerReport.java | 21 + .../mixcr/basictypes/ClnAWriter.java | 9 +- .../mixcr/basictypes/VDJCAlignments.java | 21 +- .../mixcr/basictypes/VDJCHit.java | 2 +- .../mixcr/basictypes/VDJCObject.java | 43 +- .../milaboratory/mixcr/cli/AlignerReport.java | 56 +++ .../milaboratory/mixcr/cli/CommandAlign.java | 29 +- .../mixcr/cli/CommandAssembleContigs.java | 58 ++- .../cli/CommandExportAlignmentsForClones.java | 102 +++++ .../cli/CommandExportAlignmentsPretty.java | 7 + .../milaboratory/mixcr/cli/CommandExtend.java | 2 +- .../java/com/milaboratory/mixcr/cli/Main.java | 1 + .../PartialAlignmentsAssembler.java | 2 +- .../mixcr/vdjaligners/VDJCAligner.java | 7 +- .../vdjaligners/VDJCAlignerEventListener.java | 7 +- .../mixcr/vdjaligners/VDJCAlignerPVFirst.java | 50 ++- .../vdjaligners/VDJCAlignerParameters.java | 19 +- .../vdjaligners/VDJCAlignmentResult.java | 6 + .../full_seq_assembler_parameters.json | 2 +- .../parameters/vdjcaligner_parameters.json | 9 +- .../fullseq/FullSeqAssemblerTest.java | 8 +- .../mixcr/basictypes/VDJCObjectTest.java | 5 +- .../VDJCAlignerParametersTest.java | 2 +- 30 files changed, 852 insertions(+), 163 deletions(-) create mode 100644 src/main/java/com/milaboratory/mixcr/assembler/fullseq/CoverageAccumulator.java create mode 100644 src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsForClones.java diff --git a/CHANGELOG_CURRENT b/CHANGELOG_CURRENT index 264ebd6e3..54760a13a 100644 --- a/CHANGELOG_CURRENT +++ b/CHANGELOG_CURRENT @@ -1 +1,2 @@ -Fixes exception in `-mutationsDetailed` and similar export options \ No newline at end of file +Fixes exception in `-mutationsDetailed` and similar export options +Added `exportAlignmentsForClones` action \ No newline at end of file diff --git a/itests.sh b/itests.sh index aec9a3be0..64b592dc4 100755 --- a/itests.sh +++ b/itests.sh @@ -103,6 +103,11 @@ if [[ $create_standard_results == true ]]; then go_assemble ${s}_paired mixcr align -s hs -r ${s}_single.vdjca.report ${s}_R1.fastq ${s}_single.vdjca go_assemble ${s}_single + + mixcr align -s hs -p kAligner2 -r ${s}_paired.vdjca.report ${s}_R1.fastq ${s}_R2.fastq ${s}_paired2.vdjca + go_assemble ${s}_paired2 + mixcr align -s hs -p kAligner2 -r ${s}_single.vdjca.report ${s}_R1.fastq ${s}_single2.vdjca + go_assemble ${s}_single2 done fi diff --git a/milib b/milib index f71dc703a..c0a0ea983 160000 --- a/milib +++ b/milib @@ -1 +1 @@ -Subproject commit f71dc703a4b61a3f16f31cc55a09f4e4a6075d65 +Subproject commit c0a0ea9835b32575fb09d288dbbffa8c53dc269f diff --git a/repseqio b/repseqio index 143eeb334..29663892c 160000 --- a/repseqio +++ b/repseqio @@ -1 +1 @@ -Subproject commit 143eeb334e19402a670674f35dededf475b9c533 +Subproject commit 29663892cec0ac53438adaabb86542b8e572faea diff --git a/src/main/java/com/milaboratory/mixcr/assembler/CloneAssembler.java b/src/main/java/com/milaboratory/mixcr/assembler/CloneAssembler.java index 0cdd29408..82cc431ae 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/CloneAssembler.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/CloneAssembler.java @@ -284,7 +284,7 @@ public void runClustering() { if (clusteredClonesAccumulators != null) throw new IllegalStateException("Already clustered."); if (!preClusteringDone) - throw new IllegalStateException("No preclustering done."); + throw new IllegalStateException("No pre-clustering is done."); @SuppressWarnings("unchecked") Clustering clustering = new Clustering(cloneList, @@ -329,7 +329,7 @@ public long getAlignmentsCount() { public void buildClones() { if (!preClusteringDone) - throw new IllegalStateException("No preclustering done."); + throw new IllegalStateException("No pre-clustering is done."); ClonesBuilder builder = new ClonesBuilder(); progressReporter = builder; builder.buildClones(); @@ -532,7 +532,7 @@ private final class ClonesBuilder implements CanReportProgress { volatile int progress; private ClonesBuilder() { - this.sourceSize = clusteredClonesAccumulators != null ? clusteredClonesAccumulators.size() : clones.size(); + this.sourceSize = clusteredClonesAccumulators != null ? clusteredClonesAccumulators.size() : cloneList.size(); } @Override @@ -572,8 +572,14 @@ void buildClones() { else { for (TIntIntIterator it = idMapping.iterator(); it.hasNext(); ) { it.advance(); - if (newIdMapping.containsKey(it.value())) - it.setValue(newIdMapping.get(it.value())); + int val = it.value(); + if (val >= 0) { // "renaming" normal clonotypes + // if (newIdMapping.containsKey(val)) + it.setValue(newIdMapping.get(val)); + } else { // "renaming" clustered clonotypes + // if (newIdMapping.containsKey(~val)) + it.setValue(newIdMapping.get(~val)); + } } } source = Arrays.asList(sourceArray); @@ -671,11 +677,11 @@ public List build() { // Score filtering step // Calculation - float[] maxScores = new float[3]; + float[] maxScores = new float[2]; for (CloneAccumulator acc : accs) { if (acc == null) continue; - for (int i = 0; i < 3; i++) { + for (int i = 0; i < 2; i++) { // Only for V and J GeneType gt = GeneType.VJC_REFERENCE[i]; maxScores[i] = parameters.getSeparateBy(gt) @@ -684,7 +690,7 @@ public List build() { } } - for (int i = 0; i < 3; i++) + for (int i = 0; i < 2; i++) // Only for V and J maxScores[i] /= parameters.preClusteringScoreFilteringRatio; // Filtering low score clonotypes @@ -693,11 +699,12 @@ public List build() { if (accs[i] == null) continue; - for (int j = 0; j < 3; j++) { + for (int j = 0; j < 2; j++) { // Only for V and J if (accs[i].getBestGene(GeneType.VJC_REFERENCE[j]) != null && accs[i].getBestScore(GeneType.VJC_REFERENCE[j]) < maxScores[j]) { dropped.accept(accs[i]); accs[i] = null; + ++deleted; break; } } @@ -706,7 +713,6 @@ public List build() { // Filtering low quality clonotypes List result = new ArrayList<>(accs.length - deleted); - out: for (CloneAccumulator acc : accs) { // null marks clustered clonotypes if (acc == null) @@ -714,15 +720,16 @@ public List build() { acc.rebuildClonalSequence(); - if (acc.getSequence().getConcatenated().getQuality().minValue() < - parameters.minimalQuality) { + if (acc.getSequence().getConcatenated().getQuality().minValue() < parameters.minimalQuality) { dropped.accept(acc); - continue out; + continue; } result.add(acc); } + assert result.size() == accs.length - deleted; + return result; } diff --git a/src/main/java/com/milaboratory/mixcr/assembler/fullseq/CoverageAccumulator.java b/src/main/java/com/milaboratory/mixcr/assembler/fullseq/CoverageAccumulator.java new file mode 100644 index 000000000..447ec44d3 --- /dev/null +++ b/src/main/java/com/milaboratory/mixcr/assembler/fullseq/CoverageAccumulator.java @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2014-2019, Bolotin Dmitry, Chudakov Dmitry, Shugay Mikhail + * (here and after addressed as Inventors) + * All Rights Reserved + * + * Permission to use, copy, modify and distribute any part of this program for + * educational, research and non-profit purposes, by non-profit institutions + * only, without fee, and without a written agreement is hereby granted, + * provided that the above copyright notice, this paragraph and the following + * three paragraphs appear in all copies. + * + * Those desiring to incorporate this work into commercial products or use for + * commercial purposes should contact MiLaboratory LLC, which owns exclusive + * rights for distribution of this program for commercial purposes, using the + * following email address: licensing@milaboratory.com. + * + * IN NO EVENT SHALL THE INVENTORS BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, + * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, + * ARISING OUT OF THE USE OF THIS SOFTWARE, EVEN IF THE INVENTORS HAS BEEN + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE SOFTWARE PROVIDED HEREIN IS ON AN "AS IS" BASIS, AND THE INVENTORS HAS + * NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR + * MODIFICATIONS. THE INVENTORS MAKES NO REPRESENTATIONS AND EXTENDS NO + * WARRANTIES OF ANY KIND, EITHER IMPLIED OR EXPRESS, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A + * PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY + * PATENT, TRADEMARK OR OTHER RIGHTS. + */ +package com.milaboratory.mixcr.assembler.fullseq; + +import com.milaboratory.core.Range; +import com.milaboratory.core.alignment.Alignment; +import com.milaboratory.core.sequence.NucleotideSequence; +import com.milaboratory.mixcr.basictypes.VDJCHit; +import gnu.trove.map.hash.TLongIntHashMap; + +import java.util.Arrays; +import java.util.DoubleSummaryStatistics; + +public final class CoverageAccumulator { + public final VDJCHit hit; + private final long[] coverage; + + public CoverageAccumulator(VDJCHit hit) { + this.hit = hit; + final int geneLength = hit.getGene().getPartitioning().getLength(hit.getAlignedFeature()); + this.coverage = new long[geneLength]; + } + + public void accumulate(VDJCHit hit) { + for (int targetId = 0; targetId < hit.numberOfTargets(); targetId++) { + Alignment al = hit.getAlignment(targetId); + if (al == null) + continue; + Range coveredRange = al.getSequence1Range(); + for (int i = coveredRange.getLower(); i < coveredRange.getUpper(); i++) + coverage[i]++; + } + } + + public DoubleSummaryStatistics getStat() { + return Arrays.stream(coverage).mapToDouble(i -> i).summaryStatistics(); + } + + private final TLongIntHashMap nocCache = new TLongIntHashMap(); + + public int getNumberOfCoveredPoints(long threshold) { + if (nocCache.containsKey(threshold)) + return nocCache.get(threshold); + else { + int c = (int) Arrays.stream(coverage).filter(i -> i >= threshold).count(); + nocCache.put(threshold, c); + return c; + } + } + + public double getFractionOfCoveredPoints(long threshold) { + return 1.0 * getNumberOfCoveredPoints(threshold) / coverage.length; + } +} diff --git a/src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssembler.java b/src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssembler.java index 6189b9023..82459c0c3 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssembler.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssembler.java @@ -74,8 +74,6 @@ public final class FullSeqAssembler { final CloneFactory cloneFactory; /** initial clone */ final Clone clone; - /** clone gene scores */ - final EnumMap> geneScores; /** clone assembled feature (must cover CDR3) */ final GeneFeature assemblingFeature; /** top hit genes */ @@ -88,6 +86,8 @@ public final class FullSeqAssembler { final int jLength; /** position of assembling feature in global grid (just "one letter") */ final int positionOfAssemblingFeature; + /** variant id of assembling feature of the target clonotype */ + final int clonalAssemblingFeatureVariantIndex; /** length of assembling feature in the clone */ final int assemblingFeatureLength; // = 1 /** begin of the aligned J part in the reference J gene */ @@ -105,9 +105,26 @@ public final class FullSeqAssembler { new TObjectIntHashMap<>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1); /** integer index -> nucleotide sequence */ final TIntObjectHashMap variantIdToSequence = new TIntObjectHashMap<>(); + /** base hits */ + final VDJCHit vHit, jHit; public FullSeqAssembler(CloneFactory cloneFactory, - FullSeqAssemblerParameters parameters, Clone clone, VDJCAlignerParameters alignerParameters) { + FullSeqAssemblerParameters parameters, + Clone clone, + VDJCAlignerParameters alignerParameters) { + this(cloneFactory, parameters, clone, alignerParameters, + clone.getBestHit(Variable), clone.getBestHit(Joining)); + } + + public FullSeqAssembler(CloneFactory cloneFactory, + FullSeqAssemblerParameters parameters, + Clone clone, + VDJCAlignerParameters alignerParameters, + VDJCHit baseVHit, + VDJCHit baseJHit) { + this.vHit = baseVHit; + this.jHit = baseJHit; + // Checking parameters if (parameters.outputMinimalSumQuality > parameters.branchingMinimalSumQuality) throw new IllegalArgumentException("Wrong parameters. (branchingMinimalSumQuality must be greater than outputMinimalSumQuality)"); @@ -115,13 +132,7 @@ public FullSeqAssembler(CloneFactory cloneFactory, this.cloneFactory = cloneFactory; this.parameters = parameters; this.clone = clone; - this.geneScores = new EnumMap<>(GeneType.class); - for (GeneType gt : GeneType.VDJC_REFERENCE) { - TObjectFloatHashMap scores = new TObjectFloatHashMap<>(); - for (VDJCHit hit : clone.getHits(gt)) - scores.put(hit.getGene().getId(), hit.getScore()); - geneScores.put(gt, scores); - } + this.alignerParameters = alignerParameters; GeneFeature[] assemblingFeatures = clone.getParentCloneSet().getAssemblingFeatures(); if (assemblingFeatures.length != 1) @@ -131,7 +142,9 @@ public FullSeqAssembler(CloneFactory cloneFactory, throw new IllegalArgumentException(); this.assemblingFeature = assemblingFeatures[0]; - this.genes = clone.getBestHitGenes(); + this.genes = new VDJCGenes(baseVHit.getGene(), null, baseJHit.getGene(), null); // clone.getBestHitGenes(); + + this.clonalAssemblingFeatureVariantIndex = initVariantMappings(clone.getFeature(this.assemblingFeature).getSequence()); ReferencePoint start = assemblingFeature.getFirstPoint(), @@ -151,9 +164,8 @@ public FullSeqAssembler(CloneFactory cloneFactory, int splitRegionBegin = -1, splitRegionEnd = -1; if (hasV) { - VDJCHit vHit = clone.getBestHit(Variable); - GeneFeature vFeature = vHit.getAlignedFeature(); - VDJCGene gene = vHit.getGene(); + GeneFeature vFeature = baseVHit.getAlignedFeature(); + VDJCGene gene = baseVHit.getGene(); this.lengthV = gene.getPartitioning().getLength(vFeature) - gene.getFeature(GeneFeature.intersection(assemblingFeature, vFeature)).size(); @@ -173,9 +185,8 @@ public FullSeqAssembler(CloneFactory cloneFactory, this.assemblingFeatureLength = 1; if (hasJ) { - VDJCHit jHit = clone.getBestHit(Joining); - VDJCGene gene = jHit.getGene(); - GeneFeature jFeature = jHit.getAlignedFeature(); + VDJCGene gene = baseJHit.getGene(); + GeneFeature jFeature = baseJHit.getAlignedFeature(); this.jOffset = gene.getPartitioning().getRelativePosition(jFeature, assemblingFeature.getLastPoint()); this.jLength = gene.getPartitioning().getLength(jFeature) - jOffset; @@ -211,6 +222,21 @@ public FullSeqAssemblerReport getReport() { return report; } + private EnumMap> originalGeneScores = null; + + private EnumMap> getOriginalGeneScores() { + if (originalGeneScores == null) { + originalGeneScores = new EnumMap<>(GeneType.class); + for (GeneType gt : GeneType.VDJC_REFERENCE) { + TObjectFloatHashMap scores = new TObjectFloatHashMap<>(); + for (VDJCHit hit : clone.getHits(gt)) + scores.put(hit.getGene().getId(), hit.getScore()); + originalGeneScores.put(gt, scores); + } + } + return originalGeneScores; + } + /* ======================================== Find variants ============================================= */ public Clone[] callVariants(RawVariantsData data) { @@ -245,10 +271,17 @@ public Clone[] callVariants(RawVariantsData data) { clusterizeBranches(data.points, branches); Clone[] result = branches.stream() - .map(branch -> buildClone(branch.count, clean(assembleBranchSequences(data.points, branch)))) + .map(branch -> assembleBranchSequences(data.points, branch)) + .filter(Objects::nonNull) + .map(branch -> buildClone(clean(branch))) .toArray(Clone[]::new); - assert result.length >= 1; + if (result.length == 0) { + // In case assemble procedure failed to assemble even a single clonotype, returning original + // clonotype, to prevent diversity losses + report.onEmptyOutput(clone); + result = new Clone[]{clone}; + } if (report != null) report.afterVariantsClustered(clone, result); @@ -337,22 +370,22 @@ protected VariantBranch clone() { * Performs final sequence cleanup. Removes very short sub-targets, performes quality trimming. */ BranchSequences clean(BranchSequences seq) { - for (int i = seq.ranges.length - 1; i >= 0; --i) { - if (parameters.trimmingParameters != null) + if (parameters.trimmingParameters != null) + for (int i = seq.ranges.length - 1; i >= 0; --i) if (i == seq.assemblingFeatureTargetId) { - Range trimRange = QualityTrimmer.extendRange(seq.sequences[i].getQuality(), parameters.trimmingParameters, + final Range[] ranges = QualityTrimmer.calculateIslandsFromInitialRange(seq.sequences[i].getQuality(), + parameters.trimmingParameters, new Range(seq.assemblingFeatureOffset, seq.assemblingFeatureOffset + seq.assemblingFeatureLength)); - seq = seq.cut(i, trimRange); + seq = seq.splitCut(i, ranges); } else { - Range trimRange = QualityTrimmer.trim(seq.sequences[i].getQuality(), parameters.trimmingParameters); - if (trimRange == null) - seq.without(i); - else - seq = seq.cut(i, trimRange); + // This also completely removes regions with low quality + final Range[] ranges = QualityTrimmer.calculateAllIslands(seq.sequences[i].getQuality(), + parameters.trimmingParameters); + seq = seq.splitCut(i, ranges); } + for (int i = seq.ranges.length - 1; i >= 0; --i) if (seq.sequences[i].size() < parameters.minimalContigLength && i != seq.assemblingFeatureTargetId) seq = seq.without(i); - } return seq; } @@ -397,12 +430,21 @@ BranchSequences assembleBranchSequences(int[] points, VariantBranch branch) { assert currentPosition != nextPosition; + int variantId = ((int) (positionedStates[i] >>> 8)) & 0xFFFFFF; + NSequenceWithQuality seq = new NSequenceWithQuality( - variantIdToSequence.get(((int) (positionedStates[i] >>> 8)) & 0xFFFFFF), + variantIdToSequence.get(variantId), (byte) positionedStates[i]); if (currentPosition == positionOfAssemblingFeature) { assert assemblingFeatureTargetId == -1; + + // Current implementation can work only with variants having the sequence in the assemblingFeature + // region exactly equal to the clonal sequence + if (variantId != clonalAssemblingFeatureVariantIndex) + // Terminating sequence assembling process, and returning null result + return null; + assemblingFeatureTargetId = ranges.size(); assemblingFeatureOffset = sequenceBuilder.size(); assemblingFeatureLength = seq.size(); @@ -437,6 +479,7 @@ BranchSequences assembleBranchSequences(int[] points, VariantBranch branch) { assert assemblingFeatureTargetId != -1; return new BranchSequences( + branch.count, assemblingFeatureTargetId, assemblingFeatureOffset, assemblingFeatureLength, @@ -454,6 +497,10 @@ private static boolean isAbsent(long positionedState) { } private final class BranchSequences { + /** + * Count from VariantBranch + */ + final double count; /** * Id of the target containing assemblingFeature */ @@ -481,8 +528,10 @@ private final class BranchSequences { */ final NSequenceWithQuality[] sequences; - BranchSequences(int assemblingFeatureTargetId, int assemblingFeatureOffset, int assemblingFeatureLength, + BranchSequences(double count, + int assemblingFeatureTargetId, int assemblingFeatureOffset, int assemblingFeatureLength, Range[] ranges, TIntArrayList[] positionMaps, NSequenceWithQuality[] sequences) { + this.count = count; this.assemblingFeatureTargetId = assemblingFeatureTargetId; this.assemblingFeatureOffset = assemblingFeatureOffset; this.assemblingFeatureLength = assemblingFeatureLength; @@ -525,6 +574,7 @@ BranchSequences without(int i) { System.arraycopy(sequences, 0, newSequences, 0, i); System.arraycopy(sequences, i + 1, newSequences, i, newLength - i); return new BranchSequences( + count, i < assemblingFeatureTargetId ? assemblingFeatureTargetId - 1 : assemblingFeatureTargetId, assemblingFeatureOffset, assemblingFeatureLength, @@ -533,11 +583,78 @@ BranchSequences without(int i) { newSequences); } + /** + * Returns new BranchSequences with i-th target splitted according to the rangesToCut. + * + * @param i target id + * @param rangesToCut ranges in local target coordinates (not global) + */ + BranchSequences splitCut(int i, Range... rangesToCut) { + if (rangesToCut.length == 0) + return without(i); + + if (rangesToCut.length == 1 && rangesToCut[0].getLower() == 0 && rangesToCut[0].length() == sequences[i].size()) + return this; + + int newLength = ranges.length - 1 + rangesToCut.length; + int destPos = i + rangesToCut.length; + int rightCopyLen = ranges.length - i - 1; + + Range[] newRanges = new Range[newLength]; + System.arraycopy(ranges, 0, newRanges, 0, i); + System.arraycopy(ranges, i + 1, newRanges, destPos, rightCopyLen); + + TIntArrayList[] newPositionMaps = new TIntArrayList[newLength]; + System.arraycopy(positionMaps, 0, newPositionMaps, 0, i); + System.arraycopy(positionMaps, i + 1, newPositionMaps, destPos, rightCopyLen); + + NSequenceWithQuality[] newSequences = new NSequenceWithQuality[newLength]; + System.arraycopy(sequences, 0, newSequences, 0, i); + System.arraycopy(sequences, i + 1, newSequences, destPos, rightCopyLen); + + Range assemblingFeatureRange = i == assemblingFeatureTargetId + ? new Range(assemblingFeatureOffset, assemblingFeatureOffset + assemblingFeatureLength) + : null; + + int newAssemblingFeatureOffset = i == assemblingFeatureTargetId + ? -1 + : assemblingFeatureOffset; + int newAssemblingFeatureTargetId = i == assemblingFeatureTargetId + ? -1 + : + assemblingFeatureTargetId < i + ? assemblingFeatureTargetId + : assemblingFeatureTargetId - 1 + rangesToCut.length; + + for (int j = 0; j < rangesToCut.length; j++) { + final Range rangeToCut = rangesToCut[j]; + if (assemblingFeatureRange != null && assemblingFeatureRange.intersectsWith(rangeToCut)) { + if (!rangeToCut.contains(new Range(assemblingFeatureOffset, assemblingFeatureOffset + assemblingFeatureLength))) + throw new IllegalArgumentException(); + newRanges[i + j] = new Range(positionMaps[i].get(rangeToCut.getLower()), positionMaps[i].get(rangeToCut.getUpper() - 1) + 1); + newPositionMaps[i + j] = (TIntArrayList) positionMaps[i].subList(rangeToCut.getLower(), rangeToCut.getUpper()); + newSequences[i + j] = sequences[i].getRange(rangeToCut); + newAssemblingFeatureTargetId = i + j; + newAssemblingFeatureOffset = assemblingFeatureOffset - rangeToCut.getLower(); + } else { + newRanges[i + j] = new Range(positionMaps[i].get(rangeToCut.getLower()), positionMaps[i].get(rangeToCut.getUpper() - 1) + 1); + newPositionMaps[i + j] = (TIntArrayList) positionMaps[i].subList(rangeToCut.getLower(), rangeToCut.getUpper()); + newSequences[i + j] = sequences[i].getRange(rangeToCut); + } + } + + assert newAssemblingFeatureOffset != -1; + assert newAssemblingFeatureTargetId != -1; + + return new BranchSequences(count, newAssemblingFeatureTargetId, newAssemblingFeatureOffset, assemblingFeatureLength, + newRanges, newPositionMaps, newSequences); + } + /** * Returns new BranchSequences with i-th target cut according to the rangeToCut. * * @param i target id - * @param rangeToCut range in local taget coordinates (not global) + * @param rangeToCut range in local target coordinates (not global) */ BranchSequences cut(int i, Range rangeToCut) { if (rangeToCut.getLower() == 0 && rangeToCut.length() == sequences[i].size()) @@ -552,33 +669,31 @@ BranchSequences cut(int i, Range rangeToCut) { newRanges[i] = new Range(newPositionMaps[i].get(rangeToCut.getLower()), newPositionMaps[i].get(rangeToCut.getUpper() - 1) + 1); newPositionMaps[i] = (TIntArrayList) newPositionMaps[i].subList(rangeToCut.getLower(), rangeToCut.getUpper()); newSequences[i] = newSequences[i].getRange(rangeToCut); - return new BranchSequences(assemblingFeatureTargetId, + return new BranchSequences(count, assemblingFeatureTargetId, assemblingFeatureOffset - rangeToCut.getLower(), assemblingFeatureLength, newRanges, newPositionMaps, newSequences); } else { newRanges[i] = new Range(newPositionMaps[i].get(rangeToCut.getLower()), newPositionMaps[i].get(rangeToCut.getUpper() - 1) + 1); newPositionMaps[i] = (TIntArrayList) newPositionMaps[i].subList(rangeToCut.getLower(), rangeToCut.getUpper()); newSequences[i] = newSequences[i].getRange(rangeToCut); - return new BranchSequences(assemblingFeatureTargetId, assemblingFeatureOffset, assemblingFeatureLength, + return new BranchSequences(count, assemblingFeatureTargetId, assemblingFeatureOffset, assemblingFeatureLength, newRanges, newPositionMaps, newSequences); } - } } /* ================================= Re-align and build final clone ====================================== */ - private Clone buildClone(double count, BranchSequences targets) { - Alignment[] vTopHitAlignments = new Alignment[targets.ranges.length], - jTopHitAlignments = new Alignment[targets.ranges.length]; - VDJCHit hit = clone.getBestHit(Variable); - if (hit == null) + private Clone buildClone(BranchSequences targets) { + Alignment[] vHitAlignments = new Alignment[targets.ranges.length], + jHitAlignments = new Alignment[targets.ranges.length]; + if (vHit == null) throw new UnsupportedOperationException("No V hit."); - NucleotideSequence vTopReferenceSequence = hit.getGene().getFeature(hit.getAlignedFeature()); - hit = clone.getBestHit(Joining); - if (hit == null) + NucleotideSequence vTopReferenceSequence = vHit.getGene().getFeature(vHit.getAlignedFeature()); + + if (jHit == null) throw new UnsupportedOperationException("No J hit."); - NucleotideSequence jTopReferenceSequence = hit.getGene().getFeature(hit.getAlignedFeature()); + NucleotideSequence jTopReferenceSequence = jHit.getGene().getFeature(jHit.getAlignedFeature()); // Excessive optimization AlignersCache cache = new AlignersCache(); @@ -619,7 +734,7 @@ private Clone buildClone(double count, BranchSequences targets) { // Can be reduced to a single statement if (range.getFrom() < N_LEFT_DUMMIES) // This target contain extra non-V nucleotides on the left - vTopHitAlignments[i] = alignSeq1FromRight( + vHitAlignments[i] = alignSeq1FromRight( alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence.getSequence(), 0, range.getTo() - N_LEFT_DUMMIES, @@ -627,7 +742,7 @@ private Clone buildClone(double count, BranchSequences targets) { !floatingLeftBound, cache); else if (floatingLeftBound) - vTopHitAlignments[i] = alignSeq1FromRight( + vHitAlignments[i] = alignSeq1FromRight( alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence.getSequence(), range.getFrom() - N_LEFT_DUMMIES, range.length(), @@ -635,7 +750,7 @@ else if (floatingLeftBound) false, cache); else - vTopHitAlignments[i] = Aligner.alignGlobal( + vHitAlignments[i] = Aligner.alignGlobal( alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence, @@ -654,7 +769,7 @@ else if (floatingLeftBound) // Can be reduced to a single statement if (range.getFrom() < N_LEFT_DUMMIES) // This target contain extra non-V nucleotides on the left - vTopHitAlignments[i] = alignSeq1FromRight( + vHitAlignments[i] = alignSeq1FromRight( alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence.getSequence(), 0, lengthV, @@ -662,7 +777,7 @@ else if (floatingLeftBound) !vFloatingLeftBound, cache); else if (vFloatingLeftBound) - vTopHitAlignments[i] = alignSeq1FromRight( + vHitAlignments[i] = alignSeq1FromRight( alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence.getSequence(), range.getFrom() - N_LEFT_DUMMIES, lengthV - (range.getFrom() - N_LEFT_DUMMIES), @@ -670,7 +785,7 @@ else if (vFloatingLeftBound) false, cache); else - vTopHitAlignments[i] = Aligner.alignGlobal( + vHitAlignments[i] = Aligner.alignGlobal( alignerParameters.getVAlignerParameters().getScoring(), vTopReferenceSequence, sequence, @@ -688,7 +803,7 @@ else if (vFloatingLeftBound) if (range.getTo() >= rightAssemblingFeatureBound + jLength) // This target contain extra non-J nucleotides on the right - jTopHitAlignments[i] = alignSeq1FromLeft( + jHitAlignments[i] = alignSeq1FromLeft( alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence.getSequence(), jOffset, jLength, @@ -696,7 +811,7 @@ else if (vFloatingLeftBound) !jFloatingRightBound, cache); else if (jFloatingRightBound) - jTopHitAlignments[i] = alignSeq1FromLeft( + jHitAlignments[i] = alignSeq1FromLeft( alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence.getSequence(), jOffset, range.getTo() - rightAssemblingFeatureBound, @@ -704,7 +819,7 @@ else if (jFloatingRightBound) false, cache); else - jTopHitAlignments[i] = Aligner.alignGlobal( + jHitAlignments[i] = Aligner.alignGlobal( alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence, @@ -722,7 +837,7 @@ else if (jFloatingRightBound) if (range.getTo() >= rightAssemblingFeatureBound + jLength) // This target contain extra non-J nucleotides on the right - jTopHitAlignments[i] = alignSeq1FromLeft( + jHitAlignments[i] = alignSeq1FromLeft( alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence.getSequence(), jOffset + (range.getFrom() - rightAssemblingFeatureBound), jLength - (range.getFrom() - rightAssemblingFeatureBound), @@ -730,7 +845,7 @@ else if (jFloatingRightBound) !floatingRightBound, cache); else if (floatingRightBound) - jTopHitAlignments[i] = alignSeq1FromLeft( + jHitAlignments[i] = alignSeq1FromLeft( alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence.getSequence(), jOffset + (range.getFrom() - rightAssemblingFeatureBound), range.length(), @@ -738,7 +853,7 @@ else if (floatingRightBound) false, cache); else - jTopHitAlignments[i] = Aligner.alignGlobal( + jHitAlignments[i] = Aligner.alignGlobal( alignerParameters.getJAlignerParameters().getScoring(), jTopReferenceSequence, sequence, @@ -750,17 +865,17 @@ else if (floatingRightBound) NSequenceWithQuality assemblingFeatureSeq = targets.sequences[targets.assemblingFeatureTargetId] .getRange(targets.assemblingFeatureOffset, targets.assemblingFeatureOffset + targets.assemblingFeatureLength); - Clone clone = cloneFactory.create(0, count, geneScores, new NSequenceWithQuality[]{assemblingFeatureSeq}); + Clone clone = cloneFactory.create(0, targets.count, getOriginalGeneScores(), new NSequenceWithQuality[]{assemblingFeatureSeq}); - vTopHitAlignments[targets.assemblingFeatureTargetId] = + vHitAlignments[targets.assemblingFeatureTargetId] = mergeTwoAlignments( - vTopHitAlignments[targets.assemblingFeatureTargetId], - clone.getBestHit(Variable).getAlignment(0).move(targets.assemblingFeatureOffset)); + vHitAlignments[targets.assemblingFeatureTargetId], + vHit.getAlignment(0).move(targets.assemblingFeatureOffset)); - jTopHitAlignments[targets.assemblingFeatureTargetId] = + jHitAlignments[targets.assemblingFeatureTargetId] = mergeTwoAlignments( - clone.getBestHit(Joining).getAlignment(0).move(targets.assemblingFeatureOffset), - jTopHitAlignments[targets.assemblingFeatureTargetId]); + jHit.getAlignment(0).move(targets.assemblingFeatureOffset), + jHitAlignments[targets.assemblingFeatureTargetId]); EnumMap hits = new EnumMap<>(GeneType.class); for (GeneType gt : GeneType.VDJC_REFERENCE) @@ -770,11 +885,29 @@ else if (floatingRightBound) .toArray(VDJCHit[]::new)); VDJCHit[] tmp = hits.get(Variable); - tmp[0] = substituteAlignments(tmp[0], vTopHitAlignments); + int hitIndex = indexOfGene(tmp, vHit.getGene().getId()); + if (hitIndex != 0 && report != null) { + report.onVHitReorder(); + tmp[hitIndex] = substituteAlignmentsAndScore(tmp[hitIndex], vHitAlignments, tmp[0].getScore() + 1); + } else + tmp[0] = substituteAlignments(tmp[0], vHitAlignments); + tmp = hits.get(Joining); - tmp[0] = substituteAlignments(tmp[0], jTopHitAlignments); + hitIndex = indexOfGene(tmp, jHit.getGene().getId()); + if (hitIndex != 0 && report != null) { + report.onJHitReorder(); + tmp[hitIndex] = substituteAlignmentsAndScore(tmp[hitIndex], jHitAlignments, tmp[0].getScore() + 1); + } else + tmp[0] = substituteAlignments(tmp[0], jHitAlignments); - return new Clone(targets.sequences, hits, count, 0); + return new Clone(targets.sequences, hits, targets.count, 0); + } + + static int indexOfGene(VDJCHit[] hits, VDJCGeneId gene) { + for (int i = 0; i < hits.length; i++) + if (hits[i].getGene().getId().equals(gene)) + return i; + throw new IllegalStateException(); } static final class AlignersCache { @@ -883,7 +1016,9 @@ static final class AlignersCache { if (global) { int seq2added = width; if (length2 > length1) { - length2 = Math.min(length2, length1 + width); + int length2upd = Math.min(length2, length1 + width); + offset2 += length2 - length2upd; + length2 = length2upd; seq2added = Math.min(length2, width + (length2 - length1)); } result = BandedLinearAligner.alignLeftAdded0(scoring, seq1, seq2, offset1, length1, 0, offset2, length2, seq2added, width, mutations, cache); @@ -907,7 +1042,9 @@ static final class AlignersCache { if (global) { int seq2added = width; if (length2 > length1) { - length2 = Math.min(length2, length1 + width); + int length2upd = Math.min(length2, length1 + width); + offset2 += length2 - length2upd; + length2 = length2upd; seq2added = Math.min(length2, width + (length2 - length1)); } result = BandedAffineAligner.semiGlobalLeft0(scoring, seq1, seq2, offset1, length1, 0, offset2, length2, seq2added, width, mutations, cache); @@ -923,6 +1060,10 @@ static VDJCHit substituteAlignments(VDJCHit hit, Alignment[] return new VDJCHit(hit.getGene(), alignments, hit.getAlignedFeature(), hit.getScore()); } + static VDJCHit substituteAlignmentsAndScore(VDJCHit hit, Alignment[] alignments, float score) { + return new VDJCHit(hit.getGene(), alignments, hit.getAlignedFeature(), score); + } + static VDJCHit moveHitTarget(VDJCHit hit, int targetTargetId, int sequence2OffsetInTarget, int targetsCount) { // TODO (!!!) extend alignments for targets.assemblingFeatureTargetId @@ -1092,32 +1233,46 @@ private int getVariantIndex(NucleotideSequence sequence) { } /** - * Aggregates information about position states in all the provided alignments, and returns the object that allows - * to iterate from one position to another (sorted by coverage, from most covered to less covered) and see states - * across all the alignments for each of the positions. - * - * @param alignments supplier of alignments iterators. Will be invoked twice. + * Sets common sequences states with well-defined ids. Returns assembling feature variant id. */ - public RawVariantsData calculateRawData(Supplier> alignments) { - if (!sequenceToVariantId.isEmpty()) - throw new IllegalStateException(); + private int initVariantMappings(NucleotideSequence clonalAssemblingFeatureSequence) { + assert sequenceToVariantId.isEmpty(); + assert variantIdToSequence.isEmpty(); - // Setting common sequences states with well-defined ids upfront + // Single letters for (byte letter = 0; letter < NucleotideSequence.ALPHABET.basicSize(); letter++) { NucleotideSequence seq = new NucleotideSequence(new byte[]{letter}); sequenceToVariantId.put(seq, letter); variantIdToSequence.put(letter, seq); } + + // Empty sequence sequenceToVariantId.put(NucleotideSequence.EMPTY, NucleotideSequence.ALPHABET.basicSize()); variantIdToSequence.put(NucleotideSequence.ALPHABET.basicSize(), NucleotideSequence.EMPTY); + // Assembling feature + int assemblingFeatureId = NucleotideSequence.ALPHABET.basicSize() + 1; + sequenceToVariantId.put(clonalAssemblingFeatureSequence, assemblingFeatureId); + variantIdToSequence.put(assemblingFeatureId, clonalAssemblingFeatureSequence); + + return assemblingFeatureId; + } + + /** + * Aggregates information about position states in all the provided alignments, and returns the object that allows + * to iterate from one position to another (sorted by coverage, from most covered to less covered) and see states + * across all the alignments for each of the positions. + * + * @param alignments supplier of alignments iterators. Will be invoked twice. + */ + public RawVariantsData calculateRawData(Supplier> alignments) { TIntIntHashMap coverage = new TIntIntHashMap(); TIntObjectHashMap> variants = new TIntObjectHashMap<>(); // Collecting coverage and VariantAggregators int nAlignments = 0; for (VDJCAlignments al : CUtils.it(alignments.get())) { - al = al.updateAlignments(AlignmentUtils::shiftIndelsAtHomopolymers); + // al = al.mapAlignments(AlignmentUtils::shiftIndelsAtHomopolymers); // FIXME ++nAlignments; for (PointSequence point : toPointSequences(al)) { int seqIndex = getVariantIndex(point.sequence.getSequence()); @@ -1166,7 +1321,7 @@ public RawVariantsData calculateRawData(Supplier> ali // Main data collection loop i = 0; for (VDJCAlignments al : CUtils.it(alignments.get())) { - al = al.updateAlignments(AlignmentUtils::shiftIndelsAtHomopolymers); + // al = al.mapAlignments(AlignmentUtils::shiftIndelsAtHomopolymers); // FIXME for (PointSequence point : toPointSequences(al)) { int pointIndex = revIndex.get(point.point); packedData[pointIndex][i] = @@ -1293,6 +1448,56 @@ public String toString(byte qualityThreshold, int readsFrom, int readsTo) { .collect(Collectors.joining("\n")); } + /** + * String representation of this state matrix + * + * @param qualityThreshold quality threshold (positions with quality lower then this value, wil be printed in + * lower case) + */ + public String toCsv(byte qualityThreshold) { + int minPosition = Arrays.stream(points).min().getAsInt(); + int maxPosition = Arrays.stream(points).max().getAsInt() + 1; + + String[][] cells = new String[nReads][maxPosition - minPosition + 1]; + + OutputPort port = createPort(); + for (int position : points) { + int[] states = port.take(); + for (int j = 0; j < nReads; j++) { + int state = states[j]; + if (state != ABSENT_PACKED_VARIANT_INFO) { + String seq = variantIdToSequence.get(state >>> 8).toString(); + if ((state & 0x7F) < qualityThreshold) + seq = seq.toLowerCase(); + if (seq.equals("")) + seq = "-"; + cells[j][position - minPosition] = seq; + } + } + } + assert port.take() == null; + + final String allLines = IntStream.range(0, nReads) + .mapToObj(readIndex -> + "a" + readIndex + "\t" + + IntStream + .range(0, maxPosition - minPosition + 1) + .mapToObj(positionOffset -> { + String cell = cells[readIndex][positionOffset]; + return cell == null ? "" : cell; + }) + .collect(Collectors.joining("\t")) + ).collect(Collectors.joining("\n")); + + StringBuilder header = new StringBuilder(); + header.append("aIndex\t").append(IntStream + .range(0, maxPosition - minPosition + 1) + .mapToObj(positionOffset -> "" + (positionOffset + minPosition)) + .collect(Collectors.joining("\t"))); + + return header.toString() + "\n" + allLines; + } + /** * String representation of this state matrix * @@ -1340,27 +1545,47 @@ List toPointSequences(VDJCAlignments alignments, int iTarget) { // ↓ ↓ ↓ // 0000000vvvvvvvvvvvvvvCDR3CDR3CDR3CDR3jjjjjjjjjjjjjjjjCCCCCCCCC - VDJCPartitionedSequence target = alignments.getPartitionedTarget(iTarget); + // VDJCHit vHit = alignments.getBestHit(Variable); + // Alignment vAlignment = + // (vHit == null + // || vHit.getAlignment(iTarget) == null + // || !Objects.equals(genes.v, vHit.getGene()) + // || vHit.getAlignment(iTarget).getSequence1Range().getFrom() > lengthV) + // ? null + // : vHit.getAlignment(iTarget); + + final Optional vHit = Arrays.stream(alignments.getHits(Variable) == null ? new VDJCHit[0] : alignments.getHits(Variable)) + .filter(hit -> hit != null && Objects.equals(genes.v, hit.getGene())) + .findFirst(); + Alignment vAlignment = vHit + .map(hit -> hit.getAlignment(iTarget)) + .filter(al -> al != null && al.getSequence1Range().getFrom() <= lengthV) + .orElse(null); + + // VDJCHit jHit = alignments.getBestHit(Joining); + // Alignment jAlignment = + // (jHit == null + // || jHit.getAlignment(iTarget) == null + // || !Objects.equals(genes.j, jHit.getGene()) + // || jHit.getAlignment(iTarget).getSequence1Range().getTo() < jOffset) + // ? null + // : jHit.getAlignment(iTarget); + + final Optional jHit = Arrays.stream(alignments.getHits(Joining) == null ? new VDJCHit[0] : alignments.getHits(Joining)) + .filter(hit -> hit != null && Objects.equals(genes.j, hit.getGene())) + .findFirst(); + Alignment jAlignment = jHit + .map(hit -> hit.getAlignment(iTarget)) + .filter(al -> al != null && al.getSequence1Range().getTo() >= jOffset) + .orElse(null); + + VDJCPartitionedSequence target = alignments.getPartitionedTarget(iTarget, + vHit.map(VDJCHit::getGene).orElse(null), + null, + jHit.map(VDJCHit::getGene).orElse(null), + null); NSequenceWithQuality targetSeq = alignments.getTarget(iTarget); - VDJCHit vHit = alignments.getBestHit(Variable); - Alignment vAlignment = - (vHit == null - || vHit.getAlignment(iTarget) == null - || !Objects.equals(genes.v, vHit.getGene()) - || vHit.getAlignment(iTarget).getSequence1Range().getFrom() > lengthV) - ? null - : vHit.getAlignment(iTarget); - - VDJCHit jHit = alignments.getBestHit(Joining); - Alignment jAlignment = - (jHit == null - || jHit.getAlignment(iTarget) == null - || !Objects.equals(genes.j, jHit.getGene()) - || jHit.getAlignment(iTarget).getSequence1Range().getTo() < jOffset) - ? null - : jHit.getAlignment(iTarget); - List points = new ArrayList<>(); if (target.getPartitioning().isAvailable(assemblingFeature.getFirstPoint())) { // This target contains left edge of the assembling feature diff --git a/src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerReport.java b/src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerReport.java index dd3c7f010..4251edcb3 100644 --- a/src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerReport.java +++ b/src/main/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerReport.java @@ -46,6 +46,7 @@ * */ public class FullSeqAssemblerReport implements Report { + private final AtomicInteger assemblePrematureTerminationCount = new AtomicInteger(0); private final AtomicInteger initialCloneCount = new AtomicInteger(0); private final AtomicInteger finalCloneCount = new AtomicInteger(0); private final AtomicDouble totalReads = new AtomicDouble(0); @@ -54,16 +55,30 @@ public class FullSeqAssemblerReport implements Report { private final AtomicDouble dividedReads = new AtomicDouble(0); private final AtomicInteger longestContigLength = new AtomicInteger(0); private final AtomicLong variantsBeforeClustering = new AtomicLong(0); + private final AtomicLong vHitsReorder = new AtomicLong(0); + private final AtomicLong jHitsReorder = new AtomicLong(0); public void onVariantClustered(VariantBranch minor) { totalClustered.incrementAndGet(); totalClusteredReads.addAndGet(minor.count); } + public void onEmptyOutput(Clone clone) { + assemblePrematureTerminationCount.incrementAndGet(); + } + public void onVariantsCreated(List branches) { variantsBeforeClustering.addAndGet(branches.size()); } + public void onVHitReorder() { + vHitsReorder.incrementAndGet(); + } + + public void onJHitReorder() { + jHitsReorder.incrementAndGet(); + } + public void afterVariantsClustered(Clone initialClone, Clone[] branches) { initialCloneCount.incrementAndGet(); finalCloneCount.addAndGet(branches.length); @@ -111,10 +126,16 @@ public double getTotalDividedVariantReads() { return dividedReads.get(); } + @JsonProperty("assemblePrematureTerminationEvents") + public double getAssemblePrematureTerminationEvents() { + return assemblePrematureTerminationCount.get(); + } + @Override public void writeReport(ReportHelper helper) { helper.writeField("Initial clonotype count", getInitialCloneCount()) .writePercentAndAbsoluteField("Final clonotype count", getFinalCloneCount(), getInitialCloneCount()) + .writePercentAndAbsoluteField("Number of premature termination assembly events, percent of number of initial clonotypes", getAssemblePrematureTerminationEvents(), getInitialCloneCount()) .writeField("Longest contig length", getLongestContigLength()) .writePercentAndAbsoluteField("Clustered variants", getClonesClustered(), getFinalCloneCount() + getClonesClustered()) .writePercentAndAbsoluteField("Reads in clustered variants", getReadsClustered(), getTotalReads()) diff --git a/src/main/java/com/milaboratory/mixcr/basictypes/ClnAWriter.java b/src/main/java/com/milaboratory/mixcr/basictypes/ClnAWriter.java index 31c7edb9e..eb17de445 100644 --- a/src/main/java/com/milaboratory/mixcr/basictypes/ClnAWriter.java +++ b/src/main/java/com/milaboratory/mixcr/basictypes/ClnAWriter.java @@ -188,12 +188,7 @@ public synchronized void sortAlignments(OutputPort alignments, this.toSorter = new CountingOutputPort<>(alignments); this.sortedAlignments = Sorter.sort( toSorter, - (o1, o2) -> { - int i = Integer.compare(o1.cloneIndex, o2.cloneIndex); - if (i != 0) - return i; - return Byte.compare(o1.mappingType, o2.mappingType); - }, + Comparator.comparingInt((VDJCAlignments o) -> o.cloneIndex).thenComparingInt(o -> o.mappingType), chunkSize, new VDJCAlignmentsSerializer(usedGenes, featureToAlign), tempFile); @@ -341,7 +336,7 @@ public synchronized void writeAlignmentsAndIndex() { // To make counts index the same length as aBlockOffset aBlockCount.add(0); - // Saving index offset in file to write in the end of stream + // Saving index offset in file to write in the end of the stream long indexBeginOffset = outputStream.getByteCount(); long previousValue = 0; diff --git a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java index 4583500f5..413010bc6 100644 --- a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java +++ b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCAlignments.java @@ -30,6 +30,7 @@ package com.milaboratory.mixcr.basictypes; import com.milaboratory.core.alignment.Alignment; +import com.milaboratory.core.alignment.AlignmentUtils; import com.milaboratory.core.io.sequence.SequenceRead; import com.milaboratory.core.io.sequence.SequenceReadUtil; import com.milaboratory.core.sequence.NSequenceWithQuality; @@ -40,10 +41,7 @@ import gnu.trove.map.hash.TLongObjectHashMap; import io.repseq.core.GeneType; -import java.util.Arrays; -import java.util.Collections; -import java.util.EnumMap; -import java.util.List; +import java.util.*; import java.util.function.Function; @Serializable(by = IO.VDJCAlignmentsSerializer.class) @@ -104,6 +102,17 @@ public VDJCAlignments(VDJCHit[] vHits, VDJCHit[] dHits, VDJCHit[] jHits, VDJCHit this(-1, createHits(vHits, dHits, jHits, cHits), targets, history, originalReads); } + public VDJCAlignments shiftIndelsAtHomopolymers() { + return mapHits(h -> h.mapAlignments(AlignmentUtils::shiftIndelsAtHomopolymers)); + } + + public VDJCAlignments mapHits(Function mapper) { + EnumMap result = new EnumMap<>(GeneType.class); + for (Map.Entry e : hits.entrySet()) + result.put(e.getKey(), Arrays.stream(e.getValue()).map(mapper).toArray(VDJCHit[]::new)); + return new VDJCAlignments(alignmentsIndex, result, targets, history, originalReads, mappingType, cloneIndex); + } + public boolean isClustered() { return ReadToCloneMapping.isClustered(mappingType); } @@ -144,7 +153,7 @@ public VDJCAlignments updateCloneIndex(int newCloneIndex) { public VDJCAlignments updateAlignments(Function, Alignment> processor) { EnumMap newHits = this.hits.clone(); - newHits.replaceAll((k, v) -> Arrays.stream(v).map(h -> h.updateAlignments(processor)).toArray(VDJCHit[]::new)); + newHits.replaceAll((k, v) -> Arrays.stream(v).map(h -> h.mapAlignments(processor)).toArray(VDJCHit[]::new)); return new VDJCAlignments(alignmentsIndex, newHits, targets, history, originalReads, mappingType, cloneIndex); } @@ -283,7 +292,7 @@ public VDJCAlignments setAlignmentsIndex(long alignmentsIndex) { * * @param top numer of top hits to test * @return {@code true} if at least one V and one J hit among first {@code top} hits have same chain and false - * otherwise (first {@code top} V hits have different chain from those have first {@code top} J hits) + * otherwise (first {@code top} V hits have different chain from those have first {@code top} J hits) */ public final boolean hasSameVJLoci(final int top) { final VDJCHit[] vHits = hits.get(GeneType.Variable), diff --git a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCHit.java b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCHit.java index f0f1ee38d..f927017e4 100644 --- a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCHit.java +++ b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCHit.java @@ -82,7 +82,7 @@ public VDJCHit setAlignment(int target, Alignment alignment) } @SuppressWarnings("unchecked") - public VDJCHit updateAlignments(Function, Alignment> processor) { + public VDJCHit mapAlignments(Function, Alignment> processor) { return new VDJCHit(gene, Arrays .stream(alignments) diff --git a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCObject.java b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCObject.java index 714b942fe..7f43808cf 100644 --- a/src/main/java/com/milaboratory/mixcr/basictypes/VDJCObject.java +++ b/src/main/java/com/milaboratory/mixcr/basictypes/VDJCObject.java @@ -36,10 +36,12 @@ import io.repseq.gen.VDJCGenes; import java.util.*; +import java.util.stream.Collectors; import static com.milaboratory.core.alignment.Alignment.aabs; +import static com.milaboratory.util.StreamUtil.noMerge; -public class VDJCObject { +public abstract class VDJCObject { protected final NSequenceWithQuality[] targets; protected final EnumMap hits; protected volatile EnumMap allChains; @@ -92,6 +94,16 @@ public final VDJCHit[] getHits(GeneType type) { return hits == null ? new VDJCHit[0] : hits; } + public final EnumMap getHitsMap() { + return hits.entrySet() + .stream() + .collect(Collectors.toMap( + Map.Entry::getKey, + e -> e.getValue().clone(), + noMerge(), + () -> new EnumMap<>(GeneType.class))); + } + public Chains getTopChain(GeneType gt) { final VDJCHit top = getBestHit(gt); if (top == null) @@ -186,6 +198,35 @@ public final VDJCPartitionedSequence getPartitionedTarget(int target) { return partitionedTargets[target]; } + public final VDJCPartitionedSequence getPartitionedTarget(int target, VDJCGene vGene, VDJCGene dGene, VDJCGene jGene, VDJCGene cGene) { + EnumMap genes = new EnumMap<>(GeneType.class); + if (vGene != null) + genes.put(GeneType.Variable, vGene); + if (dGene != null) + genes.put(GeneType.Diversity, dGene); + if (jGene != null) + genes.put(GeneType.Joining, jGene); + if (cGene != null) + genes.put(GeneType.Constant, cGene); + return getPartitionedTarget(target, genes); + } + + public final VDJCPartitionedSequence getPartitionedTarget(int target, EnumMap genes) { + EnumMap topHits = new EnumMap<>(GeneType.class); + for (GeneType geneType : GeneType.values()) { + if (!genes.containsKey(geneType)) + continue; + VDJCHit[] hits = this.hits.get(geneType); + if (hits == null) + continue; + Arrays.stream(hits) + .filter(hit -> hit.getGene().equals(genes.get(geneType))) + .findFirst() + .ifPresent(vdjcHit -> topHits.put(geneType, vdjcHit)); + } + return new VDJCPartitionedSequence(targets[target], new TargetPartitioning(target, topHits)); + } + public VDJCHit getBestHit(GeneType type) { VDJCHit[] hits = this.hits.get(type); if (hits == null || hits.length == 0) diff --git a/src/main/java/com/milaboratory/mixcr/cli/AlignerReport.java b/src/main/java/com/milaboratory/mixcr/cli/AlignerReport.java index 5cd1efa39..d1cb57fd6 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/AlignerReport.java +++ b/src/main/java/com/milaboratory/mixcr/cli/AlignerReport.java @@ -31,6 +31,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.milaboratory.core.io.sequence.SequenceRead; +import com.milaboratory.core.sequence.quality.ReadTrimmerReport; import com.milaboratory.mixcr.basictypes.VDJCAlignments; import com.milaboratory.mixcr.vdjaligners.VDJCAlignerEventListener; import com.milaboratory.mixcr.vdjaligners.VDJCAlignmentFailCause; @@ -52,6 +53,10 @@ public final class AlignerReport extends AbstractCommandReport implements VDJCAl private final AtomicLong topHitConflict = new AtomicLong(0); private final AtomicLong vChimeras = new AtomicLong(0); private final AtomicLong jChimeras = new AtomicLong(0); + private final AtomicLong realignedWithForcedNonFloatingBound = new AtomicLong(0); + private final AtomicLong realignedWithForcedNonFloatingRightBoundInLeftRead = new AtomicLong(0); + private final AtomicLong realignedWithForcedNonFloatingLeftBoundInRightRead = new AtomicLong(0); + private ReadTrimmerReport trimmingReport; public AlignerReport() { } @@ -65,6 +70,15 @@ public long getFails(VDJCAlignmentFailCause cause) { return fails.get(cause.ordinal()); } + @JsonProperty("trimmingReport") + public ReadTrimmerReport getTrimmingReport() { + return trimmingReport; + } + + public void setTrimmingReport(ReadTrimmerReport trimmingReport) { + this.trimmingReport = trimmingReport; + } + @JsonProperty("totalReadsProcessed") public long getTotal() { long total = 0; @@ -151,6 +165,21 @@ public long getJChimeras() { return jChimeras.get(); } + @JsonProperty("realignedWithForcedNonFloatingBound") + public long getRealignedWithForcedNonFloatingBound() { + return realignedWithForcedNonFloatingBound.get(); + } + + @JsonProperty("realignedWithForcedNonFloatingRightBoundInLeftRead") + public long getRealignedWithForcedNonFloatingRightBoundInLeftRead() { + return realignedWithForcedNonFloatingRightBoundInLeftRead.get(); + } + + @JsonProperty("realignedWithForcedNonFloatingLeftBoundInRightRead") + public long getRealignedWithForcedNonFloatingLeftBoundInRightRead() { + return realignedWithForcedNonFloatingLeftBoundInRightRead.get(); + } + @Override public void onFailedAlignment(SequenceRead read, VDJCAlignmentFailCause cause) { fails.incrementAndGet(cause.ordinal()); @@ -200,6 +229,15 @@ public void onChimera() { chimeras.incrementAndGet(); } + @Override + public void onRealignmentWithForcedNonFloatingBound(boolean forceLeftEdgeInRight, boolean forceRightEdgeInLeft) { + realignedWithForcedNonFloatingBound.getAndIncrement(); + if (forceRightEdgeInLeft) + realignedWithForcedNonFloatingRightBoundInLeftRead.incrementAndGet(); + if (forceRightEdgeInLeft) + realignedWithForcedNonFloatingLeftBoundInRightRead.incrementAndGet(); + } + @Override public void writeReport(ReportHelper helper) { // Writing common analysis information @@ -233,5 +271,23 @@ public void writeReport(ReportHelper helper) { // Writing distribution by chains chainStats.writeReport(helper); + + helper.writePercentAndAbsoluteField("Realigned with forced non-floating bound", getRealignedWithForcedNonFloatingBound(), total); + helper.writePercentAndAbsoluteField("Realigned with forced non-floating right bound in left read", getRealignedWithForcedNonFloatingRightBoundInLeftRead(), total); + helper.writePercentAndAbsoluteField("Realigned with forced non-floating left bound in right read", getRealignedWithForcedNonFloatingLeftBoundInRightRead(), total); + + if (trimmingReport != null) { + assert trimmingReport.getAlignments() == total; + helper.writePercentAndAbsoluteField("R1 Reads Trimmed Left", trimmingReport.getR1LeftTrimmedEvents(), total); + helper.writePercentAndAbsoluteField("R1 Reads Trimmed Right", trimmingReport.getR1RightTrimmedEvents(), total); + helper.writeField("Average R1 Nucleotides Trimmed Left", 1.0 * trimmingReport.getR1LeftTrimmedNucleotides() / total); + helper.writeField("Average R1 Nucleotides Trimmed Right", 1.0 * trimmingReport.getR1RightTrimmedNucleotides() / total); + if (trimmingReport.getR2LeftTrimmedEvents() > 0 || trimmingReport.getR2RightTrimmedEvents() > 0) { + helper.writePercentAndAbsoluteField("R2 Reads Trimmed Left", trimmingReport.getR2LeftTrimmedEvents(), total); + helper.writePercentAndAbsoluteField("R2 Reads Trimmed Right", trimmingReport.getR2RightTrimmedEvents(), total); + helper.writeField("Average R2 Nucleotides Trimmed Left", 1.0 * trimmingReport.getR2LeftTrimmedNucleotides() / total); + helper.writeField("Average R2 Nucleotides Trimmed Right", 1.0 * trimmingReport.getR2RightTrimmedNucleotides() / total); + } + } } } diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java b/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java index b13ef7637..a4ccbd8e4 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java @@ -55,6 +55,9 @@ import com.milaboratory.core.io.sequence.fastq.SingleFastqReader; import com.milaboratory.core.io.sequence.fastq.SingleFastqWriter; import com.milaboratory.core.sequence.NucleotideSequence; +import com.milaboratory.core.sequence.quality.QualityTrimmerParameters; +import com.milaboratory.core.sequence.quality.ReadTrimmerProcessor; +import com.milaboratory.core.sequence.quality.ReadTrimmerReport; import com.milaboratory.mixcr.basictypes.SequenceHistory; import com.milaboratory.mixcr.basictypes.VDJCAlignments; import com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter; @@ -141,6 +144,14 @@ public void setLimit(int limit) { this.limit = limit; } + @Option(description = "Read pre-processing: trimming quality threshold", + names = {"--trimming-quality-threshold"}) + public byte trimmingQualityThreshold = 0; + + @Option(description = "Read pre-processing: trimming window size", + names = {"--trimming-window-size"}) + public byte trimmingWindowSize = 6; + @Option(description = "Parameters preset.", names = {"-p", "--parameters"}) public String alignerParametersName = "default"; @@ -487,7 +498,19 @@ else if (featureSequence.containsWildcards()) SmartProgressReporter.startProgressReport("Alignment", progress); Merger> mainInputReads = CUtils.buffered((OutputPort) chunked(sReads, 64), Math.max(16, threads)); - ParallelProcessor alignedChunks = new ParallelProcessor(mainInputReads, chunked(aligner), Math.max(16, threads), threads); + + OutputPort> mainInputReadsPreprocessed = mainInputReads; + if (trimmingQualityThreshold > 0) { + ReadTrimmerReport rep = new ReadTrimmerReport(); + mainInputReadsPreprocessed = CUtils.wrap( + mainInputReadsPreprocessed, + CUtils.chunked(new ReadTrimmerProcessor( + new QualityTrimmerParameters(trimmingQualityThreshold, + trimmingWindowSize), rep))); + report.setTrimmingReport(rep); + } + + ParallelProcessor alignedChunks = new ParallelProcessor(mainInputReadsPreprocessed, chunked(aligner), Math.max(16, threads), threads); if (reportBuffers) { StatusReporter reporter = new StatusReporter(); reporter.addBuffer("Input (chunked; chunk size = 64)", mainInputReads.getBufferStatusProvider()); @@ -514,7 +537,9 @@ public String getStatus() { }); reporter.start(); } - OutputPort alignments = unchunked(alignedChunks); + OutputPort alignments = unchunked( + CUtils.wrap(alignedChunks, + CUtils.chunked(VDJCAlignmentResult::shiftIndelsAtHomopolymers))); for (VDJCAlignmentResult result : CUtils.it(new OrderedOutputPort<>(alignments, o -> o.read.getId()))) { VDJCAlignments alignment = result.alignment; SequenceRead read = result.read; diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandAssembleContigs.java b/src/main/java/com/milaboratory/mixcr/cli/CommandAssembleContigs.java index 3478b2bd3..3d88a7da2 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CommandAssembleContigs.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandAssembleContigs.java @@ -32,10 +32,14 @@ import cc.redberry.pipe.CUtils; import cc.redberry.pipe.OutputPort; import cc.redberry.pipe.blocks.ParallelProcessor; -import com.fasterxml.jackson.annotation.*; +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.milaboratory.cli.ActionConfiguration; import com.milaboratory.mixcr.assembler.CloneAssemblerParameters; import com.milaboratory.mixcr.assembler.CloneFactory; +import com.milaboratory.mixcr.assembler.fullseq.CoverageAccumulator; import com.milaboratory.mixcr.assembler.fullseq.FullSeqAssembler; import com.milaboratory.mixcr.assembler.fullseq.FullSeqAssemblerParameters; import com.milaboratory.mixcr.assembler.fullseq.FullSeqAssemblerReport; @@ -45,15 +49,19 @@ import com.milaboratory.primitivio.PrimitivI; import com.milaboratory.primitivio.PrimitivO; import com.milaboratory.util.SmartProgressReporter; +import io.repseq.core.GeneType; import io.repseq.core.VDJCGene; +import io.repseq.core.VDJCGeneId; import io.repseq.core.VDJCLibraryRegistry; import picocli.CommandLine.Command; import picocli.CommandLine.Option; import java.io.*; import java.util.*; +import java.util.stream.Collectors; import static com.milaboratory.mixcr.cli.CommandAssembleContigs.ASSEMBLE_CONTIGS_COMMAND_NAME; +import static com.milaboratory.util.StreamUtil.noMerge; @Command(name = ASSEMBLE_CONTIGS_COMMAND_NAME, sortOptions = true, @@ -130,13 +138,59 @@ public void run1() throws Exception { OutputPort parallelProcessor = new ParallelProcessor<>(cloneAlignmentsPort, cloneAlignments -> { try { - FullSeqAssembler fullSeqAssembler = new FullSeqAssembler(cloneFactory, assemblerParameters, cloneAlignments.clone, alignerParameters); + // Collecting statistics + + EnumMap> coverages = cloneAlignments.clone.getHitsMap() + .entrySet().stream() + .filter(e -> e.getValue() != null && e.getValue().length > 0) + .collect(Collectors.toMap( + Map.Entry::getKey, + e -> + Arrays.stream(e.getValue()).collect( + Collectors.toMap( + h -> h.getGene().getId(), + CoverageAccumulator::new + ) + ), + noMerge(), + () -> new EnumMap<>(GeneType.class))); + + for (VDJCAlignments alignments : CUtils.it(cloneAlignments.alignments())) + for (Map.Entry e : alignments.getHitsMap().entrySet()) + for (VDJCHit hit : e.getValue()) + Optional.ofNullable(coverages.get(e.getKey())) + .flatMap(m -> Optional.ofNullable(m.get(hit.getGene().getId()))) + .ifPresent(acc -> acc.accumulate(hit)); + + // Selecting best hits for clonal sequence assembly based in the coverage information + final EnumMap bestGenes = coverages.entrySet().stream() + .collect(Collectors.toMap( + Map.Entry::getKey, + accs -> accs.getValue().entrySet().stream() + .max(Comparator.comparing(e -> -e.getValue().getNumberOfCoveredPoints(1))) + .map(e -> e.getValue().hit).get(), + noMerge(), + () -> new EnumMap<>(GeneType.class))); + + // Performing contig assembly + + FullSeqAssembler fullSeqAssembler = new FullSeqAssembler( + cloneFactory, assemblerParameters, + cloneAlignments.clone, alignerParameters, + bestGenes.get(GeneType.Variable), bestGenes.get(GeneType.Joining) + ); + fullSeqAssembler.setReport(report); FullSeqAssembler.RawVariantsData rawVariantsData = fullSeqAssembler.calculateRawData(cloneAlignments::alignments); if (debugReport != null) { synchronized (debugReport) { + try (FileOutputStream fos = new FileOutputStream(debugReportFile + "." + cloneAlignments.clone.getId())) { + final String content = rawVariantsData.toCsv((byte) 10); + fos.write(content.getBytes()); + } + try { debugReport.write("Clone: " + cloneAlignments.clone.getId()); debugReport.newLine(); diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsForClones.java b/src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsForClones.java new file mode 100644 index 000000000..18b69ad94 --- /dev/null +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsForClones.java @@ -0,0 +1,102 @@ +package com.milaboratory.mixcr.cli; + +import cc.redberry.pipe.OutputPortCloseable; +import com.fasterxml.jackson.annotation.JsonAutoDetect; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.milaboratory.cli.ActionConfiguration; +import com.milaboratory.mixcr.basictypes.ClnAReader; +import com.milaboratory.mixcr.basictypes.VDJCAlignments; +import com.milaboratory.mixcr.basictypes.VDJCAlignmentsWriter; +import io.repseq.core.VDJCLibraryRegistry; +import picocli.CommandLine; + +import java.util.*; + +/** + * + */ +@CommandLine.Command(name = "exportAlignmentsForClones", + sortOptions = true, + separator = " ", + description = "Export alignments for particular clones from \"clones & alignments\" (*.clna) file.") +public class CommandExportAlignmentsForClones extends ACommandWithSmartOverwriteWithSingleInputMiXCR { + static final String EXPORT_ALIGNMENTS_FOR_CLONES_COMMAND_NAME = "exportAlignmentsForClones"; + + @CommandLine.Parameters(index = "0", description = "input_file.clna") + public String in; + + @CommandLine.Parameters(index = "1", description = "[output_file.vdjca[.gz]") + public String out; + + @CommandLine.Option(names = "--id", description = "[cloneId1 [cloneId2 [cloneId3]]]", arity = "0..*") + public List ids = new ArrayList<>(); + +// @CommandLine.Option(description = "Create separate files for each clone. File with '_clnN' suffix, " + +// "where N is clone index, will be created for each clone index.", +// names = {"-s", "--separate"}) +// public boolean separate = false; + + @Override + public ActionConfiguration getConfiguration() { + return new ExportAlignmentsConfiguration(new HashSet<>(ids)); + } + + public int[] getCloneIds() { + return ids.stream().mapToInt(Integer::intValue).sorted().toArray(); + } + + @Override + public void run1() throws Exception { + try (ClnAReader clna = new ClnAReader(in, VDJCLibraryRegistry.getDefault()); + VDJCAlignmentsWriter writer = new VDJCAlignmentsWriter(getOutput())) { + writer.header(clna.getAlignerParameters(), clna.getGenes(), getFullPipelineConfiguration()); + long count = 0; + for (int id : getCloneIds()) { + OutputPortCloseable reader = clna.readAlignmentsOfClone(id); + VDJCAlignments al; + while ((al = reader.take()) != null) { + writer.write(al); + ++count; + } + } + writer.setNumberOfProcessedReads(count); + } + } + + @JsonAutoDetect( + fieldVisibility = JsonAutoDetect.Visibility.ANY, + isGetterVisibility = JsonAutoDetect.Visibility.NONE, + getterVisibility = JsonAutoDetect.Visibility.NONE) + @JsonTypeInfo( + use = JsonTypeInfo.Id.CLASS, + include = JsonTypeInfo.As.PROPERTY, + property = "type") + public static class ExportAlignmentsConfiguration implements ActionConfiguration { + public final Set cloneIds; + + @JsonCreator + public ExportAlignmentsConfiguration(@JsonProperty("cloneIds") Set cloneIds) { + this.cloneIds = cloneIds; + } + + @Override + public String actionName() { + return EXPORT_ALIGNMENTS_FOR_CLONES_COMMAND_NAME; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + ExportAlignmentsConfiguration that = (ExportAlignmentsConfiguration) o; + return cloneIds.equals(that.cloneIds); + } + + @Override + public int hashCode() { + return Objects.hash(cloneIds); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsPretty.java b/src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsPretty.java index b8c649de3..719582840 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsPretty.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandExportAlignmentsPretty.java @@ -219,6 +219,13 @@ public void outputCompact(PrintStream output, final VDJCAlignments alignments) { output.println(">>> Read ids: " + Arrays.toString(alignments.getReadIds()) .replace("[", "") .replace("]", "")); + if (alignments.getCloneIndex() != -1) { + output.print(">>> Clone mapping: "); + output.print(alignments.getCloneIndex()); + output.print(" "); + output.println(alignments.getMappingType()); + } + output.println(); output.println(); for (int i = 0; i < alignments.numberOfTargets(); i++) { if (printDescriptions) { diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandExtend.java b/src/main/java/com/milaboratory/mixcr/cli/CommandExtend.java index 971ec6a23..a43b16f60 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CommandExtend.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandExtend.java @@ -173,7 +173,7 @@ void processVDJCA() throws IOException { reader.getParameters().getJAlignerParameters().getParameters().getScoring()); for (VDJCAlignments alignments : CUtils.it(new OrderedOutputPort<>(process.getOutput(), VDJCAlignments::getAlignmentsIndex))) - writer.write(alignments); + writer.write(alignments.shiftIndelsAtHomopolymers()); writer.setNumberOfProcessedReads(reader.getNumberOfReads()); process.finish(); diff --git a/src/main/java/com/milaboratory/mixcr/cli/Main.java b/src/main/java/com/milaboratory/mixcr/cli/Main.java index c2a76a8a2..a3bae357c 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/Main.java +++ b/src/main/java/com/milaboratory/mixcr/cli/Main.java @@ -150,6 +150,7 @@ public static CommandLine mkCmd() { .addSubcommand("exportClonesPretty", CommandExportClonesPretty.class) .addSubcommand("exportReadsForClones", CommandExportReadsForClones.class) + .addSubcommand("exportAlignmentsForClones", CommandExportAlignmentsForClones.class) .addSubcommand("exportReads", CommandExportReads.class) .addSubcommand("mergeAlignments", CommandMergeAlignments.class) diff --git a/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java b/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java index 61d2eb38a..8f4454505 100644 --- a/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java +++ b/src/main/java/com/milaboratory/mixcr/partialassembler/PartialAlignmentsAssembler.java @@ -240,7 +240,7 @@ public void run() { overlapped.incrementAndGet(); totalWritten.incrementAndGet(); - writer.write(mAlignment); + writer.write(mAlignment.shiftIndelsAtHomopolymers()); // Saving alignment that where merge to prevent it's use as left part alreadyMergedIds.add(alignment.getAlignmentsIndex()); diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAligner.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAligner.java index 761b40ea9..70d4fe058 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAligner.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAligner.java @@ -243,11 +243,6 @@ static HasGene[] wrapAlignmentHits(final AlignmentHit[] hits) { } static HasGene wrapAlignmentHit(final AlignmentHit hit) { - return new HasGene() { - @Override - public VDJCGene getGene() { - return hit.getRecordPayload(); - } - }; + return hit::getRecordPayload; } } diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerEventListener.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerEventListener.java index b805dc040..4483442eb 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerEventListener.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerEventListener.java @@ -39,8 +39,8 @@ public interface VDJCAlignerEventListener { void onSuccessfulAlignment(SequenceRead read, VDJCAlignments alignment); /** - * Fired on successful sequence-aided overlap (e.g. using PEAR-like algorithm, - * see {@link com.milaboratory.core.merger.MismatchOnlyPairedReadMerger}) + * Fired on successful sequence-aided overlap (e.g. using PEAR-like algorithm, see {@link + * com.milaboratory.core.merger.MismatchOnlyPairedReadMerger}) * * @param read original read * @param alignments resulting alignment @@ -61,4 +61,7 @@ public interface VDJCAlignerEventListener { void onTopHitSequenceConflict(SequenceRead read, VDJCAlignments alignments, GeneType geneType); void onSegmentChimeraDetected(GeneType geneType, SequenceRead read, VDJCAlignments alignments); + + /** only for paired-end PV-first aligner */ + void onRealignmentWithForcedNonFloatingBound(boolean forceLeftEdgeInRight, boolean forceRightEdgeInLeft); } diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerPVFirst.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerPVFirst.java index 3dc357ec7..a0833fee7 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerPVFirst.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerPVFirst.java @@ -34,6 +34,7 @@ import com.milaboratory.core.alignment.*; import com.milaboratory.core.alignment.batch.AlignmentHit; import com.milaboratory.core.alignment.batch.AlignmentHitImpl; +import com.milaboratory.core.alignment.batch.BatchAlignerWithBaseWithFilter; import com.milaboratory.core.alignment.kaligner1.AbstractKAlignerParameters; import com.milaboratory.core.alignment.kaligner1.KAlignerParameters; import com.milaboratory.core.alignment.kaligner2.KAlignerParameters2; @@ -61,6 +62,10 @@ public final class VDJCAlignerPVFirst extends VDJCAlignerAbstract { // Used in case of AMerge private VDJCAlignerS sAligner = null; private final TargetMerger alignmentsMerger; + private BatchAlignerWithBaseWithFilter> + vAlignerNotFloatingLeft, + vAlignerNotFloatingRight; + public VDJCAlignerPVFirst(VDJCAlignerParameters parameters) { super(parameters); @@ -78,12 +83,13 @@ public void setSAligner(VDJCAlignerS sAligner) { @Override protected void init() { super.init(); + vAlignerNotFloatingLeft = vAligner.setFloatingLeftBound(false); + vAlignerNotFloatingRight = vAligner.setFloatingRightBound(false); } @Override protected VDJCAlignmentResult process0(final PairedRead input) { Target[] targets = getTargets(input); - // Creates helper classes for each PTarget PAlignmentHelper[] helpers = createInitialHelpers(targets); @@ -373,6 +379,41 @@ PAlignmentHelper alignVThenJ(final Target target) { vAl1 = vAligner.align(target.targets[0].getSequence()).getHits(), vAl2 = vAligner.align(target.targets[1].getSequence()).getHits(); + /* + * Step 1.4: force floating bounds = false for ---xxx> <---- and ---> + lAl = vHit.hit0.getAlignment(), + rAl = vHit.hit1.getAlignment(); + + if (lAl == null || rAl == null) + continue; + + if (lAl.getSequence2Range().getTo() != target.targets[0].size() && rAl.getSequence2Range().getFrom() == 0) { + //---xxx> <---- + forceRightEdgeInLeft = true; + } else if (lAl.getSequence2Range().getTo() == target.targets[0].size() && rAl.getSequence2Range().getFrom() != 0) { + //---> scoring = parameters.getVAlignerParameters().getParameters().getScoring(); - if (scoring instanceof AffineGapAlignmentScoring) - continue; //TODO IMPLEMENT - final NucleotideSequence sequence2 = target.targets[1].getSequence(); final NucleotideSequence sequence1 = leftHit.getAlignment().getSequence1(); final int beginFR3 = leftHit.getRecordPayload().getPartitioning().getRelativePosition(parameters.getFeatureToAlign(GeneType.Variable), ReferencePoint.FR3Begin); @@ -457,9 +496,6 @@ PAlignmentHelper alignVThenJ(final Target target) { continue; final AlignmentScoring scoring = parameters.getJAlignerParameters().getParameters().getScoring(); - if (scoring instanceof AffineGapAlignmentScoring) - continue;//TODO IMPLEMENT - final NucleotideSequence sequence2 = target.targets[0].getSequence(); final NucleotideSequence sequence1 = rightHit.getAlignment().getSequence1(); diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParameters.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParameters.java index f3e7150a4..9d2b76e5e 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParameters.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParameters.java @@ -64,6 +64,7 @@ public final class VDJCAlignerParameters implements HasFeatureToAlign, java.io.S private int minChimeraDetectionScore; private int vjOverlapWindow; private boolean saveOriginalReads; + private boolean smartForceEdgeAlignments; @JsonCreator public VDJCAlignerParameters(@JsonProperty("vParameters") KGeneAlignmentParameters vParameters, @@ -85,7 +86,8 @@ public VDJCAlignerParameters(@JsonProperty("vParameters") KGeneAlignmentParamete @JsonProperty("alignmentBoundaryTolerance") int alignmentBoundaryTolerance, @JsonProperty("minChimeraDetectionScore") int minChimeraDetectionScore, @JsonProperty("vjOverlapWindow") int vjOverlapWindow, - @JsonProperty("saveOriginalReads") boolean saveOriginalReads) { + @JsonProperty("saveOriginalReads") boolean saveOriginalReads, + @JsonProperty("smartForceEdgeAlignments") boolean smartForceEdgeAlignments) { this.alignmentParameters = new EnumMap<>(GeneType.class); setGeneAlignerParameters(GeneType.Variable, vParameters); setGeneAlignerParameters(GeneType.Diversity, dParameters); @@ -107,6 +109,7 @@ public VDJCAlignerParameters(@JsonProperty("vParameters") KGeneAlignmentParamete this.minChimeraDetectionScore = minChimeraDetectionScore; this.vjOverlapWindow = vjOverlapWindow; this.saveOriginalReads = saveOriginalReads; + this.smartForceEdgeAlignments = smartForceEdgeAlignments; } public int getVJOverlapWindow() { @@ -236,6 +239,15 @@ public VJAlignmentOrder getVJAlignmentOrder() { return vjAlignmentOrder; } + public void setSmartForceEdgeAlignments(boolean smartForceEdgeAlignments) { + this.smartForceEdgeAlignments = smartForceEdgeAlignments; + } + + @JsonProperty("smartForceEdgeAlignments") + public boolean isSmartForceEdgeAlignments() { + return smartForceEdgeAlignments; + } + public void setVjAlignmentOrder(VJAlignmentOrder vjAlignmentOrder) { this.vjAlignmentOrder = vjAlignmentOrder; } @@ -388,7 +400,8 @@ public boolean equals(Object o) { Objects.equals(alignmentParameters, that.alignmentParameters) && vjAlignmentOrder == that.vjAlignmentOrder && readsLayout == that.readsLayout && - Objects.equals(mergerParameters, that.mergerParameters); + Objects.equals(mergerParameters, that.mergerParameters) && + smartForceEdgeAlignments == that.smartForceEdgeAlignments; } @Override @@ -402,6 +415,6 @@ public VDJCAlignerParameters clone() { getCAlignerParameters(), vjAlignmentOrder, includeDScore, includeCScore, minSumScore, maxHits, relativeMinVFR3CDR3Score, allowPartialAlignments, allowNoCDR3PartAlignments, allowChimeras, readsLayout, mergerParameters, fixSeed, alignmentBoundaryTolerance, - minChimeraDetectionScore, vjOverlapWindow, saveOriginalReads); + minChimeraDetectionScore, vjOverlapWindow, saveOriginalReads, smartForceEdgeAlignments); } } diff --git a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignmentResult.java b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignmentResult.java index 610a19419..b0a2dbe98 100644 --- a/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignmentResult.java +++ b/src/main/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignmentResult.java @@ -45,4 +45,10 @@ public VDJCAlignmentResult(R read) { this.read = read; this.alignment = null; } + + public VDJCAlignmentResult shiftIndelsAtHomopolymers() { + if (alignment == null) + return this; + return new VDJCAlignmentResult<>(read, alignment.shiftIndelsAtHomopolymers()); + } } diff --git a/src/main/resources/parameters/full_seq_assembler_parameters.json b/src/main/resources/parameters/full_seq_assembler_parameters.json index dd925fa14..805260fda 100644 --- a/src/main/resources/parameters/full_seq_assembler_parameters.json +++ b/src/main/resources/parameters/full_seq_assembler_parameters.json @@ -10,7 +10,7 @@ "minimalNonEdgePointsFraction": 0.25, "subCloningRegion": null, "trimmingParameters": { - "averageQualityThreshold": 20.0, + "averageQualityThreshold": 10.0, "windowSize": 8 }, "minimalContigLength": 20, diff --git a/src/main/resources/parameters/vdjcaligner_parameters.json b/src/main/resources/parameters/vdjcaligner_parameters.json index b586d2565..7975a4797 100644 --- a/src/main/resources/parameters/vdjcaligner_parameters.json +++ b/src/main/resources/parameters/vdjcaligner_parameters.json @@ -118,7 +118,8 @@ "minChimeraDetectionScore": 120, "vjOverlapWindow": 3, "readsLayout": "Opposite", - "saveOriginalReads": false + "saveOriginalReads": false, + "smartForceEdgeAlignments": true }, "kaligner2": { "fixSeed" : true, @@ -251,7 +252,8 @@ "minChimeraDetectionScore": 120, "vjOverlapWindow": 3, "readsLayout": "Opposite", - "saveOriginalReads": false + "saveOriginalReads": false, + "smartForceEdgeAlignments": true }, "rna-seq": { "fixSeed" : true, @@ -372,6 +374,7 @@ "minChimeraDetectionScore": 120, "vjOverlapWindow": 3, "readsLayout": "Opposite", - "saveOriginalReads": false + "saveOriginalReads": false, + "smartForceEdgeAlignments": true } } \ No newline at end of file diff --git a/src/test/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerTest.java b/src/test/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerTest.java index 09e294def..6184f58b9 100644 --- a/src/test/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerTest.java +++ b/src/test/java/com/milaboratory/mixcr/assembler/fullseq/FullSeqAssemblerTest.java @@ -321,12 +321,12 @@ public CloneFraction(int count, MasterSequence seq) { public void test1() throws Exception { int len = 140; PairedRead read1 = new PairedRead( - new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-20, len)), "R1"), - new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-200, len).getReverseComplement()), "R2")); + new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-200, len)), "R1"), + new SingleReadImpl(0, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-20, len).getReverseComplement()), "R2")); PairedRead read2 = new PairedRead( - new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-30, len)), "R1"), - new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-150, len).getReverseComplement()), "R2")); + new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-150, len)), "R1"), + new SingleReadImpl(1, new NSequenceWithQuality(masterSeq1WT.getRangeFromCDR3Begin(-30, len).getReverseComplement()), "R2")); RunMiXCR.RunMiXCRAnalysis params = new RunMiXCR.RunMiXCRAnalysis(read1, read2); diff --git a/src/test/java/com/milaboratory/mixcr/basictypes/VDJCObjectTest.java b/src/test/java/com/milaboratory/mixcr/basictypes/VDJCObjectTest.java index b0c7e3f7e..d07baee71 100644 --- a/src/test/java/com/milaboratory/mixcr/basictypes/VDJCObjectTest.java +++ b/src/test/java/com/milaboratory/mixcr/basictypes/VDJCObjectTest.java @@ -30,6 +30,7 @@ package com.milaboratory.mixcr.basictypes; import com.milaboratory.core.sequence.NSequenceWithQuality; +import com.milaboratory.mixcr.cli.CommandAlign; import com.milaboratory.mixcr.cli.CommandExportAlignmentsPretty; import com.milaboratory.mixcr.util.RunMiXCR; import io.repseq.core.GeneFeature; @@ -104,9 +105,11 @@ public void test2() throws Exception { RunMiXCR.class.getResource("/sequences/test_R2.fastq").getFile()); RunMiXCR.AlignResult align = RunMiXCR.align(params); + //new CommandExportAlignmentsPretty().outputCompact(System.out, align.alignments.get(3)); + assertNotNull(align.alignments.get(0).getIncompleteFeature(new GeneFeature(new GeneFeature(FR3Begin, FR3End.move(-10)), CDR3))); assertNotNull(align.alignments.get(2).getIncompleteFeature(new GeneFeature(new GeneFeature(FR3Begin, FR3End.move(-10)), CDR3))); - assertNull(align.alignments.get(3).getIncompleteFeature(new GeneFeature(new GeneFeature(FR3Begin, FR3End.move(-10)), CDR3))); + assertNotNull(align.alignments.get(3).getIncompleteFeature(new GeneFeature(new GeneFeature(FR3Begin, FR3End.move(-10)), CDR3))); //new ActionExportAlignmentsPretty().outputCompact(System.out, al); new CommandExportAlignmentsPretty().outputCompact(System.out, align.alignments.get(0)); diff --git a/src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParametersTest.java b/src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParametersTest.java index a3e84cf36..2d19960c9 100644 --- a/src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParametersTest.java +++ b/src/test/java/com/milaboratory/mixcr/vdjaligners/VDJCAlignerParametersTest.java @@ -63,7 +63,7 @@ public void test1() throws Exception { VJAlignmentOrder.JThenV, false, false, 120.0f, 5, 0.7f, false, false, false, PairedEndReadsLayout.Opposite, new MergerParameters( - QualityMergingAlgorithm.SumSubtraction, null, 12, null, 0.12, Unweighted), false, 5, 120, 10, true); + QualityMergingAlgorithm.SumSubtraction, null, 12, null, 0.12, Unweighted), false, 5, 120, 10, true, true); String str = GlobalObjectMappers.PRETTY.writeValueAsString(paramentrs); VDJCAlignerParameters deser = GlobalObjectMappers.PRETTY.readValue(str, VDJCAlignerParameters.class); From 3b7a608ac8e649bd5cc6b05e5d0c02e8e76b88c0 Mon Sep 17 00:00:00 2001 From: Bolotin Dmitriy Date: Sun, 21 Jul 2019 14:51:08 +0100 Subject: [PATCH 11/13] Current changelog --- CHANGELOG_CURRENT | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/CHANGELOG_CURRENT b/CHANGELOG_CURRENT index 54760a13a..920916b6d 100644 --- a/CHANGELOG_CURRENT +++ b/CHANGELOG_CURRENT @@ -1,2 +1,11 @@ -Fixes exception in `-mutationsDetailed` and similar export options -Added `exportAlignmentsForClones` action \ No newline at end of file +Alignments are forced to the corresponding edge if the same V/J gene is detected in the opposite PE read mate +Average quality threshold (`-OaverageQualityThreshold=...`) in `assembleContigs` increased to `20` +Added `exportAlignmentsForClones` action +All indels in homopolimeric stretches are now shifted left in all alignment algorihtms +Smarter base V/J/C hit selection in `assembleContigs` +Read quality trimming (see help for `--trimming-window-size` and `--trimming-quality-threshold` options in `align`) (disabled by default until 3.1) +Fix exception in `-mutationsDetailed` and similar export options +Fixes in step skipping logic in `analyze` (additional automatic parameter adjustment for VDJC libraries covering only `VRegion`) +Several more fixes for `assembleContigs` +minor: Fixes incorrect numbers in `assemble` report +minor: Fix for filterAlignmentsAction \ No newline at end of file From b14271fc2cca4e10b7cf1aee64639ddedbc9cf4a Mon Sep 17 00:00:00 2001 From: Bolotin Dmitriy Date: Mon, 22 Jul 2019 19:34:54 +0100 Subject: [PATCH 12/13] Final preparations before release. --- milib | 2 +- pom.xml | 4 ++-- repseqio | 2 +- src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/milib b/milib index c0a0ea983..f9219a0b3 160000 --- a/milib +++ b/milib @@ -1 +1 @@ -Subproject commit c0a0ea9835b32575fb09d288dbbffa8c53dc269f +Subproject commit f9219a0b31e37ec45c3cdb055bd1e8463499a3ff diff --git a/pom.xml b/pom.xml index 91d35006c..312318068 100644 --- a/pom.xml +++ b/pom.xml @@ -39,14 +39,14 @@ UTF-8 - 1.11-SNAPSHOT + 1.11 io.repseq repseqio - 1.3.2-SNAPSHOT + 1.3.2 com.milaboratory diff --git a/repseqio b/repseqio index 29663892c..6e897db70 160000 --- a/repseqio +++ b/repseqio @@ -1 +1 @@ -Subproject commit 29663892cec0ac53438adaabb86542b8e572faea +Subproject commit 6e897db703192909b8b7eaa423230b6ce630a0bd diff --git a/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java b/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java index a4ccbd8e4..66470207b 100644 --- a/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java +++ b/src/main/java/com/milaboratory/mixcr/cli/CommandAlign.java @@ -146,11 +146,11 @@ public void setLimit(int limit) { @Option(description = "Read pre-processing: trimming quality threshold", names = {"--trimming-quality-threshold"}) - public byte trimmingQualityThreshold = 0; + public byte trimmingQualityThreshold = 0; // 17 @Option(description = "Read pre-processing: trimming window size", names = {"--trimming-window-size"}) - public byte trimmingWindowSize = 6; + public byte trimmingWindowSize = 6; // 3 @Option(description = "Parameters preset.", names = {"-p", "--parameters"}) From 3340fd433233cf851aabd89b7a744fa61395e5c2 Mon Sep 17 00:00:00 2001 From: Bolotin Dmitriy Date: Mon, 22 Jul 2019 19:37:17 +0100 Subject: [PATCH 13/13] Release v3.0.8 --- CHANGELOG | 20 ++++++++++++++++++++ CHANGELOG_CURRENT | 11 ----------- pom.xml | 2 +- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 53e89c538..59f09b128 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,24 @@ +MiXCR 3.0.8 (22 Jul 2019) +======================== + +-- Alignments are forced to the corresponding edge if the same V/J gene is detected in the opposite + PE read mate +-- Average quality threshold (`-OaverageQualityThreshold=...`) in `assembleContigs` increased to + `20` +-- Added `exportAlignmentsForClones` action +-- All indels in homopolimeric stretches are now shifted left in all alignment algorihtms +-- Smarter base V/J/C hit selection in `assembleContigs` +-- Read quality trimming (see help for `--trimming-window-size` and `--trimming-quality-threshold` + options in `align`) (disabled by default until 3.1) +-- Fix exception in `-mutationsDetailed` and similar export options +-- Fixes in step skipping logic in `analyze` (additional automatic parameter adjustment for VDJC + libraries covering only `VRegion`) +-- Several more fixes for `assembleContigs` +-- minor: Fixes incorrect numbers in `assemble` report +-- minor: Fix for filterAlignmentsAction + + MiXCR 3.0.7 (20 May 2019) ======================== diff --git a/CHANGELOG_CURRENT b/CHANGELOG_CURRENT index 920916b6d..e69de29bb 100644 --- a/CHANGELOG_CURRENT +++ b/CHANGELOG_CURRENT @@ -1,11 +0,0 @@ -Alignments are forced to the corresponding edge if the same V/J gene is detected in the opposite PE read mate -Average quality threshold (`-OaverageQualityThreshold=...`) in `assembleContigs` increased to `20` -Added `exportAlignmentsForClones` action -All indels in homopolimeric stretches are now shifted left in all alignment algorihtms -Smarter base V/J/C hit selection in `assembleContigs` -Read quality trimming (see help for `--trimming-window-size` and `--trimming-quality-threshold` options in `align`) (disabled by default until 3.1) -Fix exception in `-mutationsDetailed` and similar export options -Fixes in step skipping logic in `analyze` (additional automatic parameter adjustment for VDJC libraries covering only `VRegion`) -Several more fixes for `assembleContigs` -minor: Fixes incorrect numbers in `assemble` report -minor: Fix for filterAlignmentsAction \ No newline at end of file diff --git a/pom.xml b/pom.xml index 312318068..596f5e7d4 100644 --- a/pom.xml +++ b/pom.xml @@ -33,7 +33,7 @@ com.milaboratory mixcr - 3.0.8-SNAPSHOT + 3.0.8 jar MiXCR