Skip to content

Commit

Permalink
Added option to disallow complex indels at read edge to workaround Ma…
Browse files Browse the repository at this point in the history
…nta problem. See: #24
  • Loading branch information
lmose committed Oct 8, 2019
1 parent 28ef057 commit f4970af
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 2 deletions.
48 changes: 48 additions & 0 deletions src/main/java/abra/CigarUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,50 @@ public static boolean hasNDN(String cigar) {
return hasNDM;
}

/**
* Returns true if the input cigar string begins or ends with 2 adjacent indels.
* Clipping is ignored.
*/
public static boolean startsOrEndsWithComplexIndel(String cigar) {
boolean ret = false;

List<CigarBlock> blocks = getUnclippedCigarBlocks(cigar);

if (blocks.size() > 1) {
if (blocks.get(0).isIndel() && blocks.get(1).isIndel()) {
ret = true;
} else if (blocks.get(blocks.size()-1).isIndel() && blocks.get(blocks.size()-2).isIndel()) {
ret = true;
}
}

return ret;
}

private static List<CigarBlock> getUnclippedCigarBlocks(String cigar) {

List<CigarBlock> cigarBlocks = new ArrayList<CigarBlock>();
try {
StringBuffer len = new StringBuffer();
for (int i=0; i<cigar.length(); i++) {
char ch = cigar.charAt(i);
if (Character.isDigit(ch)) {
len.append(ch);
} else {
if (ch != 'H' && ch != 'S') {
cigarBlocks.add(new CigarBlock(Integer.valueOf(len.toString()), ch));
}
len.setLength(0);
}
}
} catch (NumberFormatException e) {
Logger.error("NumberFormatException: " + cigar);
throw e;
}

return cigarBlocks;
}

private static List<CigarBlock> getCigarBlocks(String cigar) {

List<CigarBlock> cigarBlocks = new ArrayList<CigarBlock>();
Expand Down Expand Up @@ -272,6 +316,10 @@ static class CigarBlock {
boolean isGap() {
return type == 'D' || type == 'N';
}

boolean isIndel() {
return type == 'D' || type == 'I';
}
}

}
6 changes: 5 additions & 1 deletion src/main/java/abra/ReAligner.java
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ public class ReAligner {
private boolean shouldFilterNDN;
private boolean isGappedContigsOnly;
private boolean shouldUseJunctionsAsContigs;
private boolean disallowComplexIndelsAtReadEdge;

public void reAlign(String[] inputFiles, String[] outputFiles) throws Exception {

Expand Down Expand Up @@ -323,7 +324,7 @@ void processChromosomeChunk(int chromosomeChunkIdx) throws Exception {
read1.setBaseQualityString(rc.reverse(read1.getBaseQualityString()));
read1.setReadNegativeStrandFlag(false);
record.setUnalignedRc(true);
}
}
}

List<Integer> overlappingRegions = new ArrayList<Integer>();
Expand Down Expand Up @@ -605,6 +606,8 @@ private void remapRead(ReadEvaluator readEvaluator, SAMRecord read, int origEdit
Logger.trace("Not moving read: " + read.getReadName() + " from: " + read.getAlignmentStart() + " to: " + alignment.pos);
} else if (shouldFilterNDN && CigarUtils.hasNDN(alignment.cigar)) {
Logger.trace("Not remapping read: %s to NDM cigar: %s", read, alignment.cigar);
} else if (disallowComplexIndelsAtReadEdge && CigarUtils.startsOrEndsWithComplexIndel(alignment.cigar)) {
Logger.trace("Not remapping read: %s to edge complex indel cigar: %s", read, alignment.cigar);
} else if (origEditDist > alignment.numMismatches) {

SAMRecord orig = read.deepCopy();
Expand Down Expand Up @@ -1798,6 +1801,7 @@ public static void run(String[] args) throws Exception {
realigner.shouldFilterNDN = options.isNoNDN();
realigner.isGappedContigsOnly = options.isGappedContigsOnly();
realigner.shouldUseJunctionsAsContigs = options.shouldUseJunctionsAsContigs();
realigner.disallowComplexIndelsAtReadEdge = options.disallowComplexIndelsAtReadEdge();

MAX_REGION_LENGTH = options.getWindowSize();
MIN_REGION_REMAINDER = options.getWindowOverlap();
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/abra/ReAlignerOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ public class ReAlignerOptions extends Options {
private static final String NO_NDN = "no-ndn";
private static final String GAPPED_CONTIGS_ONLY = "gc";
private static final String USE_JUNCTIONS_AS_CONTIGS = "ujac";
private static final String NO_COMPLEX_INDELS_AT_READ_EDGE = "no-edge-ci";

private OptionParser parser;
private boolean isValid;
Expand Down Expand Up @@ -116,6 +117,7 @@ protected OptionParser getOptionParser() {
parser.accepts(NO_NDN, "If specified, do not allow adjacent N-D-N cigar elements");
parser.accepts(GAPPED_CONTIGS_ONLY, "If specified, only reprocess regions that contain at least one contig containing an indel or splice (experimental)");
parser.accepts(USE_JUNCTIONS_AS_CONTIGS, "If specified, use junction permuations as contigs (Experimental - may use excessive memory and compute times)");
parser.accepts(NO_COMPLEX_INDELS_AT_READ_EDGE, "If specified, do not update alignments for reads that have a complex indel at the read edge. i.e. Do not allow alignments like: 90M10D10I");
}

return parser;
Expand Down Expand Up @@ -456,4 +458,8 @@ public int getCompressionLevel() {
public boolean shouldUseJunctionsAsContigs() {
return (Boolean) getOptions().has(USE_JUNCTIONS_AS_CONTIGS);
}

public boolean disallowComplexIndelsAtReadEdge() {
return (Boolean) getOptions().has(NO_COMPLEX_INDELS_AT_READ_EDGE);
}
}
32 changes: 31 additions & 1 deletion src/test/java/abra/CigarUtilsTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import static org.testng.Assert.assertEquals;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

Expand Down Expand Up @@ -230,4 +229,35 @@ public void testHasNDM() {
assertEquals(false, CigarUtils.hasNDN("100I"));
assertEquals(false, CigarUtils.hasNDN("50M200N5M300N5M"));
}

@Test (groups = "unit")
public void testStartsOrEndsWithComplexIndel() {
assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("100M"));
assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10D50M"));
assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10I50M"));
assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10I50M5S"));
assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("10S100M10S"));
assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("10H100M10H"));
assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("10H100M1D1M1I"));
assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10D10I50M"));
assertEquals(false, CigarUtils.startsOrEndsWithComplexIndel("50M10I10D50M"));

assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("50M10I10D"));
assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("50M10D10I"));
assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("50M10D10I5S"));
assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("50M10D10I5S5H"));
assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("10D10I50M"));
assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("5S10D10I50M"));
assertEquals(true, CigarUtils.startsOrEndsWithComplexIndel("5H10I10D50M"));


}

/*
public static void main(String[] args) {
TestNG testSuite = new TestNG();
testSuite.setTestClasses(new Class[] { CigarUtilsTest.class });
testSuite.run();
}
*/
}

0 comments on commit f4970af

Please sign in to comment.