forked from Aqcurate/Needleman-Wunsch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
NeedlemanWunsch.java
177 lines (155 loc) · 6.63 KB
/
NeedlemanWunsch.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import java.lang.Math;
import java.util.Arrays;
/**
* This class uses the Needleman-Wunsch algorithm
* to align nucleotide sequences.
*
* @author Andrew Quach
* @author Tamir Enkhjargal
*
* @version 1.0.0
*/
public class NeedlemanWunsch {
// Default scoring scheme for match, mismatch, and indel
public static final int START = 0;
public static final int MATCH = 1;
public static final int MISMATCH = -10;
public static final int INDEL = -1;
/**
* Generates solution matrix given 2 RNA strands.
* Uses the Needleman-Wunsch algorithm.
*
* @return the solution matrix
*/
public static int[][] findSolution(String strand1, String strand2) {
// Generate solution matrix based on lengths of both strands
// Let strand1 be the side strand
// Let strand2 be the top strand
int[][] solution = new int[strand1.length()+1][strand2.length()+1];
// Set the starting point to value of START
solution[0][0] = START;
// Fill in the top row. Moving to the right always adds the value of INDEL.
for (int i = 1; i < strand2.length()+1; i++) {
solution[0][i] = solution[0][i-1] + INDEL;
}
// Fill in the left column. Moving down always adds the value of INDEL.
for (int i = 1; i < strand1.length()+1; i++) {
solution[i][0] = solution[i-1][0] + INDEL;
}
// Fill in the rest of the matrix based on a few rules.
for (int i = 1; i < strand1.length()+1; i++) {
for (int j = 1; j < strand2.length()+1; j++) {
int matchValue;
// If the characters that correspond to the grid position are equal for both strands
// Set the matchValue to MATCH, else set the matchValue to MISMATCH
if (strand1.charAt(i-1) == strand2.charAt(j-1)) matchValue = MATCH;
else matchValue = MISMATCH;
// Set the value to the maximum of these three values
// Position to the left + INDEL
// Position above + INDEL
// Position top-left + matchVALUE
solution[i][j] = max(solution[i][j-1] + INDEL, solution[i-1][j] + INDEL, solution[i-1][j-1] + matchValue);
}
}
// Return solution matrix
return solution;
}
/**
* Helper method for calculating a maximum of three numbers.
*
* @return the maximum of the three given integers
*/
private static int max(int a, int b, int c) {
return Math.max(Math.max(a, b), c);
}
/**
* Aligns RNA strands based off a solution matrix.
* Finds one of the 'best' paths in the solution matrix.
* Uses the 'best' path to generate aligned RNA strands.
* This method does so non-recursively.
*
* @return the two aligned RNA strands
*/
public static String[] findPath(int[][] solution, String strand1, String strand2) {
// Let strand1 be the side strand
// Let strand2 be the top strand
String alignedStrand1 = "";
String alignedStrand2 = "";
// Start from the bottom right of the solution matrix
int i = solution.length - 1;
int j = solution[0].length - 1;
int best;
// While you are not at the top/left side of the matrix
// This prevents an OOB exception
while (i != 0 && j != 0) {
// Calculate the best path to the current position
// Check position to the left, above, and top-left
best = max(solution[i][j-1], solution[i-1][j], solution[i-1][j-1]);
// If the top-left position is the best
if (solution[i-1][j-1] == best) {
// Add the character corresponding to that position to both strands
// This is the case for either a match or mismatch
alignedStrand1 = strand1.charAt(i-1) + alignedStrand1;
alignedStrand2 = strand2.charAt(j-1) + alignedStrand2;
// Move to the new position
i -= 1;
j -= 1;
// If the left position is the best
} else if (solution[i][j-1] == best) {
// Add '-' to strand1
// Add the character correponding to that position to strand2
// This represents a gap in the side strand
alignedStrand1 = "-" + alignedStrand1;
alignedStrand2 = strand2.charAt(j-1) + alignedStrand2;
// Move to the new position
j -= 1;
// If the above position is the best
} else {
// Add '-' to strand2
// Add the character corresponding to that position to strand1
// This represents a gap in the top strand
alignedStrand1 = strand1.charAt(i-1) + alignedStrand1;
alignedStrand2 = "-" + alignedStrand2;
// Move to the new position
i -= 1;
}
}
// If you are at the top of the matrix
if (i == 0) {
// Append "-" for every space you are away from 0,0 to strand2
// EX: If you are at 0,3 (j = 3), add "---" to strand2
for (int k = 0; k < j; k++) {
alignedStrand1 = "-" + alignedStrand1;
alignedStrand2 = strand2.charAt(j-1) + alignedStrand2;
}
// If you are at the left most side of the matrix
} else {
// Append "-" for every space you are away from 0,0 to strand1
// EX: If you are at 3,0 (i = 3), add "---" to strand1
for (int k = 0; k < i; k++) {
alignedStrand1 = strand1.charAt(i-1) + alignedStrand1;
alignedStrand2 = "-" + alignedStrand2;
}
}
return new String[] {alignedStrand1, alignedStrand2};
}
/**
* Method that abstracts away findSolution and findPath.
* Prints out the aligned strands and alignment score.
*/
public static void alignStrands(String strand1, String strand2) {
int[][] solution = findSolution(strand1, strand2);
int score = solution[solution.length-1][solution[0].length-1];
String[] alignedStrands = findPath(solution, strand1, strand2);
System.out.println(alignedStrands[0]);
System.out.println(alignedStrands[1]);
System.out.println("The score for this alignment is: " + score);
}
public static void main(String[] args) {
alignStrands("ABC", "CAB");
// System.out.println();
// alignStrands("GATTACA", "GCATGCU");
// System.out.println();
// alignStrands("UGAC", "UUGAGC");
}
}