Skip to content

Commit

Permalink
Corrected Parsimony
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Mar 14, 2018
1 parent defe2ad commit 4939293
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 63 deletions.
4 changes: 2 additions & 2 deletions acr/parsimony.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,14 @@ func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, rando
var upstates []AncestralState = make([]AncestralState, len(nodes)) // Upside states of each node
// Initialize indices of characters
alphabet := make([]string, 0, 10)
sort.Strings(alphabet)
seenState := make(map[string]bool)
for _, state := range tipCharacters {
if _, ok := seenState[state]; !ok {
alphabet = append(alphabet, state)
}
seenState[state] = true
}
sort.Strings(alphabet)
stateIndices := AncestralStateIndices(alphabet)

// We initialize all ancestral states
Expand Down Expand Up @@ -170,8 +170,8 @@ func parsimonyDOWNPASS(cur, prev *tree.Node,
for k, c := range states[child.Id()] {
state[k] += c
}
nchild++
}
nchild++
}
computeParsimony(state, states[cur.Id()], nchild)
}
Expand Down
141 changes: 82 additions & 59 deletions asr/parsimony.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool)
var err error
var nodes []*tree.Node = t.Nodes()
var seqs []*AncestralSequence = make([]*AncestralSequence, len(nodes))
var upseqs []*AncestralSequence = make([]*AncestralSequence, len(nodes)) // Upside seqs of each node

// Initialize indices of characters
var charToIndex map[rune]int = make(map[rune]int)
for i, c := range a.AlphabetCharacters() {
Expand All @@ -38,6 +40,9 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool)
if seqs[i], err = NewAncestralSequence(a.Length(), len(a.AlphabetCharacters())); err != nil {
return err
}
if upseqs[i], err = NewAncestralSequence(a.Length(), len(a.AlphabetCharacters())); err != nil {
return err
}
}

err = parsimonyUPPASS(t.Root(), nil, a, seqs, charToIndex)
Expand All @@ -47,9 +52,9 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool)

switch algo {
case ALGO_DOWNPASS:
parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex, randomResolve)
parsimonyDOWNPASS(t.Root(), nil, a, seqs, upseqs, charToIndex, randomResolve)
case ALGO_DELTRAN:
parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex, false)
parsimonyDOWNPASS(t.Root(), nil, a, seqs, upseqs, charToIndex, false)
parsimonyDELTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve)
case ALGO_ACCTRAN:
parsimonyACCTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve)
Expand Down Expand Up @@ -103,79 +108,70 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS
}
}

// we take the state present in
// the largest number of children
max := 0.0
for _, c := range ances.counts {
if c > max {
max = c
}
}
for k, c := range ances.counts {
if int(max) == nchild && c == max {
// We have a characters shared by all neighbors wo parent: Intersection ok
ances.counts[k] = 1
} else if int(max) == 1 && c > 0 {
// Else we have no intersection between any children: take union
ances.counts[k] = 1
} else if int(max) < nchild && c > 1 {
// Else we have a character shared by at least 2 children: OK
ances.counts[k] = 1
} else {
// Else we do not take it
ances.counts[k] = 0
}
}
computeParsimony(ances, ances, nchild)
}
}
return nil
}

// Second step of the parsimony computatation: From root to tips
func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) {
func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment,
seqs []*AncestralSequence, upseqs []*AncestralSequence,
charToIndex map[rune]int, randomResolve bool) {
// If it is not a tip and not the root
if !cur.Tip() {
// We compute the up sequence states for each children of
// the current node (may be the root)
// i.e. the parsimony from the upside of the tree
for _, child := range cur.Neigh() {
if child != prev {
for j, _ := range seqs[cur.Id()].seq {
state := AncestralState{make([]float64, len(charToIndex))}
nchild := 0
// already computed up state of the current node
if prev != nil { // Not the root
nchild++
for k, c := range upseqs[cur.Id()].seq[j].counts {
state.counts[k] += c
}
}
// already computed down states of children of current node
// except current child _child_
for _, child2 := range cur.Neigh() {
if child2 != prev && child2 != child {
for k, c := range seqs[child2.Id()].seq[j].counts {
state.counts[k] += c
}
nchild++
}
}
// Compute the up state now
computeParsimony(state, upseqs[child.Id()].seq[j], nchild)
}
}
}

if prev != nil {
// As we are manipulating trees with multifurcations
// For each character we count the number of children having it
// and then we take character(s) with the maximum number of children
// And that for each site of the alignment
for j, ances := range seqs[cur.Id()].seq {
for j, _ := range seqs[cur.Id()].seq {
state := AncestralState{make([]float64, len(charToIndex))}
// With Parent
nchild := 0
for _, child := range cur.Neigh() {
counts := seqs[child.Id()].seq[j].counts
for k, c := range counts {
state.counts[k] += c
}
nchild++
}

// If intersection of states of children and parent is emtpy:
// then State of cur node == Union of intersection of nodes 2 by 2
// If state is still empty, then state of cur node is union of all states
max := 0.0
for _, c := range state.counts {
if c > max {
max = c
}
// With Parent using its upseq
nchild := 1
for k, c := range upseqs[cur.Id()].seq[j].counts {
state.counts[k] += c
}
for k, c := range state.counts {
if int(max) == nchild && c == max {
// We have a characters shared by all neighbors and parent: Intersection ok
ances.counts[k] = 1
} else if int(max) == 1 && c > 0 {
// Else we have no intersection between any children: take union
ances.counts[k] = 1
} else if int(max) < nchild && c > 1 {
// Else we have a character shared by at least 2 children: OK
ances.counts[k] = 1
} else {
// Else we do not take it
ances.counts[k] = 0
for _, child := range cur.Neigh() {
if child != prev {
for k, c := range seqs[child.Id()].seq[j].counts {
state.counts[k] += c
}
nchild++
}
}
computeParsimony(state, seqs[cur.Id()].seq[j], nchild)
}
}

Expand All @@ -187,12 +183,39 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*Ancestra

for _, child := range cur.Neigh() {
if child != prev {
parsimonyDOWNPASS(child, cur, a, seqs, charToIndex, randomResolve)
parsimonyDOWNPASS(child, cur, a, seqs, upseqs, charToIndex, randomResolve)
}
}
}
}

func computeParsimony(neighborStates AncestralState, currentStates AncestralState, nchild int) {
// If intersection of states of children and parent is emtpy:
// then State of cur node == Union of intersection of nodes 2 by 2
// If state is still empty, then state of cur node is union of all states
max := 0.0
for _, c := range neighborStates.counts {
if c > max {
max = c
}
}
for k, c := range neighborStates.counts {
if int(max) == nchild && c == max {
// We have a characters shared by all neighbors and parent: Intersection ok
currentStates.counts[k] = 1
} else if int(max) == 1 && c > 0 {
// Else we have no intersection between any children: take union
currentStates.counts[k] = 1
} else if int(max) < nchild && c > 1 {
// Else we have a character shared by at least 2 children: OK
currentStates.counts[k] = 1
} else {
// Else we do not take it
currentStates.counts[k] = 0
}
}
}

// Third step of the parsimony computation for resolving ambiguities
func parsimonyDELTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) {
// If it is not a tip
Expand Down
78 changes: 76 additions & 2 deletions test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ gotree reformat newick -i nexus -f nexus -o result
diff expected result
rm -f expected result nexus

echo "->gotree acr"
echo "->gotree acr acctran"
cat > tmp_states.txt <<EOF
1,A
2,A
Expand All @@ -890,7 +890,56 @@ gotree acr -i tmp_tree.txt --states tmp_states.txt --algo acctran -o result
diff expected result
rm -f expected result tmp_tree.txt tmp_states.txt

echo "->gotree asr"
echo "->gotree acr downpass"
cat > tmp_states.txt <<EOF
t1,A
t2,A
t3,B
t4,B
t5,A
t6,B
t7,B
t8,A
t9,A
t10,A
t11,A
EOF
cat > tmp_tree.txt <<EOF
(t1,(t2,((t3,(t4,t5)),(t6,((t7,t8),((t9,t10),t11))))));
EOF
cat > expected <<EOF
(t1[A],(t2[A],((t3[B],(t4[B],t5[A])[A|B])[A|B],(t6[B],((t7[B],t8[A])[A|B],((t9[A],t10[A])[A],t11[A])[A])[A|B])[A|B])[A|B])[A])[A];
EOF
gotree acr -i tmp_tree.txt --states tmp_states.txt --algo downpass -o result
diff expected result
rm -f expected result tmp_tree.txt tmp_states.txt

echo "->gotree acr deltran"
cat > tmp_states.txt <<EOF
t1,A
t2,A
t3,B
t4,B
t5,A
t6,B
t7,B
t8,A
t9,A
t10,A
t11,A
EOF
cat > tmp_tree.txt <<EOF
(t1,(t2,((t3,(t4,t5)),(t6,((t7,t8),((t9,t10),t11))))));
EOF
cat > expected <<EOF
(t1[A],(t2[A],((t3[B],(t4[B],t5[A])[A])[A],(t6[B],((t7[B],t8[A])[A],((t9[A],t10[A])[A],t11[A])[A])[A])[A])[A])[A])[A];
EOF
gotree acr -i tmp_tree.txt --states tmp_states.txt --algo deltran -o result
diff expected result
rm -f expected result tmp_tree.txt tmp_states.txt


echo "->gotree asr acctran"
cat > tmp_states.txt <<EOF
11 2
1 AA
Expand All @@ -914,3 +963,28 @@ EOF
gotree asr -i tmp_tree.txt -p -a tmp_states.txt --algo acctran -o result
diff expected result
rm -f expected result tmp_tree.txt tmp_states.txt

echo "->gotree asr downpass"
cat > tmp_states.txt <<EOF
11 4
t1 AAAT
t2 AAAT
t3 CCCT
t4 CCCT
t5 AAAT
t6 CCCT
t7 CCCT
t8 AAAT
t9 AAAT
t10 AAAT
t11 AAAT
EOF
cat > tmp_tree.txt <<EOF
(t1,(t2,((t3,(t4,t5)),(t6,((t7,t8),((t9,t10),t11))))));
EOF
cat > expected <<EOF
(t1[AAAT],(t2[AAAT],((t3[CCCT],(t4[CCCT],t5[AAAT])[{AC}{AC}{AC}T])[{AC}{AC}{AC}T],(t6[CCCT],((t7[CCCT],t8[AAAT])[{AC}{AC}{AC}T],((t9[AAAT],t10[AAAT])[AAAT],t11[AAAT])[AAAT])[{AC}{AC}{AC}T])[{AC}{AC}{AC}T])[{AC}{AC}{AC}T])[AAAT])[AAAT];
EOF
gotree asr -i tmp_tree.txt -p -a tmp_states.txt --algo downpass -o result
diff expected result
rm -f expected result tmp_tree.txt tmp_states.txt

0 comments on commit 4939293

Please sign in to comment.