Skip to content

Commit

Permalink
Corrected acr+asr
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Mar 12, 2018
1 parent 385c2aa commit 27b9cbe
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 49 deletions.
24 changes: 12 additions & 12 deletions acr/acr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,18 @@ func TestACRDELTRANParsimony(t *testing.T) {
testCheckStates(t, 2, ni, "t3", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t4", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t5", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t6", states, stateIndices, "A", "B")
testCheckStates(t, 2, ni, "t7", states, stateIndices, "A", "B")
testCheckStates(t, 2, ni, "t6", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t7", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t8", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t9", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t10", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t11", states, stateIndices, "A", "B")
testCheckStates(t, 2, ni, "t11", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t12", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t13", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t14", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t15", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t16", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t17", states, stateIndices, "A", "B")
testCheckStates(t, 2, ni, "t17", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t18", states, stateIndices, "A", "B")
testCheckStates(t, 2, ni, "t19", states, stateIndices, "A", "B")
testCheckStates(t, 2, ni, "t20", states, stateIndices, "A")
Expand All @@ -108,8 +108,8 @@ func TestACRDELTRANParsimony(t *testing.T) {
testCheckStates(t, 2, ni, "t3", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t4", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t5", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t6", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t7", states, stateIndices, "A")
testCheckStates(t, 2, ni, "t6", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t7", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t8", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t9", states, stateIndices, "B")
testCheckStates(t, 2, ni, "t10", states, stateIndices, "A")
Expand Down Expand Up @@ -257,8 +257,8 @@ func TestALLDELTRAN(t *testing.T) {
if err != nil {
t.Error(err)
}
testCheckMap(t, "t6", statemap, "A")
testCheckMap(t, "t7", statemap, "A")
testCheckMap(t, "t6", statemap, "B")
testCheckMap(t, "t7", statemap, "B")
testCheckMap(t, "t11", statemap, "A")
testCheckMap(t, "t14", statemap, "A")
testCheckMap(t, "t16", statemap, "A")
Expand Down Expand Up @@ -304,12 +304,12 @@ func TestALLDOWNPASS(t *testing.T) {
if err != nil {
t.Error(err)
}
testCheckMap(t, "t6", statemap, "A", "B")
testCheckMap(t, "t7", statemap, "A", "B")
testCheckMap(t, "t11", statemap, "A", "B")
testCheckMap(t, "t6", statemap, "B")
testCheckMap(t, "t7", statemap, "B")
testCheckMap(t, "t11", statemap, "A")
testCheckMap(t, "t14", statemap, "A")
testCheckMap(t, "t16", statemap, "A")
testCheckMap(t, "t17", statemap, "A", "B")
testCheckMap(t, "t17", statemap, "A")
testCheckMap(t, "t18", statemap, "A", "B")
testCheckMap(t, "t19", statemap, "A", "B")
testCheckMap(t, "t20", statemap, "A")
Expand Down
16 changes: 10 additions & 6 deletions acr/parsimony.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,13 @@ func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, stat
// Else
// State of cur node == Intersection of States of children if
// works with trees having multifurcations
nchild := 0
for _, child := range cur.Neigh() {
if child != prev {
for k, c := range states[child.Id()] {
states[cur.Id()][k] += c
}
nchild++
}
}
// Now we set to 0 all character states that are present
Expand All @@ -119,13 +121,13 @@ func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, stat
}
}
for k, c := range states[cur.Id()] {
if int(max) == (len(cur.Neigh())-1) && c == max {
if int(max) == nchild && c == max {
// We have a characters shared by all neighbors wo parent: Intersection ok
states[cur.Id()][k] = 1
} else if max == 1 && c > 0 {
} else if int(max) == 1 && c > 0 {
// Else we have no intersection between any children: take union
states[cur.Id()][k] = 1
} else if c > 1 {
} else if int(max) < nchild && c > 1 {
// Else we have a character shared by at least 2 children: OK
states[cur.Id()][k] = 1
} else {
Expand All @@ -147,10 +149,12 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, states []AncestralState, stateIndic
// and then we take character(s) with the maximum number of children
state := make(AncestralState, len(stateIndices))
// With Parent
nchild := 0
for _, child := range cur.Neigh() {
for k, c := range states[child.Id()] {
state[k] += c
}
nchild++
}

// If intersection of states of children and parent is emtpy:
Expand All @@ -163,13 +167,13 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, states []AncestralState, stateIndic
}
}
for k, c := range state {
if int(max) == len(cur.Neigh()) && c == max {
if int(max) == nchild && c == max {
// We have a characters shared by all neighbors and parent: Intersection ok
states[cur.Id()][k] = 1
} else if max == 1 && c > 0 {
} else if int(max) == 1 && c > 0 {
// Else we have no intersection between any children: take union
states[cur.Id()][k] = 1
} else if c > 1 {
} else if int(max) < nchild && c > 1 {
// Else we have a character shared by at least 2 children: OK
states[cur.Id()][k] = 1
} else {
Expand Down
119 changes: 94 additions & 25 deletions asr/parsimony.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"errors"
"fmt"
"math/rand"

"github.com/fredericlemoine/goalign/align"
"github.com/fredericlemoine/gotree/io"
Expand All @@ -20,7 +21,7 @@ const (
// Computed using parsimony
// Sequences will be located in the comment field of each node
// at the first index
func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int) error {
func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) error {
var err error
var nodes []*tree.Node = t.Nodes()
var seqs []*AncestralSequence = make([]*AncestralSequence, len(nodes))
Expand All @@ -43,14 +44,16 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int) error {
if err != nil {
return err
}
if (algo == ALGO_DOWNPASS) || (algo == ALGO_DELTRAN) {
parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex)
if algo == ALGO_DELTRAN {
parsimonyDELTRAN(t.Root(), nil, a, seqs, charToIndex)
}
} else if algo == ALGO_ACCTRAN {
parsimonyACCTRAN(t.Root(), nil, a, seqs, charToIndex)
} else {

switch algo {
case ALGO_DOWNPASS:
parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex, randomResolve)
case ALGO_DELTRAN:
parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex, false)
parsimonyDELTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve)
case ALGO_ACCTRAN:
parsimonyACCTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve)
default:
return fmt.Errorf("Parsimony algorithm %d unkown", algo)
}

Expand Down Expand Up @@ -89,12 +92,14 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS
// and then we take character(s) with the maximum number of children
// And that for each site of the alignment
for j, ances := range seqs[cur.Id()].seq {
nchild := 0
for _, child := range cur.Neigh() {
if child != prev {
counts := seqs[child.Id()].seq[j].counts
for k, c := range counts {
ances.counts[k] += c
}
nchild++
}
}
// Now we set to 0 all character states that are not the max, and to 1 the states that are the max
Expand All @@ -105,9 +110,17 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS
}
}
for k, c := range ances.counts {
if c == max {
if int(max) == nchild && c == max {
// We have a characters shared by all neighbors wo parent: Intersection ok
ances.counts[k] = 1
} else if int(max) == 1 && c > 0 {
// Else we have no intersection between any children: take union
ances.counts[k] = 1
} else if int(max) < nchild && c > 1 {
// Else we have a character shared by at least 2 children: OK
ances.counts[k] = 1
} else {
// Else we do not take it
ances.counts[k] = 0
}
}
Expand All @@ -117,7 +130,7 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS
}

// Second step of the parsimony computatation: From root to tips
func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int) {
func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) {
// If it is not a tip and not the root
if !cur.Tip() {
if prev != nil {
Expand All @@ -128,42 +141,58 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*Ancestra
for j, ances := range seqs[cur.Id()].seq {
state := AncestralState{make([]float64, len(charToIndex))}
// With Parent
nchild := 0
for _, child := range cur.Neigh() {
counts := seqs[child.Id()].seq[j].counts
for k, c := range counts {
state.counts[k] += c
}
nchild++
}
// Now we set to 0 all character states that are not the max, and to 1 the states that are the max
maxall := 0.0
nbmaxall := 0

// If intersection of states of children and parent is emtpy:
// then State of cur node == Union of intersection of nodes 2 by 2
// If state is still empty, then state of cur node is union of all states
max := 0.0
for _, c := range state.counts {
if c > maxall {
maxall = c
nbmaxall = 1
} else if c == maxall {
nbmaxall++
if c > max {
max = c
}
}
for k, c := range state.counts {
if c == maxall {
if int(max) == nchild && c == max {
// We have a characters shared by all neighbors and parent: Intersection ok
ances.counts[k] = 1
} else if int(max) == 1 && c > 0 {
// Else we have no intersection between any children: take union
ances.counts[k] = 1
} else if int(max) < nchild && c > 1 {
// Else we have a character shared by at least 2 children: OK
ances.counts[k] = 1
} else {
// Else we do not take it
ances.counts[k] = 0
}
}
}
}

// We randomly resolve ambiguities
// Even for the root (outside if statement)
if randomResolve {
randomlyResolveNodeStates(cur, seqs)
}

for _, child := range cur.Neigh() {
if child != prev {
parsimonyDOWNPASS(child, cur, a, seqs, charToIndex)
parsimonyDOWNPASS(child, cur, a, seqs, charToIndex, randomResolve)
}
}
}
}

// Third step of the parsimony computation for resolving ambiguities
func parsimonyDELTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int) {
func parsimonyDELTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) {
// If it is not a tip
if !cur.Tip() {
// If it is not the root
Expand Down Expand Up @@ -194,19 +223,31 @@ func parsimonyDELTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*Ancestral
}
}
}

// We resolve ambiguities if randomResolve
// Even for the root (outside if statement)
if randomResolve {
randomlyResolveNodeStates(cur, seqs)
}

// We go down in the tree
for _, child := range cur.Neigh() {
if child != prev {
parsimonyDELTRAN(child, cur, a, seqs, charToIndex)
parsimonyDELTRAN(child, cur, a, seqs, charToIndex, randomResolve)
}
}
}
}

// Second step of the parsimony computation (instead of DOWNPASS) for resolving ambiguities
func parsimonyACCTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int) {
func parsimonyACCTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) {
// If it is not a tip
if !cur.Tip() {
// We resolve ambiguities if randomResolve
if randomResolve {
randomlyResolveNodeStates(cur, seqs)
}

// We Analyze each direct child
for _, child := range cur.Neigh() {
if child != prev {
Expand Down Expand Up @@ -239,7 +280,35 @@ func parsimonyACCTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*Ancestral
// We go down in the tree
for _, child := range cur.Neigh() {
if child != prev {
parsimonyACCTRAN(child, cur, a, seqs, charToIndex)
parsimonyACCTRAN(child, cur, a, seqs, charToIndex, randomResolve)
}
}
}
}

// Randomly resolve all sequences states of the node that are ambiguous
func randomlyResolveNodeStates(node *tree.Node, seqs []*AncestralSequence) {
for _, ances := range seqs[node.Id()].seq {
numstates := 0
for _, c := range ances.counts {
if c >= 1 {
numstates++
}
}
if numstates > 1 {
curstate := 0
randstate := rand.Intn(numstates)
for k, c := range ances.counts {
if c >= 1 {
if curstate == randstate {
ances.counts[k] = 1
} else {
ances.counts[k] = 0
}
curstate++
} else {
ances.counts[k] = 0
}
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/acr.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Depending on the chosen algorithm, it will run:
b) DOWN-PASS+DELTRAN or
c) ACCTRAN
It works on multifurcated trees, by taking the most frequent state(s).
Should work on multifurcated trees.
If --random-resolve is given then, during the last pass, each time
a node with several possible states still exists, one state is chosen
Expand Down
Loading

0 comments on commit 27b9cbe

Please sign in to comment.