From 493929311f18a7a745b9cf13d2d150e26560e4dd Mon Sep 17 00:00:00 2001 From: Frederic Lemoine Date: Wed, 14 Mar 2018 11:35:51 +0100 Subject: [PATCH] Corrected Parsimony --- acr/parsimony.go | 4 +- asr/parsimony.go | 141 +++++++++++++++++++++++++++-------------------- test.sh | 78 +++++++++++++++++++++++++- 3 files changed, 160 insertions(+), 63 deletions(-) diff --git a/acr/parsimony.go b/acr/parsimony.go index 6b54b6e..fd2703b 100644 --- a/acr/parsimony.go +++ b/acr/parsimony.go @@ -36,7 +36,6 @@ func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, rando var upstates []AncestralState = make([]AncestralState, len(nodes)) // Upside states of each node // Initialize indices of characters alphabet := make([]string, 0, 10) - sort.Strings(alphabet) seenState := make(map[string]bool) for _, state := range tipCharacters { if _, ok := seenState[state]; !ok { @@ -44,6 +43,7 @@ func ParsimonyAcr(t *tree.Tree, tipCharacters map[string]string, algo int, rando } seenState[state] = true } + sort.Strings(alphabet) stateIndices := AncestralStateIndices(alphabet) // We initialize all ancestral states @@ -170,8 +170,8 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, for k, c := range states[child.Id()] { state[k] += c } + nchild++ } - nchild++ } computeParsimony(state, states[cur.Id()], nchild) } diff --git a/asr/parsimony.go b/asr/parsimony.go index 43dc194..6bbcbef 100644 --- a/asr/parsimony.go +++ b/asr/parsimony.go @@ -25,6 +25,8 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) var err error var nodes []*tree.Node = t.Nodes() var seqs []*AncestralSequence = make([]*AncestralSequence, len(nodes)) + var upseqs []*AncestralSequence = make([]*AncestralSequence, len(nodes)) // Upside seqs of each node + // Initialize indices of characters var charToIndex map[rune]int = make(map[rune]int) for i, c := range a.AlphabetCharacters() { @@ -38,6 +40,9 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) if seqs[i], err = NewAncestralSequence(a.Length(), len(a.AlphabetCharacters())); err != nil { return err } + if upseqs[i], err = NewAncestralSequence(a.Length(), len(a.AlphabetCharacters())); err != nil { + return err + } } err = parsimonyUPPASS(t.Root(), nil, a, seqs, charToIndex) @@ -47,9 +52,9 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) switch algo { case ALGO_DOWNPASS: - parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex, randomResolve) + parsimonyDOWNPASS(t.Root(), nil, a, seqs, upseqs, charToIndex, randomResolve) case ALGO_DELTRAN: - parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex, false) + parsimonyDOWNPASS(t.Root(), nil, a, seqs, upseqs, charToIndex, false) parsimonyDELTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve) case ALGO_ACCTRAN: parsimonyACCTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve) @@ -103,79 +108,70 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS } } - // we take the state present in - // the largest number of children - max := 0.0 - for _, c := range ances.counts { - if c > max { - max = c - } - } - for k, c := range ances.counts { - if int(max) == nchild && c == max { - // We have a characters shared by all neighbors wo parent: Intersection ok - ances.counts[k] = 1 - } else if int(max) == 1 && c > 0 { - // Else we have no intersection between any children: take union - ances.counts[k] = 1 - } else if int(max) < nchild && c > 1 { - // Else we have a character shared by at least 2 children: OK - ances.counts[k] = 1 - } else { - // Else we do not take it - ances.counts[k] = 0 - } - } + computeParsimony(ances, ances, nchild) } } return nil } // Second step of the parsimony computatation: From root to tips -func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) { +func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, + seqs []*AncestralSequence, upseqs []*AncestralSequence, + charToIndex map[rune]int, randomResolve bool) { // If it is not a tip and not the root if !cur.Tip() { + // We compute the up sequence states for each children of + // the current node (may be the root) + // i.e. the parsimony from the upside of the tree + for _, child := range cur.Neigh() { + if child != prev { + for j, _ := range seqs[cur.Id()].seq { + state := AncestralState{make([]float64, len(charToIndex))} + nchild := 0 + // already computed up state of the current node + if prev != nil { // Not the root + nchild++ + for k, c := range upseqs[cur.Id()].seq[j].counts { + state.counts[k] += c + } + } + // already computed down states of children of current node + // except current child _child_ + for _, child2 := range cur.Neigh() { + if child2 != prev && child2 != child { + for k, c := range seqs[child2.Id()].seq[j].counts { + state.counts[k] += c + } + nchild++ + } + } + // Compute the up state now + computeParsimony(state, upseqs[child.Id()].seq[j], nchild) + } + } + } + if prev != nil { // As we are manipulating trees with multifurcations // For each character we count the number of children having it // and then we take character(s) with the maximum number of children // And that for each site of the alignment - for j, ances := range seqs[cur.Id()].seq { + for j, _ := range seqs[cur.Id()].seq { state := AncestralState{make([]float64, len(charToIndex))} - // With Parent - nchild := 0 - for _, child := range cur.Neigh() { - counts := seqs[child.Id()].seq[j].counts - for k, c := range counts { - state.counts[k] += c - } - nchild++ - } - - // If intersection of states of children and parent is emtpy: - // then State of cur node == Union of intersection of nodes 2 by 2 - // If state is still empty, then state of cur node is union of all states - max := 0.0 - for _, c := range state.counts { - if c > max { - max = c - } + // With Parent using its upseq + nchild := 1 + for k, c := range upseqs[cur.Id()].seq[j].counts { + state.counts[k] += c } - for k, c := range state.counts { - if int(max) == nchild && c == max { - // We have a characters shared by all neighbors and parent: Intersection ok - ances.counts[k] = 1 - } else if int(max) == 1 && c > 0 { - // Else we have no intersection between any children: take union - ances.counts[k] = 1 - } else if int(max) < nchild && c > 1 { - // Else we have a character shared by at least 2 children: OK - ances.counts[k] = 1 - } else { - // Else we do not take it - ances.counts[k] = 0 + for _, child := range cur.Neigh() { + if child != prev { + for k, c := range seqs[child.Id()].seq[j].counts { + state.counts[k] += c + } + nchild++ } } + computeParsimony(state, seqs[cur.Id()].seq[j], nchild) } } @@ -187,12 +183,39 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*Ancestra for _, child := range cur.Neigh() { if child != prev { - parsimonyDOWNPASS(child, cur, a, seqs, charToIndex, randomResolve) + parsimonyDOWNPASS(child, cur, a, seqs, upseqs, charToIndex, randomResolve) } } } } +func computeParsimony(neighborStates AncestralState, currentStates AncestralState, nchild int) { + // If intersection of states of children and parent is emtpy: + // then State of cur node == Union of intersection of nodes 2 by 2 + // If state is still empty, then state of cur node is union of all states + max := 0.0 + for _, c := range neighborStates.counts { + if c > max { + max = c + } + } + for k, c := range neighborStates.counts { + if int(max) == nchild && c == max { + // We have a characters shared by all neighbors and parent: Intersection ok + currentStates.counts[k] = 1 + } else if int(max) == 1 && c > 0 { + // Else we have no intersection between any children: take union + currentStates.counts[k] = 1 + } else if int(max) < nchild && c > 1 { + // Else we have a character shared by at least 2 children: OK + currentStates.counts[k] = 1 + } else { + // Else we do not take it + currentStates.counts[k] = 0 + } + } +} + // Third step of the parsimony computation for resolving ambiguities func parsimonyDELTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) { // If it is not a tip diff --git a/test.sh b/test.sh index 2e0cb49..02b1283 100755 --- a/test.sh +++ b/test.sh @@ -866,7 +866,7 @@ gotree reformat newick -i nexus -f nexus -o result diff expected result rm -f expected result nexus -echo "->gotree acr" +echo "->gotree acr acctran" cat > tmp_states.txt <gotree asr" +echo "->gotree acr downpass" +cat > tmp_states.txt < tmp_tree.txt < expected <gotree acr deltran" +cat > tmp_states.txt < tmp_tree.txt < expected <gotree asr acctran" cat > tmp_states.txt <gotree asr downpass" +cat > tmp_states.txt < tmp_tree.txt < expected <