diff --git a/acr/acr_test.go b/acr/acr_test.go index 5aaed93..e044c56 100644 --- a/acr/acr_test.go +++ b/acr/acr_test.go @@ -84,18 +84,18 @@ func TestACRDELTRANParsimony(t *testing.T) { testCheckStates(t, 2, ni, "t3", states, stateIndices, "B") testCheckStates(t, 2, ni, "t4", states, stateIndices, "B") testCheckStates(t, 2, ni, "t5", states, stateIndices, "A") - testCheckStates(t, 2, ni, "t6", states, stateIndices, "A", "B") - testCheckStates(t, 2, ni, "t7", states, stateIndices, "A", "B") + testCheckStates(t, 2, ni, "t6", states, stateIndices, "B") + testCheckStates(t, 2, ni, "t7", states, stateIndices, "B") testCheckStates(t, 2, ni, "t8", states, stateIndices, "B") testCheckStates(t, 2, ni, "t9", states, stateIndices, "B") testCheckStates(t, 2, ni, "t10", states, stateIndices, "A") - testCheckStates(t, 2, ni, "t11", states, stateIndices, "A", "B") + testCheckStates(t, 2, ni, "t11", states, stateIndices, "A") testCheckStates(t, 2, ni, "t12", states, stateIndices, "A") testCheckStates(t, 2, ni, "t13", states, stateIndices, "A") testCheckStates(t, 2, ni, "t14", states, stateIndices, "A") testCheckStates(t, 2, ni, "t15", states, stateIndices, "A") testCheckStates(t, 2, ni, "t16", states, stateIndices, "A") - testCheckStates(t, 2, ni, "t17", states, stateIndices, "A", "B") + testCheckStates(t, 2, ni, "t17", states, stateIndices, "A") testCheckStates(t, 2, ni, "t18", states, stateIndices, "A", "B") testCheckStates(t, 2, ni, "t19", states, stateIndices, "A", "B") testCheckStates(t, 2, ni, "t20", states, stateIndices, "A") @@ -108,8 +108,8 @@ func TestACRDELTRANParsimony(t *testing.T) { testCheckStates(t, 2, ni, "t3", states, stateIndices, "B") testCheckStates(t, 2, ni, "t4", states, stateIndices, "B") testCheckStates(t, 2, ni, "t5", states, stateIndices, "A") - testCheckStates(t, 2, ni, "t6", states, stateIndices, "A") - testCheckStates(t, 2, ni, "t7", states, stateIndices, "A") + testCheckStates(t, 2, ni, "t6", states, stateIndices, "B") + testCheckStates(t, 2, ni, "t7", states, stateIndices, "B") testCheckStates(t, 2, ni, "t8", states, stateIndices, "B") testCheckStates(t, 2, ni, "t9", states, stateIndices, "B") testCheckStates(t, 2, ni, "t10", states, stateIndices, "A") @@ -257,8 +257,8 @@ func TestALLDELTRAN(t *testing.T) { if err != nil { t.Error(err) } - testCheckMap(t, "t6", statemap, "A") - testCheckMap(t, "t7", statemap, "A") + testCheckMap(t, "t6", statemap, "B") + testCheckMap(t, "t7", statemap, "B") testCheckMap(t, "t11", statemap, "A") testCheckMap(t, "t14", statemap, "A") testCheckMap(t, "t16", statemap, "A") @@ -304,12 +304,12 @@ func TestALLDOWNPASS(t *testing.T) { if err != nil { t.Error(err) } - testCheckMap(t, "t6", statemap, "A", "B") - testCheckMap(t, "t7", statemap, "A", "B") - testCheckMap(t, "t11", statemap, "A", "B") + testCheckMap(t, "t6", statemap, "B") + testCheckMap(t, "t7", statemap, "B") + testCheckMap(t, "t11", statemap, "A") testCheckMap(t, "t14", statemap, "A") testCheckMap(t, "t16", statemap, "A") - testCheckMap(t, "t17", statemap, "A", "B") + testCheckMap(t, "t17", statemap, "A") testCheckMap(t, "t18", statemap, "A", "B") testCheckMap(t, "t19", statemap, "A", "B") testCheckMap(t, "t20", statemap, "A") diff --git a/acr/parsimony.go b/acr/parsimony.go index effd477..27c0acf 100644 --- a/acr/parsimony.go +++ b/acr/parsimony.go @@ -103,11 +103,13 @@ func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, stat // Else // State of cur node == Intersection of States of children if // works with trees having multifurcations + nchild := 0 for _, child := range cur.Neigh() { if child != prev { for k, c := range states[child.Id()] { states[cur.Id()][k] += c } + nchild++ } } // Now we set to 0 all character states that are present @@ -119,13 +121,13 @@ func parsimonyUPPASS(cur, prev *tree.Node, tipCharacters map[string]string, stat } } for k, c := range states[cur.Id()] { - if int(max) == (len(cur.Neigh())-1) && c == max { + if int(max) == nchild && c == max { // We have a characters shared by all neighbors wo parent: Intersection ok states[cur.Id()][k] = 1 - } else if max == 1 && c > 0 { + } else if int(max) == 1 && c > 0 { // Else we have no intersection between any children: take union states[cur.Id()][k] = 1 - } else if c > 1 { + } else if int(max) < nchild && c > 1 { // Else we have a character shared by at least 2 children: OK states[cur.Id()][k] = 1 } else { @@ -147,10 +149,12 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, states []AncestralState, stateIndic // and then we take character(s) with the maximum number of children state := make(AncestralState, len(stateIndices)) // With Parent + nchild := 0 for _, child := range cur.Neigh() { for k, c := range states[child.Id()] { state[k] += c } + nchild++ } // If intersection of states of children and parent is emtpy: @@ -163,13 +167,13 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, states []AncestralState, stateIndic } } for k, c := range state { - if int(max) == len(cur.Neigh()) && c == max { + if int(max) == nchild && c == max { // We have a characters shared by all neighbors and parent: Intersection ok states[cur.Id()][k] = 1 - } else if max == 1 && c > 0 { + } else if int(max) == 1 && c > 0 { // Else we have no intersection between any children: take union states[cur.Id()][k] = 1 - } else if c > 1 { + } else if int(max) < nchild && c > 1 { // Else we have a character shared by at least 2 children: OK states[cur.Id()][k] = 1 } else { diff --git a/asr/parsimony.go b/asr/parsimony.go index 0424690..9dc9b5b 100644 --- a/asr/parsimony.go +++ b/asr/parsimony.go @@ -4,6 +4,7 @@ import ( "bytes" "errors" "fmt" + "math/rand" "github.com/fredericlemoine/goalign/align" "github.com/fredericlemoine/gotree/io" @@ -20,7 +21,7 @@ const ( // Computed using parsimony // Sequences will be located in the comment field of each node // at the first index -func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int) error { +func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int, randomResolve bool) error { var err error var nodes []*tree.Node = t.Nodes() var seqs []*AncestralSequence = make([]*AncestralSequence, len(nodes)) @@ -43,14 +44,16 @@ func ParsimonyAsr(t *tree.Tree, a align.Alignment, algo int) error { if err != nil { return err } - if (algo == ALGO_DOWNPASS) || (algo == ALGO_DELTRAN) { - parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex) - if algo == ALGO_DELTRAN { - parsimonyDELTRAN(t.Root(), nil, a, seqs, charToIndex) - } - } else if algo == ALGO_ACCTRAN { - parsimonyACCTRAN(t.Root(), nil, a, seqs, charToIndex) - } else { + + switch algo { + case ALGO_DOWNPASS: + parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex, randomResolve) + case ALGO_DELTRAN: + parsimonyDOWNPASS(t.Root(), nil, a, seqs, charToIndex, false) + parsimonyDELTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve) + case ALGO_ACCTRAN: + parsimonyACCTRAN(t.Root(), nil, a, seqs, charToIndex, randomResolve) + default: return fmt.Errorf("Parsimony algorithm %d unkown", algo) } @@ -89,12 +92,14 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS // and then we take character(s) with the maximum number of children // And that for each site of the alignment for j, ances := range seqs[cur.Id()].seq { + nchild := 0 for _, child := range cur.Neigh() { if child != prev { counts := seqs[child.Id()].seq[j].counts for k, c := range counts { ances.counts[k] += c } + nchild++ } } // Now we set to 0 all character states that are not the max, and to 1 the states that are the max @@ -105,9 +110,17 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS } } for k, c := range ances.counts { - if c == max { + if int(max) == nchild && c == max { + // We have a characters shared by all neighbors wo parent: Intersection ok + ances.counts[k] = 1 + } else if int(max) == 1 && c > 0 { + // Else we have no intersection between any children: take union + ances.counts[k] = 1 + } else if int(max) < nchild && c > 1 { + // Else we have a character shared by at least 2 children: OK ances.counts[k] = 1 } else { + // Else we do not take it ances.counts[k] = 0 } } @@ -117,7 +130,7 @@ func parsimonyUPPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralS } // Second step of the parsimony computatation: From root to tips -func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int) { +func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) { // If it is not a tip and not the root if !cur.Tip() { if prev != nil { @@ -128,42 +141,58 @@ func parsimonyDOWNPASS(cur, prev *tree.Node, a align.Alignment, seqs []*Ancestra for j, ances := range seqs[cur.Id()].seq { state := AncestralState{make([]float64, len(charToIndex))} // With Parent + nchild := 0 for _, child := range cur.Neigh() { counts := seqs[child.Id()].seq[j].counts for k, c := range counts { state.counts[k] += c } + nchild++ } - // Now we set to 0 all character states that are not the max, and to 1 the states that are the max - maxall := 0.0 - nbmaxall := 0 + + // If intersection of states of children and parent is emtpy: + // then State of cur node == Union of intersection of nodes 2 by 2 + // If state is still empty, then state of cur node is union of all states + max := 0.0 for _, c := range state.counts { - if c > maxall { - maxall = c - nbmaxall = 1 - } else if c == maxall { - nbmaxall++ + if c > max { + max = c } } for k, c := range state.counts { - if c == maxall { + if int(max) == nchild && c == max { + // We have a characters shared by all neighbors and parent: Intersection ok + ances.counts[k] = 1 + } else if int(max) == 1 && c > 0 { + // Else we have no intersection between any children: take union + ances.counts[k] = 1 + } else if int(max) < nchild && c > 1 { + // Else we have a character shared by at least 2 children: OK ances.counts[k] = 1 } else { + // Else we do not take it ances.counts[k] = 0 } } } } + + // We randomly resolve ambiguities + // Even for the root (outside if statement) + if randomResolve { + randomlyResolveNodeStates(cur, seqs) + } + for _, child := range cur.Neigh() { if child != prev { - parsimonyDOWNPASS(child, cur, a, seqs, charToIndex) + parsimonyDOWNPASS(child, cur, a, seqs, charToIndex, randomResolve) } } } } // Third step of the parsimony computation for resolving ambiguities -func parsimonyDELTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int) { +func parsimonyDELTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) { // If it is not a tip if !cur.Tip() { // If it is not the root @@ -194,19 +223,31 @@ func parsimonyDELTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*Ancestral } } } + + // We resolve ambiguities if randomResolve + // Even for the root (outside if statement) + if randomResolve { + randomlyResolveNodeStates(cur, seqs) + } + // We go down in the tree for _, child := range cur.Neigh() { if child != prev { - parsimonyDELTRAN(child, cur, a, seqs, charToIndex) + parsimonyDELTRAN(child, cur, a, seqs, charToIndex, randomResolve) } } } } // Second step of the parsimony computation (instead of DOWNPASS) for resolving ambiguities -func parsimonyACCTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int) { +func parsimonyACCTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*AncestralSequence, charToIndex map[rune]int, randomResolve bool) { // If it is not a tip if !cur.Tip() { + // We resolve ambiguities if randomResolve + if randomResolve { + randomlyResolveNodeStates(cur, seqs) + } + // We Analyze each direct child for _, child := range cur.Neigh() { if child != prev { @@ -239,7 +280,35 @@ func parsimonyACCTRAN(cur, prev *tree.Node, a align.Alignment, seqs []*Ancestral // We go down in the tree for _, child := range cur.Neigh() { if child != prev { - parsimonyACCTRAN(child, cur, a, seqs, charToIndex) + parsimonyACCTRAN(child, cur, a, seqs, charToIndex, randomResolve) + } + } + } +} + +// Randomly resolve all sequences states of the node that are ambiguous +func randomlyResolveNodeStates(node *tree.Node, seqs []*AncestralSequence) { + for _, ances := range seqs[node.Id()].seq { + numstates := 0 + for _, c := range ances.counts { + if c >= 1 { + numstates++ + } + } + if numstates > 1 { + curstate := 0 + randstate := rand.Intn(numstates) + for k, c := range ances.counts { + if c >= 1 { + if curstate == randstate { + ances.counts[k] = 1 + } else { + ances.counts[k] = 0 + } + curstate++ + } else { + ances.counts[k] = 0 + } } } } diff --git a/cmd/acr.go b/cmd/acr.go index 93eca98..dde9427 100644 --- a/cmd/acr.go +++ b/cmd/acr.go @@ -32,7 +32,7 @@ Depending on the chosen algorithm, it will run: b) DOWN-PASS+DELTRAN or c) ACCTRAN -It works on multifurcated trees, by taking the most frequent state(s). +Should work on multifurcated trees. If --random-resolve is given then, during the last pass, each time a node with several possible states still exists, one state is chosen diff --git a/cmd/asr.go b/cmd/asr.go index 7648ed2..44089d5 100644 --- a/cmd/asr.go +++ b/cmd/asr.go @@ -4,7 +4,9 @@ import ( "bufio" "fmt" goio "io" + "math/rand" "strings" + "time" "github.com/fredericlemoine/goalign/align" "github.com/fredericlemoine/goalign/io/fasta" @@ -18,6 +20,7 @@ import ( var asralign string var asrphylip bool var asrinputstrict bool +var asrrandomresolve bool // Resolve ambiguities randomly in the downpass/deltran/acctran algo // asrCmd represents the asr command var asrCmd = &cobra.Command{ @@ -25,12 +28,18 @@ var asrCmd = &cobra.Command{ Short: "Reconstructs most parsimonious ancestral sequences", Long: `Reconstructs most parsimonious ancestral sequences. -It does 2 tree straversal: -1) One postorder -2) One preorder +Depending on the chosen algorithm, it will run: +1) UP-PASS and +2) Either + a) DOWN-PASS or + b) DOWN-PASS+DELTRAN or + c) ACCTRAN -Works on multifurcated trees, by taking the most frequent state(s). +Should work on multifurcated trees +If --random-resolve is given then, during the last pass, each time +a node with several possible states still exists, one state is chosen +randomly before going deeper in the tree. `, Run: func(cmd *cobra.Command, args []string) { var align align.Alignment @@ -38,6 +47,7 @@ Works on multifurcated trees, by taking the most frequent state(s). var r *bufio.Reader var err error var algo int + rand.Seed(seed) switch strings.ToLower(parsimonyAlgo) { case "acctran": @@ -75,7 +85,7 @@ Works on multifurcated trees, by taking the most frequent state(s). // Computing parsimony ASR and writing each trees f := openWriteFile(outtreefile) for t := range treechan { - err = asr.ParsimonyAsr(t.Tree, align, algo) + err = asr.ParsimonyAsr(t.Tree, align, algo, asrrandomresolve) if err != nil { io.ExitWithMessage(err) } @@ -93,4 +103,6 @@ func init() { asrCmd.PersistentFlags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree") asrCmd.PersistentFlags().StringVarP(&outtreefile, "output", "o", "stdout", "Output file") asrCmd.PersistentFlags().StringVar(&parsimonyAlgo, "algo", "acctran", "Parsimony algorithm for resolving ambiguities: acctran, deltran, or downpass") + asrCmd.PersistentFlags().BoolVar(&asrrandomresolve, "random-resolve", false, "Random resolve states when several possibilities in: acctran, deltran, or downpass") + asrCmd.Flags().Int64VarP(&seed, "seed", "s", time.Now().UTC().UnixNano(), "Initial Random Seed") }