github.com/vertgenlab/gonomics@v1.0.0/cmd/DEPRECATED/mouseRecon/mouseRecon.go (about)

     1  // Command Group: "Sequence Evolution & Reconstruction"
     2  
     3  // WARNING: This program is now deprecated. Please use 'reconstructSeq' as its replacement.
     4  // Returns maximum likelihood ancestral sequences from a Mouse-Rat-ChineseHamster-Squirrel multiple alignment in multiFa format
     5  package main
     6  
     7  import (
     8  	"flag"
     9  	"fmt"
    10  	"log"
    11  
    12  	"github.com/vertgenlab/gonomics/dna"
    13  	"github.com/vertgenlab/gonomics/exception"
    14  	"github.com/vertgenlab/gonomics/expandedTree"
    15  	"github.com/vertgenlab/gonomics/fasta"
    16  	"github.com/vertgenlab/gonomics/reconstruct"
    17  )
    18  
    19  func mouseReconMraMle(inFile string, outFile string, treeFileName string, probThreshold float64, nonBiasProbThreshold float64) {
    20  	var tree, mouseNode, ratNode, hamsterNode, squirrelNode, mraNode *expandedTree.ETree
    21  	var err error
    22  	var i int
    23  
    24  	tree, err = expandedTree.ReadTree(treeFileName, inFile)
    25  	exception.FatalOnErr(err)
    26  
    27  	//roll call
    28  	mouseNode = expandedTree.FindNodeName(tree, "mm10")
    29  	if mouseNode == nil {
    30  		log.Fatalf("Didn't find mm10 in the tree\n")
    31  	}
    32  	ratNode = expandedTree.FindNodeName(tree, "rn7")
    33  	if ratNode == nil {
    34  		log.Fatalf("Didn't find rn7 in the tree\n")
    35  	}
    36  	hamsterNode = expandedTree.FindNodeName(tree, "criGriChoV2")
    37  	if hamsterNode == nil {
    38  		log.Fatalf("Didn't find criGriChoV2 in the tree\n")
    39  	}
    40  	squirrelNode = expandedTree.FindNodeName(tree, "speTri2")
    41  	if squirrelNode == nil {
    42  		log.Fatalf("Didn't find speTri2 in the tree\n")
    43  	}
    44  	mraNode = expandedTree.FindNodeName(tree, "mra")
    45  	if mraNode == nil {
    46  		log.Fatalf("Didn't find mra in the tree\n")
    47  	}
    48  
    49  	for i = range mouseNode.Fasta.Seq {
    50  		if mraIsPresent(mouseNode.Fasta.Seq[i], ratNode.Fasta.Seq[i], hamsterNode.Fasta.Seq[i], squirrelNode.Fasta.Seq[i]) {
    51  			reconMraBase(tree, mouseNode, mraNode, i, probThreshold, nonBiasProbThreshold)
    52  		} else {
    53  			mraNode.Fasta.Seq = append(mraNode.Fasta.Seq, dna.Gap)
    54  		}
    55  	}
    56  	fasta.Write(outFile, []fasta.Fasta{*mouseNode.Fasta, *ratNode.Fasta, *hamsterNode.Fasta, *squirrelNode.Fasta, *mraNode.Fasta})
    57  }
    58  
    59  func mraIsPresent(mouse, rat, hamster, squirrel dna.Base) bool {
    60  	if baseIsPresent(mouse) && baseIsPresent(rat) {
    61  		return true
    62  	}
    63  	if (baseIsPresent(mouse) || baseIsPresent(rat)) && (baseIsPresent(hamster) || baseIsPresent(squirrel)) {
    64  		return true
    65  	}
    66  	return false
    67  }
    68  
    69  func baseIsPresent(b dna.Base) bool {
    70  	if dna.DefineBase(b) || b == dna.N {
    71  		return true
    72  	}
    73  	return false
    74  }
    75  
    76  func reconMraBase(root, mouseNode, nodeToRecon *expandedTree.ETree, position int, probThreshold float64, nonBiasProbThreshold float64) {
    77  	var likelihoods []float64
    78  	var nextBase dna.Base
    79  	reconstruct.SetState(root, position)
    80  	likelihoods = reconstruct.FixFc(root, nodeToRecon)
    81  	nextBase = likelihoodToBaseBias(likelihoods, mouseNode.Fasta.Seq[position], probThreshold, nonBiasProbThreshold)
    82  	nodeToRecon.Fasta.Seq = append(nodeToRecon.Fasta.Seq, nextBase)
    83  }
    84  
    85  func likelihoodToBaseBias(likes []float64, biasBase dna.Base, probThreshold float64, nonBiasProbThreshold float64) dna.Base {
    86  	var total, nonBiasTotal, bestProb float64
    87  	var i int
    88  	var answer dna.Base
    89  
    90  	if biasBase == dna.Gap {
    91  		answer = dna.N
    92  	} else {
    93  		answer = biasBase
    94  	}
    95  
    96  	for i = range likes {
    97  		total += likes[i]
    98  		if i != int(biasBase) {
    99  			nonBiasTotal += likes[i]
   100  		}
   101  	}
   102  
   103  	for i = range likes {
   104  		if likes[i]/total >= probThreshold && likes[i] > bestProb && nonBiasTotal/total >= nonBiasProbThreshold {
   105  			bestProb = likes[i]
   106  			answer = dna.Base(i)
   107  		}
   108  	}
   109  	return answer
   110  }
   111  
   112  func usage() {
   113  	fmt.Print(
   114  		"mouseRecon - Returns maximum likelihood ancestral sequences from a Mouse-Rat-ChineseHamster-Squirrel multiple alignment in multiFa format.\n" +
   115  			"Usage:\n" +
   116  			"mouseRecon input.fa output.fa\n" +
   117  			"options:\n")
   118  	flag.PrintDefaults()
   119  }
   120  
   121  func main() {
   122  	var expectedNumArgs int = 2
   123  	var tree *string = flag.String("mleTree", "", "Filename for newick tree with branch lengths. Must have the anticipated assembly names and the desired ancestral node labeled.")
   124  	var probThreshold *float64 = flag.Float64("probThreshold", 0.0, "The minimum probability assigned to a base required for it to be called.")
   125  	var nonBiasProbThreshold *float64 = flag.Float64("nonBiasProbThreshold", 0.0, "The summation of all base probabilities (other than the biased species) must pass this threshold for a non-biased species base to be considered in the ancestral state.")
   126  
   127  	flag.Usage = usage
   128  	log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
   129  	flag.Parse()
   130  
   131  	if len(flag.Args()) != expectedNumArgs {
   132  		flag.Usage()
   133  		log.Fatalf("Error: expecting %d arguments, but got %d\n",
   134  			expectedNumArgs, len(flag.Args()))
   135  	}
   136  
   137  	inFile := flag.Arg(0)
   138  	outFile := flag.Arg(1)
   139  
   140  	mouseReconMraMle(inFile, outFile, *tree, *probThreshold, *nonBiasProbThreshold)
   141  }