github.com/vertgenlab/gonomics@v1.0.0/genomeGraph/genomeGraph_test.go (about)

     1  package genomeGraph
     2  
     3  import (
     4  	"log"
     5  	"sync"
     6  	"testing"
     7  	"time"
     8  
     9  	"github.com/vertgenlab/gonomics/fastq"
    10  	"github.com/vertgenlab/gonomics/fileio"
    11  	"github.com/vertgenlab/gonomics/giraf"
    12  )
    13  
    14  func TestWorkerWithWriting(t *testing.T) {
    15  	var output string = "testdata/pairedTest.giraf"
    16  	var tileSize int = 32
    17  	var stepSize int = 32
    18  	var numberOfReads int = 100
    19  	var readLength int = 150
    20  	var mutations int = 0
    21  	var workerWaiter, writerWaiter sync.WaitGroup
    22  	var numWorkers int = 8
    23  	var scoreMatrix = HumanChimpTwoScoreMatrix
    24  	genome := Read("testdata/bigGenome.sg")
    25  	log.Printf("Reading in the genome (simple graph)...\n")
    26  	log.Printf("Indexing the genome...\n")
    27  	log.Printf("Making fastq channel...\n")
    28  	fastqPipe := make(chan fastq.PairedEndBig, 824)
    29  
    30  	log.Printf("Making sam channel...\n")
    31  	samPipe := make(chan giraf.GirafPair, 824)
    32  
    33  	log.Printf("Simulating reads...\n")
    34  	simReads := RandomPairedReads(genome, readLength, numberOfReads, mutations)
    35  	//os.Remove("testdata/simReads_R1.fq")
    36  	//os.Remove("testdata/simReads_R2.fq")
    37  	fastq.WritePair("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", simReads)
    38  	tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize)
    39  	go fastq.ReadPairBigToChan("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", fastqPipe)
    40  	log.Printf("Finished Indexing Genome...\n")
    41  	start := time.Now()
    42  
    43  	log.Printf("Starting alignment worker...\n")
    44  	workerWaiter.Add(numWorkers)
    45  	for i := 0; i < numWorkers; i++ {
    46  		go RoutineFqPairToGiraf(genome, tiles, tileSize, stepSize, scoreMatrix, fastqPipe, samPipe, &workerWaiter)
    47  	}
    48  	go giraf.GirafPairChanToFile(output, samPipe, &writerWaiter)
    49  	writerWaiter.Add(1)
    50  	workerWaiter.Wait()
    51  	close(samPipe)
    52  	log.Printf("Aligners finished and channel closed\n")
    53  	writerWaiter.Wait()
    54  	log.Printf("Sam writer finished and we are all done\n")
    55  	stop := time.Now()
    56  	duration := stop.Sub(start)
    57  	log.Printf("Aligned %d reads in %s (%.1f reads per second).\n", len(simReads)*2, duration, float64(len(simReads)*2)/duration.Seconds())
    58  	fileio.EasyRemove("testdata/simReads_R1.fq")
    59  	fileio.EasyRemove("testdata/simReads_R2.fq")
    60  	fileio.EasyRemove("testdata/pairedTest.giraf")
    61  }
    62  
    63  /*
    64  func TestHippoAln(t *testing.T) {
    65  	var hippo *fastq.Fastq = &fastq.Fastq{Name: "hippoOne", Seq: dna.StringToBases("TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGAGTGATTTGAAGGTACATGGAATACCACCACGGGAGCAAAGC"), Qual: fastq.ToQualUint8([]rune("JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ"))}
    66  	var tileSize int = 32
    67  	var stepSize int = tileSize - 1
    68  	var alignment *sam.SamAln = nil
    69  	var dummyWaiter sync.WaitGroup
    70  	var scoreMatrix = HumanChimpTwoScoreMatrix
    71  	log.Printf("Reading in the genome (simple graph)...\n")
    72  	genome := Read("testdata/bigGenome.sg")
    73  
    74  	log.Printf("Indexing the genome...\n")
    75  	tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize)
    76  
    77  	log.Printf("Making fastq channel...\n")
    78  	fastqPipe := make(chan *fastq.FastqBig, 1)
    79  
    80  	log.Printf("Making sam channel...\n")
    81  	samPipe := make(chan *giraf.Giraf, 1)
    82  
    83  	log.Printf("Starting alignment worker...\n")
    84  	go RoutineFqToGiraf(genome, tiles, scoreMatrix, tileSize, stepSize, fastqPipe, samPipe, &dummyWaiter)
    85  
    86  	log.Printf("Waiting for 5 seconds and then aligning read...\n")
    87  	time.Sleep(5 * time.Second)
    88  
    89  	start := time.Now()
    90  	fastqPipe <- hippo
    91  	alignment = <-samPipe
    92  	end := time.Now()
    93  	duration := end.Sub(start)
    94  	log.Printf("duration:%s\t%s\n", duration, dna.BasesToString(alignment.Seq))
    95  }*/
    96  
    97  /*func TestReadsWithTiming(t *testing.T) {
    98  	//var hippo *fastq.Fastq = &fastq.Fastq{Name: "hippoOne", Seq: dna.StringToBases("ACCTTTTTCTTGTTGTATTTAAAGACAAATGATTTGATTTTATATAGCCAAATGGTTTTCAACGCTAGCAGTGTTTGGTGGCAACTCAGTTTCACCCACGTCTGTTCCAACTAACATGCAATATGTTTCCTGTAATCTGCAGCACGCTTT"), Qual: []rune("JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ")}
    99  	var tileSize int = 32
   100  	var stepSize int = 31
   101  	var numberOfReads int = 1
   102  	var readLength int = 150
   103  	var mutations int = 2
   104  	var dummyWaiter sync.WaitGroup
   105  
   106  	var fastestRead, slowestRead *fastq.Fastq = nil, nil
   107  	var fastestTime, slowestTime float64 = math.MaxFloat64, 0
   108  
   109  	log.Printf("Reading in the genome (simple graph)...\n")
   110  	fa, _ := Read("testdata/bigGenome.sg")
   111  	//TODO: this file does not exist in testdata
   112  	genome, _ := Read("testdata/rabsBepa.gg")
   113  	log.Printf("Simulating reads...\n")
   114  	simReads := RandomReads(fa.Nodes, readLength, numberOfReads, mutations)
   115  
   116  	log.Printf("Indexing the genome...\n")
   117  	tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize)
   118  
   119  	log.Printf("Making fastq channel...\n")
   120  	fastqPipe := make(chan *fastq.Fastq, numberOfReads)
   121  
   122  	log.Printf("Making sam channel...\n")
   123  	samPipe := make(chan *sam.SamAln, numberOfReads)
   124  
   125  	alignments := make([]*sam.SamAln, numberOfReads)
   126  
   127  	log.Printf("Starting alignment worker...\n")
   128  
   129  	go gswWorker(genome, tiles, tileSize, stepSize, fastqPipe, samPipe, &dummyWaiter)
   130  
   131  	log.Printf("Waiting for 5 seconds and then aligning reads...\n")
   132  	time.Sleep(5 * time.Second)
   133  
   134  	for i := 0; i < numberOfReads; i++ {
   135  
   136  		start := time.Now()
   137  		fastqPipe <- simReads[i]
   138  		alignments[i] = <-samPipe
   139  		stop := time.Now()
   140  		duration := stop.Sub(start).Seconds()
   141  		if duration > slowestTime {
   142  			slowestTime = duration
   143  			slowestRead = simReads[i]
   144  		} else if duration < fastestTime {
   145  			fastestTime = duration
   146  			fastestRead = simReads[i]
   147  		}
   148  	}
   149  	log.Printf("Fastest read was (%.4f):\n%s\nSlowest reads was (%.4f):\n%s\n", fastestTime, dna.BasesToString(fastestRead.Seq), slowestTime, dna.BasesToString(slowestRead.Seq))
   150  
   151  	//CheckAnswers(alignments, genome)
   152  }*/
   153  
   154  // This looks like a good test, but it is failing when trying to
   155  // create the graph from the smallFasta and vcfTest
   156  /*func TestVcfGraph(t *testing.T) {
   157  	//smallFasta := fasta.Fasta{Name: "chr1", Seq: dna.StringToBases("ATCGA")}
   158  	smallFasta := fasta.Fasta{Name: "chr1", Seq: dna.StringToBases("ATCGA")}
   159  	//smallFasta := fasta.Fasta{Name: "chr1", Seq: dna.StringToBases("ATCGA")}
   160  	fmt.Printf("Reference sequence is: %s\n", dna.BasesToString(smallFasta.Seq))
   161  	var vcfTest []*vcf.Vcf
   162  	//vcfTest = append(vcfTest, &vcf.Vcf{Chr: "chr1", Pos: 3, Id: ".", Ref: "T", Alt: "TA", Qual: 0, Filter: "PASS", Info: "", Format: "SVTYPE=INS"})
   163  	vcfTest = append(vcfTest, &vcf.Vcf{Chr: "chr1", Pos: 3, Id: ".", Ref: "C", Alt: "T", Qual: 0, Filter: "PASS", Info: "", Format: "SVTYPE=SNP"})
   164  	sg := vChrGraph(NewGraph(), &smallFasta, vcfTest)
   165  
   166  	for n := 0; n < len(sg.Nodes); n++ {
   167  		fmt.Printf("Printing nodes: %d, seq=%s, numOfEdges=%d ", sg.Nodes[n].Id, dna.BasesToString(sg.Nodes[n].Seq), len(sg.Nodes[n].Next))
   168  		for e := 0; e < len(sg.Nodes[n].Next); e++ {
   169  			fmt.Printf("-> %v, weight=%v", dna.BasesToString(sg.Nodes[n].Next[e].Dest.Seq), sg.Nodes[n].Next[e].Prob)
   170  		}
   171  		fmt.Println("")
   172  	}
   173  	PrintGraph(sg)
   174  	Write("testdata/vcfGraphTest.gg", sg)
   175  	newSg := Read("testdata/vcfGraphTest.gg")
   176  
   177  	PrintGraph(newSg)
   178  	vcf.Write("anotherTesting.vcf", vcfTest)
   179  }*/
   180  
   181  /*
   182  func BenchmarkGoRoutinesMap(b *testing.B) {
   183  	var tileSize int = 32
   184  	var stepSize int = tileSize - 1
   185  	var readLength int = 150
   186  	var numberOfReads int = 5
   187  	var mutations int = 0
   188  
   189  	genome, _ := Read("testdata/bigGenome.sg")
   190  	tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize)
   191  	simReads := RandomReads(genome, readLength, numberOfReads, mutations)
   192  	b.ResetTimer()
   193  
   194  	for n := 0; n < b.N; n++ {
   195  		for i := 0; i < len(simReads); i++ {
   196  			wrapNoChanMap(genome, simReads[i], tiles, tileSize, stepSize)
   197  		}
   198  	}
   199  }*/
   200  
   201  //TODO: slices not working right now
   202  /*func BenchmarkGoRoutinesSlice(b *testing.B) {
   203  	var tileSize int = 12
   204  	var stepSize int = tileSize - 1
   205  	var readLength int = 150
   206  	var numberOfReads int = 50
   207  	var mutations int = 0
   208  
   209  	genome, _ := Read("testdata/bigGenome.sg")
   210  	tiles := IndexGenomeIntoSlice(genome.Nodes, tileSize, stepSize)
   211  	simReads := RandomReads(genome.Nodes, readLength, numberOfReads, mutations)
   212  	b.ResetTimer()
   213  
   214  	for n := 0; n < b.N; n++ {
   215  		for i := 0; i < len(simReads); i++ {
   216  			wrapNoChan(genome, simReads[i], tiles, tileSize)
   217  		}
   218  	}
   219  }*/
   220  
   221  /*
   222  func TestFaFormat(t *testing.T) {
   223  
   224  	var tileSize int = 32
   225  	var numberOfReads int = 100
   226  	var readLength int = 150
   227  	var mutations int = 0
   228  	var numWorkers int = 8
   229  	log.Printf("Reading in the genome (simple graph)...\n")
   230  	genome, chrSize := Read("testdata/gasAcu1.fa")
   231  	simReads := RandomPairedReads(genome.Nodes, readLength, numberOfReads, mutations)
   232  	fastq.WritePair("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", simReads)
   233  	header := sam.ChromInfoMapSamHeader(chrSize)
   234  	GswAlignFaFormat(genome, "testdata/simReads_R1.fq", "testdata/simReads_R2.fq", "testdata/format.sam", numWorkers, tileSize, header)
   235  }*/
   236  
   237  /*
   238  func TestNewSeed(t *testing.T) {
   239  	var tileSize int = 32
   240  	var stepSize int = tileSize - 1
   241  	var numberOfReads int = 30000
   242  	var readLength int = 150
   243  	var mutations int = 0
   244  	var workerWaiter, writerWaiter sync.WaitGroup
   245  	var numWorkers int = 4
   246  
   247  	log.Printf("Reading in the genome (simple graph)...\n")
   248  	genome := Read("testdata/gasAcu1.fa")
   249  
   250  	log.Printf("Indexing the genome...\n")
   251  	tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize)
   252  
   253  	log.Printf("Making fastq channel...\n")
   254  	fastqPipe := make(chan *fastq.Fastq, 824)
   255  
   256  	log.Printf("Making sam channel...\n")
   257  	samPipe := make(chan *sam.SamAln, 824)
   258  
   259  	log.Printf("Simulating reads...\n")
   260  	simReads := RandomReads(genome.Nodes, readLength, numberOfReads, mutations)
   261  	fastq.Write("testdata/simReads.fq", simReads)
   262  	start := time.Now()
   263  	go fastq.ReadToChan("testdata/simReads.fq", fastqPipe)
   264  
   265  	log.Printf("Starting alignment worker...\n")
   266  	workerWaiter.Add(numWorkers)
   267  	for i := 0; i < numWorkers; i++ {
   268  		go SeedTestGswWorker(genome, tiles, tileSize, stepSize, fastqPipe, samPipe, &workerWaiter)
   269  	}
   270  
   271  	writerWaiter.Add(1)
   272  	go sam.SamChanToFile(samPipe, "/dev/stdout", &writerWaiter)
   273  
   274  	workerWaiter.Wait()
   275  	close(samPipe)
   276  	log.Printf("Aligners finished and channel closed\n")
   277  	writerWaiter.Wait()
   278  	log.Printf("Sam writer finished and we are all done\n")
   279  	stop := time.Now()
   280  	duration := stop.Sub(start)
   281  	log.Printf("Aligned %d reads in %s (%.1f reads per second).\n", len(simReads), duration, float64(len(simReads))/duration.Seconds())
   282  }*/
   283  
   284  /*
   285  func BenchmarkAligningNoGoroutines(b *testing.B) {
   286  	//var mappedReads []*sam.SamAln = make([]*sam.SamAln, numberOfReads)
   287  	numberOfReads = 10
   288  	genome := Read("testdata/bigGenome.sg")
   289  	//tiles := indexGenome(genome.Nodes, tileSize)
   290  	tiles := IndexGenomeDev(genome.Nodes, tileSize, stepSize)
   291  	simReads := RandomReads(genome.Nodes, readLength, numberOfReads, mutations)
   292  	fastq.Write("testdata/fakeReads.fastq", simReads)
   293  	//var seeds []Seed = make([]Seed, 256)
   294  	m, trace := swMatrixSetup(10000)
   295  	//var seeds []Seed = make([]Seed, 256)
   296  	b.ResetTimer()
   297  
   298  	//c := make(chan *sam.SamAln)
   299  	var mappedRead *sam.SamAln
   300  	for n := 0; n < b.N; n++ {
   301  		for i := 0; i < len(simReads);i++ {
   302  			mappedRead = GraphSmithWaterman(genome, simReads[i], tiles, tileSize, m, trace)
   303  			log.Printf("%s\n", sam.SamAlnToString(mappedRead))
   304  		}
   305  
   306  		//devGSWsBatch(genome, "testdata/fakeReads.fastq", tiles, tileSize, m, trace, 400)
   307  		//noLimitGSW(genome, "testdata/fakeReads.fastq", tiles, tileSize, m, trace)
   308  	}
   309  }*/
   310  
   311  /*
   312  func TestRead(t *testing.T) {
   313  	for _, test := range readWriteTests {
   314  		actual := Read(test.filename)
   315  
   316  		if !AllAreEqual(test.data.Nodes, actual.Nodes) {
   317  			t.Errorf("The %s file was not read correctly.", test.filename)
   318  		}
   319  	}
   320  }
   321  
   322  func TestWriteAndRead(t *testing.T) {
   323  	//var actual []*Node
   324  	for _, test := range readWriteTests {
   325  		tempFile := test.filename + ".tmp"
   326  		Write(tempFile, test.data)
   327  		actual := Read(tempFile)
   328  		if !AllAreEqual(test.data, actual.Nodes) {
   329  			t.Errorf("The %s file was not read correctly.", test.filename)
   330  		}
   331  		err := os.Remove(tempFile)
   332  		if err != nil {
   333  			t.Errorf("Deleting temp file %s gave an error.", tempFile)
   334  		}
   335  	}
   336  }*/