github.com/vertgenlab/gonomics@v1.0.0/genomeGraph/genomeGraph_test.go (about) 1 package genomeGraph 2 3 import ( 4 "log" 5 "sync" 6 "testing" 7 "time" 8 9 "github.com/vertgenlab/gonomics/fastq" 10 "github.com/vertgenlab/gonomics/fileio" 11 "github.com/vertgenlab/gonomics/giraf" 12 ) 13 14 func TestWorkerWithWriting(t *testing.T) { 15 var output string = "testdata/pairedTest.giraf" 16 var tileSize int = 32 17 var stepSize int = 32 18 var numberOfReads int = 100 19 var readLength int = 150 20 var mutations int = 0 21 var workerWaiter, writerWaiter sync.WaitGroup 22 var numWorkers int = 8 23 var scoreMatrix = HumanChimpTwoScoreMatrix 24 genome := Read("testdata/bigGenome.sg") 25 log.Printf("Reading in the genome (simple graph)...\n") 26 log.Printf("Indexing the genome...\n") 27 log.Printf("Making fastq channel...\n") 28 fastqPipe := make(chan fastq.PairedEndBig, 824) 29 30 log.Printf("Making sam channel...\n") 31 samPipe := make(chan giraf.GirafPair, 824) 32 33 log.Printf("Simulating reads...\n") 34 simReads := RandomPairedReads(genome, readLength, numberOfReads, mutations) 35 //os.Remove("testdata/simReads_R1.fq") 36 //os.Remove("testdata/simReads_R2.fq") 37 fastq.WritePair("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", simReads) 38 tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize) 39 go fastq.ReadPairBigToChan("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", fastqPipe) 40 log.Printf("Finished Indexing Genome...\n") 41 start := time.Now() 42 43 log.Printf("Starting alignment worker...\n") 44 workerWaiter.Add(numWorkers) 45 for i := 0; i < numWorkers; i++ { 46 go RoutineFqPairToGiraf(genome, tiles, tileSize, stepSize, scoreMatrix, fastqPipe, samPipe, &workerWaiter) 47 } 48 go giraf.GirafPairChanToFile(output, samPipe, &writerWaiter) 49 writerWaiter.Add(1) 50 workerWaiter.Wait() 51 close(samPipe) 52 log.Printf("Aligners finished and channel closed\n") 53 writerWaiter.Wait() 54 log.Printf("Sam writer finished and we are all done\n") 55 stop := time.Now() 56 duration := stop.Sub(start) 57 log.Printf("Aligned %d reads in %s (%.1f reads per second).\n", len(simReads)*2, duration, float64(len(simReads)*2)/duration.Seconds()) 58 fileio.EasyRemove("testdata/simReads_R1.fq") 59 fileio.EasyRemove("testdata/simReads_R2.fq") 60 fileio.EasyRemove("testdata/pairedTest.giraf") 61 } 62 63 /* 64 func TestHippoAln(t *testing.T) { 65 var hippo *fastq.Fastq = &fastq.Fastq{Name: "hippoOne", Seq: dna.StringToBases("TGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGAGTGATTTGAAGGTACATGGAATACCACCACGGGAGCAAAGC"), Qual: fastq.ToQualUint8([]rune("JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ"))} 66 var tileSize int = 32 67 var stepSize int = tileSize - 1 68 var alignment *sam.SamAln = nil 69 var dummyWaiter sync.WaitGroup 70 var scoreMatrix = HumanChimpTwoScoreMatrix 71 log.Printf("Reading in the genome (simple graph)...\n") 72 genome := Read("testdata/bigGenome.sg") 73 74 log.Printf("Indexing the genome...\n") 75 tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize) 76 77 log.Printf("Making fastq channel...\n") 78 fastqPipe := make(chan *fastq.FastqBig, 1) 79 80 log.Printf("Making sam channel...\n") 81 samPipe := make(chan *giraf.Giraf, 1) 82 83 log.Printf("Starting alignment worker...\n") 84 go RoutineFqToGiraf(genome, tiles, scoreMatrix, tileSize, stepSize, fastqPipe, samPipe, &dummyWaiter) 85 86 log.Printf("Waiting for 5 seconds and then aligning read...\n") 87 time.Sleep(5 * time.Second) 88 89 start := time.Now() 90 fastqPipe <- hippo 91 alignment = <-samPipe 92 end := time.Now() 93 duration := end.Sub(start) 94 log.Printf("duration:%s\t%s\n", duration, dna.BasesToString(alignment.Seq)) 95 }*/ 96 97 /*func TestReadsWithTiming(t *testing.T) { 98 //var hippo *fastq.Fastq = &fastq.Fastq{Name: "hippoOne", Seq: dna.StringToBases("ACCTTTTTCTTGTTGTATTTAAAGACAAATGATTTGATTTTATATAGCCAAATGGTTTTCAACGCTAGCAGTGTTTGGTGGCAACTCAGTTTCACCCACGTCTGTTCCAACTAACATGCAATATGTTTCCTGTAATCTGCAGCACGCTTT"), Qual: []rune("JJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJJ")} 99 var tileSize int = 32 100 var stepSize int = 31 101 var numberOfReads int = 1 102 var readLength int = 150 103 var mutations int = 2 104 var dummyWaiter sync.WaitGroup 105 106 var fastestRead, slowestRead *fastq.Fastq = nil, nil 107 var fastestTime, slowestTime float64 = math.MaxFloat64, 0 108 109 log.Printf("Reading in the genome (simple graph)...\n") 110 fa, _ := Read("testdata/bigGenome.sg") 111 //TODO: this file does not exist in testdata 112 genome, _ := Read("testdata/rabsBepa.gg") 113 log.Printf("Simulating reads...\n") 114 simReads := RandomReads(fa.Nodes, readLength, numberOfReads, mutations) 115 116 log.Printf("Indexing the genome...\n") 117 tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize) 118 119 log.Printf("Making fastq channel...\n") 120 fastqPipe := make(chan *fastq.Fastq, numberOfReads) 121 122 log.Printf("Making sam channel...\n") 123 samPipe := make(chan *sam.SamAln, numberOfReads) 124 125 alignments := make([]*sam.SamAln, numberOfReads) 126 127 log.Printf("Starting alignment worker...\n") 128 129 go gswWorker(genome, tiles, tileSize, stepSize, fastqPipe, samPipe, &dummyWaiter) 130 131 log.Printf("Waiting for 5 seconds and then aligning reads...\n") 132 time.Sleep(5 * time.Second) 133 134 for i := 0; i < numberOfReads; i++ { 135 136 start := time.Now() 137 fastqPipe <- simReads[i] 138 alignments[i] = <-samPipe 139 stop := time.Now() 140 duration := stop.Sub(start).Seconds() 141 if duration > slowestTime { 142 slowestTime = duration 143 slowestRead = simReads[i] 144 } else if duration < fastestTime { 145 fastestTime = duration 146 fastestRead = simReads[i] 147 } 148 } 149 log.Printf("Fastest read was (%.4f):\n%s\nSlowest reads was (%.4f):\n%s\n", fastestTime, dna.BasesToString(fastestRead.Seq), slowestTime, dna.BasesToString(slowestRead.Seq)) 150 151 //CheckAnswers(alignments, genome) 152 }*/ 153 154 // This looks like a good test, but it is failing when trying to 155 // create the graph from the smallFasta and vcfTest 156 /*func TestVcfGraph(t *testing.T) { 157 //smallFasta := fasta.Fasta{Name: "chr1", Seq: dna.StringToBases("ATCGA")} 158 smallFasta := fasta.Fasta{Name: "chr1", Seq: dna.StringToBases("ATCGA")} 159 //smallFasta := fasta.Fasta{Name: "chr1", Seq: dna.StringToBases("ATCGA")} 160 fmt.Printf("Reference sequence is: %s\n", dna.BasesToString(smallFasta.Seq)) 161 var vcfTest []*vcf.Vcf 162 //vcfTest = append(vcfTest, &vcf.Vcf{Chr: "chr1", Pos: 3, Id: ".", Ref: "T", Alt: "TA", Qual: 0, Filter: "PASS", Info: "", Format: "SVTYPE=INS"}) 163 vcfTest = append(vcfTest, &vcf.Vcf{Chr: "chr1", Pos: 3, Id: ".", Ref: "C", Alt: "T", Qual: 0, Filter: "PASS", Info: "", Format: "SVTYPE=SNP"}) 164 sg := vChrGraph(NewGraph(), &smallFasta, vcfTest) 165 166 for n := 0; n < len(sg.Nodes); n++ { 167 fmt.Printf("Printing nodes: %d, seq=%s, numOfEdges=%d ", sg.Nodes[n].Id, dna.BasesToString(sg.Nodes[n].Seq), len(sg.Nodes[n].Next)) 168 for e := 0; e < len(sg.Nodes[n].Next); e++ { 169 fmt.Printf("-> %v, weight=%v", dna.BasesToString(sg.Nodes[n].Next[e].Dest.Seq), sg.Nodes[n].Next[e].Prob) 170 } 171 fmt.Println("") 172 } 173 PrintGraph(sg) 174 Write("testdata/vcfGraphTest.gg", sg) 175 newSg := Read("testdata/vcfGraphTest.gg") 176 177 PrintGraph(newSg) 178 vcf.Write("anotherTesting.vcf", vcfTest) 179 }*/ 180 181 /* 182 func BenchmarkGoRoutinesMap(b *testing.B) { 183 var tileSize int = 32 184 var stepSize int = tileSize - 1 185 var readLength int = 150 186 var numberOfReads int = 5 187 var mutations int = 0 188 189 genome, _ := Read("testdata/bigGenome.sg") 190 tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize) 191 simReads := RandomReads(genome, readLength, numberOfReads, mutations) 192 b.ResetTimer() 193 194 for n := 0; n < b.N; n++ { 195 for i := 0; i < len(simReads); i++ { 196 wrapNoChanMap(genome, simReads[i], tiles, tileSize, stepSize) 197 } 198 } 199 }*/ 200 201 //TODO: slices not working right now 202 /*func BenchmarkGoRoutinesSlice(b *testing.B) { 203 var tileSize int = 12 204 var stepSize int = tileSize - 1 205 var readLength int = 150 206 var numberOfReads int = 50 207 var mutations int = 0 208 209 genome, _ := Read("testdata/bigGenome.sg") 210 tiles := IndexGenomeIntoSlice(genome.Nodes, tileSize, stepSize) 211 simReads := RandomReads(genome.Nodes, readLength, numberOfReads, mutations) 212 b.ResetTimer() 213 214 for n := 0; n < b.N; n++ { 215 for i := 0; i < len(simReads); i++ { 216 wrapNoChan(genome, simReads[i], tiles, tileSize) 217 } 218 } 219 }*/ 220 221 /* 222 func TestFaFormat(t *testing.T) { 223 224 var tileSize int = 32 225 var numberOfReads int = 100 226 var readLength int = 150 227 var mutations int = 0 228 var numWorkers int = 8 229 log.Printf("Reading in the genome (simple graph)...\n") 230 genome, chrSize := Read("testdata/gasAcu1.fa") 231 simReads := RandomPairedReads(genome.Nodes, readLength, numberOfReads, mutations) 232 fastq.WritePair("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", simReads) 233 header := sam.ChromInfoMapSamHeader(chrSize) 234 GswAlignFaFormat(genome, "testdata/simReads_R1.fq", "testdata/simReads_R2.fq", "testdata/format.sam", numWorkers, tileSize, header) 235 }*/ 236 237 /* 238 func TestNewSeed(t *testing.T) { 239 var tileSize int = 32 240 var stepSize int = tileSize - 1 241 var numberOfReads int = 30000 242 var readLength int = 150 243 var mutations int = 0 244 var workerWaiter, writerWaiter sync.WaitGroup 245 var numWorkers int = 4 246 247 log.Printf("Reading in the genome (simple graph)...\n") 248 genome := Read("testdata/gasAcu1.fa") 249 250 log.Printf("Indexing the genome...\n") 251 tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize) 252 253 log.Printf("Making fastq channel...\n") 254 fastqPipe := make(chan *fastq.Fastq, 824) 255 256 log.Printf("Making sam channel...\n") 257 samPipe := make(chan *sam.SamAln, 824) 258 259 log.Printf("Simulating reads...\n") 260 simReads := RandomReads(genome.Nodes, readLength, numberOfReads, mutations) 261 fastq.Write("testdata/simReads.fq", simReads) 262 start := time.Now() 263 go fastq.ReadToChan("testdata/simReads.fq", fastqPipe) 264 265 log.Printf("Starting alignment worker...\n") 266 workerWaiter.Add(numWorkers) 267 for i := 0; i < numWorkers; i++ { 268 go SeedTestGswWorker(genome, tiles, tileSize, stepSize, fastqPipe, samPipe, &workerWaiter) 269 } 270 271 writerWaiter.Add(1) 272 go sam.SamChanToFile(samPipe, "/dev/stdout", &writerWaiter) 273 274 workerWaiter.Wait() 275 close(samPipe) 276 log.Printf("Aligners finished and channel closed\n") 277 writerWaiter.Wait() 278 log.Printf("Sam writer finished and we are all done\n") 279 stop := time.Now() 280 duration := stop.Sub(start) 281 log.Printf("Aligned %d reads in %s (%.1f reads per second).\n", len(simReads), duration, float64(len(simReads))/duration.Seconds()) 282 }*/ 283 284 /* 285 func BenchmarkAligningNoGoroutines(b *testing.B) { 286 //var mappedReads []*sam.SamAln = make([]*sam.SamAln, numberOfReads) 287 numberOfReads = 10 288 genome := Read("testdata/bigGenome.sg") 289 //tiles := indexGenome(genome.Nodes, tileSize) 290 tiles := IndexGenomeDev(genome.Nodes, tileSize, stepSize) 291 simReads := RandomReads(genome.Nodes, readLength, numberOfReads, mutations) 292 fastq.Write("testdata/fakeReads.fastq", simReads) 293 //var seeds []Seed = make([]Seed, 256) 294 m, trace := swMatrixSetup(10000) 295 //var seeds []Seed = make([]Seed, 256) 296 b.ResetTimer() 297 298 //c := make(chan *sam.SamAln) 299 var mappedRead *sam.SamAln 300 for n := 0; n < b.N; n++ { 301 for i := 0; i < len(simReads);i++ { 302 mappedRead = GraphSmithWaterman(genome, simReads[i], tiles, tileSize, m, trace) 303 log.Printf("%s\n", sam.SamAlnToString(mappedRead)) 304 } 305 306 //devGSWsBatch(genome, "testdata/fakeReads.fastq", tiles, tileSize, m, trace, 400) 307 //noLimitGSW(genome, "testdata/fakeReads.fastq", tiles, tileSize, m, trace) 308 } 309 }*/ 310 311 /* 312 func TestRead(t *testing.T) { 313 for _, test := range readWriteTests { 314 actual := Read(test.filename) 315 316 if !AllAreEqual(test.data.Nodes, actual.Nodes) { 317 t.Errorf("The %s file was not read correctly.", test.filename) 318 } 319 } 320 } 321 322 func TestWriteAndRead(t *testing.T) { 323 //var actual []*Node 324 for _, test := range readWriteTests { 325 tempFile := test.filename + ".tmp" 326 Write(tempFile, test.data) 327 actual := Read(tempFile) 328 if !AllAreEqual(test.data, actual.Nodes) { 329 t.Errorf("The %s file was not read correctly.", test.filename) 330 } 331 err := os.Remove(tempFile) 332 if err != nil { 333 t.Errorf("Deleting temp file %s gave an error.", tempFile) 334 } 335 } 336 }*/