github.com/vertgenlab/gonomics@v1.0.0/genomeGraph/align_test.go (about) 1 package genomeGraph 2 3 import ( 4 "flag" 5 "github.com/vertgenlab/gonomics/fastq" 6 "github.com/vertgenlab/gonomics/giraf" 7 "github.com/vertgenlab/gonomics/numbers/parse" 8 "log" 9 "os" 10 "runtime" 11 "runtime/pprof" 12 "strings" 13 "sync" 14 "testing" 15 "time" 16 ) 17 18 var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`") 19 var memprofile = flag.String("memprofile", "", "write memory profile to `file`") 20 21 func BenchmarkGsw(b *testing.B) { 22 if *cpuprofile != "" { 23 f, err := os.Create(*cpuprofile) 24 if err != nil { 25 log.Fatal("could not create CPU profile: ", err) 26 } 27 defer f.Close() // error handling omitted for example 28 if err := pprof.StartCPUProfile(f); err != nil { 29 log.Fatal("could not start CPU profile: ", err) 30 } 31 defer pprof.StopCPUProfile() 32 } 33 b.ReportAllocs() 34 //var output string = "/dev/stdout" 35 var output string = "testdata/rabs_test.giraf" 36 var tileSize int = 32 37 var stepSize int = 32 38 //var numberOfReads int = 50000 39 //var readLength int = 150 40 //var mutations int = 1 41 var workerWaiter, writerWaiter sync.WaitGroup 42 var numWorkers int = 6 43 var scoreMatrix = HumanChimpTwoScoreMatrix 44 b.ResetTimer() 45 genome := Read("testdata/rabsTHREEspine.fa") 46 log.Printf("Reading in the genome (simple graph)...\n") 47 log.Printf("Indexing the genome...\n") 48 49 fastqPipe := make(chan fastq.PairedEndBig, 2408) 50 girafPipe := make(chan giraf.GirafPair, 2408) 51 52 //log.Printf("Simulating reads...\n") 53 //simReads := RandomPairedReads(genome, readLength, numberOfReads, mutations) 54 //os.Remove("testdata/simReads_R1.fq") 55 //os.Remove("testdata/simReads_R2.fq") 56 57 //fastq.WritePair("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", simReads) 58 59 tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize) 60 61 go readFastqGsw("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", fastqPipe) 62 log.Printf("Finished Indexing Genome...\n") 63 64 log.Printf("Starting alignment worker...\n") 65 workerWaiter.Add(numWorkers) 66 start := time.Now() 67 for i := 0; i < numWorkers; i++ { 68 go RoutineFqPairToGiraf(genome, tiles, tileSize, stepSize, scoreMatrix, fastqPipe, girafPipe, &workerWaiter) 69 } 70 go SimpleWriteGirafPair(output, girafPipe, &writerWaiter) 71 //go isGirafPairCorrect(girafPipe, genome, &writerWaiter, 2*len(simReads)) 72 writerWaiter.Add(1) 73 workerWaiter.Wait() 74 close(girafPipe) 75 76 writerWaiter.Wait() 77 78 stop := time.Now() 79 duration := stop.Sub(start) 80 //log.Printf("Aligned %d reads in %s (%.1f reads per second).\n", len(simReads)*2, duration, float64(len(simReads)*2)/duration.Seconds()) 81 log.Printf("Aligned %d reads in %s (%.1f reads per second).\n", 50000*2, duration, float64(50000*2)/duration.Seconds()) 82 if *memprofile != "" { 83 f, err := os.Create(*memprofile) 84 if err != nil { 85 log.Fatal("could not create memory profile: ", err) 86 } 87 defer f.Close() // error handling omitted for example 88 runtime.GC() // get up-to-date statistics 89 if err := pprof.WriteHeapProfile(f); err != nil { 90 log.Fatal("could not write memory profile: ", err) 91 } 92 } 93 } 94 95 func checkAlignment(aln giraf.Giraf, genome *GenomeGraph) bool { 96 qName := strings.Split(aln.QName, "_") 97 //if len(qName) < 5 { 98 // log.Fatalf("Error: input giraf file does not match simulation format...\n") 99 //} 100 if len(aln.Cigar) < 1 { 101 return false 102 } 103 104 targetStart := aln.Path.TStart 105 targetEnd := aln.Path.TEnd 106 //if len(aln.Aln) < 1 { 107 if aln.Cigar[0].Op == 'S' { 108 //log.Printf("%s\n", giraf.GirafToString(aln)) 109 targetStart = targetStart - int(aln.Cigar[0].RunLen) 110 } 111 if aln.Cigar[len(aln.Cigar)-1].Op == 'S' { 112 targetEnd = targetEnd + int(aln.Cigar[len(aln.Cigar)-1].RunLen) 113 114 //} 115 } 116 if parse.StringToInt(qName[0]) == int(aln.Path.Nodes[0]) && parse.StringToInt(qName[1]) == targetStart && targetEnd == parse.StringToInt(qName[3]) { 117 //log.Printf("%s\n", giraf.GirafToString(aln)) 118 //log.Printf("Results: %d != %d or %d != %d\n", headNode, aln.Path.Nodes[0], startPos, aln.Path.TStart) 119 // log.Printf("%s\n", giraf.GirafToString(aln)) 120 return true 121 } else { 122 //log.Printf("endPos=%d, right side cigar runLength: %d\n", endPos, aln.Aln[len(aln.Aln)-1].RunLen) 123 //log.Printf("%s\n", giraf.GirafToString(aln)) 124 //log.Printf("Error: this read is not aligning correctly...\n") 125 } 126 return false 127 } 128 func percentOfFloat(part int, total int) float64 { 129 return (float64(part) * float64(100)) / float64(total) 130 } 131 132 func isGirafPairCorrect(input <-chan giraf.GirafPair, genome *GenomeGraph, wg *sync.WaitGroup, numReads int) { 133 var unmapped int = 0 134 for pair := range input { 135 if !checkAlignment(pair.Fwd, genome) { 136 unmapped++ 137 //log.Printf("Error: failed alignment simulation...\n") 138 //buf := GirafStringBuilder(pair.Fwd,&bytes.Buffer{}) 139 //log.Printf("%s\n", buf.String()) 140 //log.Printf("%s\n", giraf.GirafToString(pair.Rev)) 141 } 142 143 if !checkAlignment(pair.Rev, genome) { 144 //log.Printf("Error: failed alignment simulation...\n") 145 //buf := GirafStringBuilder(pair.Fwd,&bytes.Buffer{}) 146 //log.Printf("%s\n", buf.String()) 147 unmapped++ 148 } 149 } 150 151 log.Printf("Mapped %d out of %d\n", numReads-unmapped, numReads) 152 log.Printf("%f of the reads are mapping correctly\n", percentOfFloat(numReads-unmapped, numReads)) 153 154 wg.Done() 155 }