github.com/vertgenlab/gonomics@v1.0.0/genomeGraph/align_test.go (about)

     1  package genomeGraph
     2  
     3  import (
     4  	"flag"
     5  	"github.com/vertgenlab/gonomics/fastq"
     6  	"github.com/vertgenlab/gonomics/giraf"
     7  	"github.com/vertgenlab/gonomics/numbers/parse"
     8  	"log"
     9  	"os"
    10  	"runtime"
    11  	"runtime/pprof"
    12  	"strings"
    13  	"sync"
    14  	"testing"
    15  	"time"
    16  )
    17  
    18  var cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")
    19  var memprofile = flag.String("memprofile", "", "write memory profile to `file`")
    20  
    21  func BenchmarkGsw(b *testing.B) {
    22  	if *cpuprofile != "" {
    23  		f, err := os.Create(*cpuprofile)
    24  		if err != nil {
    25  			log.Fatal("could not create CPU profile: ", err)
    26  		}
    27  		defer f.Close() // error handling omitted for example
    28  		if err := pprof.StartCPUProfile(f); err != nil {
    29  			log.Fatal("could not start CPU profile: ", err)
    30  		}
    31  		defer pprof.StopCPUProfile()
    32  	}
    33  	b.ReportAllocs()
    34  	//var output string = "/dev/stdout"
    35  	var output string = "testdata/rabs_test.giraf"
    36  	var tileSize int = 32
    37  	var stepSize int = 32
    38  	//var numberOfReads int = 50000
    39  	//var readLength int = 150
    40  	//var mutations int = 1
    41  	var workerWaiter, writerWaiter sync.WaitGroup
    42  	var numWorkers int = 6
    43  	var scoreMatrix = HumanChimpTwoScoreMatrix
    44  	b.ResetTimer()
    45  	genome := Read("testdata/rabsTHREEspine.fa")
    46  	log.Printf("Reading in the genome (simple graph)...\n")
    47  	log.Printf("Indexing the genome...\n")
    48  
    49  	fastqPipe := make(chan fastq.PairedEndBig, 2408)
    50  	girafPipe := make(chan giraf.GirafPair, 2408)
    51  
    52  	//log.Printf("Simulating reads...\n")
    53  	//simReads := RandomPairedReads(genome, readLength, numberOfReads, mutations)
    54  	//os.Remove("testdata/simReads_R1.fq")
    55  	//os.Remove("testdata/simReads_R2.fq")
    56  
    57  	//fastq.WritePair("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", simReads)
    58  
    59  	tiles := IndexGenomeIntoMap(genome.Nodes, tileSize, stepSize)
    60  
    61  	go readFastqGsw("testdata/simReads_R1.fq", "testdata/simReads_R2.fq", fastqPipe)
    62  	log.Printf("Finished Indexing Genome...\n")
    63  
    64  	log.Printf("Starting alignment worker...\n")
    65  	workerWaiter.Add(numWorkers)
    66  	start := time.Now()
    67  	for i := 0; i < numWorkers; i++ {
    68  		go RoutineFqPairToGiraf(genome, tiles, tileSize, stepSize, scoreMatrix, fastqPipe, girafPipe, &workerWaiter)
    69  	}
    70  	go SimpleWriteGirafPair(output, girafPipe, &writerWaiter)
    71  	//go isGirafPairCorrect(girafPipe, genome, &writerWaiter, 2*len(simReads))
    72  	writerWaiter.Add(1)
    73  	workerWaiter.Wait()
    74  	close(girafPipe)
    75  
    76  	writerWaiter.Wait()
    77  
    78  	stop := time.Now()
    79  	duration := stop.Sub(start)
    80  	//log.Printf("Aligned %d reads in %s (%.1f reads per second).\n", len(simReads)*2, duration, float64(len(simReads)*2)/duration.Seconds())
    81  	log.Printf("Aligned %d reads in %s (%.1f reads per second).\n", 50000*2, duration, float64(50000*2)/duration.Seconds())
    82  	if *memprofile != "" {
    83  		f, err := os.Create(*memprofile)
    84  		if err != nil {
    85  			log.Fatal("could not create memory profile: ", err)
    86  		}
    87  		defer f.Close() // error handling omitted for example
    88  		runtime.GC()    // get up-to-date statistics
    89  		if err := pprof.WriteHeapProfile(f); err != nil {
    90  			log.Fatal("could not write memory profile: ", err)
    91  		}
    92  	}
    93  }
    94  
    95  func checkAlignment(aln giraf.Giraf, genome *GenomeGraph) bool {
    96  	qName := strings.Split(aln.QName, "_")
    97  	//if len(qName) < 5 {
    98  	//	log.Fatalf("Error: input giraf file does not match simulation format...\n")
    99  	//}
   100  	if len(aln.Cigar) < 1 {
   101  		return false
   102  	}
   103  
   104  	targetStart := aln.Path.TStart
   105  	targetEnd := aln.Path.TEnd
   106  	//if len(aln.Aln) < 1 {
   107  	if aln.Cigar[0].Op == 'S' {
   108  		//log.Printf("%s\n", giraf.GirafToString(aln))
   109  		targetStart = targetStart - int(aln.Cigar[0].RunLen)
   110  	}
   111  	if aln.Cigar[len(aln.Cigar)-1].Op == 'S' {
   112  		targetEnd = targetEnd + int(aln.Cigar[len(aln.Cigar)-1].RunLen)
   113  
   114  		//}
   115  	}
   116  	if parse.StringToInt(qName[0]) == int(aln.Path.Nodes[0]) && parse.StringToInt(qName[1]) == targetStart && targetEnd == parse.StringToInt(qName[3]) {
   117  		//log.Printf("%s\n", giraf.GirafToString(aln))
   118  		//log.Printf("Results: %d != %d or %d != %d\n", headNode, aln.Path.Nodes[0], startPos, aln.Path.TStart)
   119  		//	log.Printf("%s\n", giraf.GirafToString(aln))
   120  		return true
   121  	} else {
   122  		//log.Printf("endPos=%d, right side cigar runLength: %d\n", endPos, aln.Aln[len(aln.Aln)-1].RunLen)
   123  		//log.Printf("%s\n", giraf.GirafToString(aln))
   124  		//log.Printf("Error: this read is not aligning correctly...\n")
   125  	}
   126  	return false
   127  }
   128  func percentOfFloat(part int, total int) float64 {
   129  	return (float64(part) * float64(100)) / float64(total)
   130  }
   131  
   132  func isGirafPairCorrect(input <-chan giraf.GirafPair, genome *GenomeGraph, wg *sync.WaitGroup, numReads int) {
   133  	var unmapped int = 0
   134  	for pair := range input {
   135  		if !checkAlignment(pair.Fwd, genome) {
   136  			unmapped++
   137  			//log.Printf("Error: failed alignment simulation...\n")
   138  			//buf := GirafStringBuilder(pair.Fwd,&bytes.Buffer{})
   139  			//log.Printf("%s\n", buf.String())
   140  			//log.Printf("%s\n", giraf.GirafToString(pair.Rev))
   141  		}
   142  
   143  		if !checkAlignment(pair.Rev, genome) {
   144  			//log.Printf("Error: failed alignment simulation...\n")
   145  			//buf := GirafStringBuilder(pair.Fwd,&bytes.Buffer{})
   146  			//log.Printf("%s\n", buf.String())
   147  			unmapped++
   148  		}
   149  	}
   150  
   151  	log.Printf("Mapped %d out of %d\n", numReads-unmapped, numReads)
   152  	log.Printf("%f of the reads are mapping correctly\n", percentOfFloat(numReads-unmapped, numReads))
   153  
   154  	wg.Done()
   155  }