github.com/vertgenlab/gonomics@v1.0.0/cmd/gsw/pairedEndFastqs.go (about)

     1  package main
     2  
     3  import (
     4  	"log"
     5  	"path/filepath"
     6  	"strings"
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/vertgenlab/gonomics/fastq"
    11  	"github.com/vertgenlab/gonomics/fileio"
    12  	"github.com/vertgenlab/gonomics/genomeGraph"
    13  	"github.com/vertgenlab/gonomics/giraf"
    14  	"github.com/vertgenlab/gonomics/sam"
    15  )
    16  
    17  func GswToGirafPair(ref *genomeGraph.GenomeGraph, readOne string, readTwo string, output string, threads int, seedLen int, stepSize int, scoreMatrix [][]int64) {
    18  	log.Printf("Paired end reads detected...\n")
    19  
    20  	log.Printf("Indexing the genome...\n\n")
    21  	seedHash := genomeGraph.IndexGenomeIntoMap(ref.Nodes, seedLen, stepSize)
    22  	var wgAlign, wgWrite sync.WaitGroup
    23  
    24  	fastqPipe := make(chan fastq.PairedEndBig, 824)
    25  	girafPipe := make(chan giraf.GirafPair, 824)
    26  	go readFastqGsw(readOne, readTwo, fastqPipe)
    27  	log.Printf("Scoring matrix used:\n%s\n", genomeGraph.ViewMatrix(scoreMatrix))
    28  	log.Printf("Aligning with the following settings:\n\t\tthreads=%d, seedLen=%d, stepSize=%d\n\n", threads, seedLen, stepSize)
    29  	wgAlign.Add(threads)
    30  
    31  	log.Printf("Aligning %s and %s to genome graph...", strings.Split(filepath.Base(readOne), ".")[0], strings.Split(filepath.Base(readTwo), ".")[0])
    32  	start := time.Now()
    33  	for i := 0; i < threads; i++ {
    34  		go genomeGraph.RoutineFqPairToGiraf(ref, seedHash, seedLen, stepSize, scoreMatrix, fastqPipe, girafPipe, &wgAlign)
    35  	}
    36  	wgWrite.Add(1)
    37  	go genomeGraph.SimpleWriteGirafPair(output, girafPipe, &wgWrite)
    38  	wgAlign.Wait()
    39  	stop := time.Now()
    40  	close(girafPipe)
    41  	wgWrite.Wait()
    42  	log.Printf("GSW aligner finished in %.1f seconds\n", stop.Sub(start).Seconds())
    43  	log.Printf("Enjoy analyzing your data!\n\n--xoxo GG\n")
    44  }
    45  
    46  func GswToSamPair(ref *genomeGraph.GenomeGraph, readOne string, readTwo string, output string, threads int, seedLen int, stepSize int, scoreMatrix [][]int64, header sam.Header) {
    47  	log.SetFlags(log.Ldate | log.Ltime)
    48  	log.Printf("Paired end reads detected...\n")
    49  
    50  	log.Printf("Indexing the genome...\n\n")
    51  	seedHash := genomeGraph.IndexGenomeIntoMap(ref.Nodes, seedLen, stepSize)
    52  	var wgAlign, wgWrite sync.WaitGroup
    53  	//log.Printf("Setting up read and write channels...\n\n")
    54  	fastqPipe := make(chan fastq.PairedEndBig, 824)
    55  	samPipe := make(chan sam.Sam, 824)
    56  	go readFastqGsw(readOne, readTwo, fastqPipe)
    57  
    58  	log.Printf("Scoring matrix used:\n%s\n", genomeGraph.ViewMatrix(scoreMatrix))
    59  	log.Printf("Aligning with the following settings:\n\t\tthreads=%d, seedLen=%d, stepSize=%d\n\n", threads, seedLen, stepSize)
    60  	wgAlign.Add(threads)
    61  	log.Printf("Aligning sequence to genome graph...")
    62  	start := time.Now()
    63  	for i := 0; i < threads; i++ {
    64  		go genomeGraph.RoutineGirafToSam(ref, seedHash, seedLen, stepSize, scoreMatrix, fastqPipe, samPipe, &wgAlign)
    65  	}
    66  	wgWrite.Add(1)
    67  	go sam.WriteFromChan(samPipe, output, header, &wgWrite)
    68  	wgAlign.Wait()
    69  	stop := time.Now()
    70  	close(samPipe)
    71  	wgWrite.Wait()
    72  	log.Printf("GSW aligner finished in %.1f seconds\n", stop.Sub(start).Seconds())
    73  	log.Printf("Enjoy analyzing your data!\n\n--xoxo GG\n")
    74  }
    75  
    76  func readFastqGsw(fileOne string, fileTwo string, answer chan<- fastq.PairedEndBig) {
    77  	readOne, readTwo := fileio.NewByteReader(fileOne), fileio.NewByteReader(fileTwo)
    78  	for fq, done := fastq.ReadFqBigPair(readOne, readTwo); !done; fq, done = fastq.ReadFqBigPair(readOne, readTwo) {
    79  		answer <- fq
    80  	}
    81  	close(answer)
    82  }