github.com/vertgenlab/gonomics@v1.0.0/genomeGraph/toGiraf.go (about) 1 package genomeGraph 2 3 import ( 4 "fmt" 5 "github.com/vertgenlab/gonomics/cigar" 6 "github.com/vertgenlab/gonomics/fastq" 7 "github.com/vertgenlab/gonomics/fileio" 8 "github.com/vertgenlab/gonomics/giraf" 9 "github.com/vertgenlab/gonomics/numbers/parse" 10 "github.com/vertgenlab/gonomics/sam" 11 "math" 12 "strings" 13 "sync" 14 ) 15 16 func GraphSmithWatermanToGiraf(gg *GenomeGraph, read fastq.FastqBig, seedHash map[uint64][]uint64, seedLen int, stepSize int, matrix *MatrixAln, scoreMatrix [][]int64, seedPool *sync.Pool, dnaPool *sync.Pool, sk scoreKeeper, dynamicScore dynamicScoreKeeper, seedBuildHelper *seedHelper) *giraf.Giraf { 17 var currBest giraf.Giraf = giraf.Giraf{ 18 QName: read.Name, 19 QStart: 0, 20 QEnd: 0, 21 PosStrand: true, 22 Path: giraf.Path{}, 23 Cigar: nil, 24 AlnScore: 0, 25 MapQ: 255, 26 Seq: read.Seq, 27 Qual: read.Qual, 28 Notes: []giraf.Note{{Tag: []byte{'X', 'O'}, Type: 'Z', Value: "~"}}, 29 } 30 resetScoreKeeper(sk) 31 sk.perfectScore = perfectMatchBig(read, scoreMatrix) 32 sk.extension = int(sk.perfectScore/600) + len(read.Seq) 33 seeds := seedPool.Get().(*memoryPool) 34 seeds.Hits = seeds.Hits[:0] 35 seeds.Worker = seeds.Worker[:0] 36 seeds.Hits = seedMapMemPool(seedHash, gg.Nodes, &read, seedLen, sk.perfectScore, scoreMatrix, seeds.Hits, seeds.Worker, seedBuildHelper) 37 for i := 0; i < len(seeds.Hits) && seedCouldBeBetter(int64(seeds.Hits[i].TotalLength), int64(currBest.AlnScore), sk.perfectScore, int64(len(read.Seq)), 100, 90, -196, -296); i++ { 38 sk.currSeed = seeds.Hits[i] 39 sk.tailSeed = *getLastPart(&sk.currSeed) 40 if sk.currSeed.PosStrand { 41 sk.currSeq = read.Seq 42 } else { 43 sk.currSeq = read.SeqRc 44 } 45 sk.seedScore = scoreSeedSeq(sk.currSeq, sk.currSeed.QueryStart, sk.tailSeed.QueryStart+sk.tailSeed.Length, scoreMatrix) 46 if int(sk.currSeed.TotalLength) == len(sk.currSeq) { 47 sk.targetStart = int(sk.currSeed.TargetStart) 48 sk.targetEnd = int(sk.tailSeed.TargetStart + sk.tailSeed.Length) 49 sk.queryStart = int(sk.currSeed.QueryStart) 50 sk.currScore = sk.seedScore 51 } else { 52 sk.leftAlignment, sk.leftScore, sk.targetStart, sk.queryStart, sk.leftPath = LeftAlignTraversal(&gg.Nodes[sk.currSeed.TargetId], sk.leftSeq, int(sk.currSeed.TargetStart), sk.leftPath, sk.extension-int(sk.currSeed.TotalLength), sk.currSeq[:sk.currSeed.QueryStart], scoreMatrix, matrix, sk, dynamicScore, dnaPool) 53 sk.rightAlignment, sk.rightScore, sk.targetEnd, sk.queryEnd, sk.rightPath = RightAlignTraversal(&gg.Nodes[sk.tailSeed.TargetId], sk.rightSeq, int(sk.tailSeed.TargetStart+sk.tailSeed.Length), sk.rightPath, sk.extension-int(sk.currSeed.TotalLength), sk.currSeq[sk.tailSeed.QueryStart+sk.tailSeed.Length:], scoreMatrix, matrix, sk, dynamicScore, dnaPool) 54 sk.currScore = sk.leftScore + sk.seedScore + sk.rightScore 55 } 56 if sk.currScore > int64(currBest.AlnScore) { 57 currBest.QStart = sk.queryStart 58 currBest.QEnd = int(sk.currSeed.QueryStart) + sk.queryStart + sk.queryEnd + int(sk.currSeed.TotalLength) - 1 59 currBest.PosStrand = sk.currSeed.PosStrand 60 currBest.Path = setPath(currBest.Path, sk.targetStart, CatPaths(CatPaths(sk.leftPath, getSeedPath(&sk.currSeed)), sk.rightPath), sk.targetEnd) 61 currBest.Cigar = SoftClipBases(sk.queryStart, len(sk.currSeq), cigar.CatByteCigar(cigar.AddCigarByte(sk.leftAlignment, cigar.ByteCigar{RunLen: uint16(sk.currSeed.TotalLength), Op: 'M'}), sk.rightAlignment)) 62 currBest.AlnScore = int(sk.currScore) 63 currBest.Seq = sk.currSeq 64 } 65 } 66 seedPool.Put(seeds) 67 if !currBest.PosStrand { 68 fastq.ReverseQualUint8Record(currBest.Qual) 69 } 70 return &currBest 71 } 72 73 func readFastqGsw(fileOne string, fileTwo string, answer chan<- fastq.PairedEndBig) { 74 readOne, readTwo := fileio.NewByteReader(fileOne), fileio.NewByteReader(fileTwo) 75 for fq, done := fastq.ReadFqBigPair(readOne, readTwo); !done; fq, done = fastq.ReadFqBigPair(readOne, readTwo) { 76 answer <- fq 77 } 78 close(answer) 79 } 80 81 type ScoreMatrixHelper struct { 82 Matrix [][]int64 83 MaxMatch int64 84 MinMatch int64 85 LeastSevereMismatch int64 86 LeastSevereMatchMismatchChange int64 87 } 88 89 func getScoreMatrixHelp(scoreMatrix [][]int64) *ScoreMatrixHelper { 90 help := ScoreMatrixHelper{Matrix: scoreMatrix} 91 help.MaxMatch, help.MinMatch, help.LeastSevereMismatch, help.LeastSevereMatchMismatchChange = MismatchStats(scoreMatrix) 92 return &help 93 } 94 95 func MismatchStats(scoreMatrix [][]int64) (int64, int64, int64, int64) { 96 var maxMatch int64 = 0 97 var minMatch int64 98 var leastSevereMismatch int64 = scoreMatrix[0][1] 99 var i, j int 100 for i = 0; i < len(scoreMatrix); i++ { 101 for j = 0; j < len(scoreMatrix[i]); j++ { 102 if scoreMatrix[i][j] > maxMatch { 103 minMatch = maxMatch 104 maxMatch = scoreMatrix[i][j] 105 } else { 106 if scoreMatrix[i][j] < 0 && leastSevereMismatch < scoreMatrix[i][j] { 107 leastSevereMismatch = scoreMatrix[i][j] 108 } 109 } 110 } 111 } 112 var leastSevereMatchMismatchChange int64 = leastSevereMismatch - maxMatch 113 return maxMatch, minMatch, leastSevereMismatch, leastSevereMatchMismatchChange 114 } 115 116 func WrapPairGiraf(gg *GenomeGraph, fq fastq.PairedEndBig, seedHash map[uint64][]uint64, seedLen int, stepSize int, matrix *MatrixAln, scoreMatrix [][]int64, seedPool *sync.Pool, dnaPool *sync.Pool, sk scoreKeeper, dynamicScore dynamicScoreKeeper, seedBuildHelper *seedHelper) giraf.GirafPair { 117 var mappedPair giraf.GirafPair = giraf.GirafPair{ 118 Fwd: *GraphSmithWatermanToGiraf(gg, fq.Fwd, seedHash, seedLen, stepSize, matrix, scoreMatrix, seedPool, dnaPool, sk, dynamicScore, seedBuildHelper), 119 Rev: *GraphSmithWatermanToGiraf(gg, fq.Rev, seedHash, seedLen, stepSize, matrix, scoreMatrix, seedPool, dnaPool, sk, dynamicScore, seedBuildHelper), 120 } 121 setGirafFlags(&mappedPair) 122 return mappedPair 123 } 124 125 // setGirafFlags generates the appropriate flags for each giraf in a pair. 126 func setGirafFlags(pair *giraf.GirafPair) { 127 pair.Fwd.Flag = getGirafFlags(&pair.Fwd) 128 pair.Rev.Flag = getGirafFlags(&pair.Rev) 129 pair.Fwd.Flag += 8 // Forward 130 pair.Fwd.Flag += 16 // Paired Reads 131 pair.Fwd.Flag += 16 // Paired Reads 132 if isProperPairAlign(pair) { 133 pair.Fwd.Flag += 1 // Properly Aligned 134 pair.Rev.Flag += 1 // Properly Aligned 135 } 136 } 137 138 func GirafToSam(ag *giraf.Giraf) sam.Sam { 139 curr := sam.Sam{QName: ag.QName, Flag: 4, RName: "*", Pos: 0, MapQ: 255, Cigar: []cigar.Cigar{{Op: '*'}}, RNext: "*", PNext: 0, TLen: 0, Seq: ag.Seq, Qual: fastq.QualString(ag.Qual), Extra: "BZ:i:0\tGP:Z:-1\tXO:Z:~"} 140 //read is unMapped 141 if strings.Compare(ag.Notes[0].Value, "~") == 0 { 142 return curr 143 } else { 144 target := strings.Split(ag.Notes[0].Value, "=") 145 curr.RName = target[0] 146 curr.Pos = uint32(ag.Path.TStart + parse.StringToInt(target[1])) 147 curr.Flag = getSamFlags(ag) 148 if len(ag.Notes) == 2 { 149 curr.Extra = fmt.Sprintf("BZ:i:%d\tGP:Z:%s\tXO:i:%d\t%s", ag.AlnScore, PathToString(ag.Path.Nodes), ag.Path.TStart, giraf.NoteToString(ag.Notes[1])) 150 } else { 151 curr.Extra = fmt.Sprintf("BZ:i:%d\tGP:Z:%s\tXO:i:%d", ag.AlnScore, PathToString(ag.Path.Nodes), ag.Path.TStart) 152 } 153 } 154 return curr 155 } 156 157 func GirafPairToSam(ag giraf.GirafPair) sam.MatePair { 158 var mappedPair sam.MatePair = sam.MatePair{Fwd: sam.Sam{}, Rev: sam.Sam{}} 159 mappedPair.Fwd = GirafToSam(&ag.Fwd) 160 mappedPair.Rev = GirafToSam(&ag.Rev) 161 mappedPair.Fwd.Flag += 64 + 2 162 mappedPair.Rev.Flag += 128 + 2 163 if isProperPairAlign(&ag) { 164 mappedPair.Fwd.Flag += 1 165 mappedPair.Rev.Flag += 1 166 } 167 return mappedPair 168 } 169 170 func isProperPairAlign(mappedPair *giraf.GirafPair) bool { 171 if math.Abs(float64(mappedPair.Fwd.Path.TStart-mappedPair.Rev.Path.TStart)) < 10000 { 172 if mappedPair.Fwd.Path.TStart < mappedPair.Rev.Path.TStart && mappedPair.Fwd.PosStrand && !mappedPair.Rev.PosStrand { 173 return true 174 } 175 if mappedPair.Fwd.Path.TStart > mappedPair.Rev.Path.TStart && !mappedPair.Fwd.PosStrand && mappedPair.Rev.PosStrand { 176 return true 177 } 178 } 179 return false 180 } 181 182 func getGirafFlags(ag *giraf.Giraf) uint8 { 183 var answer uint8 184 if ag.PosStrand { 185 answer += 4 // Positive Strand 186 } 187 if ag.AlnScore < 1200 { 188 answer += 2 // Unmapped 189 } 190 return answer 191 } 192 193 func getSamFlags(ag *giraf.Giraf) uint16 { 194 var answer uint16 195 if !ag.PosStrand { 196 answer += 16 197 } 198 if ag.AlnScore < 1200 { 199 answer += 4 200 } 201 return answer 202 } 203 204 func setPath(p giraf.Path, targetStart int, nodes []uint32, targetEnd int) giraf.Path { 205 p.TStart = targetStart 206 p.Nodes = nodes 207 p.TEnd = targetEnd 208 return p 209 } 210 211 /*func vInfoToValue(n *Node) string { 212 var answer string 213 switch { 214 case n.Info.Variant == 1: 215 answer = fmt.Sprintf("%d=%s", n.Id, "snp") 216 case n.Info.Variant == 2: 217 answer = fmt.Sprintf("%d=%s", n.Id, "ins") 218 case n.Info.Variant == 3: 219 answer = fmt.Sprintf("%d=%s", n.Id, "del") 220 } 221 return answer 222 }*/ 223 224 /*func infoToNotes(nodes []*Node, path []uint32) giraf.Note { 225 var vInfo giraf.Note = giraf.Note{Tag: []byte{'X', 'V'}, Type: 'Z'} 226 vInfo.Value = fmt.Sprintf("%d_%d", nodes[0].Info.Allele, nodes[0].Info.Variant) 227 if len(path) > 0 { 228 for i := 1; i < len(path); i++ { 229 if nodes[i].Info.Variant > 0 { 230 vInfo.Value += fmt.Sprintf(",%s", vInfoToValue(nodes[path[i]])) 231 } else { 232 vInfo.Value += fmt.Sprintf(",%d_%d", nodes[i].Info.Allele, nodes[path[i]].Info.Variant) 233 } 234 235 } 236 } 237 return vInfo 238 }*/