github.com/vertgenlab/gonomics@v1.0.0/genomeGraph/search.go (about) 1 package genomeGraph 2 3 import ( 4 "bytes" 5 "github.com/vertgenlab/gonomics/cigar" 6 "github.com/vertgenlab/gonomics/dna" 7 "github.com/vertgenlab/gonomics/dna/dnaTwoBit" 8 "github.com/vertgenlab/gonomics/exception" 9 "github.com/vertgenlab/gonomics/fastq" 10 "github.com/vertgenlab/gonomics/fileio" 11 "github.com/vertgenlab/gonomics/giraf" 12 "github.com/vertgenlab/gonomics/numbers" 13 "io" 14 "log" 15 "math" 16 "sort" 17 "sync" 18 ) 19 20 const ( 21 defaultMatrixSize int = 2480 22 ) 23 24 type memoryPool struct { 25 Hits []SeedDev 26 Worker []SeedDev 27 } 28 29 type MatrixAln struct { 30 m [][]int64 31 trace [][]byte 32 } 33 34 type dnaPool struct { 35 Seq []dna.Base 36 Path []uint32 37 queryStart int 38 queryEnd int 39 targetStart int 40 targetEnd int 41 currScore int64 42 } 43 44 type dynamicScoreKeeper struct { 45 i int 46 j int 47 routeIdx int 48 currMax int64 49 route []cigar.ByteCigar 50 } 51 52 type scoreKeeper struct { 53 targetStart int 54 targetEnd int 55 queryStart int 56 queryEnd int 57 extension int 58 currScore int64 59 seedScore int64 60 perfectScore int64 61 leftScore int64 62 rightScore int64 63 leftPath []uint32 64 rightPath []uint32 65 leftSeq []dna.Base 66 rightSeq []dna.Base 67 currSeq []dna.Base 68 tailSeed SeedDev 69 70 currSeed SeedDev 71 leftAlignment []cigar.ByteCigar 72 rightAlignment []cigar.ByteCigar 73 } 74 75 func NewDnaPool() sync.Pool { 76 return sync.Pool{ 77 New: func() interface{} { 78 dnaSeq := dnaPool{ 79 Seq: make([]dna.Base, 0, 150), 80 Path: make([]uint32, 0, 10), 81 queryStart: 0, 82 targetStart: 0, 83 targetEnd: 0, 84 queryEnd: 0, 85 } 86 return &dnaSeq 87 }, 88 } 89 } 90 91 func NewMemSeedPool() sync.Pool { 92 return sync.Pool{ 93 New: func() interface{} { 94 pool := memoryPool{ 95 Hits: make([]SeedDev, 0, 10000), 96 Worker: make([]SeedDev, 0, 10000), 97 } 98 return &pool 99 }, 100 } 101 } 102 103 func resetDynamicScore(sk dynamicScoreKeeper) { 104 sk.route = sk.route[:0] 105 sk.currMax = 0 106 } 107 func NewSwMatrix(size int) MatrixAln { 108 sw := MatrixAln{} 109 sw.m, sw.trace = MatrixSetup(size) 110 return sw 111 } 112 113 func MatrixSetup(size int) ([][]int64, [][]byte) { 114 m := make([][]int64, size) 115 trace := make([][]byte, size) 116 for idx := range m { 117 m[idx] = make([]int64, size) 118 trace[idx] = make([]byte, size) 119 } 120 return m, trace 121 } 122 123 func resetScoreKeeper(sk scoreKeeper) { 124 sk.targetStart, sk.targetEnd = 0, 0 125 sk.queryStart, sk.queryEnd = 0, 0 126 sk.currScore, sk.seedScore = 0, 0 127 sk.perfectScore = 0 128 sk.leftAlignment, sk.rightAlignment = sk.leftAlignment[:0], sk.rightAlignment[:0] 129 sk.leftPath, sk.rightPath = sk.leftPath[:0], sk.rightPath[:0] 130 sk.leftSeq, sk.rightSeq, sk.currSeq = sk.leftSeq[:0], sk.rightSeq[:0], sk.currSeq[:0] 131 sk.leftScore, sk.rightScore = 0, 0 132 } 133 134 func getLeftTargetBases(n *Node, extension int, refEnd int, seq []dna.Base, ans []dna.Base) []dna.Base { 135 //var availableBases int = len(seq) + refEnd 136 //var targetLength int = common.Min(availableBases, extension) 137 //var basesToTake int = targetLength - len(seq) 138 return append(append(ans, n.Seq[refEnd-numbers.Min(len(seq)+refEnd, extension)-len(seq):refEnd]...), seq...) 139 } 140 141 func getRightBases(n *Node, extension int, start int, seq []dna.Base, ans []dna.Base) []dna.Base { 142 //var availableBases int = len(seq) + len(n.Seq) - start 143 //var targetLength int = common.Min(availableBases, extension) 144 //var basesToTake int = targetLength - len(seq) 145 return append(append(ans, seq...), n.Seq[start:start+numbers.Min(len(seq)+len(n.Seq)-start, extension)-len(seq)]...) 146 } 147 148 /* 149 func leftBasesFromTwoBit(n *Node, extension int, refEnd int, seq []dna.Base, ans []dna.Base) []dna.Base { 150 var availableBases int = len(seq) + refEnd 151 var targetLength int = common.Min(availableBases, extension) 152 var basesToTake int = targetLength - len(seq) 153 return append(append(ans, seq...), dnaTwoBit.GetFrag(n.SeqTwoBit, refEnd-basesToTake, refEnd)...) 154 }*/ 155 156 /* 157 func rightBasesFromTwoBit(n *Node, extension int, start int, seq []dna.Base, ans []dna.Base) []dna.Base { 158 var availableBases int = len(seq) + len(n.Seq) - start 159 var targetLength int = common.Min(availableBases, extension) 160 var basesToTake int = targetLength - len(seq) 161 162 return append(append(ans, seq...), dnaTwoBit.GetFrag(n.SeqTwoBit, start, start+basesToTake)...) 163 }*/ 164 165 func LeftAlignTraversal(n *Node, seq []dna.Base, refEnd int, currentPath []uint32, extension int, read []dna.Base, scores [][]int64, matrix *MatrixAln, sk scoreKeeper, dynamicScore dynamicScoreKeeper, pool *sync.Pool) ([]cigar.ByteCigar, int64, int, int, []uint32) { 166 //if len(seq) >= extension { 167 // log.Fatalf("Error: left traversal, the length=%d of DNA sequence in previous nodes should not be enough to satisfy the desired extension=%d.\n", len(seq), extension) 168 //} 169 s := pool.Get().(*dnaPool) 170 s.Seq, s.Path = s.Seq[:0], s.Path[:0] 171 s.Seq = getLeftTargetBases(n, extension, refEnd, seq, s.Seq) 172 s.Path = make([]uint32, len(currentPath)) 173 copy(s.Path, currentPath) 174 AddPath(s.Path, n.Id) 175 if len(seq)+refEnd >= extension || len(n.Prev) == 0 { 176 sk.leftScore, sk.leftAlignment, sk.targetStart, sk.queryStart = LeftDynamicAln(s.Seq, read, scores, matrix, -600, dynamicScore) 177 sk.targetStart = refEnd - len(s.Seq) - len(seq) + sk.targetStart 178 sk.leftPath = s.Path 179 pool.Put(s) 180 return sk.leftAlignment, sk.leftScore, sk.targetStart, sk.queryStart, sk.leftPath 181 } else { 182 //A very negative number 183 sk.leftScore = math.MinInt64 184 for _, i := range n.Prev { 185 dynamicScore.route, s.currScore, s.targetStart, s.queryStart, s.Path = LeftAlignTraversal(i.Dest, s.Seq, len(i.Dest.Seq), s.Path, extension, read, scores, matrix, sk, dynamicScore, pool) 186 if s.currScore > sk.leftScore { 187 sk.leftScore = s.currScore 188 sk.leftAlignment = dynamicScore.route 189 sk.targetStart = refEnd - len(s.Seq) - len(seq) + s.targetStart 190 sk.queryStart = s.queryStart 191 sk.leftPath = s.Path 192 } 193 } 194 pool.Put(s) 195 cigar.ReverseBytesCigar(sk.leftAlignment) 196 ReversePath(sk.leftPath) 197 return sk.leftAlignment, sk.leftScore, sk.targetStart, sk.queryStart, sk.leftPath 198 } 199 } 200 201 func RightAlignTraversal(n *Node, seq []dna.Base, start int, currentPath []uint32, extension int, read []dna.Base, scoreMatrix [][]int64, matrix *MatrixAln, sk scoreKeeper, dynamicScore dynamicScoreKeeper, pool *sync.Pool) ([]cigar.ByteCigar, int64, int, int, []uint32) { 202 //if len(seq) >= extension { 203 // log.Fatalf("Error: right traversal, the length=%d of DNA sequence in previous nodes should not be enough to satisfy the desired extension=%d.\n", len(seq), extension) 204 //} 205 s := pool.Get().(*dnaPool) 206 s.Seq, s.Path = s.Seq[:0], s.Path[:0] 207 s.Seq = getRightBases(n, extension, start, seq, s.Seq) 208 s.Path = make([]uint32, len(currentPath)) 209 copy(s.Path, currentPath) 210 if len(seq)+len(n.Seq)-start >= extension || len(n.Next) == 0 { 211 sk.rightScore, sk.rightAlignment, sk.targetEnd, sk.queryEnd = RightDynamicAln(s.Seq, read, scoreMatrix, matrix, -600, dynamicScore) 212 sk.rightPath = s.Path 213 pool.Put(s) 214 return sk.rightAlignment, sk.rightScore, sk.targetEnd + start, sk.queryEnd, sk.rightPath 215 } else { 216 sk.rightScore = math.MinInt64 217 for _, i := range n.Next { 218 dynamicScore.route, s.currScore, s.targetEnd, s.queryEnd, s.Path = RightAlignTraversal(i.Dest, s.Seq, 0, s.Path, extension, read, scoreMatrix, matrix, sk, dynamicScore, pool) 219 if s.currScore > sk.rightScore { 220 sk.rightScore = s.currScore 221 sk.rightAlignment = dynamicScore.route 222 sk.targetEnd = s.targetEnd 223 sk.queryEnd = s.queryEnd 224 sk.rightPath = s.Path 225 } 226 } 227 pool.Put(s) 228 cigar.ReverseBytesCigar(sk.rightAlignment) 229 return sk.rightAlignment, sk.rightScore, sk.targetEnd + start, sk.queryEnd, sk.rightPath 230 } 231 } 232 233 func LeftDynamicAln(alpha []dna.Base, beta []dna.Base, scores [][]int64, matrix *MatrixAln, gapPen int64, dynamicScore dynamicScoreKeeper) (int64, []cigar.ByteCigar, int, int) { 234 resetDynamicScore(dynamicScore) 235 for dynamicScore.i = 0; dynamicScore.i < len(alpha)+1; dynamicScore.i++ { 236 matrix.m[dynamicScore.i][0] = 0 237 } 238 239 for dynamicScore.j = 0; dynamicScore.j < len(beta)+1; dynamicScore.j++ { 240 matrix.m[0][dynamicScore.j] = 0 241 } 242 243 for dynamicScore.i = 1; dynamicScore.i < len(alpha)+1; dynamicScore.i++ { 244 for dynamicScore.j = 1; dynamicScore.j < len(beta)+1; dynamicScore.j++ { 245 matrix.m[dynamicScore.i][dynamicScore.j], matrix.trace[dynamicScore.i][dynamicScore.j] = cigar.ByteMatrixTrace(matrix.m[dynamicScore.i-1][dynamicScore.j-1]+scores[alpha[dynamicScore.i-1]][beta[dynamicScore.j-1]], matrix.m[dynamicScore.i][dynamicScore.j-1]+gapPen, matrix.m[dynamicScore.i-1][dynamicScore.j]+gapPen) 246 if matrix.m[dynamicScore.i][dynamicScore.j] < 0 { 247 matrix.m[dynamicScore.i][dynamicScore.j] = 0 248 } 249 } 250 } 251 252 for dynamicScore.i, dynamicScore.j, dynamicScore.routeIdx = len(alpha), len(beta), 0; matrix.m[dynamicScore.i][dynamicScore.j] > 0; { 253 if len(dynamicScore.route) == 0 { 254 dynamicScore.route = append(dynamicScore.route, cigar.ByteCigar{RunLen: 1, Op: matrix.trace[dynamicScore.i][dynamicScore.j]}) 255 } else if dynamicScore.route[dynamicScore.routeIdx].Op == matrix.trace[dynamicScore.i][dynamicScore.j] { 256 dynamicScore.route[dynamicScore.routeIdx].RunLen += 1 257 } else { 258 dynamicScore.route = append(dynamicScore.route, cigar.ByteCigar{RunLen: 1, Op: matrix.trace[dynamicScore.i][dynamicScore.j]}) 259 dynamicScore.routeIdx++ 260 } 261 switch matrix.trace[dynamicScore.i][dynamicScore.j] { 262 case 'M': 263 dynamicScore.i, dynamicScore.j = dynamicScore.i-1, dynamicScore.j-1 264 case 'I': 265 dynamicScore.j -= 1 266 case 'D': 267 dynamicScore.i -= 1 268 default: 269 log.Fatalf("Error: unexpected traceback %c\n", matrix.trace[dynamicScore.i][dynamicScore.j]) 270 } 271 } 272 return matrix.m[len(alpha)][len(beta)], dynamicScore.route, dynamicScore.i, dynamicScore.j 273 } 274 275 func RightDynamicAln(alpha []dna.Base, beta []dna.Base, scores [][]int64, matrix *MatrixAln, gapPen int64, dynamicScore dynamicScoreKeeper) (int64, []cigar.ByteCigar, int, int) { 276 resetDynamicScore(dynamicScore) 277 var maxI int 278 var maxJ int 279 for dynamicScore.i = 0; dynamicScore.i < len(alpha)+1; dynamicScore.i++ { 280 for dynamicScore.j = 0; dynamicScore.j < len(beta)+1; dynamicScore.j++ { 281 if dynamicScore.i == 0 && dynamicScore.j == 0 { 282 matrix.m[dynamicScore.i][dynamicScore.j] = 0 283 } else if dynamicScore.i == 0 { 284 matrix.m[dynamicScore.i][dynamicScore.j] = matrix.m[dynamicScore.i][dynamicScore.j-1] + gapPen 285 matrix.trace[dynamicScore.i][dynamicScore.j] = 'I' 286 } else if dynamicScore.j == 0 { 287 matrix.m[dynamicScore.i][dynamicScore.j] = matrix.m[dynamicScore.i-1][dynamicScore.j] + gapPen 288 matrix.trace[dynamicScore.i][dynamicScore.j] = 'D' 289 } else { 290 matrix.m[dynamicScore.i][dynamicScore.j], matrix.trace[dynamicScore.i][dynamicScore.j] = cigar.ByteMatrixTrace(matrix.m[dynamicScore.i-1][dynamicScore.j-1]+scores[alpha[dynamicScore.i-1]][beta[dynamicScore.j-1]], matrix.m[dynamicScore.i][dynamicScore.j-1]+gapPen, matrix.m[dynamicScore.i-1][dynamicScore.j]+gapPen) 291 } 292 if matrix.m[dynamicScore.i][dynamicScore.j] > dynamicScore.currMax { 293 dynamicScore.currMax = matrix.m[dynamicScore.i][dynamicScore.j] 294 maxI = dynamicScore.i 295 maxJ = dynamicScore.j 296 } 297 } 298 } 299 for dynamicScore.i, dynamicScore.j, dynamicScore.routeIdx = maxI, maxJ, 0; dynamicScore.i > 0 || dynamicScore.j > 0; { 300 if len(dynamicScore.route) == 0 { 301 dynamicScore.route = append(dynamicScore.route, cigar.ByteCigar{RunLen: 1, Op: matrix.trace[dynamicScore.i][dynamicScore.j]}) 302 } else if dynamicScore.route[dynamicScore.routeIdx].Op == matrix.trace[dynamicScore.i][dynamicScore.j] { 303 dynamicScore.route[dynamicScore.routeIdx].RunLen += 1 304 } else { 305 dynamicScore.route = append(dynamicScore.route, cigar.ByteCigar{RunLen: 1, Op: matrix.trace[dynamicScore.i][dynamicScore.j]}) 306 dynamicScore.routeIdx++ 307 } 308 switch matrix.trace[dynamicScore.i][dynamicScore.j] { 309 case 'M': 310 dynamicScore.i, dynamicScore.j = dynamicScore.i-1, dynamicScore.j-1 311 case 'I': 312 dynamicScore.j -= 1 313 case 'D': 314 dynamicScore.i -= 1 315 default: 316 log.Fatalf("Error: unexpected traceback with %c\n", matrix.trace[dynamicScore.i][dynamicScore.j]) 317 } 318 } 319 return matrix.m[maxI][maxJ], dynamicScore.route, maxI, maxJ 320 } 321 322 func ReversePath(alpha []uint32) { 323 var i, off int 324 for i = len(alpha)/2 - 1; i >= 0; i-- { 325 off = len(alpha) - 1 - i 326 alpha[i], alpha[off] = alpha[off], alpha[i] 327 } 328 } 329 330 func leftSeed(i int) int { 331 return 2*i + 1 332 } 333 334 func rightSeed(i int) int { 335 return 2*i + 2 336 } 337 338 func seedsHeapify(a []SeedDev, i int) []SeedDev { 339 l := leftSeed(i) 340 r := rightSeed(i) 341 var max int 342 if l < len(a) && l >= 0 && a[l].TotalLength < a[i].TotalLength { 343 max = l 344 } else { 345 max = i 346 } 347 if r < len(a) && r >= 0 && a[r].TotalLength < a[max].TotalLength { 348 max = r 349 } 350 if max != i { 351 a[i], a[max] = a[max], a[i] 352 a = seedsHeapify(a, max) 353 } 354 return a 355 } 356 357 func buildSeedHeap(a []SeedDev) []SeedDev { 358 for i := len(a)/2 - 1; i >= 0; i-- { 359 a = seedsHeapify(a, i) 360 } 361 return a 362 } 363 364 func heapSortSeeds(a []SeedDev) { 365 a = buildSeedHeap(a) 366 size := len(a) 367 for i := size - 1; i >= 1; i-- { 368 a[0], a[i] = a[i], a[0] 369 size-- 370 seedsHeapify(a[:size], 0) 371 } 372 } 373 374 func quickSort(arr []*SeedDev) []*SeedDev { 375 newArr := make([]*SeedDev, len(arr)) 376 copy(newArr, arr) 377 recursiveSort(newArr, 0, len(arr)-1) 378 return newArr 379 } 380 381 func recursiveSort(arr []*SeedDev, start, end int) { 382 if (end - start) < 1 { 383 return 384 } 385 386 pivot := arr[end] 387 splitIndex := start 388 389 // Iterate sub array to find values less than pivot 390 // and move them to the beginning of the array 391 // keeping splitIndex denoting less-value array size 392 for i := start; i < end; i++ { 393 if arr[i].TotalLength > pivot.TotalLength { 394 if splitIndex != i { 395 temp := arr[splitIndex] 396 397 arr[splitIndex] = arr[i] 398 arr[i] = temp 399 } 400 401 splitIndex++ 402 } 403 } 404 405 arr[end] = arr[splitIndex] 406 arr[splitIndex] = pivot 407 408 recursiveSort(arr, start, splitIndex-1) 409 recursiveSort(arr, splitIndex+1, end) 410 } 411 412 type seedHelper struct { 413 currHits []uint64 414 codedNodeCoord uint64 415 seqKey uint64 416 keyShift uint 417 keyIdx, keyOffset, readOffset, nodeOffset int 418 nodeIdx, nodePos int64 419 leftMatches int 420 rightMatches int 421 tempSeed SeedDev 422 } 423 424 func extendToTheRightDev(node *Node, read *fastq.FastqBig, readStart int, nodeStart int, posStrand bool, answer []SeedDev) []SeedDev { 425 const basesPerInt int = 32 426 answer = answer[:0] 427 var nodeOffset int = nodeStart % basesPerInt 428 var readOffset int = 31 - ((readStart - nodeOffset + 31) % 32) 429 var rightMatches int = 0 430 var currNode SeedDev 431 var nextParts []SeedDev 432 var i, j int = 0, 0 433 if posStrand { 434 rightMatches = dnaTwoBit.CountRightMatches(node.SeqTwoBit, nodeStart, &read.Rainbow[readOffset], readStart+readOffset) 435 } else { 436 rightMatches = dnaTwoBit.CountRightMatches(node.SeqTwoBit, nodeStart, &read.RainbowRc[readOffset], readStart+readOffset) 437 } 438 // nothing aligned here 439 if rightMatches == 0 { 440 return nil 441 } 442 // we went all the way to end and there might be more 443 if readStart+rightMatches < len(read.Seq) && nodeStart+rightMatches == node.SeqTwoBit.Len && len(node.Next) != 0 { 444 for i = 0; i < len(node.Next); i++ { 445 nextParts = extendToTheRightDev(node.Next[i].Dest, read, readStart+rightMatches, 0, posStrand, nextParts) 446 // if we aligned into the next node, make a seed for this node and point it to the next one 447 for j = 0; j < len(nextParts); j++ { 448 currNode = SeedDev{TargetId: node.Id, TargetStart: uint32(nodeStart), QueryStart: uint32(readStart), Length: uint32(rightMatches), PosStrand: posStrand, TotalLength: uint32(rightMatches) + nextParts[j].TotalLength, NextPart: &nextParts[j]} 449 answer = append(answer, currNode) 450 } 451 } 452 } 453 // if the alignment did not go to another node, return the match for this node 454 if len(answer) == 0 { 455 currNode = SeedDev{TargetId: node.Id, TargetStart: uint32(nodeStart), QueryStart: uint32(readStart), Length: uint32(rightMatches), PosStrand: posStrand, TotalLength: uint32(rightMatches), NextPart: nil} 456 answer = []SeedDev{currNode} 457 } 458 459 return answer 460 } 461 462 func extendToTheLeftDev(node *Node, read *fastq.FastqBig, currPart SeedDev) []SeedDev { 463 var answer, prevParts []SeedDev 464 var i int 465 var readBase dna.Base 466 467 if currPart.QueryStart > 0 && currPart.TargetStart == 0 { 468 for i = 0; i < len(node.Prev); i++ { 469 if currPart.PosStrand { 470 readBase = dnaTwoBit.GetBase(&read.Rainbow[0], uint(currPart.QueryStart)-1) 471 } else { 472 readBase = dnaTwoBit.GetBase(&read.RainbowRc[0], uint(currPart.QueryStart)-1) 473 } 474 if readBase == dnaTwoBit.GetBase(node.Prev[i].Dest.SeqTwoBit, uint(node.Prev[i].Dest.SeqTwoBit.Len)-1) { 475 prevParts = extendToTheLeftHelperDev(node.Prev[i].Dest, read, currPart) 476 answer = append(answer, prevParts...) 477 } 478 } 479 } 480 481 if len(answer) == 0 { 482 return []SeedDev{currPart} 483 } else { 484 return answer 485 } 486 } 487 488 func extendToTheLeftHelperDev(node *Node, read *fastq.FastqBig, nextPart SeedDev) []SeedDev { 489 const basesPerInt int = 32 490 var nodePos int = node.SeqTwoBit.Len - 1 491 var readPos int = int(nextPart.QueryStart) - 1 492 var nodeOffset int = nodePos % basesPerInt 493 var readOffset int = 31 - ((readPos - nodeOffset + 31) % 32) 494 var leftMatches int = 0 495 var currPart SeedDev 496 var prevParts, answer []SeedDev 497 var i int 498 var readBase dna.Base 499 500 if nextPart.PosStrand { 501 leftMatches = numbers.Min(readPos+1, dnaTwoBit.CountLeftMatches(node.SeqTwoBit, nodePos, &read.Rainbow[readOffset], readPos+readOffset)) 502 } else { 503 leftMatches = numbers.Min(readPos+1, dnaTwoBit.CountLeftMatches(node.SeqTwoBit, nodePos, &read.RainbowRc[readOffset], readPos+readOffset)) 504 } 505 506 if leftMatches == 0 { 507 log.Fatal("Error: should not have zero matches to the left\n") 508 } 509 currPart = SeedDev{TargetId: node.Id, TargetStart: uint32(nodePos - (leftMatches - 1)), QueryStart: uint32(readPos - (leftMatches - 1)), Length: uint32(leftMatches), PosStrand: nextPart.PosStrand, TotalLength: uint32(leftMatches) + nextPart.TotalLength, NextPart: &nextPart} 510 // we went all the way to end and there might be more 511 if currPart.QueryStart > 0 && currPart.TargetStart == 0 { 512 for i = 0; i < len(node.Prev); i++ { 513 if nextPart.PosStrand { 514 readBase = dnaTwoBit.GetBase(&read.Rainbow[0], uint(currPart.QueryStart)-1) 515 } else { 516 readBase = dnaTwoBit.GetBase(&read.RainbowRc[0], uint(currPart.QueryStart)-1) 517 } 518 if readBase == dnaTwoBit.GetBase(node.Prev[i].Dest.SeqTwoBit, uint(node.Prev[i].Dest.SeqTwoBit.Len)-1) { 519 prevParts = extendToTheLeftHelperDev(node.Prev[i].Dest, read, currPart) 520 answer = append(answer, prevParts...) 521 } 522 } 523 } 524 // if the alignment did not go to another node, return the match for this node 525 if len(answer) == 0 { 526 answer = []SeedDev{currPart} 527 } 528 return answer 529 } 530 531 func newSeedBuilder() *seedHelper { 532 var tmp SeedDev = SeedDev{} 533 return &seedHelper{ 534 currHits: make([]uint64, 0, 20), 535 tempSeed: tmp, 536 } 537 } 538 539 func restartSeedHelper(helper *seedHelper) { 540 helper.currHits = helper.currHits[:0] 541 helper.keyIdx, helper.keyOffset, helper.readOffset, helper.nodeOffset = 0, 0, 0, 0 542 helper.nodeIdx, helper.nodePos = 0, 0 543 helper.seqKey, helper.codedNodeCoord = 0, 0 544 helper.leftMatches = 0 545 } 546 547 // seedBuildHelper.nodeIdx, seedBuildHelper.nodePos int64 = 0, 0. 548 func seedMapMemPool(seedHash map[uint64][]uint64, nodes []Node, read *fastq.FastqBig, seedLen int, perfectScore int64, scoreMatrix [][]int64, finalSeeds []SeedDev, tempSeeds []SeedDev, seedBuildHelper *seedHelper) []SeedDev { 549 const basesPerInt int64 = 32 550 restartSeedHelper(seedBuildHelper) 551 seedBuildHelper.keyShift = 64 - (uint(seedLen) * 2) 552 553 for readStart := 0; readStart < len(read.Seq)-seedLen+1; readStart++ { 554 seedBuildHelper.keyIdx = (readStart + 31) / 32 555 seedBuildHelper.keyOffset = 31 - ((readStart + 31) % 32) 556 // do fwd strand 557 seedBuildHelper.seqKey = read.Rainbow[seedBuildHelper.keyOffset].Seq[seedBuildHelper.keyIdx] >> seedBuildHelper.keyShift 558 seedBuildHelper.currHits = seedHash[seedBuildHelper.seqKey] 559 560 for _, seedBuildHelper.codedNodeCoord = range seedBuildHelper.currHits { 561 seedBuildHelper.nodeIdx, seedBuildHelper.nodePos = numberToChromAndPos(seedBuildHelper.codedNodeCoord) 562 seedBuildHelper.nodeOffset = int(seedBuildHelper.nodePos % basesPerInt) 563 seedBuildHelper.readOffset = 31 - ((readStart - seedBuildHelper.nodeOffset + 31) % 32) 564 seedBuildHelper.leftMatches = numbers.Min(readStart+1, dnaTwoBit.CountLeftMatches(nodes[seedBuildHelper.nodeIdx].SeqTwoBit, int(seedBuildHelper.nodePos), &read.Rainbow[seedBuildHelper.readOffset], readStart+seedBuildHelper.readOffset)) 565 tempSeeds = extendToTheRightDev(&nodes[seedBuildHelper.nodeIdx], read, readStart-(seedBuildHelper.leftMatches-1), int(seedBuildHelper.nodePos)-(seedBuildHelper.leftMatches-1), true, tempSeeds) 566 for _, seedBuildHelper.tempSeed = range tempSeeds { 567 finalSeeds = append(finalSeeds, extendToTheLeftDev(&nodes[seedBuildHelper.nodeIdx], read, seedBuildHelper.tempSeed)...) 568 } 569 } 570 // do rev strand 571 seedBuildHelper.seqKey = read.RainbowRc[seedBuildHelper.keyOffset].Seq[seedBuildHelper.keyIdx] >> seedBuildHelper.keyShift 572 seedBuildHelper.currHits = seedHash[seedBuildHelper.seqKey] 573 for _, seedBuildHelper.codedNodeCoord = range seedBuildHelper.currHits { 574 seedBuildHelper.nodeIdx, seedBuildHelper.nodePos = numberToChromAndPos(seedBuildHelper.codedNodeCoord) 575 seedBuildHelper.nodeOffset = int(seedBuildHelper.nodePos % basesPerInt) 576 seedBuildHelper.readOffset = 31 - ((readStart - seedBuildHelper.nodeOffset + 31) % 32) 577 578 seedBuildHelper.leftMatches = numbers.Min(readStart+1, dnaTwoBit.CountLeftMatches(nodes[seedBuildHelper.nodeIdx].SeqTwoBit, int(seedBuildHelper.nodePos), &read.RainbowRc[seedBuildHelper.readOffset], readStart+seedBuildHelper.readOffset)) 579 tempSeeds = extendToTheRightDev(&nodes[seedBuildHelper.nodeIdx], read, readStart-(seedBuildHelper.leftMatches-1), int(seedBuildHelper.nodePos)-(seedBuildHelper.leftMatches-1), false, tempSeeds) 580 finalSeeds = append(finalSeeds, tempSeeds...) 581 } 582 } 583 if len(finalSeeds) > 100 { 584 SortSeedLen(finalSeeds) 585 } else { 586 heapSortSeeds(finalSeeds) 587 } 588 return finalSeeds 589 } 590 func SortSeedLen(seeds []SeedDev) { 591 sort.Slice(seeds, func(i, j int) bool { return seeds[i].TotalLength > seeds[j].TotalLength }) 592 } 593 594 func SoftClipBases(front int, lengthOfRead int, cig []cigar.ByteCigar) []cigar.ByteCigar { 595 var runLen int = cigar.QueryRunLen(cig) 596 if runLen < lengthOfRead { 597 answer := make([]cigar.ByteCigar, 0, len(cig)+2) 598 if front > 0 { 599 answer = append(answer, cigar.ByteCigar{RunLen: uint16(front), Op: 'S'}) 600 } 601 answer = append(answer, cig...) 602 if front+cigar.QueryRunLen(cig) < lengthOfRead { 603 answer = append(answer, cigar.ByteCigar{RunLen: uint16(lengthOfRead - front - runLen), Op: 'S'}) 604 } 605 return answer 606 } else { 607 return cig 608 } 609 } 610 611 func SimpleWriteGirafPair(filename string, input <-chan giraf.GirafPair, wg *sync.WaitGroup) { 612 file := fileio.EasyCreate(filename) 613 var buf *bytes.Buffer 614 var simplePool = sync.Pool{ 615 New: func() interface{} { 616 return &bytes.Buffer{} 617 }, 618 } 619 var err error 620 for gp := range input { 621 buf = simplePool.Get().(*bytes.Buffer) 622 buf.Reset() 623 _, err = buf.WriteString(giraf.ToString(&gp.Fwd)) 624 exception.PanicOnErr(err) 625 err = buf.WriteByte('\n') 626 exception.PanicOnErr(err) 627 _, err = buf.WriteString(giraf.ToString(&gp.Rev)) 628 exception.PanicOnErr(err) 629 err = buf.WriteByte('\n') 630 exception.PanicOnErr(err) 631 _, err = io.Copy(file, buf) 632 exception.PanicOnErr(err) 633 simplePool.Put(buf) 634 } 635 err = file.Close() 636 exception.PanicOnErr(err) 637 wg.Done() 638 }