github.com/vertgenlab/gonomics@v1.0.0/genomeGraph/globalAlignment.go (about) 1 package genomeGraph 2 3 import ( 4 "log" 5 6 "github.com/vertgenlab/gonomics/align" 7 "github.com/vertgenlab/gonomics/cigar" 8 "github.com/vertgenlab/gonomics/dna" 9 "github.com/vertgenlab/gonomics/fasta" 10 ) 11 12 func nmMatrixSetup(size int64) ([][]int64, [][]rune) { 13 m := make([][]int64, size) 14 trace := make([][]rune, size) 15 for idx := range m { 16 m[idx] = make([]int64, size) 17 trace[idx] = make([]rune, size) 18 } 19 return m, trace 20 } 21 22 func NeedlemanWunsch(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, trace [][]rune) (int64, []cigar.Cigar) { 23 var i, j, routeIdx int 24 for i = 0; i < len(alpha)+1; i++ { 25 for j = 0; j < len(beta)+1; j++ { 26 if i == 0 && j == 0 { 27 m[i][j] = 'M' 28 } else if i == 0 { 29 m[i][j] = m[i][j-1] + gapPen 30 trace[i][j] = 'I' 31 } else if j == 0 { 32 m[i][j] = m[i-1][j] + gapPen 33 trace[i][j] = 'D' 34 } else { 35 m[i][j], trace[i][j] = tripleMaxTrace(m[i-1][j-1], m[i-1][j-1]+scores[alpha[i-1]][beta[j-1]], m[i][j-1]+gapPen, m[i-1][j]+gapPen) 36 } 37 } 38 } 39 var route []cigar.Cigar 40 route = append(route, cigar.Cigar{RunLength: 0, Op: trace[len(alpha)][len(beta)]}) 41 for i, j, routeIdx = len(alpha)-1, len(beta)-1, 0; i > 0 || j > 0; { 42 if route[routeIdx].RunLength == 0 { 43 route[routeIdx].RunLength = 1 44 route[routeIdx].Op = trace[i][j] 45 } else if route[routeIdx].Op == trace[i][j] { 46 route[routeIdx].RunLength += 1 47 } else { 48 route = append(route, cigar.Cigar{RunLength: 1, Op: trace[i][j]}) 49 routeIdx++ 50 } 51 switch trace[i][j] { 52 case '=': 53 i, j = i-1, j-1 54 case 'X': 55 i, j = i-1, j-1 56 case 'I': 57 j -= 1 58 case 'D': 59 i -= 1 60 default: 61 log.Fatalf("Error: unexpected traceback") 62 } 63 } 64 reverseCigarPointer(route) 65 return m[len(alpha)-1][len(beta)-1], route 66 } 67 68 // FaSeqToNode is a general function used create a new node based on a target fasta, query fasta and a cigar operation. 69 // In addition, given two indices, it will update start/end for the subset of bases used to create the new Node. 70 // TODO: Add logic for correct node name annotation convention. 71 func FaSeqToNode(target fasta.Fasta, query fasta.Fasta, tStart int, qStart int, cigar align.Cigar, index int) (*Node, int, int) { 72 switch cigar.Op { 73 case align.ColM: 74 curr := &Node{Id: uint32(index), Seq: target.Seq[tStart : tStart+int(cigar.RunLength)]} 75 return curr, tStart + int(cigar.RunLength), qStart + int(cigar.RunLength) 76 case align.ColI: 77 ins := &Node{Id: uint32(index), Seq: query.Seq[qStart : qStart+int(cigar.RunLength)]} 78 return ins, tStart, qStart + int(cigar.RunLength) 79 case align.ColD: 80 del := &Node{Id: uint32(index), Seq: target.Seq[tStart : tStart+int(cigar.RunLength)]} 81 return del, tStart + int(cigar.RunLength), qStart 82 default: 83 log.Fatalf("Error: Did not recognize cigar op %d...\n", cigar.Op) 84 return nil, 0, 0 85 } 86 }