github.com/vertgenlab/gonomics@v1.0.0/genomeGraph/globalAlignment.go (about)

     1  package genomeGraph
     2  
     3  import (
     4  	"log"
     5  
     6  	"github.com/vertgenlab/gonomics/align"
     7  	"github.com/vertgenlab/gonomics/cigar"
     8  	"github.com/vertgenlab/gonomics/dna"
     9  	"github.com/vertgenlab/gonomics/fasta"
    10  )
    11  
    12  func nmMatrixSetup(size int64) ([][]int64, [][]rune) {
    13  	m := make([][]int64, size)
    14  	trace := make([][]rune, size)
    15  	for idx := range m {
    16  		m[idx] = make([]int64, size)
    17  		trace[idx] = make([]rune, size)
    18  	}
    19  	return m, trace
    20  }
    21  
    22  func NeedlemanWunsch(alpha []dna.Base, beta []dna.Base, scores [][]int64, gapPen int64, m [][]int64, trace [][]rune) (int64, []cigar.Cigar) {
    23  	var i, j, routeIdx int
    24  	for i = 0; i < len(alpha)+1; i++ {
    25  		for j = 0; j < len(beta)+1; j++ {
    26  			if i == 0 && j == 0 {
    27  				m[i][j] = 'M'
    28  			} else if i == 0 {
    29  				m[i][j] = m[i][j-1] + gapPen
    30  				trace[i][j] = 'I'
    31  			} else if j == 0 {
    32  				m[i][j] = m[i-1][j] + gapPen
    33  				trace[i][j] = 'D'
    34  			} else {
    35  				m[i][j], trace[i][j] = tripleMaxTrace(m[i-1][j-1], m[i-1][j-1]+scores[alpha[i-1]][beta[j-1]], m[i][j-1]+gapPen, m[i-1][j]+gapPen)
    36  			}
    37  		}
    38  	}
    39  	var route []cigar.Cigar
    40  	route = append(route, cigar.Cigar{RunLength: 0, Op: trace[len(alpha)][len(beta)]})
    41  	for i, j, routeIdx = len(alpha)-1, len(beta)-1, 0; i > 0 || j > 0; {
    42  		if route[routeIdx].RunLength == 0 {
    43  			route[routeIdx].RunLength = 1
    44  			route[routeIdx].Op = trace[i][j]
    45  		} else if route[routeIdx].Op == trace[i][j] {
    46  			route[routeIdx].RunLength += 1
    47  		} else {
    48  			route = append(route, cigar.Cigar{RunLength: 1, Op: trace[i][j]})
    49  			routeIdx++
    50  		}
    51  		switch trace[i][j] {
    52  		case '=':
    53  			i, j = i-1, j-1
    54  		case 'X':
    55  			i, j = i-1, j-1
    56  		case 'I':
    57  			j -= 1
    58  		case 'D':
    59  			i -= 1
    60  		default:
    61  			log.Fatalf("Error: unexpected traceback")
    62  		}
    63  	}
    64  	reverseCigarPointer(route)
    65  	return m[len(alpha)-1][len(beta)-1], route
    66  }
    67  
    68  // FaSeqToNode is a general function used create a new node based on a target fasta, query fasta and a cigar operation.
    69  // In addition, given two indices, it will update start/end for the subset of bases used to create the new Node.
    70  // TODO: Add logic for correct node name annotation convention.
    71  func FaSeqToNode(target fasta.Fasta, query fasta.Fasta, tStart int, qStart int, cigar align.Cigar, index int) (*Node, int, int) {
    72  	switch cigar.Op {
    73  	case align.ColM:
    74  		curr := &Node{Id: uint32(index), Seq: target.Seq[tStart : tStart+int(cigar.RunLength)]}
    75  		return curr, tStart + int(cigar.RunLength), qStart + int(cigar.RunLength)
    76  	case align.ColI:
    77  		ins := &Node{Id: uint32(index), Seq: query.Seq[qStart : qStart+int(cigar.RunLength)]}
    78  		return ins, tStart, qStart + int(cigar.RunLength)
    79  	case align.ColD:
    80  		del := &Node{Id: uint32(index), Seq: target.Seq[tStart : tStart+int(cigar.RunLength)]}
    81  		return del, tStart + int(cigar.RunLength), qStart
    82  	default:
    83  		log.Fatalf("Error: Did not recognize cigar op %d...\n", cigar.Op)
    84  		return nil, 0, 0
    85  	}
    86  }