github.com/biogo/biogo@v1.0.4/align/pals/dp/align.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package providing PALS dynamic programming alignment routines. 6 package dp 7 8 import ( 9 "github.com/biogo/biogo/align/pals/filter" 10 "github.com/biogo/biogo/seq/linear" 11 12 "errors" 13 "sort" 14 "sync" 15 ) 16 17 // A Params holds dynamic programming alignment parameters. 18 type Params struct { 19 MinHitLength int 20 MinId float64 21 } 22 23 // A Costs specifies dynamic programming behaviour. 24 type Costs struct { 25 MaxIGap int 26 DiffCost int 27 SameCost int 28 MatchCost int 29 BlockCost int 30 RMatchCost float64 31 } 32 33 // An Aligner provides allows local alignment of subsections of long sequences. 34 type Aligner struct { 35 target, query *linear.Seq 36 k int 37 minHitLength int 38 minId float64 39 segs Hits 40 Costs *Costs 41 } 42 43 // Create a new Aligner based on target and query sequences. 44 func NewAligner(target, query *linear.Seq, k, minLength int, minId float64) *Aligner { 45 return &Aligner{ 46 target: target, 47 query: query, 48 k: k, 49 minHitLength: minLength, 50 minId: minId, 51 } 52 } 53 54 // Align pairs of sequence segments defined by trapezoids. 55 // Returns aligning segment pairs satisfying length and identity requirements. 56 func (a *Aligner) AlignTraps(trapezoids filter.Trapezoids) Hits { 57 covered := make([]bool, len(trapezoids)) 58 59 dp := &kernel{ 60 target: a.target, 61 query: a.query, 62 valueToCode: a.target.Alpha.LetterIndex(), 63 trapezoids: trapezoids, 64 covered: covered, 65 minLen: a.minHitLength, 66 maxDiff: 1 - a.minId, 67 68 Costs: *a.Costs, 69 70 result: make(chan Hit), 71 } 72 wg := &sync.WaitGroup{} 73 wg.Add(1) 74 var segs Hits 75 go func() { 76 defer wg.Done() 77 for h := range dp.result { 78 segs = append(segs, h) 79 } 80 }() 81 for i, t := range trapezoids { 82 if !dp.covered[i] && t.Top-t.Bottom >= a.k { 83 dp.slot = i 84 dp.alignRecursion(t) 85 } 86 } 87 close(dp.result) 88 wg.Wait() 89 90 /* Remove lower scoring segments that begin or end at 91 the same point as a higher scoring segment. */ 92 93 if len(segs) > 0 { 94 var i, j int 95 96 sort.Sort(starts(segs)) 97 for i = 0; i < len(segs); i = j { 98 for j = i + 1; j < len(segs); j++ { 99 if segs[j].Abpos != segs[i].Abpos { 100 break 101 } 102 if segs[j].Bbpos != segs[i].Bbpos { 103 break 104 } 105 if segs[j].Score > segs[i].Score { 106 segs[i].Score = -1 107 i = j 108 } else { 109 segs[j].Score = -1 110 } 111 } 112 } 113 114 sort.Sort(ends(segs)) 115 for i = 0; i < len(segs); i = j { 116 for j = i + 1; j < len(segs); j++ { 117 if segs[j].Aepos != segs[i].Aepos { 118 break 119 } 120 if segs[j].Bepos != segs[i].Bepos { 121 break 122 } 123 if segs[j].Score > segs[i].Score { 124 segs[i].Score = -1 125 i = j 126 } else { 127 segs[j].Score = -1 128 } 129 } 130 } 131 132 found := 0 133 for i = 0; i < len(segs); i++ { 134 if segs[i].Score >= 0 { 135 segs[found] = segs[i] 136 found++ 137 } 138 } 139 segs = segs[:found] 140 } 141 142 return segs 143 } 144 145 // Hit holds details of alignment result. 146 type Hit struct { 147 Abpos, Bbpos int // Start coordinate of local alignment 148 Aepos, Bepos int // End coordinate of local alignment 149 LowDiagonal, HighDiagonal int // Alignment is between (anti)diagonals LowDiagonal & HighDiagonal 150 Score int // Score of alignment where match = SameCost, difference = -DiffCost 151 Error float64 // Lower bound on error rate of match 152 } 153 154 // DPHits is a collection of alignment results. 155 type Hits []Hit 156 157 // Returns the sums of alignment lengths. 158 func (h Hits) Sum() (a, b int, err error) { 159 for _, hit := range h { 160 la, lb := hit.Aepos-hit.Abpos, hit.Bepos-hit.Bbpos 161 if la < 0 || lb < 0 { 162 return 0, 0, errors.New("dp: negative trapezoid area") 163 } 164 a, b = a+la, b+lb 165 } 166 return 167 }