github.com/biogo/biogo@v1.0.4/align/pals/dp/kernel.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package dp 6 7 import ( 8 "github.com/biogo/biogo/align/pals/filter" 9 "github.com/biogo/biogo/alphabet" 10 "github.com/biogo/biogo/seq/linear" 11 ) 12 13 // A kernel handles the actual dp alignment process. 14 type kernel struct { 15 target, query *linear.Seq 16 17 minLen int 18 maxDiff float64 19 20 valueToCode alphabet.Index 21 22 Costs 23 24 lowEnd Hit 25 highEnd Hit 26 vectors [2][]int 27 trapezoids []filter.Trapezoid 28 covered []bool 29 slot int 30 result chan Hit 31 } 32 33 // An offset slice seems to be the easiest way to implement the C idiom used in PALS to implement 34 // an offset (by o) view (v) on an array (a): 35 // 36 // int *v, o; 37 // int [n]a; 38 // v = a - o; 39 // 40 // now v[i] is a view on a[i-o] 41 type offsetSlice struct { 42 offset int 43 slice []int 44 } 45 46 func (o *offsetSlice) at(i int) int { return o.slice[i-o.offset] } 47 func (o *offsetSlice) set(i, v int) { o.slice[i-o.offset] = v } 48 49 var vecBuffering int = 100000 50 51 // Handle the recursive search for alignable segments. 52 func (k *kernel) alignRecursion(t filter.Trapezoid) { 53 mid := (t.Bottom + t.Top) / 2 54 55 k.traceForward(mid, mid-t.Right, mid-t.Left) 56 57 for x := 1; x == 1 || k.highEnd.Bbpos > mid+x*k.MaxIGap && k.highEnd.Score < k.lowEnd.Score; x++ { 58 k.traceReverse(k.lowEnd.Bepos, k.lowEnd.Aepos, k.lowEnd.Aepos, mid+k.MaxIGap, k.BlockCost+2*x*k.DiffCost) 59 } 60 61 k.highEnd.Aepos, k.highEnd.Bepos = k.lowEnd.Aepos, k.lowEnd.Bepos 62 63 lowTrap, highTrap := t, t 64 lowTrap.Top = k.highEnd.Bbpos - k.MaxIGap 65 highTrap.Bottom = k.highEnd.Bepos + k.MaxIGap 66 67 if k.highEnd.Bepos-k.highEnd.Bbpos >= k.minLen && k.highEnd.Aepos-k.highEnd.Abpos >= k.minLen { 68 indel := (k.highEnd.Abpos - k.highEnd.Bbpos) - (k.highEnd.Aepos - k.highEnd.Bepos) 69 if indel < 0 { 70 if indel == -indel { 71 panic("dp: weird number overflow") 72 } 73 indel = -indel 74 } 75 identity := ((1 / k.RMatchCost) - float64(k.highEnd.Score-indel)/(k.RMatchCost*float64(k.highEnd.Bepos-k.highEnd.Bbpos))) 76 77 if identity <= k.maxDiff { 78 k.highEnd.Error = identity 79 80 for i, trap := range k.trapezoids[k.slot+1:] { 81 var trapAProjection, trapBProjection, coverageA, coverageB int 82 83 if trap.Bottom >= k.highEnd.Bepos { 84 break 85 } 86 87 trapBProjection = trap.Top - trap.Bottom + 1 88 trapAProjection = trap.Right - trap.Left + 1 89 if trap.Left < k.highEnd.LowDiagonal { 90 coverageA = k.highEnd.LowDiagonal 91 } else { 92 coverageA = trap.Left 93 } 94 if trap.Right > k.highEnd.HighDiagonal { 95 coverageB = k.highEnd.HighDiagonal 96 } else { 97 coverageB = trap.Right 98 } 99 100 if coverageA > coverageB { 101 continue 102 } 103 104 coverageA = coverageB - coverageA + 1 105 if trap.Top > k.highEnd.Bepos { 106 coverageB = k.highEnd.Bepos - trap.Bottom + 1 107 } else { 108 coverageB = trapBProjection 109 } 110 111 if (float64(coverageA)/float64(trapAProjection))*(float64(coverageB)/float64(trapBProjection)) > 0.99 { 112 k.covered[i] = true 113 } 114 } 115 116 // Diagonals to this point are query-target, not target-query. 117 k.highEnd.LowDiagonal, k.highEnd.HighDiagonal = -k.highEnd.HighDiagonal, -k.highEnd.LowDiagonal 118 119 k.result <- k.highEnd 120 } 121 } 122 123 if lowTrap.Top-lowTrap.Bottom > k.minLen && lowTrap.Top < t.Top-k.MaxIGap { 124 k.alignRecursion(lowTrap) 125 } 126 if highTrap.Top-highTrap.Bottom > k.minLen { 127 k.alignRecursion(highTrap) 128 } 129 } 130 131 func (k *kernel) allocateVectors(required int) { 132 vecMax := required + required>>2 + vecBuffering 133 k.vectors[0] = make([]int, vecMax) 134 k.vectors[1] = make([]int, vecMax) 135 } 136 137 // Forward and Reverse D.P. Extension Routines 138 // Called at the mid-point of trapezoid -- mid X [low,high], the extension 139 // is computed to an end point and the lowest and highest diagonals 140 // are recorded. These are returned in a partially filled DPHit 141 // record, that will be merged with that returned for extension in the 142 // opposite direction. 143 func (k *kernel) traceForward(mid, low, high int) { 144 odd := false 145 var ( 146 maxScore int 147 maxLeft, maxRight int 148 maxI, maxJ int 149 i, j int 150 ) 151 152 // Set basis from (mid,low) .. (mid,high). 153 if low < 0 { 154 low = 0 155 } 156 if low > k.target.Len() { 157 low = k.target.Len() 158 } 159 if high > k.target.Len() { 160 high = k.target.Len() 161 } 162 if high < low { 163 high = low 164 } 165 166 if required := (high - low) + k.MaxIGap; required >= len(k.vectors[0]) { 167 k.allocateVectors(required) 168 } 169 170 thisVector := &offsetSlice{ 171 slice: k.vectors[0], 172 offset: low, 173 } 174 175 for j = low; j <= high; j++ { 176 thisVector.set(j, 0) 177 } 178 179 high += k.MaxIGap 180 if high > k.target.Len() { 181 high = k.target.Len() 182 } 183 184 for ; j <= high; j++ { 185 thisVector.set(j, thisVector.at(j-1)-k.DiffCost) 186 } 187 188 maxScore = 0 189 maxRight = mid - low 190 maxLeft = mid - high 191 maxI = mid 192 maxJ = low 193 194 // Advance to next row. 195 thatVector := &offsetSlice{} 196 for i = mid; low <= high && i < k.query.Len(); i++ { 197 var cost, score int 198 199 *thatVector = *thisVector 200 if !odd { 201 thisVector.slice = k.vectors[1] 202 } else { 203 thisVector.slice = k.vectors[0] 204 } 205 thisVector.offset = low 206 odd = !odd 207 208 score = thatVector.at(low) 209 thisVector.set(low, score-k.DiffCost) 210 cost = thisVector.at(low) 211 212 for j = low + 1; j <= high; j++ { 213 var ratchet, temp int 214 215 temp = cost 216 cost = score 217 score = thatVector.at(j) 218 if k.query.Seq[i] == k.target.Seq[j-1] && k.valueToCode[k.query.Seq[i]] >= 0 { 219 cost += k.MatchCost 220 } 221 222 ratchet = cost 223 if score > ratchet { 224 ratchet = score 225 } 226 if temp > ratchet { 227 ratchet = temp 228 } 229 230 cost = ratchet - k.DiffCost 231 thisVector.set(j, cost) 232 if cost >= maxScore { 233 maxScore = cost 234 maxI = i + 1 235 maxJ = j 236 } 237 } 238 239 if j <= k.target.Len() { 240 var ratchet int 241 242 if k.query.Seq[i] == k.target.Seq[j-1] && k.valueToCode[k.query.Seq[i]] >= 0 { 243 score += k.MatchCost 244 } 245 246 ratchet = score 247 if cost > ratchet { 248 ratchet = cost 249 } 250 251 score = ratchet - k.DiffCost 252 thisVector.set(j, score) 253 if score > maxScore { 254 maxScore = score 255 maxI = i + 1 256 maxJ = j 257 } 258 259 for j++; j <= k.target.Len(); j++ { 260 score -= k.DiffCost 261 if score < maxScore-k.BlockCost { 262 break 263 } 264 thisVector.set(j, score) 265 } 266 } 267 268 high = j - 1 269 270 for low <= high && thisVector.at(low) < maxScore-k.BlockCost { 271 low++ 272 } 273 for low <= high && thisVector.at(high) < maxScore-k.BlockCost { 274 high-- 275 } 276 277 if required := (high - low) + 2; required > len(k.vectors[0]) { 278 k.allocateVectors(required) 279 } 280 281 if (i+1)-low > maxRight { 282 maxRight = (i + 1) - low 283 } 284 if (i+1)-high < maxLeft { 285 maxLeft = (i + 1) - high 286 } 287 } 288 289 k.lowEnd.Aepos = maxJ 290 k.lowEnd.Bepos = maxI 291 k.lowEnd.LowDiagonal = maxLeft 292 k.lowEnd.HighDiagonal = maxRight 293 k.lowEnd.Score = maxScore 294 } 295 296 func (k *kernel) traceReverse(top, low, high, bottom, xfactor int) { 297 odd := false 298 var ( 299 maxScore int 300 maxLeft, maxRight int 301 maxI, maxJ int 302 i, j int 303 ) 304 305 // Set basis from (top,low) .. (top,high). 306 if low < 0 { 307 low = 0 308 } 309 if low > k.target.Len() { 310 low = k.target.Len() 311 } 312 if high > k.target.Len() { 313 high = k.target.Len() 314 } 315 if high < low { 316 high = low 317 } 318 319 if required := (high - low) + k.MaxIGap; required >= len(k.vectors[0]) { 320 k.allocateVectors(required) 321 } 322 323 thisVector := &offsetSlice{ 324 slice: k.vectors[0], 325 offset: high - (len(k.vectors[0]) - 1), 326 } 327 for j = high; j >= low; j-- { 328 thisVector.set(j, 0) 329 } 330 331 low -= k.MaxIGap 332 if low < 0 { 333 low = 0 334 } 335 336 for ; j >= low; j-- { 337 thisVector.set(j, thisVector.at(j+1)-k.DiffCost) 338 } 339 340 maxScore = 0 341 maxRight = top - low 342 maxLeft = top - high 343 maxI = top 344 maxJ = low 345 346 // Advance to next row. 347 if top-1 <= bottom { 348 xfactor = k.BlockCost 349 } 350 351 thatVector := &offsetSlice{} 352 for i = top - 1; low <= high && i >= 0; i-- { 353 var cost, score int 354 355 *thatVector = *thisVector 356 if !odd { 357 thisVector.slice = k.vectors[1] 358 } else { 359 thisVector.slice = k.vectors[0] 360 } 361 thisVector.offset = high - (len(k.vectors[0]) - 1) 362 odd = !odd 363 364 score = thatVector.at(high) 365 thisVector.set(high, score-k.DiffCost) 366 cost = thisVector.at(high) 367 368 for j = high - 1; j >= low; j-- { 369 var ratchet, temp int 370 371 temp = cost 372 cost = score 373 score = thatVector.at(j) 374 if k.query.Seq[i] == k.target.Seq[j] && k.valueToCode[k.query.Seq[i]] >= 0 { 375 cost += k.MatchCost 376 } 377 378 ratchet = cost 379 if score > ratchet { 380 ratchet = score 381 } 382 if temp > ratchet { 383 ratchet = temp 384 } 385 386 cost = ratchet - k.DiffCost 387 thisVector.set(j, cost) 388 if cost >= maxScore { 389 maxScore = cost 390 maxI = i 391 maxJ = j 392 } 393 } 394 395 if j >= 0 { 396 var ratchet int 397 398 if k.query.Seq[i] == k.target.Seq[j] && k.valueToCode[k.query.Seq[i]] >= 0 { 399 score += k.MatchCost 400 } 401 402 ratchet = score 403 if cost > ratchet { 404 ratchet = cost 405 } 406 407 score = ratchet - k.DiffCost 408 thisVector.set(j, score) 409 if score > maxScore { 410 maxScore = score 411 maxI = i 412 maxJ = j 413 } 414 415 for j--; j >= 0; j-- { 416 score -= k.DiffCost 417 if score < maxScore-xfactor { 418 break 419 } 420 thisVector.set(j, score) 421 } 422 } 423 424 low = j + 1 425 426 for low <= high && thisVector.at(low) < maxScore-xfactor { 427 low++ 428 } 429 for low <= high && thisVector.at(high) < maxScore-xfactor { 430 high-- 431 } 432 433 if i == bottom { 434 xfactor = k.BlockCost 435 } 436 437 if required := (high - low) + 2; required > len(k.vectors[0]) { 438 k.allocateVectors(required) 439 } 440 441 if i-low > maxRight { 442 maxRight = i - low 443 } 444 if i-high < maxLeft { 445 maxLeft = i - high 446 } 447 } 448 449 k.highEnd.Abpos = maxJ 450 k.highEnd.Bbpos = maxI 451 k.highEnd.LowDiagonal = maxLeft 452 k.highEnd.HighDiagonal = maxRight 453 k.highEnd.Score = maxScore 454 }