github.com/biogo/biogo@v1.0.4/align/pals/dp/kernel.go (about)

     1  // Copyright ©2011-2013 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package dp
     6  
     7  import (
     8  	"github.com/biogo/biogo/align/pals/filter"
     9  	"github.com/biogo/biogo/alphabet"
    10  	"github.com/biogo/biogo/seq/linear"
    11  )
    12  
    13  // A kernel handles the actual dp alignment process.
    14  type kernel struct {
    15  	target, query *linear.Seq
    16  
    17  	minLen  int
    18  	maxDiff float64
    19  
    20  	valueToCode alphabet.Index
    21  
    22  	Costs
    23  
    24  	lowEnd     Hit
    25  	highEnd    Hit
    26  	vectors    [2][]int
    27  	trapezoids []filter.Trapezoid
    28  	covered    []bool
    29  	slot       int
    30  	result     chan Hit
    31  }
    32  
    33  // An offset slice seems to be the easiest way to implement the C idiom used in PALS to implement
    34  // an offset (by o) view (v) on an array (a):
    35  //
    36  //  int *v, o;
    37  //  int [n]a;
    38  //  v = a - o;
    39  //
    40  // now v[i] is a view on a[i-o]
    41  type offsetSlice struct {
    42  	offset int
    43  	slice  []int
    44  }
    45  
    46  func (o *offsetSlice) at(i int) int { return o.slice[i-o.offset] }
    47  func (o *offsetSlice) set(i, v int) { o.slice[i-o.offset] = v }
    48  
    49  var vecBuffering int = 100000
    50  
    51  // Handle the recursive search for alignable segments.
    52  func (k *kernel) alignRecursion(t filter.Trapezoid) {
    53  	mid := (t.Bottom + t.Top) / 2
    54  
    55  	k.traceForward(mid, mid-t.Right, mid-t.Left)
    56  
    57  	for x := 1; x == 1 || k.highEnd.Bbpos > mid+x*k.MaxIGap && k.highEnd.Score < k.lowEnd.Score; x++ {
    58  		k.traceReverse(k.lowEnd.Bepos, k.lowEnd.Aepos, k.lowEnd.Aepos, mid+k.MaxIGap, k.BlockCost+2*x*k.DiffCost)
    59  	}
    60  
    61  	k.highEnd.Aepos, k.highEnd.Bepos = k.lowEnd.Aepos, k.lowEnd.Bepos
    62  
    63  	lowTrap, highTrap := t, t
    64  	lowTrap.Top = k.highEnd.Bbpos - k.MaxIGap
    65  	highTrap.Bottom = k.highEnd.Bepos + k.MaxIGap
    66  
    67  	if k.highEnd.Bepos-k.highEnd.Bbpos >= k.minLen && k.highEnd.Aepos-k.highEnd.Abpos >= k.minLen {
    68  		indel := (k.highEnd.Abpos - k.highEnd.Bbpos) - (k.highEnd.Aepos - k.highEnd.Bepos)
    69  		if indel < 0 {
    70  			if indel == -indel {
    71  				panic("dp: weird number overflow")
    72  			}
    73  			indel = -indel
    74  		}
    75  		identity := ((1 / k.RMatchCost) - float64(k.highEnd.Score-indel)/(k.RMatchCost*float64(k.highEnd.Bepos-k.highEnd.Bbpos)))
    76  
    77  		if identity <= k.maxDiff {
    78  			k.highEnd.Error = identity
    79  
    80  			for i, trap := range k.trapezoids[k.slot+1:] {
    81  				var trapAProjection, trapBProjection, coverageA, coverageB int
    82  
    83  				if trap.Bottom >= k.highEnd.Bepos {
    84  					break
    85  				}
    86  
    87  				trapBProjection = trap.Top - trap.Bottom + 1
    88  				trapAProjection = trap.Right - trap.Left + 1
    89  				if trap.Left < k.highEnd.LowDiagonal {
    90  					coverageA = k.highEnd.LowDiagonal
    91  				} else {
    92  					coverageA = trap.Left
    93  				}
    94  				if trap.Right > k.highEnd.HighDiagonal {
    95  					coverageB = k.highEnd.HighDiagonal
    96  				} else {
    97  					coverageB = trap.Right
    98  				}
    99  
   100  				if coverageA > coverageB {
   101  					continue
   102  				}
   103  
   104  				coverageA = coverageB - coverageA + 1
   105  				if trap.Top > k.highEnd.Bepos {
   106  					coverageB = k.highEnd.Bepos - trap.Bottom + 1
   107  				} else {
   108  					coverageB = trapBProjection
   109  				}
   110  
   111  				if (float64(coverageA)/float64(trapAProjection))*(float64(coverageB)/float64(trapBProjection)) > 0.99 {
   112  					k.covered[i] = true
   113  				}
   114  			}
   115  
   116  			// Diagonals to this point are query-target, not target-query.
   117  			k.highEnd.LowDiagonal, k.highEnd.HighDiagonal = -k.highEnd.HighDiagonal, -k.highEnd.LowDiagonal
   118  
   119  			k.result <- k.highEnd
   120  		}
   121  	}
   122  
   123  	if lowTrap.Top-lowTrap.Bottom > k.minLen && lowTrap.Top < t.Top-k.MaxIGap {
   124  		k.alignRecursion(lowTrap)
   125  	}
   126  	if highTrap.Top-highTrap.Bottom > k.minLen {
   127  		k.alignRecursion(highTrap)
   128  	}
   129  }
   130  
   131  func (k *kernel) allocateVectors(required int) {
   132  	vecMax := required + required>>2 + vecBuffering
   133  	k.vectors[0] = make([]int, vecMax)
   134  	k.vectors[1] = make([]int, vecMax)
   135  }
   136  
   137  // Forward and Reverse D.P. Extension Routines
   138  // Called at the mid-point of trapezoid -- mid X [low,high], the extension
   139  // is computed to an end point and the lowest and highest diagonals
   140  // are recorded. These are returned in a partially filled DPHit
   141  // record, that will be merged with that returned for extension in the
   142  // opposite direction.
   143  func (k *kernel) traceForward(mid, low, high int) {
   144  	odd := false
   145  	var (
   146  		maxScore          int
   147  		maxLeft, maxRight int
   148  		maxI, maxJ        int
   149  		i, j              int
   150  	)
   151  
   152  	// Set basis from (mid,low) .. (mid,high).
   153  	if low < 0 {
   154  		low = 0
   155  	}
   156  	if low > k.target.Len() {
   157  		low = k.target.Len()
   158  	}
   159  	if high > k.target.Len() {
   160  		high = k.target.Len()
   161  	}
   162  	if high < low {
   163  		high = low
   164  	}
   165  
   166  	if required := (high - low) + k.MaxIGap; required >= len(k.vectors[0]) {
   167  		k.allocateVectors(required)
   168  	}
   169  
   170  	thisVector := &offsetSlice{
   171  		slice:  k.vectors[0],
   172  		offset: low,
   173  	}
   174  
   175  	for j = low; j <= high; j++ {
   176  		thisVector.set(j, 0)
   177  	}
   178  
   179  	high += k.MaxIGap
   180  	if high > k.target.Len() {
   181  		high = k.target.Len()
   182  	}
   183  
   184  	for ; j <= high; j++ {
   185  		thisVector.set(j, thisVector.at(j-1)-k.DiffCost)
   186  	}
   187  
   188  	maxScore = 0
   189  	maxRight = mid - low
   190  	maxLeft = mid - high
   191  	maxI = mid
   192  	maxJ = low
   193  
   194  	// Advance to next row.
   195  	thatVector := &offsetSlice{}
   196  	for i = mid; low <= high && i < k.query.Len(); i++ {
   197  		var cost, score int
   198  
   199  		*thatVector = *thisVector
   200  		if !odd {
   201  			thisVector.slice = k.vectors[1]
   202  		} else {
   203  			thisVector.slice = k.vectors[0]
   204  		}
   205  		thisVector.offset = low
   206  		odd = !odd
   207  
   208  		score = thatVector.at(low)
   209  		thisVector.set(low, score-k.DiffCost)
   210  		cost = thisVector.at(low)
   211  
   212  		for j = low + 1; j <= high; j++ {
   213  			var ratchet, temp int
   214  
   215  			temp = cost
   216  			cost = score
   217  			score = thatVector.at(j)
   218  			if k.query.Seq[i] == k.target.Seq[j-1] && k.valueToCode[k.query.Seq[i]] >= 0 {
   219  				cost += k.MatchCost
   220  			}
   221  
   222  			ratchet = cost
   223  			if score > ratchet {
   224  				ratchet = score
   225  			}
   226  			if temp > ratchet {
   227  				ratchet = temp
   228  			}
   229  
   230  			cost = ratchet - k.DiffCost
   231  			thisVector.set(j, cost)
   232  			if cost >= maxScore {
   233  				maxScore = cost
   234  				maxI = i + 1
   235  				maxJ = j
   236  			}
   237  		}
   238  
   239  		if j <= k.target.Len() {
   240  			var ratchet int
   241  
   242  			if k.query.Seq[i] == k.target.Seq[j-1] && k.valueToCode[k.query.Seq[i]] >= 0 {
   243  				score += k.MatchCost
   244  			}
   245  
   246  			ratchet = score
   247  			if cost > ratchet {
   248  				ratchet = cost
   249  			}
   250  
   251  			score = ratchet - k.DiffCost
   252  			thisVector.set(j, score)
   253  			if score > maxScore {
   254  				maxScore = score
   255  				maxI = i + 1
   256  				maxJ = j
   257  			}
   258  
   259  			for j++; j <= k.target.Len(); j++ {
   260  				score -= k.DiffCost
   261  				if score < maxScore-k.BlockCost {
   262  					break
   263  				}
   264  				thisVector.set(j, score)
   265  			}
   266  		}
   267  
   268  		high = j - 1
   269  
   270  		for low <= high && thisVector.at(low) < maxScore-k.BlockCost {
   271  			low++
   272  		}
   273  		for low <= high && thisVector.at(high) < maxScore-k.BlockCost {
   274  			high--
   275  		}
   276  
   277  		if required := (high - low) + 2; required > len(k.vectors[0]) {
   278  			k.allocateVectors(required)
   279  		}
   280  
   281  		if (i+1)-low > maxRight {
   282  			maxRight = (i + 1) - low
   283  		}
   284  		if (i+1)-high < maxLeft {
   285  			maxLeft = (i + 1) - high
   286  		}
   287  	}
   288  
   289  	k.lowEnd.Aepos = maxJ
   290  	k.lowEnd.Bepos = maxI
   291  	k.lowEnd.LowDiagonal = maxLeft
   292  	k.lowEnd.HighDiagonal = maxRight
   293  	k.lowEnd.Score = maxScore
   294  }
   295  
   296  func (k *kernel) traceReverse(top, low, high, bottom, xfactor int) {
   297  	odd := false
   298  	var (
   299  		maxScore          int
   300  		maxLeft, maxRight int
   301  		maxI, maxJ        int
   302  		i, j              int
   303  	)
   304  
   305  	// Set basis from (top,low) .. (top,high).
   306  	if low < 0 {
   307  		low = 0
   308  	}
   309  	if low > k.target.Len() {
   310  		low = k.target.Len()
   311  	}
   312  	if high > k.target.Len() {
   313  		high = k.target.Len()
   314  	}
   315  	if high < low {
   316  		high = low
   317  	}
   318  
   319  	if required := (high - low) + k.MaxIGap; required >= len(k.vectors[0]) {
   320  		k.allocateVectors(required)
   321  	}
   322  
   323  	thisVector := &offsetSlice{
   324  		slice:  k.vectors[0],
   325  		offset: high - (len(k.vectors[0]) - 1),
   326  	}
   327  	for j = high; j >= low; j-- {
   328  		thisVector.set(j, 0)
   329  	}
   330  
   331  	low -= k.MaxIGap
   332  	if low < 0 {
   333  		low = 0
   334  	}
   335  
   336  	for ; j >= low; j-- {
   337  		thisVector.set(j, thisVector.at(j+1)-k.DiffCost)
   338  	}
   339  
   340  	maxScore = 0
   341  	maxRight = top - low
   342  	maxLeft = top - high
   343  	maxI = top
   344  	maxJ = low
   345  
   346  	// Advance to next row.
   347  	if top-1 <= bottom {
   348  		xfactor = k.BlockCost
   349  	}
   350  
   351  	thatVector := &offsetSlice{}
   352  	for i = top - 1; low <= high && i >= 0; i-- {
   353  		var cost, score int
   354  
   355  		*thatVector = *thisVector
   356  		if !odd {
   357  			thisVector.slice = k.vectors[1]
   358  		} else {
   359  			thisVector.slice = k.vectors[0]
   360  		}
   361  		thisVector.offset = high - (len(k.vectors[0]) - 1)
   362  		odd = !odd
   363  
   364  		score = thatVector.at(high)
   365  		thisVector.set(high, score-k.DiffCost)
   366  		cost = thisVector.at(high)
   367  
   368  		for j = high - 1; j >= low; j-- {
   369  			var ratchet, temp int
   370  
   371  			temp = cost
   372  			cost = score
   373  			score = thatVector.at(j)
   374  			if k.query.Seq[i] == k.target.Seq[j] && k.valueToCode[k.query.Seq[i]] >= 0 {
   375  				cost += k.MatchCost
   376  			}
   377  
   378  			ratchet = cost
   379  			if score > ratchet {
   380  				ratchet = score
   381  			}
   382  			if temp > ratchet {
   383  				ratchet = temp
   384  			}
   385  
   386  			cost = ratchet - k.DiffCost
   387  			thisVector.set(j, cost)
   388  			if cost >= maxScore {
   389  				maxScore = cost
   390  				maxI = i
   391  				maxJ = j
   392  			}
   393  		}
   394  
   395  		if j >= 0 {
   396  			var ratchet int
   397  
   398  			if k.query.Seq[i] == k.target.Seq[j] && k.valueToCode[k.query.Seq[i]] >= 0 {
   399  				score += k.MatchCost
   400  			}
   401  
   402  			ratchet = score
   403  			if cost > ratchet {
   404  				ratchet = cost
   405  			}
   406  
   407  			score = ratchet - k.DiffCost
   408  			thisVector.set(j, score)
   409  			if score > maxScore {
   410  				maxScore = score
   411  				maxI = i
   412  				maxJ = j
   413  			}
   414  
   415  			for j--; j >= 0; j-- {
   416  				score -= k.DiffCost
   417  				if score < maxScore-xfactor {
   418  					break
   419  				}
   420  				thisVector.set(j, score)
   421  			}
   422  		}
   423  
   424  		low = j + 1
   425  
   426  		for low <= high && thisVector.at(low) < maxScore-xfactor {
   427  			low++
   428  		}
   429  		for low <= high && thisVector.at(high) < maxScore-xfactor {
   430  			high--
   431  		}
   432  
   433  		if i == bottom {
   434  			xfactor = k.BlockCost
   435  		}
   436  
   437  		if required := (high - low) + 2; required > len(k.vectors[0]) {
   438  			k.allocateVectors(required)
   439  		}
   440  
   441  		if i-low > maxRight {
   442  			maxRight = i - low
   443  		}
   444  		if i-high < maxLeft {
   445  			maxLeft = i - high
   446  		}
   447  	}
   448  
   449  	k.highEnd.Abpos = maxJ
   450  	k.highEnd.Bbpos = maxI
   451  	k.highEnd.LowDiagonal = maxLeft
   452  	k.highEnd.HighDiagonal = maxRight
   453  	k.highEnd.Score = maxScore
   454  }