github.com/biogo/biogo@v1.0.4/align/pals/dp/align.go (about)

     1  // Copyright ©2011-2013 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package providing PALS dynamic programming alignment routines.
     6  package dp
     7  
     8  import (
     9  	"github.com/biogo/biogo/align/pals/filter"
    10  	"github.com/biogo/biogo/seq/linear"
    11  
    12  	"errors"
    13  	"sort"
    14  	"sync"
    15  )
    16  
    17  // A Params holds dynamic programming alignment parameters.
    18  type Params struct {
    19  	MinHitLength int
    20  	MinId        float64
    21  }
    22  
    23  // A Costs specifies dynamic programming behaviour.
    24  type Costs struct {
    25  	MaxIGap    int
    26  	DiffCost   int
    27  	SameCost   int
    28  	MatchCost  int
    29  	BlockCost  int
    30  	RMatchCost float64
    31  }
    32  
    33  // An Aligner provides allows local alignment of subsections of long sequences.
    34  type Aligner struct {
    35  	target, query *linear.Seq
    36  	k             int
    37  	minHitLength  int
    38  	minId         float64
    39  	segs          Hits
    40  	Costs         *Costs
    41  }
    42  
    43  // Create a new Aligner based on target and query sequences.
    44  func NewAligner(target, query *linear.Seq, k, minLength int, minId float64) *Aligner {
    45  	return &Aligner{
    46  		target:       target,
    47  		query:        query,
    48  		k:            k,
    49  		minHitLength: minLength,
    50  		minId:        minId,
    51  	}
    52  }
    53  
    54  // Align pairs of sequence segments defined by trapezoids.
    55  // Returns aligning segment pairs satisfying length and identity requirements.
    56  func (a *Aligner) AlignTraps(trapezoids filter.Trapezoids) Hits {
    57  	covered := make([]bool, len(trapezoids))
    58  
    59  	dp := &kernel{
    60  		target:      a.target,
    61  		query:       a.query,
    62  		valueToCode: a.target.Alpha.LetterIndex(),
    63  		trapezoids:  trapezoids,
    64  		covered:     covered,
    65  		minLen:      a.minHitLength,
    66  		maxDiff:     1 - a.minId,
    67  
    68  		Costs: *a.Costs,
    69  
    70  		result: make(chan Hit),
    71  	}
    72  	wg := &sync.WaitGroup{}
    73  	wg.Add(1)
    74  	var segs Hits
    75  	go func() {
    76  		defer wg.Done()
    77  		for h := range dp.result {
    78  			segs = append(segs, h)
    79  		}
    80  	}()
    81  	for i, t := range trapezoids {
    82  		if !dp.covered[i] && t.Top-t.Bottom >= a.k {
    83  			dp.slot = i
    84  			dp.alignRecursion(t)
    85  		}
    86  	}
    87  	close(dp.result)
    88  	wg.Wait()
    89  
    90  	/* Remove lower scoring segments that begin or end at
    91  	   the same point as a higher scoring segment.       */
    92  
    93  	if len(segs) > 0 {
    94  		var i, j int
    95  
    96  		sort.Sort(starts(segs))
    97  		for i = 0; i < len(segs); i = j {
    98  			for j = i + 1; j < len(segs); j++ {
    99  				if segs[j].Abpos != segs[i].Abpos {
   100  					break
   101  				}
   102  				if segs[j].Bbpos != segs[i].Bbpos {
   103  					break
   104  				}
   105  				if segs[j].Score > segs[i].Score {
   106  					segs[i].Score = -1
   107  					i = j
   108  				} else {
   109  					segs[j].Score = -1
   110  				}
   111  			}
   112  		}
   113  
   114  		sort.Sort(ends(segs))
   115  		for i = 0; i < len(segs); i = j {
   116  			for j = i + 1; j < len(segs); j++ {
   117  				if segs[j].Aepos != segs[i].Aepos {
   118  					break
   119  				}
   120  				if segs[j].Bepos != segs[i].Bepos {
   121  					break
   122  				}
   123  				if segs[j].Score > segs[i].Score {
   124  					segs[i].Score = -1
   125  					i = j
   126  				} else {
   127  					segs[j].Score = -1
   128  				}
   129  			}
   130  		}
   131  
   132  		found := 0
   133  		for i = 0; i < len(segs); i++ {
   134  			if segs[i].Score >= 0 {
   135  				segs[found] = segs[i]
   136  				found++
   137  			}
   138  		}
   139  		segs = segs[:found]
   140  	}
   141  
   142  	return segs
   143  }
   144  
   145  // Hit holds details of alignment result.
   146  type Hit struct {
   147  	Abpos, Bbpos              int     // Start coordinate of local alignment
   148  	Aepos, Bepos              int     // End coordinate of local alignment
   149  	LowDiagonal, HighDiagonal int     // Alignment is between (anti)diagonals LowDiagonal & HighDiagonal
   150  	Score                     int     // Score of alignment where match = SameCost, difference = -DiffCost
   151  	Error                     float64 // Lower bound on error rate of match
   152  }
   153  
   154  // DPHits is a collection of alignment results.
   155  type Hits []Hit
   156  
   157  // Returns the sums of alignment lengths.
   158  func (h Hits) Sum() (a, b int, err error) {
   159  	for _, hit := range h {
   160  		la, lb := hit.Aepos-hit.Abpos, hit.Bepos-hit.Bbpos
   161  		if la < 0 || lb < 0 {
   162  			return 0, 0, errors.New("dp: negative trapezoid area")
   163  		}
   164  		a, b = a+la, b+lb
   165  	}
   166  	return
   167  }