github.com/biogo/biogo@v1.0.4/align/align.go (about)

     1  // Copyright ©2011-2013 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  //go:generate ./genCode.sh
     6  
     7  // Package align provide basic sequence alignment types and helpers.
     8  package align
     9  
    10  import (
    11  	"github.com/biogo/biogo/alphabet"
    12  	"github.com/biogo/biogo/feat"
    13  	"github.com/biogo/biogo/seq"
    14  
    15  	"errors"
    16  	"fmt"
    17  )
    18  
    19  type AlphabetSlicer interface {
    20  	Alphabet() alphabet.Alphabet
    21  	Slice() alphabet.Slice
    22  }
    23  
    24  // An Aligner aligns the sequence data of two type-matching Slicers, returning an ordered
    25  // slice of features describing matching and mismatching segments. The sequences to be aligned
    26  // must have a valid gap letter in the first position of their alphabet; the alphabets
    27  // {DNA,RNA}{gapped,redundant} and Protein provided by the alphabet package satisfy this.
    28  type Aligner interface {
    29  	Align(reference, query AlphabetSlicer) ([]feat.Pair, error)
    30  }
    31  
    32  // A Linear is a basic linear gap penalty alignment description.
    33  // It is a square scoring matrix with the first column and first row specifying gap penalties.
    34  type Linear [][]int
    35  
    36  // An Affine is a basic affine gap penalty alignment description.
    37  type Affine struct {
    38  	Matrix  Linear
    39  	GapOpen int
    40  }
    41  
    42  var (
    43  	_ Aligner = SW{}
    44  	_ Aligner = NW{}
    45  )
    46  
    47  const (
    48  	diag = iota
    49  	up
    50  	left
    51  
    52  	gap = 0
    53  
    54  	minInt = -int(^uint(0)>>1) - 1
    55  )
    56  
    57  var (
    58  	ErrMismatchedTypes     = errors.New("align: mismatched sequence types")
    59  	ErrMismatchedAlphabets = errors.New("align: mismatched alphabets")
    60  	ErrNoAlphabet          = errors.New("align: no alphabet")
    61  	ErrNotGappedAlphabet   = errors.New("align: alphabet does not have gap at position 0")
    62  	ErrTypeNotHandled      = errors.New("align: sequence type not handled")
    63  	ErrMatrixNotSquare     = errors.New("align: scoring matrix is not square")
    64  )
    65  
    66  type ErrMatrixWrongSize struct {
    67  	Size int // size of the matrix
    68  	Len  int // length of the alphabet
    69  }
    70  
    71  func (e ErrMatrixWrongSize) Error() string {
    72  	return fmt.Sprintf("align: scoring matrix size %d does not match alphabet length %d", e.Size, e.Len)
    73  }
    74  
    75  func max3(a, b, c int) int {
    76  	if b > a {
    77  		a = b
    78  	}
    79  	if c > a {
    80  		return c
    81  	}
    82  	return a
    83  }
    84  
    85  func max2(a, b int) int {
    86  	if a > b {
    87  		return a
    88  	}
    89  	return b
    90  }
    91  
    92  func add(a, b int) int {
    93  	if a == minInt || b == minInt {
    94  		return minInt
    95  	}
    96  	return a + b
    97  }
    98  
    99  type feature struct {
   100  	start, end int
   101  	loc        feat.Feature
   102  }
   103  
   104  func (f feature) Name() string {
   105  	if f.loc != nil {
   106  		return f.loc.Name()
   107  	}
   108  	return ""
   109  }
   110  func (f feature) Description() string {
   111  	if f.loc != nil {
   112  		return f.loc.Description()
   113  	}
   114  	return ""
   115  }
   116  func (f feature) Location() feat.Feature { return f.loc }
   117  func (f feature) Start() int             { return f.start }
   118  func (f feature) End() int               { return f.end }
   119  func (f feature) Len() int               { return f.end - f.start }
   120  
   121  type featPair struct {
   122  	a, b  feature
   123  	score int
   124  }
   125  
   126  func (fp *featPair) Features() [2]feat.Feature { return [2]feat.Feature{fp.a, fp.b} }
   127  func (fp *featPair) Score() int                { return fp.score }
   128  func (fp *featPair) Invert()                   { fp.a, fp.b = fp.b, fp.a }
   129  func (fp *featPair) String() string {
   130  	switch {
   131  	case fp.a.start == fp.a.end:
   132  		return fmt.Sprintf("-/%s[%d,%d)=%d",
   133  			fp.b.Name(), fp.b.start, fp.b.end,
   134  			fp.score)
   135  	case fp.b.start == fp.b.end:
   136  		return fmt.Sprintf("%s[%d,%d)/-=%d",
   137  			fp.a.Name(), fp.a.start, fp.a.end,
   138  			fp.score)
   139  	}
   140  	return fmt.Sprintf("%s[%d,%d)/%s[%d,%d)=%d",
   141  		fp.a.Name(), fp.a.start, fp.a.end,
   142  		fp.b.Name(), fp.b.start, fp.b.end,
   143  		fp.score)
   144  }
   145  
   146  // Format returns a [2]alphabet.Slice representing the formatted alignment of a and b described by the
   147  // list of feature pairs in f, with gap used to fill gaps in the alignment.
   148  func Format(a, b seq.Slicer, f []feat.Pair, gap alphabet.Letter) [2]alphabet.Slice {
   149  	var as, aln [2]alphabet.Slice
   150  	for i, s := range [2]seq.Slicer{a, b} {
   151  		as[i] = s.Slice()
   152  		aln[i] = as[i].Make(0, 0)
   153  	}
   154  	for _, fs := range f {
   155  		fc := fs.Features()
   156  		for i := range aln {
   157  			if fc[i].Len() == 0 {
   158  				switch aln[i].(type) {
   159  				case alphabet.Letters:
   160  					aln[i] = aln[i].Append(alphabet.Letters(gap.Repeat(fc[1-i].Len())))
   161  				case alphabet.QLetters:
   162  					aln[i] = aln[i].Append(alphabet.QLetters(alphabet.QLetter{L: gap}.Repeat(fc[1-i].Len())))
   163  				}
   164  			} else {
   165  				aln[i] = aln[i].Append(as[i].Slice(fc[i].Start(), fc[i].End()))
   166  			}
   167  		}
   168  	}
   169  	return aln
   170  }