github.com/biogo/biogo@v1.0.4/align/align.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 //go:generate ./genCode.sh 6 7 // Package align provide basic sequence alignment types and helpers. 8 package align 9 10 import ( 11 "github.com/biogo/biogo/alphabet" 12 "github.com/biogo/biogo/feat" 13 "github.com/biogo/biogo/seq" 14 15 "errors" 16 "fmt" 17 ) 18 19 type AlphabetSlicer interface { 20 Alphabet() alphabet.Alphabet 21 Slice() alphabet.Slice 22 } 23 24 // An Aligner aligns the sequence data of two type-matching Slicers, returning an ordered 25 // slice of features describing matching and mismatching segments. The sequences to be aligned 26 // must have a valid gap letter in the first position of their alphabet; the alphabets 27 // {DNA,RNA}{gapped,redundant} and Protein provided by the alphabet package satisfy this. 28 type Aligner interface { 29 Align(reference, query AlphabetSlicer) ([]feat.Pair, error) 30 } 31 32 // A Linear is a basic linear gap penalty alignment description. 33 // It is a square scoring matrix with the first column and first row specifying gap penalties. 34 type Linear [][]int 35 36 // An Affine is a basic affine gap penalty alignment description. 37 type Affine struct { 38 Matrix Linear 39 GapOpen int 40 } 41 42 var ( 43 _ Aligner = SW{} 44 _ Aligner = NW{} 45 ) 46 47 const ( 48 diag = iota 49 up 50 left 51 52 gap = 0 53 54 minInt = -int(^uint(0)>>1) - 1 55 ) 56 57 var ( 58 ErrMismatchedTypes = errors.New("align: mismatched sequence types") 59 ErrMismatchedAlphabets = errors.New("align: mismatched alphabets") 60 ErrNoAlphabet = errors.New("align: no alphabet") 61 ErrNotGappedAlphabet = errors.New("align: alphabet does not have gap at position 0") 62 ErrTypeNotHandled = errors.New("align: sequence type not handled") 63 ErrMatrixNotSquare = errors.New("align: scoring matrix is not square") 64 ) 65 66 type ErrMatrixWrongSize struct { 67 Size int // size of the matrix 68 Len int // length of the alphabet 69 } 70 71 func (e ErrMatrixWrongSize) Error() string { 72 return fmt.Sprintf("align: scoring matrix size %d does not match alphabet length %d", e.Size, e.Len) 73 } 74 75 func max3(a, b, c int) int { 76 if b > a { 77 a = b 78 } 79 if c > a { 80 return c 81 } 82 return a 83 } 84 85 func max2(a, b int) int { 86 if a > b { 87 return a 88 } 89 return b 90 } 91 92 func add(a, b int) int { 93 if a == minInt || b == minInt { 94 return minInt 95 } 96 return a + b 97 } 98 99 type feature struct { 100 start, end int 101 loc feat.Feature 102 } 103 104 func (f feature) Name() string { 105 if f.loc != nil { 106 return f.loc.Name() 107 } 108 return "" 109 } 110 func (f feature) Description() string { 111 if f.loc != nil { 112 return f.loc.Description() 113 } 114 return "" 115 } 116 func (f feature) Location() feat.Feature { return f.loc } 117 func (f feature) Start() int { return f.start } 118 func (f feature) End() int { return f.end } 119 func (f feature) Len() int { return f.end - f.start } 120 121 type featPair struct { 122 a, b feature 123 score int 124 } 125 126 func (fp *featPair) Features() [2]feat.Feature { return [2]feat.Feature{fp.a, fp.b} } 127 func (fp *featPair) Score() int { return fp.score } 128 func (fp *featPair) Invert() { fp.a, fp.b = fp.b, fp.a } 129 func (fp *featPair) String() string { 130 switch { 131 case fp.a.start == fp.a.end: 132 return fmt.Sprintf("-/%s[%d,%d)=%d", 133 fp.b.Name(), fp.b.start, fp.b.end, 134 fp.score) 135 case fp.b.start == fp.b.end: 136 return fmt.Sprintf("%s[%d,%d)/-=%d", 137 fp.a.Name(), fp.a.start, fp.a.end, 138 fp.score) 139 } 140 return fmt.Sprintf("%s[%d,%d)/%s[%d,%d)=%d", 141 fp.a.Name(), fp.a.start, fp.a.end, 142 fp.b.Name(), fp.b.start, fp.b.end, 143 fp.score) 144 } 145 146 // Format returns a [2]alphabet.Slice representing the formatted alignment of a and b described by the 147 // list of feature pairs in f, with gap used to fill gaps in the alignment. 148 func Format(a, b seq.Slicer, f []feat.Pair, gap alphabet.Letter) [2]alphabet.Slice { 149 var as, aln [2]alphabet.Slice 150 for i, s := range [2]seq.Slicer{a, b} { 151 as[i] = s.Slice() 152 aln[i] = as[i].Make(0, 0) 153 } 154 for _, fs := range f { 155 fc := fs.Features() 156 for i := range aln { 157 if fc[i].Len() == 0 { 158 switch aln[i].(type) { 159 case alphabet.Letters: 160 aln[i] = aln[i].Append(alphabet.Letters(gap.Repeat(fc[1-i].Len()))) 161 case alphabet.QLetters: 162 aln[i] = aln[i].Append(alphabet.QLetters(alphabet.QLetter{L: gap}.Repeat(fc[1-i].Len()))) 163 } 164 } else { 165 aln[i] = aln[i].Append(as[i].Slice(fc[i].Start(), fc[i].End())) 166 } 167 } 168 } 169 return aln 170 }