github.com/biogo/biogo@v1.0.4/seq/sequtils/utils.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package sequtils provides generic functions for manipulation of biogo/seq/... types. 6 package sequtils 7 8 import ( 9 "github.com/biogo/biogo/alphabet" 10 "github.com/biogo/biogo/feat" 11 "github.com/biogo/biogo/seq" 12 13 "errors" 14 "sort" 15 ) 16 17 // A Joinable can be joined to another of the same concrete type using the Join function. 18 type Joinable interface { 19 SetOffset(int) error 20 Slice() alphabet.Slice 21 SetSlice(alphabet.Slice) 22 } 23 24 // Join joins a and be with the target location of b specified by where. The offset of dst 25 // will be updated if src is prepended. Join will panic if dst and src do not hold the same 26 // concrete Slice type. Circular sequences cannot be joined. 27 func Join(dst, src Joinable, where int) error { 28 dstC, dstOk := dst.(seq.Conformationer) 29 srcC, srcOk := src.(seq.Conformationer) 30 switch { 31 case dstOk && dstC.Conformation() > feat.Linear, srcOk && srcC.Conformation() > feat.Linear: 32 return errors.New("sequtils: cannot join circular sequence") 33 } 34 35 o := dst 36 if where == seq.End { 37 src, dst = dst, src 38 } 39 dstSl, srcSl := dst.Slice(), src.Slice() 40 srcLen := srcSl.Len() 41 if where == seq.Start { 42 dst.SetOffset(-srcLen) 43 } 44 t := dst.Slice().Make(srcLen, srcLen+dstSl.Len()) 45 t.Copy(srcSl) 46 o.SetSlice(t.Append(dstSl)) 47 return nil 48 } 49 50 // A Sliceable can be truncated, stitched and composed. 51 type Sliceable interface { 52 Start() int 53 End() int 54 SetOffset(int) error 55 Slice() alphabet.Slice 56 SetSlice(alphabet.Slice) 57 } 58 59 // Truncate performs a truncation on src from start to end and places the result in dst. 60 // The conformation of dst is set to linear and the offset is set to start. If dst and src 61 // are not equal, a copy of the truncation is allocated. Only circular sequences can be 62 // truncated with start > end. 63 func Truncate(dst, src Sliceable, start, end int) error { 64 var ( 65 sl = src.Slice() 66 offset = src.Start() 67 ) 68 if start < offset || end > src.End() { 69 return errors.New("sequtils: index out of range") 70 } 71 if start <= end { 72 if dst == src { 73 dst.SetSlice(sl.Slice(start-offset, end-offset)) 74 } else { 75 dst.SetSlice(sl.Make(0, end-start).Append(sl.Slice(start-offset, end-offset))) 76 } 77 dst.SetOffset(start) 78 if dst, ok := dst.(seq.ConformationSetter); ok { 79 dst.SetConformation(feat.Linear) 80 } 81 return nil 82 } 83 84 if src, ok := src.(seq.Conformationer); !ok || src.Conformation() == feat.Linear { 85 return errors.New("sequtils: start position greater than end position for linear sequence") 86 } 87 if end < offset || start > src.End() { 88 return errors.New("sequtils: index out of range") 89 } 90 t := sl.Make(sl.Len()-start+offset, sl.Len()+end-start) 91 t.Copy(sl.Slice(start-offset, sl.Len())) 92 dst.SetSlice(t.Append(sl.Slice(0, end-offset))) 93 dst.SetOffset(start) 94 if dst, ok := dst.(seq.ConformationSetter); ok { 95 dst.SetConformation(feat.Linear) 96 } 97 98 return nil 99 } 100 101 type feats []feat.Feature 102 103 func (f feats) Len() int { return len(f) } 104 func (f feats) Less(i, j int) bool { return f[i].Start() < f[j].Start() } 105 func (f feats) Swap(i, j int) { f[i], f[j] = f[j], f[i] } 106 107 func max(a, b int) int { 108 if a > b { 109 return a 110 } 111 return b 112 } 113 func min(a, b int) int { 114 if a < b { 115 return a 116 } 117 return b 118 } 119 120 // Stitch produces a subsequence of src defined by fs and places the the result in dst. 121 // The subsequences are guaranteed to be in order and non-overlapping even if not provided as such. 122 // Stitching a circular sequence returns a linear sequence. 123 func Stitch(dst, src Sliceable, fs feat.Set) error { 124 var ( 125 sl = src.Slice() 126 offset = src.Start() 127 ff = feats(fs.Features()) 128 ) 129 for _, f := range ff { 130 if f.End() < f.Start() { 131 return errors.New("sequtils: feature end < feature start") 132 } 133 } 134 ff = append(feats(nil), ff...) 135 sort.Sort(ff) 136 // FIXME Does not correctly deal with circular sequences and feature sets. 137 // Range over ff if src is circular and and trunc at start and end, do checks to 138 // see if feature splits on origin and rearrange tail to front. 139 140 pLen := sl.Len() 141 end := pLen + offset 142 143 type fi struct{ s, e int } 144 var ( 145 fsp = make([]*fi, 0, len(ff)) 146 csp *fi 147 ) 148 for i, f := range ff { 149 if s := f.Start(); i == 0 || s > csp.e { 150 csp = &fi{s: s, e: f.End()} 151 fsp = append(fsp, csp) 152 } else { 153 csp.e = max(csp.e, f.End()) 154 } 155 } 156 157 var l int 158 for _, f := range fsp { 159 l += max(0, min(f.e, end)-max(f.s, offset)) 160 } 161 t := sl.Make(0, l) 162 163 for _, f := range fsp { 164 fs, fe := max(f.s-offset, 0), min(f.e-offset, pLen) 165 if fs >= fe { 166 continue 167 } 168 t = t.Append(sl.Slice(fs, fe)) 169 } 170 171 dst.SetSlice(t) 172 if dst, ok := dst.(seq.ConformationSetter); ok { 173 dst.SetConformation(feat.Linear) 174 } 175 dst.SetOffset(0) 176 177 return nil 178 } 179 180 type SliceReverser interface { 181 Sliceable 182 New() seq.Sequence 183 Alphabet() alphabet.Alphabet 184 SetAlphabet(alphabet.Alphabet) error 185 RevComp() 186 Reverse() 187 } 188 189 // Compose produces a composition of src defined by the features in fs. The subparts of 190 // the composition may be out of order and if features in fs specify orientation may be 191 // reversed or reverse complemented depending on the src - if src is a SliceReverser and 192 // its alphabet is a Complementor the segment will be reverse complemented, if the alphabte 193 // is not a Complementor these segments will only be reversed. If src is not a SliceREverser 194 // and a reverse segment is specified an error is returned. 195 // Composing a circular sequence returns a linear sequence. 196 func Compose(dst, src Sliceable, fs feat.Set) error { 197 var ( 198 sl = src.Slice() 199 offset = src.Start() 200 ff = feats(fs.Features()) 201 ) 202 203 pLen := sl.Len() 204 end := pLen + offset 205 206 t := make([]alphabet.Slice, len(ff)) 207 var tl int 208 for i, f := range ff { 209 if f.End() < f.Start() { 210 return errors.New("sequtils: feature end < feature start") 211 } 212 l := min(f.End(), end) - max(f.Start(), offset) 213 tl += l 214 t[i] = sl.Make(l, l) 215 t[i].Copy(sl.Slice(max(f.Start()-offset, 0), min(f.End()-offset, pLen))) 216 } 217 218 c := sl.Make(0, tl) 219 var r SliceReverser 220 for i, ts := range t { 221 if f, ok := ff[i].(feat.Orienter); ok && f.Orientation() == feat.Reverse { 222 switch src := src.(type) { 223 case SliceReverser: 224 if r == nil { 225 r = src.New().(SliceReverser) 226 if _, ok := src.Alphabet().(alphabet.Complementor); ok { 227 r.SetAlphabet(src.Alphabet()) 228 r.SetSlice(ts) 229 r.RevComp() 230 } else { 231 r.SetSlice(ts) 232 r.Reverse() 233 } 234 } 235 default: 236 return errors.New("sequtils: unable to reverse segment during compose") 237 } 238 c = c.Append(r.Slice()) 239 } else { 240 c = c.Append(ts) 241 } 242 } 243 244 dst.SetSlice(c) 245 if dst, ok := dst.(seq.ConformationSetter); ok { 246 dst.SetConformation(feat.Linear) 247 } 248 dst.SetOffset(0) 249 250 return nil 251 } 252 253 // A QualityFeature describes a segment of sequence quality information. EAt() called with 254 // column values within Start() and End() is expected to return valid error probabilities for 255 // the zero'th row position. 256 type QualityFeature interface { 257 feat.Feature 258 EAt(int) float64 259 } 260 261 // Trim uses the modified-Mott trimming function to determine the start and end positions 262 // of good sequence. http://www.phrap.org/phredphrap/phred.html 263 func Trim(q QualityFeature, limit float64) (start, end int) { 264 var sum, max float64 265 for i := q.Start(); i < q.End(); i++ { 266 sum += limit - q.EAt(i) 267 if sum < 0 { 268 sum, start = 0, i+1 269 } 270 if sum >= max { 271 max, end = sum, i+1 272 } 273 } 274 return 275 }