github.com/biogo/biogo@v1.0.4/seq/sequtils/utils.go (about)

     1  // Copyright ©2011-2013 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package sequtils provides generic functions for manipulation of biogo/seq/... types.
     6  package sequtils
     7  
     8  import (
     9  	"github.com/biogo/biogo/alphabet"
    10  	"github.com/biogo/biogo/feat"
    11  	"github.com/biogo/biogo/seq"
    12  
    13  	"errors"
    14  	"sort"
    15  )
    16  
    17  // A Joinable can be joined to another of the same concrete type using the Join function.
    18  type Joinable interface {
    19  	SetOffset(int) error
    20  	Slice() alphabet.Slice
    21  	SetSlice(alphabet.Slice)
    22  }
    23  
    24  // Join joins a and be with the target location of b specified by where. The offset of dst
    25  // will be updated if src is prepended. Join will panic if dst and src do not hold the same
    26  // concrete Slice type. Circular sequences cannot be joined.
    27  func Join(dst, src Joinable, where int) error {
    28  	dstC, dstOk := dst.(seq.Conformationer)
    29  	srcC, srcOk := src.(seq.Conformationer)
    30  	switch {
    31  	case dstOk && dstC.Conformation() > feat.Linear, srcOk && srcC.Conformation() > feat.Linear:
    32  		return errors.New("sequtils: cannot join circular sequence")
    33  	}
    34  
    35  	o := dst
    36  	if where == seq.End {
    37  		src, dst = dst, src
    38  	}
    39  	dstSl, srcSl := dst.Slice(), src.Slice()
    40  	srcLen := srcSl.Len()
    41  	if where == seq.Start {
    42  		dst.SetOffset(-srcLen)
    43  	}
    44  	t := dst.Slice().Make(srcLen, srcLen+dstSl.Len())
    45  	t.Copy(srcSl)
    46  	o.SetSlice(t.Append(dstSl))
    47  	return nil
    48  }
    49  
    50  // A Sliceable can be truncated, stitched and composed.
    51  type Sliceable interface {
    52  	Start() int
    53  	End() int
    54  	SetOffset(int) error
    55  	Slice() alphabet.Slice
    56  	SetSlice(alphabet.Slice)
    57  }
    58  
    59  // Truncate performs a truncation on src from start to end and places the result in dst.
    60  // The conformation of dst is set to linear and the offset is set to start. If dst and src
    61  // are not equal, a copy of the truncation is allocated. Only circular sequences can be
    62  // truncated with start > end.
    63  func Truncate(dst, src Sliceable, start, end int) error {
    64  	var (
    65  		sl     = src.Slice()
    66  		offset = src.Start()
    67  	)
    68  	if start < offset || end > src.End() {
    69  		return errors.New("sequtils: index out of range")
    70  	}
    71  	if start <= end {
    72  		if dst == src {
    73  			dst.SetSlice(sl.Slice(start-offset, end-offset))
    74  		} else {
    75  			dst.SetSlice(sl.Make(0, end-start).Append(sl.Slice(start-offset, end-offset)))
    76  		}
    77  		dst.SetOffset(start)
    78  		if dst, ok := dst.(seq.ConformationSetter); ok {
    79  			dst.SetConformation(feat.Linear)
    80  		}
    81  		return nil
    82  	}
    83  
    84  	if src, ok := src.(seq.Conformationer); !ok || src.Conformation() == feat.Linear {
    85  		return errors.New("sequtils: start position greater than end position for linear sequence")
    86  	}
    87  	if end < offset || start > src.End() {
    88  		return errors.New("sequtils: index out of range")
    89  	}
    90  	t := sl.Make(sl.Len()-start+offset, sl.Len()+end-start)
    91  	t.Copy(sl.Slice(start-offset, sl.Len()))
    92  	dst.SetSlice(t.Append(sl.Slice(0, end-offset)))
    93  	dst.SetOffset(start)
    94  	if dst, ok := dst.(seq.ConformationSetter); ok {
    95  		dst.SetConformation(feat.Linear)
    96  	}
    97  
    98  	return nil
    99  }
   100  
   101  type feats []feat.Feature
   102  
   103  func (f feats) Len() int           { return len(f) }
   104  func (f feats) Less(i, j int) bool { return f[i].Start() < f[j].Start() }
   105  func (f feats) Swap(i, j int)      { f[i], f[j] = f[j], f[i] }
   106  
   107  func max(a, b int) int {
   108  	if a > b {
   109  		return a
   110  	}
   111  	return b
   112  }
   113  func min(a, b int) int {
   114  	if a < b {
   115  		return a
   116  	}
   117  	return b
   118  }
   119  
   120  // Stitch produces a subsequence of src defined by fs and places the the result in dst.
   121  // The subsequences are guaranteed to be in order and non-overlapping even if not provided as such.
   122  // Stitching a circular sequence returns a linear sequence.
   123  func Stitch(dst, src Sliceable, fs feat.Set) error {
   124  	var (
   125  		sl     = src.Slice()
   126  		offset = src.Start()
   127  		ff     = feats(fs.Features())
   128  	)
   129  	for _, f := range ff {
   130  		if f.End() < f.Start() {
   131  			return errors.New("sequtils: feature end < feature start")
   132  		}
   133  	}
   134  	ff = append(feats(nil), ff...)
   135  	sort.Sort(ff)
   136  	// FIXME Does not correctly deal with circular sequences and feature sets.
   137  	// Range over ff if src is circular and  and trunc at start and end, do checks to
   138  	// see if feature splits on origin and rearrange tail to front.
   139  
   140  	pLen := sl.Len()
   141  	end := pLen + offset
   142  
   143  	type fi struct{ s, e int }
   144  	var (
   145  		fsp = make([]*fi, 0, len(ff))
   146  		csp *fi
   147  	)
   148  	for i, f := range ff {
   149  		if s := f.Start(); i == 0 || s > csp.e {
   150  			csp = &fi{s: s, e: f.End()}
   151  			fsp = append(fsp, csp)
   152  		} else {
   153  			csp.e = max(csp.e, f.End())
   154  		}
   155  	}
   156  
   157  	var l int
   158  	for _, f := range fsp {
   159  		l += max(0, min(f.e, end)-max(f.s, offset))
   160  	}
   161  	t := sl.Make(0, l)
   162  
   163  	for _, f := range fsp {
   164  		fs, fe := max(f.s-offset, 0), min(f.e-offset, pLen)
   165  		if fs >= fe {
   166  			continue
   167  		}
   168  		t = t.Append(sl.Slice(fs, fe))
   169  	}
   170  
   171  	dst.SetSlice(t)
   172  	if dst, ok := dst.(seq.ConformationSetter); ok {
   173  		dst.SetConformation(feat.Linear)
   174  	}
   175  	dst.SetOffset(0)
   176  
   177  	return nil
   178  }
   179  
   180  type SliceReverser interface {
   181  	Sliceable
   182  	New() seq.Sequence
   183  	Alphabet() alphabet.Alphabet
   184  	SetAlphabet(alphabet.Alphabet) error
   185  	RevComp()
   186  	Reverse()
   187  }
   188  
   189  // Compose produces a composition of src defined by the features in fs. The subparts of
   190  // the composition may be out of order and if features in fs specify orientation may be
   191  // reversed or reverse complemented depending on the src - if src is a SliceReverser and
   192  // its alphabet is a Complementor the segment will be reverse complemented, if the alphabte
   193  // is not a Complementor these segments will only be reversed. If src is not a SliceREverser
   194  // and a reverse segment is specified an error is returned.
   195  // Composing a circular sequence returns a linear sequence.
   196  func Compose(dst, src Sliceable, fs feat.Set) error {
   197  	var (
   198  		sl     = src.Slice()
   199  		offset = src.Start()
   200  		ff     = feats(fs.Features())
   201  	)
   202  
   203  	pLen := sl.Len()
   204  	end := pLen + offset
   205  
   206  	t := make([]alphabet.Slice, len(ff))
   207  	var tl int
   208  	for i, f := range ff {
   209  		if f.End() < f.Start() {
   210  			return errors.New("sequtils: feature end < feature start")
   211  		}
   212  		l := min(f.End(), end) - max(f.Start(), offset)
   213  		tl += l
   214  		t[i] = sl.Make(l, l)
   215  		t[i].Copy(sl.Slice(max(f.Start()-offset, 0), min(f.End()-offset, pLen)))
   216  	}
   217  
   218  	c := sl.Make(0, tl)
   219  	var r SliceReverser
   220  	for i, ts := range t {
   221  		if f, ok := ff[i].(feat.Orienter); ok && f.Orientation() == feat.Reverse {
   222  			switch src := src.(type) {
   223  			case SliceReverser:
   224  				if r == nil {
   225  					r = src.New().(SliceReverser)
   226  					if _, ok := src.Alphabet().(alphabet.Complementor); ok {
   227  						r.SetAlphabet(src.Alphabet())
   228  						r.SetSlice(ts)
   229  						r.RevComp()
   230  					} else {
   231  						r.SetSlice(ts)
   232  						r.Reverse()
   233  					}
   234  				}
   235  			default:
   236  				return errors.New("sequtils: unable to reverse segment during compose")
   237  			}
   238  			c = c.Append(r.Slice())
   239  		} else {
   240  			c = c.Append(ts)
   241  		}
   242  	}
   243  
   244  	dst.SetSlice(c)
   245  	if dst, ok := dst.(seq.ConformationSetter); ok {
   246  		dst.SetConformation(feat.Linear)
   247  	}
   248  	dst.SetOffset(0)
   249  
   250  	return nil
   251  }
   252  
   253  // A QualityFeature describes a segment of sequence quality information. EAt() called with
   254  // column values within Start() and End() is expected to return valid error probabilities for
   255  // the zero'th row position.
   256  type QualityFeature interface {
   257  	feat.Feature
   258  	EAt(int) float64
   259  }
   260  
   261  // Trim uses the modified-Mott trimming function to determine the start and end positions
   262  // of good sequence. http://www.phrap.org/phredphrap/phred.html
   263  func Trim(q QualityFeature, limit float64) (start, end int) {
   264  	var sum, max float64
   265  	for i := q.Start(); i < q.End(); i++ {
   266  		sum += limit - q.EAt(i)
   267  		if sum < 0 {
   268  			sum, start = 0, i+1
   269  		}
   270  		if sum >= max {
   271  			max, end = sum, i+1
   272  		}
   273  	}
   274  	return
   275  }