github.com/biogo/biogo@v1.0.4/seq/linear/qseq.go (about)

     1  // Copyright ©2011-2013 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package linear
     6  
     7  import (
     8  	"github.com/biogo/biogo/alphabet"
     9  	"github.com/biogo/biogo/feat"
    10  	"github.com/biogo/biogo/seq"
    11  
    12  	"fmt"
    13  	"unicode"
    14  )
    15  
// A QSeq is a basic linear sequence with Phred quality scores.
// It embeds seq.Annotation, which supplies the identifier, description, alphabet,
// strand and offset used by the methods of QSeq.
type QSeq struct {
	seq.Annotation
	Seq       alphabet.QLetters // Letters of the sequence, each paired with a quality score.
	Threshold alphabet.Qphred   // Threshold for returning valid letter.
	QFilter   seq.QFilter       // How to represent below threshold letter.
	Encode    alphabet.Encoding // Quality encoding scheme, used by QEncode and Format.
}
    24  
// Interface guarantees: compile-time assertions that *QSeq satisfies the
// feat.Feature, seq.Sequence and seq.Scorer interfaces.
var (
	_ feat.Feature = (*QSeq)(nil)
	_ seq.Sequence = (*QSeq)(nil)
	_ seq.Scorer   = (*QSeq)(nil)
)
    31  
    32  // NewQSeq create a new QSeq with the given id, letter sequence, alphabet and quality encoding.
    33  func NewQSeq(id string, ql []alphabet.QLetter, alpha alphabet.Alphabet, enc alphabet.Encoding) *QSeq {
    34  	return &QSeq{
    35  		Annotation: seq.Annotation{
    36  			ID:     id,
    37  			Alpha:  alpha,
    38  			Strand: seq.Plus,
    39  		},
    40  		Seq:       append(alphabet.QLetters(nil), ql...),
    41  		Encode:    enc,
    42  		Threshold: 3,
    43  		QFilter:   seq.AmbigFilter,
    44  	}
    45  }
    46  
    47  // Append append Letters to the sequence, the DefaultQphred value is used for quality scores.
    48  func (s *QSeq) AppendLetters(a ...alphabet.Letter) error {
    49  	l := s.Len()
    50  	s.Seq = append(s.Seq, make([]alphabet.QLetter, len(a))...)[:l]
    51  	for _, v := range a {
    52  		s.Seq = append(s.Seq, alphabet.QLetter{L: v, Q: seq.DefaultQphred})
    53  	}
    54  	return nil
    55  }
    56  
// AppendQLetters appends QLetters to the sequence. The letters and their quality
// scores are added unchanged. The returned error is always nil.
func (s *QSeq) AppendQLetters(a ...alphabet.QLetter) error {
	s.Seq = append(s.Seq, a...)
	return nil
}
    62  
// Slice returns the sequence data as an alphabet.Slice. The returned value is the
// receiver's Seq field itself, not a copy.
func (s *QSeq) Slice() alphabet.Slice { return s.Seq }
    65  
// SetSlice sets the sequence data represented by the sequence. SetSlice will panic if sl
// is not an alphabet.QLetters. The value is stored without copying.
func (s *QSeq) SetSlice(sl alphabet.Slice) { s.Seq = sl.(alphabet.QLetters) }
    69  
// At returns the letter and quality at position i. The sequence's Offset is
// subtracted from i before indexing, so At panics if i-Offset is outside the
// bounds of Seq.
func (s *QSeq) At(i int) alphabet.QLetter {
	return s.Seq[i-s.Offset]
}
    74  
// QEncode encodes the quality at position i to a letter based on the sequence encoding
// setting. The sequence's Offset is subtracted from i before indexing, so QEncode
// panics if i-Offset is outside the bounds of Seq.
func (s *QSeq) QEncode(i int) byte {
	return s.Seq[i-s.Offset].Q.Encode(s.Encode)
}
    79  
// Encoding returns the quality encoding scheme held in the Encode field.
func (s *QSeq) Encoding() alphabet.Encoding { return s.Encode }
    82  
// SetEncoding sets the quality encoding scheme to e. Only the Encode field is
// changed; the stored quality scores are not modified. The returned error is
// always nil.
func (s *QSeq) SetEncoding(e alphabet.Encoding) error { s.Encode = e; return nil }
    85  
// EAt returns the probability of a sequence error at position i, as given by the
// ProbE method of the quality score stored there. The sequence's Offset is
// subtracted from i before indexing, so EAt panics if i-Offset is outside the
// bounds of Seq.
func (s *QSeq) EAt(i int) float64 {
	return s.Seq[i-s.Offset].Q.ProbE()
}
    90  
// Set sets the letter and quality at position i to l. The sequence's Offset is
// subtracted from i before indexing, so Set panics if i-Offset is outside the
// bounds of Seq. The returned error is always nil.
func (s *QSeq) Set(i int, l alphabet.QLetter) error {
	s.Seq[i-s.Offset] = l
	return nil
}
    96  
// SetE sets the quality at position i to reflect the given p(Error) e. The
// probability is converted to a quality score with alphabet.Ephred; the letter at
// the position is left unchanged. The sequence's Offset is subtracted from i before
// indexing, so SetE panics if i-Offset is outside the bounds of Seq. The returned
// error is always nil.
func (s *QSeq) SetE(i int, e float64) error {
	s.Seq[i-s.Offset].Q = alphabet.Ephred(e)
	return nil
}
   102  
// Len returns the length of the sequence, which is the number of elements in Seq.
func (s *QSeq) Len() int { return len(s.Seq) }
   105  
// Start returns the start position of the sequence in coordinates relative to the
// sequence location. This is the sequence's Offset.
func (s *QSeq) Start() int { return s.Offset }
   109  
// End returns the end position of the sequence in coordinates relative to the
// sequence location. This is the sequence's Offset plus its length.
func (s *QSeq) End() int { return s.Offset + s.Len() }
   113  
   114  // Validate validates the letters of the sequence according to the sequence alphabet.
   115  func (s *QSeq) Validate() (bool, int) {
   116  	for i, ql := range s.Seq {
   117  		if !s.Alpha.IsValid(ql.L) {
   118  			return false, i
   119  		}
   120  	}
   121  
   122  	return true, -1
   123  }
   124  
   125  // Clone returns a copy of the sequence.
   126  func (s *QSeq) Clone() seq.Sequence {
   127  	c := *s
   128  	c.Seq = append([]alphabet.QLetter(nil), s.Seq...)
   129  
   130  	return &c
   131  }
   132  
// New returns an empty *QSeq sequence with the same alphabet.
// Only the alphabet is carried over: Threshold, QFilter, Encode and the remaining
// annotation fields are left at their zero values. String and Format call QFilter
// for each letter, so QFilter should be set on the returned sequence before letters
// are added and the sequence is formatted.
func (s *QSeq) New() seq.Sequence {
	return &QSeq{Annotation: seq.Annotation{Alpha: s.Alpha}}
}
   137  
   138  // RevComp reverse complements the sequence. RevComp will panic if the alphabet used by
   139  // the receiver is not a Complementor.
   140  func (s *QSeq) RevComp() {
   141  	l, comp := s.Seq, s.Alphabet().(alphabet.Complementor).ComplementTable()
   142  	i, j := 0, len(l)-1
   143  	for ; i < j; i, j = i+1, j-1 {
   144  		l[i].L, l[j].L = comp[l[j].L], comp[l[i].L]
   145  		l[i].Q, l[j].Q = l[j].Q, l[i].Q
   146  	}
   147  	if i == j {
   148  		l[i].L = comp[l[i].L]
   149  	}
   150  	s.Strand = -s.Strand
   151  }
   152  
   153  // Reverse reverses the order of letters in the the sequence without complementing them.
   154  func (s *QSeq) Reverse() {
   155  	l := s.Seq
   156  	for i, j := 0, len(l)-1; i < j; i, j = i+1, j-1 {
   157  		l[i], l[j] = l[j], l[i]
   158  	}
   159  	s.Strand = seq.None
   160  }
   161  
   162  // String returns a string representation of the sequence data only.
   163  func (s *QSeq) String() string {
   164  	cs := make([]alphabet.Letter, 0, len(s.Seq))
   165  	for _, ql := range s.Seq {
   166  		cs = append(cs, s.QFilter(s.Alpha, s.Threshold, ql))
   167  	}
   168  
   169  	return alphabet.Letters(cs).String()
   170  }
   171  
   172  func min(a, b int) int {
   173  	if a < b {
   174  		return a
   175  	}
   176  	return b
   177  }
   178  
   179  // Format is a support routine for fmt.Formatter. It accepts the formats 'v' and 's'
   180  // (string), 'a' (fasta) and 'q' (fastq). String, fasta and fastq formats support
   181  // truncated output via the verb's precision. Fasta format supports sequence line
   182  // specification via the verb's width field. Fastq format supports optional inclusion
   183  // of the '+' line descriptor line with the '+' flag. The 'v' verb supports the '#'
   184  // flag for Go syntax output. The 's' and 'v' formats support the '-' flag for
   185  // omission of the sequence name.
   186  func (s *QSeq) Format(fs fmt.State, c rune) {
   187  	if s == nil {
   188  		fmt.Fprint(fs, "<nil>")
   189  		return
   190  	}
   191  	var (
   192  		w, wOk = fs.Width()
   193  		p, pOk = fs.Precision()
   194  		buf    []alphabet.QLetter
   195  	)
   196  	if pOk {
   197  		buf = s.Seq[:min(p, len(s.Seq))]
   198  	} else {
   199  		buf = s.Seq
   200  	}
   201  
   202  	switch c {
   203  	case 'v':
   204  		if fs.Flag('#') {
   205  			fmt.Fprintf(fs, "&%#v", *s)
   206  			return
   207  		}
   208  		fallthrough
   209  	case 's':
   210  		if !fs.Flag('-') {
   211  			fmt.Fprintf(fs, "%q ", s.ID)
   212  		}
   213  		for _, ql := range buf {
   214  			fmt.Fprintf(fs, "%c", s.QFilter(s.Alpha, s.Threshold, ql))
   215  		}
   216  		if pOk && p < s.Len() {
   217  			fmt.Fprint(fs, "...")
   218  		}
   219  	case 'a':
   220  		s.formatDescLineTo(fs, '>')
   221  		for i, ql := range buf {
   222  			fmt.Fprintf(fs, "%c", s.QFilter(s.Alpha, s.Threshold, ql))
   223  			if wOk && i < s.Len()-1 && i%w == w-1 {
   224  				fmt.Fprintln(fs)
   225  			}
   226  		}
   227  		if pOk && p < s.Len() {
   228  			fmt.Fprint(fs, "...")
   229  		}
   230  	case 'q':
   231  		s.formatDescLineTo(fs, '@')
   232  		for _, ql := range buf {
   233  			fmt.Fprintf(fs, "%c", s.QFilter(s.Alpha, s.Threshold, ql))
   234  		}
   235  		if pOk && p < s.Len() {
   236  			fmt.Fprintln(fs, "...")
   237  		} else {
   238  			fmt.Fprintln(fs)
   239  		}
   240  		if fs.Flag('+') {
   241  			s.formatDescLineTo(fs, '+')
   242  		} else {
   243  			fmt.Fprintln(fs, "+")
   244  		}
   245  		for _, ql := range buf {
   246  			e := ql.Q.Encode(s.Encode)
   247  			if e >= unicode.MaxASCII {
   248  				e = unicode.MaxASCII - 1
   249  			}
   250  			fmt.Fprintf(fs, "%c", e)
   251  		}
   252  		if pOk && p < s.Len() {
   253  			fmt.Fprint(fs, "...")
   254  		}
   255  	default:
   256  		fmt.Fprintf(fs, "%%!%c(linear.QSeq=%.10s)", c, s)
   257  	}
   258  }
   259  
   260  func (s *QSeq) formatDescLineTo(fs fmt.State, p rune) {
   261  	fmt.Fprintf(fs, "%c%s", p, s.ID)
   262  	if s.Desc != "" {
   263  		fmt.Fprintf(fs, " %s", s.Desc)
   264  	}
   265  	fmt.Fprintln(fs)
   266  }