github.com/biogo/biogo@v1.0.4/seq/linear/qseq.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package linear 6 7 import ( 8 "github.com/biogo/biogo/alphabet" 9 "github.com/biogo/biogo/feat" 10 "github.com/biogo/biogo/seq" 11 12 "fmt" 13 "unicode" 14 ) 15 16 // A QSeq is a basic linear sequence with Phred quality scores. 17 type QSeq struct { 18 seq.Annotation 19 Seq alphabet.QLetters 20 Threshold alphabet.Qphred // Threshold for returning valid letter. 21 QFilter seq.QFilter // How to represent below threshold letter. 22 Encode alphabet.Encoding 23 } 24 25 // Interface guarantees 26 var ( 27 _ feat.Feature = (*QSeq)(nil) 28 _ seq.Sequence = (*QSeq)(nil) 29 _ seq.Scorer = (*QSeq)(nil) 30 ) 31 32 // NewQSeq create a new QSeq with the given id, letter sequence, alphabet and quality encoding. 33 func NewQSeq(id string, ql []alphabet.QLetter, alpha alphabet.Alphabet, enc alphabet.Encoding) *QSeq { 34 return &QSeq{ 35 Annotation: seq.Annotation{ 36 ID: id, 37 Alpha: alpha, 38 Strand: seq.Plus, 39 }, 40 Seq: append(alphabet.QLetters(nil), ql...), 41 Encode: enc, 42 Threshold: 3, 43 QFilter: seq.AmbigFilter, 44 } 45 } 46 47 // Append append Letters to the sequence, the DefaultQphred value is used for quality scores. 48 func (s *QSeq) AppendLetters(a ...alphabet.Letter) error { 49 l := s.Len() 50 s.Seq = append(s.Seq, make([]alphabet.QLetter, len(a))...)[:l] 51 for _, v := range a { 52 s.Seq = append(s.Seq, alphabet.QLetter{L: v, Q: seq.DefaultQphred}) 53 } 54 return nil 55 } 56 57 // Append appends QLetters to the sequence. 58 func (s *QSeq) AppendQLetters(a ...alphabet.QLetter) error { 59 s.Seq = append(s.Seq, a...) 60 return nil 61 } 62 63 // Slice returns the sequence data as a alphabet.Slice. 64 func (s *QSeq) Slice() alphabet.Slice { return s.Seq } 65 66 // SetSlice sets the sequence data represented by the sequence. SetSlice will panic if sl 67 // is not a alphabet.QLetters. 68 func (s *QSeq) SetSlice(sl alphabet.Slice) { s.Seq = sl.(alphabet.QLetters) } 69 70 // At returns the letter at position pos. 71 func (s *QSeq) At(i int) alphabet.QLetter { 72 return s.Seq[i-s.Offset] 73 } 74 75 // QEncode encodes the quality at position pos to a letter based on the sequence encoding setting. 76 func (s *QSeq) QEncode(i int) byte { 77 return s.Seq[i-s.Offset].Q.Encode(s.Encode) 78 } 79 80 // Encoding returns the quality encoding scheme. 81 func (s *QSeq) Encoding() alphabet.Encoding { return s.Encode } 82 83 // SetEncoding sets the quality encoding scheme to e. 84 func (s *QSeq) SetEncoding(e alphabet.Encoding) error { s.Encode = e; return nil } 85 86 // EAt returns the probability of a sequence error at position pos. 87 func (s *QSeq) EAt(i int) float64 { 88 return s.Seq[i-s.Offset].Q.ProbE() 89 } 90 91 // Set sets the letter at position pos to l. 92 func (s *QSeq) Set(i int, l alphabet.QLetter) error { 93 s.Seq[i-s.Offset] = l 94 return nil 95 } 96 97 // SetE sets the quality at position pos to e to reflect the given p(Error). 98 func (s *QSeq) SetE(i int, e float64) error { 99 s.Seq[i-s.Offset].Q = alphabet.Ephred(e) 100 return nil 101 } 102 103 // Len returns the length of the sequence. 104 func (s *QSeq) Len() int { return len(s.Seq) } 105 106 // Start return the start position of the sequence in coordinates relative to the 107 // sequence location. 108 func (s *QSeq) Start() int { return s.Offset } 109 110 // End returns the end position of the sequence in coordinates relative to the 111 // sequence location. 112 func (s *QSeq) End() int { return s.Offset + s.Len() } 113 114 // Validate validates the letters of the sequence according to the sequence alphabet. 115 func (s *QSeq) Validate() (bool, int) { 116 for i, ql := range s.Seq { 117 if !s.Alpha.IsValid(ql.L) { 118 return false, i 119 } 120 } 121 122 return true, -1 123 } 124 125 // Clone returns a copy of the sequence. 126 func (s *QSeq) Clone() seq.Sequence { 127 c := *s 128 c.Seq = append([]alphabet.QLetter(nil), s.Seq...) 129 130 return &c 131 } 132 133 // New returns an empty *QSeq sequence with the same alphabet. 134 func (s *QSeq) New() seq.Sequence { 135 return &QSeq{Annotation: seq.Annotation{Alpha: s.Alpha}} 136 } 137 138 // RevComp reverse complements the sequence. RevComp will panic if the alphabet used by 139 // the receiver is not a Complementor. 140 func (s *QSeq) RevComp() { 141 l, comp := s.Seq, s.Alphabet().(alphabet.Complementor).ComplementTable() 142 i, j := 0, len(l)-1 143 for ; i < j; i, j = i+1, j-1 { 144 l[i].L, l[j].L = comp[l[j].L], comp[l[i].L] 145 l[i].Q, l[j].Q = l[j].Q, l[i].Q 146 } 147 if i == j { 148 l[i].L = comp[l[i].L] 149 } 150 s.Strand = -s.Strand 151 } 152 153 // Reverse reverses the order of letters in the the sequence without complementing them. 154 func (s *QSeq) Reverse() { 155 l := s.Seq 156 for i, j := 0, len(l)-1; i < j; i, j = i+1, j-1 { 157 l[i], l[j] = l[j], l[i] 158 } 159 s.Strand = seq.None 160 } 161 162 // String returns a string representation of the sequence data only. 163 func (s *QSeq) String() string { 164 cs := make([]alphabet.Letter, 0, len(s.Seq)) 165 for _, ql := range s.Seq { 166 cs = append(cs, s.QFilter(s.Alpha, s.Threshold, ql)) 167 } 168 169 return alphabet.Letters(cs).String() 170 } 171 172 func min(a, b int) int { 173 if a < b { 174 return a 175 } 176 return b 177 } 178 179 // Format is a support routine for fmt.Formatter. It accepts the formats 'v' and 's' 180 // (string), 'a' (fasta) and 'q' (fastq). String, fasta and fastq formats support 181 // truncated output via the verb's precision. Fasta format supports sequence line 182 // specification via the verb's width field. Fastq format supports optional inclusion 183 // of the '+' line descriptor line with the '+' flag. The 'v' verb supports the '#' 184 // flag for Go syntax output. The 's' and 'v' formats support the '-' flag for 185 // omission of the sequence name. 186 func (s *QSeq) Format(fs fmt.State, c rune) { 187 if s == nil { 188 fmt.Fprint(fs, "<nil>") 189 return 190 } 191 var ( 192 w, wOk = fs.Width() 193 p, pOk = fs.Precision() 194 buf []alphabet.QLetter 195 ) 196 if pOk { 197 buf = s.Seq[:min(p, len(s.Seq))] 198 } else { 199 buf = s.Seq 200 } 201 202 switch c { 203 case 'v': 204 if fs.Flag('#') { 205 fmt.Fprintf(fs, "&%#v", *s) 206 return 207 } 208 fallthrough 209 case 's': 210 if !fs.Flag('-') { 211 fmt.Fprintf(fs, "%q ", s.ID) 212 } 213 for _, ql := range buf { 214 fmt.Fprintf(fs, "%c", s.QFilter(s.Alpha, s.Threshold, ql)) 215 } 216 if pOk && p < s.Len() { 217 fmt.Fprint(fs, "...") 218 } 219 case 'a': 220 s.formatDescLineTo(fs, '>') 221 for i, ql := range buf { 222 fmt.Fprintf(fs, "%c", s.QFilter(s.Alpha, s.Threshold, ql)) 223 if wOk && i < s.Len()-1 && i%w == w-1 { 224 fmt.Fprintln(fs) 225 } 226 } 227 if pOk && p < s.Len() { 228 fmt.Fprint(fs, "...") 229 } 230 case 'q': 231 s.formatDescLineTo(fs, '@') 232 for _, ql := range buf { 233 fmt.Fprintf(fs, "%c", s.QFilter(s.Alpha, s.Threshold, ql)) 234 } 235 if pOk && p < s.Len() { 236 fmt.Fprintln(fs, "...") 237 } else { 238 fmt.Fprintln(fs) 239 } 240 if fs.Flag('+') { 241 s.formatDescLineTo(fs, '+') 242 } else { 243 fmt.Fprintln(fs, "+") 244 } 245 for _, ql := range buf { 246 e := ql.Q.Encode(s.Encode) 247 if e >= unicode.MaxASCII { 248 e = unicode.MaxASCII - 1 249 } 250 fmt.Fprintf(fs, "%c", e) 251 } 252 if pOk && p < s.Len() { 253 fmt.Fprint(fs, "...") 254 } 255 default: 256 fmt.Fprintf(fs, "%%!%c(linear.QSeq=%.10s)", c, s) 257 } 258 } 259 260 func (s *QSeq) formatDescLineTo(fs fmt.State, p rune) { 261 fmt.Fprintf(fs, "%c%s", p, s.ID) 262 if s.Desc != "" { 263 fmt.Fprintf(fs, " %s", s.Desc) 264 } 265 fmt.Fprintln(fs) 266 }