github.com/biogo/biogo@v1.0.4/io/seqio/fasta/fasta.go (about) 1 // Copyright ©2011-2013 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package fasta provides types to read and write FASTA format files. 6 package fasta 7 8 import ( 9 "github.com/biogo/biogo/alphabet" 10 "github.com/biogo/biogo/io/seqio" 11 "github.com/biogo/biogo/seq" 12 13 "bufio" 14 "bytes" 15 "fmt" 16 "io" 17 ) 18 19 var ( 20 _ seqio.Reader = (*Reader)(nil) 21 _ seqio.Writer = (*Writer)(nil) 22 ) 23 24 // Default delimiters. 25 const ( 26 DefaultIDPrefix = ">" 27 DefaultSeqPrefix = "" 28 ) 29 30 // Fasta sequence format reader type. 31 type Reader struct { 32 r *bufio.Reader 33 t seqio.SequenceAppender 34 IDPrefix []byte 35 SeqPrefix []byte 36 working seqio.SequenceAppender 37 err error 38 } 39 40 // Returns a new fasta format reader using f. Sequences returned by the Reader are copied 41 // from the provided template. 42 func NewReader(f io.Reader, template seqio.SequenceAppender) *Reader { 43 return &Reader{ 44 r: bufio.NewReader(f), 45 t: template, 46 IDPrefix: []byte(DefaultIDPrefix), 47 SeqPrefix: []byte(DefaultSeqPrefix), 48 } 49 } 50 51 // Read a single sequence and return it and potentially an error. Note that 52 // a non-nil returned error may be associated with a valid sequence, so it is 53 // the responsibility of the caller to examine the error to determine whether 54 // the read was successful. 55 // Note that if the Reader's template type returns different non-nil error 56 // values from calls to SetName and SetDescription, a new error string will be 57 // returned on each call to Read. So to allow direct error comparison these 58 // methods should return the same error. 59 func (r *Reader) Read() (seq.Sequence, error) { 60 var ( 61 buff, line []byte 62 isPrefix bool 63 s seq.Sequence 64 ) 65 defer func() { 66 if r.working == nil { 67 r.err = nil 68 } 69 }() 70 71 for { 72 var err error 73 if buff, isPrefix, err = r.r.ReadLine(); err != nil { 74 if err != io.EOF || r.working == nil { 75 return nil, err 76 } 77 s, err = r.working, r.err 78 r.working = nil 79 return s, err 80 } 81 line = append(line, buff...) 82 if isPrefix { 83 continue 84 } 85 line = bytes.TrimSpace(line) 86 if len(line) == 0 { 87 continue 88 } 89 90 if bytes.HasPrefix(line, r.IDPrefix) { 91 if r.working == nil { 92 r.working, r.err = r.header(line) 93 line = nil 94 } else { 95 s, err = r.working, r.err 96 r.working, r.err = r.header(line) 97 return s, err 98 } 99 } else if bytes.HasPrefix(line, r.SeqPrefix) { 100 if r.working == nil { 101 return nil, fmt.Errorf("fasta: badly formed line %q", line) 102 } 103 line = bytes.Join(bytes.Fields(line[len(r.SeqPrefix):]), nil) 104 r.working.AppendLetters(alphabet.BytesToLetters(line)...) 105 line = nil 106 } else { 107 return nil, fmt.Errorf("fasta: badly formed line %q", line) 108 } 109 } 110 } 111 112 func (r *Reader) header(line []byte) (seqio.SequenceAppender, error) { 113 s := r.t.Clone().(seqio.SequenceAppender) 114 fieldMark := bytes.IndexAny(line, " \t") 115 var err error 116 if fieldMark < 0 { 117 err = s.SetName(string(line[len(r.IDPrefix):])) 118 return s, err 119 } else { 120 err = s.SetName(string(line[len(r.IDPrefix):fieldMark])) 121 _err := s.SetDescription(string(line[fieldMark+1:])) 122 if err != nil || _err != nil { 123 switch { 124 case err == _err: 125 return s, err 126 case err != nil && _err != nil: 127 return s, fmt.Errorf("fasta: multiple errors: name: %s, desc:%s", err, _err) 128 case err != nil: 129 return s, err 130 case _err != nil: 131 return s, _err 132 } 133 } 134 } 135 136 return s, nil 137 } 138 139 // Fasta sequence format writer type. 140 type Writer struct { 141 w io.Writer 142 IDPrefix []byte 143 SeqPrefix []byte 144 Width int 145 } 146 147 // Returns a new fasta format writer using f. 148 func NewWriter(w io.Writer, width int) *Writer { 149 return &Writer{ 150 w: w, 151 IDPrefix: []byte(DefaultIDPrefix), 152 SeqPrefix: []byte(DefaultSeqPrefix), 153 Width: width, 154 } 155 } 156 157 // Write a single sequence and return the number of bytes written and any error. 158 func (w *Writer) Write(s seq.Sequence) (n int, err error) { 159 var ( 160 _n int 161 prefix = append([]byte{'\n'}, w.SeqPrefix...) 162 ) 163 id, desc := s.Name(), s.Description() 164 header := make([]byte, 0, len(w.IDPrefix)+len(id)+len(desc)+1) 165 header = append(header, w.IDPrefix...) 166 header = append(header, id...) 167 if len(desc) > 0 { 168 header = append(header, ' ') 169 header = append(header, desc...) 170 } 171 172 n, err = w.w.Write(header) 173 if err != nil { 174 return n, err 175 } 176 for i := 0; i < s.Len(); i++ { 177 if i%w.Width == 0 { 178 _n, err = w.w.Write(prefix) 179 if n += _n; err != nil { 180 return n, err 181 } 182 } 183 _n, err = w.w.Write([]byte{byte(s.At(i).L)}) 184 if n += _n; err != nil { 185 return n, err 186 } 187 } 188 _n, err = w.w.Write([]byte{'\n'}) 189 if n += _n; err != nil { 190 return n, err 191 } 192 193 return n, nil 194 }