github.com/biogo/biogo@v1.0.4/io/seqio/fasta/fasta.go (about)

     1  // Copyright ©2011-2013 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package fasta provides types to read and write FASTA format files.
     6  package fasta
     7  
     8  import (
     9  	"github.com/biogo/biogo/alphabet"
    10  	"github.com/biogo/biogo/io/seqio"
    11  	"github.com/biogo/biogo/seq"
    12  
    13  	"bufio"
    14  	"bytes"
    15  	"fmt"
    16  	"io"
    17  )
    18  
    19  var (
    20  	_ seqio.Reader = (*Reader)(nil)
    21  	_ seqio.Writer = (*Writer)(nil)
    22  )
    23  
    24  // Default delimiters.
    25  const (
    26  	DefaultIDPrefix  = ">"
    27  	DefaultSeqPrefix = ""
    28  )
    29  
    30  // Fasta sequence format reader type.
    31  type Reader struct {
    32  	r         *bufio.Reader
    33  	t         seqio.SequenceAppender
    34  	IDPrefix  []byte
    35  	SeqPrefix []byte
    36  	working   seqio.SequenceAppender
    37  	err       error
    38  }
    39  
    40  // Returns a new fasta format reader using f. Sequences returned by the Reader are copied
    41  // from the provided template.
    42  func NewReader(f io.Reader, template seqio.SequenceAppender) *Reader {
    43  	return &Reader{
    44  		r:         bufio.NewReader(f),
    45  		t:         template,
    46  		IDPrefix:  []byte(DefaultIDPrefix),
    47  		SeqPrefix: []byte(DefaultSeqPrefix),
    48  	}
    49  }
    50  
    51  // Read a single sequence and return it and potentially an error. Note that
    52  // a non-nil returned error may be associated with a valid sequence, so it is
    53  // the responsibility of the caller to examine the error to determine whether
    54  // the read was successful.
    55  // Note that if the Reader's template type returns different non-nil error
    56  // values from calls to SetName and SetDescription, a new error string will be
    57  // returned on each call to Read. So to allow direct error comparison these
    58  // methods should return the same error.
    59  func (r *Reader) Read() (seq.Sequence, error) {
    60  	var (
    61  		buff, line []byte
    62  		isPrefix   bool
    63  		s          seq.Sequence
    64  	)
    65  	defer func() {
    66  		if r.working == nil {
    67  			r.err = nil
    68  		}
    69  	}()
    70  
    71  	for {
    72  		var err error
    73  		if buff, isPrefix, err = r.r.ReadLine(); err != nil {
    74  			if err != io.EOF || r.working == nil {
    75  				return nil, err
    76  			}
    77  			s, err = r.working, r.err
    78  			r.working = nil
    79  			return s, err
    80  		}
    81  		line = append(line, buff...)
    82  		if isPrefix {
    83  			continue
    84  		}
    85  		line = bytes.TrimSpace(line)
    86  		if len(line) == 0 {
    87  			continue
    88  		}
    89  
    90  		if bytes.HasPrefix(line, r.IDPrefix) {
    91  			if r.working == nil {
    92  				r.working, r.err = r.header(line)
    93  				line = nil
    94  			} else {
    95  				s, err = r.working, r.err
    96  				r.working, r.err = r.header(line)
    97  				return s, err
    98  			}
    99  		} else if bytes.HasPrefix(line, r.SeqPrefix) {
   100  			if r.working == nil {
   101  				return nil, fmt.Errorf("fasta: badly formed line %q", line)
   102  			}
   103  			line = bytes.Join(bytes.Fields(line[len(r.SeqPrefix):]), nil)
   104  			r.working.AppendLetters(alphabet.BytesToLetters(line)...)
   105  			line = nil
   106  		} else {
   107  			return nil, fmt.Errorf("fasta: badly formed line %q", line)
   108  		}
   109  	}
   110  }
   111  
   112  func (r *Reader) header(line []byte) (seqio.SequenceAppender, error) {
   113  	s := r.t.Clone().(seqio.SequenceAppender)
   114  	fieldMark := bytes.IndexAny(line, " \t")
   115  	var err error
   116  	if fieldMark < 0 {
   117  		err = s.SetName(string(line[len(r.IDPrefix):]))
   118  		return s, err
   119  	} else {
   120  		err = s.SetName(string(line[len(r.IDPrefix):fieldMark]))
   121  		_err := s.SetDescription(string(line[fieldMark+1:]))
   122  		if err != nil || _err != nil {
   123  			switch {
   124  			case err == _err:
   125  				return s, err
   126  			case err != nil && _err != nil:
   127  				return s, fmt.Errorf("fasta: multiple errors: name: %s, desc:%s", err, _err)
   128  			case err != nil:
   129  				return s, err
   130  			case _err != nil:
   131  				return s, _err
   132  			}
   133  		}
   134  	}
   135  
   136  	return s, nil
   137  }
   138  
   139  // Fasta sequence format writer type.
   140  type Writer struct {
   141  	w         io.Writer
   142  	IDPrefix  []byte
   143  	SeqPrefix []byte
   144  	Width     int
   145  }
   146  
   147  // Returns a new fasta format writer using f.
   148  func NewWriter(w io.Writer, width int) *Writer {
   149  	return &Writer{
   150  		w:         w,
   151  		IDPrefix:  []byte(DefaultIDPrefix),
   152  		SeqPrefix: []byte(DefaultSeqPrefix),
   153  		Width:     width,
   154  	}
   155  }
   156  
   157  // Write a single sequence and return the number of bytes written and any error.
   158  func (w *Writer) Write(s seq.Sequence) (n int, err error) {
   159  	var (
   160  		_n     int
   161  		prefix = append([]byte{'\n'}, w.SeqPrefix...)
   162  	)
   163  	id, desc := s.Name(), s.Description()
   164  	header := make([]byte, 0, len(w.IDPrefix)+len(id)+len(desc)+1)
   165  	header = append(header, w.IDPrefix...)
   166  	header = append(header, id...)
   167  	if len(desc) > 0 {
   168  		header = append(header, ' ')
   169  		header = append(header, desc...)
   170  	}
   171  
   172  	n, err = w.w.Write(header)
   173  	if err != nil {
   174  		return n, err
   175  	}
   176  	for i := 0; i < s.Len(); i++ {
   177  		if i%w.Width == 0 {
   178  			_n, err = w.w.Write(prefix)
   179  			if n += _n; err != nil {
   180  				return n, err
   181  			}
   182  		}
   183  		_n, err = w.w.Write([]byte{byte(s.At(i).L)})
   184  		if n += _n; err != nil {
   185  			return n, err
   186  		}
   187  	}
   188  	_n, err = w.w.Write([]byte{'\n'})
   189  	if n += _n; err != nil {
   190  		return n, err
   191  	}
   192  
   193  	return n, nil
   194  }