github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/sam/sam.go (about)

     1  // Copyright ©2012 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package sam implements SAM file format reading and writing. The SAM format
     6  // is described in the SAM specification.
     7  //
     8  // http://samtools.github.io/hts-specs/SAMv1.pdf
     9  package sam
    10  
    11  import (
    12  	"bufio"
    13  	"errors"
    14  	"io"
    15  )
    16  
// Reader implements SAM format reading.
//
// A Reader is created with NewReader. Records are obtained with Read and the
// header with Header.
type Reader struct {
	r *bufio.Reader // buffered view of the input stream
	h *Header       // header returned by Header

	// seenRefs is non-nil only when the input did not start with a header
	// section. It maps each reference name met so far to the Reference first
	// seen with that name; the key "*" maps to nil. Read uses it so that
	// records naming the same reference share a single Reference value.
	seenRefs map[string]*Reference
}
    24  
    25  // NewReader returns a new Reader, reading from the given io.Reader.
    26  func NewReader(r io.Reader) (*Reader, error) {
    27  	h, _ := NewHeader(nil, nil)
    28  	sr := &Reader{
    29  		r: bufio.NewReader(r),
    30  		h: h,
    31  	}
    32  
    33  	var b []byte
    34  	p, err := sr.r.Peek(1)
    35  	if err != nil {
    36  		return nil, err
    37  	}
    38  	if p[0] != '@' {
    39  		sr.seenRefs = make(map[string]*Reference)
    40  		return sr, nil
    41  	}
    42  
    43  	for {
    44  		l, err := sr.r.ReadBytes('\n')
    45  		if err != nil {
    46  			return nil, io.ErrUnexpectedEOF
    47  		}
    48  		b = append(b, l...)
    49  		p, err := sr.r.Peek(1)
    50  		if err == io.EOF {
    51  			break
    52  		}
    53  		if err != nil {
    54  			return nil, err
    55  		}
    56  		if p[0] != '@' {
    57  			break
    58  		}
    59  	}
    60  
    61  	err = sr.h.UnmarshalText(b)
    62  	if err != nil {
    63  		return nil, err
    64  	}
    65  
    66  	return sr, nil
    67  }
    68  
    69  // Header returns the SAM Header held by the Reader.
    70  func (r *Reader) Header() *Header {
    71  	return r.h
    72  }
    73  
    74  // Read returns the next sam.Record in the SAM stream.
    75  func (r *Reader) Read() (*Record, error) {
    76  	b, err := r.r.ReadBytes('\n')
    77  	if err != nil {
    78  		return nil, err
    79  	}
    80  	b = b[:len(b)-1]
    81  	if b[len(b)-1] == '\r' {
    82  		b = b[:len(b)-1]
    83  	}
    84  	var rec Record
    85  
    86  	// Handle cases where a header was present.
    87  	if r.seenRefs == nil {
    88  		err = rec.UnmarshalSAM(r.h, b)
    89  		if err != nil {
    90  			return nil, err
    91  		}
    92  		return &rec, nil
    93  	}
    94  
    95  	// Handle cases where no SAM header is present.
    96  	err = rec.UnmarshalSAM(nil, b)
    97  	if err != nil {
    98  		return nil, err
    99  	}
   100  
   101  	if ref, ok := r.seenRefs[rec.Ref.Name()]; ok {
   102  		rec.Ref = ref
   103  	} else if rec.Ref != nil {
   104  		err = r.h.AddReference(rec.Ref)
   105  		if err != nil {
   106  			return nil, err
   107  		}
   108  		r.seenRefs[rec.Ref.Name()] = rec.Ref
   109  	} else {
   110  		r.seenRefs["*"] = nil
   111  	}
   112  	if ref, ok := r.seenRefs[rec.MateRef.Name()]; ok {
   113  		rec.MateRef = ref
   114  	} else if rec.MateRef != nil {
   115  		err = r.h.AddReference(rec.MateRef)
   116  		if err != nil {
   117  			return nil, err
   118  		}
   119  		r.seenRefs[rec.MateRef.Name()] = rec.MateRef
   120  	} else {
   121  		r.seenRefs["*"] = nil
   122  	}
   123  
   124  	return &rec, nil
   125  }
   126  
// RecordReader wraps types that can read SAM Records.
type RecordReader interface {
	// Read returns the next Record or a non-nil error. An Iterator built on
	// a RecordReader treats io.EOF from Read as the normal end of input.
	Read() (*Record, error)
}
   131  
// Iterator wraps a RecordReader to provide a convenient loop interface for reading SAM/BAM data.
// Successive calls to the Next method will step through the records of the provided
// RecordReader. Iteration stops unrecoverably at EOF or the first error.
type Iterator struct {
	r   RecordReader // source of records
	rec *Record      // result of the most recent Read; exposed by Record
	err error        // error of the most recent Read; once non-nil, Next stops reading
}
   140  
   141  // NewIterator returns a Iterator to read from r.
   142  //
   143  //  i, err := NewIterator(r)
   144  //  if err != nil {
   145  //  	return err
   146  //  }
   147  //  for i.Next() {
   148  //  	fn(i.Record())
   149  //  }
   150  //  return i.Error()
   151  //
   152  func NewIterator(r RecordReader) *Iterator { return &Iterator{r: r} }
   153  
   154  // Next advances the Iterator past the next record, which will then be available through
   155  // the Record method. It returns false when the iteration stops, either by reaching the end of the
   156  // input or an error. After Next returns false, the Error method will return any error that
   157  // occurred during iteration, except that if it was io.EOF, Error will return nil.
   158  func (i *Iterator) Next() bool {
   159  	if i.err != nil {
   160  		return false
   161  	}
   162  	i.rec, i.err = i.r.Read()
   163  	return i.err == nil
   164  }
   165  
   166  // Error returns the first non-EOF error that was encountered by the Iterator.
   167  func (i *Iterator) Error() error {
   168  	if i.err == io.EOF {
   169  		return nil
   170  	}
   171  	return i.err
   172  }
   173  
   174  // Record returns the most recent record read by a call to Next.
   175  func (i *Iterator) Record() *Record { return i.rec }
   176  
// Writer implements SAM format writing.
//
// A Writer is created with NewWriter; records are written with Write.
type Writer struct {
	w     io.Writer // destination of the SAM text
	flags int       // flag format handed to Record.MarshalSAM for every record
}
   182  
   183  // NewWriter returns a Writer to the given io.Writer using h for the SAM
   184  // header. The format of flags for SAM lines can be FlagDecimal, FlagHex
   185  // or FlagString.
   186  func NewWriter(w io.Writer, h *Header, flags int) (*Writer, error) {
   187  	if flags < FlagDecimal || flags > FlagString {
   188  		return nil, errors.New("bam: flag format option out of range")
   189  	}
   190  	sw := &Writer{w: w, flags: flags}
   191  	text, _ := h.MarshalText()
   192  	_, err := w.Write(text)
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  	return sw, nil
   197  }
   198  
   199  // Write writes r to the SAM stream.
   200  func (w *Writer) Write(r *Record) error {
   201  	b, err := r.MarshalSAM(w.flags)
   202  	if err != nil {
   203  		return err
   204  	}
   205  	b = append(b, '\n')
   206  	_, err = w.w.Write(b)
   207  	return err
   208  }
   209  
   210  const (
   211  	wordBits = 31
   212  
   213  	maxInt32 = int(int32(^uint32(0) >> 1))
   214  	minInt32 = -int(maxInt32) - 1
   215  )
   216  
   217  func validInt32(i int) bool { return minInt32 <= i && i <= maxInt32 }
   218  
   219  func validLen(i int) bool      { return 1 <= i && i <= 1<<wordBits-1 }
   220  func validPos(i int) bool      { return -1 <= i && i <= (1<<wordBits-1)-1 } // 0-based.
   221  func validTmpltLen(i int) bool { return -(1<<wordBits) <= i && i <= 1<<wordBits-1 }