github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bam/writer.go (about)

     1  // Copyright ©2012 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bam
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"errors"
    11  	"io"
    12  
    13  	"github.com/Schaudge/hts/bgzf"
    14  	"github.com/Schaudge/hts/sam"
    15  	"github.com/klauspost/compress/gzip"
    16  )
    17  
    18  // Writer implements BAM data writing.
    19  type Writer struct {
    20  	h *sam.Header
    21  
    22  	bg  *bgzf.Writer
    23  	buf bytes.Buffer
    24  }
    25  
    26  // NewWriter returns a new Writer using the given SAM header. Write
    27  // concurrency is set to wc.
    28  func NewWriter(w io.Writer, h *sam.Header, wc int) (*Writer, error) {
    29  	return NewWriterLevel(w, h, gzip.DefaultCompression, wc)
    30  }
    31  
    32  func makeWriter(w io.Writer, level, wc int) (*bgzf.Writer, error) {
    33  	if bw, ok := w.(*bgzf.Writer); ok {
    34  		return bw, nil
    35  	}
    36  	return bgzf.NewWriterLevel(w, level, wc)
    37  }
    38  
    39  // NewWriterLevel returns a new Writer using the given SAM header. Write
    40  // concurrency is set to wc and compression level is set to level. Valid
    41  // values for level are described in the compress/gzip documentation.
    42  func NewWriterLevel(w io.Writer, h *sam.Header, level, wc int) (*Writer, error) {
    43  	bg, err := makeWriter(w, level, wc)
    44  	if err != nil {
    45  		return nil, err
    46  	}
    47  	bw := &Writer{
    48  		bg: bg,
    49  		h:  h,
    50  	}
    51  
    52  	err = bw.writeHeader(h)
    53  	if err != nil {
    54  		return nil, err
    55  	}
    56  	bw.bg.Flush()
    57  	err = bw.bg.Wait()
    58  	if err != nil {
    59  		return nil, err
    60  	}
    61  	return bw, nil
    62  }
    63  
    64  func (bw *Writer) writeHeader(h *sam.Header) error {
    65  	bw.buf.Reset()
    66  	err := h.EncodeBinary(&bw.buf)
    67  	if err != nil {
    68  		return err
    69  	}
    70  
    71  	_, err = bw.bg.Write(bw.buf.Bytes())
    72  	return err
    73  }
    74  
    75  // MarshalHeader encodes header in BAM binary format.
    76  func MarshalHeader(header *sam.Header) ([]byte, error) {
    77  	bb := bytes.Buffer{}
    78  	if err := header.EncodeBinary(&bb); err != nil {
    79  		return nil, err
    80  	}
    81  	return bb.Bytes(), nil
    82  }
    83  
    84  // Marshal serializes the record into "buf".
    85  func Marshal(r *sam.Record, buf *bytes.Buffer) error {
    86  	if len(r.Name) == 0 || len(r.Name) > 254 {
    87  		return errors.New("bam: name absent or too long")
    88  	}
    89  	if r.Qual != nil && len(r.Qual) != r.Seq.Length {
    90  		return errors.New("bam: sequence/quality length mismatch")
    91  	}
    92  
    93  	scratch := bufPool.Get().([]byte)
    94  	resizeScratch(&scratch, 0)
    95  	buildAux(r.AuxFields, &scratch)
    96  	tags := scratch
    97  	wb := errWriter{w: buf}
    98  	bin := binaryWriter{w: &wb}
    99  	recLen := bamFixedRemainder +
   100  		len(r.Name) + 1 + // Null terminated.
   101  		len(r.Cigar)<<2 + // CigarOps are 4 bytes.
   102  		len(r.Seq.Seq) +
   103  		len(r.Qual) +
   104  		len(tags)
   105  
   106  	// Write record header data.
   107  	bin.writeInt32(int32(recLen))
   108  	bin.writeInt32(int32(r.Ref.ID()))
   109  	bin.writeInt32(int32(r.Pos))
   110  	bin.writeUint8(byte(len(r.Name) + 1))
   111  	bin.writeUint8(r.MapQ)
   112  	bin.writeUint16(uint16(r.Bin())) //r.bin
   113  	bin.writeUint16(uint16(len(r.Cigar)))
   114  	bin.writeUint16(uint16(r.Flags))
   115  	bin.writeInt32(int32(r.Seq.Length))
   116  	bin.writeInt32(int32(r.MateRef.ID()))
   117  	bin.writeInt32(int32(r.MatePos))
   118  	bin.writeInt32(int32(r.TempLen))
   119  
   120  	// Write variable length data.
   121  	wb.WriteString(r.Name)
   122  	wb.WriteByte(0)
   123  	writeCigarOps(&bin, r.Cigar)
   124  	wb.Write(doublets(r.Seq.Seq).Bytes())
   125  	if r.Qual != nil {
   126  		wb.Write(r.Qual)
   127  	} else {
   128  		for i := 0; i < r.Seq.Length; i++ {
   129  			wb.WriteByte(0xff)
   130  		}
   131  	}
   132  	wb.Write(tags)
   133  	bufPool.Put(scratch)
   134  	return wb.err
   135  }
   136  
   137  // Write writes r to the BAM stream.
   138  func (bw *Writer) Write(r *sam.Record) error {
   139  	bw.buf.Reset()
   140  	if err := Marshal(r, &bw.buf); err != nil {
   141  		return err
   142  	}
   143  	_, err := bw.bg.Write(bw.buf.Bytes())
   144  	return err
   145  }
   146  
   147  func writeCigarOps(bin *binaryWriter, co []sam.CigarOp) {
   148  	for _, o := range co {
   149  		bin.writeUint32(uint32(o))
   150  		if bin.w.err != nil {
   151  			return
   152  		}
   153  	}
   154  	return
   155  }
   156  
   157  // Close closes the writer.
   158  func (bw *Writer) Close() error {
   159  	return bw.bg.Close()
   160  }
   161  
   162  type errWriter struct {
   163  	w   *bytes.Buffer
   164  	err error
   165  }
   166  
   167  func (w *errWriter) Write(p []byte) (int, error) {
   168  	if w.err != nil {
   169  		return 0, w.err
   170  	}
   171  	var n int
   172  	n, w.err = w.w.Write(p)
   173  	return n, w.err
   174  }
   175  
   176  func (w *errWriter) WriteString(s string) (int, error) {
   177  	if w.err != nil {
   178  		return 0, w.err
   179  	}
   180  	var n int
   181  	n, w.err = w.w.WriteString(s)
   182  	return n, w.err
   183  }
   184  
   185  func (w *errWriter) WriteByte(b byte) error {
   186  	if w.err != nil {
   187  		return w.err
   188  	}
   189  	w.err = w.w.WriteByte(b)
   190  	return w.err
   191  }
   192  
   193  type binaryWriter struct {
   194  	w   *errWriter
   195  	buf [4]byte
   196  }
   197  
   198  func (w *binaryWriter) writeUint8(v uint8) {
   199  	w.buf[0] = v
   200  	w.w.Write(w.buf[:1])
   201  }
   202  
   203  func (w *binaryWriter) writeUint16(v uint16) {
   204  	binary.LittleEndian.PutUint16(w.buf[:2], v)
   205  	w.w.Write(w.buf[:2])
   206  }
   207  
   208  func (w *binaryWriter) writeInt32(v int32) {
   209  	binary.LittleEndian.PutUint32(w.buf[:4], uint32(v))
   210  	w.w.Write(w.buf[:4])
   211  }
   212  
   213  func (w *binaryWriter) writeUint32(v uint32) {
   214  	binary.LittleEndian.PutUint32(w.buf[:4], v)
   215  	w.w.Write(w.buf[:4])
   216  }