github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bgzf/writer.go (about)

     1  // Copyright ©2012 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bgzf
     6  
     7  import (
     8  	"bytes"
     9  	"compress/gzip"
    10  	"fmt"
    11  	"io"
    12  	"sync"
    13  
    14  	"github.com/Schaudge/grailbase/compress/libdeflate"
    15  )
    16  
// Writer implements BGZF blocked gzip compression.
//
// Because the SAM specification requires that the RFC1952 FLG header field
// be set to 0x04, a Writer's Name and Comment fields should not be set if
// its output is to be read by another BGZF decompressor implementation.
type Writer struct {
	// The embedded Header supplies the Comment, Extra, ModTime, Name and OS
	// values copied into the gzip header of every block; w is the
	// destination that compressed blocks are copied to.
	gzip.Header
	w io.Writer

	// active is the compressor currently accumulating uncompressed data
	// from Write.
	active *compressor

	// queue carries compressors to the output goroutine in the order in
	// which their blocks were submitted. qwg is incremented each time a
	// block is queued and is what Wait blocks on.
	queue chan *compressor
	qwg   sync.WaitGroup

	// waiting holds compressors that are free to be reused.
	waiting chan *compressor

	// wg tracks the output goroutine started by NewWriterLevel; Close
	// waits on it.
	wg sync.WaitGroup

	// closed is set by Close; Next, Write and Flush return ErrClosed once
	// it is true.
	closed bool

	// m guards err, the first error recorded for the Writer.
	m   sync.Mutex
	err error
}
    40  
    41  // NewWriter returns a new Writer. Writes to the returned writer are
    42  // compressed and written to w.
    43  //
    44  // The number of concurrent write compressors is specified by wc.
    45  func NewWriter(w io.Writer, wc int) *Writer {
    46  	bg, _ := NewWriterLevel(w, gzip.DefaultCompression, wc)
    47  	return bg
    48  }
    49  
    50  // NewWriterLevel returns a new Writer using the specified compression level
    51  // instead of gzip.DefaultCompression. Allowable level options are integer
    52  // values between between gzip.BestSpeed and gzip.BestCompression inclusive.
    53  //
    54  // The number of concurrent write compressors is specified by wc.
    55  func NewWriterLevel(w io.Writer, level, wc int) (*Writer, error) {
    56  	if level < gzip.DefaultCompression || level > gzip.BestCompression {
    57  		return nil, fmt.Errorf("bgzf: invalid compression level: %d", level)
    58  	}
    59  	wc++ // We count one for the active compressor.
    60  	if wc < 2 {
    61  		wc = 2
    62  	}
    63  	bg := &Writer{
    64  		w:       w,
    65  		waiting: make(chan *compressor, wc),
    66  		queue:   make(chan *compressor, wc),
    67  	}
    68  	bg.Header.OS = 0xff // Set default OS to unknown.
    69  
    70  	c := make([]compressor, wc)
    71  	for i := range c {
    72  		c[i].Header = &bg.Header
    73  		c[i].level = level
    74  		c[i].waiting = bg.waiting
    75  		c[i].flush = make(chan *compressor, 1)
    76  		c[i].qwg = &bg.qwg
    77  		bg.waiting <- &c[i]
    78  	}
    79  	bg.active = <-bg.waiting
    80  
    81  	bg.wg.Add(1)
    82  	go func() {
    83  		defer bg.wg.Done()
    84  		for qw := range bg.queue {
    85  			if !writeOK(bg, <-qw.flush) {
    86  				break
    87  			}
    88  		}
    89  	}()
    90  
    91  	return bg, nil
    92  }
    93  
    94  func writeOK(bg *Writer, c *compressor) bool {
    95  	defer func() { bg.waiting <- c }()
    96  
    97  	if c.err != nil {
    98  		bg.setErr(c.err)
    99  		return false
   100  	}
   101  	if c.buf.Len() == 0 {
   102  		return true
   103  	}
   104  
   105  	_, err := io.Copy(bg.w, &c.buf)
   106  	bg.qwg.Done()
   107  	if err != nil {
   108  		bg.setErr(err)
   109  		return false
   110  	}
   111  	c.next = 0
   112  
   113  	return true
   114  }
   115  
// compressor holds one block of uncompressed data together with the state
// needed to compress it.
type compressor struct {
	// The embedded Header points at the owning Writer's gzip header. ld is
	// the libdeflate writer, created on the first call to writeBlock and
	// reset on later calls; level is the compression level it is created
	// with.
	*gzip.Header
	ld    *libdeflate.Writer
	level int

	// next is the index in block at which the next write starts, block is
	// the uncompressed data, and buf receives the compressed output.
	next  int
	block [BlockSize]byte
	buf   bytes.Buffer

	// flush receives the compressor itself when writeBlock returns. qwg
	// points at the owning Writer's pending-block counter.
	flush chan *compressor
	qwg   *sync.WaitGroup

	// waiting is the owning Writer's pool of idle compressors.
	waiting chan *compressor

	// err is the error, if any, left by the most recent writeBlock.
	err error
}
   132  
   133  func (c *compressor) writeBlock() {
   134  	defer func() { c.flush <- c }()
   135  
   136  	if c.ld == nil {
   137  		c.ld, c.err = libdeflate.NewWriterLevel(&c.buf, c.level)
   138  		if c.err != nil {
   139  			return
   140  		}
   141  	} else {
   142  		c.ld.Reset(&c.buf)
   143  	}
   144  	c.ld.Header = gzip.Header{
   145  		Comment: c.Comment,
   146  		Extra:   append([]byte(bgzfExtra), c.Extra...),
   147  		ModTime: c.ModTime,
   148  		Name:    c.Name,
   149  		OS:      c.OS,
   150  	}
   151  
   152  	_, c.err = c.ld.Write(c.block[:c.next])
   153  	if c.err != nil {
   154  		return
   155  	}
   156  	c.err = c.ld.Close()
   157  	if c.err != nil {
   158  		return
   159  	}
   160  	c.next = 0
   161  
   162  	b := c.buf.Bytes()
   163  	i := bytes.Index(b, bgzfExtraPrefix)
   164  	if i < 0 {
   165  		c.err = gzip.ErrHeader
   166  		return
   167  	}
   168  	size := len(b) - 1
   169  	if size >= MaxBlockSize {
   170  		c.err = ErrBlockOverflow
   171  		return
   172  	}
   173  	b[i+4], b[i+5] = byte(size), byte(size>>8)
   174  }
   175  
   176  // Next returns the index of the start of the next write within the
   177  // decompressed data block.
   178  func (bg *Writer) Next() (int, error) {
   179  	if bg.closed {
   180  		return 0, ErrClosed
   181  	}
   182  	if err := bg.Error(); err != nil {
   183  		return 0, err
   184  	}
   185  
   186  	return bg.active.next, nil
   187  }
   188  
   189  // Write writes the compressed form of b to the underlying io.Writer.
   190  // Decompressed data blocks are limited to BlockSize, so individual
   191  // byte slices may span block boundaries, however the Writer attempts
   192  // to keep each write within a single data block.
   193  func (bg *Writer) Write(b []byte) (int, error) {
   194  	if bg.closed {
   195  		return 0, ErrClosed
   196  	}
   197  	err := bg.Error()
   198  	if err != nil {
   199  		return 0, err
   200  	}
   201  
   202  	c := bg.active
   203  	var n int
   204  	for ; len(b) > 0 && err == nil; err = bg.Error() {
   205  		var _n int
   206  		if c.next == 0 || c.next+len(b) <= len(c.block) {
   207  			_n = copy(c.block[c.next:], b)
   208  			b = b[_n:]
   209  			c.next += _n
   210  			n += _n
   211  		}
   212  
   213  		if c.next == len(c.block) || _n == 0 {
   214  			bg.queue <- c
   215  			bg.qwg.Add(1)
   216  			go c.writeBlock()
   217  			c = <-bg.waiting
   218  		}
   219  	}
   220  	bg.active = c
   221  
   222  	return n, bg.Error()
   223  }
   224  
   225  // Flush writes unwritten data to the underlying io.Writer. Flush does not block.
   226  func (bg *Writer) Flush() error {
   227  	if bg.closed {
   228  		return ErrClosed
   229  	}
   230  	if err := bg.Error(); err != nil {
   231  		return err
   232  	}
   233  
   234  	if bg.active.next == 0 {
   235  		return nil
   236  	}
   237  
   238  	var c *compressor
   239  	c, bg.active = bg.active, <-bg.waiting
   240  	bg.queue <- c
   241  	bg.qwg.Add(1)
   242  	go c.writeBlock()
   243  
   244  	return bg.Error()
   245  }
   246  
   247  // Wait waits for all pending writes to complete and returns the subsequent
   248  // error state of the Writer.
   249  func (bg *Writer) Wait() error {
   250  	if err := bg.Error(); err != nil {
   251  		return err
   252  	}
   253  	bg.qwg.Wait()
   254  	return bg.Error()
   255  }
   256  
   257  // Error returns the error state of the Writer.
   258  func (bg *Writer) Error() error {
   259  	bg.m.Lock()
   260  	defer bg.m.Unlock()
   261  	return bg.err
   262  }
   263  
   264  func (bg *Writer) setErr(err error) {
   265  	bg.m.Lock()
   266  	defer bg.m.Unlock()
   267  	if bg.err == nil {
   268  		bg.err = err
   269  	}
   270  }
   271  
   272  // Close closes the Writer, waiting for any pending writes before returning
   273  // the final error of the Writer.
   274  func (bg *Writer) Close() error {
   275  	if !bg.closed {
   276  		c := bg.active
   277  		// If there are no alignment records at all, don't write an extra empty
   278  		// block.
   279  		if c.next != 0 {
   280  			bg.queue <- c
   281  			bg.qwg.Add(1)
   282  			<-bg.waiting
   283  			c.writeBlock()
   284  		}
   285  		bg.closed = true
   286  		close(bg.queue)
   287  		bg.wg.Wait()
   288  		if bg.err == nil {
   289  			_, bg.err = bg.w.Write([]byte(magicBlock))
   290  		}
   291  	}
   292  	return bg.err
   293  }