github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bgzf/writer.go (about) 1 // Copyright ©2012 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package bgzf 6 7 import ( 8 "bytes" 9 "compress/gzip" 10 "fmt" 11 "io" 12 "sync" 13 14 "github.com/Schaudge/grailbase/compress/libdeflate" 15 ) 16 17 // Writer implements BGZF blocked gzip compression. 18 // 19 // Because the SAM specification requires that the RFC1952 FLG header field 20 // be set to 0x04, a Writer's Name and Comment fields should not be set if 21 // its output is to be read by another BGZF decompressor implementation. 22 type Writer struct { 23 gzip.Header 24 w io.Writer 25 26 active *compressor 27 28 queue chan *compressor 29 qwg sync.WaitGroup 30 31 waiting chan *compressor 32 33 wg sync.WaitGroup 34 35 closed bool 36 37 m sync.Mutex 38 err error 39 } 40 41 // NewWriter returns a new Writer. Writes to the returned writer are 42 // compressed and written to w. 43 // 44 // The number of concurrent write compressors is specified by wc. 45 func NewWriter(w io.Writer, wc int) *Writer { 46 bg, _ := NewWriterLevel(w, gzip.DefaultCompression, wc) 47 return bg 48 } 49 50 // NewWriterLevel returns a new Writer using the specified compression level 51 // instead of gzip.DefaultCompression. Allowable level options are integer 52 // values between between gzip.BestSpeed and gzip.BestCompression inclusive. 53 // 54 // The number of concurrent write compressors is specified by wc. 55 func NewWriterLevel(w io.Writer, level, wc int) (*Writer, error) { 56 if level < gzip.DefaultCompression || level > gzip.BestCompression { 57 return nil, fmt.Errorf("bgzf: invalid compression level: %d", level) 58 } 59 wc++ // We count one for the active compressor. 60 if wc < 2 { 61 wc = 2 62 } 63 bg := &Writer{ 64 w: w, 65 waiting: make(chan *compressor, wc), 66 queue: make(chan *compressor, wc), 67 } 68 bg.Header.OS = 0xff // Set default OS to unknown. 69 70 c := make([]compressor, wc) 71 for i := range c { 72 c[i].Header = &bg.Header 73 c[i].level = level 74 c[i].waiting = bg.waiting 75 c[i].flush = make(chan *compressor, 1) 76 c[i].qwg = &bg.qwg 77 bg.waiting <- &c[i] 78 } 79 bg.active = <-bg.waiting 80 81 bg.wg.Add(1) 82 go func() { 83 defer bg.wg.Done() 84 for qw := range bg.queue { 85 if !writeOK(bg, <-qw.flush) { 86 break 87 } 88 } 89 }() 90 91 return bg, nil 92 } 93 94 func writeOK(bg *Writer, c *compressor) bool { 95 defer func() { bg.waiting <- c }() 96 97 if c.err != nil { 98 bg.setErr(c.err) 99 return false 100 } 101 if c.buf.Len() == 0 { 102 return true 103 } 104 105 _, err := io.Copy(bg.w, &c.buf) 106 bg.qwg.Done() 107 if err != nil { 108 bg.setErr(err) 109 return false 110 } 111 c.next = 0 112 113 return true 114 } 115 116 type compressor struct { 117 *gzip.Header 118 ld *libdeflate.Writer 119 level int 120 121 next int 122 block [BlockSize]byte 123 buf bytes.Buffer 124 125 flush chan *compressor 126 qwg *sync.WaitGroup 127 128 waiting chan *compressor 129 130 err error 131 } 132 133 func (c *compressor) writeBlock() { 134 defer func() { c.flush <- c }() 135 136 if c.ld == nil { 137 c.ld, c.err = libdeflate.NewWriterLevel(&c.buf, c.level) 138 if c.err != nil { 139 return 140 } 141 } else { 142 c.ld.Reset(&c.buf) 143 } 144 c.ld.Header = gzip.Header{ 145 Comment: c.Comment, 146 Extra: append([]byte(bgzfExtra), c.Extra...), 147 ModTime: c.ModTime, 148 Name: c.Name, 149 OS: c.OS, 150 } 151 152 _, c.err = c.ld.Write(c.block[:c.next]) 153 if c.err != nil { 154 return 155 } 156 c.err = c.ld.Close() 157 if c.err != nil { 158 return 159 } 160 c.next = 0 161 162 b := c.buf.Bytes() 163 i := bytes.Index(b, bgzfExtraPrefix) 164 if i < 0 { 165 c.err = gzip.ErrHeader 166 return 167 } 168 size := len(b) - 1 169 if size >= MaxBlockSize { 170 c.err = ErrBlockOverflow 171 return 172 } 173 b[i+4], b[i+5] = byte(size), byte(size>>8) 174 } 175 176 // Next returns the index of the start of the next write within the 177 // decompressed data block. 178 func (bg *Writer) Next() (int, error) { 179 if bg.closed { 180 return 0, ErrClosed 181 } 182 if err := bg.Error(); err != nil { 183 return 0, err 184 } 185 186 return bg.active.next, nil 187 } 188 189 // Write writes the compressed form of b to the underlying io.Writer. 190 // Decompressed data blocks are limited to BlockSize, so individual 191 // byte slices may span block boundaries, however the Writer attempts 192 // to keep each write within a single data block. 193 func (bg *Writer) Write(b []byte) (int, error) { 194 if bg.closed { 195 return 0, ErrClosed 196 } 197 err := bg.Error() 198 if err != nil { 199 return 0, err 200 } 201 202 c := bg.active 203 var n int 204 for ; len(b) > 0 && err == nil; err = bg.Error() { 205 var _n int 206 if c.next == 0 || c.next+len(b) <= len(c.block) { 207 _n = copy(c.block[c.next:], b) 208 b = b[_n:] 209 c.next += _n 210 n += _n 211 } 212 213 if c.next == len(c.block) || _n == 0 { 214 bg.queue <- c 215 bg.qwg.Add(1) 216 go c.writeBlock() 217 c = <-bg.waiting 218 } 219 } 220 bg.active = c 221 222 return n, bg.Error() 223 } 224 225 // Flush writes unwritten data to the underlying io.Writer. Flush does not block. 226 func (bg *Writer) Flush() error { 227 if bg.closed { 228 return ErrClosed 229 } 230 if err := bg.Error(); err != nil { 231 return err 232 } 233 234 if bg.active.next == 0 { 235 return nil 236 } 237 238 var c *compressor 239 c, bg.active = bg.active, <-bg.waiting 240 bg.queue <- c 241 bg.qwg.Add(1) 242 go c.writeBlock() 243 244 return bg.Error() 245 } 246 247 // Wait waits for all pending writes to complete and returns the subsequent 248 // error state of the Writer. 249 func (bg *Writer) Wait() error { 250 if err := bg.Error(); err != nil { 251 return err 252 } 253 bg.qwg.Wait() 254 return bg.Error() 255 } 256 257 // Error returns the error state of the Writer. 258 func (bg *Writer) Error() error { 259 bg.m.Lock() 260 defer bg.m.Unlock() 261 return bg.err 262 } 263 264 func (bg *Writer) setErr(err error) { 265 bg.m.Lock() 266 defer bg.m.Unlock() 267 if bg.err == nil { 268 bg.err = err 269 } 270 } 271 272 // Close closes the Writer, waiting for any pending writes before returning 273 // the final error of the Writer. 274 func (bg *Writer) Close() error { 275 if !bg.closed { 276 c := bg.active 277 // If there are no alignment records at all, don't write an extra empty 278 // block. 279 if c.next != 0 { 280 bg.queue <- c 281 bg.qwg.Add(1) 282 <-bg.waiting 283 c.writeBlock() 284 } 285 bg.closed = true 286 close(bg.queue) 287 bg.wg.Wait() 288 if bg.err == nil { 289 _, bg.err = bg.w.Write([]byte(magicBlock)) 290 } 291 } 292 return bg.err 293 }