github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bgzf/cache.go (about)

     1  // Copyright ©2012 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package bgzf
     6  
     7  import (
     8  	"bytes"
     9  	"compress/gzip"
    10  	"io"
    11  
    12  	"github.com/Schaudge/grailbase/compress/libdeflate"
    13  )
    14  
// Cache is a Block caching type. Basic cache implementations are provided
// in the cache package. A Cache must be safe for concurrent use.
//
// If a Cache is also a Wrapper, its Wrap method is called on newly created
// blocks.
type Cache interface {
	// Get returns the Block in the Cache with the specified
	// base, or a nil Block if no such Block exists. A Block
	// returned by Get must be removed from the Cache.
	Get(base int64) Block

	// Put inserts a Block into the Cache. It returns the Block
	// that was evicted, or nil if no eviction was necessary,
	// and a boolean indicating whether the put Block was
	// retained by the Cache.
	Put(Block) (evicted Block, retained bool)

	// Peek reports whether a Block exists in the Cache for the
	// given base. If a Block satisfies the request, exists is
	// true and next is the offset of the next Block in the
	// stream; otherwise exists is false and next is -1.
	Peek(base int64) (exists bool, next int64)
}
    37  
// Wrapper defines Cache types that need to modify a Block at its creation.
type Wrapper interface {
	// Wrap receives a newly created Block and returns a Block.
	// NOTE(review): presumably the returned Block is used in place
	// of the argument — confirm against the caller.
	Wrap(Block) Block
}
    42  
// Block wraps interaction with decompressed BGZF data blocks.
type Block interface {
	// Base returns the file offset of the start of
	// the gzip member from which the Block data was
	// decompressed.
	Base() int64

	io.Reader

	// Used returns whether one or more bytes have
	// been read from the Block.
	Used() bool

	// header returns the gzip.Header of the gzip member
	// from which the Block data was decompressed.
	header() gzip.Header

	// isMagicBlock returns whether the Block is a BGZF
	// magic EOF marker block.
	isMagicBlock() bool

	// ownedBy returns whether the Block is owned by
	// the given Reader.
	ownedBy(*Reader) bool

	// setOwner changes the owner to the given Reader,
	// resetting other data to its zero state.
	setOwner(*Reader)

	// hasData returns whether the Block has read data.
	hasData() bool

	// The following are unexported equivalents
	// of the io interfaces. seek is limited to
	// the file origin offset case and does not
	// return the new offset.
	seek(offset int64) error

	// readBuf uncompresses the given input data
	// into the Block using the given decompressor.
	readBuf(in []byte, dd libdeflate.Decompressor) error

	// len returns the number of remaining
	// bytes that can be read from the Block.
	len() int

	// setBase sets the file offset of the start
	// of the gzip member that the Block data
	// was decompressed from.
	setBase(int64)

	// NextBase returns the expected position of the next
	// BGZF block. It returns -1 if the Block is not valid.
	NextBase() int64

	// setHeader sets the file header of the gzip
	// member that the Block data was decompressed from.
	setHeader(gzip.Header)

	// txOffset returns the current virtual offset.
	txOffset() Offset
}
   104  
// block is a Block implementation that keeps its decompressed
// data in a fixed-size array embedded in the struct.
type block struct {
	// owner is the Reader compared against by ownedBy.
	owner *Reader
	// used is set once Read has returned at least one byte.
	used bool

	// base is the file offset recorded by setBase; setOwner
	// resets it to -1.
	base int64
	// h is the gzip header recorded by setHeader.
	h gzip.Header
	// magic is the value reported by isMagicBlock. It is computed
	// from the header in setHeader and refined in readBuf.
	magic bool

	// offset is the value reported by txOffset. setBase sets its
	// File field and Read and seek update its Block field.
	offset Offset

	// buf reads from the decompressed prefix of data. It is nil
	// until readBuf succeeds, and again after setOwner.
	buf *bytes.Reader
	// data is the destination storage for decompression.
	data [MaxBlockSize]byte
}
   118  
   119  func (b *block) Base() int64 { return b.base }
   120  
   121  func (b *block) Used() bool { return b.used }
   122  
   123  func (b *block) Read(p []byte) (int, error) {
   124  	n, err := b.buf.Read(p)
   125  	b.offset.Block += uint16(n)
   126  	if n > 0 {
   127  		b.used = true
   128  	}
   129  	return n, err
   130  }
   131  
   132  func (b *block) readBuf(inData []byte, dd libdeflate.Decompressor) error {
   133  	o := b.owner
   134  	b.owner = nil
   135  	n, err := dd.Decompress(b.data[:], inData)
   136  	if err != nil {
   137  		return err
   138  	}
   139  	b.buf = bytes.NewReader(b.data[:n])
   140  	b.owner = o
   141  	b.magic = b.magic && b.len() == 0
   142  	return nil
   143  }
   144  
   145  func (b *block) seek(offset int64) error {
   146  	_, err := b.buf.Seek(offset, 0)
   147  	if err == nil {
   148  		b.offset.Block = uint16(offset)
   149  	}
   150  	return err
   151  }
   152  
   153  func (b *block) len() int {
   154  	if b.buf == nil {
   155  		return 0
   156  	}
   157  	return b.buf.Len()
   158  }
   159  
   160  func (b *block) setBase(n int64) {
   161  	b.base = n
   162  	b.offset = Offset{File: n}
   163  }
   164  
   165  func (b *block) NextBase() int64 {
   166  	size := int64(expectedMemberSize(b.h))
   167  	if size == -1 {
   168  		return -1
   169  	}
   170  	return b.base + size
   171  }
   172  
   173  func (b *block) setHeader(h gzip.Header) {
   174  	b.h = h
   175  	b.magic = h.OS == 0xff &&
   176  		// Test for zero time and old compress/gzip behaviour.
   177  		(h.ModTime.IsZero() || h.ModTime.Equal(unixEpoch)) &&
   178  		h.Name == "" &&
   179  		h.Comment == "" &&
   180  		bytes.Equal(h.Extra, []byte("BC\x02\x00\x1b\x00"))
   181  }
   182  
   183  func (b *block) header() gzip.Header { return b.h }
   184  
   185  func (b *block) isMagicBlock() bool { return b.magic }
   186  
   187  func (b *block) setOwner(r *Reader) {
   188  	b.owner = r
   189  	b.used = false
   190  	b.base = -1
   191  	b.h = gzip.Header{}
   192  	b.offset = Offset{}
   193  	b.buf = nil
   194  }
   195  
   196  func (b *block) ownedBy(r *Reader) bool { return b.owner == r }
   197  
   198  func (b *block) hasData() bool { return b.buf != nil }
   199  
   200  func (b *block) txOffset() Offset { return b.offset }