github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bgzf/index/index.go (about)

     1  // Copyright ©2015 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package index provides common code for CSI and tabix BGZF indexing.
     6  package index
     7  
     8  import (
     9  	"errors"
    10  	"io"
    11  
    12  	"github.com/Schaudge/hts/bgzf"
    13  )
    14  
    15  var (
    16  	ErrNoReference = errors.New("index: no reference")
    17  	ErrInvalid     = errors.New("index: invalid interval")
    18  )
    19  
// ReferenceStats holds mapping statistics for a genomic reference: the
// BGZF span that holds its alignments and the counts of mapped and
// unmapped reads.
type ReferenceStats struct {
	// Chunk is the span of the indexed BGZF
	// holding alignments to the reference.
	Chunk bgzf.Chunk

	// Mapped is the count of mapped reads.
	Mapped uint64

	// Unmapped is the count of unmapped reads.
	Unmapped uint64
}
    32  
// ChunkReader wraps a bgzf.Reader to provide a mechanism to read a selection
// of BGZF chunks.
type ChunkReader struct {
	// r is the underlying BGZF reader that all reads are delegated to.
	// Close sets it to nil.
	r *bgzf.Reader

	// wasBlocked is the value r.Blocked held before NewChunkReader
	// set it to true; Close writes it back to r.Blocked.
	wasBlocked bool

	// chunks holds the chunks still to be read. The first element is
	// the chunk currently being read; Read drops it once its end has
	// been reached.
	chunks []bgzf.Chunk
}
    42  
    43  // NewChunkReader returns a ChunkReader to read from r, limiting the reads to
    44  // the provided chunks. The provided bgzf.Reader will be put into Blocked mode.
    45  func NewChunkReader(r *bgzf.Reader, chunks []bgzf.Chunk) (*ChunkReader, error) {
    46  	b := r.Blocked
    47  	r.Blocked = true
    48  	if len(chunks) != 0 {
    49  		err := r.Seek(chunks[0].Begin)
    50  		if err != nil {
    51  			return nil, err
    52  		}
    53  	}
    54  	return &ChunkReader{r: r, wasBlocked: b, chunks: chunks}, nil
    55  }
    56  
// Read satisfies the io.Reader interface. It reads from the underlying
// bgzf.Reader into p, never requesting more bytes than remain before the
// end of the current chunk (the first element of the remaining chunks).
//
// When a read reaches or passes the end of the current chunk, or makes no
// progress for a non-empty p, the current chunk is dropped and the
// underlying reader is positioned at the beginning of the next one.
// io.EOF is returned when no chunks remain. An io.EOF from the underlying
// reader is suppressed when it accompanies a non-zero byte count.
func (r *ChunkReader) Read(p []byte) (int, error) {
	// Nothing left to read once every chunk has been consumed.
	if len(r.chunks) == 0 {
		return 0, io.EOF
	}
	// Stop if the end of the underlying reader's last chunk is already
	// at or beyond the end of the current chunk.
	last := r.r.LastChunk()
	if vOffset(last.End) >= vOffset(r.chunks[0].End) {
		return 0, io.EOF
	}

	// Ensure the byte slice does not extend beyond the end of
	// the current chunk. We do not need to consider reading
	// beyond the end of the block because the bgzf.Reader is in
	// blocked mode and so will stop there anyway.
	want := int(r.chunks[0].End.Block)
	if r.chunks[0].End.Block == 0 && r.chunks[0].End.File > last.End.File {
		// Special case for when the current end block offset
		// is zero.
		want = r.r.BlockLen()
	}
	var cursor int
	if last.End.File == r.chunks[0].End.File {
		// Our end is in the same block as the last chunk end
		// so set the cursor to the chunk block end to prevent
		// reading past the end of the chunk.
		cursor = int(last.End.Block)
	}
	// Request at most want-cursor bytes, and never more than p can hold.
	n, err := r.r.Read(p[:min(len(p), want-cursor)])
	if err != nil {
		// Suppress io.EOF when data was read so the caller gets
		// the bytes without an error on this call.
		if n != 0 && err == io.EOF {
			err = nil
		}
		return n, err
	}

	// Check whether we are at or past the end of the current
	// chunk or we have not made progress for reasons other than
	// zero length p.
	this := r.r.LastChunk()
	if (len(p) != 0 && this == last) || vOffset(this.End) >= vOffset(r.chunks[0].End) {
		// Drop the finished chunk; report io.EOF together with the
		// bytes read if it was the last one, otherwise move the
		// underlying reader to the start of the next chunk.
		r.chunks = r.chunks[1:]
		if len(r.chunks) == 0 {
			return n, io.EOF
		}
		err = r.r.Seek(r.chunks[0].Begin)
	}

	return n, err
}
   106  
   107  func vOffset(o bgzf.Offset) int64 {
   108  	return o.File<<16 | int64(o.Block)
   109  }
   110  
   111  func min(a, b int) int {
   112  	if a < b {
   113  		return a
   114  	}
   115  	return b
   116  }
   117  
   118  // Close returns the bgzf.Reader to its original blocking mode and releases it.
   119  // The bgzf.Reader is not closed.
   120  func (r *ChunkReader) Close() error {
   121  	r.r.Blocked = r.wasBlocked
   122  	r.r = nil
   123  	return nil
   124  }