github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/bgzf/index/index.go (about) 1 // Copyright ©2015 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // Package index provides common code for CSI and tabix BGZF indexing. 6 package index 7 8 import ( 9 "errors" 10 "io" 11 12 "github.com/Schaudge/hts/bgzf" 13 ) 14 15 var ( 16 ErrNoReference = errors.New("index: no reference") 17 ErrInvalid = errors.New("index: invalid interval") 18 ) 19 20 // ReferenceStats holds mapping statistics for a genomic reference. 21 type ReferenceStats struct { 22 // Chunk is the span of the indexed BGZF 23 // holding alignments to the reference. 24 Chunk bgzf.Chunk 25 26 // Mapped is the count of mapped reads. 27 Mapped uint64 28 29 // Unmapped is the count of unmapped reads. 30 Unmapped uint64 31 } 32 33 // Reader wraps a bgzf.Reader to provide a mechanism to read a selection of 34 // BGZF chunks. 35 type ChunkReader struct { 36 r *bgzf.Reader 37 38 wasBlocked bool 39 40 chunks []bgzf.Chunk 41 } 42 43 // NewChunkReader returns a ChunkReader to read from r, limiting the reads to 44 // the provided chunks. The provided bgzf.Reader will be put into Blocked mode. 45 func NewChunkReader(r *bgzf.Reader, chunks []bgzf.Chunk) (*ChunkReader, error) { 46 b := r.Blocked 47 r.Blocked = true 48 if len(chunks) != 0 { 49 err := r.Seek(chunks[0].Begin) 50 if err != nil { 51 return nil, err 52 } 53 } 54 return &ChunkReader{r: r, wasBlocked: b, chunks: chunks}, nil 55 } 56 57 // Read satisfies the io.Reader interface. 58 func (r *ChunkReader) Read(p []byte) (int, error) { 59 if len(r.chunks) == 0 { 60 return 0, io.EOF 61 } 62 last := r.r.LastChunk() 63 if vOffset(last.End) >= vOffset(r.chunks[0].End) { 64 return 0, io.EOF 65 } 66 67 // Ensure the byte slice does not extend beyond the end of 68 // the current chunk. We do not need to consider reading 69 // beyond the end of the block because the bgzf.Reader is in 70 // blocked mode and so will stop there anyway. 71 want := int(r.chunks[0].End.Block) 72 if r.chunks[0].End.Block == 0 && r.chunks[0].End.File > last.End.File { 73 // Special case for when the current end block offset 74 // is zero. 75 want = r.r.BlockLen() 76 } 77 var cursor int 78 if last.End.File == r.chunks[0].End.File { 79 // Our end is in the same block as the last chunk end 80 // so set the cursor to the chunk block end to prevent 81 // reading past the end of the chunk. 82 cursor = int(last.End.Block) 83 } 84 n, err := r.r.Read(p[:min(len(p), want-cursor)]) 85 if err != nil { 86 if n != 0 && err == io.EOF { 87 err = nil 88 } 89 return n, err 90 } 91 92 // Check whether we are at or past the end of the current 93 // chunk or we have not made progress for reasons other than 94 // zero length p. 95 this := r.r.LastChunk() 96 if (len(p) != 0 && this == last) || vOffset(this.End) >= vOffset(r.chunks[0].End) { 97 r.chunks = r.chunks[1:] 98 if len(r.chunks) == 0 { 99 return n, io.EOF 100 } 101 err = r.r.Seek(r.chunks[0].Begin) 102 } 103 104 return n, err 105 } 106 107 func vOffset(o bgzf.Offset) int64 { 108 return o.File<<16 | int64(o.Block) 109 } 110 111 func min(a, b int) int { 112 if a < b { 113 return a 114 } 115 return b 116 } 117 118 // Close returns the bgzf.Reader to its original blocking mode and releases it. 119 // The bgzf.Reader is not closed. 120 func (r *ChunkReader) Close() error { 121 r.r.Blocked = r.wasBlocked 122 r.r = nil 123 return nil 124 }