github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/internal/index_read.go (about) 1 // Copyright ©2014 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package internal 6 7 import ( 8 "encoding/binary" 9 "fmt" 10 "io" 11 "sort" 12 13 "github.com/Schaudge/hts/bgzf" 14 ) 15 16 // ReadIndex reads the Index from the given io.Reader. 17 func ReadIndex(r io.Reader, n int32, typ string) (Index, error) { 18 var ( 19 idx Index 20 err error 21 ) 22 idx.Refs, err = readIndices(r, n, typ) 23 if err != nil { 24 return idx, err 25 } 26 var nUnmapped uint64 27 err = binary.Read(r, binary.LittleEndian, &nUnmapped) 28 if err == nil { 29 idx.Unmapped = &nUnmapped 30 } else if err != io.EOF { 31 return idx, err 32 } 33 idx.IsSorted = true 34 35 // Set the index of the last record to max int to 36 // prevent addition of records out of order. This 37 // means that the only way to append to an index is 38 // to re-index and add to that created index. 39 // TODO(kortschak) See if index appending is feasible 40 // and needed. 41 idx.LastRecord = int(^uint(0) >> 1) 42 43 return idx, nil 44 } 45 46 func readIndices(r io.Reader, n int32, typ string) ([]RefIndex, error) { 47 var err error 48 idx := make([]RefIndex, n) 49 for i := range idx { 50 idx[i].Bins, idx[i].Stats, err = readBins(r, typ) 51 if err != nil { 52 return nil, err 53 } 54 idx[i].Intervals, err = readIntervals(r, typ) 55 if err != nil { 56 return nil, err 57 } 58 } 59 return idx, nil 60 } 61 62 func readBins(r io.Reader, typ string) ([]Bin, *ReferenceStats, error) { 63 var n int32 64 err := binary.Read(r, binary.LittleEndian, &n) 65 if err != nil { 66 return nil, nil, err 67 } 68 if n == 0 { 69 return nil, nil, nil 70 } 71 var stats *ReferenceStats 72 bins := make([]Bin, n) 73 for i := 0; i < len(bins); i++ { 74 err = binary.Read(r, binary.LittleEndian, &bins[i].Bin) 75 if err != nil { 76 return nil, nil, fmt.Errorf("%s: failed to read bin number: %v", typ, err) 77 } 78 err = binary.Read(r, binary.LittleEndian, &n) 79 if err != nil { 80 return nil, nil, fmt.Errorf("%s: failed to read bin count: %v", typ, err) 81 } 82 if bins[i].Bin == StatsDummyBin { 83 if n != 2 { 84 return nil, nil, fmt.Errorf("%s: malformed dummy bin header", typ) 85 } 86 stats, err = readStats(r, typ) 87 if err != nil { 88 return nil, nil, err 89 } 90 bins = bins[:len(bins)-1] 91 i-- 92 continue 93 } 94 bins[i].Chunks, err = readChunks(r, n, typ) 95 if err != nil { 96 return nil, nil, err 97 } 98 } 99 if !sort.IsSorted(byBinNumber(bins)) { 100 sort.Sort(byBinNumber(bins)) 101 } 102 return bins, stats, nil 103 } 104 105 func readChunks(r io.Reader, n int32, typ string) ([]bgzf.Chunk, error) { 106 if n == 0 { 107 return nil, nil 108 } 109 chunks := make([]bgzf.Chunk, n) 110 var buf [16]byte 111 for i := range chunks { 112 // Get the begin and end offset in a single read. 113 _, err := io.ReadFull(r, buf[:]) 114 if err != nil { 115 return nil, fmt.Errorf("%s: failed to read chunk virtual offset: %v", typ, err) 116 } 117 chunks[i].Begin = makeOffset(binary.LittleEndian.Uint64(buf[:8])) 118 chunks[i].End = makeOffset(binary.LittleEndian.Uint64(buf[8:])) 119 } 120 if !sort.IsSorted(byBeginOffset(chunks)) { 121 sort.Sort(byBeginOffset(chunks)) 122 } 123 return chunks, nil 124 } 125 126 func readStats(r io.Reader, typ string) (*ReferenceStats, error) { 127 var ( 128 vOff uint64 129 stats ReferenceStats 130 err error 131 ) 132 err = binary.Read(r, binary.LittleEndian, &vOff) 133 if err != nil { 134 return nil, fmt.Errorf("%s: failed to read index stats chunk begin virtual offset: %v", typ, err) 135 } 136 stats.Chunk.Begin = makeOffset(vOff) 137 err = binary.Read(r, binary.LittleEndian, &vOff) 138 if err != nil { 139 return nil, fmt.Errorf("%s: failed to read index stats chunk end virtual offset: %v", typ, err) 140 } 141 stats.Chunk.End = makeOffset(vOff) 142 err = binary.Read(r, binary.LittleEndian, &stats.Mapped) 143 if err != nil { 144 return nil, fmt.Errorf("%s: failed to read index stats mapped count: %v", typ, err) 145 } 146 err = binary.Read(r, binary.LittleEndian, &stats.Unmapped) 147 if err != nil { 148 return nil, fmt.Errorf("%s: failed to read index stats unmapped count: %v", typ, err) 149 } 150 return &stats, nil 151 } 152 153 func readIntervals(r io.Reader, typ string) ([]bgzf.Offset, error) { 154 var n int32 155 err := binary.Read(r, binary.LittleEndian, &n) 156 if err != nil { 157 return nil, err 158 } 159 if n == 0 { 160 return nil, nil 161 } 162 offsets := make([]bgzf.Offset, n) 163 // chunkSize determines the number of offsets consumed by each binary.Read. 164 const chunkSize = 512 165 var vOffs [chunkSize]uint64 166 for i := 0; i < int(n); i += chunkSize { 167 l := min(int(n)-i, len(vOffs)) 168 err = binary.Read(r, binary.LittleEndian, vOffs[:l]) 169 if err != nil { 170 return nil, fmt.Errorf("%s: failed to read tile interval virtual offset: %v", typ, err) 171 } 172 for k := 0; k < l; k++ { 173 offsets[i+k] = makeOffset(vOffs[k]) 174 } 175 } 176 177 if !sort.IsSorted(byVirtOffset(offsets)) { 178 sort.Sort(byVirtOffset(offsets)) 179 } 180 return offsets, nil 181 } 182 183 func min(a, b int) int { 184 if a < b { 185 return a 186 } 187 return b 188 }