github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/csi/csi_read.go (about) 1 // Copyright ©2015 The bíogo Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package csi 6 7 import ( 8 "encoding/binary" 9 "errors" 10 "fmt" 11 "io" 12 "sort" 13 14 "github.com/Schaudge/hts/bgzf" 15 "github.com/Schaudge/hts/bgzf/index" 16 ) 17 18 // ReadFrom reads the CSI index from the given io.Reader. Note that 19 // the csi specification states that the index is stored as BGZF, but 20 // ReadFrom does not perform decompression. 21 func ReadFrom(r io.Reader) (*Index, error) { 22 var ( 23 idx Index 24 magic [3]byte 25 err error 26 ) 27 err = binary.Read(r, binary.LittleEndian, &magic) 28 if err != nil { 29 return nil, err 30 } 31 if magic != csiMagic { 32 return nil, errors.New("csi: magic number mismatch") 33 } 34 version := []byte{0} 35 _, err = io.ReadFull(r, version) 36 if err != nil { 37 return nil, err 38 } 39 idx.Version = version[0] 40 if idx.Version != 0x1 && idx.Version != 0x2 { 41 return nil, fmt.Errorf("csi: unknown version: %d", version[0]) 42 } 43 err = binary.Read(r, binary.LittleEndian, &idx.minShift) 44 if err != nil { 45 return nil, err 46 } 47 if int32(idx.minShift) < 0 { 48 return nil, errors.New("csi: invalid minimum shift value") 49 } 50 err = binary.Read(r, binary.LittleEndian, &idx.depth) 51 if err != nil { 52 return nil, err 53 } 54 if int32(idx.depth) < 0 { 55 return nil, errors.New("csi: invalid index depth value") 56 } 57 var n int32 58 err = binary.Read(r, binary.LittleEndian, &n) 59 if err != nil { 60 return nil, err 61 } 62 if n > 0 { 63 idx.Auxilliary = make([]byte, n) 64 _, err = io.ReadFull(r, idx.Auxilliary) 65 if err != nil { 66 return nil, err 67 } 68 } 69 binLimit := uint32(((1 << ((idx.depth + 1) * nextBinShift)) - 1) / 7) 70 idx.refs, err = readIndices(r, idx.Version, binLimit) 71 if err != nil { 72 return nil, err 73 } 74 var nUnmapped uint64 75 err = binary.Read(r, binary.LittleEndian, &nUnmapped) 76 if err == nil { 77 idx.unmapped = &nUnmapped 78 } else if err != io.EOF { 79 return nil, err 80 } 81 idx.isSorted = true 82 return &idx, nil 83 } 84 85 func readIndices(r io.Reader, version byte, binLimit uint32) ([]refIndex, error) { 86 var n int32 87 err := binary.Read(r, binary.LittleEndian, &n) 88 if err != nil { 89 return nil, err 90 } 91 if n == 0 { 92 return nil, nil 93 } 94 idx := make([]refIndex, n) 95 for i := range idx { 96 idx[i].bins, idx[i].stats, err = readBins(r, version, binLimit) 97 if err != nil { 98 return nil, err 99 } 100 } 101 return idx, nil 102 } 103 104 func readBins(r io.Reader, version byte, binLimit uint32) ([]bin, *index.ReferenceStats, error) { 105 var nBins int32 106 err := binary.Read(r, binary.LittleEndian, &nBins) 107 if err != nil { 108 return nil, nil, err 109 } 110 if nBins == 0 { 111 return nil, nil, nil 112 } 113 if uint32(nBins) > binLimit { 114 return nil, nil, fmt.Errorf("csi: invalid bin count: %d > %d", nBins, binLimit) 115 } 116 var stats *index.ReferenceStats 117 bins := make([]bin, nBins) 118 statsDummyBin := binLimit + 1 119 for i := 0; i < len(bins); i++ { 120 err = binary.Read(r, binary.LittleEndian, &bins[i].bin) 121 if err != nil { 122 return nil, nil, fmt.Errorf("csi: failed to read bin number: %v", err) 123 } 124 var vOff uint64 125 err = binary.Read(r, binary.LittleEndian, &vOff) 126 if err != nil { 127 return nil, nil, fmt.Errorf("csi: failed to read left virtual offset: %v", err) 128 } 129 bins[i].left = makeOffset(vOff) 130 if version == 0x2 { 131 err = binary.Read(r, binary.LittleEndian, &bins[i].records) 132 if err != nil { 133 return nil, nil, fmt.Errorf("csi: failed to read record count: %v", err) 134 } 135 } 136 var nChunks int32 137 err = binary.Read(r, binary.LittleEndian, &nChunks) 138 if err != nil { 139 return nil, nil, fmt.Errorf("csi: failed to read bin count: %v", err) 140 } 141 if bins[i].bin == statsDummyBin { 142 if nChunks != 2 { 143 return nil, nil, errors.New("csi: malformed dummy bin header") 144 } 145 stats, err = readStats(r) 146 if err != nil { 147 return nil, nil, err 148 } 149 bins = bins[:len(bins)-1] 150 i-- 151 continue 152 } 153 bins[i].chunks, err = readChunks(r, nChunks) 154 if err != nil { 155 return nil, nil, err 156 } 157 } 158 if !sort.IsSorted(byBinNumber(bins)) { 159 sort.Sort(byBinNumber(bins)) 160 } 161 return bins, stats, nil 162 } 163 164 func readChunks(r io.Reader, n int32) ([]bgzf.Chunk, error) { 165 if n == 0 { 166 return nil, nil 167 } 168 var ( 169 vOff uint64 170 err error 171 ) 172 chunks := make([]bgzf.Chunk, n) 173 for i := range chunks { 174 err = binary.Read(r, binary.LittleEndian, &vOff) 175 if err != nil { 176 return nil, fmt.Errorf("csi: failed to read chunk begin virtual offset: %v", err) 177 } 178 chunks[i].Begin = makeOffset(vOff) 179 err = binary.Read(r, binary.LittleEndian, &vOff) 180 if err != nil { 181 return nil, fmt.Errorf("csi: failed to read chunk end virtual offset: %v", err) 182 } 183 chunks[i].End = makeOffset(vOff) 184 } 185 if !sort.IsSorted(byBeginOffset(chunks)) { 186 sort.Sort(byBeginOffset(chunks)) 187 } 188 return chunks, nil 189 } 190 191 func readStats(r io.Reader) (*index.ReferenceStats, error) { 192 var ( 193 vOff uint64 194 stats index.ReferenceStats 195 err error 196 ) 197 err = binary.Read(r, binary.LittleEndian, &vOff) 198 if err != nil { 199 return nil, fmt.Errorf("bam: failed to read index stats chunk begin virtual offset: %v", err) 200 } 201 stats.Chunk.Begin = makeOffset(vOff) 202 err = binary.Read(r, binary.LittleEndian, &vOff) 203 if err != nil { 204 return nil, fmt.Errorf("bam: failed to read index stats chunk end virtual offset: %v", err) 205 } 206 stats.Chunk.End = makeOffset(vOff) 207 err = binary.Read(r, binary.LittleEndian, &stats.Mapped) 208 if err != nil { 209 return nil, fmt.Errorf("bam: failed to read index stats mapped count: %v", err) 210 } 211 err = binary.Read(r, binary.LittleEndian, &stats.Unmapped) 212 if err != nil { 213 return nil, fmt.Errorf("bam: failed to read index stats unmapped count: %v", err) 214 } 215 return &stats, nil 216 }