github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/csi/csi_read.go (about)

     1  // Copyright ©2015 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package csi
     6  
     7  import (
     8  	"encoding/binary"
     9  	"errors"
    10  	"fmt"
    11  	"io"
    12  	"sort"
    13  
    14  	"github.com/Schaudge/hts/bgzf"
    15  	"github.com/Schaudge/hts/bgzf/index"
    16  )
    17  
    18  // ReadFrom reads the CSI index from the given io.Reader. Note that
    19  // the csi specification states that the index is stored as BGZF, but
    20  // ReadFrom does not perform decompression.
    21  func ReadFrom(r io.Reader) (*Index, error) {
    22  	var (
    23  		idx   Index
    24  		magic [3]byte
    25  		err   error
    26  	)
    27  	err = binary.Read(r, binary.LittleEndian, &magic)
    28  	if err != nil {
    29  		return nil, err
    30  	}
    31  	if magic != csiMagic {
    32  		return nil, errors.New("csi: magic number mismatch")
    33  	}
    34  	version := []byte{0}
    35  	_, err = io.ReadFull(r, version)
    36  	if err != nil {
    37  		return nil, err
    38  	}
    39  	idx.Version = version[0]
    40  	if idx.Version != 0x1 && idx.Version != 0x2 {
    41  		return nil, fmt.Errorf("csi: unknown version: %d", version[0])
    42  	}
    43  	err = binary.Read(r, binary.LittleEndian, &idx.minShift)
    44  	if err != nil {
    45  		return nil, err
    46  	}
    47  	if int32(idx.minShift) < 0 {
    48  		return nil, errors.New("csi: invalid minimum shift value")
    49  	}
    50  	err = binary.Read(r, binary.LittleEndian, &idx.depth)
    51  	if err != nil {
    52  		return nil, err
    53  	}
    54  	if int32(idx.depth) < 0 {
    55  		return nil, errors.New("csi: invalid index depth value")
    56  	}
    57  	var n int32
    58  	err = binary.Read(r, binary.LittleEndian, &n)
    59  	if err != nil {
    60  		return nil, err
    61  	}
    62  	if n > 0 {
    63  		idx.Auxilliary = make([]byte, n)
    64  		_, err = io.ReadFull(r, idx.Auxilliary)
    65  		if err != nil {
    66  			return nil, err
    67  		}
    68  	}
    69  	binLimit := uint32(((1 << ((idx.depth + 1) * nextBinShift)) - 1) / 7)
    70  	idx.refs, err = readIndices(r, idx.Version, binLimit)
    71  	if err != nil {
    72  		return nil, err
    73  	}
    74  	var nUnmapped uint64
    75  	err = binary.Read(r, binary.LittleEndian, &nUnmapped)
    76  	if err == nil {
    77  		idx.unmapped = &nUnmapped
    78  	} else if err != io.EOF {
    79  		return nil, err
    80  	}
    81  	idx.isSorted = true
    82  	return &idx, nil
    83  }
    84  
    85  func readIndices(r io.Reader, version byte, binLimit uint32) ([]refIndex, error) {
    86  	var n int32
    87  	err := binary.Read(r, binary.LittleEndian, &n)
    88  	if err != nil {
    89  		return nil, err
    90  	}
    91  	if n == 0 {
    92  		return nil, nil
    93  	}
    94  	idx := make([]refIndex, n)
    95  	for i := range idx {
    96  		idx[i].bins, idx[i].stats, err = readBins(r, version, binLimit)
    97  		if err != nil {
    98  			return nil, err
    99  		}
   100  	}
   101  	return idx, nil
   102  }
   103  
   104  func readBins(r io.Reader, version byte, binLimit uint32) ([]bin, *index.ReferenceStats, error) {
   105  	var nBins int32
   106  	err := binary.Read(r, binary.LittleEndian, &nBins)
   107  	if err != nil {
   108  		return nil, nil, err
   109  	}
   110  	if nBins == 0 {
   111  		return nil, nil, nil
   112  	}
   113  	if uint32(nBins) > binLimit {
   114  		return nil, nil, fmt.Errorf("csi: invalid bin count: %d > %d", nBins, binLimit)
   115  	}
   116  	var stats *index.ReferenceStats
   117  	bins := make([]bin, nBins)
   118  	statsDummyBin := binLimit + 1
   119  	for i := 0; i < len(bins); i++ {
   120  		err = binary.Read(r, binary.LittleEndian, &bins[i].bin)
   121  		if err != nil {
   122  			return nil, nil, fmt.Errorf("csi: failed to read bin number: %v", err)
   123  		}
   124  		var vOff uint64
   125  		err = binary.Read(r, binary.LittleEndian, &vOff)
   126  		if err != nil {
   127  			return nil, nil, fmt.Errorf("csi: failed to read left virtual offset: %v", err)
   128  		}
   129  		bins[i].left = makeOffset(vOff)
   130  		if version == 0x2 {
   131  			err = binary.Read(r, binary.LittleEndian, &bins[i].records)
   132  			if err != nil {
   133  				return nil, nil, fmt.Errorf("csi: failed to read record count: %v", err)
   134  			}
   135  		}
   136  		var nChunks int32
   137  		err = binary.Read(r, binary.LittleEndian, &nChunks)
   138  		if err != nil {
   139  			return nil, nil, fmt.Errorf("csi: failed to read bin count: %v", err)
   140  		}
   141  		if bins[i].bin == statsDummyBin {
   142  			if nChunks != 2 {
   143  				return nil, nil, errors.New("csi: malformed dummy bin header")
   144  			}
   145  			stats, err = readStats(r)
   146  			if err != nil {
   147  				return nil, nil, err
   148  			}
   149  			bins = bins[:len(bins)-1]
   150  			i--
   151  			continue
   152  		}
   153  		bins[i].chunks, err = readChunks(r, nChunks)
   154  		if err != nil {
   155  			return nil, nil, err
   156  		}
   157  	}
   158  	if !sort.IsSorted(byBinNumber(bins)) {
   159  		sort.Sort(byBinNumber(bins))
   160  	}
   161  	return bins, stats, nil
   162  }
   163  
   164  func readChunks(r io.Reader, n int32) ([]bgzf.Chunk, error) {
   165  	if n == 0 {
   166  		return nil, nil
   167  	}
   168  	var (
   169  		vOff uint64
   170  		err  error
   171  	)
   172  	chunks := make([]bgzf.Chunk, n)
   173  	for i := range chunks {
   174  		err = binary.Read(r, binary.LittleEndian, &vOff)
   175  		if err != nil {
   176  			return nil, fmt.Errorf("csi: failed to read chunk begin virtual offset: %v", err)
   177  		}
   178  		chunks[i].Begin = makeOffset(vOff)
   179  		err = binary.Read(r, binary.LittleEndian, &vOff)
   180  		if err != nil {
   181  			return nil, fmt.Errorf("csi: failed to read chunk end virtual offset: %v", err)
   182  		}
   183  		chunks[i].End = makeOffset(vOff)
   184  	}
   185  	if !sort.IsSorted(byBeginOffset(chunks)) {
   186  		sort.Sort(byBeginOffset(chunks))
   187  	}
   188  	return chunks, nil
   189  }
   190  
   191  func readStats(r io.Reader) (*index.ReferenceStats, error) {
   192  	var (
   193  		vOff  uint64
   194  		stats index.ReferenceStats
   195  		err   error
   196  	)
   197  	err = binary.Read(r, binary.LittleEndian, &vOff)
   198  	if err != nil {
   199  		return nil, fmt.Errorf("bam: failed to read index stats chunk begin virtual offset: %v", err)
   200  	}
   201  	stats.Chunk.Begin = makeOffset(vOff)
   202  	err = binary.Read(r, binary.LittleEndian, &vOff)
   203  	if err != nil {
   204  		return nil, fmt.Errorf("bam: failed to read index stats chunk end virtual offset: %v", err)
   205  	}
   206  	stats.Chunk.End = makeOffset(vOff)
   207  	err = binary.Read(r, binary.LittleEndian, &stats.Mapped)
   208  	if err != nil {
   209  		return nil, fmt.Errorf("bam: failed to read index stats mapped count: %v", err)
   210  	}
   211  	err = binary.Read(r, binary.LittleEndian, &stats.Unmapped)
   212  	if err != nil {
   213  		return nil, fmt.Errorf("bam: failed to read index stats unmapped count: %v", err)
   214  	}
   215  	return &stats, nil
   216  }