github.com/Schaudge/hts@v0.0.0-20240223063651-737b4d69d68c/internal/index_read.go (about)

     1  // Copyright ©2014 The bíogo Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package internal
     6  
     7  import (
     8  	"encoding/binary"
     9  	"fmt"
    10  	"io"
    11  	"sort"
    12  
    13  	"github.com/Schaudge/hts/bgzf"
    14  )
    15  
    16  // ReadIndex reads the Index from the given io.Reader.
    17  func ReadIndex(r io.Reader, n int32, typ string) (Index, error) {
    18  	var (
    19  		idx Index
    20  		err error
    21  	)
    22  	idx.Refs, err = readIndices(r, n, typ)
    23  	if err != nil {
    24  		return idx, err
    25  	}
    26  	var nUnmapped uint64
    27  	err = binary.Read(r, binary.LittleEndian, &nUnmapped)
    28  	if err == nil {
    29  		idx.Unmapped = &nUnmapped
    30  	} else if err != io.EOF {
    31  		return idx, err
    32  	}
    33  	idx.IsSorted = true
    34  
    35  	// Set the index of the last record to max int to
    36  	// prevent addition of records out of order. This
    37  	// means that the only way to append to an index is
    38  	// to re-index and add to that created index.
    39  	// TODO(kortschak) See if index appending is feasible
    40  	// and needed.
    41  	idx.LastRecord = int(^uint(0) >> 1)
    42  
    43  	return idx, nil
    44  }
    45  
    46  func readIndices(r io.Reader, n int32, typ string) ([]RefIndex, error) {
    47  	var err error
    48  	idx := make([]RefIndex, n)
    49  	for i := range idx {
    50  		idx[i].Bins, idx[i].Stats, err = readBins(r, typ)
    51  		if err != nil {
    52  			return nil, err
    53  		}
    54  		idx[i].Intervals, err = readIntervals(r, typ)
    55  		if err != nil {
    56  			return nil, err
    57  		}
    58  	}
    59  	return idx, nil
    60  }
    61  
    62  func readBins(r io.Reader, typ string) ([]Bin, *ReferenceStats, error) {
    63  	var n int32
    64  	err := binary.Read(r, binary.LittleEndian, &n)
    65  	if err != nil {
    66  		return nil, nil, err
    67  	}
    68  	if n == 0 {
    69  		return nil, nil, nil
    70  	}
    71  	var stats *ReferenceStats
    72  	bins := make([]Bin, n)
    73  	for i := 0; i < len(bins); i++ {
    74  		err = binary.Read(r, binary.LittleEndian, &bins[i].Bin)
    75  		if err != nil {
    76  			return nil, nil, fmt.Errorf("%s: failed to read bin number: %v", typ, err)
    77  		}
    78  		err = binary.Read(r, binary.LittleEndian, &n)
    79  		if err != nil {
    80  			return nil, nil, fmt.Errorf("%s: failed to read bin count: %v", typ, err)
    81  		}
    82  		if bins[i].Bin == StatsDummyBin {
    83  			if n != 2 {
    84  				return nil, nil, fmt.Errorf("%s: malformed dummy bin header", typ)
    85  			}
    86  			stats, err = readStats(r, typ)
    87  			if err != nil {
    88  				return nil, nil, err
    89  			}
    90  			bins = bins[:len(bins)-1]
    91  			i--
    92  			continue
    93  		}
    94  		bins[i].Chunks, err = readChunks(r, n, typ)
    95  		if err != nil {
    96  			return nil, nil, err
    97  		}
    98  	}
    99  	if !sort.IsSorted(byBinNumber(bins)) {
   100  		sort.Sort(byBinNumber(bins))
   101  	}
   102  	return bins, stats, nil
   103  }
   104  
   105  func readChunks(r io.Reader, n int32, typ string) ([]bgzf.Chunk, error) {
   106  	if n == 0 {
   107  		return nil, nil
   108  	}
   109  	chunks := make([]bgzf.Chunk, n)
   110  	var buf [16]byte
   111  	for i := range chunks {
   112  		// Get the begin and end offset in a single read.
   113  		_, err := io.ReadFull(r, buf[:])
   114  		if err != nil {
   115  			return nil, fmt.Errorf("%s: failed to read chunk virtual offset: %v", typ, err)
   116  		}
   117  		chunks[i].Begin = makeOffset(binary.LittleEndian.Uint64(buf[:8]))
   118  		chunks[i].End = makeOffset(binary.LittleEndian.Uint64(buf[8:]))
   119  	}
   120  	if !sort.IsSorted(byBeginOffset(chunks)) {
   121  		sort.Sort(byBeginOffset(chunks))
   122  	}
   123  	return chunks, nil
   124  }
   125  
   126  func readStats(r io.Reader, typ string) (*ReferenceStats, error) {
   127  	var (
   128  		vOff  uint64
   129  		stats ReferenceStats
   130  		err   error
   131  	)
   132  	err = binary.Read(r, binary.LittleEndian, &vOff)
   133  	if err != nil {
   134  		return nil, fmt.Errorf("%s: failed to read index stats chunk begin virtual offset: %v", typ, err)
   135  	}
   136  	stats.Chunk.Begin = makeOffset(vOff)
   137  	err = binary.Read(r, binary.LittleEndian, &vOff)
   138  	if err != nil {
   139  		return nil, fmt.Errorf("%s: failed to read index stats chunk end virtual offset: %v", typ, err)
   140  	}
   141  	stats.Chunk.End = makeOffset(vOff)
   142  	err = binary.Read(r, binary.LittleEndian, &stats.Mapped)
   143  	if err != nil {
   144  		return nil, fmt.Errorf("%s: failed to read index stats mapped count: %v", typ, err)
   145  	}
   146  	err = binary.Read(r, binary.LittleEndian, &stats.Unmapped)
   147  	if err != nil {
   148  		return nil, fmt.Errorf("%s: failed to read index stats unmapped count: %v", typ, err)
   149  	}
   150  	return &stats, nil
   151  }
   152  
   153  func readIntervals(r io.Reader, typ string) ([]bgzf.Offset, error) {
   154  	var n int32
   155  	err := binary.Read(r, binary.LittleEndian, &n)
   156  	if err != nil {
   157  		return nil, err
   158  	}
   159  	if n == 0 {
   160  		return nil, nil
   161  	}
   162  	offsets := make([]bgzf.Offset, n)
   163  	// chunkSize determines the number of offsets consumed by each binary.Read.
   164  	const chunkSize = 512
   165  	var vOffs [chunkSize]uint64
   166  	for i := 0; i < int(n); i += chunkSize {
   167  		l := min(int(n)-i, len(vOffs))
   168  		err = binary.Read(r, binary.LittleEndian, vOffs[:l])
   169  		if err != nil {
   170  			return nil, fmt.Errorf("%s: failed to read tile interval virtual offset: %v", typ, err)
   171  		}
   172  		for k := 0; k < l; k++ {
   173  			offsets[i+k] = makeOffset(vOffs[k])
   174  		}
   175  	}
   176  
   177  	if !sort.IsSorted(byVirtOffset(offsets)) {
   178  		sort.Sort(byVirtOffset(offsets))
   179  	}
   180  	return offsets, nil
   181  }
   182  
   183  func min(a, b int) int {
   184  	if a < b {
   185  		return a
   186  	}
   187  	return b
   188  }