github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/index_transformer.go (about)

     1  // Copyright 2022 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nbs
    16  
    17  import (
    18  	"encoding/binary"
    19  	"errors"
    20  	"io"
    21  
    22  	"github.com/dolthub/dolt/go/store/hash"
    23  )
    24  
    25  var (
    26  	ErrNotEnoughBytes = errors.New("reader did not return enough bytes")
    27  )
    28  
    29  func NewIndexTransformer(src io.Reader, chunkCount int) io.Reader {
    30  	tuplesSize := chunkCount * prefixTupleSize
    31  	lengthsSize := chunkCount * lengthSize
    32  	suffixesSize := chunkCount * hash.SuffixLen
    33  
    34  	tupleReader := io.LimitReader(src, int64(tuplesSize))
    35  	lengthsReader := io.LimitReader(src, int64(lengthsSize))
    36  	suffixesReader := io.LimitReader(src, int64(suffixesSize))
    37  
    38  	return io.MultiReader(
    39  		tupleReader,
    40  		NewOffsetsReader(lengthsReader),
    41  		suffixesReader,
    42  	)
    43  }
    44  
// OffsetsReader transforms a byte stream of table file lengths
// into a byte stream of table file offsets. Each big-endian uint32
// length consumed from lengthsReader is added to a running total,
// which is emitted as a big-endian uint64 offset; the output stream
// is therefore exactly twice the size of the input stream.
type OffsetsReader struct {
	// lengthsReader supplies consecutive big-endian uint32 chunk lengths.
	lengthsReader io.Reader
	// offset is the running sum of all lengths consumed so far, i.e.
	// the most recently emitted cumulative offset.
	offset        uint64
}
    51  
    52  func NewOffsetsReader(lengthsReader io.Reader) *OffsetsReader {
    53  	return &OffsetsReader{
    54  		lengthsReader: lengthsReader,
    55  	}
    56  }
    57  
    58  func (tra *OffsetsReader) Read(p []byte) (n int, err error) {
    59  
    60  	// Read as many lengths, as offsets we can fit into p. Which is half.
    61  	// Below assumes that lengthSize * 2 = offsetSize
    62  
    63  	// Strategy is to first read lengths into the second half of p
    64  	// Then, while iterating the lengths, compute the current offset,
    65  	// and write it to the beginning of p.
    66  
    67  	// Align p
    68  	rem := len(p) % offsetSize
    69  	p = p[:len(p)-rem]
    70  
    71  	// Read lengths into second half of p
    72  	secondHalf := p[len(p)/2:]
    73  	n, err = tra.lengthsReader.Read(secondHalf)
    74  	if err != nil {
    75  		return 0, err
    76  	}
    77  	if n%lengthSize != 0 {
    78  		return 0, ErrNotEnoughBytes
    79  	}
    80  
    81  	// Iterate lengths in second half of p while writing offsets starting from the beginning.
    82  	// On the last iteration, we overwrite the last length with the final offset.
    83  	for l, r := 0, 0; r < n; l, r = l+offsetSize, r+lengthSize {
    84  		lengthBytes := secondHalf[r : r+lengthSize]
    85  		length := binary.BigEndian.Uint32(lengthBytes)
    86  		tra.offset += uint64(length)
    87  
    88  		offsetBytes := p[l : l+offsetSize]
    89  		binary.BigEndian.PutUint64(offsetBytes, tra.offset)
    90  	}
    91  
    92  	return n * 2, nil
    93  }