github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/index_transformer.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nbs 16 17 import ( 18 "encoding/binary" 19 "errors" 20 "io" 21 22 "github.com/dolthub/dolt/go/store/hash" 23 ) 24 25 var ( 26 ErrNotEnoughBytes = errors.New("reader did not return enough bytes") 27 ) 28 29 func NewIndexTransformer(src io.Reader, chunkCount int) io.Reader { 30 tuplesSize := chunkCount * prefixTupleSize 31 lengthsSize := chunkCount * lengthSize 32 suffixesSize := chunkCount * hash.SuffixLen 33 34 tupleReader := io.LimitReader(src, int64(tuplesSize)) 35 lengthsReader := io.LimitReader(src, int64(lengthsSize)) 36 suffixesReader := io.LimitReader(src, int64(suffixesSize)) 37 38 return io.MultiReader( 39 tupleReader, 40 NewOffsetsReader(lengthsReader), 41 suffixesReader, 42 ) 43 } 44 45 // OffsetsReader transforms a byte stream of table file lengths 46 // into a byte stream of table file offsets 47 type OffsetsReader struct { 48 lengthsReader io.Reader 49 offset uint64 50 } 51 52 func NewOffsetsReader(lengthsReader io.Reader) *OffsetsReader { 53 return &OffsetsReader{ 54 lengthsReader: lengthsReader, 55 } 56 } 57 58 func (tra *OffsetsReader) Read(p []byte) (n int, err error) { 59 60 // Read as many lengths, as offsets we can fit into p. Which is half. 61 // Below assumes that lengthSize * 2 = offsetSize 62 63 // Strategy is to first read lengths into the second half of p 64 // Then, while iterating the lengths, compute the current offset, 65 // and write it to the beginning of p. 66 67 // Align p 68 rem := len(p) % offsetSize 69 p = p[:len(p)-rem] 70 71 // Read lengths into second half of p 72 secondHalf := p[len(p)/2:] 73 n, err = tra.lengthsReader.Read(secondHalf) 74 if err != nil { 75 return 0, err 76 } 77 if n%lengthSize != 0 { 78 return 0, ErrNotEnoughBytes 79 } 80 81 // Iterate lengths in second half of p while writing offsets starting from the beginning. 82 // On the last iteration, we overwrite the last length with the final offset. 83 for l, r := 0, 0; r < n; l, r = l+offsetSize, r+lengthSize { 84 lengthBytes := secondHalf[r : r+lengthSize] 85 length := binary.BigEndian.Uint32(lengthBytes) 86 tra.offset += uint64(length) 87 88 offsetBytes := p[l : l+offsetSize] 89 binary.BigEndian.PutUint64(offsetBytes, tra.offset) 90 } 91 92 return n * 2, nil 93 }