github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/journal_chunk_source.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nbs 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "io" 22 "sort" 23 "sync" 24 25 "golang.org/x/sync/errgroup" 26 27 "github.com/dolthub/dolt/go/store/chunks" 28 "github.com/dolthub/dolt/go/store/hash" 29 ) 30 31 // journalChunkSource is a chunkSource that reads chunks 32 // from a ChunkJournal. Unlike other NBS chunkSources, 33 // it is not immutable and its set of chunks grows as 34 // more commits are made to the ChunkJournal. 35 type journalChunkSource struct { 36 journal *journalWriter 37 } 38 39 var _ chunkSource = journalChunkSource{} 40 41 func (s journalChunkSource) has(h hash.Hash) (bool, error) { 42 return s.journal.hasAddr(h), nil 43 } 44 45 func (s journalChunkSource) hasMany(addrs []hasRecord) (missing bool, err error) { 46 for i := range addrs { 47 ok := s.journal.hasAddr(*addrs[i].a) 48 if ok { 49 addrs[i].has = true 50 } else { 51 missing = true 52 } 53 } 54 return 55 } 56 57 func (s journalChunkSource) getCompressed(_ context.Context, h hash.Hash, _ *Stats) (CompressedChunk, error) { 58 return s.journal.getCompressedChunk(h) 59 } 60 61 func (s journalChunkSource) get(_ context.Context, h hash.Hash, _ *Stats) ([]byte, error) { 62 cc, err := s.journal.getCompressedChunk(h) 63 if err != nil { 64 return nil, err 65 } else if cc.IsEmpty() { 66 return nil, nil 67 } 68 ch, err := cc.ToChunk() 69 if err != nil { 70 return nil, err 71 } 72 return ch.Data(), nil 73 } 74 75 type journalRecord struct { 76 // r is the journal range for this chunk 77 r Range 78 // idx is the array offset into the shared |reqs| 79 idx int 80 } 81 82 func (s journalChunkSource) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, *chunks.Chunk), stats *Stats) (bool, error) { 83 return s.getManyCompressed(ctx, eg, reqs, func(ctx context.Context, cc CompressedChunk) { 84 ch, err := cc.ToChunk() 85 if err != nil { 86 eg.Go(func() error { 87 return err 88 }) 89 return 90 } 91 chWHash := chunks.NewChunkWithHash(cc.Hash(), ch.Data()) 92 found(ctx, &chWHash) 93 }, stats) 94 } 95 96 // getManyCompressed implements chunkReader. Here we (1) synchronously check 97 // the journal index for read ranges, (2) record if the source misses any 98 // needed remaining chunks, (3) sort the lookups for efficient disk access, 99 // and then (4) asynchronously perform reads. We release the journal read 100 // lock after returning when all reads are completed, which can be after the 101 // function returns. 102 func (s journalChunkSource) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, CompressedChunk), stats *Stats) (bool, error) { 103 var remaining bool 104 var jReqs []journalRecord 105 var wg sync.WaitGroup 106 s.journal.lock.RLock() 107 for i, r := range reqs { 108 if r.found { 109 continue 110 } 111 rang, ok := s.journal.ranges.get(*r.a) 112 if !ok { 113 remaining = true 114 continue 115 } 116 jReqs = append(jReqs, journalRecord{r: rang, idx: i}) 117 reqs[i].found = true 118 } 119 120 // sort chunks by journal locality 121 sort.Slice(jReqs, func(i, j int) bool { 122 return jReqs[i].r.Offset < jReqs[j].r.Offset 123 }) 124 125 for i := range jReqs { 126 // workers populate the parent error group 127 // record local workers for releasing lock 128 wg.Add(1) 129 eg.Go(func() error { 130 defer wg.Done() 131 rec := jReqs[i] 132 a := reqs[rec.idx].a 133 if cc, err := s.journal.getCompressedChunkAtRange(rec.r, *a); err != nil { 134 return err 135 } else if cc.IsEmpty() { 136 return errors.New("chunk in journal index was empty.") 137 } else { 138 found(ctx, cc) 139 return nil 140 } 141 }) 142 } 143 go func() { 144 wg.Wait() 145 s.journal.lock.RUnlock() 146 }() 147 return remaining, nil 148 } 149 150 func (s journalChunkSource) count() (uint32, error) { 151 return s.journal.recordCount(), nil 152 } 153 154 func (s journalChunkSource) uncompressedLen() (uint64, error) { 155 return s.journal.uncompressedSize(), nil 156 } 157 158 func (s journalChunkSource) hash() hash.Hash { 159 return journalAddr 160 } 161 162 // reader implements chunkSource. 163 func (s journalChunkSource) reader(context.Context) (io.ReadCloser, uint64, error) { 164 rdr, sz, err := s.journal.snapshot() 165 return rdr, uint64(sz), err 166 } 167 168 func (s journalChunkSource) getRecordRanges(requests []getRecord) (map[hash.Hash]Range, error) { 169 ranges := make(map[hash.Hash]Range, len(requests)) 170 for _, req := range requests { 171 if req.found { 172 continue 173 } 174 rng, ok, err := s.journal.getRange(*req.a) 175 if err != nil { 176 return nil, err 177 } else if !ok { 178 continue 179 } 180 req.found = true // update |requests| 181 ranges[hash.Hash(*req.a)] = rng 182 } 183 return ranges, nil 184 } 185 186 // size implements chunkSource. 187 // size returns the total size of the chunkSource: chunks, index, and footer 188 func (s journalChunkSource) currentSize() uint64 { 189 return uint64(s.journal.currentSize()) 190 } 191 192 // index implements chunkSource. 193 func (s journalChunkSource) index() (tableIndex, error) { 194 return nil, fmt.Errorf("journalChunkSource cannot be conjoined") 195 } 196 197 func (s journalChunkSource) clone() (chunkSource, error) { 198 return s, nil 199 } 200 201 func (s journalChunkSource) close() error { 202 // |s.journal| closed via ChunkJournal 203 return nil 204 } 205 206 func equalSpecs(left, right []tableSpec) bool { 207 if len(left) != len(right) { 208 return false 209 } 210 l := make(map[hash.Hash]struct{}, len(left)) 211 for _, s := range left { 212 l[s.name] = struct{}{} 213 } 214 for _, s := range right { 215 if _, ok := l[s.name]; !ok { 216 return false 217 } 218 } 219 return true 220 }