github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/generational_chunk_store.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package nbs 16 17 import ( 18 "context" 19 "fmt" 20 "io" 21 "path/filepath" 22 "strings" 23 "sync" 24 25 "github.com/dolthub/dolt/go/store/chunks" 26 "github.com/dolthub/dolt/go/store/hash" 27 ) 28 29 var _ chunks.ChunkStore = (*GenerationalNBS)(nil) 30 var _ chunks.GenerationalCS = (*GenerationalNBS)(nil) 31 var _ chunks.TableFileStore = (*GenerationalNBS)(nil) 32 33 type GenerationalNBS struct { 34 oldGen *NomsBlockStore 35 newGen *NomsBlockStore 36 ghostGen *GhostBlockStore 37 } 38 39 func (gcs *GenerationalNBS) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error { 40 if gcs.ghostGen == nil { 41 return gcs.ghostGen.PersistGhostHashes(ctx, refs) 42 } 43 return fmt.Errorf("runtime error. ghostGen is nil but an attempt to persist ghost hashes was made") 44 } 45 46 func (gcs *GenerationalNBS) GhostGen() chunks.ChunkStore { 47 return gcs.ghostGen 48 } 49 50 func NewGenerationalCS(oldGen, newGen *NomsBlockStore, ghostGen *GhostBlockStore) *GenerationalNBS { 51 if oldGen.Version() != "" && oldGen.Version() != newGen.Version() { 52 panic("oldgen and newgen chunkstore versions vary") 53 } 54 55 return &GenerationalNBS{ 56 oldGen: oldGen, 57 newGen: newGen, 58 ghostGen: ghostGen, 59 } 60 } 61 62 func (gcs *GenerationalNBS) NewGen() chunks.ChunkStoreGarbageCollector { 63 return gcs.newGen 64 } 65 66 func (gcs *GenerationalNBS) OldGen() chunks.ChunkStoreGarbageCollector { 67 return gcs.oldGen 68 } 69 70 // Get the Chunk for the value of the hash in the store. If the hash is absent from the store EmptyChunk is returned. 71 func (gcs *GenerationalNBS) Get(ctx context.Context, h hash.Hash) (chunks.Chunk, error) { 72 c, err := gcs.oldGen.Get(ctx, h) 73 74 if err != nil { 75 return chunks.EmptyChunk, err 76 } 77 78 if c.IsEmpty() { 79 c, err = gcs.newGen.Get(ctx, h) 80 } 81 if err != nil { 82 return chunks.EmptyChunk, err 83 } 84 85 if c.IsEmpty() && gcs.ghostGen != nil { 86 c, err = gcs.ghostGen.Get(ctx, h) 87 if err != nil { 88 return chunks.EmptyChunk, err 89 } 90 } 91 92 return c, nil 93 } 94 95 // GetMany gets the Chunks with |hashes| from the store. On return, |foundChunks| will have been fully sent all chunks 96 // which have been found. Any non-present chunks will silently be ignored. 97 func (gcs *GenerationalNBS) GetMany(ctx context.Context, hashes hash.HashSet, found func(context.Context, *chunks.Chunk)) error { 98 mu := &sync.Mutex{} 99 notFound := hashes.Copy() 100 err := gcs.oldGen.GetMany(ctx, hashes, func(ctx context.Context, chunk *chunks.Chunk) { 101 func() { 102 mu.Lock() 103 defer mu.Unlock() 104 delete(notFound, chunk.Hash()) 105 }() 106 107 found(ctx, chunk) 108 }) 109 if err != nil { 110 return err 111 } 112 if len(notFound) == 0 { 113 return nil 114 } 115 116 err = gcs.newGen.GetMany(ctx, notFound, func(ctx context.Context, chunk *chunks.Chunk) { 117 func() { 118 mu.Lock() 119 defer mu.Unlock() 120 delete(notFound, chunk.Hash()) 121 }() 122 123 found(ctx, chunk) 124 }) 125 if err != nil { 126 return err 127 } 128 if len(notFound) == 0 { 129 return nil 130 } 131 132 // Last ditch effort to see if the requested objects are commits we've decided to ignore. Note the function spec 133 // considers non-present chunks to be silently ignored, so we don't need to return an error here 134 if gcs.ghostGen == nil { 135 return nil 136 } 137 return gcs.ghostGen.GetMany(ctx, notFound, found) 138 } 139 140 func (gcs *GenerationalNBS) GetManyCompressed(ctx context.Context, hashes hash.HashSet, found func(context.Context, CompressedChunk)) error { 141 mu := &sync.Mutex{} 142 notInOldGen := hashes.Copy() 143 err := gcs.oldGen.GetManyCompressed(ctx, hashes, func(ctx context.Context, chunk CompressedChunk) { 144 func() { 145 mu.Lock() 146 defer mu.Unlock() 147 delete(notInOldGen, chunk.Hash()) 148 }() 149 150 found(ctx, chunk) 151 }) 152 153 if err != nil { 154 return err 155 } 156 157 if len(notInOldGen) == 0 { 158 return nil 159 } 160 161 return gcs.newGen.GetManyCompressed(ctx, notInOldGen, found) 162 } 163 164 // Has returns true iff the value at the address |h| is contained in the store 165 func (gcs *GenerationalNBS) Has(ctx context.Context, h hash.Hash) (bool, error) { 166 has, err := gcs.oldGen.Has(ctx, h) 167 if err != nil || has { 168 return has, err 169 } 170 171 has, err = gcs.newGen.Has(ctx, h) 172 if err != nil || has { 173 return has, err 174 } 175 176 // Possibly a truncated commit. 177 if gcs.ghostGen != nil { 178 has, err = gcs.ghostGen.Has(ctx, h) 179 if err != nil { 180 return has, err 181 } 182 } 183 return has, nil 184 } 185 186 // HasMany returns a new HashSet containing any members of |hashes| that are absent from the store. 187 func (gcs *GenerationalNBS) HasMany(ctx context.Context, hashes hash.HashSet) (absent hash.HashSet, err error) { 188 gcs.newGen.mu.RLock() 189 defer gcs.newGen.mu.RUnlock() 190 return gcs.hasMany(toHasRecords(hashes)) 191 } 192 193 func (gcs *GenerationalNBS) hasMany(recs []hasRecord) (absent hash.HashSet, err error) { 194 absent, err = gcs.newGen.hasMany(recs) 195 if err != nil { 196 return nil, err 197 } else if len(absent) == 0 { 198 return absent, nil 199 } 200 201 absent, err = func() (hash.HashSet, error) { 202 gcs.oldGen.mu.RLock() 203 defer gcs.oldGen.mu.RUnlock() 204 return gcs.oldGen.hasMany(recs) 205 }() 206 if err != nil { 207 return nil, err 208 } 209 210 if len(absent) == 0 || gcs.ghostGen == nil { 211 return absent, nil 212 } 213 214 return gcs.ghostGen.hasMany(absent) 215 } 216 217 // Put caches c in the ChunkSource. Upon return, c must be visible to 218 // subsequent Get and Has calls, but must not be persistent until a call 219 // to Flush(). Put may be called concurrently with other calls to Put(), 220 // Get(), GetMany(), Has() and HasMany(). 221 func (gcs *GenerationalNBS) Put(ctx context.Context, c chunks.Chunk, getAddrs chunks.GetAddrsCurry) error { 222 return gcs.newGen.putChunk(ctx, c, getAddrs, gcs.hasMany) 223 } 224 225 // Returns the NomsBinFormat with which this ChunkSource is compatible. 226 func (gcs *GenerationalNBS) Version() string { 227 return gcs.newGen.Version() 228 } 229 230 func (gcs *GenerationalNBS) AccessMode() chunks.ExclusiveAccessMode { 231 newGenMode := gcs.newGen.AccessMode() 232 oldGenMode := gcs.oldGen.AccessMode() 233 if oldGenMode > newGenMode { 234 return oldGenMode 235 } 236 return newGenMode 237 } 238 239 // Rebase brings this ChunkStore into sync with the persistent storage's 240 // current root. 241 func (gcs *GenerationalNBS) Rebase(ctx context.Context) error { 242 oErr := gcs.oldGen.Rebase(ctx) 243 nErr := gcs.newGen.Rebase(ctx) 244 245 if oErr != nil { 246 return oErr 247 } 248 249 return nErr 250 } 251 252 // Root returns the root of the database as of the time the ChunkStore 253 // was opened or the most recent call to Rebase. 254 func (gcs *GenerationalNBS) Root(ctx context.Context) (hash.Hash, error) { 255 return gcs.newGen.Root(ctx) 256 } 257 258 // Commit atomically attempts to persist all novel Chunks and update the 259 // persisted root hash from last to current (or keeps it the same). 260 // If last doesn't match the root in persistent storage, returns false. 261 func (gcs *GenerationalNBS) Commit(ctx context.Context, current, last hash.Hash) (bool, error) { 262 return gcs.newGen.commit(ctx, current, last, gcs.hasMany) 263 } 264 265 // Stats may return some kind of struct that reports statistics about the 266 // ChunkStore instance. The type is implementation-dependent, and impls 267 // may return nil 268 func (gcs *GenerationalNBS) Stats() interface{} { 269 return nil 270 } 271 272 // StatsSummary may return a string containing summarized statistics for 273 // this ChunkStore. It must return "Unsupported" if this operation is not 274 // supported. 275 func (gcs *GenerationalNBS) StatsSummary() string { 276 var sb strings.Builder 277 sb.WriteString("New Gen: \n\t") 278 sb.WriteString(gcs.newGen.StatsSummary()) 279 sb.WriteString("\nOld Gen: \n\t") 280 sb.WriteString(gcs.oldGen.StatsSummary()) 281 return sb.String() 282 } 283 284 // Close tears down any resources in use by the implementation. After 285 // Close(), the ChunkStore may not be used again. It is NOT SAFE to call 286 // Close() concurrently with any other ChunkStore method; behavior is 287 // undefined and probably crashy. 288 func (gcs *GenerationalNBS) Close() error { 289 oErr := gcs.oldGen.Close() 290 nErr := gcs.newGen.Close() 291 292 if oErr != nil { 293 return oErr 294 } 295 296 return nErr 297 } 298 299 func (gcs *GenerationalNBS) copyToOldGen(ctx context.Context, hashes hash.HashSet) error { 300 notInOldGen, err := gcs.oldGen.HasMany(ctx, hashes) 301 302 if err != nil { 303 return err 304 } 305 306 var putErr error 307 err = gcs.newGen.GetMany(ctx, notInOldGen, func(ctx context.Context, chunk *chunks.Chunk) { 308 if putErr == nil { 309 putErr = gcs.oldGen.Put(ctx, *chunk, func(c chunks.Chunk) chunks.GetAddrsCb { 310 return func(ctx context.Context, addrs hash.HashSet, _ chunks.PendingRefExists) error { return nil } 311 }) 312 } 313 }) 314 315 if putErr != nil { 316 return putErr 317 } 318 319 return err 320 } 321 322 type prefixedTableFile struct { 323 chunks.TableFile 324 prefix string 325 } 326 327 func (p prefixedTableFile) LocationPrefix() string { 328 return p.prefix + "/" 329 } 330 331 // Sources retrieves the current root hash, a list of all the table files (which may include appendix table files), 332 // and a second list containing only appendix table files for both the old gen and new gen stores. 333 func (gcs *GenerationalNBS) Sources(ctx context.Context) (hash.Hash, []chunks.TableFile, []chunks.TableFile, error) { 334 root, tFiles, appFiles, err := gcs.newGen.Sources(ctx) 335 if err != nil { 336 return hash.Hash{}, nil, nil, err 337 } 338 339 _, oldTFiles, oldAppFiles, err := gcs.oldGen.Sources(ctx) 340 if err != nil { 341 return hash.Hash{}, nil, nil, err 342 } 343 344 prefix := gcs.RelativeOldGenPath() 345 346 for _, tf := range oldTFiles { 347 tFiles = append(tFiles, prefixedTableFile{tf, prefix}) 348 } 349 for _, tf := range oldAppFiles { 350 appFiles = append(appFiles, prefixedTableFile{tf, prefix}) 351 } 352 353 return root, tFiles, appFiles, nil 354 } 355 356 // Size returns the total size, in bytes, of the table files in the new and old gen stores combined 357 func (gcs *GenerationalNBS) Size(ctx context.Context) (uint64, error) { 358 oldSize, err := gcs.oldGen.Size(ctx) 359 360 if err != nil { 361 return 0, err 362 } 363 364 newSize, err := gcs.newGen.Size(ctx) 365 366 if err != nil { 367 return 0, err 368 } 369 370 return oldSize + newSize, nil 371 } 372 373 // WriteTableFile will read a table file from the provided reader and write it to the new gen TableFileStore 374 func (gcs *GenerationalNBS) WriteTableFile(ctx context.Context, fileId string, numChunks int, contentHash []byte, getRd func() (io.ReadCloser, uint64, error)) error { 375 return gcs.newGen.WriteTableFile(ctx, fileId, numChunks, contentHash, getRd) 376 } 377 378 // AddTableFilesToManifest adds table files to the manifest of the newgen cs 379 func (gcs *GenerationalNBS) AddTableFilesToManifest(ctx context.Context, fileIdToNumChunks map[string]int) error { 380 return gcs.newGen.AddTableFilesToManifest(ctx, fileIdToNumChunks) 381 } 382 383 // PruneTableFiles deletes old table files that are no longer referenced in the manifest of the new or old gen chunkstores 384 func (gcs *GenerationalNBS) PruneTableFiles(ctx context.Context) error { 385 err := gcs.oldGen.pruneTableFiles(ctx, gcs.hasMany) 386 387 if err != nil { 388 return err 389 } 390 391 return gcs.newGen.pruneTableFiles(ctx, gcs.hasMany) 392 } 393 394 // SetRootChunk changes the root chunk hash from the previous value to the new root for the newgen cs 395 func (gcs *GenerationalNBS) SetRootChunk(ctx context.Context, root, previous hash.Hash) error { 396 return gcs.newGen.setRootChunk(ctx, root, previous, gcs.hasMany) 397 } 398 399 // SupportedOperations returns a description of the support TableFile operations. Some stores only support reading table files, not writing. 400 func (gcs *GenerationalNBS) SupportedOperations() chunks.TableFileStoreOps { 401 return gcs.newGen.SupportedOperations() 402 } 403 404 func (gcs *GenerationalNBS) GetChunkLocationsWithPaths(hashes hash.HashSet) (map[string]map[hash.Hash]Range, error) { 405 res, err := gcs.newGen.GetChunkLocationsWithPaths(hashes) 406 if err != nil { 407 return nil, err 408 } 409 if len(hashes) > 0 { 410 prefix := gcs.RelativeOldGenPath() 411 toadd, err := gcs.oldGen.GetChunkLocationsWithPaths(hashes) 412 if err != nil { 413 return nil, err 414 } 415 for k, v := range toadd { 416 res[filepath.ToSlash(filepath.Join(prefix, k))] = v 417 } 418 } 419 return res, nil 420 } 421 422 func (gcs *GenerationalNBS) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash]map[hash.Hash]Range, error) { 423 res, err := gcs.newGen.GetChunkLocations(hashes) 424 if err != nil { 425 return nil, err 426 } 427 if len(hashes) > 0 { 428 toadd, err := gcs.oldGen.GetChunkLocations(hashes) 429 if err != nil { 430 return nil, err 431 } 432 for k, v := range toadd { 433 res[k] = v 434 } 435 } 436 return res, nil 437 } 438 439 func (gcs *GenerationalNBS) RelativeOldGenPath() string { 440 newgenpath, ngpok := gcs.newGen.Path() 441 oldgenpath, ogpok := gcs.oldGen.Path() 442 if ngpok && ogpok { 443 if p, err := filepath.Rel(newgenpath, oldgenpath); err == nil { 444 return p 445 } 446 } 447 return "oldgen" 448 } 449 450 func (gcs *GenerationalNBS) Path() (string, bool) { 451 return gcs.newGen.Path() 452 } 453 454 func (gcs *GenerationalNBS) UpdateManifest(ctx context.Context, updates map[hash.Hash]uint32) (mi ManifestInfo, err error) { 455 return gcs.newGen.UpdateManifest(ctx, updates) 456 }