github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/generational_chunk_store.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nbs
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"io"
    21  	"path/filepath"
    22  	"strings"
    23  	"sync"
    24  
    25  	"github.com/dolthub/dolt/go/store/chunks"
    26  	"github.com/dolthub/dolt/go/store/hash"
    27  )
    28  
    29  var _ chunks.ChunkStore = (*GenerationalNBS)(nil)
    30  var _ chunks.GenerationalCS = (*GenerationalNBS)(nil)
    31  var _ chunks.TableFileStore = (*GenerationalNBS)(nil)
    32  
    33  type GenerationalNBS struct {
    34  	oldGen   *NomsBlockStore
    35  	newGen   *NomsBlockStore
    36  	ghostGen *GhostBlockStore
    37  }
    38  
    39  func (gcs *GenerationalNBS) PersistGhostHashes(ctx context.Context, refs hash.HashSet) error {
    40  	if gcs.ghostGen == nil {
    41  		return gcs.ghostGen.PersistGhostHashes(ctx, refs)
    42  	}
    43  	return fmt.Errorf("runtime error. ghostGen is nil but an attempt to persist ghost hashes was made")
    44  }
    45  
    46  func (gcs *GenerationalNBS) GhostGen() chunks.ChunkStore {
    47  	return gcs.ghostGen
    48  }
    49  
    50  func NewGenerationalCS(oldGen, newGen *NomsBlockStore, ghostGen *GhostBlockStore) *GenerationalNBS {
    51  	if oldGen.Version() != "" && oldGen.Version() != newGen.Version() {
    52  		panic("oldgen and newgen chunkstore versions vary")
    53  	}
    54  
    55  	return &GenerationalNBS{
    56  		oldGen:   oldGen,
    57  		newGen:   newGen,
    58  		ghostGen: ghostGen,
    59  	}
    60  }
    61  
    62  func (gcs *GenerationalNBS) NewGen() chunks.ChunkStoreGarbageCollector {
    63  	return gcs.newGen
    64  }
    65  
    66  func (gcs *GenerationalNBS) OldGen() chunks.ChunkStoreGarbageCollector {
    67  	return gcs.oldGen
    68  }
    69  
    70  // Get the Chunk for the value of the hash in the store. If the hash is absent from the store EmptyChunk is returned.
    71  func (gcs *GenerationalNBS) Get(ctx context.Context, h hash.Hash) (chunks.Chunk, error) {
    72  	c, err := gcs.oldGen.Get(ctx, h)
    73  
    74  	if err != nil {
    75  		return chunks.EmptyChunk, err
    76  	}
    77  
    78  	if c.IsEmpty() {
    79  		c, err = gcs.newGen.Get(ctx, h)
    80  	}
    81  	if err != nil {
    82  		return chunks.EmptyChunk, err
    83  	}
    84  
    85  	if c.IsEmpty() && gcs.ghostGen != nil {
    86  		c, err = gcs.ghostGen.Get(ctx, h)
    87  		if err != nil {
    88  			return chunks.EmptyChunk, err
    89  		}
    90  	}
    91  
    92  	return c, nil
    93  }
    94  
    95  // GetMany gets the Chunks with |hashes| from the store. On return, |foundChunks| will have been fully sent all chunks
    96  // which have been found. Any non-present chunks will silently be ignored.
    97  func (gcs *GenerationalNBS) GetMany(ctx context.Context, hashes hash.HashSet, found func(context.Context, *chunks.Chunk)) error {
    98  	mu := &sync.Mutex{}
    99  	notFound := hashes.Copy()
   100  	err := gcs.oldGen.GetMany(ctx, hashes, func(ctx context.Context, chunk *chunks.Chunk) {
   101  		func() {
   102  			mu.Lock()
   103  			defer mu.Unlock()
   104  			delete(notFound, chunk.Hash())
   105  		}()
   106  
   107  		found(ctx, chunk)
   108  	})
   109  	if err != nil {
   110  		return err
   111  	}
   112  	if len(notFound) == 0 {
   113  		return nil
   114  	}
   115  
   116  	err = gcs.newGen.GetMany(ctx, notFound, func(ctx context.Context, chunk *chunks.Chunk) {
   117  		func() {
   118  			mu.Lock()
   119  			defer mu.Unlock()
   120  			delete(notFound, chunk.Hash())
   121  		}()
   122  
   123  		found(ctx, chunk)
   124  	})
   125  	if err != nil {
   126  		return err
   127  	}
   128  	if len(notFound) == 0 {
   129  		return nil
   130  	}
   131  
   132  	// Last ditch effort to see if the requested objects are commits we've decided to ignore. Note the function spec
   133  	// considers non-present chunks to be silently ignored, so we don't need to return an error here
   134  	if gcs.ghostGen == nil {
   135  		return nil
   136  	}
   137  	return gcs.ghostGen.GetMany(ctx, notFound, found)
   138  }
   139  
   140  func (gcs *GenerationalNBS) GetManyCompressed(ctx context.Context, hashes hash.HashSet, found func(context.Context, CompressedChunk)) error {
   141  	mu := &sync.Mutex{}
   142  	notInOldGen := hashes.Copy()
   143  	err := gcs.oldGen.GetManyCompressed(ctx, hashes, func(ctx context.Context, chunk CompressedChunk) {
   144  		func() {
   145  			mu.Lock()
   146  			defer mu.Unlock()
   147  			delete(notInOldGen, chunk.Hash())
   148  		}()
   149  
   150  		found(ctx, chunk)
   151  	})
   152  
   153  	if err != nil {
   154  		return err
   155  	}
   156  
   157  	if len(notInOldGen) == 0 {
   158  		return nil
   159  	}
   160  
   161  	return gcs.newGen.GetManyCompressed(ctx, notInOldGen, found)
   162  }
   163  
   164  // Has returns true iff the value at the address |h| is contained in the store
   165  func (gcs *GenerationalNBS) Has(ctx context.Context, h hash.Hash) (bool, error) {
   166  	has, err := gcs.oldGen.Has(ctx, h)
   167  	if err != nil || has {
   168  		return has, err
   169  	}
   170  
   171  	has, err = gcs.newGen.Has(ctx, h)
   172  	if err != nil || has {
   173  		return has, err
   174  	}
   175  
   176  	// Possibly a truncated commit.
   177  	if gcs.ghostGen != nil {
   178  		has, err = gcs.ghostGen.Has(ctx, h)
   179  		if err != nil {
   180  			return has, err
   181  		}
   182  	}
   183  	return has, nil
   184  }
   185  
   186  // HasMany returns a new HashSet containing any members of |hashes| that are absent from the store.
   187  func (gcs *GenerationalNBS) HasMany(ctx context.Context, hashes hash.HashSet) (absent hash.HashSet, err error) {
   188  	gcs.newGen.mu.RLock()
   189  	defer gcs.newGen.mu.RUnlock()
   190  	return gcs.hasMany(toHasRecords(hashes))
   191  }
   192  
   193  func (gcs *GenerationalNBS) hasMany(recs []hasRecord) (absent hash.HashSet, err error) {
   194  	absent, err = gcs.newGen.hasMany(recs)
   195  	if err != nil {
   196  		return nil, err
   197  	} else if len(absent) == 0 {
   198  		return absent, nil
   199  	}
   200  
   201  	absent, err = func() (hash.HashSet, error) {
   202  		gcs.oldGen.mu.RLock()
   203  		defer gcs.oldGen.mu.RUnlock()
   204  		return gcs.oldGen.hasMany(recs)
   205  	}()
   206  	if err != nil {
   207  		return nil, err
   208  	}
   209  
   210  	if len(absent) == 0 || gcs.ghostGen == nil {
   211  		return absent, nil
   212  	}
   213  
   214  	return gcs.ghostGen.hasMany(absent)
   215  }
   216  
   217  // Put caches c in the ChunkSource. Upon return, c must be visible to
   218  // subsequent Get and Has calls, but must not be persistent until a call
   219  // to Flush(). Put may be called concurrently with other calls to Put(),
   220  // Get(), GetMany(), Has() and HasMany().
   221  func (gcs *GenerationalNBS) Put(ctx context.Context, c chunks.Chunk, getAddrs chunks.GetAddrsCurry) error {
   222  	return gcs.newGen.putChunk(ctx, c, getAddrs, gcs.hasMany)
   223  }
   224  
   225  // Returns the NomsBinFormat with which this ChunkSource is compatible.
   226  func (gcs *GenerationalNBS) Version() string {
   227  	return gcs.newGen.Version()
   228  }
   229  
   230  func (gcs *GenerationalNBS) AccessMode() chunks.ExclusiveAccessMode {
   231  	newGenMode := gcs.newGen.AccessMode()
   232  	oldGenMode := gcs.oldGen.AccessMode()
   233  	if oldGenMode > newGenMode {
   234  		return oldGenMode
   235  	}
   236  	return newGenMode
   237  }
   238  
   239  // Rebase brings this ChunkStore into sync with the persistent storage's
   240  // current root.
   241  func (gcs *GenerationalNBS) Rebase(ctx context.Context) error {
   242  	oErr := gcs.oldGen.Rebase(ctx)
   243  	nErr := gcs.newGen.Rebase(ctx)
   244  
   245  	if oErr != nil {
   246  		return oErr
   247  	}
   248  
   249  	return nErr
   250  }
   251  
   252  // Root returns the root of the database as of the time the ChunkStore
   253  // was opened or the most recent call to Rebase.
   254  func (gcs *GenerationalNBS) Root(ctx context.Context) (hash.Hash, error) {
   255  	return gcs.newGen.Root(ctx)
   256  }
   257  
   258  // Commit atomically attempts to persist all novel Chunks and update the
   259  // persisted root hash from last to current (or keeps it the same).
   260  // If last doesn't match the root in persistent storage, returns false.
   261  func (gcs *GenerationalNBS) Commit(ctx context.Context, current, last hash.Hash) (bool, error) {
   262  	return gcs.newGen.commit(ctx, current, last, gcs.hasMany)
   263  }
   264  
   265  // Stats may return some kind of struct that reports statistics about the
   266  // ChunkStore instance. The type is implementation-dependent, and impls
   267  // may return nil
   268  func (gcs *GenerationalNBS) Stats() interface{} {
   269  	return nil
   270  }
   271  
   272  // StatsSummary may return a string containing summarized statistics for
   273  // this ChunkStore. It must return "Unsupported" if this operation is not
   274  // supported.
   275  func (gcs *GenerationalNBS) StatsSummary() string {
   276  	var sb strings.Builder
   277  	sb.WriteString("New Gen: \n\t")
   278  	sb.WriteString(gcs.newGen.StatsSummary())
   279  	sb.WriteString("\nOld Gen: \n\t")
   280  	sb.WriteString(gcs.oldGen.StatsSummary())
   281  	return sb.String()
   282  }
   283  
   284  // Close tears down any resources in use by the implementation. After
   285  // Close(), the ChunkStore may not be used again. It is NOT SAFE to call
   286  // Close() concurrently with any other ChunkStore method; behavior is
   287  // undefined and probably crashy.
   288  func (gcs *GenerationalNBS) Close() error {
   289  	oErr := gcs.oldGen.Close()
   290  	nErr := gcs.newGen.Close()
   291  
   292  	if oErr != nil {
   293  		return oErr
   294  	}
   295  
   296  	return nErr
   297  }
   298  
   299  func (gcs *GenerationalNBS) copyToOldGen(ctx context.Context, hashes hash.HashSet) error {
   300  	notInOldGen, err := gcs.oldGen.HasMany(ctx, hashes)
   301  
   302  	if err != nil {
   303  		return err
   304  	}
   305  
   306  	var putErr error
   307  	err = gcs.newGen.GetMany(ctx, notInOldGen, func(ctx context.Context, chunk *chunks.Chunk) {
   308  		if putErr == nil {
   309  			putErr = gcs.oldGen.Put(ctx, *chunk, func(c chunks.Chunk) chunks.GetAddrsCb {
   310  				return func(ctx context.Context, addrs hash.HashSet, _ chunks.PendingRefExists) error { return nil }
   311  			})
   312  		}
   313  	})
   314  
   315  	if putErr != nil {
   316  		return putErr
   317  	}
   318  
   319  	return err
   320  }
   321  
   322  type prefixedTableFile struct {
   323  	chunks.TableFile
   324  	prefix string
   325  }
   326  
   327  func (p prefixedTableFile) LocationPrefix() string {
   328  	return p.prefix + "/"
   329  }
   330  
   331  // Sources retrieves the current root hash, a list of all the table files (which may include appendix table files),
   332  // and a second list containing only appendix table files for both the old gen and new gen stores.
   333  func (gcs *GenerationalNBS) Sources(ctx context.Context) (hash.Hash, []chunks.TableFile, []chunks.TableFile, error) {
   334  	root, tFiles, appFiles, err := gcs.newGen.Sources(ctx)
   335  	if err != nil {
   336  		return hash.Hash{}, nil, nil, err
   337  	}
   338  
   339  	_, oldTFiles, oldAppFiles, err := gcs.oldGen.Sources(ctx)
   340  	if err != nil {
   341  		return hash.Hash{}, nil, nil, err
   342  	}
   343  
   344  	prefix := gcs.RelativeOldGenPath()
   345  
   346  	for _, tf := range oldTFiles {
   347  		tFiles = append(tFiles, prefixedTableFile{tf, prefix})
   348  	}
   349  	for _, tf := range oldAppFiles {
   350  		appFiles = append(appFiles, prefixedTableFile{tf, prefix})
   351  	}
   352  
   353  	return root, tFiles, appFiles, nil
   354  }
   355  
   356  // Size  returns the total size, in bytes, of the table files in the new and old gen stores combined
   357  func (gcs *GenerationalNBS) Size(ctx context.Context) (uint64, error) {
   358  	oldSize, err := gcs.oldGen.Size(ctx)
   359  
   360  	if err != nil {
   361  		return 0, err
   362  	}
   363  
   364  	newSize, err := gcs.newGen.Size(ctx)
   365  
   366  	if err != nil {
   367  		return 0, err
   368  	}
   369  
   370  	return oldSize + newSize, nil
   371  }
   372  
   373  // WriteTableFile will read a table file from the provided reader and write it to the new gen TableFileStore
   374  func (gcs *GenerationalNBS) WriteTableFile(ctx context.Context, fileId string, numChunks int, contentHash []byte, getRd func() (io.ReadCloser, uint64, error)) error {
   375  	return gcs.newGen.WriteTableFile(ctx, fileId, numChunks, contentHash, getRd)
   376  }
   377  
   378  // AddTableFilesToManifest adds table files to the manifest of the newgen cs
   379  func (gcs *GenerationalNBS) AddTableFilesToManifest(ctx context.Context, fileIdToNumChunks map[string]int) error {
   380  	return gcs.newGen.AddTableFilesToManifest(ctx, fileIdToNumChunks)
   381  }
   382  
   383  // PruneTableFiles deletes old table files that are no longer referenced in the manifest of the new or old gen chunkstores
   384  func (gcs *GenerationalNBS) PruneTableFiles(ctx context.Context) error {
   385  	err := gcs.oldGen.pruneTableFiles(ctx, gcs.hasMany)
   386  
   387  	if err != nil {
   388  		return err
   389  	}
   390  
   391  	return gcs.newGen.pruneTableFiles(ctx, gcs.hasMany)
   392  }
   393  
   394  // SetRootChunk changes the root chunk hash from the previous value to the new root for the newgen cs
   395  func (gcs *GenerationalNBS) SetRootChunk(ctx context.Context, root, previous hash.Hash) error {
   396  	return gcs.newGen.setRootChunk(ctx, root, previous, gcs.hasMany)
   397  }
   398  
   399  // SupportedOperations returns a description of the support TableFile operations. Some stores only support reading table files, not writing.
   400  func (gcs *GenerationalNBS) SupportedOperations() chunks.TableFileStoreOps {
   401  	return gcs.newGen.SupportedOperations()
   402  }
   403  
   404  func (gcs *GenerationalNBS) GetChunkLocationsWithPaths(hashes hash.HashSet) (map[string]map[hash.Hash]Range, error) {
   405  	res, err := gcs.newGen.GetChunkLocationsWithPaths(hashes)
   406  	if err != nil {
   407  		return nil, err
   408  	}
   409  	if len(hashes) > 0 {
   410  		prefix := gcs.RelativeOldGenPath()
   411  		toadd, err := gcs.oldGen.GetChunkLocationsWithPaths(hashes)
   412  		if err != nil {
   413  			return nil, err
   414  		}
   415  		for k, v := range toadd {
   416  			res[filepath.ToSlash(filepath.Join(prefix, k))] = v
   417  		}
   418  	}
   419  	return res, nil
   420  }
   421  
   422  func (gcs *GenerationalNBS) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash]map[hash.Hash]Range, error) {
   423  	res, err := gcs.newGen.GetChunkLocations(hashes)
   424  	if err != nil {
   425  		return nil, err
   426  	}
   427  	if len(hashes) > 0 {
   428  		toadd, err := gcs.oldGen.GetChunkLocations(hashes)
   429  		if err != nil {
   430  			return nil, err
   431  		}
   432  		for k, v := range toadd {
   433  			res[k] = v
   434  		}
   435  	}
   436  	return res, nil
   437  }
   438  
   439  func (gcs *GenerationalNBS) RelativeOldGenPath() string {
   440  	newgenpath, ngpok := gcs.newGen.Path()
   441  	oldgenpath, ogpok := gcs.oldGen.Path()
   442  	if ngpok && ogpok {
   443  		if p, err := filepath.Rel(newgenpath, oldgenpath); err == nil {
   444  			return p
   445  		}
   446  	}
   447  	return "oldgen"
   448  }
   449  
   450  func (gcs *GenerationalNBS) Path() (string, bool) {
   451  	return gcs.newGen.Path()
   452  }
   453  
   454  func (gcs *GenerationalNBS) UpdateManifest(ctx context.Context, updates map[hash.Hash]uint32) (mi ManifestInfo, err error) {
   455  	return gcs.newGen.UpdateManifest(ctx, updates)
   456  }