github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/manifest.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"context"
    26  	"crypto/sha512"
    27  	"errors"
    28  	"strconv"
    29  	"sync"
    30  	"time"
    31  
    32  	"github.com/dolthub/dolt/go/store/chunks"
    33  	"github.com/dolthub/dolt/go/store/d"
    34  	"github.com/dolthub/dolt/go/store/hash"
    35  )
    36  
// ErrCorruptManifest is the sentinel error for a manifest whose persisted
// contents cannot be interpreted.
// NOTE(review): nothing in this file returns it; presumably the concrete
// manifest parsers do — confirm against their call sites.
var ErrCorruptManifest = errors.New("corrupt manifest")
// ErrUnsupportedManifestAppendixOption is the sentinel error for a
// ManifestAppendixOption value that is not handled.
// NOTE(review): nothing in this file returns it; confirm where it is raised.
var ErrUnsupportedManifestAppendixOption = errors.New("unsupported manifest appendix option")
    39  
// manifest is the storage abstraction for a NomsBlockStore manifest: it can
// be identified by name, read (ParseIfExists) and, through the embedded
// manifestUpdater, optimistically rewritten.
type manifest interface {
	// Name returns a stable, unique identifier for the store this manifest describes.
	Name() string

	// ParseIfExists extracts and returns values from a NomsBlockStore
	// manifest, if one exists. Concrete implementations are responsible for
	// defining how to find and parse the desired manifest, e.g. a
	// particularly-named file in a given directory. Implementations are also
	// responsible for managing whatever concurrency guarantees they require
	// for correctness. If the manifest exists, |exists| is set to true and
	// manifest data is returned, including the version of the Noms data in
	// the store, the root hash.Hash of the store, and a tableSpec
	// describing every table that comprises the store.
	// If the manifest doesn't exist, |exists| is set to false and the other
	// return values are undefined. The |readHook| parameter allows race
	// condition testing. If it is non-nil, it will be invoked while the
	// implementation is guaranteeing exclusive access to the manifest.
	ParseIfExists(ctx context.Context, stats *Stats, readHook func() error) (exists bool, contents manifestContents, err error)

	manifestUpdater
}
    61  
// manifestUpdater is the write half of a manifest: a single optimistic,
// lock-hash-guarded Update operation.
type manifestUpdater interface {
	// Update optimistically tries to write a new manifest containing
	// |newContents|. If |lastLock| matches the lock hash in the currently
	// persisted manifest (logically, the lock that would be returned by
	// ParseIfExists), then Update succeeds and subsequent calls to both
	// Update and ParseIfExists will reflect a manifest containing
	// |newContents|. If not, Update fails. Regardless, the returned
	// manifestContents will reflect the current state of the world. Callers
	// should check that the returned root == the proposed root and, if not,
	// merge any desired new table information with the contents of the
	// returned []tableSpec before trying again.
	// Concrete implementations are responsible for ensuring that concurrent
	// Update calls (and ParseIfExists calls) are correct.
	// If writeHook is non-nil, it will be invoked while the implementation is
	// guaranteeing exclusive access to the manifest. This allows for testing
	// of race conditions.
	Update(ctx context.Context, lastLock addr, newContents manifestContents, stats *Stats, writeHook func() error) (manifestContents, error)
}
    80  
// manifestGCGenUpdater is an optional capability of a manifest: persisting a
// new garbage-collection generation. manifestManager.UpdateGCGen discovers it
// with a type assertion and reports an error when the manifest lacks it.
type manifestGCGenUpdater interface {
	// UpdateGCGen tries to write a new manifest containing |newContents|.
	// Like Update(), it requires that |lastLock| matches the currently persisted
	// lock hash. However, unlike Update() |newContents.root| must remain the same,
	// while |newContents.gcGen| must be updated to a new value.
	// Concrete implementations are responsible for ensuring that concurrent
	// Update calls (and ParseIfExists calls) are correct.
	// If writeHook is non-nil, it will be invoked while the implementation is
	// guaranteeing exclusive access to the manifest. This allows for testing
	// of race conditions.
	UpdateGCGen(ctx context.Context, lastLock addr, newContents manifestContents, stats *Stats, writeHook func() error) (manifestContents, error)
}
    93  
// manifestVersionGetter is an optional capability of a manifest: reporting
// its manifest version. manifestManager.GetManifestVersion discovers it with
// a type assertion and returns chunks.ErrUnsupportedOperation when the
// underlying manifest does not implement it.
type manifestVersionGetter interface {
	// GetManifestVersion returns the version of the manifest
	GetManifestVersion() string
}
    99  
// ManifestInfo is an interface for retrieving data from a manifest outside of
// this package. In this file it is satisfied by manifestContents.
type ManifestInfo interface {
	// GetVersion returns the manifest's version string.
	GetVersion() string
	// GetLock returns the manifest's lock hash rendered as a string.
	GetLock() string
	// GetGCGen returns the manifest's garbage-collection generation rendered
	// as a string.
	GetGCGen() string
	// GetRoot returns the root hash recorded in the manifest.
	GetRoot() hash.Hash
	// NumTableSpecs returns how many table specs the manifest lists.
	NumTableSpecs() int
	// NumAppendixSpecs returns how many appendix table specs the manifest lists.
	NumAppendixSpecs() int
	// GetTableSpecInfo returns the i-th table spec; valid indices are
	// [0, NumTableSpecs()).
	GetTableSpecInfo(i int) TableSpecInfo
	// GetAppendixTableSpecInfo returns the i-th appendix table spec; valid
	// indices are [0, NumAppendixSpecs()).
	GetAppendixTableSpecInfo(i int) TableSpecInfo
}
   111  
// ManifestAppendixOption selects how a manifest's appendix is changed by an
// appendix-aware update.
// NOTE(review): the code that interprets these values is not in this file;
// the per-value descriptions below are inferred from the names — confirm
// against nbs.UpdateManifestWithAppendix.
type ManifestAppendixOption int

const (
	// ManifestAppendixOption_Unspecified is the zero value (no option chosen).
	ManifestAppendixOption_Unspecified ManifestAppendixOption = iota
	// ManifestAppendixOption_Set presumably replaces the existing appendix.
	ManifestAppendixOption_Set
	// ManifestAppendixOption_Append presumably adds to the existing appendix.
	ManifestAppendixOption_Append
)
   119  
// manifestContents is the in-memory form of a manifest. It implements
// ManifestInfo.
type manifestContents struct {
	// vers is the manifest's version string.
	vers string
	// lock is the lock hash that optimistic updates compare against.
	lock addr
	// root is the root hash of the store.
	root hash.Hash
	// gcGen is the garbage-collection generation.
	gcGen addr
	// specs lists the tables that comprise the store.
	specs []tableSpec

	// An appendix is a list of |tableSpecs| that track an auxiliary collection of
	// table files used _only_ for query performance optimizations. These appendix |tableSpecs| can be safely
	// managed with nbs.UpdateManifestWithAppendix, however generation and removal of the actual table files
	// the appendix |tableSpecs| reference is done manually. All appendix |tableSpecs| will be prepended to the
	// manifest.specs across manifest updates.
	appendix []tableSpec
}
   134  
// GetVersion returns the manifest's version string.
func (mc manifestContents) GetVersion() string {
	return mc.vers
}

// GetLock returns the lock hash rendered with addr.String.
func (mc manifestContents) GetLock() string {
	return mc.lock.String()
}

// GetGCGen returns the garbage-collection generation rendered with addr.String.
func (mc manifestContents) GetGCGen() string {
	return mc.gcGen.String()
}

// GetRoot returns the root hash of the store.
func (mc manifestContents) GetRoot() hash.Hash {
	return mc.root
}

// NumTableSpecs returns the number of entries in mc.specs.
func (mc manifestContents) NumTableSpecs() int {
	return len(mc.specs)
}

// NumAppendixSpecs returns the number of entries in mc.appendix.
func (mc manifestContents) NumAppendixSpecs() int {
	return len(mc.appendix)
}

// GetTableSpecInfo returns mc.specs[i] as a TableSpecInfo. It panics if |i|
// is out of range.
func (mc manifestContents) GetTableSpecInfo(i int) TableSpecInfo {
	return mc.specs[i]
}

// GetAppendixTableSpecInfo returns mc.appendix[i] as a TableSpecInfo. It
// panics if |i| is out of range.
func (mc manifestContents) GetAppendixTableSpecInfo(i int) TableSpecInfo {
	return mc.appendix[i]
}

// getSpec returns mc.specs[i] as a concrete tableSpec. It panics if |i| is
// out of range.
func (mc manifestContents) getSpec(i int) tableSpec {
	return mc.specs[i]
}

// getAppendixSpec returns mc.appendix[i] as a concrete tableSpec. It panics
// if |i| is out of range.
func (mc manifestContents) getAppendixSpec(i int) tableSpec {
	return mc.appendix[i]
}
   174  
   175  func (mc manifestContents) removeAppendixSpecs() (manifestContents, []tableSpec) {
   176  	if mc.appendix == nil || len(mc.appendix) == 0 {
   177  		return mc, nil
   178  	}
   179  
   180  	appendixSet := mc.getAppendixSet()
   181  	filtered := make([]tableSpec, 0)
   182  	removed := make([]tableSpec, 0)
   183  	for _, s := range mc.specs {
   184  		if _, ok := appendixSet[s.name]; ok {
   185  			removed = append(removed, s)
   186  		} else {
   187  			filtered = append(filtered, s)
   188  		}
   189  	}
   190  
   191  	return manifestContents{
   192  		vers:  mc.vers,
   193  		lock:  mc.lock,
   194  		root:  mc.root,
   195  		gcGen: mc.gcGen,
   196  		specs: filtered,
   197  	}, removed
   198  }
   199  
// getSpecSet returns the set of table names found in mc.specs.
func (mc manifestContents) getSpecSet() (ss map[addr]struct{}) {
	return toSpecSet(mc.specs)
}

// getAppendixSet returns the set of table names found in mc.appendix.
func (mc manifestContents) getAppendixSet() (ss map[addr]struct{}) {
	return toSpecSet(mc.appendix)
}
   207  
   208  func toSpecSet(specs []tableSpec) (ss map[addr]struct{}) {
   209  	ss = make(map[addr]struct{}, len(specs))
   210  	for _, ts := range specs {
   211  		ss[ts.name] = struct{}{}
   212  	}
   213  	return ss
   214  }
   215  
   216  func (mc manifestContents) size() (size uint64) {
   217  	size += uint64(len(mc.vers)) + addrSize + hash.ByteLen
   218  	for _, sp := range mc.specs {
   219  		size += uint64(len(sp.name)) + uint32Size // for sp.chunkCount
   220  	}
   221  	return
   222  }
   223  
   224  func newManifestLocks() *manifestLocks {
   225  	return &manifestLocks{map[string]struct{}{}, map[string]struct{}{}, sync.NewCond(&sync.Mutex{})}
   226  }
   227  
   228  type manifestLocks struct {
   229  	updating map[string]struct{}
   230  	fetching map[string]struct{}
   231  	cond     *sync.Cond
   232  }
   233  
   234  func (ml *manifestLocks) lockForFetch(db string) {
   235  	lockByName(db, ml.cond, ml.fetching)
   236  }
   237  
   238  func (ml *manifestLocks) unlockForFetch(db string) error {
   239  	return unlockByName(db, ml.cond, ml.fetching)
   240  }
   241  
   242  func (ml *manifestLocks) lockForUpdate(db string) {
   243  	lockByName(db, ml.cond, ml.updating)
   244  }
   245  
   246  func (ml *manifestLocks) unlockForUpdate(db string) error {
   247  	return unlockByName(db, ml.cond, ml.updating)
   248  }
   249  
   250  func lockByName(db string, c *sync.Cond, locks map[string]struct{}) {
   251  	c.L.Lock()
   252  	defer c.L.Unlock()
   253  
   254  	for {
   255  		if _, inProgress := locks[db]; !inProgress {
   256  			locks[db] = struct{}{}
   257  			break
   258  		}
   259  		c.Wait()
   260  	}
   261  }
   262  
   263  func unlockByName(db string, c *sync.Cond, locks map[string]struct{}) error {
   264  	c.L.Lock()
   265  	defer c.L.Unlock()
   266  
   267  	if _, ok := locks[db]; !ok {
   268  		return errors.New("unlock failed")
   269  	}
   270  
   271  	delete(locks, db)
   272  
   273  	c.Broadcast()
   274  
   275  	return nil
   276  }
   277  
// manifestManager wraps a manifest with a cache of its most recently seen
// contents and with named fetch/update locks keyed by the manifest's Name().
type manifestManager struct {
	// m is the underlying manifest that is read and written.
	m manifest
	// cache stores manifest contents and a timestamp per manifest name.
	cache *manifestCache
	// locks supplies the per-name fetch and update locks.
	locks *manifestLocks
}
   283  
// lockOutFetch takes the fetch lock for this manifest's name, blocking until
// it is available.
func (mm manifestManager) lockOutFetch() {
	mm.locks.lockForFetch(mm.Name())
}

// allowFetch releases the fetch lock for this manifest's name. It returns an
// error if the lock is not held.
func (mm manifestManager) allowFetch() error {
	return mm.locks.unlockForFetch(mm.Name())
}

// LockForUpdate takes the update lock for this manifest's name, blocking
// until it is available.
func (mm manifestManager) LockForUpdate() {
	mm.locks.lockForUpdate(mm.Name())
}

// UnlockForUpdate releases the update lock for this manifest's name. It
// returns an error if the lock is not held.
func (mm manifestManager) UnlockForUpdate() error {
	return mm.locks.unlockForUpdate(mm.Name())
}
   299  
   300  func (mm manifestManager) updateWillFail(lastLock addr) (cached manifestContents, doomed bool) {
   301  	if upstream, _, hit := mm.cache.Get(mm.Name()); hit {
   302  		if lastLock != upstream.lock {
   303  			doomed, cached = true, upstream
   304  		}
   305  	}
   306  	return
   307  }
   308  
   309  func (mm manifestManager) Fetch(ctx context.Context, stats *Stats) (exists bool, contents manifestContents, err error) {
   310  	entryTime := time.Now()
   311  
   312  	mm.lockOutFetch()
   313  	defer func() {
   314  		afErr := mm.allowFetch()
   315  
   316  		if err == nil {
   317  			err = afErr
   318  		}
   319  	}()
   320  
   321  	f := func() (bool, manifestContents, error) {
   322  		cached, t, hit := mm.cache.Get(mm.Name())
   323  
   324  		if hit && t.After(entryTime) {
   325  			// Cache contains a manifest which is newer than entry time.
   326  			return true, cached, nil
   327  		}
   328  
   329  		t = time.Now()
   330  
   331  		exists, contents, err := mm.m.ParseIfExists(ctx, stats, nil)
   332  
   333  		if err != nil {
   334  			return false, manifestContents{}, err
   335  		}
   336  
   337  		err = mm.cache.Put(mm.Name(), contents, t)
   338  
   339  		if err != nil {
   340  			return false, manifestContents{}, err
   341  		}
   342  
   343  		return exists, contents, nil
   344  	}
   345  
   346  	exists, contents, err = f()
   347  	return
   348  }
   349  
   350  // Update attempts to write a new manifest.
   351  // Callers MUST protect uses of Update with Lock/UnlockForUpdate.
   352  // Update does not call Lock/UnlockForUpdate() on its own because it is
   353  // intended to be used in a larger critical section along with updateWillFail.
   354  func (mm manifestManager) Update(ctx context.Context, lastLock addr, newContents manifestContents, stats *Stats, writeHook func() error) (contents manifestContents, err error) {
   355  	if upstream, _, hit := mm.cache.Get(mm.Name()); hit {
   356  		if lastLock != upstream.lock {
   357  			return upstream, nil
   358  		}
   359  	}
   360  	t := time.Now()
   361  
   362  	mm.lockOutFetch()
   363  	defer func() {
   364  		afErr := mm.allowFetch()
   365  
   366  		if err == nil {
   367  			err = afErr
   368  		}
   369  	}()
   370  
   371  	f := func() (manifestContents, error) {
   372  		contents, err := mm.m.Update(ctx, lastLock, newContents, stats, writeHook)
   373  
   374  		if err != nil {
   375  			return contents, err
   376  		}
   377  
   378  		err = mm.cache.Put(mm.Name(), contents, t)
   379  
   380  		if err != nil {
   381  			return manifestContents{}, err
   382  		}
   383  
   384  		return contents, nil
   385  	}
   386  
   387  	contents, err = f()
   388  	return
   389  }
   390  
   391  // UpdateGCGen will update the manifest with a new garbage collection generation.
   392  // Callers MUST protect uses of UpdateGCGen with Lock/UnlockForUpdate.
   393  func (mm manifestManager) UpdateGCGen(ctx context.Context, lastLock addr, newContents manifestContents, stats *Stats, writeHook func() error) (contents manifestContents, err error) {
   394  	updater, ok := mm.m.(manifestGCGenUpdater)
   395  	if !ok {
   396  		return manifestContents{}, errors.New("manifest does not support updating gc gen")
   397  	}
   398  
   399  	if upstream, _, hit := mm.cache.Get(mm.Name()); hit {
   400  		if lastLock != upstream.lock {
   401  			return manifestContents{}, errors.New("manifest was modified during garbage collection")
   402  		}
   403  	}
   404  	t := time.Now()
   405  
   406  	mm.lockOutFetch()
   407  	defer func() {
   408  		afErr := mm.allowFetch()
   409  
   410  		if err == nil {
   411  			err = afErr
   412  		}
   413  	}()
   414  
   415  	f := func() (manifestContents, error) {
   416  		contents, err := updater.UpdateGCGen(ctx, lastLock, newContents, stats, writeHook)
   417  
   418  		if err != nil {
   419  			return contents, err
   420  		}
   421  
   422  		err = mm.cache.Put(mm.Name(), contents, t)
   423  
   424  		if err != nil {
   425  			return manifestContents{}, err
   426  		}
   427  
   428  		return contents, nil
   429  	}
   430  
   431  	contents, err = f()
   432  	return
   433  }
   434  
// Name returns the Name() of the underlying manifest. It is the key used for
// both the cache and the named locks.
func (mm manifestManager) Name() string {
	return mm.m.Name()
}
   438  
   439  // GetManifestVersion returns the manifest storage version or an error if the operation is not supported
   440  func (mm manifestManager) GetManifestVersion() (string, error) {
   441  	vg, ok := mm.m.(manifestVersionGetter)
   442  	if !ok {
   443  		return "", chunks.ErrUnsupportedOperation
   444  	}
   445  	return vg.GetManifestVersion(), nil
   446  }
   447  
// TableSpecInfo is an interface for retrieving data from a tableSpec outside
// of this package. In this file it is satisfied by tableSpec.
type TableSpecInfo interface {
	// GetName returns the table's name rendered as a string.
	GetName() string
	// GetChunkCount returns the chunk count recorded for the table.
	GetChunkCount() uint32
}
   453  
// tableSpec is one table entry in a manifest: the table's name and its chunk
// count. It implements TableSpecInfo.
type tableSpec struct {
	// name identifies the table.
	name addr
	// chunkCount is the chunk count recorded for the table.
	chunkCount uint32
}

// GetName returns the table name rendered with addr.String.
func (ts tableSpec) GetName() string {
	return ts.name.String()
}

// GetChunkCount returns the chunk count recorded for the table.
func (ts tableSpec) GetChunkCount() uint32 {
	return ts.chunkCount
}
   466  
   467  func parseSpecs(tableInfo []string) ([]tableSpec, error) {
   468  	specs := make([]tableSpec, len(tableInfo)/2)
   469  	for i := range specs {
   470  		var err error
   471  		specs[i].name, err = parseAddr(tableInfo[2*i])
   472  
   473  		if err != nil {
   474  			return nil, err
   475  		}
   476  
   477  		c, err := strconv.ParseUint(tableInfo[2*i+1], 10, 32)
   478  
   479  		if err != nil {
   480  			return nil, err
   481  		}
   482  
   483  		specs[i].chunkCount = uint32(c)
   484  	}
   485  
   486  	return specs, nil
   487  }
   488  
   489  func formatSpecs(specs []tableSpec, tableInfo []string) {
   490  	d.Chk.True(len(tableInfo) == 2*len(specs))
   491  	for i, t := range specs {
   492  		tableInfo[2*i] = t.name.String()
   493  		tableInfo[2*i+1] = strconv.FormatUint(uint64(t.chunkCount), 10)
   494  	}
   495  }
   496  
   497  // generateLockHash returns a hash of root and the names of all the tables in
   498  // specs, which should be included in all persisted manifests. When a client
   499  // attempts to update a manifest, it must check the lock hash in the currently
   500  // persisted manifest against the lock hash it saw last time it loaded the
   501  // contents of a manifest. If they do not match, the client must not update
   502  // the persisted manifest.
   503  func generateLockHash(root hash.Hash, specs []tableSpec) (lock addr) {
   504  	blockHash := sha512.New()
   505  	blockHash.Write(root[:])
   506  	for _, spec := range specs {
   507  		blockHash.Write(spec.name[:])
   508  	}
   509  	var h []byte
   510  	h = blockHash.Sum(h) // Appends hash to h
   511  	copy(lock[:], h)
   512  	return
   513  }