github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/store.go

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"context"
    26  	"fmt"
    27  	"io"
    28  	"io/ioutil"
    29  	"os"
    30  	"path/filepath"
    31  	"reflect"
    32  	"sort"
    33  	"sync"
    34  	"time"
    35  
    36  	"cloud.google.com/go/storage"
    37  	"github.com/dustin/go-humanize"
    38  	"github.com/pkg/errors"
    39  	"golang.org/x/sync/errgroup"
    40  
    41  	"github.com/dolthub/dolt/go/libraries/utils/tracing"
    42  	"github.com/dolthub/dolt/go/store/blobstore"
    43  	"github.com/dolthub/dolt/go/store/chunks"
    44  	"github.com/dolthub/dolt/go/store/hash"
    45  )
    46  
    47  var ErrFetchFailure = errors.New("fetch failed")
    48  var ErrSpecWithoutChunkSource = errors.New("manifest referenced table file for which there is no chunkSource.")
    49  
    50  // The root of a Noms Chunk Store is stored in a 'manifest', along with the
    51  // names of the tables that hold all the chunks in the store. The number of
    52  // chunks in each table is also stored in the manifest.
    53  
    54  const (
     55  	// StorageVersion is the version of the on-disk Noms Chunk Store data format.
    56  	StorageVersion = "5"
    57  
    58  	defaultMemTableSize uint64 = (1 << 20) * 128 // 128MB
    59  	defaultMaxTables           = 256
    60  
    61  	defaultIndexCacheSize    = (1 << 20) * 64 // 64MB
    62  	defaultManifestCacheSize = 1 << 23        // 8MB
    63  	preflushChunkCount       = 8
    64  
    65  	copyTableFileBufferSize = 128 * 1024 * 1024
    66  )
    67  
    68  var (
    69  	cacheOnce           = sync.Once{}
    70  	globalIndexCache    *indexCache
    71  	makeManifestManager func(manifest) manifestManager
    72  	globalFDCache       *fdCache
    73  )
    74  
    75  func makeGlobalCaches() {
    76  	globalIndexCache = newIndexCache(defaultIndexCacheSize)
    77  	globalFDCache = newFDCache(defaultMaxTables)
    78  
    79  	manifestCache := newManifestCache(defaultManifestCacheSize)
    80  	manifestLocks := newManifestLocks()
    81  	makeManifestManager = func(m manifest) manifestManager { return manifestManager{m, manifestCache, manifestLocks} }
    82  }
    83  
    84  type NomsBlockStore struct {
    85  	mm manifestManager
    86  	p  tablePersister
    87  	c  conjoiner
    88  
    89  	mu       sync.RWMutex // protects the following state
    90  	mt       *memTable
    91  	tables   tableSet
    92  	upstream manifestContents
    93  
    94  	mtSize   uint64
    95  	putCount uint64
    96  
    97  	stats *Stats
    98  }
    99  
   100  var _ TableFileStore = &NomsBlockStore{}
   101  var _ chunks.ChunkStoreGarbageCollector = &NomsBlockStore{}
   102  
   103  type Range struct {
   104  	Offset uint64
   105  	Length uint32
   106  }
   107  
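         // GetChunkLocations scans the upstream and novel table files for the requested
         // hashes and returns, keyed by table file hash, a map from chunk hash to the
         // byte Range where that chunk can be read. Found hashes are removed from |hashes|.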
   108  func (nbs *NomsBlockStore) GetChunkLocations(hashes hash.HashSet) (map[hash.Hash]map[hash.Hash]Range, error) {
   109  	gr := toGetRecords(hashes)
   110  
   111  	ranges := make(map[hash.Hash]map[hash.Hash]Range)
   112  	f := func(css chunkSources) error {
   113  		for _, cs := range css {
   114  			switch tr := cs.(type) {
   115  			case *mmapTableReader:
   116  				offsetRecSlice, _ := tr.findOffsets(gr)
   117  				if len(offsetRecSlice) > 0 {
   118  					y, ok := ranges[hash.Hash(tr.h)]
   119  
   120  					if !ok {
   121  						y = make(map[hash.Hash]Range)
   122  					}
   123  
   124  					for _, offsetRec := range offsetRecSlice {
   125  						h := hash.Hash(*offsetRec.a)
   126  						y[h] = Range{Offset: offsetRec.offset, Length: offsetRec.length}
   127  
   128  						delete(hashes, h)
   129  					}
   130  
    131  					gr = toGetRecords(hashes)
   134  
   135  					ranges[hash.Hash(tr.h)] = y
   136  				}
   137  			case *chunkSourceAdapter:
   138  				y, ok := ranges[hash.Hash(tr.h)]
   139  
   140  				if !ok {
   141  					y = make(map[hash.Hash]Range)
   142  				}
   143  
   144  				tableIndex, err := tr.index()
   145  
   146  				if err != nil {
   147  					return err
   148  				}
   149  
   150  				var foundHashes []hash.Hash
   151  				for h := range hashes {
   152  					a := addr(h)
   153  					e, ok := tableIndex.Lookup(&a)
   154  					if ok {
   155  						foundHashes = append(foundHashes, h)
   156  						y[h] = Range{Offset: e.Offset(), Length: e.Length()}
   157  					}
   158  				}
   159  
   160  				ranges[hash.Hash(tr.h)] = y
   161  
   162  				for _, h := range foundHashes {
   163  					delete(hashes, h)
   164  				}
   165  
   166  			default:
   167  				panic(reflect.TypeOf(cs))
   168  			}
   169  
   170  		}
   171  
   172  		return nil
   173  	}
   174  
   175  	err := f(nbs.tables.upstream)
   176  
   177  	if err != nil {
   178  		return nil, err
   179  	}
   180  
   181  	err = f(nbs.tables.novel)
   182  
   183  	if err != nil {
   184  		return nil, err
   185  	}
   186  
   187  	return ranges, nil
   188  }
   189  
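         // UpdateManifest adds the table files in |updates| (table file hash -> chunk count)
         // to the manifest if they are not already present, then rebases the in-memory
         // table set against the updated manifest.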
   190  func (nbs *NomsBlockStore) UpdateManifest(ctx context.Context, updates map[hash.Hash]uint32) (mi ManifestInfo, err error) {
   191  	nbs.mm.LockForUpdate()
   192  	defer func() {
   193  		unlockErr := nbs.mm.UnlockForUpdate()
   194  
   195  		if err == nil {
   196  			err = unlockErr
   197  		}
   198  	}()
   199  
   200  	nbs.mu.Lock()
   201  	defer nbs.mu.Unlock()
   202  
   203  	var stats Stats
   204  	var ok bool
   205  	var contents manifestContents
   206  	ok, contents, err = nbs.mm.Fetch(ctx, &stats)
   207  
   208  	if err != nil {
   209  		return manifestContents{}, err
   210  	} else if !ok {
   211  		contents = manifestContents{vers: nbs.upstream.vers}
   212  	}
   213  
   214  	currSpecs := contents.getSpecSet()
   215  
   216  	var addCount int
   217  	for h, count := range updates {
   218  		a := addr(h)
   219  
   220  		if _, ok := currSpecs[a]; !ok {
   221  			addCount++
   222  			contents.specs = append(contents.specs, tableSpec{a, count})
   223  		}
   224  	}
   225  
   226  	if addCount == 0 {
   227  		return contents, nil
   228  	}
   229  
    230  	// ensure we don't drop existing appendices
    231  	if len(contents.appendix) > 0 {
   232  		contents, err = fromManifestAppendixOptionNewContents(contents, contents.appendix, ManifestAppendixOption_Set)
   233  		if err != nil {
   234  			return manifestContents{}, err
   235  		}
   236  	}
   237  
   238  	var updatedContents manifestContents
   239  	updatedContents, err = nbs.mm.Update(ctx, contents.lock, contents, &stats, nil)
   240  
   241  	if err != nil {
   242  		return manifestContents{}, err
   243  	}
   244  
   245  	newTables, err := nbs.tables.Rebase(ctx, contents.specs, nbs.stats)
   246  
   247  	if err != nil {
   248  		return manifestContents{}, err
   249  	}
   250  
   251  	nbs.upstream = updatedContents
   252  	oldTables := nbs.tables
   253  	nbs.tables = newTables
   254  	err = oldTables.Close()
   255  	if err != nil {
   256  		return manifestContents{}, err
   257  	}
   258  
   259  	return updatedContents, nil
   260  }
   261  
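         // UpdateManifestWithAppendix adds the table files in |updates| to the manifest
         // appendix, either appending to or replacing the current appendix according to |option|.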
   262  func (nbs *NomsBlockStore) UpdateManifestWithAppendix(ctx context.Context, updates map[hash.Hash]uint32, option ManifestAppendixOption) (mi ManifestInfo, err error) {
   263  	nbs.mm.LockForUpdate()
   264  	defer func() {
   265  		unlockErr := nbs.mm.UnlockForUpdate()
   266  
   267  		if err == nil {
   268  			err = unlockErr
   269  		}
   270  	}()
   271  
   272  	nbs.mu.Lock()
   273  	defer nbs.mu.Unlock()
   274  
   275  	var stats Stats
   276  	var ok bool
   277  	var contents manifestContents
   278  	ok, contents, err = nbs.mm.Fetch(ctx, &stats)
   279  
   280  	if err != nil {
   281  		return manifestContents{}, err
   282  	} else if !ok {
   283  		contents = manifestContents{vers: nbs.upstream.vers}
   284  	}
   285  
   286  	currAppendixSpecs := contents.getAppendixSet()
   287  
   288  	appendixSpecs := make([]tableSpec, 0)
   289  	var addCount int
   290  	for h, count := range updates {
   291  		a := addr(h)
   292  
   293  		if option == ManifestAppendixOption_Set {
   294  			appendixSpecs = append(appendixSpecs, tableSpec{a, count})
   295  		} else {
   296  			if _, ok := currAppendixSpecs[a]; !ok {
   297  				addCount++
   298  				appendixSpecs = append(appendixSpecs, tableSpec{a, count})
   299  			}
   300  		}
   301  	}
   302  
   303  	if addCount == 0 && option != ManifestAppendixOption_Set {
   304  		return contents, nil
   305  	}
   306  
   307  	contents, err = fromManifestAppendixOptionNewContents(contents, appendixSpecs, option)
   308  	if err != nil {
   309  		return manifestContents{}, err
   310  	}
   311  
   312  	var updatedContents manifestContents
   313  	updatedContents, err = nbs.mm.Update(ctx, contents.lock, contents, &stats, nil)
   314  	if err != nil {
   315  		return manifestContents{}, err
   316  	}
   317  
   318  	newTables, err := nbs.tables.Rebase(ctx, contents.specs, nbs.stats)
   319  	if err != nil {
   320  		return manifestContents{}, err
   321  	}
   322  
   323  	nbs.upstream = updatedContents
   324  	oldTables := nbs.tables
   325  	nbs.tables = newTables
   326  	err = oldTables.Close()
   327  	if err != nil {
   328  		return manifestContents{}, err
   329  	}
   330  	return updatedContents, nil
   331  }
   332  
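         // fromManifestAppendixOptionNewContents returns |upstream| rewritten so that
         // |appendixSpecs| either extend (ManifestAppendixOption_Append) or replace
         // (ManifestAppendixOption_Set) the current appendix, keeping appendix specs
         // at the front of the table specs.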
   333  func fromManifestAppendixOptionNewContents(upstream manifestContents, appendixSpecs []tableSpec, option ManifestAppendixOption) (manifestContents, error) {
   334  	contents, upstreamAppendixSpecs := upstream.removeAppendixSpecs()
   335  	switch option {
   336  	case ManifestAppendixOption_Append:
   337  		// prepend all appendix specs to contents.specs
   338  		specs := append([]tableSpec{}, appendixSpecs...)
   339  		specs = append(specs, upstreamAppendixSpecs...)
   340  		contents.specs = append(specs, contents.specs...)
   341  
   342  		// append all appendix specs to contents.appendix
   343  		newAppendixSpecs := append([]tableSpec{}, upstreamAppendixSpecs...)
   344  		contents.appendix = append(newAppendixSpecs, appendixSpecs...)
   345  
   346  		return contents, nil
   347  	case ManifestAppendixOption_Set:
   348  		if len(appendixSpecs) < 1 {
   349  			return contents, nil
   350  		}
   351  
   352  		// prepend new appendix specs to contents.specs
   353  		// dropping all upstream appendix specs
   354  		specs := append([]tableSpec{}, appendixSpecs...)
   355  		contents.specs = append(specs, contents.specs...)
   356  
   357  		// append new appendix specs to contents.appendix
   358  		contents.appendix = append([]tableSpec{}, appendixSpecs...)
   359  		return contents, nil
   360  	default:
   361  		return manifestContents{}, ErrUnsupportedManifestAppendixOption
   362  	}
   363  }
   364  
   365  // GetManifestStorageVersion returns the manifest storage version or an error if the operation is not supported
   366  func (nbs *NomsBlockStore) GetManifestStorageVersion(ctx context.Context) (version string, err error) {
   367  	// possibly unnecessary
   368  	nbs.mm.LockForUpdate()
   369  	defer func() {
   370  		err = nbs.mm.UnlockForUpdate()
   371  	}()
   372  	nbs.mu.Lock()
   373  	defer nbs.mu.Unlock()
   374  
   375  	return nbs.mm.GetManifestVersion()
   376  }
   377  
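         // NewAWSStoreWithMMapIndex returns an nbs implementation backed by S3 and DynamoDB
         // that memory-maps its table file indexes.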
   378  func NewAWSStoreWithMMapIndex(ctx context.Context, nbfVerStr string, table, ns, bucket string, s3 s3svc, ddb ddbsvc, memTableSize uint64) (*NomsBlockStore, error) {
   379  	cacheOnce.Do(makeGlobalCaches)
   380  	readRateLimiter := make(chan struct{}, 32)
   381  	p := &awsTablePersister{
   382  		s3,
   383  		bucket,
   384  		readRateLimiter,
   385  		nil,
   386  		&ddbTableStore{ddb, table, readRateLimiter, nil},
   387  		awsLimits{defaultS3PartSize, minS3PartSize, maxS3PartSize, maxDynamoItemSize, maxDynamoChunks},
   388  		globalIndexCache,
   389  		ns,
   390  		func(bs []byte) (tableIndex, error) {
   391  			ohi, err := parseTableIndex(bs)
   392  			if err != nil {
   393  				return nil, err
   394  			}
   395  			return newMmapTableIndex(ohi, nil)
   396  		},
   397  	}
   398  	mm := makeManifestManager(newDynamoManifest(table, ns, ddb))
   399  	return newNomsBlockStore(ctx, nbfVerStr, mm, p, inlineConjoiner{defaultMaxTables}, memTableSize)
   400  }
   401  
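         // NewAWSStore returns an nbs implementation backed by S3 and DynamoDB.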
   402  func NewAWSStore(ctx context.Context, nbfVerStr string, table, ns, bucket string, s3 s3svc, ddb ddbsvc, memTableSize uint64) (*NomsBlockStore, error) {
   403  	cacheOnce.Do(makeGlobalCaches)
   404  	readRateLimiter := make(chan struct{}, 32)
   405  	p := &awsTablePersister{
   406  		s3,
   407  		bucket,
   408  		readRateLimiter,
   409  		nil,
   410  		&ddbTableStore{ddb, table, readRateLimiter, nil},
   411  		awsLimits{defaultS3PartSize, minS3PartSize, maxS3PartSize, maxDynamoItemSize, maxDynamoChunks},
   412  		globalIndexCache,
   413  		ns,
   414  		func(bs []byte) (tableIndex, error) {
   415  			return parseTableIndex(bs)
   416  		},
   417  	}
   418  	mm := makeManifestManager(newDynamoManifest(table, ns, ddb))
   419  	return newNomsBlockStore(ctx, nbfVerStr, mm, p, inlineConjoiner{defaultMaxTables}, memTableSize)
   420  }
   421  
   422  // NewGCSStore returns an nbs implementation backed by a GCSBlobstore
   423  func NewGCSStore(ctx context.Context, nbfVerStr string, bucketName, path string, gcs *storage.Client, memTableSize uint64) (*NomsBlockStore, error) {
   424  	cacheOnce.Do(makeGlobalCaches)
   425  
   426  	bs := blobstore.NewGCSBlobstore(gcs, bucketName, path)
   427  	return NewBSStore(ctx, nbfVerStr, bs, memTableSize)
   428  }
   429  
   430  // NewBSStore returns an nbs implementation backed by a Blobstore
   431  func NewBSStore(ctx context.Context, nbfVerStr string, bs blobstore.Blobstore, memTableSize uint64) (*NomsBlockStore, error) {
   432  	cacheOnce.Do(makeGlobalCaches)
   433  
   434  	mm := makeManifestManager(blobstoreManifest{"manifest", bs})
   435  
   436  	p := &blobstorePersister{bs, s3BlockSize, globalIndexCache}
   437  	return newNomsBlockStore(ctx, nbfVerStr, mm, p, inlineConjoiner{defaultMaxTables}, memTableSize)
   438  }
   439  
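         // NewLocalStore returns an nbs implementation backed by table files in the directory |dir|.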
   440  func NewLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSize uint64) (*NomsBlockStore, error) {
   441  	return newLocalStore(ctx, nbfVerStr, dir, memTableSize, defaultMaxTables)
   442  }
   443  
   444  func newLocalStore(ctx context.Context, nbfVerStr string, dir string, memTableSize uint64, maxTables int) (*NomsBlockStore, error) {
   445  	cacheOnce.Do(makeGlobalCaches)
   446  	err := checkDir(dir)
   447  
   448  	if err != nil {
   449  		return nil, err
   450  	}
   451  
   452  	m, err := getFileManifest(ctx, dir)
   453  
   454  	if err != nil {
   455  		return nil, err
   456  	}
   457  
   458  	mm := makeManifestManager(m)
   459  	p := newFSTablePersister(dir, globalFDCache, globalIndexCache)
   460  	nbs, err := newNomsBlockStore(ctx, nbfVerStr, mm, p, inlineConjoiner{maxTables}, memTableSize)
   461  
   462  	if err != nil {
   463  		return nil, err
   464  	}
   465  
   466  	return nbs, nil
   467  }
   468  
   469  func checkDir(dir string) error {
   470  	stat, err := os.Stat(dir)
   471  	if err != nil {
   472  		return err
   473  	}
   474  	if !stat.IsDir() {
   475  		return fmt.Errorf("path is not a directory: %s", dir)
   476  	}
   477  	return nil
   478  }
   479  
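         // newNomsBlockStore constructs a NomsBlockStore and, if a manifest already exists,
         // rebases the store's table set against its contents.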
   480  func newNomsBlockStore(ctx context.Context, nbfVerStr string, mm manifestManager, p tablePersister, c conjoiner, memTableSize uint64) (*NomsBlockStore, error) {
   481  	if memTableSize == 0 {
   482  		memTableSize = defaultMemTableSize
   483  	}
   484  
   485  	nbs := &NomsBlockStore{
   486  		mm:       mm,
   487  		p:        p,
   488  		c:        c,
   489  		tables:   newTableSet(p),
   490  		upstream: manifestContents{vers: nbfVerStr},
   491  		mtSize:   memTableSize,
   492  		stats:    NewStats(),
   493  	}
   494  
   495  	t1 := time.Now()
   496  	defer nbs.stats.OpenLatency.SampleTimeSince(t1)
   497  
   498  	exists, contents, err := nbs.mm.Fetch(ctx, nbs.stats)
   499  
   500  	if err != nil {
   501  		return nil, err
   502  	}
   503  
   504  	if exists {
   505  		newTables, err := nbs.tables.Rebase(ctx, contents.specs, nbs.stats)
   506  
   507  		if err != nil {
   508  			return nil, err
   509  		}
   510  
   511  		nbs.upstream = contents
   512  		oldTables := nbs.tables
   513  		nbs.tables = newTables
   514  		err = oldTables.Close()
   515  		if err != nil {
   516  			return nil, err
   517  		}
   518  	}
   519  
   520  	return nbs, nil
   521  }
   522  
   523  // WithoutConjoiner returns a new *NomsBlockStore instance that will not
   524  // conjoin table files during manifest updates. Used in some server-side
    525  // contexts where table file maintenance is done out-of-process. Not
   526  // safe for use outside of NomsBlockStore construction.
   527  func (nbs *NomsBlockStore) WithoutConjoiner() *NomsBlockStore {
   528  	return &NomsBlockStore{
   529  		mm:       nbs.mm,
   530  		p:        nbs.p,
   531  		c:        noopConjoiner{},
   532  		mu:       sync.RWMutex{},
   533  		mt:       nbs.mt,
   534  		tables:   nbs.tables,
   535  		upstream: nbs.upstream,
   536  		mtSize:   nbs.mtSize,
   537  		putCount: nbs.putCount,
   538  		stats:    nbs.stats,
   539  	}
   540  }
   541  
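         // Put buffers the chunk in the store's memtable; when the memtable is full it is
         // prepended to the table set and a fresh memtable is started.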
   542  func (nbs *NomsBlockStore) Put(ctx context.Context, c chunks.Chunk) error {
   543  	t1 := time.Now()
   544  	a := addr(c.Hash())
   545  	success := nbs.addChunk(ctx, a, c.Data())
   546  
   547  	if !success {
   548  		return errors.New("failed to add chunk")
   549  	}
   550  
   551  	nbs.putCount++
   552  
   553  	nbs.stats.PutLatency.SampleTimeSince(t1)
   554  
   555  	return nil
   556  }
   557  
   558  func (nbs *NomsBlockStore) addChunk(ctx context.Context, h addr, data []byte) bool {
   559  	nbs.mu.Lock()
   560  	defer nbs.mu.Unlock()
   561  	if nbs.mt == nil {
   562  		nbs.mt = newMemTable(nbs.mtSize)
   563  	}
   564  	if !nbs.mt.addChunk(h, data) {
   565  		nbs.tables = nbs.tables.Prepend(ctx, nbs.mt, nbs.stats)
   566  		nbs.mt = newMemTable(nbs.mtSize)
   567  		return nbs.mt.addChunk(h, data)
   568  	}
   569  	return true
   570  }
   571  
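         // Get returns the chunk with hash |h|, checking the memtable before the table
         // files. It returns chunks.EmptyChunk if |h| is absent from the store.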
   572  func (nbs *NomsBlockStore) Get(ctx context.Context, h hash.Hash) (chunks.Chunk, error) {
   573  	span, ctx := tracing.StartSpan(ctx, "nbs.Get")
   574  	defer func() {
   575  		span.Finish()
   576  	}()
   577  
   578  	t1 := time.Now()
   579  	defer func() {
   580  		nbs.stats.GetLatency.SampleTimeSince(t1)
   581  		nbs.stats.ChunksPerGet.Sample(1)
   582  	}()
   583  
   584  	a := addr(h)
   585  	data, tables, err := func() ([]byte, chunkReader, error) {
   586  		var data []byte
   587  		nbs.mu.RLock()
   588  		defer nbs.mu.RUnlock()
   589  		if nbs.mt != nil {
   590  			var err error
   591  			data, err = nbs.mt.get(ctx, a, nbs.stats)
   592  
   593  			if err != nil {
   594  				return nil, nil, err
   595  			}
   596  		}
   597  		return data, nbs.tables, nil
   598  	}()
   599  
   600  	if err != nil {
   601  		return chunks.EmptyChunk, err
   602  	}
   603  
   604  	if data != nil {
   605  		return chunks.NewChunkWithHash(h, data), nil
   606  	}
   607  
   608  	data, err = tables.get(ctx, a, nbs.stats)
   609  
   610  	if err != nil {
   611  		return chunks.EmptyChunk, err
   612  	}
   613  
   614  	if data != nil {
   615  		return chunks.NewChunkWithHash(h, data), nil
   616  	}
   617  
   618  	return chunks.EmptyChunk, nil
   619  }
   620  
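         // GetMany calls |found| for each requested chunk present in the store; absent
         // hashes are skipped without error.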
   621  func (nbs *NomsBlockStore) GetMany(ctx context.Context, hashes hash.HashSet, found func(*chunks.Chunk)) error {
   622  	span, ctx := tracing.StartSpan(ctx, "nbs.GetMany")
   623  	span.LogKV("num_hashes", len(hashes))
   624  	defer func() {
   625  		span.Finish()
   626  	}()
   627  	return nbs.getManyWithFunc(ctx, hashes, func(ctx context.Context, cr chunkReader, eg *errgroup.Group, reqs []getRecord, stats *Stats) (bool, error) {
   628  		return cr.getMany(ctx, eg, reqs, found, nbs.stats)
   629  	})
   630  }
   631  
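         // GetManyCompressed is like GetMany, but delivers chunks in their compressed
         // on-disk form without decompressing them.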
   632  func (nbs *NomsBlockStore) GetManyCompressed(ctx context.Context, hashes hash.HashSet, found func(CompressedChunk)) error {
   633  	span, ctx := tracing.StartSpan(ctx, "nbs.GetManyCompressed")
   634  	span.LogKV("num_hashes", len(hashes))
   635  	defer func() {
   636  		span.Finish()
   637  	}()
   638  	return nbs.getManyWithFunc(ctx, hashes, func(ctx context.Context, cr chunkReader, eg *errgroup.Group, reqs []getRecord, stats *Stats) (bool, error) {
   639  		return cr.getManyCompressed(ctx, eg, reqs, found, nbs.stats)
   640  	})
   641  }
   642  
   643  func (nbs *NomsBlockStore) getManyWithFunc(
   644  	ctx context.Context,
   645  	hashes hash.HashSet,
   646  	getManyFunc func(ctx context.Context, cr chunkReader, eg *errgroup.Group, reqs []getRecord, stats *Stats) (bool, error),
   647  ) error {
   648  	t1 := time.Now()
   649  	reqs := toGetRecords(hashes)
   650  
   651  	defer func() {
   652  		if len(hashes) > 0 {
   653  			nbs.stats.GetLatency.SampleTimeSince(t1)
   654  			nbs.stats.ChunksPerGet.Sample(uint64(len(reqs)))
   655  		}
   656  	}()
   657  
   658  	eg, ctx := errgroup.WithContext(ctx)
   659  
   660  	tables, remaining, err := func() (tables chunkReader, remaining bool, err error) {
   661  		nbs.mu.RLock()
   662  		defer nbs.mu.RUnlock()
   663  		tables = nbs.tables
   664  		remaining = true
   665  		if nbs.mt != nil {
   666  			remaining, err = getManyFunc(ctx, nbs.mt, eg, reqs, nbs.stats)
   667  		}
   668  		return
   669  	}()
   670  	if err != nil {
   671  		return err
   672  	}
   673  
   674  	if remaining {
   675  		_, err = getManyFunc(ctx, tables, eg, reqs, nbs.stats)
   676  	}
   677  
   678  	if err != nil {
   679  		eg.Wait()
   680  		return err
   681  	}
   682  	return eg.Wait()
   683  }
   684  
   685  func toGetRecords(hashes hash.HashSet) []getRecord {
   686  	reqs := make([]getRecord, len(hashes))
   687  	idx := 0
   688  	for h := range hashes {
   689  		a := addr(h)
   690  		reqs[idx] = getRecord{
   691  			a:      &a,
   692  			prefix: a.Prefix(),
   693  		}
   694  		idx++
   695  	}
   696  
   697  	sort.Sort(getRecordByPrefix(reqs))
   698  	return reqs
   699  }
   700  
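         // CalcReads returns the number of physical reads, and whether any read must be
         // split, needed to fetch |hashes| from the table files at the given |blockSize|.
         // It returns an error if any requested chunk cannot be found.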
   701  func (nbs *NomsBlockStore) CalcReads(hashes hash.HashSet, blockSize uint64) (reads int, split bool, err error) {
   702  	reqs := toGetRecords(hashes)
   703  	tables := func() (tables tableSet) {
   704  		nbs.mu.RLock()
   705  		defer nbs.mu.RUnlock()
   706  		tables = nbs.tables
   707  
   708  		return
   709  	}()
   710  
   711  	reads, split, remaining, err := tables.calcReads(reqs, blockSize)
   712  
   713  	if err != nil {
   714  		return 0, false, err
   715  	}
   716  
   717  	if remaining {
   718  		return 0, false, errors.New("failed to find all chunks")
   719  	}
   720  
   721  	return
   722  }
   723  
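         // Count returns the total number of chunks in the memtable and all table files.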
   724  func (nbs *NomsBlockStore) Count() (uint32, error) {
   725  	count, tables, err := func() (count uint32, tables chunkReader, err error) {
   726  		nbs.mu.RLock()
   727  		defer nbs.mu.RUnlock()
   728  		if nbs.mt != nil {
   729  			count, err = nbs.mt.count()
   730  		}
   731  
   732  		if err != nil {
   733  			return 0, nil, err
   734  		}
   735  
   736  		return count, nbs.tables, nil
   737  	}()
   738  
   739  	if err != nil {
   740  		return 0, err
   741  	}
   742  
   743  	tablesCount, err := tables.count()
   744  
   745  	if err != nil {
   746  		return 0, err
   747  	}
   748  
   749  	return count + tablesCount, nil
   750  }
   751  
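         // Has reports whether the chunk with hash |h| exists in the memtable or in any
         // table file.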
   752  func (nbs *NomsBlockStore) Has(ctx context.Context, h hash.Hash) (bool, error) {
   753  	t1 := time.Now()
   754  	defer func() {
   755  		nbs.stats.HasLatency.SampleTimeSince(t1)
   756  		nbs.stats.AddressesPerHas.Sample(1)
   757  	}()
   758  
   759  	a := addr(h)
   760  	has, tables, err := func() (bool, chunkReader, error) {
   761  		nbs.mu.RLock()
   762  		defer nbs.mu.RUnlock()
   763  
   764  		if nbs.mt != nil {
   765  			has, err := nbs.mt.has(a)
   766  
   767  			if err != nil {
   768  				return false, nil, err
   769  			}
   770  
   771  			return has, nbs.tables, nil
   772  		}
   773  
   774  		return false, nbs.tables, nil
   775  	}()
   776  
   777  	if err != nil {
   778  		return false, err
   779  	}
   780  
   781  	if !has {
   782  		has, err = tables.has(a)
   783  
   784  		if err != nil {
   785  			return false, err
   786  		}
   787  	}
   788  
   789  	return has, nil
   790  }
   791  
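         // HasMany returns the subset of |hashes| that are absent from the store.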
   792  func (nbs *NomsBlockStore) HasMany(ctx context.Context, hashes hash.HashSet) (hash.HashSet, error) {
   793  	t1 := time.Now()
   794  
   795  	reqs := toHasRecords(hashes)
   796  
   797  	tables, remaining, err := func() (tables chunkReader, remaining bool, err error) {
   798  		nbs.mu.RLock()
   799  		defer nbs.mu.RUnlock()
   800  		tables = nbs.tables
   801  
   802  		remaining = true
   803  		if nbs.mt != nil {
   804  			remaining, err = nbs.mt.hasMany(reqs)
   805  
   806  			if err != nil {
   807  				return nil, false, err
   808  			}
   809  		}
   810  
   811  		return tables, remaining, nil
   812  	}()
   813  
   814  	if err != nil {
   815  		return nil, err
   816  	}
   817  
   818  	if remaining {
   819  		_, err := tables.hasMany(reqs)
   820  
   821  		if err != nil {
   822  			return nil, err
   823  		}
   824  	}
   825  
   826  	if len(hashes) > 0 {
   827  		nbs.stats.HasLatency.SampleTimeSince(t1)
   828  		nbs.stats.AddressesPerHas.SampleLen(len(reqs))
   829  	}
   830  
   831  	absent := hash.HashSet{}
   832  	for _, r := range reqs {
   833  		if !r.has {
   834  			absent.Insert(hash.New(r.a[:]))
   835  		}
   836  	}
   837  	return absent, nil
   838  }
   839  
   840  func toHasRecords(hashes hash.HashSet) []hasRecord {
   841  	reqs := make([]hasRecord, len(hashes))
   842  	idx := 0
   843  	for h := range hashes {
   844  		a := addr(h)
   845  		reqs[idx] = hasRecord{
   846  			a:      &a,
   847  			prefix: a.Prefix(),
   848  			order:  idx,
   849  		}
   850  		idx++
   851  	}
   852  
   853  	sort.Sort(hasRecordByPrefix(reqs))
   854  	return reqs
   855  }
   856  
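         // Rebase re-reads the manifest and, if one exists, updates the in-memory table
         // set and upstream contents to match it.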
   857  func (nbs *NomsBlockStore) Rebase(ctx context.Context) error {
   858  	nbs.mu.Lock()
   859  	defer nbs.mu.Unlock()
   860  	exists, contents, err := nbs.mm.Fetch(ctx, nbs.stats)
   861  
   862  	if err != nil {
   863  		return err
   864  	}
   865  
   866  	if exists {
   867  		newTables, err := nbs.tables.Rebase(ctx, contents.specs, nbs.stats)
   868  
   869  		if err != nil {
   870  			return err
   871  		}
   872  
   873  		nbs.upstream = contents
   874  		oldTables := nbs.tables
   875  		nbs.tables = newTables
   876  		err = oldTables.Close()
   877  		if err != nil {
   878  			return err
   879  		}
   880  	}
   881  
   882  	return nil
   883  }
   884  
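         // Root returns the hash of the store's root chunk as of the last manifest read.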
   885  func (nbs *NomsBlockStore) Root(ctx context.Context) (hash.Hash, error) {
   886  	nbs.mu.RLock()
   887  	defer nbs.mu.RUnlock()
   888  	return nbs.upstream.root, nil
   889  }
   890  
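         // Commit flushes any buffered chunks and attempts to atomically move the root
         // from |last| to |current|. It returns false with a nil error when the
         // optimistic lock on the root fails, i.e. another writer moved the root first.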
   891  func (nbs *NomsBlockStore) Commit(ctx context.Context, current, last hash.Hash) (success bool, err error) {
   892  	t1 := time.Now()
   893  	defer nbs.stats.CommitLatency.SampleTimeSince(t1)
   894  
   895  	anyPossiblyNovelChunks := func() bool {
   896  		nbs.mu.Lock()
   897  		defer nbs.mu.Unlock()
   898  		return nbs.mt != nil || nbs.tables.Novel() > 0
   899  	}
   900  
   901  	if !anyPossiblyNovelChunks() && current == last {
   902  		err := nbs.Rebase(ctx)
   903  
   904  		if err != nil {
   905  			return false, err
   906  		}
   907  
   908  		return true, nil
   909  	}
   910  
   911  	err = func() error {
   912  		// This is unfortunate. We want to serialize commits to the same store
   913  		// so that we avoid writing a bunch of unreachable small tables which result
   914  		// from optimistic lock failures. However, this means that the time to
   915  		// write tables is included in "commit" time and if all commits are
   916  		// serialized, it means a lot more waiting.
    917  		// "Non-trivial" tables are persisted here, outside of the commit lock;
    918  		// all other tables are persisted in updateManifest().
   919  		nbs.mu.Lock()
   920  		defer nbs.mu.Unlock()
   921  
   922  		if nbs.mt != nil {
   923  			cnt, err := nbs.mt.count()
   924  
   925  			if err != nil {
   926  				return err
   927  			}
   928  
   929  			if cnt > preflushChunkCount {
   930  				nbs.tables = nbs.tables.Prepend(ctx, nbs.mt, nbs.stats)
   931  				nbs.mt = nil
   932  			}
   933  		}
   934  
   935  		return nil
   936  	}()
   937  
   938  	if err != nil {
   939  		return false, err
   940  	}
   941  
   942  	nbs.mm.LockForUpdate()
   943  	defer func() {
   944  		unlockErr := nbs.mm.UnlockForUpdate()
   945  
   946  		if err == nil {
   947  			err = unlockErr
   948  		}
   949  	}()
   950  
   951  	nbs.mu.Lock()
   952  	defer nbs.mu.Unlock()
   953  	for {
   954  		if err := nbs.updateManifest(ctx, current, last); err == nil {
   955  			return true, nil
   956  		} else if err == errOptimisticLockFailedRoot || err == errLastRootMismatch {
   957  			return false, nil
   958  		} else if err != errOptimisticLockFailedTables {
   959  			return false, err
   960  		}
   961  
    962  		// Note: this retries indefinitely, without backoff, in the case of errOptimisticLockFailedTables.
   963  	}
   964  }
   965  
   966  var (
   967  	errLastRootMismatch           = fmt.Errorf("last does not match nbs.Root()")
   968  	errOptimisticLockFailedRoot   = fmt.Errorf("root moved")
   969  	errOptimisticLockFailedTables = fmt.Errorf("tables changed")
   970  )
   971  
   972  // callers must acquire lock |nbs.mu|
   973  func (nbs *NomsBlockStore) updateManifest(ctx context.Context, current, last hash.Hash) error {
   974  	if nbs.upstream.root != last {
   975  		return errLastRootMismatch
   976  	}
   977  
   978  	handleOptimisticLockFailure := func(upstream manifestContents) error {
   979  		newTables, err := nbs.tables.Rebase(ctx, upstream.specs, nbs.stats)
   980  		if err != nil {
   981  			return err
   982  		}
   983  
   984  		nbs.upstream = upstream
   985  		oldTables := nbs.tables
   986  		nbs.tables = newTables
   987  		err = oldTables.Close()
   988  
   989  		if last != upstream.root {
   990  			return errOptimisticLockFailedRoot
   991  		}
   992  
   993  		if err != nil {
   994  			return err
   995  		}
   996  
   997  		return errOptimisticLockFailedTables
   998  	}
   999  
  1000  	if cached, doomed := nbs.mm.updateWillFail(nbs.upstream.lock); doomed {
   1001  		// Pre-emptive optimistic lock failure. Someone else in-process moved the root, the set of tables, or both out from under us.
  1002  		return handleOptimisticLockFailure(cached)
  1003  	}
  1004  
  1005  	if nbs.mt != nil {
  1006  		cnt, err := nbs.mt.count()
  1007  
  1008  		if err != nil {
  1009  			return err
  1010  		}
  1011  
  1012  		if cnt > 0 {
  1013  			nbs.tables = nbs.tables.Prepend(ctx, nbs.mt, nbs.stats)
  1014  			nbs.mt = nil
  1015  		}
  1016  	}
  1017  
  1018  	if nbs.c.ConjoinRequired(nbs.tables) {
  1019  		var err error
  1020  
  1021  		newUpstream, err := nbs.c.Conjoin(ctx, nbs.upstream, nbs.mm, nbs.p, nbs.stats)
  1022  
  1023  		if err != nil {
  1024  			return err
  1025  		}
  1026  
  1027  		newTables, err := nbs.tables.Rebase(ctx, newUpstream.specs, nbs.stats)
  1028  
  1029  		if err != nil {
  1030  			return err
  1031  		}
  1032  
  1033  		nbs.upstream = newUpstream
  1034  		oldTables := nbs.tables
  1035  		nbs.tables = newTables
  1036  		err = oldTables.Close()
  1037  		if err != nil {
  1038  			return err
  1039  		}
  1040  
  1041  		return errOptimisticLockFailedTables
  1042  	}
  1043  
  1044  	specs, err := nbs.tables.ToSpecs()
  1045  	if err != nil {
  1046  		return err
  1047  	}
  1048  
   1049  	// ensure we don't drop appendices on commit
   1050  	var appendixSpecs []tableSpec
   1051  	if len(nbs.upstream.appendix) > 0 {
  1052  		appendixSet := nbs.upstream.getAppendixSet()
  1053  
  1054  		filtered := make([]tableSpec, 0, len(specs))
  1055  		for _, s := range specs {
  1056  			if _, present := appendixSet[s.name]; !present {
  1057  				filtered = append(filtered, s)
  1058  			}
  1059  		}
  1060  
  1061  		_, appendixSpecs = nbs.upstream.removeAppendixSpecs()
  1062  		prepended := append([]tableSpec{}, appendixSpecs...)
  1063  		specs = append(prepended, filtered...)
  1064  	}
  1065  
  1066  	newContents := manifestContents{
  1067  		vers:     nbs.upstream.vers,
  1068  		root:     current,
  1069  		lock:     generateLockHash(current, specs),
  1070  		gcGen:    nbs.upstream.gcGen,
  1071  		specs:    specs,
  1072  		appendix: appendixSpecs,
  1073  	}
  1074  
  1075  	upstream, err := nbs.mm.Update(ctx, nbs.upstream.lock, newContents, nbs.stats, nil)
  1076  	if err != nil {
  1077  		return err
  1078  	}
  1079  
  1080  	if newContents.lock != upstream.lock {
   1081  		// Optimistic lock failure. Someone else moved the root, the set of tables, or both out from under us.
  1082  		return handleOptimisticLockFailure(upstream)
  1083  	}
  1084  
  1085  	newTables, err := nbs.tables.Flatten()
  1086  
  1087  	if err != nil {
   1088  		return err
  1089  	}
  1090  
  1091  	nbs.upstream = newContents
  1092  	nbs.tables = newTables
  1093  
  1094  	return nil
  1095  }
  1096  
  1097  func (nbs *NomsBlockStore) Version() string {
  1098  	return nbs.upstream.vers
  1099  }
  1100  
  1101  func (nbs *NomsBlockStore) Close() error {
  1102  	return nbs.tables.Close()
  1103  }
  1104  
  1105  func (nbs *NomsBlockStore) Stats() interface{} {
  1106  	return nbs.stats.Clone()
  1107  }
  1108  
  1109  func (nbs *NomsBlockStore) StatsSummary() string {
  1110  	nbs.mu.Lock()
  1111  	defer nbs.mu.Unlock()
  1112  	cnt, _ := nbs.tables.count()
  1113  	physLen, _ := nbs.tables.physicalLen()
  1114  	return fmt.Sprintf("Root: %s; Chunk Count %d; Physical Bytes %s", nbs.upstream.root, cnt, humanize.Bytes(physLen))
  1115  }
  1116  
  1117  // tableFile is our implementation of TableFile.
  1118  type tableFile struct {
  1119  	info TableSpecInfo
  1120  	open func(ctx context.Context) (io.ReadCloser, error)
  1121  }
  1122  
  1123  // FileID gets the id of the file
  1124  func (tf tableFile) FileID() string {
  1125  	return tf.info.GetName()
  1126  }
  1127  
  1128  // NumChunks returns the number of chunks in a table file
  1129  func (tf tableFile) NumChunks() int {
  1130  	return int(tf.info.GetChunkCount())
  1131  }
  1132  
  1133  // Open returns an io.ReadCloser which can be used to read the bytes of a table file.
  1134  func (tf tableFile) Open(ctx context.Context) (io.ReadCloser, error) {
  1135  	return tf.open(ctx)
  1136  }
  1137  
   1138  // Sources retrieves the current root hash, a list of all table files (which may include appendix table files),
  1139  // and a second list of only the appendix table files
  1140  func (nbs *NomsBlockStore) Sources(ctx context.Context) (hash.Hash, []TableFile, []TableFile, error) {
  1141  	nbs.mu.Lock()
  1142  	defer nbs.mu.Unlock()
  1143  
  1144  	stats := &Stats{}
  1145  	exists, contents, err := nbs.mm.m.ParseIfExists(ctx, stats, nil)
  1146  
  1147  	if err != nil {
  1148  		return hash.Hash{}, nil, nil, err
  1149  	}
  1150  
  1151  	if !exists {
  1152  		return hash.Hash{}, nil, nil, nil
  1153  	}
  1154  
  1155  	css, err := nbs.chunkSourcesByAddr()
  1156  	if err != nil {
  1157  		return hash.Hash{}, nil, nil, err
  1158  	}
  1159  
  1160  	appendixTableFiles, err := getTableFiles(css, contents, contents.NumAppendixSpecs(), func(mc manifestContents, idx int) tableSpec {
  1161  		return mc.getAppendixSpec(idx)
  1162  	})
  1163  	if err != nil {
  1164  		return hash.Hash{}, nil, nil, err
  1165  	}
  1166  
  1167  	allTableFiles, err := getTableFiles(css, contents, contents.NumTableSpecs(), func(mc manifestContents, idx int) tableSpec {
  1168  		return mc.getSpec(idx)
  1169  	})
  1170  	if err != nil {
  1171  		return hash.Hash{}, nil, nil, err
  1172  	}
  1173  
  1174  	return contents.GetRoot(), allTableFiles, appendixTableFiles, nil
  1175  }
  1176  
  1177  func getTableFiles(css map[addr]chunkSource, contents manifestContents, numSpecs int, specFunc func(mc manifestContents, idx int) tableSpec) ([]TableFile, error) {
  1178  	tableFiles := make([]TableFile, 0)
  1179  	if numSpecs == 0 {
  1180  		return tableFiles, nil
  1181  	}
  1182  	for i := 0; i < numSpecs; i++ {
  1183  		info := specFunc(contents, i)
  1184  		cs, ok := css[info.name]
  1185  		if !ok {
  1186  			return nil, ErrSpecWithoutChunkSource
  1187  		}
  1188  		tableFiles = append(tableFiles, newTableFile(cs, info))
  1189  	}
  1190  	return tableFiles, nil
  1191  }
  1192  
  1193  func newTableFile(cs chunkSource, info tableSpec) tableFile {
  1194  	return tableFile{
  1195  		info: info,
  1196  		open: func(ctx context.Context) (io.ReadCloser, error) {
  1197  			r, err := cs.reader(ctx)
  1198  			if err != nil {
  1199  				return nil, err
  1200  			}
  1201  
  1202  			return ioutil.NopCloser(r), nil
  1203  		},
  1204  	}
  1205  }
  1206  
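         // Size returns the combined size in bytes of all table files referenced by the
         // manifest, as reported by their table file indexes.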
  1207  func (nbs *NomsBlockStore) Size(ctx context.Context) (uint64, error) {
  1208  	nbs.mu.Lock()
  1209  	defer nbs.mu.Unlock()
  1210  
  1211  	stats := &Stats{}
  1212  	exists, contents, err := nbs.mm.m.ParseIfExists(ctx, stats, nil)
  1213  
  1214  	if err != nil {
  1215  		return uint64(0), err
  1216  	}
  1217  
  1218  	if !exists {
  1219  		return uint64(0), nil
  1220  	}
  1221  
  1222  	css, err := nbs.chunkSourcesByAddr()
  1223  	if err != nil {
  1224  		return uint64(0), err
  1225  	}
  1226  
  1227  	numSpecs := contents.NumTableSpecs()
  1228  
  1229  	size := uint64(0)
  1230  	for i := 0; i < numSpecs; i++ {
  1231  		info := contents.getSpec(i)
  1232  		cs, ok := css[info.name]
  1233  		if !ok {
   1234  			return uint64(0), ErrSpecWithoutChunkSource
  1235  		}
  1236  		ti, err := cs.index()
  1237  		if err != nil {
   1238  			return uint64(0), fmt.Errorf("error getting table file index for chunkSource: %w", err)
  1239  		}
  1240  		size += ti.TableFileSize()
  1241  	}
  1242  	return size, nil
  1243  }
  1244  
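         // chunkSourcesByAddr returns the store's upstream and novel chunk sources keyed
         // by table file address.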
  1245  func (nbs *NomsBlockStore) chunkSourcesByAddr() (map[addr]chunkSource, error) {
  1246  	css := make(map[addr]chunkSource, len(nbs.tables.upstream)+len(nbs.tables.novel))
  1247  	for _, cs := range nbs.tables.upstream {
  1248  		a, err := cs.hash()
  1249  		if err != nil {
  1250  			return nil, err
  1251  		}
  1252  		css[a] = cs
  1253  	}
  1254  	for _, cs := range nbs.tables.novel {
  1255  		a, err := cs.hash()
  1256  		if err != nil {
  1257  			return nil, err
  1258  		}
  1259  		css[a] = cs
  1260  	}
  1261  	return css, nil
  1263  }
  1264  
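         // SupportedOperations reports which TableFileStore operations this store supports;
         // writing, pruning and GC require a filesystem-backed persister.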
  1265  func (nbs *NomsBlockStore) SupportedOperations() TableFileStoreOps {
  1266  	_, ok := nbs.p.(*fsTablePersister)
  1267  	return TableFileStoreOps{
  1268  		CanRead:  true,
  1269  		CanWrite: ok,
  1270  		CanPrune: ok,
  1271  		CanGC:    ok,
  1272  	}
  1273  }
  1274  
  1275  // WriteTableFile will read a table file from the provided reader and write it to the TableFileStore
  1276  func (nbs *NomsBlockStore) WriteTableFile(ctx context.Context, fileId string, numChunks int, rd io.Reader, contentLength uint64, contentHash []byte) error {
  1277  	fsPersister, ok := nbs.p.(*fsTablePersister)
  1278  
  1279  	if !ok {
   1280  		return errors.New("not implemented")
  1281  	}
  1282  
  1283  	path := filepath.Join(fsPersister.dir, fileId)
  1284  
  1285  	err := func() (err error) {
  1286  		var f *os.File
  1287  		f, err = os.OpenFile(path, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, os.ModePerm)
  1288  
  1289  		if err != nil {
  1290  			return err
  1291  		}
  1292  
  1293  		defer func() {
  1294  			closeErr := f.Close()
  1295  
  1296  			if err == nil {
  1297  				err = closeErr
  1298  			}
  1299  		}()
  1300  
  1301  		return writeTo(f, rd, copyTableFileBufferSize)
  1302  	}()
  1303  
  1304  	if err != nil {
  1305  		return err
  1306  	}
  1307  
  1308  	fileIdHash, ok := hash.MaybeParse(fileId)
  1309  
  1310  	if !ok {
  1311  		return errors.New("invalid base32 encoded hash: " + fileId)
  1312  	}
  1313  
  1314  	_, err = nbs.UpdateManifest(ctx, map[hash.Hash]uint32{fileIdHash: uint32(numChunks)})
  1315  
  1316  	return err
  1317  }
  1318  
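         // writeTo copies |rd| to |wr| through a buffer of |bufferSize| bytes, handling
         // short writes and a final read that returns data together with io.EOF.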
  1319  func writeTo(wr io.Writer, rd io.Reader, bufferSize uint32) error {
  1320  	buf := make([]byte, bufferSize)
  1321  
  1322  	for {
   1323  		// Read can return n > 0 bytes together with io.EOF
  1324  		n, err := rd.Read(buf)
  1325  
  1326  		if err != nil && err != io.EOF {
  1327  			return err
  1328  		}
  1329  
  1330  		pos := 0
  1331  		for pos < n {
   1332  			written, wrErr := wr.Write(buf[pos:n])
  1333  
  1334  			if wrErr != nil {
  1335  				return wrErr
  1336  			}
  1337  
   1338  			pos += written
  1339  		}
  1340  
  1341  		if err == io.EOF {
  1342  			break
  1343  		}
  1344  	}
  1345  
  1346  	return nil
  1347  }
  1348  
  1349  // PruneTableFiles deletes old table files that are no longer referenced in the manifest.
  1350  func (nbs *NomsBlockStore) PruneTableFiles(ctx context.Context) (err error) {
  1351  	nbs.mu.Lock()
  1352  	defer nbs.mu.Unlock()
  1353  
  1354  	nbs.mm.LockForUpdate()
  1355  	defer func() {
  1356  		unlockErr := nbs.mm.UnlockForUpdate()
  1357  
  1358  		if err == nil {
  1359  			err = unlockErr
  1360  		}
  1361  	}()
  1362  
  1363  	for {
  1364  		// flush all tables and update manifest
  1365  		err = nbs.updateManifest(ctx, nbs.upstream.root, nbs.upstream.root)
  1366  
  1367  		if err == nil {
  1368  			break
  1369  		} else if err == errOptimisticLockFailedTables {
  1370  			continue
  1371  		} else {
  1372  			return err
  1373  		}
  1374  
   1375  		// Same behavior as Commit:
   1376  		// retries indefinitely without backoff in the case of errOptimisticLockFailedTables.
  1377  	}
  1378  
  1379  	ok, contents, err := nbs.mm.Fetch(ctx, &Stats{})
  1380  	if err != nil {
  1381  		return err
  1382  	}
  1383  	if !ok {
  1384  		return nil // no manifest exists
  1385  	}
  1386  
  1387  	return nbs.p.PruneTableFiles(ctx, contents)
  1388  }
  1389  
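         // MarkAndSweepChunks copies the chunks received on |keepChunks| into new table
         // files, swaps the manifest over to reference only those files, and prunes the
         // now-unreferenced table files.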
  1390  func (nbs *NomsBlockStore) MarkAndSweepChunks(ctx context.Context, last hash.Hash, keepChunks <-chan []hash.Hash) error {
  1391  	ops := nbs.SupportedOperations()
  1392  	if !ops.CanGC || !ops.CanPrune {
  1393  		return chunks.ErrUnsupportedOperation
  1394  	}
  1395  
  1396  	if nbs.upstream.root != last {
  1397  		return errLastRootMismatch
  1398  	}
  1399  
  1400  	specs, err := nbs.copyMarkedChunks(ctx, keepChunks)
  1401  	if err != nil {
  1402  		return err
  1403  	}
  1404  	if ctx.Err() != nil {
  1405  		return ctx.Err()
  1406  	}
  1407  
  1408  	err = nbs.swapTables(ctx, specs)
  1409  	if err != nil {
  1410  		return err
  1411  	}
  1412  	if ctx.Err() != nil {
  1413  		return ctx.Err()
  1414  	}
  1415  
  1416  	ok, contents, err := nbs.mm.Fetch(ctx, &Stats{})
  1417  	if err != nil {
  1418  		return err
  1419  	}
  1420  	if !ok {
  1421  		panic("no manifest")
  1422  	}
  1423  	if ctx.Err() != nil {
  1424  		return ctx.Err()
  1425  	}
  1426  
  1427  	return nbs.p.PruneTableFiles(ctx, contents)
  1428  }
  1429  
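         // copyMarkedChunks reads batches of hashes from |keepChunks|, copies the
         // corresponding compressed chunks into new table files under the store's
         // directory, and returns specs for the tables it wrote.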
  1430  func (nbs *NomsBlockStore) copyMarkedChunks(ctx context.Context, keepChunks <-chan []hash.Hash) ([]tableSpec, error) {
  1431  	gcc, err := newGarbageCollectionCopier()
  1432  	if err != nil {
  1433  		return nil, err
  1434  	}
  1435  
  1436  LOOP:
  1437  	for {
  1438  		select {
  1439  		case hs, ok := <-keepChunks:
  1440  			if !ok {
  1441  				break LOOP
  1442  			}
  1443  			var addErr error
  1444  			mu := new(sync.Mutex)
  1445  			hashset := hash.NewHashSet(hs...)
  1446  			err := nbs.GetManyCompressed(ctx, hashset, func(c CompressedChunk) {
  1447  				mu.Lock()
  1448  				defer mu.Unlock()
  1449  				if addErr != nil {
  1450  					return
  1451  				}
  1452  				addErr = gcc.addChunk(ctx, c)
  1453  			})
  1454  			if err != nil {
  1455  				return nil, err
  1456  			}
  1457  			if addErr != nil {
  1458  				return nil, addErr
  1459  			}
  1460  		case <-ctx.Done():
  1461  			return nil, ctx.Err()
  1462  		}
  1463  	}
  1464  
  1465  	nomsDir := nbs.p.(*fsTablePersister).dir
  1466  
  1467  	return gcc.copyTablesToDir(ctx, nomsDir)
  1468  }
  1469  
  1470  // todo: what's the optimal table size to copy to?
  1471  func (nbs *NomsBlockStore) gcTableSize() (uint64, error) {
  1472  	total, err := nbs.tables.physicalLen()
  1473  
  1474  	if err != nil {
  1475  		return 0, err
  1476  	}
  1477  
  1478  	avgTableSize := total / uint64(nbs.tables.Upstream()+nbs.tables.Novel()+1)
  1479  
  1480  	// max(avgTableSize, defaultMemTableSize)
  1481  	if avgTableSize > nbs.mtSize {
  1482  		return avgTableSize, nil
  1483  	}
  1484  	return nbs.mtSize, nil
  1485  }
  1486  
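         // swapTables installs |specs| as the store's complete table set by writing a
         // manifest with a new gcGen, then clears the memtable and novel tables and
         // rebases onto |specs|.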
  1487  func (nbs *NomsBlockStore) swapTables(ctx context.Context, specs []tableSpec) error {
  1488  	newLock := generateLockHash(nbs.upstream.root, specs)
  1489  	newContents := manifestContents{
  1490  		vers:  nbs.upstream.vers,
  1491  		root:  nbs.upstream.root,
  1492  		lock:  newLock,
  1493  		gcGen: newLock,
  1494  		specs: specs,
  1495  	}
  1496  
  1497  	var err error
  1498  	nbs.mm.LockForUpdate()
  1499  	defer func() {
  1500  		unlockErr := nbs.mm.UnlockForUpdate()
  1501  
  1502  		if err == nil {
  1503  			err = unlockErr
  1504  		}
  1505  	}()
  1506  
  1507  	upstream, err := nbs.mm.UpdateGCGen(ctx, nbs.upstream.lock, newContents, nbs.stats, nil)
  1508  	if err != nil {
  1509  		return err
  1510  	}
  1511  
  1512  	// clear memTable
  1513  	nbs.mt = newMemTable(nbs.mtSize)
  1514  
  1515  	// clear nbs.tables.novel
  1516  	nbs.tables, err = nbs.tables.Flatten()
  1517  
  1518  	if err != nil {
   1519  		return err
  1520  	}
  1521  
  1522  	// replace nbs.tables.upstream with gc compacted tables
  1523  	nbs.upstream = upstream
  1524  	nbs.tables, err = nbs.tables.Rebase(ctx, specs, nbs.stats)
  1525  
  1526  	if err != nil {
  1527  		return err
  1528  	}
  1529  
  1530  	return nil
  1531  }
  1532  
  1533  // SetRootChunk changes the root chunk hash from the previous value to the new root.
  1534  func (nbs *NomsBlockStore) SetRootChunk(ctx context.Context, root, previous hash.Hash) error {
  1535  	nbs.mu.Lock()
  1536  	defer nbs.mu.Unlock()
  1537  	for {
  1538  		err := nbs.updateManifest(ctx, root, previous)
  1539  
  1540  		if err == nil {
  1541  			return nil
  1542  		} else if err == errOptimisticLockFailedTables {
  1543  			continue
  1544  		} else {
  1545  			return err
  1546  		}
  1547  
   1548  		// Same behavior as Commit:
   1549  		// retries indefinitely without backoff in the case of errOptimisticLockFailedTables.
  1550  	}
  1551  }