github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/nbs/table_set.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package nbs
    23  
    24  import (
    25  	"context"
    26  	"errors"
    27  	"sync"
    28  	"sync/atomic"
    29  
    30  	"golang.org/x/sync/errgroup"
    31  
    32  	"github.com/dolthub/dolt/go/store/atomicerr"
    33  	"github.com/dolthub/dolt/go/store/chunks"
    34  )
    35  
// concurrentCompactions is the buffer capacity of the rl channel that
// newTableSet creates. Presumably it bounds how many table compactions may be
// in flight at once; confirm against newPersistingChunkSource, which receives
// that channel.
const concurrentCompactions = 5
    37  
    38  func newTableSet(persister tablePersister) tableSet {
    39  	return tableSet{p: persister, rl: make(chan struct{}, concurrentCompactions)}
    40  }
    41  
    42  // tableSet is an immutable set of persistable chunkSources.
    43  type tableSet struct {
    44  	novel, upstream chunkSources
    45  	p               tablePersister
    46  	rl              chan struct{}
    47  }
    48  
    49  func (ts tableSet) has(h addr) (bool, error) {
    50  	f := func(css chunkSources) (bool, error) {
    51  		for _, haver := range css {
    52  			has, err := haver.has(h)
    53  
    54  			if err != nil {
    55  				return false, err
    56  			}
    57  
    58  			if has {
    59  				return true, nil
    60  			}
    61  		}
    62  		return false, nil
    63  	}
    64  
    65  	novelHas, err := f(ts.novel)
    66  
    67  	if err != nil {
    68  		return false, err
    69  	}
    70  
    71  	if novelHas {
    72  		return true, nil
    73  	}
    74  
    75  	return f(ts.upstream)
    76  }
    77  
    78  func (ts tableSet) hasMany(addrs []hasRecord) (bool, error) {
    79  	f := func(css chunkSources) (bool, error) {
    80  		for _, haver := range css {
    81  			has, err := haver.hasMany(addrs)
    82  
    83  			if err != nil {
    84  				return false, err
    85  			}
    86  
    87  			if !has {
    88  				return false, nil
    89  			}
    90  		}
    91  		return true, nil
    92  	}
    93  	remaining, err := f(ts.novel)
    94  
    95  	if err != nil {
    96  		return false, err
    97  	}
    98  
    99  	if !remaining {
   100  		return false, nil
   101  	}
   102  
   103  	return f(ts.upstream)
   104  }
   105  
   106  func (ts tableSet) get(ctx context.Context, h addr, stats *Stats) ([]byte, error) {
   107  	f := func(css chunkSources) ([]byte, error) {
   108  		for _, haver := range css {
   109  			data, err := haver.get(ctx, h, stats)
   110  
   111  			if err != nil {
   112  				return nil, err
   113  			}
   114  
   115  			if data != nil {
   116  				return data, nil
   117  			}
   118  		}
   119  
   120  		return nil, nil
   121  	}
   122  
   123  	data, err := f(ts.novel)
   124  
   125  	if err != nil {
   126  		return nil, err
   127  	}
   128  
   129  	if data != nil {
   130  		return data, nil
   131  	}
   132  
   133  	return f(ts.upstream)
   134  }
   135  
   136  func (ts tableSet) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(*chunks.Chunk), stats *Stats) (remaining bool, err error) {
   137  	f := func(css chunkSources) bool {
   138  		for _, haver := range css {
   139  			if rp, ok := haver.(chunkReadPlanner); ok {
   140  				offsets, remaining := rp.findOffsets(reqs)
   141  				err = rp.getManyAtOffsets(ctx, eg, offsets, found, stats)
   142  				if err != nil {
   143  					return true
   144  				}
   145  				if !remaining {
   146  					return false
   147  				}
   148  				continue
   149  			}
   150  			remaining, err = haver.getMany(ctx, eg, reqs, found, stats)
   151  			if err != nil {
   152  				return true
   153  			}
   154  			if !remaining {
   155  				return false
   156  			}
   157  		}
   158  		return true
   159  	}
   160  
   161  	return f(ts.novel) && err == nil && f(ts.upstream), err
   162  }
   163  
   164  func (ts tableSet) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(CompressedChunk), stats *Stats) (remaining bool, err error) {
   165  	f := func(css chunkSources) bool {
   166  		for _, haver := range css {
   167  			if rp, ok := haver.(chunkReadPlanner); ok {
   168  				offsets, remaining := rp.findOffsets(reqs)
   169  				if len(offsets) > 0 {
   170  					err = rp.getManyCompressedAtOffsets(ctx, eg, offsets, found, stats)
   171  					if err != nil {
   172  						return true
   173  					}
   174  				}
   175  
   176  				if !remaining {
   177  					return false
   178  				}
   179  
   180  				continue
   181  			}
   182  
   183  			remaining, err = haver.getManyCompressed(ctx, eg, reqs, found, stats)
   184  			if err != nil {
   185  				return true
   186  			}
   187  			if !remaining {
   188  				return false
   189  			}
   190  		}
   191  
   192  		return true
   193  	}
   194  
   195  	return f(ts.novel) && err == nil && f(ts.upstream), err
   196  }
   197  
   198  func (ts tableSet) calcReads(reqs []getRecord, blockSize uint64) (reads int, split, remaining bool, err error) {
   199  	f := func(css chunkSources) (int, bool, bool, error) {
   200  		reads, split := 0, false
   201  		for _, haver := range css {
   202  			rds, rmn, err := haver.calcReads(reqs, blockSize)
   203  
   204  			if err != nil {
   205  				return 0, false, false, err
   206  			}
   207  
   208  			reads += rds
   209  			if !rmn {
   210  				return reads, split, false, nil
   211  			}
   212  			split = true
   213  		}
   214  		return reads, split, true, nil
   215  	}
   216  	reads, split, remaining, err = f(ts.novel)
   217  
   218  	if err != nil {
   219  		return 0, false, false, err
   220  	}
   221  
   222  	if remaining {
   223  		var rds int
   224  		rds, split, remaining, err = f(ts.upstream)
   225  
   226  		if err != nil {
   227  			return 0, false, false, err
   228  		}
   229  
   230  		reads += rds
   231  	}
   232  
   233  	return reads, split, remaining, nil
   234  }
   235  
   236  func (ts tableSet) count() (uint32, error) {
   237  	f := func(css chunkSources) (count uint32, err error) {
   238  		for _, haver := range css {
   239  			thisCount, err := haver.count()
   240  
   241  			if err != nil {
   242  				return 0, err
   243  			}
   244  
   245  			count += thisCount
   246  		}
   247  		return
   248  	}
   249  
   250  	novelCount, err := f(ts.novel)
   251  
   252  	if err != nil {
   253  		return 0, err
   254  	}
   255  
   256  	upCount, err := f(ts.upstream)
   257  
   258  	if err != nil {
   259  		return 0, err
   260  	}
   261  
   262  	return novelCount + upCount, nil
   263  }
   264  
   265  func (ts tableSet) uncompressedLen() (uint64, error) {
   266  	f := func(css chunkSources) (data uint64, err error) {
   267  		for _, haver := range css {
   268  			uncmpLen, err := haver.uncompressedLen()
   269  
   270  			if err != nil {
   271  				return 0, err
   272  			}
   273  
   274  			data += uncmpLen
   275  		}
   276  		return
   277  	}
   278  
   279  	novelCount, err := f(ts.novel)
   280  
   281  	if err != nil {
   282  		return 0, err
   283  	}
   284  
   285  	upCount, err := f(ts.upstream)
   286  
   287  	if err != nil {
   288  		return 0, err
   289  	}
   290  
   291  	return novelCount + upCount, nil
   292  }
   293  
   294  func (ts tableSet) physicalLen() (uint64, error) {
   295  	f := func(css chunkSources) (data uint64, err error) {
   296  		for _, haver := range css {
   297  			index, err := haver.index()
   298  			if err != nil {
   299  				return 0, err
   300  			}
   301  			data += index.TableFileSize()
   302  		}
   303  		return
   304  	}
   305  
   306  	lenNovel, err := f(ts.novel)
   307  	if err != nil {
   308  		return 0, err
   309  	}
   310  
   311  	lenUp, err := f(ts.upstream)
   312  	if err != nil {
   313  		return 0, err
   314  	}
   315  
   316  	return lenNovel + lenUp, nil
   317  }
   318  
   319  func (ts tableSet) Close() error {
   320  	var firstErr error
   321  	for _, t := range ts.novel {
   322  		err := t.Close()
   323  		if err != nil && firstErr == nil {
   324  			firstErr = err
   325  		}
   326  	}
   327  	for _, t := range ts.upstream {
   328  		err := t.Close()
   329  		if err != nil && firstErr == nil {
   330  			firstErr = err
   331  		}
   332  	}
   333  	return firstErr
   334  }
   335  
   336  // Size returns the number of tables in this tableSet.
   337  func (ts tableSet) Size() int {
   338  	return len(ts.novel) + len(ts.upstream)
   339  }
   340  
   341  // Novel returns the number of tables containing novel chunks in this
   342  // tableSet.
   343  func (ts tableSet) Novel() int {
   344  	return len(ts.novel)
   345  }
   346  
   347  // Upstream returns the number of known-persisted tables in this tableSet.
   348  func (ts tableSet) Upstream() int {
   349  	return len(ts.upstream)
   350  }
   351  
   352  // Prepend adds a memTable to an existing tableSet, compacting |mt| and
   353  // returning a new tableSet with newly compacted table added.
   354  func (ts tableSet) Prepend(ctx context.Context, mt *memTable, stats *Stats) tableSet {
   355  	newTs := tableSet{
   356  		novel:    make(chunkSources, len(ts.novel)+1),
   357  		upstream: make(chunkSources, len(ts.upstream)),
   358  		p:        ts.p,
   359  		rl:       ts.rl,
   360  	}
   361  	newTs.novel[0] = newPersistingChunkSource(ctx, mt, ts, ts.p, ts.rl, stats)
   362  	copy(newTs.novel[1:], ts.novel)
   363  	copy(newTs.upstream, ts.upstream)
   364  	return newTs
   365  }
   366  
   367  func (ts tableSet) extract(ctx context.Context, chunks chan<- extractRecord) error {
   368  	// Since new tables are _prepended_ to a tableSet, extracting chunks in insertOrder requires iterating ts.upstream back to front, followed by ts.novel.
   369  	for i := len(ts.upstream) - 1; i >= 0; i-- {
   370  		err := ts.upstream[i].extract(ctx, chunks)
   371  
   372  		if err != nil {
   373  			return err
   374  		}
   375  	}
   376  	for i := len(ts.novel) - 1; i >= 0; i-- {
   377  		err := ts.novel[i].extract(ctx, chunks)
   378  
   379  		if err != nil {
   380  			return err
   381  		}
   382  	}
   383  
   384  	return nil
   385  }
   386  
   387  // Flatten returns a new tableSet with |upstream| set to the union of ts.novel
   388  // and ts.upstream.
   389  func (ts tableSet) Flatten() (tableSet, error) {
   390  	flattened := tableSet{
   391  		upstream: make(chunkSources, 0, ts.Size()),
   392  		p:        ts.p,
   393  		rl:       ts.rl,
   394  	}
   395  
   396  	for _, src := range ts.novel {
   397  		cnt, err := src.count()
   398  
   399  		if err != nil {
   400  			return tableSet{}, err
   401  		}
   402  
   403  		if cnt > 0 {
   404  			flattened.upstream = append(flattened.upstream, src)
   405  		}
   406  	}
   407  
   408  	flattened.upstream = append(flattened.upstream, ts.upstream...)
   409  	return flattened, nil
   410  }
   411  
   412  // Rebase returns a new tableSet holding the novel tables managed by |ts| and
   413  // those specified by |specs|.
   414  func (ts tableSet) Rebase(ctx context.Context, specs []tableSpec, stats *Stats) (tableSet, error) {
   415  	merged := tableSet{
   416  		novel:    make(chunkSources, 0, len(ts.novel)),
   417  		upstream: make(chunkSources, 0, len(specs)),
   418  		p:        ts.p,
   419  		rl:       ts.rl,
   420  	}
   421  
   422  	// Rebase the novel tables, skipping those that are actually empty (usually due to de-duping during table compaction)
   423  	for _, t := range ts.novel {
   424  		cnt, err := t.count()
   425  
   426  		if err != nil {
   427  			return tableSet{}, err
   428  		}
   429  
   430  		if cnt > 0 {
   431  			merged.novel = append(merged.novel, t.Clone())
   432  		}
   433  	}
   434  
   435  	// Create a list of tables to open so we can open them in parallel.
   436  	tablesToOpen := []tableSpec{} // keep specs in order to play nicely with manifest appendix optimization
   437  	presents := map[addr]tableSpec{}
   438  	for _, spec := range specs {
   439  		if _, present := presents[spec.name]; !present { // Filter out dups
   440  			tablesToOpen = append(tablesToOpen, spec)
   441  			presents[spec.name] = spec
   442  		}
   443  	}
   444  
   445  	// Open all the new upstream tables concurrently
   446  	var rp atomic.Value
   447  	ae := atomicerr.New()
   448  	merged.upstream = make(chunkSources, len(tablesToOpen))
   449  	wg := &sync.WaitGroup{}
   450  	wg.Add(len(tablesToOpen))
   451  	for i, spec := range tablesToOpen {
   452  		go func(idx int, spec tableSpec) {
   453  			defer wg.Done()
   454  			defer func() {
   455  				if r := recover(); r != nil {
   456  					rp.Store(r)
   457  				}
   458  			}()
   459  			if !ae.IsSet() {
   460  				var err error
   461  				for _, existing := range ts.upstream {
   462  					h, err := existing.hash()
   463  					if err != nil {
   464  						ae.SetIfError(err)
   465  						return
   466  					}
   467  					if spec.name == h {
   468  						merged.upstream[idx] = existing.Clone()
   469  						return
   470  					}
   471  				}
   472  				merged.upstream[idx], err = ts.p.Open(ctx, spec.name, spec.chunkCount, stats)
   473  				ae.SetIfError(err)
   474  			}
   475  		}(i, spec)
   476  	}
   477  	wg.Wait()
   478  
   479  	if r := rp.Load(); r != nil {
   480  		panic(r)
   481  	}
   482  
   483  	if err := ae.Get(); err != nil {
   484  		return tableSet{}, err
   485  	}
   486  
   487  	return merged, nil
   488  }
   489  
   490  func (ts tableSet) ToSpecs() ([]tableSpec, error) {
   491  	tableSpecs := make([]tableSpec, 0, ts.Size())
   492  	for _, src := range ts.novel {
   493  		cnt, err := src.count()
   494  
   495  		if err != nil {
   496  			return nil, err
   497  		}
   498  
   499  		if cnt > 0 {
   500  			h, err := src.hash()
   501  
   502  			if err != nil {
   503  				return nil, err
   504  			}
   505  
   506  			tableSpecs = append(tableSpecs, tableSpec{h, cnt})
   507  		}
   508  	}
   509  	for _, src := range ts.upstream {
   510  		cnt, err := src.count()
   511  
   512  		if err != nil {
   513  			return nil, err
   514  		}
   515  
   516  		if cnt <= 0 {
   517  			return nil, errors.New("no upstream chunks")
   518  		}
   519  
   520  		h, err := src.hash()
   521  
   522  		if err != nil {
   523  			return nil, err
   524  		}
   525  
   526  		tableSpecs = append(tableSpecs, tableSpec{h, cnt})
   527  	}
   528  	return tableSpecs, nil
   529  }