github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/nbs/journal_chunk_source.go

// Copyright 2022 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package nbs

import (
	"context"
	"errors"
	"fmt"
	"io"
	"sort"
	"sync"

	"golang.org/x/sync/errgroup"

	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/hash"
)

// journalChunkSource is a chunkSource that reads chunks
// from a ChunkJournal. Unlike other NBS chunkSources,
// it is not immutable and its set of chunks grows as
// more commits are made to the ChunkJournal.
type journalChunkSource struct {
	journal *journalWriter
}

var _ chunkSource = journalChunkSource{}

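// The helper below is an illustrative sketch only, not part of the production
// read path: it shows how a caller holding a journalChunkSource might test for
// and then read a single chunk. The helper name is hypothetical.
func exampleJournalRead(ctx context.Context, src journalChunkSource, h hash.Hash) ([]byte, error) {
	ok, err := src.has(h)
	if err != nil {
		return nil, err
	}
	if !ok {
		// the chunk lives in another table file; callers fall back to other sources
		return nil, nil
	}
	// get returns the uncompressed chunk bytes (nil if |h| is absent)
	return src.get(ctx, h, &Stats{})
}
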
func (s journalChunkSource) has(h hash.Hash) (bool, error) {
	return s.journal.hasAddr(h), nil
}

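// hasMany marks each request whose address is present in the journal and
// reports whether any of the requested addresses are missing from this source.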
func (s journalChunkSource) hasMany(addrs []hasRecord) (missing bool, err error) {
	for i := range addrs {
		ok := s.journal.hasAddr(*addrs[i].a)
		if ok {
			addrs[i].has = true
		} else {
			missing = true
		}
	}
	return
}

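// getCompressed returns the chunk for |h| in its compressed journal record
// form. As in get below, an empty CompressedChunk indicates that |h| is not
// present in the journal.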
func (s journalChunkSource) getCompressed(_ context.Context, h hash.Hash, _ *Stats) (CompressedChunk, error) {
	return s.journal.getCompressedChunk(h)
}

func (s journalChunkSource) get(_ context.Context, h hash.Hash, _ *Stats) ([]byte, error) {
	cc, err := s.journal.getCompressedChunk(h)
	if err != nil {
		return nil, err
	} else if cc.IsEmpty() {
		return nil, nil
	}
	ch, err := cc.ToChunk()
	if err != nil {
		return nil, err
	}
	return ch.Data(), nil
}

type journalRecord struct {
	// r is the journal range for this chunk
	r Range
	// idx is the array offset into the shared |reqs|
	idx int
}

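// getMany implements chunkReader. It reads the requested chunks from the
// journal in compressed form, decompresses each one, and passes the resulting
// chunk to |found|.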
func (s journalChunkSource) getMany(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, *chunks.Chunk), stats *Stats) (bool, error) {
	return s.getManyCompressed(ctx, eg, reqs, func(ctx context.Context, cc CompressedChunk) {
		ch, err := cc.ToChunk()
		if err != nil {
			eg.Go(func() error {
				return err
			})
			return
		}
		chWHash := chunks.NewChunkWithHash(cc.Hash(), ch.Data())
		found(ctx, &chWHash)
	}, stats)
}

// getManyCompressed implements chunkReader. It (1) synchronously checks the
// journal index for read ranges, (2) records whether any requested chunks are
// missing from this source, (3) sorts the lookups for efficient disk access,
// and then (4) performs the reads asynchronously. The journal read lock is
// released once all reads have completed, which may be after this function
// returns.
func (s journalChunkSource) getManyCompressed(ctx context.Context, eg *errgroup.Group, reqs []getRecord, found func(context.Context, CompressedChunk), stats *Stats) (bool, error) {
	var remaining bool
	var jReqs []journalRecord
	var wg sync.WaitGroup
	s.journal.lock.RLock()
	for i, r := range reqs {
		if r.found {
			continue
		}
		rang, ok := s.journal.ranges.get(*r.a)
		if !ok {
			remaining = true
			continue
		}
		jReqs = append(jReqs, journalRecord{r: rang, idx: i})
		reqs[i].found = true
	}

	// sort chunks by journal locality
	sort.Slice(jReqs, func(i, j int) bool {
		return jReqs[i].r.Offset < jReqs[j].r.Offset
	})

	for i := range jReqs {
		// workers run on the parent error group; |wg| tracks them locally so
		// the journal read lock can be released once every read has finished
		i := i // capture the loop variable for the goroutine (pre-Go 1.22 loop semantics)
		wg.Add(1)
		eg.Go(func() error {
			defer wg.Done()
			rec := jReqs[i]
			a := reqs[rec.idx].a
			if cc, err := s.journal.getCompressedChunkAtRange(rec.r, *a); err != nil {
				return err
			} else if cc.IsEmpty() {
				return errors.New("chunk in journal index was empty")
			} else {
				found(ctx, cc)
				return nil
			}
		})
	}
	go func() {
		wg.Wait()
		s.journal.lock.RUnlock()
	}()
	return remaining, nil
}

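// The function below is a hypothetical usage sketch, not part of this file's
// API: it shows how a caller might drive getManyCompressed with an errgroup
// and wait for the asynchronous journal reads to finish. It assumes only the
// method signatures visible in this file.
func exampleGetManyCompressed(ctx context.Context, src journalChunkSource, reqs []getRecord) ([]CompressedChunk, bool, error) {
	var mu sync.Mutex
	var out []CompressedChunk
	eg, egCtx := errgroup.WithContext(ctx)
	remaining, err := src.getManyCompressed(egCtx, eg, reqs, func(_ context.Context, cc CompressedChunk) {
		// |found| callbacks run on errgroup goroutines, so guard shared state
		mu.Lock()
		defer mu.Unlock()
		out = append(out, cc)
	}, &Stats{})
	if err != nil {
		return nil, remaining, err
	}
	// reads are asynchronous: Wait blocks until every journal read completes,
	// and the journal read lock is released only after that point
	if err = eg.Wait(); err != nil {
		return nil, remaining, err
	}
	// |remaining| reports whether any requested chunks were absent from the journal
	return out, remaining, nil
}
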
func (s journalChunkSource) count() (uint32, error) {
	return s.journal.recordCount(), nil
}

func (s journalChunkSource) uncompressedLen() (uint64, error) {
	return s.journal.uncompressedSize(), nil
}

func (s journalChunkSource) hash() hash.Hash {
	return journalAddr
}

// reader implements chunkSource.
func (s journalChunkSource) reader(context.Context) (io.ReadCloser, uint64, error) {
	rdr, sz, err := s.journal.snapshot()
	return rdr, uint64(sz), err
}

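// getRecordRanges implements chunkSource. It returns the byte range within
// the journal for each requested chunk that is present, marking those
// requests as found.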
func (s journalChunkSource) getRecordRanges(requests []getRecord) (map[hash.Hash]Range, error) {
	ranges := make(map[hash.Hash]Range, len(requests))
	for i, req := range requests {
		if req.found {
			continue
		}
		rng, ok, err := s.journal.getRange(*req.a)
		if err != nil {
			return nil, err
		} else if !ok {
			continue
		}
		requests[i].found = true // update |requests| itself, not the loop copy
		ranges[hash.Hash(*req.a)] = rng
	}
	return ranges, nil
}

// currentSize implements chunkSource. It returns the total size of the
// chunkSource: chunks, index, and footer.
func (s journalChunkSource) currentSize() uint64 {
	return uint64(s.journal.currentSize())
}

// index implements chunkSource.
func (s journalChunkSource) index() (tableIndex, error) {
	return nil, fmt.Errorf("journalChunkSource cannot be conjoined")
}

func (s journalChunkSource) clone() (chunkSource, error) {
	return s, nil
}

func (s journalChunkSource) close() error {
	// |s.journal| closed via ChunkJournal
	return nil
}

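// equalSpecs returns true when |left| and |right| name the same set of table
// files, irrespective of order.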
func equalSpecs(left, right []tableSpec) bool {
	if len(left) != len(right) {
		return false
	}
	l := make(map[hash.Hash]struct{}, len(left))
	for _, s := range left {
		l[s.name] = struct{}{}
	}
	for _, s := range right {
		if _, ok := l[s.name]; !ok {
			return false
		}
	}
	return true
}