github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/datas/pull/pull_chunk_fetcher.go (about)

     1  // Copyright 2024 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package pull
    16  
    17  import (
    18  	"context"
    19  	"io"
    20  	"sync"
    21  
    22  	"golang.org/x/sync/errgroup"
    23  
    24  	"github.com/dolthub/dolt/go/store/hash"
    25  	"github.com/dolthub/dolt/go/store/nbs"
    26  )
    27  
// GetManyer is the minimal chunk-source interface needed by
// PullChunkFetcher: a way to request a set of chunks by hash and have the
// ones that are located handed to a callback in compressed form.
type GetManyer interface {
	// GetManyCompressed looks up |hashes| and passes chunks to |found|.
	// PullChunkFetcher treats every requested hash that was never passed to
	// |found| as absent from the source.
	//
	// NOTE(review): PullChunkFetcher guards the state touched by its |found|
	// callback with a mutex, which suggests |found| may be invoked
	// concurrently — confirm against the implementations.
	GetManyCompressed(ctx context.Context, hashes hash.HashSet, found func(context.Context, nbs.CompressedChunk)) error
}
    31  
// ChunkFetcherable is implemented by chunk sources that can supply their own
// nbs.ChunkFetcher. GetChunkFetcher returns that fetcher in preference to
// wrapping the source in a PullChunkFetcher.
type ChunkFetcherable interface {
	// ChunkFetcher returns an nbs.ChunkFetcher for this source.
	ChunkFetcher(ctx context.Context) nbs.ChunkFetcher
}
    35  
    36  func GetChunkFetcher(ctx context.Context, cs GetManyer) nbs.ChunkFetcher {
    37  	if fable, ok := cs.(ChunkFetcherable); ok {
    38  		return fable.ChunkFetcher(ctx)
    39  	}
    40  	return NewPullChunkFetcher(ctx, cs)
    41  }
    42  
// A PullChunkFetcher is a simple implementation of |ChunkFetcher| based on
// calling GetManyCompressed.
//
// It only has one outstanding GetManyCompressed call at a time: a single
// background goroutine (see fetcherThread) takes one batch of hashes at a
// time from Get, fetches it, and publishes the results for Recv.
type PullChunkFetcher struct {
	// ctx is the context returned by errgroup.WithContext in
	// NewPullChunkFetcher. It is passed to GetManyCompressed and is watched
	// by Get, Recv and the fetcher goroutine.
	ctx context.Context
	// eg owns the fetcher goroutine; Close waits on it.
	eg *errgroup.Group

	// getter is the chunk source that batches are fetched from.
	getter GetManyer

	// batchCh carries batches of hashes from Get to the fetcher goroutine.
	// It is unbuffered and is closed by CloseSend.
	batchCh chan hash.HashSet
	// doneCh is closed by Close to tell the fetcher goroutine to stop.
	doneCh chan struct{}
	// resCh carries results from the fetcher goroutine to Recv. It is
	// unbuffered and is closed by the fetcher goroutine once it observes
	// that batchCh has been closed.
	resCh chan nbs.CompressedChunk
}
    57  
    58  func NewPullChunkFetcher(ctx context.Context, getter GetManyer) *PullChunkFetcher {
    59  	eg, ctx := errgroup.WithContext(ctx)
    60  	ret := &PullChunkFetcher{
    61  		ctx:     ctx,
    62  		eg:      eg,
    63  		getter:  getter,
    64  		batchCh: make(chan hash.HashSet),
    65  		doneCh:  make(chan struct{}),
    66  		resCh:   make(chan nbs.CompressedChunk),
    67  	}
    68  	ret.eg.Go(func() error {
    69  		return ret.fetcherThread(func() {
    70  			close(ret.resCh)
    71  		})
    72  	})
    73  	return ret
    74  }
    75  
    76  func (f *PullChunkFetcher) fetcherThread(finalize func()) error {
    77  	for {
    78  		select {
    79  		case batch, ok := <-f.batchCh:
    80  			if !ok {
    81  				finalize()
    82  				return nil
    83  			}
    84  
    85  			var mu sync.Mutex
    86  			missing := batch.Copy()
    87  
    88  			// Blocking get, no concurrency, only one fetcher.
    89  			err := f.getter.GetManyCompressed(f.ctx, batch, func(ctx context.Context, chk nbs.CompressedChunk) {
    90  				mu.Lock()
    91  				missing.Remove(chk.H)
    92  				mu.Unlock()
    93  				select {
    94  				case <-ctx.Done():
    95  				case <-f.ctx.Done():
    96  				case f.resCh <- chk:
    97  				case <-f.doneCh:
    98  				}
    99  			})
   100  			if err != nil {
   101  				return err
   102  			}
   103  
   104  			for h := range missing {
   105  				select {
   106  				case <-f.ctx.Done():
   107  					return context.Cause(f.ctx)
   108  				case f.resCh <- nbs.CompressedChunk{H: h}:
   109  				case <-f.doneCh:
   110  					return nil
   111  				}
   112  			}
   113  		case <-f.ctx.Done():
   114  			return context.Cause(f.ctx)
   115  		case <-f.doneCh:
   116  			return nil
   117  		}
   118  	}
   119  }
   120  
   121  func (f *PullChunkFetcher) Get(ctx context.Context, hashes hash.HashSet) error {
   122  	select {
   123  	case f.batchCh <- hashes:
   124  		return nil
   125  	case <-ctx.Done():
   126  		return context.Cause(ctx)
   127  	case <-f.ctx.Done():
   128  		return context.Cause(f.ctx)
   129  	}
   130  }
   131  
// CloseSend signals that no more batches will be submitted by closing the
// batch channel. When the fetcher goroutine observes the close it closes the
// result channel, after which Recv reports io.EOF.
//
// It always returns nil. Because it closes a channel, it must be called at
// most once, and Get must not be called after it: closing a closed channel
// and sending on a closed channel both panic.
func (f *PullChunkFetcher) CloseSend() error {
	close(f.batchCh)
	return nil
}
   136  
// Close shuts the fetcher down. It closes doneCh, which tells the fetcher
// goroutine to stop, then waits for the goroutine to exit and returns the
// error, if any, reported by the errgroup.
//
// Because it closes a channel, it must be called at most once; a second call
// panics.
func (f *PullChunkFetcher) Close() error {
	// Signal first, then wait: the goroutine may be blocked sending a result
	// that nobody will receive.
	close(f.doneCh)
	return f.eg.Wait()
}
   141  
   142  func (f *PullChunkFetcher) Recv(ctx context.Context) (nbs.CompressedChunk, error) {
   143  	select {
   144  	case res, ok := <-f.resCh:
   145  		if !ok {
   146  			return nbs.CompressedChunk{}, io.EOF
   147  		}
   148  		return res, nil
   149  	case <-ctx.Done():
   150  		return nbs.CompressedChunk{}, context.Cause(ctx)
   151  	case <-f.ctx.Done():
   152  		return nbs.CompressedChunk{}, context.Cause(f.ctx)
   153  	}
   154  }