github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/clone/internal/download/batch.go (about)

     1  // Copyright 2021 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //    https://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package download contains a library for downloading data from logs.
    16  package download
    17  
    18  import (
    19  	"context"
    20  	"errors"
    21  	"fmt"
    22  	"time"
    23  
    24  	backoff "github.com/cenkalti/backoff/v4"
    25  	"github.com/golang/glog"
    26  )
    27  
    28  // BatchFetch should be implemented to provide a mechanism to fetch a range of leaves.
    29  // It should return the number of leaves fetched, or an error if the fetch failed.
    30  type BatchFetch func(start uint64, leaves [][]byte) (uint64, error)
    31  
    32  // BulkResult combines a downloaded leaf, or the error found when trying to obtain the leaf.
    33  type BulkResult struct {
    34  	Leaf []byte
    35  	Err  error
    36  }
    37  
    38  // Bulk keeps downloading leaves starting from `first`, using the given leaf fetcher.
    39  // The number of workers and the batch size to use for each of the fetch requests are also specified.
    40  // The resulting leaves or terminal errors are returned in order over `rc`. Bulk takes ownership of `rc` and will close it when no more values will be written.
    41  // Internally this uses exponential backoff on the workers to download as fast as possible, but no faster.
    42  func Bulk(ctx context.Context, first, treeSize uint64, batchFetch BatchFetch, workers, batchSize uint, rc chan<- BulkResult) {
    43  	defer close(rc)
    44  	ctx, cancel := context.WithCancel(ctx)
    45  	defer cancel()
    46  	// Each worker gets its own unbuffered channel to make sure it can only be at most one ahead.
    47  	// This prevents lots of wasted work happening if one shard gets stuck.
    48  	rangeChans := make([]chan workerResult, workers)
    49  	increment := workers * batchSize
    50  
    51  	badAlignErr := errors.New("not aligned")
    52  	align := func() (uint64, error) {
    53  		if left := treeSize - first; left < uint64(batchSize) {
    54  			batchSize = uint(left)
    55  		}
    56  		glog.Infof("Attempting to align by making request [%d, %d)", first, first+uint64(batchSize))
    57  		leaves := make([][]byte, batchSize)
    58  		fetched, err := batchFetch(first, leaves)
    59  		if err != nil {
    60  			glog.Warningf("Failed to fetch batch: %v", err)
    61  			return 0, err
    62  		}
    63  		for i := 0; i < int(fetched); i++ {
    64  			rc <- BulkResult{
    65  				Leaf: leaves[i],
    66  				Err:  nil,
    67  			}
    68  		}
    69  		first += fetched
    70  		if fetched != uint64(batchSize) {
    71  			glog.Warningf("Received partial batch (expected %d, got %d)", batchSize, fetched)
    72  			return first, badAlignErr
    73  		}
    74  		glog.Infof("Received full batch (expected %d, got %d)", batchSize, fetched)
    75  		return first, nil
    76  	}
    77  
    78  	var err error
    79  	if first, err = align(); err != nil {
    80  		if err != badAlignErr {
    81  			glog.Errorf("Failed to align: %v", err)
    82  			return
    83  		}
    84  		// We failed to align once, but let's try a second time. If this works then
    85  		// we're aligned with how the log wants to server chunks of entries. If it
    86  		// fails then we can guess that the desired batch size is not configured well.
    87  		if first, err = align(); err != nil {
    88  			glog.Errorf("Failed to align after two attempts, consider a different batch size? %v", err)
    89  			return
    90  		}
    91  	}
    92  
    93  	for i := uint(0); i < workers; i++ {
    94  		rangeChans[i] = make(chan workerResult)
    95  		start := first + uint64(i*batchSize)
    96  		go func(i uint, start uint64) {
    97  			fetchWorker{
    98  				label:      fmt.Sprintf("worker %d", i),
    99  				start:      start,
   100  				treeSize:   treeSize,
   101  				count:      batchSize,
   102  				increment:  uint64(increment),
   103  				out:        rangeChans[i],
   104  				batchFetch: batchFetch,
   105  			}.run(ctx)
   106  		}(i, start)
   107  	}
   108  
   109  	var lastStart uint64
   110  	if treeSize > uint64(batchSize) {
   111  		lastStart = treeSize - uint64(batchSize)
   112  	}
   113  	var r workerResult
   114  	// Perpetually round-robin through the sharded ranges.
   115  	for i := 0; ; i = (i + 1) % int(workers) {
   116  		select {
   117  		case <-ctx.Done():
   118  			rc <- BulkResult{
   119  				Leaf: nil,
   120  				Err:  ctx.Err(),
   121  			}
   122  			return
   123  		case r = <-rangeChans[i]:
   124  		}
   125  		if r.err != nil {
   126  			rc <- BulkResult{
   127  				Leaf: nil,
   128  				Err:  r.err,
   129  			}
   130  			return
   131  		}
   132  		for _, l := range r.leaves {
   133  			rc <- BulkResult{
   134  				Leaf: l,
   135  				Err:  nil,
   136  			}
   137  		}
   138  		if r.start >= lastStart {
   139  			return
   140  		}
   141  	}
   142  }
   143  
   144  type workerResult struct {
   145  	start   uint64
   146  	leaves  [][]byte
   147  	fetched uint64
   148  	err     error
   149  }
   150  
   151  type fetchWorker struct {
   152  	label                      string
   153  	start, treeSize, increment uint64
   154  	count                      uint
   155  	out                        chan<- workerResult
   156  	batchFetch                 BatchFetch
   157  }
   158  
   159  func (w fetchWorker) run(ctx context.Context) {
   160  	glog.V(2).Infof("fetchWorker %q started", w.label)
   161  	defer glog.V(2).Infof("fetchWorker %q finished", w.label)
   162  	defer close(w.out)
   163  	// This exponential backoff is always reset before use in backoff.RetryNotify.
   164  	bo := backoff.NewExponentialBackOff()
   165  	for {
   166  		if w.start >= w.treeSize {
   167  			return
   168  		}
   169  		count := w.count
   170  		if left := w.treeSize - w.start; left < uint64(count) {
   171  			count = uint(left)
   172  		}
   173  
   174  		leaves := make([][]byte, count)
   175  		var c workerResult
   176  		operation := func() error {
   177  			fetched, err := w.batchFetch(w.start, leaves)
   178  			if err != nil {
   179  				return fmt.Errorf("LeafFetcher.Batch(%d, %d): %w", w.start, w.count, err)
   180  			}
   181  			c = workerResult{
   182  				start:   w.start,
   183  				leaves:  leaves,
   184  				fetched: fetched,
   185  				err:     nil,
   186  			}
   187  			if fetched != uint64(len(leaves)) {
   188  				return backoff.Permanent(fmt.Errorf("LeafFetcher.Batch(%d, %d): wanted %d leaves but got %d", w.start, w.count, len(leaves), fetched))
   189  			}
   190  			return nil
   191  		}
   192  		c.err = backoff.RetryNotify(operation, bo, func(e error, _ time.Duration) {
   193  			glog.V(1).Infof("%s: Retryable error getting data: %q", w.label, e)
   194  		})
   195  		select {
   196  		case <-ctx.Done():
   197  			return
   198  		case w.out <- c:
   199  		}
   200  		if c.err != nil {
   201  			return
   202  		}
   203  		w.start += w.increment
   204  	}
   205  }