github.com/google/trillian-examples@v0.0.0-20240520080811-0d40d35cef0e/clone/internal/download/batch.go (about) 1 // Copyright 2021 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // https://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package download contains a library for downloading data from logs. 16 package download 17 18 import ( 19 "context" 20 "errors" 21 "fmt" 22 "time" 23 24 backoff "github.com/cenkalti/backoff/v4" 25 "github.com/golang/glog" 26 ) 27 28 // BatchFetch should be implemented to provide a mechanism to fetch a range of leaves. 29 // It should return the number of leaves fetched, or an error if the fetch failed. 30 type BatchFetch func(start uint64, leaves [][]byte) (uint64, error) 31 32 // BulkResult combines a downloaded leaf, or the error found when trying to obtain the leaf. 33 type BulkResult struct { 34 Leaf []byte 35 Err error 36 } 37 38 // Bulk keeps downloading leaves starting from `first`, using the given leaf fetcher. 39 // The number of workers and the batch size to use for each of the fetch requests are also specified. 40 // The resulting leaves or terminal errors are returned in order over `rc`. Bulk takes ownership of `rc` and will close it when no more values will be written. 41 // Internally this uses exponential backoff on the workers to download as fast as possible, but no faster. 42 func Bulk(ctx context.Context, first, treeSize uint64, batchFetch BatchFetch, workers, batchSize uint, rc chan<- BulkResult) { 43 defer close(rc) 44 ctx, cancel := context.WithCancel(ctx) 45 defer cancel() 46 // Each worker gets its own unbuffered channel to make sure it can only be at most one ahead. 47 // This prevents lots of wasted work happening if one shard gets stuck. 48 rangeChans := make([]chan workerResult, workers) 49 increment := workers * batchSize 50 51 badAlignErr := errors.New("not aligned") 52 align := func() (uint64, error) { 53 if left := treeSize - first; left < uint64(batchSize) { 54 batchSize = uint(left) 55 } 56 glog.Infof("Attempting to align by making request [%d, %d)", first, first+uint64(batchSize)) 57 leaves := make([][]byte, batchSize) 58 fetched, err := batchFetch(first, leaves) 59 if err != nil { 60 glog.Warningf("Failed to fetch batch: %v", err) 61 return 0, err 62 } 63 for i := 0; i < int(fetched); i++ { 64 rc <- BulkResult{ 65 Leaf: leaves[i], 66 Err: nil, 67 } 68 } 69 first += fetched 70 if fetched != uint64(batchSize) { 71 glog.Warningf("Received partial batch (expected %d, got %d)", batchSize, fetched) 72 return first, badAlignErr 73 } 74 glog.Infof("Received full batch (expected %d, got %d)", batchSize, fetched) 75 return first, nil 76 } 77 78 var err error 79 if first, err = align(); err != nil { 80 if err != badAlignErr { 81 glog.Errorf("Failed to align: %v", err) 82 return 83 } 84 // We failed to align once, but let's try a second time. If this works then 85 // we're aligned with how the log wants to server chunks of entries. If it 86 // fails then we can guess that the desired batch size is not configured well. 87 if first, err = align(); err != nil { 88 glog.Errorf("Failed to align after two attempts, consider a different batch size? %v", err) 89 return 90 } 91 } 92 93 for i := uint(0); i < workers; i++ { 94 rangeChans[i] = make(chan workerResult) 95 start := first + uint64(i*batchSize) 96 go func(i uint, start uint64) { 97 fetchWorker{ 98 label: fmt.Sprintf("worker %d", i), 99 start: start, 100 treeSize: treeSize, 101 count: batchSize, 102 increment: uint64(increment), 103 out: rangeChans[i], 104 batchFetch: batchFetch, 105 }.run(ctx) 106 }(i, start) 107 } 108 109 var lastStart uint64 110 if treeSize > uint64(batchSize) { 111 lastStart = treeSize - uint64(batchSize) 112 } 113 var r workerResult 114 // Perpetually round-robin through the sharded ranges. 115 for i := 0; ; i = (i + 1) % int(workers) { 116 select { 117 case <-ctx.Done(): 118 rc <- BulkResult{ 119 Leaf: nil, 120 Err: ctx.Err(), 121 } 122 return 123 case r = <-rangeChans[i]: 124 } 125 if r.err != nil { 126 rc <- BulkResult{ 127 Leaf: nil, 128 Err: r.err, 129 } 130 return 131 } 132 for _, l := range r.leaves { 133 rc <- BulkResult{ 134 Leaf: l, 135 Err: nil, 136 } 137 } 138 if r.start >= lastStart { 139 return 140 } 141 } 142 } 143 144 type workerResult struct { 145 start uint64 146 leaves [][]byte 147 fetched uint64 148 err error 149 } 150 151 type fetchWorker struct { 152 label string 153 start, treeSize, increment uint64 154 count uint 155 out chan<- workerResult 156 batchFetch BatchFetch 157 } 158 159 func (w fetchWorker) run(ctx context.Context) { 160 glog.V(2).Infof("fetchWorker %q started", w.label) 161 defer glog.V(2).Infof("fetchWorker %q finished", w.label) 162 defer close(w.out) 163 // This exponential backoff is always reset before use in backoff.RetryNotify. 164 bo := backoff.NewExponentialBackOff() 165 for { 166 if w.start >= w.treeSize { 167 return 168 } 169 count := w.count 170 if left := w.treeSize - w.start; left < uint64(count) { 171 count = uint(left) 172 } 173 174 leaves := make([][]byte, count) 175 var c workerResult 176 operation := func() error { 177 fetched, err := w.batchFetch(w.start, leaves) 178 if err != nil { 179 return fmt.Errorf("LeafFetcher.Batch(%d, %d): %w", w.start, w.count, err) 180 } 181 c = workerResult{ 182 start: w.start, 183 leaves: leaves, 184 fetched: fetched, 185 err: nil, 186 } 187 if fetched != uint64(len(leaves)) { 188 return backoff.Permanent(fmt.Errorf("LeafFetcher.Batch(%d, %d): wanted %d leaves but got %d", w.start, w.count, len(leaves), fetched)) 189 } 190 return nil 191 } 192 c.err = backoff.RetryNotify(operation, bo, func(e error, _ time.Duration) { 193 glog.V(1).Infof("%s: Retryable error getting data: %q", w.label, e) 194 }) 195 select { 196 case <-ctx.Done(): 197 return 198 case w.out <- c: 199 } 200 if c.err != nil { 201 return 202 } 203 w.start += w.increment 204 } 205 }