github.com/KinWaiYuen/client-go/v2@v2.5.4/txnkv/rangetask/range_task.go

// Copyright 2021 TiKV Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: The code in this file is based on code from the
// TiDB project, licensed under the Apache License v 2.0
//
// https://github.com/pingcap/tidb/tree/cc5e161ac06827589c4966674597c137cc9e809c/store/tikv/range_task.go
//

// Copyright 2019 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package rangetask

import (
	"bytes"
	"context"
	"sync"
	"sync/atomic"
	"time"

	"github.com/KinWaiYuen/client-go/v2/internal/logutil"
	"github.com/KinWaiYuen/client-go/v2/internal/retry"
	"github.com/KinWaiYuen/client-go/v2/kv"
	"github.com/KinWaiYuen/client-go/v2/metrics"
	"github.com/pingcap/errors"
	"go.uber.org/zap"
)

const (
	rangeTaskDefaultStatLogInterval = time.Minute * 10
	defaultRegionsPerTask           = 128

	lblCompletedRegions = "completed-regions"
	lblFailedRegions    = "failed-regions"
)

// Runner splits a key range into many sub-ranges and processes them concurrently,
// which makes it convenient to send requests to all regions in the range. Because
// of region merging and splitting, multiple requests for disjoint ranges may still
// be sent to the same region.
type Runner struct {
	name            string
	store           storage
	concurrency     int
	handler         TaskHandler
	statLogInterval time.Duration
	regionsPerTask  int

	completedRegions int32
	failedRegions    int32
}

// TaskStat counts the regions that completed or failed to do the task.
type TaskStat struct {
	CompletedRegions int
	FailedRegions    int
}

// TaskHandler is the type of functions that process a task of a key range.
// The function should count the regions that succeeded or failed to do the task.
// Returning an error from the handler stops the whole job.
type TaskHandler = func(ctx context.Context, r kv.KeyRange) (TaskStat, error)
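// A minimal usage sketch (illustrative only; the concrete `store` value, the key
// variables, and the handler body are assumptions, not part of this package):
//
//	handler := func(ctx context.Context, r kv.KeyRange) (rangetask.TaskStat, error) {
//		// Process every region covered by [r.StartKey, r.EndKey) here and
//		// report how many regions succeeded or failed.
//		return rangetask.TaskStat{CompletedRegions: 1}, nil
//	}
//	runner := rangetask.NewRangeTaskRunner("example-task", store, 4, handler)
//	if err := runner.RunOnRange(ctx, startKey, endKey); err != nil {
//		// The first error returned by any handler invocation stops the job
//		// and is returned here.
//	}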
// NewRangeTaskRunner creates a RangeTaskRunner.
//
// `name` identifies the task in logs and metrics. `handler` is the function used
// to process each sub-range of the whole range; if `handler` returns an error,
// the whole job will be canceled.
func NewRangeTaskRunner(
	name string,
	store storage,
	concurrency int,
	handler TaskHandler,
) *Runner {
	return &Runner{
		name:            name,
		store:           store,
		concurrency:     concurrency,
		handler:         handler,
		statLogInterval: rangeTaskDefaultStatLogInterval,
		regionsPerTask:  defaultRegionsPerTask,
	}
}

// SetRegionsPerTask sets how many regions are in one divided sub-task. Since
// regions may split and merge, a sub-task may not contain exactly the specified
// number of regions.
func (s *Runner) SetRegionsPerTask(regionsPerTask int) {
	if regionsPerTask < 1 {
		panic("RangeTaskRunner: regionsPerTask should be at least 1")
	}
	s.regionsPerTask = regionsPerTask
}

const locateRegionMaxBackoff = 20000

// NewLocateRegionBackoffer creates the backoffer for LocateRegion requests.
func NewLocateRegionBackoffer(ctx context.Context) *retry.Backoffer {
	return retry.NewBackofferWithVars(ctx, locateRegionMaxBackoff, nil)
}
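// A hedged sketch of how this backoffer is consumed (BatchLoadRegionsFromKey is
// the region-cache method RunOnRange actually calls below; the surrounding
// variables are hypothetical):
//
//	bo := rangetask.NewLocateRegionBackoffer(ctx)
//	// Load up to 128 regions starting from startKey; the returned key is the
//	// end key of the last loaded region, i.e. where the next batch begins.
//	nextKey, err := store.GetRegionCache().BatchLoadRegionsFromKey(bo, startKey, 128)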
// RunOnRange runs the task on the given range.
// An empty startKey or endKey means that side is unbounded.
func (s *Runner) RunOnRange(ctx context.Context, startKey, endKey []byte) error {
	s.completedRegions = 0
	metrics.TiKVRangeTaskStats.WithLabelValues(s.name, lblCompletedRegions).Set(0)

	if len(endKey) != 0 && bytes.Compare(startKey, endKey) >= 0 {
		logutil.Logger(ctx).Info("empty range task executed. ignored",
			zap.String("name", s.name),
			zap.String("startKey", kv.StrKey(startKey)),
			zap.String("endKey", kv.StrKey(endKey)))
		return nil
	}

	logutil.Logger(ctx).Info("range task started",
		zap.String("name", s.name),
		zap.String("startKey", kv.StrKey(startKey)),
		zap.String("endKey", kv.StrKey(endKey)),
		zap.Int("concurrency", s.concurrency))

	// Periodically log the progress.
	statLogTicker := time.NewTicker(s.statLogInterval)

	ctx, cancel := context.WithCancel(ctx)
	taskCh := make(chan *kv.KeyRange, s.concurrency)
	var wg sync.WaitGroup

	// Create workers that concurrently process the whole range.
	workers := make([]*rangeTaskWorker, 0, s.concurrency)
	for i := 0; i < s.concurrency; i++ {
		w := s.createWorker(taskCh, &wg)
		workers = append(workers, w)
		wg.Add(1)
		go w.run(ctx, cancel)
	}

	startTime := time.Now()

	// Make sure taskCh is closed exactly once.
	isClosed := false
	defer func() {
		if !isClosed {
			close(taskCh)
			wg.Wait()
		}
		statLogTicker.Stop()
		cancel()
		metrics.TiKVRangeTaskStats.WithLabelValues(s.name, lblCompletedRegions).Set(0)
	}()

	// Iterate over all regions and send each batch's range as a task to the workers.
	key := startKey
Loop:
	for {
		select {
		case <-statLogTicker.C:
			logutil.Logger(ctx).Info("range task in progress",
				zap.String("name", s.name),
				zap.String("startKey", kv.StrKey(startKey)),
				zap.String("endKey", kv.StrKey(endKey)),
				zap.Int("concurrency", s.concurrency),
				zap.Duration("cost time", time.Since(startTime)),
				zap.Int("completed regions", s.CompletedRegions()))
		default:
		}

		bo := NewLocateRegionBackoffer(ctx)

		rangeEndKey, err := s.store.GetRegionCache().BatchLoadRegionsFromKey(bo, key, s.regionsPerTask)
		if err != nil {
			logutil.Logger(ctx).Info("range task failed",
				zap.String("name", s.name),
				zap.String("startKey", kv.StrKey(startKey)),
				zap.String("endKey", kv.StrKey(endKey)),
				zap.Duration("cost time", time.Since(startTime)),
				zap.Error(err))
			return errors.Trace(err)
		}
		task := &kv.KeyRange{
			StartKey: key,
			EndKey:   rangeEndKey,
		}

		isLast := len(task.EndKey) == 0 || (len(endKey) > 0 && bytes.Compare(task.EndKey, endKey) >= 0)
		// Let task.EndKey = min(endKey, rangeEndKey).
		if isLast {
			task.EndKey = endKey
		}

		pushTaskStartTime := time.Now()

		select {
		case taskCh <- task:
		case <-ctx.Done():
			break Loop
		}
		metrics.TiKVRangeTaskPushDuration.WithLabelValues(s.name).Observe(time.Since(pushTaskStartTime).Seconds())

		if isLast {
			break
		}

		key = task.EndKey
	}

	isClosed = true
	close(taskCh)
	wg.Wait()
	for _, w := range workers {
		if w.err != nil {
			logutil.Logger(ctx).Info("range task failed",
				zap.String("name", s.name),
				zap.String("startKey", kv.StrKey(startKey)),
				zap.String("endKey", kv.StrKey(endKey)),
				zap.Duration("cost time", time.Since(startTime)),
				zap.Error(w.err))
			return errors.Trace(w.err)
		}
	}

	logutil.Logger(ctx).Info("range task finished",
		zap.String("name", s.name),
		zap.String("startKey", kv.StrKey(startKey)),
		zap.String("endKey", kv.StrKey(endKey)),
		zap.Duration("cost time", time.Since(startTime)),
		zap.Int("completed regions", s.CompletedRegions()))

	return nil
}

// createWorker creates a worker that can process tasks from the given channel.
func (s *Runner) createWorker(taskCh chan *kv.KeyRange, wg *sync.WaitGroup) *rangeTaskWorker {
	return &rangeTaskWorker{
		name:    s.name,
		store:   s.store,
		handler: s.handler,
		taskCh:  taskCh,
		wg:      wg,

		completedRegions: &s.completedRegions,
		failedRegions:    &s.failedRegions,
	}
}

// CompletedRegions returns how many regions have completed the task.
func (s *Runner) CompletedRegions() int {
	return int(atomic.LoadInt32(&s.completedRegions))
}

// FailedRegions returns how many regions have failed to do the task.
func (s *Runner) FailedRegions() int {
	return int(atomic.LoadInt32(&s.failedRegions))
}
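// A hypothetical sketch of polling progress from another goroutine while
// RunOnRange blocks (the runner only publishes these two atomic counters, so
// a caller-side ticker like this is one way to observe them):
//
//	go func() {
//		ticker := time.NewTicker(time.Second)
//		defer ticker.Stop()
//		for range ticker.C {
//			fmt.Printf("completed=%d failed=%d\n",
//				runner.CompletedRegions(), runner.FailedRegions())
//		}
//	}()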
// rangeTaskWorker is used by RangeTaskRunner to process tasks concurrently.
type rangeTaskWorker struct {
	name    string
	store   storage
	handler TaskHandler
	taskCh  chan *kv.KeyRange
	wg      *sync.WaitGroup

	err error

	completedRegions *int32
	failedRegions    *int32
}

// run starts the worker. It receives tasks from `w.taskCh` and processes them
// one by one until the channel is closed or the context is canceled.
func (w *rangeTaskWorker) run(ctx context.Context, cancel context.CancelFunc) {
	defer w.wg.Done()
	for r := range w.taskCh {
		select {
		case <-ctx.Done():
			w.err = ctx.Err()
			return
		default:
		}

		stat, err := w.handler(ctx, *r)

		atomic.AddInt32(w.completedRegions, int32(stat.CompletedRegions))
		atomic.AddInt32(w.failedRegions, int32(stat.FailedRegions))
		metrics.TiKVRangeTaskStats.WithLabelValues(w.name, lblCompletedRegions).Add(float64(stat.CompletedRegions))
		metrics.TiKVRangeTaskStats.WithLabelValues(w.name, lblFailedRegions).Add(float64(stat.FailedRegions))

		if err != nil {
			logutil.Logger(ctx).Info("canceling range task because of error",
				zap.String("name", w.name),
				zap.String("startKey", kv.StrKey(r.StartKey)),
				zap.String("endKey", kv.StrKey(r.EndKey)),
				zap.Error(err))
			w.err = err
			// Cancel the shared context so the producer and the other
			// workers stop as soon as possible.
			cancel()
			break
		}
	}
}
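// Error propagation in a nutshell (illustrative; `runner`, `store`, and the
// handler are hypothetical): a handler error cancels the shared context, the
// producer's `case <-ctx.Done()` breaks its loop and closes taskCh, the
// workers drain and exit, and RunOnRange returns the first worker error found:
//
//	handler := func(ctx context.Context, r kv.KeyRange) (rangetask.TaskStat, error) {
//		return rangetask.TaskStat{FailedRegions: 1}, errors.New("region unavailable")
//	}
//	runner := rangetask.NewRangeTaskRunner("failing-task", store, 4, handler)
//	err := runner.RunOnRange(ctx, nil, nil) // returns "region unavailable"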