github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/restore/batcher.go

// Copyright 2020 PingCAP, Inc. Licensed under Apache-2.0.

package restore

import (
	"context"
	"sync"
	"sync/atomic"
	"time"

	"github.com/opentracing/opentracing-go"
	backuppb "github.com/pingcap/kvproto/pkg/backup"
	"github.com/pingcap/log"
	"go.uber.org/zap"

	"github.com/pingcap/br/pkg/rtree"
)

// SendType is the 'type' of a send.
// When we issue a 'send' command to the worker, we may want to flush all pending ranges
// (when auto commit is enabled), or just clean overflowing ranges (when adding a table to the batcher).
type SendType int

const (
	// SendUntilLessThanBatch makes the batcher send batches until
	// its remaining range count is less than its batchSizeThreshold.
	SendUntilLessThanBatch SendType = iota
	// SendAll makes the batcher send all pending ranges.
	SendAll
	// SendAllThenClose makes the batcher send all pending ranges and then close itself.
	SendAllThenClose
)

// Batcher collects ranges to restore and sends batched split/ingest requests.
type Batcher struct {
	cachedTables   []TableWithRange
	cachedTablesMu *sync.Mutex
	rewriteRules   *RewriteRules

	// autoCommitJoiner is for joining the background batch sender.
	autoCommitJoiner chan<- struct{}
	// everythingIsDone is for waiting until the worker is done: after we send a
	// signal to autoCommitJoiner, we must give it enough time to finish its work,
	// and then it notifies us through this wait group.
	// A wait group is used instead of a trivial channel for further extension.
	everythingIsDone *sync.WaitGroup
	// sendErr is for reporting errors.
	sendErr chan<- error
	// sendCh is for communicating with sendWorker.
	sendCh chan<- SendType
	// outCh is for emitting restored tables, so they can be passed on to steps like checksum.
	outCh chan<- CreatedTable

	sender             BatchSender
	manager            ContextManager
	batchSizeThreshold int
	size               int32
}

// Len returns the current size (pending range count) of this batcher.
func (b *Batcher) Len() int {
	return int(atomic.LoadInt32(&b.size))
}

// contextCleaner is the worker goroutine that cleans up the 'context'
// (e.g. makes regions leave restore mode).
func (b *Batcher) contextCleaner(ctx context.Context, tables <-chan []CreatedTable) {
	defer func() {
		if ctx.Err() != nil {
			log.Info("restore canceled, cleaning in background context")
			b.manager.Close(context.Background())
		} else {
			b.manager.Close(ctx)
		}
	}()
	defer b.everythingIsDone.Done()
	for {
		select {
		case <-ctx.Done():
			return
		case tbls, ok := <-tables:
			if !ok {
				return
			}
			if err := b.manager.Leave(ctx, tbls); err != nil {
				b.sendErr <- err
				return
			}
			for _, tbl := range tbls {
				b.outCh <- tbl
			}
		}
	}
}
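
// The batcher owns two background goroutines, both wired up by NewBatcher below:
//
//   - sendWorker listens on sendCh, drains cached ranges via Send, and pushes the resulting
//     batches into the BatchSender;
//   - contextCleaner listens on the sender's sink channel, calls ContextManager.Leave for each
//     restored batch, and forwards the fully restored tables to outCh.
//
// Both goroutines register with everythingIsDone (hence the Add(2) in NewBatcher), so Close can
// wait until all in-flight batches have been handed back before closing the output channel.
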
// NewBatcher creates a new batcher from a sender and a context manager.
// The sender defines how to 'restore' a batch (i.e. where to send, or 'push down', the task),
// and the context manager defines the 'lifetime' of restoring tables (i.e. how to enter and exit 'restore' mode).
// The batcher works in the background, sending batches periodically (when auto commit is enabled)
// or when the batch size reaches the limit, and it emits fully restored tables to the returned output channel.
func NewBatcher(
	ctx context.Context,
	sender BatchSender,
	manager ContextManager,
	errCh chan<- error,
) (*Batcher, <-chan CreatedTable) {
	output := make(chan CreatedTable, defaultChannelSize)
	sendChan := make(chan SendType, 2)
	b := &Batcher{
		rewriteRules:       EmptyRewriteRule(),
		sendErr:            errCh,
		outCh:              output,
		sender:             sender,
		manager:            manager,
		sendCh:             sendChan,
		cachedTablesMu:     new(sync.Mutex),
		everythingIsDone:   new(sync.WaitGroup),
		batchSizeThreshold: 1,
	}
	b.everythingIsDone.Add(2)
	go b.sendWorker(ctx, sendChan)
	restoredTables := make(chan []CreatedTable, defaultChannelSize)
	go b.contextCleaner(ctx, restoredTables)
	sink := chanTableSink{restoredTables, errCh}
	sender.PutSink(sink)
	return b, output
}

// EnableAutoCommit makes the batcher commit a batch periodically, even if the batch size isn't big enough.
// This is a separate function so that auto commit can be left disabled in some cases.
func (b *Batcher) EnableAutoCommit(ctx context.Context, delay time.Duration) {
	if b.autoCommitJoiner != nil {
		// Running two auto commit goroutines wouldn't be a good idea.
		// If needed (e.g. to change the period of auto commit), please disable auto commit first.
		log.L().DPanic("enabling auto commit on a batcher that already has auto commit enabled, which isn't allowed")
	}
	joiner := make(chan struct{})
	go b.autoCommitWorker(ctx, joiner, delay)
	b.autoCommitJoiner = joiner
}

// DisableAutoCommit blocks the current goroutine until the worker can gracefully stop,
// and then disables auto commit.
func (b *Batcher) DisableAutoCommit() {
	b.joinAutoCommitWorker()
	b.autoCommitJoiner = nil
}

func (b *Batcher) waitUntilSendDone() {
	b.sendCh <- SendAllThenClose
	b.everythingIsDone.Wait()
}

// joinAutoCommitWorker blocks the current goroutine until the worker can gracefully stop.
// It returns immediately when auto commit is disabled.
func (b *Batcher) joinAutoCommitWorker() {
	if b.autoCommitJoiner != nil {
		log.Debug("gracefully stopping worker goroutine")
		b.autoCommitJoiner <- struct{}{}
		close(b.autoCommitJoiner)
		log.Debug("gracefully stopped worker goroutine")
	}
}

// sendWorker is the 'worker' that sends all ranges to TiKV.
// TODO: since all operations are asynchronous now, it's possible to remove this worker.
func (b *Batcher) sendWorker(ctx context.Context, send <-chan SendType) {
	sendUntil := func(lessOrEqual int) {
		for b.Len() > lessOrEqual {
			b.Send(ctx)
		}
	}

	for sendType := range send {
		switch sendType {
		case SendUntilLessThanBatch:
			sendUntil(b.batchSizeThreshold)
		case SendAll:
			sendUntil(0)
		case SendAllThenClose:
			sendUntil(0)
			b.sender.Close()
			b.everythingIsDone.Done()
			return
		}
	}
}

func (b *Batcher) autoCommitWorker(ctx context.Context, joiner <-chan struct{}, delay time.Duration) {
	tick := time.NewTicker(delay)
	defer tick.Stop()
	for {
		select {
		case <-joiner:
			log.Debug("graceful stop signal received")
			return
		case <-ctx.Done():
			b.sendErr <- ctx.Err()
			return
		case <-tick.C:
			if b.Len() > 0 {
				log.Debug("sending batch because time limit exceeded", zap.Int("size", b.Len()))
				b.asyncSend(SendAll)
			}
		}
	}
}

func (b *Batcher) asyncSend(t SendType) {
	// Add a check here so we won't duplicate sends.
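	// This length check is only best-effort deduplication: sendCh has a buffer of 2, so even if
	// two goroutines (e.g. the auto commit worker and a caller of Add) race past the check, both
	// signals fit into the buffer; and if a signal is skipped because one is already pending,
	// sendWorker will still drain the batcher when it handles the pending one.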
	if len(b.sendCh) == 0 {
		b.sendCh <- t
	}
}

// DrainResult is the collection of some ranges and their metadata.
type DrainResult struct {
	// TablesToSend are tables that would be sent in this batch.
	TablesToSend []CreatedTable
	// BlankTablesAfterSend are tables that will be fully restored after this batch is sent.
	BlankTablesAfterSend []CreatedTable
	RewriteRules         *RewriteRules
	Ranges               []rtree.Range
}

// Files returns all files of this drain result.
func (result DrainResult) Files() []*backuppb.File {
	files := make([]*backuppb.File, 0, len(result.Ranges)*2)
	for _, fs := range result.Ranges {
		files = append(files, fs.Files...)
	}
	return files
}

func newDrainResult() DrainResult {
	return DrainResult{
		TablesToSend:         make([]CreatedTable, 0),
		BlankTablesAfterSend: make([]CreatedTable, 0),
		RewriteRules:         EmptyRewriteRule(),
		Ranges:               make([]rtree.Range, 0),
	}
}

// drainRanges 'drains' ranges from the current tables.
// For example, let a '-' character be a range, and assume we have:
//   |---|-----|-------|
//   |t1 |t2   |t3     |
// After we run drainRanges() with batchSizeThreshold = 6, let '*' be the ranges that will be sent in this batch:
//   |***|***--|-------|
//   |t1 |t2   |-------|
//
// drainRanges() will return:
//   TablesToSend: [t1, t2] (so we can make them enter restore mode)
//   BlankTablesAfterSend: [t1] (so we can make them leave restore mode after restoring this batch)
//   RewriteRules: rewrite rules for [t1, t2] (so we can restore them)
//   Ranges: the starred ranges (so we can restore them)
//
// Afterwards, the batcher's cachedTables looks like this:
//   |--|-------|
//   |t2|t3     |
// As you can see, all restored ranges have been removed.
func (b *Batcher) drainRanges() DrainResult {
	result := newDrainResult()

	b.cachedTablesMu.Lock()
	defer b.cachedTablesMu.Unlock()

	for offset, thisTable := range b.cachedTables {
		thisTableLen := len(thisTable.Range)
		collected := len(result.Ranges)

		result.RewriteRules.Append(*thisTable.RewriteRule)
		result.TablesToSend = append(result.TablesToSend, thisTable.CreatedTable)

		// The batch is full, so we should stop here.
		// We use strictly greater than because if we sent a batch when the count is exactly equal,
		// the offset would have to be advanced by one (the last table is fully sent, so it should
		// go into BlankTablesAfterSend), which would introduce extra complexity.
		if thisTableLen+collected > b.batchSizeThreshold {
			drainSize := b.batchSizeThreshold - collected
			thisTableRanges := thisTable.Range

			var drained []rtree.Range
			drained, b.cachedTables[offset].Range = thisTableRanges[:drainSize], thisTableRanges[drainSize:]
			log.Debug("draining partial table to batch",
				zap.Stringer("db", thisTable.OldTable.DB.Name),
				zap.Stringer("table", thisTable.Table.Name),
				zap.Int("size", thisTableLen),
				zap.Int("drained", drainSize),
			)
			result.Ranges = append(result.Ranges, drained...)
			b.cachedTables = b.cachedTables[offset:]
			atomic.AddInt32(&b.size, -int32(len(drained)))
			return result
		}

		result.BlankTablesAfterSend = append(result.BlankTablesAfterSend, thisTable.CreatedTable)
		// Let's 'drain' all ranges of the current table. This must not make the batch full.
		result.Ranges = append(result.Ranges, thisTable.Range...)
		atomic.AddInt32(&b.size, -int32(len(thisTable.Range)))
		// Clear the table's ranges.
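		// The fully drained table keeps its slot in cachedTables (with an empty range slice) so
		// that offset stays valid while iterating; cachedTables itself is only truncated when a
		// later table overflows the batch, or reset after the loop once every table has fit.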
		b.cachedTables[offset].Range = []rtree.Range{}
		log.Debug("draining table to batch",
			zap.Stringer("db", thisTable.OldTable.DB.Name),
			zap.Stringer("table", thisTable.Table.Name),
			zap.Int("size", thisTableLen),
		)
	}

	// All tables are drained.
	b.cachedTables = []TableWithRange{}
	return result
}

// Send sends all pending requests in the batcher.
// Tables restored fully by the current batch are emitted to the output channel once the batch finishes.
func (b *Batcher) Send(ctx context.Context) {
	if span := opentracing.SpanFromContext(ctx); span != nil && span.Tracer() != nil {
		span1 := span.Tracer().StartSpan("Batcher.Send", opentracing.ChildOf(span.Context()))
		defer span1.Finish()
		ctx = opentracing.ContextWithSpan(ctx, span1)
	}

	drainResult := b.drainRanges()
	tbs := drainResult.TablesToSend
	ranges := drainResult.Ranges
	log.Info("restore batch start", rtree.ZapRanges(ranges), ZapTables(tbs))
	// Leave is called in b.contextCleaner.
	if err := b.manager.Enter(ctx, drainResult.TablesToSend); err != nil {
		b.sendErr <- err
		return
	}
	b.sender.RestoreBatch(drainResult)
}

func (b *Batcher) sendIfFull() {
	if b.Len() >= b.batchSizeThreshold {
		log.Debug("sending batch because batcher is full", zap.Int("size", b.Len()))
		b.asyncSend(SendUntilLessThanBatch)
	}
}

// Add adds a task to the Batcher.
func (b *Batcher) Add(tbs TableWithRange) {
	b.cachedTablesMu.Lock()
	log.Debug("adding table to batch",
		zap.Stringer("db", tbs.OldTable.DB.Name),
		zap.Stringer("table", tbs.Table.Name),
		zap.Int64("old id", tbs.OldTable.Info.ID),
		zap.Int64("new id", tbs.Table.ID),
		zap.Int("table size", len(tbs.Range)),
		zap.Int("batch size", b.Len()),
	)
	b.cachedTables = append(b.cachedTables, tbs)
	b.rewriteRules.Append(*tbs.RewriteRule)
	atomic.AddInt32(&b.size, int32(len(tbs.Range)))
	b.cachedTablesMu.Unlock()

	b.sendIfFull()
}

// Close closes the batcher, sending all pending requests and closing the output channel.
func (b *Batcher) Close() {
	log.Info("sending the last batch on close", zap.Int("size", b.Len()))
	b.DisableAutoCommit()
	b.waitUntilSendDone()
	close(b.outCh)
	close(b.sendCh)
}

// SetThreshold sets the batch size threshold at which a batch should be sent.
// Note that this function isn't goroutine safe yet,
// so please set the threshold before anything starts (e.g. before EnableAutoCommit).
func (b *Batcher) SetThreshold(newThreshold int) {
	b.batchSizeThreshold = newThreshold
}
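
// batcherUsageSketch is a minimal, hypothetical sketch of how a caller might drive the Batcher;
// it is illustrative only and not part of the restore pipeline. The sender, manager and tables
// arguments are assumed to be prepared elsewhere, and the threshold and auto commit delay below
// are arbitrary example values. Errors and restored tables are drained concurrently so that the
// batcher never blocks on its output channels.
func batcherUsageSketch(ctx context.Context, sender BatchSender, manager ContextManager, tables []TableWithRange) {
	errCh := make(chan error, defaultChannelSize)
	batcher, outCh := NewBatcher(ctx, sender, manager, errCh)
	batcher.SetThreshold(128)                  // send a batch once 128 ranges are pending
	batcher.EnableAutoCommit(ctx, time.Second) // also flush pending ranges periodically

	// Consume errors reported by the batcher and its workers.
	go func() {
		for err := range errCh {
			log.Warn("restore batch failed", zap.Error(err))
		}
	}()
	// Consume fully restored tables, e.g. to schedule checksum on them.
	go func() {
		for tbl := range outCh {
			_ = tbl
		}
	}()

	for _, tbl := range tables {
		batcher.Add(tbl)
	}
	// Close flushes every pending range and then closes the output channel.
	batcher.Close()
}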