github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/dbs/reorg.go

// Copyright 2020 WHTCORPS INC, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package dbs

import (
	"context"
	"fmt"
	"strconv"
	"sync/atomic"
	"time"

	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
	"github.com/whtcorpsinc/BerolinaSQL/perceptron"
	"github.com/whtcorpsinc/BerolinaSQL/terror"
	"github.com/whtcorpsinc/errors"
	"github.com/whtcorpsinc/failpoint"
	"github.com/whtcorpsinc/fidelpb/go-fidelpb"
	"github.com/whtcorpsinc/milevadb/allegrosql"
	"github.com/whtcorpsinc/milevadb/blockcodec"
	"github.com/whtcorpsinc/milevadb/causet"
	"github.com/whtcorpsinc/milevadb/causet/blocks"
	"github.com/whtcorpsinc/milevadb/ekv"
	"github.com/whtcorpsinc/milevadb/metrics"
	"github.com/whtcorpsinc/milevadb/soliton/chunk"
	"github.com/whtcorpsinc/milevadb/soliton/codec"
	"github.com/whtcorpsinc/milevadb/soliton/logutil"
	"github.com/whtcorpsinc/milevadb/soliton/mock"
	"github.com/whtcorpsinc/milevadb/soliton/ranger"
	"github.com/whtcorpsinc/milevadb/soliton/sqlexec"
	"github.com/whtcorpsinc/milevadb/spacetime"
	"github.com/whtcorpsinc/milevadb/statistics"
	"github.com/whtcorpsinc/milevadb/stochastikctx"
	"github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx"
	"github.com/whtcorpsinc/milevadb/types"
	"go.uber.org/zap"
)

// reorgCtx is for reorganization.
type reorgCtx struct {
	// doneCh is used to notify.
	// If the reorganization job is done, we will use this channel to notify outer.
	// TODO: Now we use goroutine to simulate reorganization jobs, later we may
	// use a persistent job list.
	doneCh chan error
	// rowCount is used to simulate a job's event count.
	rowCount int64
	// notifyCancelReorgJob is used to notify the backfilling goroutine if the DBS job is cancelled.
	// 0: job is not canceled.
	// 1: job is canceled.
	notifyCancelReorgJob int32
	// doneHandle is used to simulate the handle that has been processed.
	doneHandle atomic.Value // nullableHandle
}

// nullableHandle can causetstore <nil> handle.
// Storing a nil object to atomic.Value can lead to panic. This is a workaround.
type nullableHandle struct {
	handle ekv.Handle
}

// toString is used in log to avoid nil dereference panic.
func toString(handle ekv.Handle) string {
	if handle == nil {
		return "<nil>"
	}
	return handle.String()
}

// newContext gets a context. It is only used for adding defCausumn in reorganization state.
func newContext(causetstore ekv.CausetStorage) stochastikctx.Context {
	c := mock.NewContext()
	c.CausetStore = causetstore
	c.GetStochastikVars().SetStatusFlag(allegrosql.ServerStatusAutocommit, false)
	c.GetStochastikVars().StmtCtx.TimeZone = time.UTC
	return c
}

const defaultWaitReorgTimeout = 10 * time.Second
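// The following is a hypothetical sketch, not called anywhere in this package: it
// illustrates why nullableHandle above wraps the handle before it is stored in an
// atomic.Value. Calling Store(nil) on an atomic.Value panics at runtime, while
// storing a concrete wrapper struct whose field happens to be nil is safe.
func nullableHandleSketch() {
	var v atomic.Value
	// v.Store(nil) would panic with "sync/atomic: store of nil value into Value".
	v.Store(nullableHandle{handle: nil}) // concrete type with a nil payload: OK
	h, _ := v.Load().(nullableHandle)
	_ = toString(h.handle) // toString guards against the nil handle and returns "<nil>"
}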
// ReorgWaitTimeout is the timeout for waiting on a DBS job in the write-reorganization stage.
var ReorgWaitTimeout = 5 * time.Second

func (rc *reorgCtx) notifyReorgCancel() {
	atomic.StoreInt32(&rc.notifyCancelReorgJob, 1)
}

func (rc *reorgCtx) cleanNotifyReorgCancel() {
	atomic.StoreInt32(&rc.notifyCancelReorgJob, 0)
}

func (rc *reorgCtx) isReorgCanceled() bool {
	return atomic.LoadInt32(&rc.notifyCancelReorgJob) == 1
}

func (rc *reorgCtx) setRowCount(count int64) {
	atomic.StoreInt64(&rc.rowCount, count)
}

func (rc *reorgCtx) setNextHandle(doneHandle ekv.Handle) {
	rc.doneHandle.Store(nullableHandle{handle: doneHandle})
}

func (rc *reorgCtx) increaseRowCount(count int64) {
	atomic.AddInt64(&rc.rowCount, count)
}

func (rc *reorgCtx) getRowCountAndHandle() (int64, ekv.Handle) {
	event := atomic.LoadInt64(&rc.rowCount)
	h, _ := (rc.doneHandle.Load()).(nullableHandle)
	return event, h.handle
}

func (rc *reorgCtx) clean() {
	rc.setRowCount(0)
	rc.setNextHandle(nil)
	rc.doneCh = nil
}

func (w *worker) runReorgJob(t *spacetime.Meta, reorgInfo *reorgInfo, tblInfo *perceptron.BlockInfo, lease time.Duration, f func() error) error {
	job := reorgInfo.Job
	if w.reorgCtx.doneCh == nil {
		// start a reorganization job
		w.wg.Add(1)
		w.reorgCtx.doneCh = make(chan error, 1)
		// initial reorgCtx
		w.reorgCtx.setRowCount(job.GetRowCount())
		w.reorgCtx.setNextHandle(reorgInfo.StartHandle)
		go func() {
			defer w.wg.Done()
			w.reorgCtx.doneCh <- f()
		}()
	}

	waitTimeout := defaultWaitReorgTimeout
	// If lease is 0, we are using a local storage,
	// and we can wait for the reorganization to be done here.
	// If lease > 0, we don't need to wait here because
	// we should uFIDelate some job's progress context and try checking again,
	// so we use a short timeout here.
	if lease > 0 {
		waitTimeout = ReorgWaitTimeout
	}

	// wait reorganization job done or timeout
	select {
	case err := <-w.reorgCtx.doneCh:
		rowCount, _ := w.reorgCtx.getRowCountAndHandle()
		logutil.BgLogger().Info("[dbs] run reorg job done", zap.Int64("handled rows", rowCount))
		// UFIDelate a job's RowCount.
		job.SetRowCount(rowCount)
		if err == nil {
			metrics.AddIndexProgress.Set(100)
		}
		w.reorgCtx.clean()
		return errors.Trace(err)
	case <-w.ctx.Done():
		logutil.BgLogger().Info("[dbs] run reorg job quit")
		w.reorgCtx.setNextHandle(nil)
		w.reorgCtx.setRowCount(0)
		// We return errWaitReorgTimeout here too, so that the outer loop will break.
		return errWaitReorgTimeout
	case <-time.After(waitTimeout):
		rowCount, doneHandle := w.reorgCtx.getRowCountAndHandle()
		// UFIDelate a job's RowCount.
		job.SetRowCount(rowCount)
		uFIDelateAddIndexProgress(w, tblInfo, rowCount)
		// UFIDelate a reorgInfo's handle.
		err := t.UFIDelateDBSReorgStartHandle(job, doneHandle)
		logutil.BgLogger().Info("[dbs] run reorg job wait timeout", zap.Duration("waitTime", waitTimeout),
			zap.Int64("totalAddedRowCount", rowCount), zap.String("doneHandle", toString(doneHandle)), zap.Error(err))
		// If we time out, we return; the caller checks the tenant and retries waiting for the job to be done.
		return errWaitReorgTimeout
	}
}
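// A minimal standalone sketch (assumption: illustrative only, not used by this
// package) of the wait pattern in runReorgJob above: start the work once, then
// block with a bounded select so the caller can persist progress (row count,
// done handle) between waits and call again.
func waitReorgStepSketch(done <-chan error, timeout time.Duration) (finished bool, err error) {
	select {
	case err = <-done:
		// The backfilling goroutine finished, successfully or not.
		return true, err
	case <-time.After(timeout):
		// Timed out: the caller records progress and retries on the next round.
		return false, nil
	}
}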
func uFIDelateAddIndexProgress(w *worker, tblInfo *perceptron.BlockInfo, addedRowCount int64) {
	if tblInfo == nil || addedRowCount == 0 {
		return
	}
	totalCount := getBlockTotalCount(w, tblInfo)
	progress := float64(0)
	if totalCount > 0 {
		progress = float64(addedRowCount) / float64(totalCount)
	} else {
		progress = 1
	}
	if progress > 1 {
		progress = 1
	}
	metrics.AddIndexProgress.Set(progress * 100)
}

func getBlockTotalCount(w *worker, tblInfo *perceptron.BlockInfo) int64 {
	var ctx stochastikctx.Context
	ctx, err := w.sessPool.get()
	if err != nil {
		return statistics.PseudoRowCount
	}
	defer w.sessPool.put(ctx)

	interlock, ok := ctx.(sqlexec.RestrictedALLEGROSQLInterlockingDirectorate)
	// `mock.Context` is used in tests; it doesn't implement RestrictedALLEGROSQLInterlockingDirectorate.
	if !ok {
		return statistics.PseudoRowCount
	}
	allegrosql := fmt.Sprintf("select block_rows from information_schema.blocks where milevadb_block_id=%v;", tblInfo.ID)
	rows, _, err := interlock.InterDircRestrictedALLEGROSQL(allegrosql)
	if err != nil {
		return statistics.PseudoRowCount
	}
	if len(rows) != 1 {
		return statistics.PseudoRowCount
	}
	return rows[0].GetInt64(0)
}

func (w *worker) isReorgRunnable(d *dbsCtx) error {
	if isChanClosed(w.ctx.Done()) {
		// Worker is closed, so it can't do the reorganization job.
		return errInvalidWorker.GenWithStack("worker is closed")
	}

	if w.reorgCtx.isReorgCanceled() {
		// Job is cancelled, so it can't be done.
		return errCancelledDBSJob
	}

	if !d.isTenant() {
		// If it's not the tenant, we will try later, so here we just return an error.
		logutil.BgLogger().Info("[dbs] DBS worker is not the DBS tenant", zap.String("ID", d.uuid))
		return errors.Trace(errNotTenant)
	}
	return nil
}
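// Hypothetical sketch (not part of this package): the progress computation used by
// uFIDelateAddIndexProgress above, isolated. When the total count is unknown or
// zero, progress is treated as complete, and the ratio is clamped to at most 1
// before being reported as a percentage.
func addIndexProgressPercentSketch(addedRowCount, totalCount int64) float64 {
	progress := float64(1)
	if totalCount > 0 {
		progress = float64(addedRowCount) / float64(totalCount)
	}
	if progress > 1 {
		progress = 1
	}
	return progress * 100
}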
type reorgInfo struct {
	*perceptron.Job

	// StartHandle is the first handle of the adding indices causet.
	StartHandle ekv.Handle
	// EndHandle is the last handle of the adding indices causet.
	EndHandle ekv.Handle
	d         *dbsCtx
	first     bool
	// PhysicalBlockID is used for partitioned causet.
	// DBS reorganize for a partitioned causet will handle partitions one by one,
	// PhysicalBlockID is used to trace the current partition we are handling.
	// If the causet is not partitioned, PhysicalBlockID would be BlockID.
	PhysicalBlockID int64
}

func (r *reorgInfo) String() string {
	return "StartHandle:" + toString(r.StartHandle) + "," +
		"EndHandle:" + toString(r.EndHandle) + "," +
		"first:" + strconv.FormatBool(r.first) + "," +
		"PhysicalBlockID:" + strconv.FormatInt(r.PhysicalBlockID, 10)
}

func constructDescBlockScanPB(physicalBlockID int64, tblInfo *perceptron.BlockInfo, handleDefCauss []*perceptron.DeferredCausetInfo) *fidelpb.InterlockingDirectorate {
	tblScan := blocks.BuildBlockScanFromInfos(tblInfo, handleDefCauss)
	tblScan.BlockId = physicalBlockID
	tblScan.Desc = true
	return &fidelpb.InterlockingDirectorate{Tp: fidelpb.InterDircType_TypeBlockScan, TblScan: tblScan}
}

func constructLimitPB(count uint64) *fidelpb.InterlockingDirectorate {
	limitInterDirc := &fidelpb.Limit{
		Limit: count,
	}
	return &fidelpb.InterlockingDirectorate{Tp: fidelpb.InterDircType_TypeLimit, Limit: limitInterDirc}
}

func buildDescBlockScanPosetDag(ctx stochastikctx.Context, tbl causet.PhysicalBlock, handleDefCauss []*perceptron.DeferredCausetInfo, limit uint64) (*fidelpb.PosetDagRequest, error) {
	posetPosetDagReq := &fidelpb.PosetDagRequest{}
	_, timeZoneOffset := time.Now().In(time.UTC).Zone()
	posetPosetDagReq.TimeZoneOffset = int64(timeZoneOffset)
	for i := range handleDefCauss {
		posetPosetDagReq.OutputOffsets = append(posetPosetDagReq.OutputOffsets, uint32(i))
	}
	posetPosetDagReq.Flags |= perceptron.FlagInSelectStmt

	tblScanInterDirc := constructDescBlockScanPB(tbl.GetPhysicalID(), tbl.Meta(), handleDefCauss)
	posetPosetDagReq.InterlockingDirectorates = append(posetPosetDagReq.InterlockingDirectorates, tblScanInterDirc)
	posetPosetDagReq.InterlockingDirectorates = append(posetPosetDagReq.InterlockingDirectorates, constructLimitPB(limit))
	allegrosql.SetEncodeType(ctx, posetPosetDagReq)
	return posetPosetDagReq, nil
}

func getDeferredCausetsTypes(defCausumns []*perceptron.DeferredCausetInfo) []*types.FieldType {
	defCausTypes := make([]*types.FieldType, 0, len(defCausumns))
	for _, defCaus := range defCausumns {
		defCausTypes = append(defCausTypes, &defCaus.FieldType)
	}
	return defCausTypes
}
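// Hypothetical sketch (not called by this package): how the two PB constructors
// above combine into the "descending scan + limit" plan that
// buildDescBlockScanPosetDag assembles. Scanning in descending handle order with a
// limit of 1 makes the first returned row carry the maximum handle of the causet.
func descScanWithLimitSketch(physicalBlockID int64, tblInfo *perceptron.BlockInfo, handleDefCauss []*perceptron.DeferredCausetInfo) []*fidelpb.InterlockingDirectorate {
	return []*fidelpb.InterlockingDirectorate{
		constructDescBlockScanPB(physicalBlockID, tblInfo, handleDefCauss),
		constructLimitPB(1),
	}
}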
// buildDescBlockScan builds a desc causet scan upon tblInfo.
func (dc *dbsCtx) buildDescBlockScan(ctx context.Context, startTS uint64, tbl causet.PhysicalBlock,
	handleDefCauss []*perceptron.DeferredCausetInfo, limit uint64) (allegrosql.SelectResult, error) {
	sctx := newContext(dc.causetstore)
	posetPosetDagPB, err := buildDescBlockScanPosetDag(sctx, tbl, handleDefCauss, limit)
	if err != nil {
		return nil, errors.Trace(err)
	}
	var b allegrosql.RequestBuilder
	var builder *allegrosql.RequestBuilder
	if !tbl.Meta().IsCommonHandle {
		ranges := ranger.FullIntRange(false)
		builder = b.SetBlockRanges(tbl.GetPhysicalID(), ranges, nil)
	} else {
		ranges := ranger.FullNotNullRange()
		builder = b.SetCommonHandleRanges(sctx.GetStochastikVars().StmtCtx, tbl.GetPhysicalID(), ranges)
	}
	builder.SetPosetDagRequest(posetPosetDagPB).
		SetStartTS(startTS).
		SetKeepOrder(true).
		SetConcurrency(1).SetDesc(true)

	builder.Request.NotFillCache = true
	builder.Request.Priority = ekv.PriorityLow

	ekvReq, err := builder.Build()
	if err != nil {
		return nil, errors.Trace(err)
	}

	result, err := allegrosql.Select(ctx, sctx, ekvReq, getDeferredCausetsTypes(handleDefCauss), statistics.NewQueryFeedback(0, nil, 0, false))
	if err != nil {
		return nil, errors.Trace(err)
	}
	result.Fetch(ctx)
	return result, nil
}

// GetBlockMaxHandle gets the max handle of a PhysicalBlock.
func (dc *dbsCtx) GetBlockMaxHandle(startTS uint64, tbl causet.PhysicalBlock) (maxHandle ekv.Handle, emptyBlock bool, err error) {
	var handleDefCauss []*perceptron.DeferredCausetInfo
	var pkIdx *perceptron.IndexInfo
	tblInfo := tbl.Meta()
	switch {
	case tblInfo.PKIsHandle:
		for _, defCaus := range tbl.Meta().DeferredCausets {
			if allegrosql.HasPriKeyFlag(defCaus.Flag) {
				handleDefCauss = []*perceptron.DeferredCausetInfo{defCaus}
				break
			}
		}
	case tblInfo.IsCommonHandle:
		pkIdx = blocks.FindPrimaryIndex(tblInfo)
		defcaus := tblInfo.DefCauss()
		for _, idxDefCaus := range pkIdx.DeferredCausets {
			handleDefCauss = append(handleDefCauss, defcaus[idxDefCaus.Offset])
		}
	default:
		handleDefCauss = []*perceptron.DeferredCausetInfo{perceptron.NewExtraHandleDefCausInfo()}
	}

	ctx := context.Background()
	// Build a desc scan of tblInfo with limit 1; we can use it to retrieve the last handle of the causet.
	result, err := dc.buildDescBlockScan(ctx, startTS, tbl, handleDefCauss, 1)
	if err != nil {
		return nil, false, errors.Trace(err)
	}
	defer terror.Call(result.Close)

	chk := chunk.New(getDeferredCausetsTypes(handleDefCauss), 1, 1)
	err = result.Next(ctx, chk)
	if err != nil {
		return nil, false, errors.Trace(err)
	}

	if chk.NumRows() == 0 {
		// empty causet
		return nil, true, nil
	}
	sessCtx := newContext(dc.causetstore)
	event := chk.GetRow(0)
	if tblInfo.IsCommonHandle {
		maxHandle, err = buildCommonHandleFromChunkRow(sessCtx.GetStochastikVars().StmtCtx, tblInfo, pkIdx, handleDefCauss, event)
		return maxHandle, false, err
	}
	return ekv.IntHandle(event.GetInt64(0)), false, nil
}

func buildCommonHandleFromChunkRow(sctx *stmtctx.StatementContext, tblInfo *perceptron.BlockInfo, idxInfo *perceptron.IndexInfo,
	defcaus []*perceptron.DeferredCausetInfo, event chunk.Row) (ekv.Handle, error) {
	fieldTypes := make([]*types.FieldType, 0, len(defcaus))
	for _, defCaus := range defcaus {
		fieldTypes = append(fieldTypes, &defCaus.FieldType)
	}
	datumRow := event.GetCausetRow(fieldTypes)
	blockcodec.TruncateIndexValues(tblInfo, idxInfo, datumRow)

	var handleBytes []byte
	handleBytes, err := codec.EncodeKey(sctx, nil, datumRow...)
	if err != nil {
		return nil, err
	}
	return ekv.NewCommonHandle(handleBytes)
}
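// Hypothetical usage sketch (illustrative only, not used by this package): how a
// caller might use GetBlockMaxHandle above to find the upper bound of a backfill
// range at a given snapshot version. The dc, startTS, and tbl values are assumed to
// come from the surrounding DBS job context.
func blockMaxHandleUsageSketch(dc *dbsCtx, startTS uint64, tbl causet.PhysicalBlock) (ekv.Handle, error) {
	maxHandle, emptyBlock, err := dc.GetBlockMaxHandle(startTS, tbl)
	if err != nil {
		return nil, errors.Trace(err)
	}
	if emptyBlock {
		// No rows at this snapshot, so there is nothing to backfill.
		return nil, nil
	}
	return maxHandle, nil
}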
// getBlockRange gets the start and end handle of a causet (or partition).
func getBlockRange(d *dbsCtx, tbl causet.PhysicalBlock, snapshotVer uint64, priority int) (startHandle, endHandle ekv.Handle, err error) {
	// Get the start handle of this partition.
	err = iterateSnapshotRows(d.causetstore, priority, tbl, snapshotVer, nil, nil, true,
		func(h ekv.Handle, rowKey ekv.Key, rawRecord []byte) (bool, error) {
			startHandle = h
			return false, nil
		})
	if err != nil {
		return startHandle, endHandle, errors.Trace(err)
	}
	var emptyBlock bool
	endHandle, emptyBlock, err = d.GetBlockMaxHandle(snapshotVer, tbl)
	if err != nil {
		return startHandle, endHandle, errors.Trace(err)
	}
	if emptyBlock || endHandle.Compare(startHandle) < 0 {
		logutil.BgLogger().Info("[dbs] get causet range, endHandle < startHandle", zap.String("causet", fmt.Sprintf("%v", tbl.Meta())),
			zap.Int64("causet/partition ID", tbl.GetPhysicalID()), zap.String("endHandle", toString(endHandle)), zap.String("startHandle", toString(startHandle)))
		endHandle = startHandle
	}
	return
}

func getValidCurrentVersion(causetstore ekv.CausetStorage) (ver ekv.Version, err error) {
	ver, err = causetstore.CurrentVersion()
	if err != nil {
		return ver, errors.Trace(err)
	} else if ver.Ver <= 0 {
		return ver, errInvalidStoreVer.GenWithStack("invalid storage current version %d", ver.Ver)
	}
	return ver, nil
}

func getReorgInfo(d *dbsCtx, t *spacetime.Meta, job *perceptron.Job, tbl causet.Block) (*reorgInfo, error) {
	var (
		start ekv.Handle
		end   ekv.Handle
		pid   int64
		info  reorgInfo
	)

	if job.SnapshotVer == 0 {
		info.first = true
		// Get the current version for reorganization if we don't have one yet.
		ver, err := getValidCurrentVersion(d.causetstore)
		if err != nil {
			return nil, errors.Trace(err)
		}
		tblInfo := tbl.Meta()
		pid = tblInfo.ID
		var tb causet.PhysicalBlock
		if pi := tblInfo.GetPartitionInfo(); pi != nil {
			pid = pi.Definitions[0].ID
			tb = tbl.(causet.PartitionedBlock).GetPartition(pid)
		} else {
			tb = tbl.(causet.PhysicalBlock)
		}
		start, end, err = getBlockRange(d, tb, ver.Ver, job.Priority)
		if err != nil {
			return nil, errors.Trace(err)
		}
		logutil.BgLogger().Info("[dbs] job get causet range",
			zap.Int64("jobID", job.ID), zap.Int64("physicalBlockID", pid),
			zap.String("startHandle", toString(start)), zap.String("endHandle", toString(end)))

		failpoint.Inject("errorUFIDelateReorgHandle", func() (*reorgInfo, error) {
			return &info, errors.New("occur an error when uFIDelate reorg handle")
		})
		err = t.UFIDelateDBSReorgHandle(job, start, end, pid)
		if err != nil {
			return &info, errors.Trace(err)
		}
		// UFIDelating the info should happen after the data is persisted.
		job.SnapshotVer = ver.Ver
	} else {
		var err error
		start, end, pid, err = t.GetDBSReorgHandle(job, tbl.Meta().IsCommonHandle)
		if err != nil {
			return nil, errors.Trace(err)
		}
	}
	info.Job = job
	info.d = d
	info.StartHandle = start
	info.EndHandle = end
	info.PhysicalBlockID = pid

	return &info, nil
}

func (r *reorgInfo) UFIDelateReorgMeta(txn ekv.Transaction, startHandle, endHandle ekv.Handle, physicalBlockID int64) error {
	if startHandle == nil && endHandle == nil {
		return nil
	}
	t := spacetime.NewMeta(txn)
	return errors.Trace(t.UFIDelateDBSReorgHandle(r.Job, startHandle, endHandle, physicalBlockID))
}
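// Hypothetical sketch (not part of this package): the reorg handle round trip.
// getReorgInfo above reads, or initializes, the [StartHandle, EndHandle] range of a
// job, and UFIDelateReorgMeta persists the remaining range inside a transaction so
// the job can resume from where it stopped after a restart. The txn and nextStart
// values are assumed to come from the backfilling worker.
func persistReorgProgressSketch(r *reorgInfo, txn ekv.Transaction, nextStart ekv.Handle) error {
	// Only the start of the not-yet-processed range moves forward; the end handle
	// and the physical causet ID stay the same.
	return r.UFIDelateReorgMeta(txn, nextStart, r.EndHandle, r.PhysicalBlockID)
}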