github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/syncer/safe_mode.go (about) 1 // Copyright 2019 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package syncer 15 16 import ( 17 "time" 18 19 "github.com/pingcap/failpoint" 20 "github.com/pingcap/tiflow/dm/pkg/binlog" 21 tcontext "github.com/pingcap/tiflow/dm/pkg/context" 22 "github.com/pingcap/tiflow/dm/pkg/terror" 23 "github.com/pingcap/tiflow/dm/unit" 24 "go.uber.org/zap" 25 ) 26 27 func (s *Syncer) enableSafeModeByTaskCliArgs(tctx *tcontext.Context) { 28 //nolint:errcheck 29 s.safeMode.Add(tctx, 1) 30 s.tctx.L().Info("enable safe-mode because of task cli args") 31 } 32 33 func (s *Syncer) enableSafeModeInitializationPhase(tctx *tcontext.Context) { 34 var err error 35 defer func() { 36 if err != nil { 37 // send error to the fatal chan to interrupt the process 38 s.runFatalChan <- unit.NewProcessError(err) 39 } 40 }() 41 42 s.safeMode.Reset(tctx) // in initialization phase, reset first 43 44 // cliArgs has higher priority than config 45 if s.cliArgs != nil && s.cliArgs.SafeModeDuration != "" { 46 s.enableSafeModeByTaskCliArgs(tctx) 47 return 48 } 49 50 if s.cfg.SafeMode { 51 //nolint:errcheck 52 s.safeMode.Add(tctx, 1) // add 1 but has no corresponding -1, so keeps enabled 53 s.tctx.L().Info("enable safe-mode by config") 54 } 55 56 var duration time.Duration 57 initPhaseSeconds := s.cfg.SafeModeDuration 58 59 failpoint.Inject("SafeModeInitPhaseSeconds", func(val failpoint.Value) { 60 initPhaseSeconds = val.(string) 61 s.tctx.L().Info("set initPhaseSeconds", zap.String("failpoint", "SafeModeInitPhaseSeconds"), zap.String("value", initPhaseSeconds)) 62 }) 63 if initPhaseSeconds == "" { 64 duration = time.Second * time.Duration(2*s.cfg.CheckpointFlushInterval) 65 } else { 66 duration, err = time.ParseDuration(initPhaseSeconds) 67 if err != nil { 68 s.tctx.L().Error("enable safe-mode failed due to duration parse failed", zap.String("duration", initPhaseSeconds)) 69 return 70 } 71 } 72 exitPoint := s.checkpoint.SafeModeExitPoint() 73 if exitPoint != nil { 74 beginLocation := s.checkpoint.GlobalPoint() 75 s.tctx.L().Info("compare exitPoint and beginLocation", zap.Stringer("exitPoint", exitPoint), zap.Stringer("beginLocation", beginLocation)) 76 if binlog.CompareLocation(*exitPoint, beginLocation, s.cfg.EnableGTID) == 0 { 77 s.tctx.L().Info("exitPoint equal to beginLocation, so disable the safe mode") 78 s.checkpoint.SaveSafeModeExitPoint(nil) 79 // must flush here to avoid the following situation: 80 // 1. quit safe mode 81 // 2. push forward and replicate some sqls after safeModeExitPoint to downstream 82 // 3. quit because of network error, fail to flush global checkpoint and new safeModeExitPoint to downstream 83 // 4. restart again, quit safe mode at safeModeExitPoint, but some sqls after this location have already been replicated to the downstream 84 err = s.checkpoint.FlushSafeModeExitPoint(s.runCtx) 85 return 86 } 87 if duration == 0 { 88 err = terror.ErrSyncerReprocessWithSafeModeFail.Generate() 89 s.tctx.L().Error("safe-mode-duration=0 is conflict with that exitPoint not equal to beginLocation", zap.Error(err)) 90 return 91 } 92 //nolint:errcheck 93 s.safeMode.Add(tctx, 1) // enable and will revert after pass SafeModeExitLoc 94 s.tctx.L().Info("enable safe-mode for safe mode exit point, will exit at", zap.Stringer("location", *exitPoint)) 95 } else { 96 s.tctx.L().Info("enable safe-mode because of task initialization", zap.Duration("duration", duration)) 97 98 if duration > 0 { 99 //nolint:errcheck 100 s.safeMode.Add(tctx, 1) // enable and will revert after 2 * CheckpointFlushInterval 101 go func() { 102 defer func() { 103 err2 := s.safeMode.Add(tctx, -1) 104 if err2 != nil { 105 s.runFatalChan <- unit.NewProcessError(err2) 106 } 107 if !s.safeMode.Enable() { 108 s.tctx.L().Info("disable safe-mode after task initialization finished") 109 } 110 }() 111 112 select { 113 case <-tctx.Context().Done(): 114 case <-time.After(duration): 115 } 116 }() 117 } 118 } 119 }