github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/executor/fakejob/worker.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package fakejob 15 16 import ( 17 "context" 18 "fmt" 19 "sync" 20 "time" 21 22 "github.com/pingcap/log" 23 "github.com/pingcap/tiflow/engine/framework" 24 frameModel "github.com/pingcap/tiflow/engine/framework/model" 25 dcontext "github.com/pingcap/tiflow/engine/pkg/context" 26 fakejobPkg "github.com/pingcap/tiflow/engine/pkg/fakejob" 27 "github.com/pingcap/tiflow/engine/pkg/p2p" 28 "github.com/pingcap/tiflow/pkg/errors" 29 "go.etcd.io/etcd/api/v3/mvccpb" 30 clientv3 "go.etcd.io/etcd/client/v3" 31 "go.uber.org/atomic" 32 "go.uber.org/zap" 33 "golang.org/x/time/rate" 34 "google.golang.org/grpc" 35 ) 36 37 var _ framework.Worker = (*dummyWorker)(nil) 38 39 type ( 40 // Worker is exposed for unit test 41 Worker = dummyWorker 42 43 dummyWorker struct { 44 framework.BaseWorker 45 46 init bool 47 cancel context.CancelFunc 48 status *fakejobPkg.DummyWorkerStatus 49 config *fakejobPkg.WorkerConfig 50 errCh chan error 51 closed *atomic.Bool 52 canceling *atomic.Bool 53 54 statusRateLimiter *rate.Limiter 55 56 statusCode struct { 57 sync.RWMutex 58 code frameModel.WorkerState 59 } 60 61 startTime time.Time 62 } 63 ) 64 65 func (d *dummyWorker) InitImpl(_ context.Context) error { 66 if !d.init { 67 if d.config.EtcdWatchEnable { 68 // Don't use the ctx from the caller, because it may be canceled by the caller after InitImpl() returns. 69 ctx, cancel := context.WithCancel(context.Background()) 70 d.bgRunEtcdWatcher(ctx) 71 d.cancel = cancel 72 } 73 d.init = true 74 d.setState(frameModel.WorkerStateNormal) 75 d.startTime = time.Now() 76 return nil 77 } 78 return errors.New("repeated init") 79 } 80 81 func (d *dummyWorker) Tick(ctx context.Context) error { 82 if !d.init { 83 return errors.New("not yet init") 84 } 85 86 select { 87 case err := <-d.errCh: 88 return err 89 default: 90 } 91 92 d.status.DoTick() 93 94 if d.statusRateLimiter.Allow() { 95 log.Info("FakeWorker: Tick", zap.String("worker-id", d.ID()), zap.Int64("tick", d.status.Tick)) 96 err := d.BaseWorker.UpdateStatus(ctx, d.Status()) 97 if err != nil { 98 if errors.Is(err, errors.ErrWorkerUpdateStatusTryAgain) { 99 log.Warn("update status try again later", zap.String("error", err.Error())) 100 return nil 101 } 102 return err 103 } 104 } 105 106 if d.closed.Load() { 107 return nil 108 } 109 110 extMsg, err := d.status.Marshal() 111 if err != nil { 112 return err 113 } 114 115 if d.canceling.Load() { 116 d.setState(frameModel.WorkerStateStopped) 117 return d.Exit(ctx, framework.ExitReasonCanceled, nil, extMsg) 118 } 119 120 if d.status.Tick >= d.config.TargetTick { 121 d.setState(frameModel.WorkerStateFinished) 122 return d.Exit(ctx, framework.ExitReasonFinished, nil, extMsg) 123 } 124 125 if d.config.InjectErrorInterval != 0 { 126 if time.Since(d.startTime) > d.config.InjectErrorInterval { 127 return errors.Errorf("injected error by worker: %d", d.config.ID) 128 } 129 } 130 return nil 131 } 132 133 func (d *dummyWorker) Status() frameModel.WorkerStatus { 134 if d.init { 135 extBytes, err := d.status.Marshal() 136 if err != nil { 137 log.Panic("unexpected error", zap.Error(err)) 138 } 139 return frameModel.WorkerStatus{ 140 State: d.getState(), 141 ExtBytes: extBytes, 142 } 143 } 144 return frameModel.WorkerStatus{State: frameModel.WorkerStateCreated} 145 } 146 147 func (d *dummyWorker) OnMasterMessage(ctx context.Context, topic p2p.Topic, message p2p.MessageValue) error { 148 log.Info("fakeWorker: OnMasterMessage", zap.Any("message", message)) 149 switch msg := message.(type) { 150 case *frameModel.StatusChangeRequest: 151 switch msg.ExpectState { 152 case frameModel.WorkerStateStopped: 153 d.canceling.Store(true) 154 default: 155 log.Info("FakeWorker: ignore status change state", zap.Int32("state", int32(msg.ExpectState))) 156 } 157 default: 158 log.Info("unsupported message", zap.Any("message", message)) 159 } 160 161 return nil 162 } 163 164 func (d *dummyWorker) CloseImpl(ctx context.Context) { 165 if d.closed.CompareAndSwap(false, true) { 166 if d.cancel != nil { 167 d.cancel() 168 } 169 } 170 } 171 172 func (d *dummyWorker) setState(code frameModel.WorkerState) { 173 d.statusCode.Lock() 174 defer d.statusCode.Unlock() 175 d.statusCode.code = code 176 } 177 178 func (d *dummyWorker) getState() frameModel.WorkerState { 179 d.statusCode.RLock() 180 defer d.statusCode.RUnlock() 181 return d.statusCode.code 182 } 183 184 func (d *dummyWorker) bgRunEtcdWatcher(ctx context.Context) { 185 go func() { 186 if err := d.createEtcdWatcher(ctx); err != nil { 187 select { 188 case d.errCh <- err: 189 default: 190 log.Warn("duplicated error", zap.Error(err)) 191 } 192 } 193 }() 194 } 195 196 func (d *dummyWorker) createEtcdWatcher(ctx context.Context) error { 197 cli, err := clientv3.New(clientv3.Config{ 198 Endpoints: d.config.EtcdEndpoints, 199 Context: ctx, 200 DialTimeout: 3 * time.Second, 201 DialOptions: []grpc.DialOption{}, 202 }) 203 if err != nil { 204 return errors.Trace(err) 205 } 206 key := fmt.Sprintf("%s%d", d.config.EtcdWatchPrefix, d.config.ID) 207 watchLoop: 208 for { 209 select { 210 case <-ctx.Done(): 211 return errors.Trace(ctx.Err()) 212 default: 213 } 214 opts := make([]clientv3.OpOption, 0) 215 revision := d.status.GetEtcdCheckpoint().Revision 216 if revision > 0 { 217 opts = append(opts, clientv3.WithRev(revision+1)) 218 } 219 ch := cli.Watch(clientv3.WithRequireLeader(ctx), key, opts...) 220 log.Info("start to watch etcd", zap.String("key", key), 221 zap.Int64("revision", revision), 222 zap.Strings("endpoints", d.config.EtcdEndpoints)) 223 for resp := range ch { 224 if resp.Err() != nil { 225 log.Warn("watch met error", zap.Error(resp.Err())) 226 continue watchLoop 227 } 228 for _, event := range resp.Events { 229 // no concurrent write of this checkpoint, so it is safe to read 230 // old value, change it and overwrite. 231 ckpt := d.status.GetEtcdCheckpoint() 232 ckpt.MvccCount++ 233 ckpt.Revision = event.Kv.ModRevision 234 switch event.Type { 235 case mvccpb.PUT: 236 ckpt.Value = string(event.Kv.Value) 237 case mvccpb.DELETE: 238 ckpt.Value = "" 239 } 240 d.status.SetEtcdCheckpoint(&ckpt) 241 } 242 } 243 } 244 } 245 246 // NewDummyWorker creates a new dummy worker instance 247 func NewDummyWorker( 248 ctx *dcontext.Context, 249 id frameModel.WorkerID, masterID frameModel.MasterID, 250 wcfg *fakejobPkg.WorkerConfig, 251 ) framework.WorkerImpl { 252 status := &fakejobPkg.DummyWorkerStatus{ 253 BusinessID: wcfg.ID, 254 Tick: wcfg.Checkpoint.Tick, 255 Checkpoint: &fakejobPkg.WorkerCheckpoint{ 256 Revision: wcfg.Checkpoint.Revision, 257 MvccCount: wcfg.Checkpoint.MvccCount, 258 Value: wcfg.Checkpoint.Value, 259 }, 260 } 261 return &dummyWorker{ 262 statusRateLimiter: rate.NewLimiter(rate.Every(100*time.Millisecond), 1), 263 status: status, 264 config: wcfg, 265 errCh: make(chan error, 1), 266 closed: atomic.NewBool(false), 267 canceling: atomic.NewBool(false), 268 } 269 }