github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/executor/cvs/cvstask.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package cvs 15 16 import ( 17 "context" 18 "encoding/json" 19 "math/rand" 20 "sync" 21 "time" 22 23 "github.com/pingcap/log" 24 pb "github.com/pingcap/tiflow/engine/enginepb" 25 "github.com/pingcap/tiflow/engine/framework" 26 frameModel "github.com/pingcap/tiflow/engine/framework/model" 27 "github.com/pingcap/tiflow/engine/framework/registry" 28 dcontext "github.com/pingcap/tiflow/engine/pkg/context" 29 "github.com/pingcap/tiflow/engine/pkg/p2p" 30 "github.com/pingcap/tiflow/pkg/errors" 31 "go.uber.org/atomic" 32 "go.uber.org/zap" 33 "golang.org/x/time/rate" 34 "google.golang.org/grpc" 35 ) 36 37 const ( 38 bufferSize = 1024 39 ) 40 41 type strPair struct { 42 firstStr string 43 secondStr string 44 } 45 46 // Config is cvs task config 47 type Config struct { 48 Idx int `json:"Idx"` 49 SrcHost string `json:"SrcHost"` 50 DstHost string `json:"DstHost"` 51 DstDir string `json:"DstIdx"` 52 StartLoc string `json:"StartLoc"` 53 } 54 55 // Status represents business status of cvs task 56 type Status struct { 57 TaskConfig Config `json:"Config"` 58 CurrentLoc string `json:"CurLoc"` 59 Count int64 `json:"Cnt"` 60 } 61 62 type connPool struct { 63 sync.Mutex 64 65 pool map[string]connArray 66 } 67 68 var pool connPool = connPool{pool: make(map[string]connArray)} 69 70 func (c *connPool) getConn(addr string) (*grpc.ClientConn, error) { 71 c.Lock() 72 defer c.Unlock() 73 arr, ok := c.pool[addr] 74 if !ok { 75 for i := 0; i < 5; i++ { 76 conn, err := grpc.Dial(addr, grpc.WithInsecure()) 77 if err != nil { 78 return nil, err 79 } 80 arr = append(arr, conn) 81 } 82 c.pool[addr] = arr 83 } 84 i := rand.Intn(5) 85 return arr[i], nil 86 } 87 88 type connArray []*grpc.ClientConn 89 90 type cvsTask struct { 91 framework.BaseWorker 92 Config 93 counter *atomic.Int64 94 curLoc string 95 cancelFn func() 96 buffer chan strPair 97 isEOF bool 98 99 statusCode struct { 100 sync.RWMutex 101 code frameModel.WorkerState 102 } 103 runError struct { 104 sync.RWMutex 105 err error 106 } 107 108 statusRateLimiter *rate.Limiter 109 } 110 111 // RegisterWorker is used to register cvs task worker into global registry 112 func RegisterWorker() { 113 factory := registry.NewSimpleWorkerFactory(newCvsTask) 114 registry.GlobalWorkerRegistry().MustRegisterWorkerType(frameModel.CvsTask, factory) 115 } 116 117 func newCvsTask(ctx *dcontext.Context, _workerID frameModel.WorkerID, masterID frameModel.MasterID, conf *Config) *cvsTask { 118 task := &cvsTask{ 119 Config: *conf, 120 curLoc: conf.StartLoc, 121 buffer: make(chan strPair, bufferSize), 122 statusRateLimiter: rate.NewLimiter(rate.Every(time.Second), 1), 123 counter: atomic.NewInt64(0), 124 } 125 return task 126 } 127 128 // InitImpl implements WorkerImpl.InitImpl 129 func (task *cvsTask) InitImpl(ctx context.Context) error { 130 log.Info("init the task ", zap.Any("task id :", task.ID())) 131 task.setState(frameModel.WorkerStateNormal) 132 // Don't use the ctx from the caller. Caller may cancel the ctx after InitImpl returns. 133 ctx, task.cancelFn = context.WithCancel(context.Background()) 134 go func() { 135 err := task.Receive(ctx) 136 if err != nil { 137 log.Error("error happened when reading data from the upstream ", zap.String("id", task.ID()), zap.Any("message", err.Error())) 138 task.setRunError(err) 139 task.setState(frameModel.WorkerStateError) 140 } 141 }() 142 go func() { 143 err := task.send(ctx) 144 if err != nil { 145 log.Error("error happened when writing data to the downstream ", zap.String("id", task.ID()), zap.Any("message", err.Error())) 146 task.setRunError(err) 147 task.setState(frameModel.WorkerStateError) 148 } else { 149 task.setState(frameModel.WorkerStateFinished) 150 } 151 }() 152 153 return nil 154 } 155 156 // Tick is called on a fixed interval. 157 func (task *cvsTask) Tick(ctx context.Context) error { 158 // log.Info("cvs task tick", zap.Any(" task id ", string(task.ID())+" -- "+strconv.FormatInt(task.counter, 10))) 159 if task.statusRateLimiter.Allow() { 160 err := task.BaseWorker.UpdateStatus(ctx, task.Status()) 161 if errors.Is(err, errors.ErrWorkerUpdateStatusTryAgain) { 162 log.Warn("update status try again later", zap.String("id", task.ID()), zap.String("error", err.Error())) 163 return nil 164 } 165 return err 166 } 167 168 exitReason := framework.ExitReasonUnknown 169 switch task.getState() { 170 case frameModel.WorkerStateFinished: 171 exitReason = framework.ExitReasonFinished 172 case frameModel.WorkerStateError: 173 exitReason = framework.ExitReasonFailed 174 case frameModel.WorkerStateStopped: 175 exitReason = framework.ExitReasonCanceled 176 default: 177 } 178 179 if exitReason == framework.ExitReasonUnknown { 180 return nil 181 } 182 183 return task.BaseWorker.Exit(ctx, exitReason, task.getRunError(), task.Status().ExtBytes) 184 } 185 186 // Status returns a short worker status to be periodically sent to the master. 187 func (task *cvsTask) Status() frameModel.WorkerStatus { 188 stats := &Status{ 189 TaskConfig: task.Config, 190 CurrentLoc: task.curLoc, 191 Count: task.counter.Load(), 192 } 193 statsBytes, err := json.Marshal(stats) 194 if err != nil { 195 log.Panic("get stats error", zap.String("id", task.ID()), zap.Error(err)) 196 } 197 return frameModel.WorkerStatus{ 198 State: task.getState(), 199 ErrorMsg: "", 200 ExtBytes: statsBytes, 201 } 202 } 203 204 func (task *cvsTask) OnMasterMessage(ctx context.Context, topic p2p.Topic, message p2p.MessageValue) error { 205 switch msg := message.(type) { 206 case *frameModel.StatusChangeRequest: 207 switch msg.ExpectState { 208 case frameModel.WorkerStateStopped: 209 task.setState(frameModel.WorkerStateStopped) 210 default: 211 log.Info("FakeWorker: ignore status change state", zap.Int32("state", int32(msg.ExpectState))) 212 } 213 default: 214 log.Info("unsupported message", zap.Any("message", message)) 215 } 216 217 return nil 218 } 219 220 // CloseImpl tells the WorkerImpl to quitrunStatusWorker and release resources. 221 func (task *cvsTask) CloseImpl(ctx context.Context) { 222 if task.cancelFn != nil { 223 task.cancelFn() 224 } 225 } 226 227 func (task *cvsTask) Receive(ctx context.Context) error { 228 conn, err := pool.getConn(task.SrcHost) 229 if err != nil { 230 log.Error("cann't connect with the source address ", zap.String("id", task.ID()), zap.Any("message", task.SrcHost)) 231 return err 232 } 233 client := pb.NewDataRWServiceClient(conn) 234 reader, err := client.ReadLines(ctx, &pb.ReadLinesRequest{FileIdx: int32(task.Idx), LineNo: []byte(task.StartLoc)}) 235 if err != nil { 236 log.Error("read data from file failed ", zap.String("id", task.ID()), zap.Error(err)) 237 return err 238 } 239 for { 240 reply, err := reader.Recv() 241 if err != nil { 242 log.Error("read data failed", zap.String("id", task.ID()), zap.Error(err)) 243 if !task.isEOF { 244 task.cancelFn() 245 } 246 return err 247 } 248 if reply.IsEof { 249 log.Info("Reach the end of the file ", zap.String("id", task.ID()), zap.Any("fileID", task.Idx)) 250 close(task.buffer) 251 break 252 } 253 select { 254 case <-ctx.Done(): 255 return nil 256 case task.buffer <- strPair{firstStr: string(reply.Key), secondStr: string(reply.Val)}: 257 } 258 // waiting longer time to read lines slowly 259 } 260 return nil 261 } 262 263 func (task *cvsTask) send(ctx context.Context) error { 264 conn, err := pool.getConn(task.DstHost) 265 if err != nil { 266 log.Error("can't connect with the destination address ", zap.Any("id", task.ID()), zap.Error(err)) 267 return err 268 } 269 client := pb.NewDataRWServiceClient(conn) 270 writer, err := client.WriteLines(ctx) 271 if err != nil { 272 log.Error("call write data rpc failed", zap.String("id", task.ID()), zap.Error(err)) 273 task.cancelFn() 274 return err 275 } 276 for { 277 select { 278 case kv, more := <-task.buffer: 279 if !more { 280 log.Info("Reach the end of the file ", zap.String("id", task.ID()), zap.Any("cnt", task.counter.Load()), zap.String("last write", task.curLoc)) 281 resp, err := writer.CloseAndRecv() 282 if err != nil { 283 return err 284 } 285 if len(resp.ErrMsg) > 0 { 286 log.Warn("close writing meet error", zap.String("id", task.ID())) 287 } 288 return nil 289 } 290 err := writer.Send(&pb.WriteLinesRequest{FileIdx: int32(task.Idx), Key: []byte(kv.firstStr), Value: []byte(kv.secondStr), Dir: task.DstDir}) 291 if err != nil { 292 log.Error("call write data rpc failed ", zap.String("id", task.ID()), zap.Error(err)) 293 task.cancelFn() 294 return err 295 } 296 task.counter.Add(1) 297 task.curLoc = kv.firstStr 298 case <-ctx.Done(): 299 return ctx.Err() 300 } 301 } 302 } 303 304 func (task *cvsTask) getState() frameModel.WorkerState { 305 task.statusCode.RLock() 306 defer task.statusCode.RUnlock() 307 return task.statusCode.code 308 } 309 310 func (task *cvsTask) setState(status frameModel.WorkerState) { 311 task.statusCode.Lock() 312 defer task.statusCode.Unlock() 313 task.statusCode.code = status 314 } 315 316 func (task *cvsTask) getRunError() error { 317 task.runError.RLock() 318 defer task.runError.RUnlock() 319 return task.runError.err 320 } 321 322 func (task *cvsTask) setRunError(err error) { 323 task.runError.Lock() 324 defer task.runError.Unlock() 325 task.runError.err = err 326 }