github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/jobmaster/dm/runtime/worker_status.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package runtime 15 16 import ( 17 "encoding/json" 18 "fmt" 19 "time" 20 21 "github.com/pingcap/tiflow/engine/framework" 22 frameModel "github.com/pingcap/tiflow/engine/framework/model" 23 "github.com/pingcap/tiflow/pkg/errors" 24 ) 25 26 // HeartbeatInterval is the heartbeat interval for checking worker stage 27 // TODO: expose this config in lib 28 var HeartbeatInterval = 3 * time.Second 29 30 /* 31 ,──────────────. ,────────────. ,─────────────. ,──────────────. 32 │WorkerCreating│ │WorkerOnline│ │WorkerOffline│ │WorkerFinished│ 33 `──────┬───────' `─────┬──────' `──────┬──────' `──────┬───────' 34 │ │ │ │ 35 CreateWorker │ │ │ │ 36 ───────────────►│ │ │ │ 37 │ OnWorkerOnline │ │ │ 38 ├───────────────────►│ │ │ 39 │ │ OnWorkerOffline │ │ 40 │ ├───────────────────►│ │ 41 │ │ │ │ 42 │ │ │ │ 43 │ │ OnWorkerFinished │ │ 44 │ ├────────────────────┼──────────────────►│ 45 │ │ │ │ 46 │ OnWorkerOffline/OnWorkerDispatch │ │ 47 ├────────────────────┬───────────────────►│ │ 48 │ │ │ │ 49 │ │ │ │ 50 │ │ │ │ 51 │ │ │ │ 52 │ OnWorkerFinished │ │ │ 53 ├────────────────────┼────────────────────┼──────────────────►│ 54 │ │ │ │ 55 │ │ │ │ 56 */ 57 58 // WorkerStage represents the stage of a worker. 
59 type WorkerStage int 60 61 // All available WorkerStage 62 const ( 63 WorkerCreating WorkerStage = iota + 1 64 WorkerOnline 65 WorkerFinished 66 WorkerOffline 67 // WorkerDestroying 68 ) 69 70 var typesStringify = [...]string{ 71 0: "", 72 WorkerCreating: "Creating", 73 WorkerOnline: "Online", 74 WorkerFinished: "Finished", 75 WorkerOffline: "Offline", 76 } 77 78 var toWorkerStage map[string]WorkerStage 79 80 func init() { 81 toWorkerStage = make(map[string]WorkerStage, len(typesStringify)) 82 for i, s := range typesStringify { 83 toWorkerStage[s] = WorkerStage(i) 84 } 85 } 86 87 // String implements fmt.Stringer interface 88 func (ws WorkerStage) String() string { 89 if int(ws) >= len(typesStringify) || ws < 0 { 90 return fmt.Sprintf("Unknown WorkerStage %d", ws) 91 } 92 return typesStringify[ws] 93 } 94 95 // MarshalJSON marshals the enum as a quoted json string 96 func (ws WorkerStage) MarshalJSON() ([]byte, error) { 97 return json.Marshal(ws.String()) 98 } 99 100 // UnmarshalJSON unmashals a quoted json string to the enum value 101 func (ws *WorkerStage) UnmarshalJSON(b []byte) error { 102 var ( 103 j string 104 ok bool 105 ) 106 if err := json.Unmarshal(b, &j); err != nil { 107 return err 108 } 109 *ws, ok = toWorkerStage[j] 110 if !ok { 111 return errors.Errorf("Unknown WorkerStage %s", j) 112 } 113 return nil 114 } 115 116 // WorkerStatus manages worker state machine 117 type WorkerStatus struct { 118 TaskID string 119 ID frameModel.WorkerID 120 Unit framework.WorkerType 121 Stage WorkerStage 122 CfgModRevision uint64 123 // only use when creating, change to updatedTime if needed. 124 createdTime time.Time 125 } 126 127 // IsOffline checks whether worker stage is offline 128 func (w *WorkerStatus) IsOffline() bool { 129 return w.Stage == WorkerOffline 130 } 131 132 // IsTombStone returns whether the worker is tombstone, which means we don't need to stop it. 
133 func (w *WorkerStatus) IsTombStone() bool { 134 return w.Stage == WorkerOffline || w.Stage == WorkerFinished || w.CreateFailed() 135 } 136 137 // CreateFailed checks whether the worker creation is failed 138 func (w *WorkerStatus) CreateFailed() bool { 139 return w.Stage == WorkerCreating && w.createdTime.Add(2*HeartbeatInterval).Before(time.Now()) 140 } 141 142 // RunAsExpected returns whether a worker is running. 143 // Currently, we regard worker run as expected except it is offline. 144 func (w *WorkerStatus) RunAsExpected() bool { 145 return w.Stage == WorkerOnline || w.Stage == WorkerCreating || w.Stage == WorkerFinished 146 } 147 148 // InitWorkerStatus creates a new worker status and initializes it 149 func InitWorkerStatus(taskID string, unit framework.WorkerType, id frameModel.WorkerID) WorkerStatus { 150 workerStatus := NewWorkerStatus(taskID, unit, id, WorkerCreating, 0) 151 workerStatus.createdTime = time.Now() 152 return workerStatus 153 } 154 155 // NewWorkerStatus creates a new WorkerStatus instance 156 func NewWorkerStatus(taskID string, unit framework.WorkerType, id frameModel.WorkerID, stage WorkerStage, cfgModRevision uint64) WorkerStatus { 157 return WorkerStatus{ 158 TaskID: taskID, 159 ID: id, 160 Unit: unit, 161 Stage: stage, 162 CfgModRevision: cfgModRevision, 163 } 164 }