github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/jobmaster/dm/metadata/job.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package metadata 15 16 import ( 17 "context" 18 "encoding/json" 19 "fmt" 20 "sync" 21 "time" 22 23 "github.com/pingcap/tiflow/engine/jobmaster/dm/bootstrap" 24 "github.com/pingcap/tiflow/engine/jobmaster/dm/config" 25 "github.com/pingcap/tiflow/engine/pkg/adapter" 26 metaModel "github.com/pingcap/tiflow/engine/pkg/meta/model" 27 "github.com/pingcap/tiflow/pkg/errors" 28 "go.uber.org/zap" 29 ) 30 31 // TaskStage represents internal stage of a task. 32 // TODO: use Stage in lib or move Stage to lib. 33 // we need to use same value for stage with same name in dmpb.Stage in order to make grafana dashboard label correct, 34 // since we use the same grafana dashboard for OP and engine. 35 // there's no need for them to have same meaning, just for grafana display. 36 type TaskStage int 37 38 // These stages may be updated in later pr. 39 const ( 40 StageInit TaskStage = iota + 1 // = 1 = dmpb.Stage_New 41 StageRunning // = 2 = dmpb.Stage_Running 42 StagePaused // = 3 ~= dmpb.Stage_Paused. in engine this stage means paused by user, if it's auto-paused by error, it's StageError 43 StageFinished TaskStage = iota + 2 // = 5 = dmpb.Stage_Finished. skip 4 - Stopped, no such stage in engine, see dm/worker/metrics.go 44 StagePausing // = 6 = dmpb.Stage_Pausing 45 StageError TaskStage = iota + 10 // = 15, leave some value space for extension of dmpb.Stage 46 // StageUnscheduled means the task is not scheduled. 47 // This usually happens when the worker is offline. 48 StageUnscheduled 49 ) 50 51 var typesStringify = [...]string{ 52 0: "", 53 StageInit: "Initing", 54 StageRunning: "Running", 55 StagePaused: "Paused", 56 StageFinished: "Finished", 57 StageError: "Error", 58 StagePausing: "Pausing", 59 StageUnscheduled: "Unscheduled", 60 } 61 62 var toTaskStage map[string]TaskStage 63 64 func init() { 65 toTaskStage = make(map[string]TaskStage, len(typesStringify)) 66 toTaskStage[""] = TaskStage(0) 67 for i, s := range typesStringify { 68 if len(s) == 0 { 69 continue 70 } 71 toTaskStage[s] = TaskStage(i) 72 } 73 } 74 75 // String implements fmt.Stringer interface 76 func (ts TaskStage) String() string { 77 if int(ts) >= len(typesStringify) || ts < 0 { 78 return fmt.Sprintf("Unknown TaskStage %d", ts) 79 } 80 return typesStringify[ts] 81 } 82 83 // MarshalJSON marshals the enum as a quoted json string 84 func (ts TaskStage) MarshalJSON() ([]byte, error) { 85 return json.Marshal(ts.String()) 86 } 87 88 // UnmarshalJSON unmashals a quoted json string to the enum value 89 func (ts *TaskStage) UnmarshalJSON(b []byte) error { 90 var ( 91 j string 92 ok bool 93 ) 94 if err := json.Unmarshal(b, &j); err != nil { 95 return err 96 } 97 *ts, ok = toTaskStage[j] 98 if !ok { 99 return errors.Errorf("Unknown TaskStage %s", j) 100 } 101 return nil 102 } 103 104 // Job represents the state of a job. 105 type Job struct { 106 // taskID -> task 107 Tasks map[string]*Task 108 109 // Deleting represents whether the job is being deleted. 110 Deleting bool 111 } 112 113 // NewJob creates a new Job instance 114 func NewJob(jobCfg *config.JobCfg) *Job { 115 taskCfgs := jobCfg.ToTaskCfgs() 116 job := &Job{ 117 Tasks: make(map[string]*Task, len(taskCfgs)), 118 } 119 120 for taskID, taskCfg := range taskCfgs { 121 job.Tasks[taskID] = NewTask(taskCfg) 122 } 123 return job 124 } 125 126 // Task is the minimum working unit of a job. 127 // A job may contain multiple upstream and it will be converted into multiple tasks. 128 type Task struct { 129 Cfg *config.TaskCfg 130 Stage TaskStage 131 StageUpdatedTime time.Time 132 } 133 134 // NewTask creates a new Task instance 135 func NewTask(taskCfg *config.TaskCfg) *Task { 136 return &Task{ 137 Cfg: taskCfg, 138 Stage: StageRunning, // TODO: support set stage when create task. 139 StageUpdatedTime: time.Now(), 140 } 141 } 142 143 // JobStore manages the state of a job. 144 type JobStore struct { 145 *frameworkMetaStore 146 *bootstrap.DefaultUpgrader 147 148 mu sync.Mutex 149 logger *zap.Logger 150 } 151 152 // NewJobStore creates a new JobStore instance 153 func NewJobStore(kvClient metaModel.KVClient, pLogger *zap.Logger) *JobStore { 154 logger := pLogger.With(zap.String("component", "job_store")) 155 jobStore := &JobStore{ 156 frameworkMetaStore: newTOMLFrameworkMetaStore(kvClient), 157 DefaultUpgrader: bootstrap.NewDefaultUpgrader(logger), 158 logger: logger, 159 } 160 jobStore.frameworkMetaStore.stateFactory = jobStore 161 jobStore.DefaultUpgrader.Upgrader = jobStore 162 return jobStore 163 } 164 165 // CreateState returns an empty Job object 166 func (jobStore *JobStore) createState() state { 167 return &Job{} 168 } 169 170 // Key returns encoded key for job store 171 func (jobStore *JobStore) key() string { 172 return adapter.DMJobKeyAdapter.Encode() 173 } 174 175 // UpdateStages will be called if user operate job. 176 func (jobStore *JobStore) UpdateStages(ctx context.Context, taskIDs []string, stage TaskStage) error { 177 jobStore.mu.Lock() 178 defer jobStore.mu.Unlock() 179 state, err := jobStore.Get(ctx) 180 if err != nil { 181 return errors.Trace(err) 182 } 183 184 job := state.(*Job) 185 if job.Deleting { 186 return errors.New("failed to update stages because job is being deleted") 187 } 188 if len(taskIDs) == 0 { 189 for task := range job.Tasks { 190 taskIDs = append(taskIDs, task) 191 } 192 } 193 for _, taskID := range taskIDs { 194 t, ok := job.Tasks[taskID] 195 if !ok { 196 return errors.Errorf("task %s not found", taskID) 197 } 198 t.Stage = stage 199 t.StageUpdatedTime = time.Now() 200 } 201 202 return jobStore.Put(ctx, job) 203 } 204 205 // UpdateConfig will be called if user update job config. 206 func (jobStore *JobStore) UpdateConfig(ctx context.Context, jobCfg *config.JobCfg) error { 207 jobStore.mu.Lock() 208 defer jobStore.mu.Unlock() 209 state, err := jobStore.Get(ctx) 210 if err != nil { 211 return errors.Trace(err) 212 } 213 oldJob := state.(*Job) 214 if oldJob.Deleting { 215 return errors.New("failed to update config because job is being deleted") 216 } 217 218 // TODO: we may diff the config at task level in the future, that way different tasks will have different modify revisions. 219 // so that changing the configuration of one task will not affect other tasks. 220 var oldVersion uint64 221 for _, task := range oldJob.Tasks { 222 oldVersion = task.Cfg.ModRevision 223 break 224 } 225 jobCfg.ModRevision = oldVersion + 1 226 newJob := NewJob(jobCfg) 227 228 for taskID, newTask := range newJob.Tasks { 229 // task stage will not be updated. 230 if oldTask, ok := oldJob.Tasks[taskID]; ok { 231 newTask.Stage = oldTask.Stage 232 newTask.StageUpdatedTime = oldTask.StageUpdatedTime 233 } 234 } 235 236 return jobStore.Put(ctx, newJob) 237 } 238 239 // MarkDeleting marks the job as deleting. 240 func (jobStore *JobStore) MarkDeleting(ctx context.Context) error { 241 jobStore.mu.Lock() 242 defer jobStore.mu.Unlock() 243 state, err := jobStore.Get(ctx) 244 if err != nil { 245 return errors.Trace(err) 246 } 247 job := state.(*Job) 248 job.Deleting = true 249 return jobStore.Put(ctx, job) 250 } 251 252 // UpgradeFuncs implement the Upgrader interface. 253 func (jobStore *JobStore) UpgradeFuncs() []bootstrap.UpgradeFunc { 254 return nil 255 } 256 257 // GetJobCfg gets the job config. 258 func (jobStore *JobStore) GetJobCfg(ctx context.Context) (*config.JobCfg, error) { 259 state, err := jobStore.Get(ctx) 260 if err != nil { 261 return nil, err 262 } 263 job := state.(*Job) 264 taskCfg := make([]*config.TaskCfg, 0, len(job.Tasks)) 265 for _, task := range job.Tasks { 266 taskCfg = append(taskCfg, task.Cfg) 267 } 268 return config.FromTaskCfgs(taskCfg), nil 269 }