github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/servermaster/jobop/operator.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package jobop 15 16 import ( 17 "context" 18 19 "github.com/pingcap/log" 20 frameworkModel "github.com/pingcap/tiflow/engine/framework/model" 21 pkgOrm "github.com/pingcap/tiflow/engine/pkg/orm" 22 ormModel "github.com/pingcap/tiflow/engine/pkg/orm/model" 23 "go.uber.org/multierr" 24 "go.uber.org/zap" 25 ) 26 27 type operateRouter interface { 28 SendCancelJobMessage(ctx context.Context, jobID string) error 29 } 30 31 // JobOperator abstracts a metastore based job operator, it encapsulates logic 32 // to handle JobOp and a Tick API to ensure job moves towards to expected status. 33 type JobOperator interface { 34 MarkJobCanceling(ctx context.Context, jobID string) error 35 MarkJobCanceled(ctx context.Context, jobID string) error 36 Tick(ctx context.Context) error 37 IsJobCanceling(ctx context.Context, jobID string) bool 38 } 39 40 // JobOperatorImpl implements JobOperator 41 type JobOperatorImpl struct { 42 frameMetaClient pkgOrm.Client 43 router operateRouter 44 } 45 46 // NewJobOperatorImpl creates a new JobOperatorImpl 47 func NewJobOperatorImpl(cli pkgOrm.Client, router operateRouter) *JobOperatorImpl { 48 return &JobOperatorImpl{ 49 frameMetaClient: cli, 50 router: router, 51 } 52 } 53 54 func (oper *JobOperatorImpl) updateJobOperationStatus( 55 ctx context.Context, jobID string, op ormModel.JobOpStatus, 56 ) error { 57 var ormFn func(ctx context.Context, JobID string) (pkgOrm.Result, error) 58 switch op { 59 case ormModel.JobOpStatusNoop: 60 ormFn = oper.frameMetaClient.SetJobNoop 61 case ormModel.JobOpStatusCanceling: 62 ormFn = oper.frameMetaClient.SetJobCanceling 63 case ormModel.JobOpStatusCanceled: 64 ormFn = oper.frameMetaClient.SetJobCanceled 65 default: 66 log.Panic("unexpected job operate", zap.Any("op", op)) 67 } 68 if result, err := ormFn(ctx, jobID); err != nil { 69 return err 70 } else if result.RowsAffected() == 0 { 71 log.Info("job status is already set", zap.String("job-id", jobID), zap.Any("op", op)) 72 } 73 return nil 74 } 75 76 // MarkJobNoop implements JobOperator.MarkJobNoop 77 func (oper *JobOperatorImpl) MarkJobNoop(ctx context.Context, jobID string) error { 78 return oper.updateJobOperationStatus(ctx, jobID, ormModel.JobOpStatusNoop) 79 } 80 81 // MarkJobCanceling implements JobOperator.MarkJobCanceling 82 func (oper *JobOperatorImpl) MarkJobCanceling(ctx context.Context, jobID string) error { 83 return oper.updateJobOperationStatus(ctx, jobID, ormModel.JobOpStatusCanceling) 84 } 85 86 // MarkJobCanceled implements JobOperator.MarkJobCanceled 87 func (oper *JobOperatorImpl) MarkJobCanceled(ctx context.Context, jobID string) error { 88 return oper.updateJobOperationStatus(ctx, jobID, ormModel.JobOpStatusCanceled) 89 } 90 91 // Tick implements JobOperator.Tick 92 func (oper *JobOperatorImpl) Tick(ctx context.Context) error { 93 ops, err := oper.frameMetaClient.QueryJobOpsByStatus(ctx, ormModel.JobOpStatusCanceling) 94 if err != nil { 95 return err 96 } 97 var errs error 98 for _, op := range ops { 99 isJobTerminated, err := oper.checkJobStatus(ctx, op.JobID) 100 if err != nil { 101 errs = multierr.Append(errs, err) 102 continue 103 } 104 if isJobTerminated { 105 continue 106 } 107 if err := oper.router.SendCancelJobMessage(ctx, op.JobID); err != nil { 108 log.Warn("send cancel message to job master failed", 109 zap.String("job-id", op.JobID), zap.Error(err)) 110 } 111 } 112 return errs 113 } 114 115 // IsJobCanceling implements JobOperator 116 func (oper *JobOperatorImpl) IsJobCanceling(ctx context.Context, jobID string) bool { 117 op, err := oper.frameMetaClient.QueryJobOp(ctx, jobID) 118 if err != nil { 119 if !pkgOrm.IsNotFoundError(err) { 120 log.Warn("failed to query job canceling state", zap.Error(err)) 121 } 122 return false 123 } 124 return op.Op == ormModel.JobOpStatusCanceling 125 } 126 127 // check job status, if job is in terminated, return true, otherwise return false 128 // and the upper logic needs to send canceling message. Return value 129 // - whether job is in terminated state 130 // - error 131 func (oper *JobOperatorImpl) checkJobStatus( 132 ctx context.Context, jobID string, 133 ) (bool, error) { 134 isJobTerminated := false 135 meta, err := oper.frameMetaClient.GetJobByID(ctx, jobID) 136 if err != nil { 137 if pkgOrm.IsNotFoundError(err) { 138 log.Warn("found orphan job operation", zap.String("job-id", jobID)) 139 isJobTerminated = true 140 return isJobTerminated, oper.MarkJobNoop(ctx, jobID) 141 } 142 return isJobTerminated, err 143 } 144 switch meta.State { 145 case frameworkModel.MasterStateFinished, 146 frameworkModel.MasterStateStopped, frameworkModel.MasterStateFailed: 147 isJobTerminated = true 148 return isJobTerminated, oper.MarkJobCanceled(ctx, jobID) 149 } 150 return isJobTerminated, nil 151 }