github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/servermaster/jobop/operator.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package jobop
    15  
    16  import (
    17  	"context"
    18  
    19  	"github.com/pingcap/log"
    20  	frameworkModel "github.com/pingcap/tiflow/engine/framework/model"
    21  	pkgOrm "github.com/pingcap/tiflow/engine/pkg/orm"
    22  	ormModel "github.com/pingcap/tiflow/engine/pkg/orm/model"
    23  	"go.uber.org/multierr"
    24  	"go.uber.org/zap"
    25  )
    26  
    27  type operateRouter interface {
    28  	SendCancelJobMessage(ctx context.Context, jobID string) error
    29  }
    30  
    31  // JobOperator abstracts a metastore based job operator, it encapsulates logic
    32  // to handle JobOp and a Tick API to ensure job moves towards to expected status.
    33  type JobOperator interface {
    34  	MarkJobCanceling(ctx context.Context, jobID string) error
    35  	MarkJobCanceled(ctx context.Context, jobID string) error
    36  	Tick(ctx context.Context) error
    37  	IsJobCanceling(ctx context.Context, jobID string) bool
    38  }
    39  
    40  // JobOperatorImpl implements JobOperator
    41  type JobOperatorImpl struct {
    42  	frameMetaClient pkgOrm.Client
    43  	router          operateRouter
    44  }
    45  
    46  // NewJobOperatorImpl creates a new JobOperatorImpl
    47  func NewJobOperatorImpl(cli pkgOrm.Client, router operateRouter) *JobOperatorImpl {
    48  	return &JobOperatorImpl{
    49  		frameMetaClient: cli,
    50  		router:          router,
    51  	}
    52  }
    53  
    54  func (oper *JobOperatorImpl) updateJobOperationStatus(
    55  	ctx context.Context, jobID string, op ormModel.JobOpStatus,
    56  ) error {
    57  	var ormFn func(ctx context.Context, JobID string) (pkgOrm.Result, error)
    58  	switch op {
    59  	case ormModel.JobOpStatusNoop:
    60  		ormFn = oper.frameMetaClient.SetJobNoop
    61  	case ormModel.JobOpStatusCanceling:
    62  		ormFn = oper.frameMetaClient.SetJobCanceling
    63  	case ormModel.JobOpStatusCanceled:
    64  		ormFn = oper.frameMetaClient.SetJobCanceled
    65  	default:
    66  		log.Panic("unexpected job operate", zap.Any("op", op))
    67  	}
    68  	if result, err := ormFn(ctx, jobID); err != nil {
    69  		return err
    70  	} else if result.RowsAffected() == 0 {
    71  		log.Info("job status is already set", zap.String("job-id", jobID), zap.Any("op", op))
    72  	}
    73  	return nil
    74  }
    75  
    76  // MarkJobNoop implements JobOperator.MarkJobNoop
    77  func (oper *JobOperatorImpl) MarkJobNoop(ctx context.Context, jobID string) error {
    78  	return oper.updateJobOperationStatus(ctx, jobID, ormModel.JobOpStatusNoop)
    79  }
    80  
    81  // MarkJobCanceling implements JobOperator.MarkJobCanceling
    82  func (oper *JobOperatorImpl) MarkJobCanceling(ctx context.Context, jobID string) error {
    83  	return oper.updateJobOperationStatus(ctx, jobID, ormModel.JobOpStatusCanceling)
    84  }
    85  
    86  // MarkJobCanceled implements JobOperator.MarkJobCanceled
    87  func (oper *JobOperatorImpl) MarkJobCanceled(ctx context.Context, jobID string) error {
    88  	return oper.updateJobOperationStatus(ctx, jobID, ormModel.JobOpStatusCanceled)
    89  }
    90  
    91  // Tick implements JobOperator.Tick
    92  func (oper *JobOperatorImpl) Tick(ctx context.Context) error {
    93  	ops, err := oper.frameMetaClient.QueryJobOpsByStatus(ctx, ormModel.JobOpStatusCanceling)
    94  	if err != nil {
    95  		return err
    96  	}
    97  	var errs error
    98  	for _, op := range ops {
    99  		isJobTerminated, err := oper.checkJobStatus(ctx, op.JobID)
   100  		if err != nil {
   101  			errs = multierr.Append(errs, err)
   102  			continue
   103  		}
   104  		if isJobTerminated {
   105  			continue
   106  		}
   107  		if err := oper.router.SendCancelJobMessage(ctx, op.JobID); err != nil {
   108  			log.Warn("send cancel message to job master failed",
   109  				zap.String("job-id", op.JobID), zap.Error(err))
   110  		}
   111  	}
   112  	return errs
   113  }
   114  
   115  // IsJobCanceling implements JobOperator
   116  func (oper *JobOperatorImpl) IsJobCanceling(ctx context.Context, jobID string) bool {
   117  	op, err := oper.frameMetaClient.QueryJobOp(ctx, jobID)
   118  	if err != nil {
   119  		if !pkgOrm.IsNotFoundError(err) {
   120  			log.Warn("failed to query job canceling state", zap.Error(err))
   121  		}
   122  		return false
   123  	}
   124  	return op.Op == ormModel.JobOpStatusCanceling
   125  }
   126  
   127  // check job status, if job is in terminated, return true, otherwise return false
   128  // and the upper logic needs to send canceling message. Return value
   129  // - whether job is in terminated state
   130  // - error
   131  func (oper *JobOperatorImpl) checkJobStatus(
   132  	ctx context.Context, jobID string,
   133  ) (bool, error) {
   134  	isJobTerminated := false
   135  	meta, err := oper.frameMetaClient.GetJobByID(ctx, jobID)
   136  	if err != nil {
   137  		if pkgOrm.IsNotFoundError(err) {
   138  			log.Warn("found orphan job operation", zap.String("job-id", jobID))
   139  			isJobTerminated = true
   140  			return isJobTerminated, oper.MarkJobNoop(ctx, jobID)
   141  		}
   142  		return isJobTerminated, err
   143  	}
   144  	switch meta.State {
   145  	case frameworkModel.MasterStateFinished,
   146  		frameworkModel.MasterStateStopped, frameworkModel.MasterStateFailed:
   147  		isJobTerminated = true
   148  		return isJobTerminated, oper.MarkJobCanceled(ctx, jobID)
   149  	}
   150  	return isJobTerminated, nil
   151  }