github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/jobmaster/dm/runtime/worker_status.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package runtime
    15  
    16  import (
    17  	"encoding/json"
    18  	"fmt"
    19  	"time"
    20  
    21  	"github.com/pingcap/tiflow/engine/framework"
    22  	frameModel "github.com/pingcap/tiflow/engine/framework/model"
    23  	"github.com/pingcap/tiflow/pkg/errors"
    24  )
    25  
    26  // HeartbeatInterval is heartbeat interval for checking worker stage
    27  // TODO: expose this config in lib
    28  var HeartbeatInterval = 3 * time.Second
    29  
    30  /*
    31           ,──────────────.      ,────────────.      ,─────────────.     ,──────────────.
    32           │WorkerCreating│      │WorkerOnline│      │WorkerOffline│     │WorkerFinished│
    33           `──────┬───────'      `─────┬──────'      `──────┬──────'     `──────┬───────'
    34                  │                    │                    │                   │
    35    CreateWorker  │                    │                    │                   │
    36  ───────────────►│                    │                    │                   │
    37                  │  OnWorkerOnline    │                    │                   │
    38                  ├───────────────────►│                    │                   │
    39                  │                    │  OnWorkerOffline   │                   │
    40                  │                    ├───────────────────►│                   │
    41                  │                    │                    │                   │
    42                  │                    │                    │                   │
    43                  │                    │  OnWorkerFinished  │                   │
    44                  │                    ├────────────────────┼──────────────────►│
    45                  │                    │                    │                   │
     46                  │  OnWorkerOffline/OnWorkerDispatch       │                   │
    47                  ├────────────────────┬───────────────────►│                   │
    48                  │                    │                    │                   │
    49                  │                    │                    │                   │
    50                  │                    │                    │                   │
    51                  │                    │                    │                   │
    52                  │  OnWorkerFinished  │                    │                   │
    53                  ├────────────────────┼────────────────────┼──────────────────►│
    54                  │                    │                    │                   │
    55                  │                    │                    │                   │
    56  */
    57  
    58  // WorkerStage represents the stage of a worker.
    59  type WorkerStage int
    60  
    61  // All available WorkerStage
    62  const (
    63  	WorkerCreating WorkerStage = iota + 1
    64  	WorkerOnline
    65  	WorkerFinished
    66  	WorkerOffline
    67  	// WorkerDestroying
    68  )
    69  
    70  var typesStringify = [...]string{
    71  	0:              "",
    72  	WorkerCreating: "Creating",
    73  	WorkerOnline:   "Online",
    74  	WorkerFinished: "Finished",
    75  	WorkerOffline:  "Offline",
    76  }
    77  
    78  var toWorkerStage map[string]WorkerStage
    79  
    80  func init() {
    81  	toWorkerStage = make(map[string]WorkerStage, len(typesStringify))
    82  	for i, s := range typesStringify {
    83  		toWorkerStage[s] = WorkerStage(i)
    84  	}
    85  }
    86  
    87  // String implements fmt.Stringer interface
    88  func (ws WorkerStage) String() string {
    89  	if int(ws) >= len(typesStringify) || ws < 0 {
    90  		return fmt.Sprintf("Unknown WorkerStage %d", ws)
    91  	}
    92  	return typesStringify[ws]
    93  }
    94  
    95  // MarshalJSON marshals the enum as a quoted json string
    96  func (ws WorkerStage) MarshalJSON() ([]byte, error) {
    97  	return json.Marshal(ws.String())
    98  }
    99  
   100  // UnmarshalJSON unmashals a quoted json string to the enum value
   101  func (ws *WorkerStage) UnmarshalJSON(b []byte) error {
   102  	var (
   103  		j  string
   104  		ok bool
   105  	)
   106  	if err := json.Unmarshal(b, &j); err != nil {
   107  		return err
   108  	}
   109  	*ws, ok = toWorkerStage[j]
   110  	if !ok {
   111  		return errors.Errorf("Unknown WorkerStage %s", j)
   112  	}
   113  	return nil
   114  }
   115  
// WorkerStatus manages worker state machine.
//
// It records the worker's identity (TaskID, ID, Unit), its current Stage and
// the CfgModRevision value supplied when the status was built. The exported
// fields are populated by NewWorkerStatus; createdTime is additionally set by
// InitWorkerStatus.
type WorkerStatus struct {
	TaskID         string
	ID             frameModel.WorkerID
	Unit           framework.WorkerType
	Stage          WorkerStage
	CfgModRevision uint64
	// createdTime is only used while the worker is in WorkerCreating:
	// InitWorkerStatus sets it and CreateFailed compares it against the
	// current time. Change it to updatedTime if other stages ever need it.
	// Being unexported, it is not included by encoding/json.
	createdTime time.Time
}
   126  
   127  // IsOffline checks whether worker stage is offline
   128  func (w *WorkerStatus) IsOffline() bool {
   129  	return w.Stage == WorkerOffline
   130  }
   131  
   132  // IsTombStone returns whether the worker is tombstone, which means we don't need to stop it.
   133  func (w *WorkerStatus) IsTombStone() bool {
   134  	return w.Stage == WorkerOffline || w.Stage == WorkerFinished || w.CreateFailed()
   135  }
   136  
   137  // CreateFailed checks whether the worker creation is failed
   138  func (w *WorkerStatus) CreateFailed() bool {
   139  	return w.Stage == WorkerCreating && w.createdTime.Add(2*HeartbeatInterval).Before(time.Now())
   140  }
   141  
   142  // RunAsExpected returns whether a worker is running.
   143  // Currently, we regard worker run as expected except it is offline.
   144  func (w *WorkerStatus) RunAsExpected() bool {
   145  	return w.Stage == WorkerOnline || w.Stage == WorkerCreating || w.Stage == WorkerFinished
   146  }
   147  
   148  // InitWorkerStatus creates a new worker status and initializes it
   149  func InitWorkerStatus(taskID string, unit framework.WorkerType, id frameModel.WorkerID) WorkerStatus {
   150  	workerStatus := NewWorkerStatus(taskID, unit, id, WorkerCreating, 0)
   151  	workerStatus.createdTime = time.Now()
   152  	return workerStatus
   153  }
   154  
   155  // NewWorkerStatus creates a new WorkerStatus instance
   156  func NewWorkerStatus(taskID string, unit framework.WorkerType, id frameModel.WorkerID, stage WorkerStage, cfgModRevision uint64) WorkerStatus {
   157  	return WorkerStatus{
   158  		TaskID:         taskID,
   159  		ID:             id,
   160  		Unit:           unit,
   161  		Stage:          stage,
   162  		CfgModRevision: cfgModRevision,
   163  	}
   164  }