github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/dm/ctl/master/query_status.go (about)

     1  // Copyright 2019 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package master
    15  
    16  import (
    17  	"context"
    18  	"errors"
    19  	"os"
    20  	"strings"
    21  
    22  	"github.com/pingcap/tiflow/dm/ctl/common"
    23  	"github.com/pingcap/tiflow/dm/pb"
    24  	"github.com/spf13/cobra"
    25  )
    26  
// stageError is the status text used when a task or relay unit is in error.
// getRelayStage returns it verbatim, and wrapTaskResult uses it as the prefix
// of every error status so that error states can be detected with
// strings.HasPrefix.
const stageError = "Error"
    28  
// taskResult is the condensed view of a query-status response produced by
// wrapTaskResult: the response's overall result and message plus one summary
// entry per task.
type taskResult struct {
	// Result is copied from the response's Result field.
	Result bool        `json:"result"`
	// Msg is copied from the response's Msg field.
	Msg    string      `json:"msg"`
	// Tasks holds one summary entry per task name found in the response.
	Tasks  []*taskInfo `json:"tasks"`
}
    34  
// taskInfo summarizes one task across all sources that reported a subtask
// with that name. Empty fields are omitted from the JSON output.
type taskInfo struct {
	// TaskName is the name shared by the task's subtasks.
	TaskName   string   `json:"taskName,omitempty"`
	// TaskStatus is the status aggregated by wrapTaskResult from the subtasks'
	// stages; it starts with stageError when the task is considered failed.
	TaskStatus string   `json:"taskStatus,omitempty"`
	// Sources lists the source of each subtask that belongs to the task.
	Sources    []string `json:"sources,omitempty"`
}
    40  
    41  // NewQueryStatusCmd creates a QueryStatus command.
    42  func NewQueryStatusCmd() *cobra.Command {
    43  	cmd := &cobra.Command{
    44  		Use:   "query-status [-s source ...] [task-name | task-file] [--more]",
    45  		Short: "Queries task status",
    46  		RunE:  queryStatusFunc,
    47  	}
    48  	cmd.Flags().BoolP("more", "", false, "whether to print the detailed task information")
    49  	return cmd
    50  }
    51  
    52  // queryStatusFunc does query task's status.
    53  func queryStatusFunc(cmd *cobra.Command, _ []string) error {
    54  	if len(cmd.Flags().Args()) > 1 {
    55  		cmd.SetOut(os.Stdout)
    56  		common.PrintCmdUsage(cmd)
    57  		return errors.New("please check output to see error")
    58  	}
    59  	taskName := common.GetTaskNameFromArgOrFile(cmd.Flags().Arg(0)) // maybe empty
    60  
    61  	sources, err := common.GetSourceArgs(cmd)
    62  	if err != nil {
    63  		return err
    64  	}
    65  
    66  	ctx, cancel := context.WithTimeout(context.Background(), common.GlobalConfig().RPCTimeout)
    67  	defer cancel()
    68  
    69  	resp := &pb.QueryStatusListResponse{}
    70  	err = common.SendRequest(
    71  		ctx,
    72  		"QueryStatus",
    73  		&pb.QueryStatusListRequest{
    74  			Name:    taskName,
    75  			Sources: sources,
    76  		},
    77  		&resp,
    78  	)
    79  
    80  	if err != nil {
    81  		common.PrintLinesf("can not query %s task's status(in sources %v)", taskName, sources)
    82  		return err
    83  	}
    84  
    85  	more, err := cmd.Flags().GetBool("more")
    86  	if err != nil {
    87  		common.PrintLinesf("error in parse `--more`")
    88  		return err
    89  	}
    90  
    91  	if resp.Result && taskName == "" && len(sources) == 0 && !more {
    92  		result, hasFalseResult := wrapTaskResult(resp)
    93  		if !hasFalseResult { // if any result is false, we still print the full status.
    94  			common.PrettyPrintInterface(result)
    95  			return nil
    96  		}
    97  	}
    98  	common.PrettyPrintResponse(resp)
    99  	return nil
   100  }
   101  
   102  // errorOccurred checks ProcessResult and return true if some error occurred.
   103  func errorOccurred(result *pb.ProcessResult) bool {
   104  	return result != nil && len(result.Errors) > 0
   105  }
   106  
   107  // getRelayStage returns current relay stage (including stageError).
   108  func getRelayStage(relayStatus *pb.RelayStatus) string {
   109  	if errorOccurred(relayStatus.Result) {
   110  		return stageError
   111  	}
   112  	return relayStatus.Stage.String()
   113  }
   114  
   115  // wrapTaskResult picks task info and generate tasks' status and relative workers.
   116  func wrapTaskResult(resp *pb.QueryStatusListResponse) (result *taskResult, hasFalseResult bool) {
   117  	taskStatusMap := make(map[string]string)
   118  	taskCorrespondingSources := make(map[string][]string)
   119  	hasFalseResult = !resp.Result
   120  	for _, source := range resp.Sources {
   121  		hasFalseResult = hasFalseResult || !source.Result
   122  		relayStatus := source.SourceStatus.RelayStatus
   123  		for _, subTask := range source.SubTaskStatus {
   124  			subTaskName := subTask.Name
   125  			subTaskStage := subTask.Stage
   126  
   127  			taskCorrespondingSources[subTaskName] = append(taskCorrespondingSources[subTaskName], source.SourceStatus.Source)
   128  			taskStage := taskStatusMap[subTaskName]
   129  			// the status of a task is decided by its subtasks, the rule is listed as follows:
   130  			// |                     Subtasks' status                       |                Task's status                 |
   131  			// | :--------------------------------------------------------: | :------------------------------------------: |
   132  			// |           Any Paused and len(result.errors) > 0            |    Error - Some error occurred in subtask    |
   133  			// | Any Running and unit is "Sync" and relay is Paused/Stopped | Error - Relay status is Error/Paused/Stopped |
   134  			// |              Any Paused but without error                  |                    Paused                    |
   135  			// |                        All New                             |                     New                      |
   136  			// |                      All Finished                          |                   Finished                   |
   137  			// |                      All Stopped                           |                   Stopped                    |
   138  			// |                         Others                             |                   Running                    |
   139  			switch {
   140  			case strings.HasPrefix(taskStage, stageError):
   141  			case subTaskStage == pb.Stage_Paused && errorOccurred(subTask.Result):
   142  				taskStatusMap[subTaskName] = stageError + " - Some error occurred in subtask"
   143  			case subTask.Unit == pb.UnitType_Sync && subTask.Stage == pb.Stage_Running && relayStatus != nil && (relayStatus.Stage == pb.Stage_Paused || relayStatus.Stage == pb.Stage_Stopped):
   144  				taskStatusMap[subTaskName] = stageError + " - Relay status is " + getRelayStage(relayStatus)
   145  			case taskStage == pb.Stage_Paused.String():
   146  			case taskStage == "", subTaskStage == pb.Stage_Paused:
   147  				taskStatusMap[subTaskName] = subTaskStage.String()
   148  			case taskStage != subTaskStage.String():
   149  				taskStatusMap[subTaskName] = pb.Stage_Running.String()
   150  			}
   151  		}
   152  	}
   153  	taskList := make([]*taskInfo, 0, len(taskStatusMap))
   154  	for curTaskName, taskStatus := range taskStatusMap {
   155  		if strings.HasPrefix(taskStatus, stageError) {
   156  			taskStatus += ". Please run `query-status " + curTaskName + "` to get more details."
   157  		}
   158  		taskList = append(taskList,
   159  			&taskInfo{
   160  				TaskName:   curTaskName,
   161  				TaskStatus: taskStatus,
   162  				Sources:    taskCorrespondingSources[curTaskName],
   163  			})
   164  	}
   165  	return &taskResult{
   166  		Result: resp.Result,
   167  		Msg:    resp.Msg,
   168  		Tasks:  taskList,
   169  	}, hasFalseResult
   170  }