github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/function/ctl/cmd_tracespan.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package ctl
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"strconv"
    21  	"strings"
    22  	"time"
    23  
    24  	"github.com/matrixorigin/matrixone/pkg/clusterservice"
    25  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    26  	"github.com/matrixorigin/matrixone/pkg/pb/api"
    27  	"github.com/matrixorigin/matrixone/pkg/pb/metadata"
    28  	"github.com/matrixorigin/matrixone/pkg/pb/query"
    29  	"github.com/matrixorigin/matrixone/pkg/util/trace"
    30  	"github.com/matrixorigin/matrixone/pkg/vm/engine/tae/db"
    31  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    32  )
    33  
    34  // handleTraceSpan enables or disables spans for the specified operations in the TN service
    35  // and/or CN services. While a span kind is enabled, an operation is recorded only if it lasts
    36  // longer than the threshold (ms). A threshold of 0 means the time threshold is not considered.
    37  //
    38  // the cmd format for CN service:
    39  // 		mo_ctl("cn", "TraceSpan", "uuids of cn:enable/disable:kinds of span:time threshold")
    40  // examples as below:
    41  // 		mo_ctl("cn", "TraceSpan", "cn_uuid1:enable:s3:0")
    42  // 		mo_ctl("cn", "TraceSpan", "cn_uuid1,cn_uuid2,...:enable:s3,local,...:1")
    43  // 		mo_ctl("cn", "TraceSpan", "cn_uuid1,cn_uuid2,...:enable:all:1")
    44  // 		mo_ctl("cn", "TraceSpan", "all:enable:all:1")
    45  //
    46  // the cmd format for TN service:
    47  // 		mo_ctl("dn", "TraceSpan", "enable/disable:kinds of span:time threshold")
    48  // (there is only one dn service, so the uuid does not need to be specified;
    49  // 		if a uuid is given anyway, it is ignored and is not validated.)
    50  // examples as below:
    51  // mo_ctl("dn", "TraceSpan", "disable:s3:10")
    52  // mo_ctl("dn", "TraceSpan", "disable:local:1000")
    53  // mo_ctl("dn", "TraceSpan", "disable:s3, local,...:0")
    54  // mo_ctl("dn", "TraceSpan", "enable:all:0")
    55  
    56  var cmd2State = map[string]bool{
    57  	"enable":  true,
    58  	"disable": false,
    59  }
    60  
    61  func checkParameter(param string, ignoreUUID bool) (args []string, threshold int64, err error) {
    62  	param = strings.ToLower(param)
    63  	// [uuids], enable/disable, spans, time threshold
    64  	args = strings.Split(param, ":")
    65  
    66  	// cmd for tn will ignore the uuid
    67  	if (ignoreUUID && len(args) < 3) ||
    68  		(!ignoreUUID && len(args) != 4) {
    69  		return nil, 0, moerr.NewInternalErrorNoCtx("parameter invalid")
    70  	}
    71  
    72  	cmdIdx := 0
    73  	if !ignoreUUID { // contains uuids
    74  		cmdIdx = 1
    75  	}
    76  	_, ok := cmd2State[args[cmdIdx]]
    77  	if !ok {
    78  		return nil, 0, moerr.NewInternalErrorNoCtx("cmd invalid, expected enable or disable")
    79  	}
    80  
    81  	threshold, err = strconv.ParseInt(args[len(args)-1], 10, 64)
    82  	if err != nil {
    83  		return nil, 0, moerr.NewInvalidArgNoCtx("threshold", "convert to int failed")
    84  	}
    85  
    86  	return args, threshold, nil
    87  }
    88  
    89  func handleTraceSpan(proc *process.Process,
    90  	service serviceType,
    91  	parameter string,
    92  	sender requestSender) (Result, error) {
    93  	if service != cn && service != tn {
    94  		return Result{}, moerr.NewWrongServiceNoCtx("CN or DN", string(service))
    95  	}
    96  
    97  	if service == tn {
    98  		return send2TNAndWaitResp(proc, service, parameter, sender)
    99  	}
   100  
   101  	args, threshold, err := checkParameter(parameter, false)
   102  	if err != nil {
   103  		return Result{}, err
   104  	}
   105  
   106  	// the uuids of cn
   107  	cns := strings.Split(args[0], ",")
   108  
   109  	if len(cns) == 1 && strings.ToLower(cns[0]) == "all" {
   110  		cns = make([]string, 0)
   111  		clusterservice.GetMOCluster().GetCNService(clusterservice.Selector{}, func(cn metadata.CNService) bool {
   112  			cns = append(cns, cn.ServiceID)
   113  			return true
   114  		})
   115  	}
   116  
   117  	info := map[string]string{}
   118  	for idx := range cns {
   119  		// the current cn also need to process this span cmd
   120  		if cns[idx] == proc.QueryClient.ServiceID() {
   121  			info[cns[idx]] = SelfProcess(args[1], args[2], threshold)
   122  		} else {
   123  			// transfer query to another cn and receive its response
   124  			resp, _ := transferRequest(proc, cns[idx], args[1], args[2], threshold)
   125  			if resp == nil {
   126  				// no such cn service
   127  				info[cns[idx]] = "no such cn service"
   128  			} else {
   129  				info[cns[idx]] = resp.TraceSpanResponse.Resp
   130  			}
   131  		}
   132  	}
   133  
   134  	data := ""
   135  	for k, v := range info {
   136  		data += fmt.Sprintf("%s:%s; ", k, v)
   137  	}
   138  
   139  	return Result{
   140  		Method: TraceSpanMethod,
   141  		Data:   data,
   142  	}, nil
   143  }
   144  
   145  func SelfProcess(cmd string, spans string, threshold int64) string {
   146  	var succeed, failed []string
   147  	ss := strings.Split(spans, ",")
   148  	for _, t := range ss {
   149  		if trace.SetMoCtledSpanState(t, cmd2State[cmd], threshold) {
   150  			succeed = append(succeed, t)
   151  		} else {
   152  			failed = append(failed, t)
   153  		}
   154  	}
   155  
   156  	return fmt.Sprintf("%v %sd, %v failed", succeed, cmd, failed)
   157  }
   158  
   159  func transferRequest(proc *process.Process, uuid string, cmd string, spans string, threshold int64) (resp *query.Response, err error) {
   160  	clusterservice.GetMOCluster().GetCNService(clusterservice.NewServiceIDSelector(uuid),
   161  		func(cn metadata.CNService) bool {
   162  			request := proc.QueryClient.NewRequest(query.CmdMethod_TraceSpan)
   163  			request.TraceSpanRequest = &query.TraceSpanRequest{
   164  				Cmd:       cmd,
   165  				Spans:     spans,
   166  				Threshold: threshold,
   167  			}
   168  			ctx, cancel := context.WithTimeout(context.Background(), time.Second)
   169  			defer cancel()
   170  
   171  			resp, err = proc.QueryClient.SendMessage(ctx, cn.QueryAddress, request)
   172  			return true
   173  		})
   174  	return
   175  }
   176  
   177  func send2TNAndWaitResp(proc *process.Process,
   178  	service serviceType,
   179  	parameter string,
   180  	sender requestSender) (Result, error) {
   181  
   182  	whichTN := func(string) ([]uint64, error) { return nil, nil }
   183  	payloadFn := func(tnShardID uint64, parameter string, proc *process.Process) ([]byte, error) {
   184  		args, threshold, err := checkParameter(parameter, true)
   185  		if err != nil {
   186  			return nil, err
   187  		}
   188  
   189  		if len(args) == 4 {
   190  			args = args[1:]
   191  		}
   192  
   193  		req := db.TraceSpan{
   194  			Cmd:       args[0],
   195  			Spans:     args[1],
   196  			Threshold: threshold,
   197  		}
   198  
   199  		return req.MarshalBinary()
   200  	}
   201  
   202  	repsonseUnmarshaler := func(b []byte) (interface{}, error) {
   203  		return string(b[:]), nil
   204  	}
   205  
   206  	return GetTNHandlerFunc(
   207  		api.OpCode_OpTraceSpan, whichTN, payloadFn, repsonseUnmarshaler,
   208  	)(proc, service, parameter, sender)
   209  
   210  }