github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/test/e2e/e2e_test_cli.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package e2e
    15  
    16  import (
    17  	"bytes"
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"io"
    22  	"net/http"
    23  	"os/exec"
    24  	"time"
    25  
    26  	"github.com/pingcap/log"
    27  	pb "github.com/pingcap/tiflow/engine/enginepb"
    28  	"github.com/pingcap/tiflow/engine/jobmaster/fakejob"
    29  	"github.com/pingcap/tiflow/engine/pkg/meta"
    30  	metaModel "github.com/pingcap/tiflow/engine/pkg/meta/model"
    31  	"github.com/pingcap/tiflow/engine/pkg/tenant"
    32  	server "github.com/pingcap/tiflow/engine/servermaster"
    33  	"github.com/pingcap/tiflow/pkg/errors"
    34  	clientv3 "go.etcd.io/etcd/client/v3"
    35  	"go.uber.org/zap"
    36  	"go.uber.org/zap/zapcore"
    37  	"google.golang.org/grpc"
    38  	"google.golang.org/protobuf/encoding/protojson"
    39  	"google.golang.org/protobuf/proto"
    40  )
    41  
// init raises the level of the shared logger to debug when this package is
// loaded, so that e2e runs produce verbose logs.
func init() {
	// set the debug level log for easy test
	log.SetLevel(zapcore.DebugLevel)
}
    46  
// ErrLeaderNotFound is the sentinel error reporting that the leader is not found.
// NOTE(review): nothing in this file returns it; presumably it is used by other
// e2e helpers — confirm against callers.
var ErrLeaderNotFound = errors.New("leader not found")
    49  
// ChaosCli is used to interact with server master and fake job, and provides
// ways to add chaos in e2e test.
type ChaosCli struct {
	// masterAddrs holds the server master addresses. masterAddrs[0] is the
	// address used by CreateJob, CancelJob, CheckJobStatus and GetLeaderAddr.
	masterAddrs []string
	// clientConn is the connection to the business metastore; metaCli is
	// created from it in InitializeMetaClient.
	clientConn metaModel.ClientConn
	// metaCli is used to query metadata which is stored from business logic(job-level isolation)
	// NEED to reinitialize the metaCli if we access to a different job
	metaCli metaModel.KVClient
	// fakeJobCli is used to write to etcd to simulate the business of fake job
	fakeJobCli *clientv3.Client
	// fakeJobCfg is the fake job configuration passed to NewUTCli; its
	// KeyPrefix is used to build the etcd keys written by UpdateFakeJobKey.
	fakeJobCfg *FakeJobConfig
	// project is used to save project info
	project tenant.ProjectInfo
}
    65  
// FakeJobConfig is used to construct a fake job configuration
type FakeJobConfig struct {
	// EtcdEndpoints are the endpoints of the etcd cluster that NewUTCli
	// connects the fake job client to.
	EtcdEndpoints []string
	// WorkerCount is not read in this file.
	// NOTE(review): presumably the number of workers of the fake job — confirm.
	WorkerCount int
	// KeyPrefix is prepended to the worker id to form the etcd key written by
	// UpdateFakeJobKey.
	KeyPrefix string
}
    72  
    73  // NewUTCli creates a new ChaosCli instance
    74  func NewUTCli(ctx context.Context, masterAddrs, businessMetaAddrs []string, project tenant.ProjectInfo,
    75  	cfg *FakeJobConfig,
    76  ) (*ChaosCli, error) {
    77  	if len(masterAddrs) == 0 {
    78  		panic("length of masterAddrs is 0")
    79  	}
    80  
    81  	// TODO: NEED to move metastore config to a toml, and parse the toml
    82  	defaultSchema := "test_business"
    83  
    84  	conf := server.NewDefaultBusinessMetaConfig()
    85  	conf.Endpoints = businessMetaAddrs
    86  	conf.Schema = defaultSchema
    87  	cc, err := meta.NewClientConn(conf)
    88  	if err != nil {
    89  		return nil, errors.Trace(err)
    90  	}
    91  
    92  	fakeJobCli, err := clientv3.New(clientv3.Config{
    93  		Endpoints:   cfg.EtcdEndpoints,
    94  		Context:     ctx,
    95  		DialTimeout: 3 * time.Second,
    96  		DialOptions: []grpc.DialOption{},
    97  	})
    98  	if err != nil {
    99  		return nil, errors.Trace(err)
   100  	}
   101  
   102  	return &ChaosCli{
   103  		masterAddrs: masterAddrs,
   104  		clientConn:  cc,
   105  		fakeJobCli:  fakeJobCli,
   106  		fakeJobCfg:  cfg,
   107  		project:     project,
   108  	}, nil
   109  }
   110  
   111  // CreateJob sends SubmitJob command to servermaster
   112  func (cli *ChaosCli) CreateJob(ctx context.Context, jobType pb.Job_Type, config []byte) (string, error) {
   113  	return CreateJobViaHTTP(ctx, cli.masterAddrs[0], cli.project.TenantID(), cli.project.ProjectID(), jobType, config)
   114  }
   115  
   116  // CancelJob sends CancelJob command to servermaster
   117  func (cli *ChaosCli) CancelJob(ctx context.Context, jobID string) error {
   118  	url := fmt.Sprintf("http://%s/api/v1/jobs/%s/cancel", cli.masterAddrs[0], jobID)
   119  	req := &pb.CancelJobRequest{
   120  		Id:        jobID,
   121  		ProjectId: cli.project.ProjectID(),
   122  		TenantId:  cli.project.TenantID(),
   123  	}
   124  	return sendHTTPRequest(ctx, http.MethodPost, url, req, nil)
   125  }
   126  
   127  // CheckJobStatus checks job status is as expected.
   128  func (cli *ChaosCli) CheckJobStatus(
   129  	ctx context.Context, jobID string, expectedStatus pb.Job_State,
   130  ) (bool, error) {
   131  	job, err := QueryJobViaHTTP(ctx, cli.masterAddrs[0], cli.project.TenantID(), cli.project.ProjectID(), jobID)
   132  	if err != nil {
   133  		return false, errors.Trace(err)
   134  	}
   135  	return job.State == expectedStatus, nil
   136  }
   137  
   138  // UpdateFakeJobKey updates the etcd value of a worker belonging to a fake job
   139  func (cli *ChaosCli) UpdateFakeJobKey(ctx context.Context, id int, value string) error {
   140  	key := fmt.Sprintf("%s%d", cli.fakeJobCfg.KeyPrefix, id)
   141  	_, err := cli.fakeJobCli.Put(ctx, key, value)
   142  	return errors.Trace(err)
   143  }
   144  
   145  func (cli *ChaosCli) getFakeJobCheckpoint(
   146  	ctx context.Context, masterID string,
   147  ) (*fakejob.Checkpoint, error) {
   148  	ckptKey := fakejob.CheckpointKey(masterID)
   149  	resp, metaErr := cli.metaCli.Get(ctx, ckptKey)
   150  	if metaErr != nil {
   151  		return nil, errors.New(metaErr.Error())
   152  	}
   153  	if len(resp.Kvs) == 0 {
   154  		return nil, errors.New("no checkpoint found")
   155  	}
   156  	checkpoint := &fakejob.Checkpoint{}
   157  	err := json.Unmarshal(resp.Kvs[0].Value, checkpoint)
   158  	if err != nil {
   159  		return nil, errors.Trace(err)
   160  	}
   161  	log.Debug("get fake job checkpoint", zap.String("ckptKey", ckptKey),
   162  		zap.Any("checkpoint", checkpoint))
   163  	return checkpoint, nil
   164  }
   165  
   166  // CheckFakeJobTick queries the checkpoint of a fake job and checks the tick count
   167  // is as expected.
   168  func (cli *ChaosCli) CheckFakeJobTick(
   169  	ctx context.Context, masterID string, jobIndex int, target int64,
   170  ) error {
   171  	ckpt, err := cli.getFakeJobCheckpoint(ctx, masterID)
   172  	if err != nil {
   173  		return err
   174  	}
   175  	tick, ok := ckpt.Ticks[jobIndex]
   176  	if !ok {
   177  		return errors.Errorf("job %d not found in checkpoint %v", jobIndex, ckpt)
   178  	}
   179  	if tick < target {
   180  		return errors.Errorf("tick %d not reaches target %d, checkpoint %v", tick, target, ckpt)
   181  	}
   182  	return nil
   183  }
   184  
   185  // CheckFakeJobKey queries the checkpoint of a fake job, checks the value and mvcc
   186  // count are as expected. If error happens or check is not passed, return error.
   187  func (cli *ChaosCli) CheckFakeJobKey(
   188  	ctx context.Context, masterID string, jobIndex int, expectedMvcc int, expectedValue string,
   189  ) error {
   190  	checkpoint, err := cli.getFakeJobCheckpoint(ctx, masterID)
   191  	if err != nil {
   192  		return err
   193  	}
   194  	ckpt, ok := checkpoint.Checkpoints[jobIndex]
   195  	if !ok {
   196  		return errors.Errorf("job %d not found in checkpoint %v", jobIndex, checkpoint)
   197  	}
   198  	if ckpt.Value != expectedValue {
   199  		return errors.Errorf(
   200  			"value not equals, expected: '%s', actual: '%s', checkpoint %v",
   201  			expectedValue, ckpt.Value, checkpoint)
   202  	}
   203  	if ckpt.MvccCount != expectedMvcc {
   204  		return errors.Errorf(
   205  			"mvcc not equals, expected: '%d', actual: '%d', checkpoint %v",
   206  			expectedMvcc, ckpt.MvccCount, checkpoint)
   207  	}
   208  
   209  	return nil
   210  }
   211  
   212  // GetRevision puts a key gets the latest revision of etcd cluster
   213  func (cli *ChaosCli) GetRevision(ctx context.Context) (int64, error) {
   214  	resp, err := cli.fakeJobCli.Put(ctx, "/chaos/gen_epoch/key", "/chaos/gen_epoch/value")
   215  	if err != nil {
   216  		return 0, errors.Trace(err)
   217  	}
   218  	return resp.Header.Revision, nil
   219  }
   220  
   221  func runCmdHandleError(cmd *exec.Cmd) []byte {
   222  	log.Info("Start executing command", zap.String("cmd", cmd.String()))
   223  	bytes, err := cmd.Output()
   224  	if err, ok := err.(*exec.ExitError); ok {
   225  		log.Info("Running command failed", zap.ByteString("stderr", err.Stderr))
   226  	}
   227  
   228  	if err != nil {
   229  		log.Fatal("Running command failed",
   230  			zap.Error(err),
   231  			zap.String("command", cmd.String()),
   232  			zap.ByteString("output", bytes))
   233  	}
   234  
   235  	log.Info("Finished executing command", zap.String("cmd", cmd.String()), zap.ByteString("output", bytes))
   236  	return bytes
   237  }
   238  
   239  // ContainerRestart restarts a docker container
   240  func (cli *ChaosCli) ContainerRestart(name string) {
   241  	cmd := exec.Command("docker", "restart", name)
   242  	runCmdHandleError(cmd)
   243  	log.Info("Finished restarting container", zap.String("name", name))
   244  }
   245  
   246  // ContainerStop stops a docker container
   247  func (cli *ChaosCli) ContainerStop(name string) {
   248  	cmd := exec.Command("docker", "stop", name)
   249  	runCmdHandleError(cmd)
   250  	log.Info("Finished stopping container", zap.String("name", name))
   251  }
   252  
   253  // ContainerStart starts a docker container
   254  func (cli *ChaosCli) ContainerStart(name string) {
   255  	cmd := exec.Command("docker", "start", name)
   256  	runCmdHandleError(cmd)
   257  	log.Info("Finished starting container", zap.String("name", name))
   258  }
   259  
   260  // InitializeMetaClient initializes the business kvclient
   261  func (cli *ChaosCli) InitializeMetaClient(jobID string) error {
   262  	if cli.metaCli != nil {
   263  		cli.metaCli.Close()
   264  	}
   265  	metaCli, err := meta.NewKVClientWithNamespace(cli.clientConn, cli.project.UniqueID(), jobID)
   266  	if err != nil {
   267  		return errors.Trace(err)
   268  	}
   269  
   270  	cli.metaCli = metaCli
   271  	return nil
   272  }
   273  
   274  // GetLeaderAddr gets the address of the leader of the server master.
   275  func (cli *ChaosCli) GetLeaderAddr(ctx context.Context) (string, error) {
   276  	url := fmt.Sprintf("http://%s/api/v1/leader", cli.masterAddrs[0])
   277  	resp := &pb.GetLeaderResponse{}
   278  	if err := sendHTTPRequest(ctx, http.MethodGet, url, &pb.ResignLeaderRequest{}, resp); err != nil {
   279  		return "", errors.Trace(err)
   280  	}
   281  	return resp.AdvertiseAddr, nil
   282  }
   283  
   284  // ResignLeader resigns the leader at the given addr.
   285  func (cli *ChaosCli) ResignLeader(ctx context.Context, addr string) error {
   286  	url := fmt.Sprintf("http://%s/api/v1/leader/resign", addr)
   287  	return sendHTTPRequest(ctx, http.MethodPost, url, &pb.ResignLeaderRequest{}, nil)
   288  }
   289  
   290  // CreateJobViaHTTP creates a job via http.
   291  func CreateJobViaHTTP(ctx context.Context, masterAddr, tenantID, projectID string, jobType pb.Job_Type, config []byte) (string, error) {
   292  	url := fmt.Sprintf("http://%s/api/v1/jobs?tenant_id=%s&project_id=%s", masterAddr, tenantID, projectID)
   293  	reqJob := &pb.Job{
   294  		Type:   jobType,
   295  		Config: config,
   296  	}
   297  	job := &pb.Job{}
   298  	if err := sendHTTPRequest(ctx, http.MethodPost, url, reqJob, job); err != nil {
   299  		return "", errors.Trace(err)
   300  	}
   301  	return job.Id, nil
   302  }
   303  
   304  // QueryJobViaHTTP queries a job via http.
   305  func QueryJobViaHTTP(ctx context.Context, masterAddr, tenantID, projectID, jobID string) (*pb.Job, error) {
   306  	url := fmt.Sprintf("http://%s/api/v1/jobs/%s?tenant_id=%s&project_id=%s", masterAddr, jobID, tenantID, projectID)
   307  	job := &pb.Job{}
   308  	if err := sendHTTPRequest(ctx, http.MethodGet, url, nil, job); err != nil {
   309  		return nil, errors.Trace(err)
   310  	}
   311  	return job, nil
   312  }
   313  
   314  // sendHTTPRequest sends a http request to the master.
   315  //
   316  // Here we use http client instead of gRPC client because our caller is usually access our
   317  // API via http. We want to simulate the http client behavior.
   318  func sendHTTPRequest(ctx context.Context, method, url string, reqBody, resp proto.Message) error {
   319  	var payload []byte
   320  	if reqBody != nil {
   321  		var err error
   322  		payload, err = protojson.MarshalOptions{UseProtoNames: false}.Marshal(reqBody)
   323  		if err != nil {
   324  			return errors.Trace(err)
   325  		}
   326  	}
   327  	httpReq, err := http.NewRequestWithContext(ctx, method, url, bytes.NewReader(payload))
   328  	if err != nil {
   329  		return errors.Trace(err)
   330  	}
   331  	httpResp, err := http.DefaultClient.Do(httpReq)
   332  	if err != nil {
   333  		return errors.Trace(err)
   334  	}
   335  	defer httpResp.Body.Close()
   336  	body, err := io.ReadAll(httpResp.Body)
   337  	if err != nil {
   338  		log.Warn("read response body failed", zap.Error(err))
   339  	}
   340  	if httpResp.StatusCode/100 != 2 {
   341  		return errors.Errorf("unexpected status code %d, body %s", httpResp.StatusCode, string(body))
   342  	}
   343  	if resp == nil {
   344  		return nil
   345  	}
   346  	return protojson.Unmarshal(body, resp)
   347  }