// Source: github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/test/e2e/e2e_test_cli.go

// Copyright 2022 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// See the License for the specific language governing permissions and
// limitations under the License.

package e2e

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"os/exec"
	"time"

	"github.com/pingcap/log"
	pb "github.com/pingcap/tiflow/engine/enginepb"
	"github.com/pingcap/tiflow/engine/jobmaster/fakejob"
	"github.com/pingcap/tiflow/engine/pkg/meta"
	metaModel "github.com/pingcap/tiflow/engine/pkg/meta/model"
	"github.com/pingcap/tiflow/engine/pkg/tenant"
	server "github.com/pingcap/tiflow/engine/servermaster"
	"github.com/pingcap/tiflow/pkg/errors"
	clientv3 "go.etcd.io/etcd/client/v3"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"
	"google.golang.org/grpc"
	"google.golang.org/protobuf/encoding/protojson"
	"google.golang.org/protobuf/proto"
)

func init() {
	// Switch the global logger to debug level so that e2e test runs are
	// easier to diagnose.
	log.SetLevel(zapcore.DebugLevel)
}

// ErrLeaderNotFound is returned when the leader is not found.
var ErrLeaderNotFound = errors.New("leader not found")

// ChaosCli is used to interact with the server master and the fake job, and
// provides ways to add chaos in e2e tests.
52 type ChaosCli struct { 53 masterAddrs []string 54 // masterCli is used to operate with server master, such as submit job 55 clientConn metaModel.ClientConn 56 // metaCli is used to query metadata which is stored from business logic(job-level isolation) 57 // NEED to reinitialize the metaCli if we access to a different job 58 metaCli metaModel.KVClient 59 // fakeJobCli is used to write to etcd to simulate the business of fake job 60 fakeJobCli *clientv3.Client 61 fakeJobCfg *FakeJobConfig 62 // project is used to save project info 63 project tenant.ProjectInfo 64 } 65 66 // FakeJobConfig is used to construct a fake job configuration 67 type FakeJobConfig struct { 68 EtcdEndpoints []string 69 WorkerCount int 70 KeyPrefix string 71 } 72 73 // NewUTCli creates a new ChaosCli instance 74 func NewUTCli(ctx context.Context, masterAddrs, businessMetaAddrs []string, project tenant.ProjectInfo, 75 cfg *FakeJobConfig, 76 ) (*ChaosCli, error) { 77 if len(masterAddrs) == 0 { 78 panic("length of masterAddrs is 0") 79 } 80 81 // TODO: NEED to move metastore config to a toml, and parse the toml 82 defaultSchema := "test_business" 83 84 conf := server.NewDefaultBusinessMetaConfig() 85 conf.Endpoints = businessMetaAddrs 86 conf.Schema = defaultSchema 87 cc, err := meta.NewClientConn(conf) 88 if err != nil { 89 return nil, errors.Trace(err) 90 } 91 92 fakeJobCli, err := clientv3.New(clientv3.Config{ 93 Endpoints: cfg.EtcdEndpoints, 94 Context: ctx, 95 DialTimeout: 3 * time.Second, 96 DialOptions: []grpc.DialOption{}, 97 }) 98 if err != nil { 99 return nil, errors.Trace(err) 100 } 101 102 return &ChaosCli{ 103 masterAddrs: masterAddrs, 104 clientConn: cc, 105 fakeJobCli: fakeJobCli, 106 fakeJobCfg: cfg, 107 project: project, 108 }, nil 109 } 110 111 // CreateJob sends SubmitJob command to servermaster 112 func (cli *ChaosCli) CreateJob(ctx context.Context, jobType pb.Job_Type, config []byte) (string, error) { 113 return CreateJobViaHTTP(ctx, cli.masterAddrs[0], 
cli.project.TenantID(), cli.project.ProjectID(), jobType, config) 114 } 115 116 // CancelJob sends CancelJob command to servermaster 117 func (cli *ChaosCli) CancelJob(ctx context.Context, jobID string) error { 118 url := fmt.Sprintf("http://%s/api/v1/jobs/%s/cancel", cli.masterAddrs[0], jobID) 119 req := &pb.CancelJobRequest{ 120 Id: jobID, 121 ProjectId: cli.project.ProjectID(), 122 TenantId: cli.project.TenantID(), 123 } 124 return sendHTTPRequest(ctx, http.MethodPost, url, req, nil) 125 } 126 127 // CheckJobStatus checks job status is as expected. 128 func (cli *ChaosCli) CheckJobStatus( 129 ctx context.Context, jobID string, expectedStatus pb.Job_State, 130 ) (bool, error) { 131 job, err := QueryJobViaHTTP(ctx, cli.masterAddrs[0], cli.project.TenantID(), cli.project.ProjectID(), jobID) 132 if err != nil { 133 return false, errors.Trace(err) 134 } 135 return job.State == expectedStatus, nil 136 } 137 138 // UpdateFakeJobKey updates the etcd value of a worker belonging to a fake job 139 func (cli *ChaosCli) UpdateFakeJobKey(ctx context.Context, id int, value string) error { 140 key := fmt.Sprintf("%s%d", cli.fakeJobCfg.KeyPrefix, id) 141 _, err := cli.fakeJobCli.Put(ctx, key, value) 142 return errors.Trace(err) 143 } 144 145 func (cli *ChaosCli) getFakeJobCheckpoint( 146 ctx context.Context, masterID string, 147 ) (*fakejob.Checkpoint, error) { 148 ckptKey := fakejob.CheckpointKey(masterID) 149 resp, metaErr := cli.metaCli.Get(ctx, ckptKey) 150 if metaErr != nil { 151 return nil, errors.New(metaErr.Error()) 152 } 153 if len(resp.Kvs) == 0 { 154 return nil, errors.New("no checkpoint found") 155 } 156 checkpoint := &fakejob.Checkpoint{} 157 err := json.Unmarshal(resp.Kvs[0].Value, checkpoint) 158 if err != nil { 159 return nil, errors.Trace(err) 160 } 161 log.Debug("get fake job checkpoint", zap.String("ckptKey", ckptKey), 162 zap.Any("checkpoint", checkpoint)) 163 return checkpoint, nil 164 } 165 166 // CheckFakeJobTick queries the checkpoint of a fake job and 
checks the tick count 167 // is as expected. 168 func (cli *ChaosCli) CheckFakeJobTick( 169 ctx context.Context, masterID string, jobIndex int, target int64, 170 ) error { 171 ckpt, err := cli.getFakeJobCheckpoint(ctx, masterID) 172 if err != nil { 173 return err 174 } 175 tick, ok := ckpt.Ticks[jobIndex] 176 if !ok { 177 return errors.Errorf("job %d not found in checkpoint %v", jobIndex, ckpt) 178 } 179 if tick < target { 180 return errors.Errorf("tick %d not reaches target %d, checkpoint %v", tick, target, ckpt) 181 } 182 return nil 183 } 184 185 // CheckFakeJobKey queries the checkpoint of a fake job, checks the value and mvcc 186 // count are as expected. If error happens or check is not passed, return error. 187 func (cli *ChaosCli) CheckFakeJobKey( 188 ctx context.Context, masterID string, jobIndex int, expectedMvcc int, expectedValue string, 189 ) error { 190 checkpoint, err := cli.getFakeJobCheckpoint(ctx, masterID) 191 if err != nil { 192 return err 193 } 194 ckpt, ok := checkpoint.Checkpoints[jobIndex] 195 if !ok { 196 return errors.Errorf("job %d not found in checkpoint %v", jobIndex, checkpoint) 197 } 198 if ckpt.Value != expectedValue { 199 return errors.Errorf( 200 "value not equals, expected: '%s', actual: '%s', checkpoint %v", 201 expectedValue, ckpt.Value, checkpoint) 202 } 203 if ckpt.MvccCount != expectedMvcc { 204 return errors.Errorf( 205 "mvcc not equals, expected: '%d', actual: '%d', checkpoint %v", 206 expectedMvcc, ckpt.MvccCount, checkpoint) 207 } 208 209 return nil 210 } 211 212 // GetRevision puts a key gets the latest revision of etcd cluster 213 func (cli *ChaosCli) GetRevision(ctx context.Context) (int64, error) { 214 resp, err := cli.fakeJobCli.Put(ctx, "/chaos/gen_epoch/key", "/chaos/gen_epoch/value") 215 if err != nil { 216 return 0, errors.Trace(err) 217 } 218 return resp.Header.Revision, nil 219 } 220 221 func runCmdHandleError(cmd *exec.Cmd) []byte { 222 log.Info("Start executing command", zap.String("cmd", cmd.String())) 223 
bytes, err := cmd.Output() 224 if err, ok := err.(*exec.ExitError); ok { 225 log.Info("Running command failed", zap.ByteString("stderr", err.Stderr)) 226 } 227 228 if err != nil { 229 log.Fatal("Running command failed", 230 zap.Error(err), 231 zap.String("command", cmd.String()), 232 zap.ByteString("output", bytes)) 233 } 234 235 log.Info("Finished executing command", zap.String("cmd", cmd.String()), zap.ByteString("output", bytes)) 236 return bytes 237 } 238 239 // ContainerRestart restarts a docker container 240 func (cli *ChaosCli) ContainerRestart(name string) { 241 cmd := exec.Command("docker", "restart", name) 242 runCmdHandleError(cmd) 243 log.Info("Finished restarting container", zap.String("name", name)) 244 } 245 246 // ContainerStop stops a docker container 247 func (cli *ChaosCli) ContainerStop(name string) { 248 cmd := exec.Command("docker", "stop", name) 249 runCmdHandleError(cmd) 250 log.Info("Finished stopping container", zap.String("name", name)) 251 } 252 253 // ContainerStart starts a docker container 254 func (cli *ChaosCli) ContainerStart(name string) { 255 cmd := exec.Command("docker", "start", name) 256 runCmdHandleError(cmd) 257 log.Info("Finished starting container", zap.String("name", name)) 258 } 259 260 // InitializeMetaClient initializes the business kvclient 261 func (cli *ChaosCli) InitializeMetaClient(jobID string) error { 262 if cli.metaCli != nil { 263 cli.metaCli.Close() 264 } 265 metaCli, err := meta.NewKVClientWithNamespace(cli.clientConn, cli.project.UniqueID(), jobID) 266 if err != nil { 267 return errors.Trace(err) 268 } 269 270 cli.metaCli = metaCli 271 return nil 272 } 273 274 // GetLeaderAddr gets the address of the leader of the server master. 
275 func (cli *ChaosCli) GetLeaderAddr(ctx context.Context) (string, error) { 276 url := fmt.Sprintf("http://%s/api/v1/leader", cli.masterAddrs[0]) 277 resp := &pb.GetLeaderResponse{} 278 if err := sendHTTPRequest(ctx, http.MethodGet, url, &pb.ResignLeaderRequest{}, resp); err != nil { 279 return "", errors.Trace(err) 280 } 281 return resp.AdvertiseAddr, nil 282 } 283 284 // ResignLeader resigns the leader at the given addr. 285 func (cli *ChaosCli) ResignLeader(ctx context.Context, addr string) error { 286 url := fmt.Sprintf("http://%s/api/v1/leader/resign", addr) 287 return sendHTTPRequest(ctx, http.MethodPost, url, &pb.ResignLeaderRequest{}, nil) 288 } 289 290 // CreateJobViaHTTP creates a job via http. 291 func CreateJobViaHTTP(ctx context.Context, masterAddr, tenantID, projectID string, jobType pb.Job_Type, config []byte) (string, error) { 292 url := fmt.Sprintf("http://%s/api/v1/jobs?tenant_id=%s&project_id=%s", masterAddr, tenantID, projectID) 293 reqJob := &pb.Job{ 294 Type: jobType, 295 Config: config, 296 } 297 job := &pb.Job{} 298 if err := sendHTTPRequest(ctx, http.MethodPost, url, reqJob, job); err != nil { 299 return "", errors.Trace(err) 300 } 301 return job.Id, nil 302 } 303 304 // QueryJobViaHTTP queries a job via http. 305 func QueryJobViaHTTP(ctx context.Context, masterAddr, tenantID, projectID, jobID string) (*pb.Job, error) { 306 url := fmt.Sprintf("http://%s/api/v1/jobs/%s?tenant_id=%s&project_id=%s", masterAddr, jobID, tenantID, projectID) 307 job := &pb.Job{} 308 if err := sendHTTPRequest(ctx, http.MethodGet, url, nil, job); err != nil { 309 return nil, errors.Trace(err) 310 } 311 return job, nil 312 } 313 314 // sendHTTPRequest sends a http request to the master. 315 // 316 // Here we use http client instead of gRPC client because our caller is usually access our 317 // API via http. We want to simulate the http client behavior. 
318 func sendHTTPRequest(ctx context.Context, method, url string, reqBody, resp proto.Message) error { 319 var payload []byte 320 if reqBody != nil { 321 var err error 322 payload, err = protojson.MarshalOptions{UseProtoNames: false}.Marshal(reqBody) 323 if err != nil { 324 return errors.Trace(err) 325 } 326 } 327 httpReq, err := http.NewRequestWithContext(ctx, method, url, bytes.NewReader(payload)) 328 if err != nil { 329 return errors.Trace(err) 330 } 331 httpResp, err := http.DefaultClient.Do(httpReq) 332 if err != nil { 333 return errors.Trace(err) 334 } 335 defer httpResp.Body.Close() 336 body, err := io.ReadAll(httpResp.Body) 337 if err != nil { 338 log.Warn("read response body failed", zap.Error(err)) 339 } 340 if httpResp.StatusCode/100 != 2 { 341 return errors.Errorf("unexpected status code %d, body %s", httpResp.StatusCode, string(body)) 342 } 343 if resp == nil { 344 return nil 345 } 346 return protojson.Unmarshal(body, resp) 347 }