github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/chaos/cases/case_fake_job.go (about) 1 // Copyright 2022 PingCAP, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package main 15 16 import ( 17 "context" 18 "encoding/json" 19 "fmt" 20 "time" 21 22 "github.com/pingcap/log" 23 pb "github.com/pingcap/tiflow/engine/enginepb" 24 "github.com/pingcap/tiflow/engine/jobmaster/fakejob" 25 "github.com/pingcap/tiflow/engine/pkg/tenant" 26 "github.com/pingcap/tiflow/engine/test/e2e" 27 "github.com/pingcap/tiflow/pkg/errors" 28 "github.com/pingcap/tiflow/pkg/retry" 29 "github.com/pingcap/tiflow/pkg/util" 30 "go.uber.org/zap" 31 ) 32 33 func runFakeJobCase(ctx context.Context, cfg *config) error { 34 serverMasterEndpoints := []string{cfg.Addr} 35 businessMetaEndpoints := []string{cfg.BusinessMetaAddr} 36 etcdEndpoints := []string{cfg.EtcdAddr} 37 38 jobCfg := &fakejob.Config{ 39 JobName: "fake-job-case", 40 WorkerCount: 8, 41 // use a large enough target tick to ensure the fake job long running 42 TargetTick: 10000000, 43 EtcdWatchEnable: true, 44 EtcdEndpoints: etcdEndpoints, 45 EtcdWatchPrefix: "/fake-job/test/", 46 } 47 e2eCfg := &e2e.FakeJobConfig{ 48 EtcdEndpoints: etcdEndpoints, 49 WorkerCount: jobCfg.WorkerCount, 50 KeyPrefix: jobCfg.EtcdWatchPrefix, 51 } 52 53 cli, err := e2e.NewUTCli(ctx, serverMasterEndpoints, businessMetaEndpoints, 54 tenant.DefaultUserProjectInfo, e2eCfg) 55 if err != nil { 56 return err 57 } 58 59 revision, err := cli.GetRevision(ctx) 60 if err != nil { 61 return err 62 } 63 jobCfg.EtcdStartRevision = revision 64 cfgBytes, err := json.Marshal(jobCfg) 65 if err != nil { 66 return err 67 } 68 69 // retry to create a fake job, since chaos exists, the server master may be 70 // unavailable for sometime. 71 var jobID string 72 err = retry.Do(ctx, func() error { 73 var inErr error 74 jobID, inErr = cli.CreateJob(ctx, pb.Job_FakeJob, cfgBytes) 75 if inErr != nil { 76 log.Error("create fake job failed", zap.Error(inErr)) 77 } 78 return inErr 79 }, 80 retry.WithBackoffBaseDelay(1000 /* 1 second */), 81 retry.WithBackoffMaxDelay(8000 /* 8 seconds */), 82 retry.WithMaxTries(15 /* fail after 103 seconds */), 83 ) 84 if err != nil { 85 return err 86 } 87 88 err = cli.InitializeMetaClient(jobID) 89 if err != nil { 90 return err 91 } 92 93 // update upstream etcd, and check fake job works normally every 60 seconds 94 // run 10 times, about 10 minutes totally. 95 mvcc := 0 96 interval := 60 * time.Second 97 runTime := 10 98 for i := 0; i < runTime; i++ { 99 value := fmt.Sprintf("update-value-index-%d", i) 100 mvcc++ 101 start := time.Now() 102 err := updateKeyAndCheck(ctx, cli, jobID, jobCfg.WorkerCount, value, mvcc) 103 if err != nil { 104 return err 105 } 106 duration := time.Since(start) 107 log.Info("update key and check test", zap.Int("round", i), zap.Duration("duration", duration)) 108 if duration < interval { 109 time.Sleep(time.Until(start.Add(interval))) 110 } 111 } 112 113 log.Info("run fake job case successfully") 114 115 return nil 116 } 117 118 func updateKeyAndCheck( 119 ctx context.Context, cli *e2e.ChaosCli, jobID string, workerCount int, 120 updateValue string, expectedMvcc int, 121 ) error { 122 for i := 0; i < workerCount; i++ { 123 err := cli.UpdateFakeJobKey(ctx, i, updateValue) 124 if err != nil { 125 return err 126 } 127 } 128 // retry 6 minutes at most 129 finished := util.WaitSomething(60, time.Second*6, func() bool { 130 for jobIdx := 0; jobIdx < workerCount; jobIdx++ { 131 err := cli.CheckFakeJobKey(ctx, jobID, jobIdx, expectedMvcc, updateValue) 132 if err != nil { 133 log.Warn("check fail job failed", zap.Error(err)) 134 return false 135 } 136 } 137 return true 138 }) 139 if !finished { 140 return errors.New("wait fake job normally timeout") 141 } 142 return nil 143 }