github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/chaos/cases/case_fake_job.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package main
    15  
    16  import (
    17  	"context"
    18  	"encoding/json"
    19  	"fmt"
    20  	"time"
    21  
    22  	"github.com/pingcap/log"
    23  	pb "github.com/pingcap/tiflow/engine/enginepb"
    24  	"github.com/pingcap/tiflow/engine/jobmaster/fakejob"
    25  	"github.com/pingcap/tiflow/engine/pkg/tenant"
    26  	"github.com/pingcap/tiflow/engine/test/e2e"
    27  	"github.com/pingcap/tiflow/pkg/errors"
    28  	"github.com/pingcap/tiflow/pkg/retry"
    29  	"github.com/pingcap/tiflow/pkg/util"
    30  	"go.uber.org/zap"
    31  )
    32  
    33  func runFakeJobCase(ctx context.Context, cfg *config) error {
    34  	serverMasterEndpoints := []string{cfg.Addr}
    35  	businessMetaEndpoints := []string{cfg.BusinessMetaAddr}
    36  	etcdEndpoints := []string{cfg.EtcdAddr}
    37  
    38  	jobCfg := &fakejob.Config{
    39  		JobName:     "fake-job-case",
    40  		WorkerCount: 8,
    41  		// use a large enough target tick to ensure the fake job long running
    42  		TargetTick:      10000000,
    43  		EtcdWatchEnable: true,
    44  		EtcdEndpoints:   etcdEndpoints,
    45  		EtcdWatchPrefix: "/fake-job/test/",
    46  	}
    47  	e2eCfg := &e2e.FakeJobConfig{
    48  		EtcdEndpoints: etcdEndpoints,
    49  		WorkerCount:   jobCfg.WorkerCount,
    50  		KeyPrefix:     jobCfg.EtcdWatchPrefix,
    51  	}
    52  
    53  	cli, err := e2e.NewUTCli(ctx, serverMasterEndpoints, businessMetaEndpoints,
    54  		tenant.DefaultUserProjectInfo, e2eCfg)
    55  	if err != nil {
    56  		return err
    57  	}
    58  
    59  	revision, err := cli.GetRevision(ctx)
    60  	if err != nil {
    61  		return err
    62  	}
    63  	jobCfg.EtcdStartRevision = revision
    64  	cfgBytes, err := json.Marshal(jobCfg)
    65  	if err != nil {
    66  		return err
    67  	}
    68  
    69  	// retry to create a fake job, since chaos exists, the server master may be
    70  	// unavailable for sometime.
    71  	var jobID string
    72  	err = retry.Do(ctx, func() error {
    73  		var inErr error
    74  		jobID, inErr = cli.CreateJob(ctx, pb.Job_FakeJob, cfgBytes)
    75  		if inErr != nil {
    76  			log.Error("create fake job failed", zap.Error(inErr))
    77  		}
    78  		return inErr
    79  	},
    80  		retry.WithBackoffBaseDelay(1000 /* 1 second */),
    81  		retry.WithBackoffMaxDelay(8000 /* 8 seconds */),
    82  		retry.WithMaxTries(15 /* fail after 103 seconds */),
    83  	)
    84  	if err != nil {
    85  		return err
    86  	}
    87  
    88  	err = cli.InitializeMetaClient(jobID)
    89  	if err != nil {
    90  		return err
    91  	}
    92  
    93  	// update upstream etcd, and check fake job works normally every 60 seconds
    94  	// run 10 times, about 10 minutes totally.
    95  	mvcc := 0
    96  	interval := 60 * time.Second
    97  	runTime := 10
    98  	for i := 0; i < runTime; i++ {
    99  		value := fmt.Sprintf("update-value-index-%d", i)
   100  		mvcc++
   101  		start := time.Now()
   102  		err := updateKeyAndCheck(ctx, cli, jobID, jobCfg.WorkerCount, value, mvcc)
   103  		if err != nil {
   104  			return err
   105  		}
   106  		duration := time.Since(start)
   107  		log.Info("update key and check test", zap.Int("round", i), zap.Duration("duration", duration))
   108  		if duration < interval {
   109  			time.Sleep(time.Until(start.Add(interval)))
   110  		}
   111  	}
   112  
   113  	log.Info("run fake job case successfully")
   114  
   115  	return nil
   116  }
   117  
   118  func updateKeyAndCheck(
   119  	ctx context.Context, cli *e2e.ChaosCli, jobID string, workerCount int,
   120  	updateValue string, expectedMvcc int,
   121  ) error {
   122  	for i := 0; i < workerCount; i++ {
   123  		err := cli.UpdateFakeJobKey(ctx, i, updateValue)
   124  		if err != nil {
   125  			return err
   126  		}
   127  	}
   128  	// retry 6 minutes at most
   129  	finished := util.WaitSomething(60, time.Second*6, func() bool {
   130  		for jobIdx := 0; jobIdx < workerCount; jobIdx++ {
   131  			err := cli.CheckFakeJobKey(ctx, jobID, jobIdx, expectedMvcc, updateValue)
   132  			if err != nil {
   133  				log.Warn("check fail job failed", zap.Error(err))
   134  				return false
   135  			}
   136  		}
   137  		return true
   138  	})
   139  	if !finished {
   140  		return errors.New("wait fake job normally timeout")
   141  	}
   142  	return nil
   143  }