github.com/pingcap/tiflow@v0.0.0-20240520035814-5bf52d54e205/engine/executor/fakejob/worker.go (about)

     1  // Copyright 2022 PingCAP, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package fakejob
    15  
    16  import (
    17  	"context"
    18  	"fmt"
    19  	"sync"
    20  	"time"
    21  
    22  	"github.com/pingcap/log"
    23  	"github.com/pingcap/tiflow/engine/framework"
    24  	frameModel "github.com/pingcap/tiflow/engine/framework/model"
    25  	dcontext "github.com/pingcap/tiflow/engine/pkg/context"
    26  	fakejobPkg "github.com/pingcap/tiflow/engine/pkg/fakejob"
    27  	"github.com/pingcap/tiflow/engine/pkg/p2p"
    28  	"github.com/pingcap/tiflow/pkg/errors"
    29  	"go.etcd.io/etcd/api/v3/mvccpb"
    30  	clientv3 "go.etcd.io/etcd/client/v3"
    31  	"go.uber.org/atomic"
    32  	"go.uber.org/zap"
    33  	"golang.org/x/time/rate"
    34  	"google.golang.org/grpc"
    35  )
    36  
    37  var _ framework.Worker = (*dummyWorker)(nil)
    38  
    39  type (
    40  	// Worker is exposed for unit test
    41  	Worker = dummyWorker
    42  
    43  	dummyWorker struct {
    44  		framework.BaseWorker
    45  
    46  		init      bool
    47  		cancel    context.CancelFunc
    48  		status    *fakejobPkg.DummyWorkerStatus
    49  		config    *fakejobPkg.WorkerConfig
    50  		errCh     chan error
    51  		closed    *atomic.Bool
    52  		canceling *atomic.Bool
    53  
    54  		statusRateLimiter *rate.Limiter
    55  
    56  		statusCode struct {
    57  			sync.RWMutex
    58  			code frameModel.WorkerState
    59  		}
    60  
    61  		startTime time.Time
    62  	}
    63  )
    64  
    65  func (d *dummyWorker) InitImpl(_ context.Context) error {
    66  	if !d.init {
    67  		if d.config.EtcdWatchEnable {
    68  			// Don't use the ctx from the caller, because it may be canceled by the caller after InitImpl() returns.
    69  			ctx, cancel := context.WithCancel(context.Background())
    70  			d.bgRunEtcdWatcher(ctx)
    71  			d.cancel = cancel
    72  		}
    73  		d.init = true
    74  		d.setState(frameModel.WorkerStateNormal)
    75  		d.startTime = time.Now()
    76  		return nil
    77  	}
    78  	return errors.New("repeated init")
    79  }
    80  
    81  func (d *dummyWorker) Tick(ctx context.Context) error {
    82  	if !d.init {
    83  		return errors.New("not yet init")
    84  	}
    85  
    86  	select {
    87  	case err := <-d.errCh:
    88  		return err
    89  	default:
    90  	}
    91  
    92  	d.status.DoTick()
    93  
    94  	if d.statusRateLimiter.Allow() {
    95  		log.Info("FakeWorker: Tick", zap.String("worker-id", d.ID()), zap.Int64("tick", d.status.Tick))
    96  		err := d.BaseWorker.UpdateStatus(ctx, d.Status())
    97  		if err != nil {
    98  			if errors.Is(err, errors.ErrWorkerUpdateStatusTryAgain) {
    99  				log.Warn("update status try again later", zap.String("error", err.Error()))
   100  				return nil
   101  			}
   102  			return err
   103  		}
   104  	}
   105  
   106  	if d.closed.Load() {
   107  		return nil
   108  	}
   109  
   110  	extMsg, err := d.status.Marshal()
   111  	if err != nil {
   112  		return err
   113  	}
   114  
   115  	if d.canceling.Load() {
   116  		d.setState(frameModel.WorkerStateStopped)
   117  		return d.Exit(ctx, framework.ExitReasonCanceled, nil, extMsg)
   118  	}
   119  
   120  	if d.status.Tick >= d.config.TargetTick {
   121  		d.setState(frameModel.WorkerStateFinished)
   122  		return d.Exit(ctx, framework.ExitReasonFinished, nil, extMsg)
   123  	}
   124  
   125  	if d.config.InjectErrorInterval != 0 {
   126  		if time.Since(d.startTime) > d.config.InjectErrorInterval {
   127  			return errors.Errorf("injected error by worker: %d", d.config.ID)
   128  		}
   129  	}
   130  	return nil
   131  }
   132  
   133  func (d *dummyWorker) Status() frameModel.WorkerStatus {
   134  	if d.init {
   135  		extBytes, err := d.status.Marshal()
   136  		if err != nil {
   137  			log.Panic("unexpected error", zap.Error(err))
   138  		}
   139  		return frameModel.WorkerStatus{
   140  			State:    d.getState(),
   141  			ExtBytes: extBytes,
   142  		}
   143  	}
   144  	return frameModel.WorkerStatus{State: frameModel.WorkerStateCreated}
   145  }
   146  
   147  func (d *dummyWorker) OnMasterMessage(ctx context.Context, topic p2p.Topic, message p2p.MessageValue) error {
   148  	log.Info("fakeWorker: OnMasterMessage", zap.Any("message", message))
   149  	switch msg := message.(type) {
   150  	case *frameModel.StatusChangeRequest:
   151  		switch msg.ExpectState {
   152  		case frameModel.WorkerStateStopped:
   153  			d.canceling.Store(true)
   154  		default:
   155  			log.Info("FakeWorker: ignore status change state", zap.Int32("state", int32(msg.ExpectState)))
   156  		}
   157  	default:
   158  		log.Info("unsupported message", zap.Any("message", message))
   159  	}
   160  
   161  	return nil
   162  }
   163  
   164  func (d *dummyWorker) CloseImpl(ctx context.Context) {
   165  	if d.closed.CompareAndSwap(false, true) {
   166  		if d.cancel != nil {
   167  			d.cancel()
   168  		}
   169  	}
   170  }
   171  
   172  func (d *dummyWorker) setState(code frameModel.WorkerState) {
   173  	d.statusCode.Lock()
   174  	defer d.statusCode.Unlock()
   175  	d.statusCode.code = code
   176  }
   177  
   178  func (d *dummyWorker) getState() frameModel.WorkerState {
   179  	d.statusCode.RLock()
   180  	defer d.statusCode.RUnlock()
   181  	return d.statusCode.code
   182  }
   183  
   184  func (d *dummyWorker) bgRunEtcdWatcher(ctx context.Context) {
   185  	go func() {
   186  		if err := d.createEtcdWatcher(ctx); err != nil {
   187  			select {
   188  			case d.errCh <- err:
   189  			default:
   190  				log.Warn("duplicated error", zap.Error(err))
   191  			}
   192  		}
   193  	}()
   194  }
   195  
   196  func (d *dummyWorker) createEtcdWatcher(ctx context.Context) error {
   197  	cli, err := clientv3.New(clientv3.Config{
   198  		Endpoints:   d.config.EtcdEndpoints,
   199  		Context:     ctx,
   200  		DialTimeout: 3 * time.Second,
   201  		DialOptions: []grpc.DialOption{},
   202  	})
   203  	if err != nil {
   204  		return errors.Trace(err)
   205  	}
   206  	key := fmt.Sprintf("%s%d", d.config.EtcdWatchPrefix, d.config.ID)
   207  watchLoop:
   208  	for {
   209  		select {
   210  		case <-ctx.Done():
   211  			return errors.Trace(ctx.Err())
   212  		default:
   213  		}
   214  		opts := make([]clientv3.OpOption, 0)
   215  		revision := d.status.GetEtcdCheckpoint().Revision
   216  		if revision > 0 {
   217  			opts = append(opts, clientv3.WithRev(revision+1))
   218  		}
   219  		ch := cli.Watch(clientv3.WithRequireLeader(ctx), key, opts...)
   220  		log.Info("start to watch etcd", zap.String("key", key),
   221  			zap.Int64("revision", revision),
   222  			zap.Strings("endpoints", d.config.EtcdEndpoints))
   223  		for resp := range ch {
   224  			if resp.Err() != nil {
   225  				log.Warn("watch met error", zap.Error(resp.Err()))
   226  				continue watchLoop
   227  			}
   228  			for _, event := range resp.Events {
   229  				// no concurrent write of this checkpoint, so it is safe to read
   230  				// old value, change it and overwrite.
   231  				ckpt := d.status.GetEtcdCheckpoint()
   232  				ckpt.MvccCount++
   233  				ckpt.Revision = event.Kv.ModRevision
   234  				switch event.Type {
   235  				case mvccpb.PUT:
   236  					ckpt.Value = string(event.Kv.Value)
   237  				case mvccpb.DELETE:
   238  					ckpt.Value = ""
   239  				}
   240  				d.status.SetEtcdCheckpoint(&ckpt)
   241  			}
   242  		}
   243  	}
   244  }
   245  
   246  // NewDummyWorker creates a new dummy worker instance
   247  func NewDummyWorker(
   248  	ctx *dcontext.Context,
   249  	id frameModel.WorkerID, masterID frameModel.MasterID,
   250  	wcfg *fakejobPkg.WorkerConfig,
   251  ) framework.WorkerImpl {
   252  	status := &fakejobPkg.DummyWorkerStatus{
   253  		BusinessID: wcfg.ID,
   254  		Tick:       wcfg.Checkpoint.Tick,
   255  		Checkpoint: &fakejobPkg.WorkerCheckpoint{
   256  			Revision:  wcfg.Checkpoint.Revision,
   257  			MvccCount: wcfg.Checkpoint.MvccCount,
   258  			Value:     wcfg.Checkpoint.Value,
   259  		},
   260  	}
   261  	return &dummyWorker{
   262  		statusRateLimiter: rate.NewLimiter(rate.Every(100*time.Millisecond), 1),
   263  		status:            status,
   264  		config:            wcfg,
   265  		errCh:             make(chan error, 1),
   266  		closed:            atomic.NewBool(false),
   267  		canceling:         atomic.NewBool(false),
   268  	}
   269  }