github.com/polarismesh/polaris@v1.17.8/common/batchjob/batch.go

/**
 * Tencent is pleased to support the open source community by making Polaris available.
 *
 * Copyright (C) 2019 THL A29 Limited, a Tencent company. All rights reserved.
 *
 * Licensed under the BSD 3-Clause License (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://opensource.org/licenses/BSD-3-Clause
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 */

package batchjob

import (
	"context"
	"errors"
	"sync"
	"sync/atomic"
	"time"

	"github.com/prometheus/client_golang/prometheus"

	"github.com/polarismesh/polaris/common/log"
	"github.com/polarismesh/polaris/common/metrics"
)

var (
	ErrorBatchControllerStopped = errors.New("batch controller is stopped")
	ErrorSubmitTaskTimeout      = errors.New("submit task into batch controller timeout")
)

const (
	shutdownNow      = 1
	shutdownGraceful = 2
)
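
// The two shutdown modes above: Stop stores shutdownNow, which makes the main loop
// and the workers reject every still-queued future with ErrorBatchControllerStopped,
// while GracefulStop stores shutdownGraceful, which drains the task queue and hands
// the remaining futures to the handler before the workers exit.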

// BatchController is a general-purpose batch task processing framework.
type BatchController struct {
	lock           sync.RWMutex
	stop           int32
	label          string
	conf           CtrlConfig
	handler        func(tasks []Future)
	tasksChan      chan Future
	idleSignal     chan int
	workers        []chan []Future
	cancel         context.CancelFunc
	allWorkersStop chan struct{}
	unfinishJobs   *prometheus.GaugeVec
}

// NewBatchController creates and starts a batch task controller.
func NewBatchController(ctx context.Context, conf CtrlConfig) *BatchController {
	ctx, cancel := context.WithCancel(ctx)
	bc := &BatchController{
		label:          conf.Label,
		conf:           conf,
		cancel:         cancel,
		tasksChan:      make(chan Future, conf.QueueSize),
		workers:        make([]chan []Future, 0, conf.Concurrency),
		idleSignal:     make(chan int, conf.Concurrency),
		allWorkersStop: make(chan struct{}),
	}
	bc.handler = func(tasks []Future) {
		defer func() {
			if err := recover(); err != nil {
				log.Errorf("[Batch] %s trigger consumer panic : %+v", conf.Label, err)
			}
		}()
		conf.Handler(tasks)
		metrics.ReportFinishBatchJob(bc.label, int64(len(tasks)))
	}
	bc.runWorkers(ctx)
	bc.mainLoop(ctx)
	return bc
}
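
// A minimal usage sketch (illustrative only: the CtrlConfig field names below are
// taken from how conf is used in this file, while the label, sizes, and task payload
// are made-up placeholders):
//
//	ctrl := NewBatchController(context.Background(), CtrlConfig{
//		Label:         "example",
//		QueueSize:     1024,
//		Concurrency:   8,
//		MaxBatchCount: 64,
//		WaitTime:      32 * time.Millisecond,
//		Handler: func(tasks []Future) {
//			// consume up to MaxBatchCount queued futures in one call
//		},
//	})
//	future := ctrl.Submit(task) // task is whatever Param the caller batches
//	_ = future
//	ctrl.GracefulStop() // drain and handle anything still queued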

// Submit submits a task parameter for execution and returns a Future for tracking it.
func (bc *BatchController) Submit(task Param) Future {
	bc.lock.RLock()
	defer bc.lock.RUnlock()

	if bc.isStop() {
		return &errorFuture{task: task, err: ErrorBatchControllerStopped}
	}

	ctx, cancel := context.WithCancel(context.Background())
	f := &future{
		task:      task,
		ctx:       ctx,
		cancel:    cancel,
		setsignal: make(chan struct{}, 1),
	}
	bc.tasksChan <- f
	metrics.ReportAddBatchJob(bc.label, 1)
	return f
}

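// SubmitWithTimeout behaves like Submit, but if the task cannot be queued within
// timeout it cancels the future and returns an errorFuture carrying ErrorSubmitTaskTimeout.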
func (bc *BatchController) SubmitWithTimeout(task Param, timeout time.Duration) Future {
	bc.lock.RLock()
	defer bc.lock.RUnlock()

	if bc.isStop() {
		return &errorFuture{task: task, err: ErrorBatchControllerStopped}
	}

	ctx, cancel := context.WithCancel(context.Background())
	f := &future{
		task:      task,
		ctx:       ctx,
		cancel:    cancel,
		setsignal: make(chan struct{}, 1),
	}
	timer := time.NewTimer(timeout)
	defer timer.Stop()
	select {
	case <-timer.C:
		f.Cancel()
		return &errorFuture{task: task, err: ErrorSubmitTaskTimeout}
	case bc.tasksChan <- f:
		metrics.ReportAddBatchJob(bc.label, 1)
		return f
	}
}

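// runWorkers starts conf.Concurrency worker goroutines plus a watcher goroutine that
// closes idleSignal and allWorkersStop once every worker has exited.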
func (bc *BatchController) runWorkers(ctx context.Context) {
	wait := &sync.WaitGroup{}
	wait.Add(int(bc.conf.Concurrency))

	for i := uint32(0); i < bc.conf.Concurrency; i++ {
		index := i
		bc.workers = append(bc.workers, make(chan []Future))
		go func(index uint32) {
			log.Infof("[Batch] %s worker(%d) running in main loop", bc.label, index)
			bc.workerLoop(ctx, int(index), wait)
		}(index)
	}

	go func() {
		wait.Wait()
		log.Infof("[Batch] %s close idle worker signal", bc.label)
		close(bc.idleSignal)
		close(bc.allWorkersStop)
	}()
}

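// workerLoop handles the batches dispatched to this worker, announcing its index on
// idleSignal after each batch; on shutdown it either handles (graceful) or rejects
// (immediate) whatever is still pending on its channel.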
func (bc *BatchController) workerLoop(ctx context.Context, index int, wait *sync.WaitGroup) {
	stopFunc := func() {
		defer wait.Done()
		switch atomic.LoadInt32(&bc.stop) {
		case shutdownGraceful:
			replied := 0
			for futures := range bc.workers[index] {
				replied += len(futures)
				bc.handler(futures)
				bc.idleSignal <- int(index)
			}
			log.Infof("[Batch] %s worker(%d) exit, handle future count: %d", bc.label, index, replied)
		case shutdownNow:
			stopped := 0
			for futures := range bc.workers[index] {
				replyStoppedFutures(futures...)
				stopped += len(futures)
			}
			log.Infof("[Batch] %s worker(%d) exit, reply stop msg to future count: %d", bc.label, index, stopped)
		}
	}

	bc.idleSignal <- index
	for {
		select {
		case <-ctx.Done():
			stopFunc()
			return
		case futures := <-bc.workers[index]:
			bc.handler(futures)
			bc.idleSignal <- index
		}
	}
}

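// mainLoop aggregates submitted futures into batches of up to MaxBatchCount and hands
// each batch to the next idle worker, flushing either when a batch fills up or on
// every WaitTime tick.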
func (bc *BatchController) mainLoop(ctx context.Context) {
	go func() {
		futures := make([]Future, 0, bc.conf.MaxBatchCount)
		triggerConsume := func(data []Future) {
			if len(data) == 0 {
				return
			}
			idleIndex := <-bc.idleSignal
			bc.workers[idleIndex] <- data
			futures = make([]Future, 0, bc.conf.MaxBatchCount)
		}

		stopFunc := func() {
			close(bc.tasksChan)
			log.Debugf("[Batch] %s begin close task chan", bc.label)
			switch atomic.LoadInt32(&bc.stop) {
			case shutdownGraceful:
				triggerConsume(futures)
				for future := range bc.tasksChan {
					futures = append(futures, future)
					if len(futures) == int(bc.conf.MaxBatchCount) {
						triggerConsume(futures)
					}
				}
				// final flush as a safety net for any remaining futures
				triggerConsume(futures)
				for i := range bc.workers {
					close(bc.workers[i])
				}
			case shutdownNow:
				log.Debugf("[Batch] %s begin close worker loop", bc.label)
				for i := range bc.workers {
					close(bc.workers[i])
				}
				stopped := len(futures)
				replyStoppedFutures(futures...)
				for future := range bc.tasksChan {
					replyStoppedFutures(future)
					stopped++
				}
				log.Debugf("[Batch] %s do reply stop msg to future count: %d", bc.label, stopped)
			}
			<-bc.allWorkersStop
			log.Infof("[Batch] %s main loop exited", bc.label)
		}

		log.Infof("[Batch] %s running main loop", bc.label)
		ticker := time.NewTicker(bc.conf.WaitTime)
		defer ticker.Stop()
		for {
			select {
			case <-ctx.Done():
				bc.lock.Lock()
				defer bc.lock.Unlock()
				stopFunc()
				return
			case <-ticker.C:
				triggerConsume(futures)
			case future := <-bc.tasksChan:
				futures = append(futures, future)
				if len(futures) == int(bc.conf.MaxBatchCount) {
					triggerConsume(futures)
				}
			}
		}
	}()
}

// Stop shuts down the batch task controller immediately; queued futures are rejected
// with ErrorBatchControllerStopped.
func (bc *BatchController) Stop() {
	bc.lock.Lock()
	defer bc.lock.Unlock()
	log.Infof("[Batch] %s begin do stop", bc.label)
	atomic.StoreInt32(&bc.stop, shutdownNow)
	bc.cancel()
}

// GracefulStop shuts down the batch task controller after draining and handling the
// queued futures.
func (bc *BatchController) GracefulStop() {
	bc.lock.Lock()
	defer bc.lock.Unlock()
	atomic.StoreInt32(&bc.stop, shutdownGraceful)
	bc.cancel()
}

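// isStop reports whether a shutdown (immediate or graceful) has been requested.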
func (bc *BatchController) isStop() bool {
	return atomic.LoadInt32(&bc.stop) == shutdownNow || atomic.LoadInt32(&bc.stop) == shutdownGraceful
}

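// replyStoppedFutures rejects the given futures with ErrorBatchControllerStopped.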
func replyStoppedFutures(futures ...Future) {
	for i := range futures {
		_ = futures[i].Reply(nil, ErrorBatchControllerStopped)
	}
}