github.com/matrixorigin/matrixone@v0.7.0/pkg/taskservice/task_service_cron.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package taskservice
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"sync"
    21  	"time"
    22  
    23  	"github.com/matrixorigin/matrixone/pkg/common/stopper"
    24  	"github.com/matrixorigin/matrixone/pkg/logutil"
    25  	"github.com/matrixorigin/matrixone/pkg/pb/task"
    26  	"github.com/robfig/cron/v3"
    27  	"go.uber.org/zap"
    28  )
    29  
    30  var (
    31  	fetchInterval = time.Second * 3
    32  	retryInterval = time.Second * 10
    33  )
    34  
    35  func (s *taskService) StartScheduleCronTask() {
    36  	s.crons.Lock()
    37  	defer s.crons.Unlock()
    38  
    39  	if s.crons.started || s.crons.stopping {
    40  		return
    41  	}
    42  
    43  	s.crons.started = true
    44  	s.crons.stopper = stopper.NewStopper("cronTasks")
    45  	s.crons.jobs = make(map[uint64]*cronJob)
    46  	s.crons.entryIDs = make(map[uint64]cron.EntryID)
    47  	s.crons.retryC = make(chan task.CronTask, 256)
    48  	s.crons.cron = cron.New(cron.WithParser(s.cronParser), cron.WithLogger(logutil.GetCronLogger(false)))
    49  	s.crons.cron.Start()
    50  	if err := s.crons.stopper.RunTask(s.fetchCronTasks); err != nil {
    51  		panic(err)
    52  	}
    53  	if err := s.crons.stopper.RunTask(s.retryTriggerCronTask); err != nil {
    54  		panic(err)
    55  	}
    56  }
    57  
    58  func (s *taskService) StopScheduleCronTask() {
    59  	s.crons.Lock()
    60  	if !s.crons.started {
    61  		s.crons.Unlock()
    62  		return
    63  	}
    64  	stopper := s.crons.stopper
    65  	s.crons.started = false
    66  	s.crons.stopping = true
    67  	s.crons.stopper = nil
    68  	s.crons.Unlock()
    69  
    70  	stopper.Stop()
    71  
    72  	s.crons.Lock()
    73  	defer s.crons.Unlock()
    74  	if s.crons.started {
    75  		panic("StartScheduleCronTask and StopScheduleCronTask can not concurrently invoked")
    76  	}
    77  	<-s.crons.cron.Stop().Done()
    78  	close(s.crons.retryC)
    79  	s.crons.jobs = nil
    80  	s.crons.entryIDs = nil
    81  	s.crons.stopping = false
    82  }
    83  
    84  func (s *taskService) fetchCronTasks(ctx context.Context) {
    85  	timer := time.NewTimer(fetchInterval)
    86  	defer timer.Stop()
    87  
    88  	for {
    89  		select {
    90  		case <-ctx.Done():
    91  			return
    92  		case <-timer.C:
    93  			c, cancel := context.WithTimeout(ctx, time.Second*10)
    94  			tasks, err := s.QueryCronTask(c)
    95  			cancel()
    96  			if err != nil {
    97  				s.rt.Logger().Error("query cron tasks failed",
    98  					zap.Error(err))
    99  				break
   100  			}
   101  
   102  			currentTasks := make(map[uint64]task.CronTask)
   103  			for _, cronTask := range tasks {
   104  				currentTasks[cronTask.ID] = cronTask
   105  			}
   106  
   107  			s.crons.Lock()
   108  			s.rt.Logger().Info("new cron tasks fetched",
   109  				zap.Int("current-count", len(s.crons.jobs)),
   110  				zap.Int("fetch-count", len(tasks)))
   111  
   112  			// add new cron tasks to cron scheduler
   113  			for id, v := range currentTasks {
   114  				if _, ok := s.crons.jobs[id]; !ok {
   115  					s.scheduleCronTaskLocked(v)
   116  				}
   117  			}
   118  
   119  			// remove deleted cron tasks
   120  			var removedTasks []uint64
   121  			for id, v := range s.crons.jobs {
   122  				if _, ok := currentTasks[id]; !ok {
   123  					removedTasks = append(removedTasks, id)
   124  					v.close()
   125  				}
   126  			}
   127  			for _, id := range removedTasks {
   128  				s.removeCronTaskLocked(id)
   129  			}
   130  			s.crons.Unlock()
   131  		}
   132  		timer.Reset(fetchInterval)
   133  	}
   134  }
   135  
   136  // retryTriggerCronTask when a cron reaches its trigger time, the CronTask creates a task and hands
   137  // it to the scheduler for execution. When the creation of a task fails, it goes to the retry queue.
   138  func (s *taskService) retryTriggerCronTask(ctx context.Context) {
   139  	timer := time.NewTimer(retryInterval)
   140  	defer timer.Stop()
   141  
   142  	handle := func() {
   143  		for {
   144  			select {
   145  			case cronTask := <-s.crons.retryC:
   146  				s.retryScheduleCronTask(cronTask)
   147  			default:
   148  				return
   149  			}
   150  		}
   151  	}
   152  
   153  	for {
   154  		select {
   155  		case <-ctx.Done():
   156  			return
   157  		case <-timer.C:
   158  			handle()
   159  		}
   160  		timer.Reset(retryInterval)
   161  	}
   162  }
   163  
   164  func (s *taskService) scheduleCronTaskLocked(task task.CronTask) {
   165  	job := newCronJob(task, s)
   166  	if time.Now().After(time.UnixMilli(task.NextTime)) {
   167  		s.rt.Logger().Info("cron task triggered",
   168  			zap.String("cause", "now > next"),
   169  			zap.String("task", task.DebugString()))
   170  		if err := s.crons.stopper.RunTask(func(ctx context.Context) { job.run() }); err != nil {
   171  			panic(err)
   172  		}
   173  	}
   174  
   175  	id, err := s.crons.cron.AddFunc(task.CronExpr, job.run)
   176  	if err != nil {
   177  		panic(err)
   178  	}
   179  	s.crons.entryIDs[task.ID] = id
   180  	s.crons.jobs[task.ID] = job
   181  }
   182  
   183  func (s *taskService) retryScheduleCronTask(task task.CronTask) {
   184  	s.crons.Lock()
   185  	defer s.crons.Unlock()
   186  
   187  	if !s.crons.started {
   188  		return
   189  	}
   190  
   191  	if job, ok := s.crons.jobs[task.ID]; ok {
   192  		if err := s.crons.stopper.RunTask(func(ctx context.Context) { job.retryRun(task) }); err != nil {
   193  			panic(err)
   194  		}
   195  	}
   196  }
   197  
   198  func (s *taskService) removeCronTaskLocked(id uint64) {
   199  	s.crons.cron.Remove(s.crons.entryIDs[id])
   200  	delete(s.crons.entryIDs, id)
   201  	delete(s.crons.jobs, id)
   202  }
   203  
   204  func (s *taskService) addToRetrySchedule(task task.CronTask) {
   205  	s.crons.Lock()
   206  	defer s.crons.Unlock()
   207  
   208  	if !s.crons.started {
   209  		return
   210  	}
   211  
   212  	s.crons.retryC <- task
   213  }
   214  
   215  func newTaskFromMetadata(metadata task.TaskMetadata) task.Task {
   216  	return task.Task{
   217  		Metadata: metadata,
   218  		Status:   task.TaskStatus_Created,
   219  		CreateAt: time.Now().UnixMilli(),
   220  	}
   221  }
   222  
   223  type cronJob struct {
   224  	sync.Mutex
   225  	closed   bool
   226  	s        *taskService
   227  	schedule cron.Schedule
   228  	task     task.CronTask
   229  	version  uint64
   230  }
   231  
   232  func newCronJob(task task.CronTask, s *taskService) *cronJob {
   233  	schedule, err := s.cronParser.Parse(task.CronExpr)
   234  	if err != nil {
   235  		panic(err)
   236  	}
   237  	return &cronJob{
   238  		schedule: schedule,
   239  		task:     task,
   240  		version:  0,
   241  		s:        s,
   242  	}
   243  }
   244  
   245  func (j *cronJob) close() {
   246  	j.Lock()
   247  	defer j.Unlock()
   248  
   249  	j.closed = true
   250  }
   251  
   252  func (j *cronJob) retryRun(task task.CronTask) {
   253  	j.Lock()
   254  	defer j.Unlock()
   255  
   256  	if j.task.NextTime != task.NextTime {
   257  		return
   258  	}
   259  
   260  	j.doRun()
   261  }
   262  
   263  func (j *cronJob) run() {
   264  	j.Lock()
   265  	defer j.Unlock()
   266  
   267  	if j.task.Metadata.Options.Concurrency != 0 {
   268  		ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
   269  		defer cancel()
   270  
   271  		queryTask, err := j.s.QueryTask(ctx,
   272  			WithTaskStatusCond(EQ, task.TaskStatus_Running),
   273  			WithTaskExecutorCond(EQ, j.task.Metadata.Executor))
   274  		if err != nil ||
   275  			uint32(len(queryTask)) >= j.task.Metadata.Options.Concurrency {
   276  			j.s.rt.Logger().Info("cron task not triggered",
   277  				zap.String("cause", "reach max concurrency"),
   278  				zap.String("task", j.task.DebugString()))
   279  			return
   280  		}
   281  	}
   282  
   283  	j.s.rt.Logger().Info("cron task triggered",
   284  		zap.String("cause", "normal"),
   285  		zap.String("task", j.task.DebugString()))
   286  	j.doRun()
   287  }
   288  
   289  func (j *cronJob) doRun() {
   290  	if j.closed {
   291  		return
   292  	}
   293  
   294  	now := time.Now()
   295  	next := j.schedule.Next(now)
   296  	newTask := j.task
   297  	newTask.NextTime = next.UnixMilli()
   298  	newTask.UpdateAt = now.UnixMilli()
   299  
   300  	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
   301  	defer cancel()
   302  
   303  	newTask.TriggerTimes++
   304  	value := newTaskFromMetadata(newTask.Metadata)
   305  	value.ParentTaskID = value.Metadata.ID
   306  	value.Metadata.ID = fmt.Sprintf("%s:%d", value.ParentTaskID, newTask.TriggerTimes)
   307  
   308  	_, err := j.s.store.UpdateCronTask(ctx, newTask, value)
   309  	if err != nil {
   310  		j.s.rt.Logger().Error("trigger cron task failed",
   311  			zap.String("cron-task", j.task.Metadata.ID),
   312  			zap.Error(err))
   313  		j.s.addToRetrySchedule(j.task)
   314  		return
   315  	}
   316  	j.task = newTask
   317  }