github.com/matrixorigin/matrixone@v0.7.0/pkg/taskservice/task_service_cron.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package taskservice 16 17 import ( 18 "context" 19 "fmt" 20 "sync" 21 "time" 22 23 "github.com/matrixorigin/matrixone/pkg/common/stopper" 24 "github.com/matrixorigin/matrixone/pkg/logutil" 25 "github.com/matrixorigin/matrixone/pkg/pb/task" 26 "github.com/robfig/cron/v3" 27 "go.uber.org/zap" 28 ) 29 30 var ( 31 fetchInterval = time.Second * 3 32 retryInterval = time.Second * 10 33 ) 34 35 func (s *taskService) StartScheduleCronTask() { 36 s.crons.Lock() 37 defer s.crons.Unlock() 38 39 if s.crons.started || s.crons.stopping { 40 return 41 } 42 43 s.crons.started = true 44 s.crons.stopper = stopper.NewStopper("cronTasks") 45 s.crons.jobs = make(map[uint64]*cronJob) 46 s.crons.entryIDs = make(map[uint64]cron.EntryID) 47 s.crons.retryC = make(chan task.CronTask, 256) 48 s.crons.cron = cron.New(cron.WithParser(s.cronParser), cron.WithLogger(logutil.GetCronLogger(false))) 49 s.crons.cron.Start() 50 if err := s.crons.stopper.RunTask(s.fetchCronTasks); err != nil { 51 panic(err) 52 } 53 if err := s.crons.stopper.RunTask(s.retryTriggerCronTask); err != nil { 54 panic(err) 55 } 56 } 57 58 func (s *taskService) StopScheduleCronTask() { 59 s.crons.Lock() 60 if !s.crons.started { 61 s.crons.Unlock() 62 return 63 } 64 stopper := s.crons.stopper 65 s.crons.started = false 66 s.crons.stopping = true 67 s.crons.stopper = nil 68 s.crons.Unlock() 69 70 stopper.Stop() 71 72 s.crons.Lock() 73 defer s.crons.Unlock() 74 if s.crons.started { 75 panic("StartScheduleCronTask and StopScheduleCronTask can not concurrently invoked") 76 } 77 <-s.crons.cron.Stop().Done() 78 close(s.crons.retryC) 79 s.crons.jobs = nil 80 s.crons.entryIDs = nil 81 s.crons.stopping = false 82 } 83 84 func (s *taskService) fetchCronTasks(ctx context.Context) { 85 timer := time.NewTimer(fetchInterval) 86 defer timer.Stop() 87 88 for { 89 select { 90 case <-ctx.Done(): 91 return 92 case <-timer.C: 93 c, cancel := context.WithTimeout(ctx, time.Second*10) 94 tasks, err := s.QueryCronTask(c) 95 cancel() 96 if err != nil { 97 s.rt.Logger().Error("query cron tasks failed", 98 zap.Error(err)) 99 break 100 } 101 102 currentTasks := make(map[uint64]task.CronTask) 103 for _, cronTask := range tasks { 104 currentTasks[cronTask.ID] = cronTask 105 } 106 107 s.crons.Lock() 108 s.rt.Logger().Info("new cron tasks fetched", 109 zap.Int("current-count", len(s.crons.jobs)), 110 zap.Int("fetch-count", len(tasks))) 111 112 // add new cron tasks to cron scheduler 113 for id, v := range currentTasks { 114 if _, ok := s.crons.jobs[id]; !ok { 115 s.scheduleCronTaskLocked(v) 116 } 117 } 118 119 // remove deleted cron tasks 120 var removedTasks []uint64 121 for id, v := range s.crons.jobs { 122 if _, ok := currentTasks[id]; !ok { 123 removedTasks = append(removedTasks, id) 124 v.close() 125 } 126 } 127 for _, id := range removedTasks { 128 s.removeCronTaskLocked(id) 129 } 130 s.crons.Unlock() 131 } 132 timer.Reset(fetchInterval) 133 } 134 } 135 136 // retryTriggerCronTask when a cron reaches its trigger time, the CronTask creates a task and hands 137 // it to the scheduler for execution. When the creation of a task fails, it goes to the retry queue. 138 func (s *taskService) retryTriggerCronTask(ctx context.Context) { 139 timer := time.NewTimer(retryInterval) 140 defer timer.Stop() 141 142 handle := func() { 143 for { 144 select { 145 case cronTask := <-s.crons.retryC: 146 s.retryScheduleCronTask(cronTask) 147 default: 148 return 149 } 150 } 151 } 152 153 for { 154 select { 155 case <-ctx.Done(): 156 return 157 case <-timer.C: 158 handle() 159 } 160 timer.Reset(retryInterval) 161 } 162 } 163 164 func (s *taskService) scheduleCronTaskLocked(task task.CronTask) { 165 job := newCronJob(task, s) 166 if time.Now().After(time.UnixMilli(task.NextTime)) { 167 s.rt.Logger().Info("cron task triggered", 168 zap.String("cause", "now > next"), 169 zap.String("task", task.DebugString())) 170 if err := s.crons.stopper.RunTask(func(ctx context.Context) { job.run() }); err != nil { 171 panic(err) 172 } 173 } 174 175 id, err := s.crons.cron.AddFunc(task.CronExpr, job.run) 176 if err != nil { 177 panic(err) 178 } 179 s.crons.entryIDs[task.ID] = id 180 s.crons.jobs[task.ID] = job 181 } 182 183 func (s *taskService) retryScheduleCronTask(task task.CronTask) { 184 s.crons.Lock() 185 defer s.crons.Unlock() 186 187 if !s.crons.started { 188 return 189 } 190 191 if job, ok := s.crons.jobs[task.ID]; ok { 192 if err := s.crons.stopper.RunTask(func(ctx context.Context) { job.retryRun(task) }); err != nil { 193 panic(err) 194 } 195 } 196 } 197 198 func (s *taskService) removeCronTaskLocked(id uint64) { 199 s.crons.cron.Remove(s.crons.entryIDs[id]) 200 delete(s.crons.entryIDs, id) 201 delete(s.crons.jobs, id) 202 } 203 204 func (s *taskService) addToRetrySchedule(task task.CronTask) { 205 s.crons.Lock() 206 defer s.crons.Unlock() 207 208 if !s.crons.started { 209 return 210 } 211 212 s.crons.retryC <- task 213 } 214 215 func newTaskFromMetadata(metadata task.TaskMetadata) task.Task { 216 return task.Task{ 217 Metadata: metadata, 218 Status: task.TaskStatus_Created, 219 CreateAt: time.Now().UnixMilli(), 220 } 221 } 222 223 type cronJob struct { 224 sync.Mutex 225 closed bool 226 s *taskService 227 schedule cron.Schedule 228 task task.CronTask 229 version uint64 230 } 231 232 func newCronJob(task task.CronTask, s *taskService) *cronJob { 233 schedule, err := s.cronParser.Parse(task.CronExpr) 234 if err != nil { 235 panic(err) 236 } 237 return &cronJob{ 238 schedule: schedule, 239 task: task, 240 version: 0, 241 s: s, 242 } 243 } 244 245 func (j *cronJob) close() { 246 j.Lock() 247 defer j.Unlock() 248 249 j.closed = true 250 } 251 252 func (j *cronJob) retryRun(task task.CronTask) { 253 j.Lock() 254 defer j.Unlock() 255 256 if j.task.NextTime != task.NextTime { 257 return 258 } 259 260 j.doRun() 261 } 262 263 func (j *cronJob) run() { 264 j.Lock() 265 defer j.Unlock() 266 267 if j.task.Metadata.Options.Concurrency != 0 { 268 ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) 269 defer cancel() 270 271 queryTask, err := j.s.QueryTask(ctx, 272 WithTaskStatusCond(EQ, task.TaskStatus_Running), 273 WithTaskExecutorCond(EQ, j.task.Metadata.Executor)) 274 if err != nil || 275 uint32(len(queryTask)) >= j.task.Metadata.Options.Concurrency { 276 j.s.rt.Logger().Info("cron task not triggered", 277 zap.String("cause", "reach max concurrency"), 278 zap.String("task", j.task.DebugString())) 279 return 280 } 281 } 282 283 j.s.rt.Logger().Info("cron task triggered", 284 zap.String("cause", "normal"), 285 zap.String("task", j.task.DebugString())) 286 j.doRun() 287 } 288 289 func (j *cronJob) doRun() { 290 if j.closed { 291 return 292 } 293 294 now := time.Now() 295 next := j.schedule.Next(now) 296 newTask := j.task 297 newTask.NextTime = next.UnixMilli() 298 newTask.UpdateAt = now.UnixMilli() 299 300 ctx, cancel := context.WithTimeout(context.Background(), time.Second*10) 301 defer cancel() 302 303 newTask.TriggerTimes++ 304 value := newTaskFromMetadata(newTask.Metadata) 305 value.ParentTaskID = value.Metadata.ID 306 value.Metadata.ID = fmt.Sprintf("%s:%d", value.ParentTaskID, newTask.TriggerTimes) 307 308 _, err := j.s.store.UpdateCronTask(ctx, newTask, value) 309 if err != nil { 310 j.s.rt.Logger().Error("trigger cron task failed", 311 zap.String("cron-task", j.task.Metadata.ID), 312 zap.Error(err)) 313 j.s.addToRetrySchedule(j.task) 314 return 315 } 316 j.task = newTask 317 }