github.com/matrixorigin/matrixone@v1.2.0/pkg/hakeeper/task/task_scheduler.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package task 16 17 import ( 18 "container/heap" 19 "context" 20 "time" 21 22 "github.com/matrixorigin/matrixone/pkg/common/moerr" 23 "github.com/matrixorigin/matrixone/pkg/common/runtime" 24 "github.com/matrixorigin/matrixone/pkg/hakeeper" 25 "github.com/matrixorigin/matrixone/pkg/pb/logservice" 26 "github.com/matrixorigin/matrixone/pkg/pb/task" 27 "github.com/matrixorigin/matrixone/pkg/taskservice" 28 "go.uber.org/zap" 29 ) 30 31 const ( 32 taskSchedulerDefaultTimeout = 10 * time.Second 33 ) 34 35 type scheduler struct { 36 cfg hakeeper.Config 37 taskServiceGetter func() taskservice.TaskService 38 } 39 40 var _ hakeeper.TaskScheduler = (*scheduler)(nil) 41 42 func NewScheduler(taskServiceGetter func() taskservice.TaskService, cfg hakeeper.Config) hakeeper.TaskScheduler { 43 cfg.Fill() 44 s := &scheduler{ 45 taskServiceGetter: taskServiceGetter, 46 cfg: cfg, 47 } 48 return s 49 } 50 51 func (s *scheduler) Schedule(cnState logservice.CNState, currentTick uint64) { 52 cnPool := newCNPoolWithCNState(cnState) 53 runningTasks := s.queryTasks(task.TaskStatus_Running) 54 createdTasks := s.queryTasks(task.TaskStatus_Created) 55 tasks := append(runningTasks, createdTasks...) 56 57 runtime.ProcessLevelRuntime().Logger().Debug("task schedule query tasks", 58 zap.Int("created", len(createdTasks)), 59 zap.Int("running", len(runningTasks))) 60 if len(tasks) == 0 { 61 return 62 } 63 workingCNPool := cnPool.selectCNs(notExpired(s.cfg, currentTick)) 64 expiredTasks := getExpiredTasks(runningTasks, workingCNPool) 65 runtime.ProcessLevelRuntime().Logger().Info("task schedule query tasks", 66 zap.Int("created", len(createdTasks)), 67 zap.Int("expired", len(expiredTasks))) 68 s.allocateTasks(createdTasks, workingCNPool) 69 s.allocateTasks(expiredTasks, workingCNPool) 70 } 71 72 func (s *scheduler) StartScheduleCronTask() { 73 if ts := s.taskServiceGetter(); ts != nil { 74 ts.StartScheduleCronTask() 75 } 76 } 77 78 func (s *scheduler) StopScheduleCronTask() { 79 if ts := s.taskServiceGetter(); ts != nil { 80 ts.StopScheduleCronTask() 81 } 82 } 83 84 func (s *scheduler) queryTasks(status task.TaskStatus) []task.AsyncTask { 85 ts := s.taskServiceGetter() 86 if ts == nil { 87 runtime.ProcessLevelRuntime().Logger().Error("task service is nil", 88 zap.String("status", status.String())) 89 return nil 90 } 91 ctx, cancel := context.WithTimeout(context.Background(), taskSchedulerDefaultTimeout) 92 defer cancel() 93 94 tasks, err := ts.QueryAsyncTask(ctx, taskservice.WithTaskStatusCond(status)) 95 if err != nil { 96 runtime.ProcessLevelRuntime().Logger().Error("failed to query tasks", 97 zap.String("status", status.String()), 98 zap.Error(err)) 99 return nil 100 } 101 return tasks 102 } 103 104 func (s *scheduler) allocateTasks(tasks []task.AsyncTask, cnPool *cnPool) { 105 ts := s.taskServiceGetter() 106 if ts == nil { 107 return 108 } 109 110 for _, t := range tasks { 111 allocateTask(ts, t, cnPool) 112 } 113 } 114 115 func allocateTask(ts taskservice.TaskService, t task.AsyncTask, cnPool *cnPool) { 116 var rules []rule 117 if len(t.Metadata.Options.Labels) != 0 { 118 rules = make([]rule, 0, len(t.Metadata.Options.Labels)) 119 for key, label := range t.Metadata.Options.Labels { 120 rules = append(rules, containsLabel(key, label)) 121 } 122 123 } 124 if t.Metadata.Options.Resource.GetCPU() > 0 { 125 rules = append(rules, withCPU(t.Metadata.Options.Resource.CPU)) 126 } 127 if t.Metadata.Options.Resource.GetMemory() > 0 { 128 rules = append(rules, withMemory(t.Metadata.Options.Resource.Memory)) 129 } 130 cnPool = cnPool.selectCNs(rules...) 131 runner := cnPool.min() 132 if runner.uuid == "" { 133 runtime.ProcessLevelRuntime().Logger().Error("failed to allocate task", 134 zap.Uint64("task-id", t.ID), 135 zap.String("task-metadata-id", t.Metadata.ID), 136 zap.Error(moerr.NewInternalErrorNoCtx("no CN available"))) 137 return 138 } 139 ctx, cancel := context.WithTimeout(context.Background(), taskSchedulerDefaultTimeout) 140 defer cancel() 141 142 if err := ts.Allocate(ctx, t, runner.uuid); err != nil { 143 runtime.ProcessLevelRuntime().Logger().Error("failed to allocate task", 144 zap.Uint64("task-id", t.ID), 145 zap.String("task-metadata-id", t.Metadata.ID), 146 zap.String("task-runner", runner.uuid), 147 zap.Error(err)) 148 return 149 } 150 runtime.ProcessLevelRuntime().Logger().Info("task allocated", 151 zap.Uint64("task-id", t.ID), 152 zap.String("task-metadata-id", t.Metadata.ID), 153 zap.String("task-runner", runner.uuid)) 154 heap.Push(cnPool, runner) 155 } 156 157 func getExpiredTasks(tasks []task.AsyncTask, workingCNPool *cnPool) []task.AsyncTask { 158 expireCount := 0 159 for _, t := range tasks { 160 if store, ok := workingCNPool.getStore(t.TaskRunner); ok { 161 heap.Push(workingCNPool, store) 162 } else { 163 expireCount++ 164 } 165 } 166 if expireCount == 0 { 167 return nil 168 } 169 expired := make([]task.AsyncTask, 0, expireCount) 170 for _, t := range tasks { 171 if !workingCNPool.contains(t.TaskRunner) { 172 expired = append(expired, t) 173 } 174 } 175 return expired 176 }