github.com/matrixorigin/matrixone@v1.2.0/pkg/hakeeper/task/task_scheduler.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package task
    16  
    17  import (
    18  	"container/heap"
    19  	"context"
    20  	"time"
    21  
    22  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    23  	"github.com/matrixorigin/matrixone/pkg/common/runtime"
    24  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    25  	"github.com/matrixorigin/matrixone/pkg/pb/logservice"
    26  	"github.com/matrixorigin/matrixone/pkg/pb/task"
    27  	"github.com/matrixorigin/matrixone/pkg/taskservice"
    28  	"go.uber.org/zap"
    29  )
    30  
const (
	// taskSchedulerDefaultTimeout is the deadline applied to each context the
	// scheduler creates for a task-service call (querying tasks and
	// allocating a task to a runner).
	taskSchedulerDefaultTimeout = 10 * time.Second
)
    34  
// scheduler is this package's implementation of hakeeper.TaskScheduler.
//
// cfg is the HAKeeper configuration; Schedule hands it to notExpired when
// selecting the CN stores that may receive tasks.
//
// taskServiceGetter is invoked every time the scheduler needs the task
// service. It may return nil, in which case the calling method skips its
// work.
type scheduler struct {
	cfg               hakeeper.Config
	taskServiceGetter func() taskservice.TaskService
}

// Compile-time check that *scheduler satisfies hakeeper.TaskScheduler.
var _ hakeeper.TaskScheduler = (*scheduler)(nil)
    41  
    42  func NewScheduler(taskServiceGetter func() taskservice.TaskService, cfg hakeeper.Config) hakeeper.TaskScheduler {
    43  	cfg.Fill()
    44  	s := &scheduler{
    45  		taskServiceGetter: taskServiceGetter,
    46  		cfg:               cfg,
    47  	}
    48  	return s
    49  }
    50  
    51  func (s *scheduler) Schedule(cnState logservice.CNState, currentTick uint64) {
    52  	cnPool := newCNPoolWithCNState(cnState)
    53  	runningTasks := s.queryTasks(task.TaskStatus_Running)
    54  	createdTasks := s.queryTasks(task.TaskStatus_Created)
    55  	tasks := append(runningTasks, createdTasks...)
    56  
    57  	runtime.ProcessLevelRuntime().Logger().Debug("task schedule query tasks",
    58  		zap.Int("created", len(createdTasks)),
    59  		zap.Int("running", len(runningTasks)))
    60  	if len(tasks) == 0 {
    61  		return
    62  	}
    63  	workingCNPool := cnPool.selectCNs(notExpired(s.cfg, currentTick))
    64  	expiredTasks := getExpiredTasks(runningTasks, workingCNPool)
    65  	runtime.ProcessLevelRuntime().Logger().Info("task schedule query tasks",
    66  		zap.Int("created", len(createdTasks)),
    67  		zap.Int("expired", len(expiredTasks)))
    68  	s.allocateTasks(createdTasks, workingCNPool)
    69  	s.allocateTasks(expiredTasks, workingCNPool)
    70  }
    71  
    72  func (s *scheduler) StartScheduleCronTask() {
    73  	if ts := s.taskServiceGetter(); ts != nil {
    74  		ts.StartScheduleCronTask()
    75  	}
    76  }
    77  
    78  func (s *scheduler) StopScheduleCronTask() {
    79  	if ts := s.taskServiceGetter(); ts != nil {
    80  		ts.StopScheduleCronTask()
    81  	}
    82  }
    83  
    84  func (s *scheduler) queryTasks(status task.TaskStatus) []task.AsyncTask {
    85  	ts := s.taskServiceGetter()
    86  	if ts == nil {
    87  		runtime.ProcessLevelRuntime().Logger().Error("task service is nil",
    88  			zap.String("status", status.String()))
    89  		return nil
    90  	}
    91  	ctx, cancel := context.WithTimeout(context.Background(), taskSchedulerDefaultTimeout)
    92  	defer cancel()
    93  
    94  	tasks, err := ts.QueryAsyncTask(ctx, taskservice.WithTaskStatusCond(status))
    95  	if err != nil {
    96  		runtime.ProcessLevelRuntime().Logger().Error("failed to query tasks",
    97  			zap.String("status", status.String()),
    98  			zap.Error(err))
    99  		return nil
   100  	}
   101  	return tasks
   102  }
   103  
   104  func (s *scheduler) allocateTasks(tasks []task.AsyncTask, cnPool *cnPool) {
   105  	ts := s.taskServiceGetter()
   106  	if ts == nil {
   107  		return
   108  	}
   109  
   110  	for _, t := range tasks {
   111  		allocateTask(ts, t, cnPool)
   112  	}
   113  }
   114  
   115  func allocateTask(ts taskservice.TaskService, t task.AsyncTask, cnPool *cnPool) {
   116  	var rules []rule
   117  	if len(t.Metadata.Options.Labels) != 0 {
   118  		rules = make([]rule, 0, len(t.Metadata.Options.Labels))
   119  		for key, label := range t.Metadata.Options.Labels {
   120  			rules = append(rules, containsLabel(key, label))
   121  		}
   122  
   123  	}
   124  	if t.Metadata.Options.Resource.GetCPU() > 0 {
   125  		rules = append(rules, withCPU(t.Metadata.Options.Resource.CPU))
   126  	}
   127  	if t.Metadata.Options.Resource.GetMemory() > 0 {
   128  		rules = append(rules, withMemory(t.Metadata.Options.Resource.Memory))
   129  	}
   130  	cnPool = cnPool.selectCNs(rules...)
   131  	runner := cnPool.min()
   132  	if runner.uuid == "" {
   133  		runtime.ProcessLevelRuntime().Logger().Error("failed to allocate task",
   134  			zap.Uint64("task-id", t.ID),
   135  			zap.String("task-metadata-id", t.Metadata.ID),
   136  			zap.Error(moerr.NewInternalErrorNoCtx("no CN available")))
   137  		return
   138  	}
   139  	ctx, cancel := context.WithTimeout(context.Background(), taskSchedulerDefaultTimeout)
   140  	defer cancel()
   141  
   142  	if err := ts.Allocate(ctx, t, runner.uuid); err != nil {
   143  		runtime.ProcessLevelRuntime().Logger().Error("failed to allocate task",
   144  			zap.Uint64("task-id", t.ID),
   145  			zap.String("task-metadata-id", t.Metadata.ID),
   146  			zap.String("task-runner", runner.uuid),
   147  			zap.Error(err))
   148  		return
   149  	}
   150  	runtime.ProcessLevelRuntime().Logger().Info("task allocated",
   151  		zap.Uint64("task-id", t.ID),
   152  		zap.String("task-metadata-id", t.Metadata.ID),
   153  		zap.String("task-runner", runner.uuid))
   154  	heap.Push(cnPool, runner)
   155  }
   156  
   157  func getExpiredTasks(tasks []task.AsyncTask, workingCNPool *cnPool) []task.AsyncTask {
   158  	expireCount := 0
   159  	for _, t := range tasks {
   160  		if store, ok := workingCNPool.getStore(t.TaskRunner); ok {
   161  			heap.Push(workingCNPool, store)
   162  		} else {
   163  			expireCount++
   164  		}
   165  	}
   166  	if expireCount == 0 {
   167  		return nil
   168  	}
   169  	expired := make([]task.AsyncTask, 0, expireCount)
   170  	for _, t := range tasks {
   171  		if !workingCNPool.contains(t.TaskRunner) {
   172  			expired = append(expired, t)
   173  		}
   174  	}
   175  	return expired
   176  }