volcano.sh/volcano@v1.9.0/pkg/scheduler/scheduler.go (about)

     1  /*
     2  Copyright 2017 The Kubernetes Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package scheduler
    18  
    19  import (
    20  	"fmt"
    21  	"os"
    22  	"path/filepath"
    23  	"strings"
    24  	"sync"
    25  	"time"
    26  
    27  	"github.com/fsnotify/fsnotify"
    28  	"k8s.io/apimachinery/pkg/util/wait"
    29  	"k8s.io/client-go/rest"
    30  	"k8s.io/klog/v2"
    31  
    32  	"volcano.sh/volcano/cmd/scheduler/app/options"
    33  	"volcano.sh/volcano/pkg/filewatcher"
    34  	schedcache "volcano.sh/volcano/pkg/scheduler/cache"
    35  	"volcano.sh/volcano/pkg/scheduler/conf"
    36  	"volcano.sh/volcano/pkg/scheduler/framework"
    37  	"volcano.sh/volcano/pkg/scheduler/metrics"
    38  )
    39  
    40  // Scheduler represents a "Volcano Scheduler".
    41  // Scheduler watches for new unscheduled pods(PodGroup) in Volcano.
    42  // It attempts to find nodes that can accommodate these pods and writes the binding information back to the API server.
    43  type Scheduler struct {
    44  	cache          schedcache.Cache
    45  	schedulerConf  string
    46  	fileWatcher    filewatcher.FileWatcher
    47  	schedulePeriod time.Duration
    48  	once           sync.Once
    49  
    50  	mutex          sync.Mutex
    51  	actions        []framework.Action
    52  	plugins        []conf.Tier
    53  	configurations []conf.Configuration
    54  	metricsConf    map[string]string
    55  	dumper         schedcache.Dumper
    56  }
    57  
    58  // NewScheduler returns a Scheduler
    59  func NewScheduler(config *rest.Config, opt *options.ServerOption) (*Scheduler, error) {
    60  	var watcher filewatcher.FileWatcher
    61  	if opt.SchedulerConf != "" {
    62  		var err error
    63  		path := filepath.Dir(opt.SchedulerConf)
    64  		watcher, err = filewatcher.NewFileWatcher(path)
    65  		if err != nil {
    66  			return nil, fmt.Errorf("failed creating filewatcher for %s: %v", opt.SchedulerConf, err)
    67  		}
    68  	}
    69  
    70  	cache := schedcache.New(config, opt.SchedulerNames, opt.DefaultQueue, opt.NodeSelector, opt.NodeWorkerThreads, opt.IgnoredCSIProvisioners)
    71  	scheduler := &Scheduler{
    72  		schedulerConf:  opt.SchedulerConf,
    73  		fileWatcher:    watcher,
    74  		cache:          cache,
    75  		schedulePeriod: opt.SchedulePeriod,
    76  		dumper:         schedcache.Dumper{Cache: cache, RootDir: opt.CacheDumpFileDir},
    77  	}
    78  
    79  	return scheduler, nil
    80  }
    81  
    82  // Run initializes and starts the Scheduler. It loads the configuration,
    83  // initializes the cache, and begins the scheduling process.
    84  func (pc *Scheduler) Run(stopCh <-chan struct{}) {
    85  	pc.loadSchedulerConf()
    86  	go pc.watchSchedulerConf(stopCh)
    87  	// Start cache for policy.
    88  	pc.cache.SetMetricsConf(pc.metricsConf)
    89  	pc.cache.Run(stopCh)
    90  	pc.cache.WaitForCacheSync(stopCh)
    91  	klog.V(2).Infof("Scheduler completes Initialization and start to run")
    92  	go wait.Until(pc.runOnce, pc.schedulePeriod, stopCh)
    93  	if options.ServerOpts.EnableCacheDumper {
    94  		pc.dumper.ListenForSignal(stopCh)
    95  	}
    96  	go runSchedulerSocket()
    97  }
    98  
    99  // runOnce executes a single scheduling cycle. This function is called periodically
   100  // as defined by the Scheduler's schedule period.
   101  func (pc *Scheduler) runOnce() {
   102  	klog.V(4).Infof("Start scheduling ...")
   103  	scheduleStartTime := time.Now()
   104  	defer klog.V(4).Infof("End scheduling ...")
   105  
   106  	pc.mutex.Lock()
   107  	actions := pc.actions
   108  	plugins := pc.plugins
   109  	configurations := pc.configurations
   110  	pc.mutex.Unlock()
   111  
   112  	// Load ConfigMap to check which action is enabled.
   113  	conf.EnabledActionMap = make(map[string]bool)
   114  	for _, action := range actions {
   115  		conf.EnabledActionMap[action.Name()] = true
   116  	}
   117  
   118  	ssn := framework.OpenSession(pc.cache, plugins, configurations)
   119  	defer func() {
   120  		framework.CloseSession(ssn)
   121  		metrics.UpdateE2eDuration(metrics.Duration(scheduleStartTime))
   122  	}()
   123  
   124  	for _, action := range actions {
   125  		actionStartTime := time.Now()
   126  		action.Execute(ssn)
   127  		metrics.UpdateActionDuration(action.Name(), metrics.Duration(actionStartTime))
   128  	}
   129  }
   130  
   131  func (pc *Scheduler) loadSchedulerConf() {
   132  	klog.V(4).Infof("Start loadSchedulerConf ...")
   133  	defer func() {
   134  		actions, plugins := pc.getSchedulerConf()
   135  		klog.V(2).Infof("Successfully loaded Scheduler conf, actions: %v, plugins: %v", actions, plugins)
   136  	}()
   137  
   138  	var err error
   139  	pc.once.Do(func() {
   140  		pc.actions, pc.plugins, pc.configurations, pc.metricsConf, err = UnmarshalSchedulerConf(DefaultSchedulerConf)
   141  		if err != nil {
   142  			klog.Errorf("unmarshal Scheduler config %s failed: %v", DefaultSchedulerConf, err)
   143  			panic("invalid default configuration")
   144  		}
   145  	})
   146  
   147  	var config string
   148  	if len(pc.schedulerConf) != 0 {
   149  		confData, err := os.ReadFile(pc.schedulerConf)
   150  		if err != nil {
   151  			klog.Errorf("Failed to read the Scheduler config in '%s', using previous configuration: %v",
   152  				pc.schedulerConf, err)
   153  			return
   154  		}
   155  		config = strings.TrimSpace(string(confData))
   156  	}
   157  
   158  	actions, plugins, configurations, metricsConf, err := UnmarshalSchedulerConf(config)
   159  	if err != nil {
   160  		klog.Errorf("Scheduler config %s is invalid: %v", config, err)
   161  		return
   162  	}
   163  
   164  	pc.mutex.Lock()
   165  	pc.actions = actions
   166  	pc.plugins = plugins
   167  	pc.configurations = configurations
   168  	pc.metricsConf = metricsConf
   169  	pc.mutex.Unlock()
   170  }
   171  
   172  func (pc *Scheduler) getSchedulerConf() (actions []string, plugins []string) {
   173  	for _, action := range pc.actions {
   174  		actions = append(actions, action.Name())
   175  	}
   176  	for _, tier := range pc.plugins {
   177  		for _, plugin := range tier.Plugins {
   178  			plugins = append(plugins, plugin.Name)
   179  		}
   180  	}
   181  	return
   182  }
   183  
   184  func (pc *Scheduler) watchSchedulerConf(stopCh <-chan struct{}) {
   185  	if pc.fileWatcher == nil {
   186  		return
   187  	}
   188  	eventCh := pc.fileWatcher.Events()
   189  	errCh := pc.fileWatcher.Errors()
   190  	for {
   191  		select {
   192  		case event, ok := <-eventCh:
   193  			if !ok {
   194  				return
   195  			}
   196  			klog.V(4).Infof("watch %s event: %v", pc.schedulerConf, event)
   197  			if event.Op&fsnotify.Write == fsnotify.Write || event.Op&fsnotify.Create == fsnotify.Create {
   198  				pc.loadSchedulerConf()
   199  				pc.cache.SetMetricsConf(pc.metricsConf)
   200  			}
   201  		case err, ok := <-errCh:
   202  			if !ok {
   203  				return
   204  			}
   205  			klog.Infof("watch %s error: %v", pc.schedulerConf, err)
   206  		case <-stopCh:
   207  			return
   208  		}
   209  	}
   210  }