github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/sysadvisor/sysadvisor.go (about)

     1  /*
     2  Copyright 2022 The Katalyst Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package sysadvisor
    18  
    19  import (
    20  	"context"
    21  	"fmt"
    22  	"sync"
    23  	"time"
    24  
    25  	"k8s.io/apimachinery/pkg/util/sets"
    26  	"k8s.io/klog/v2"
    27  
    28  	"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/metacache"
    29  	pkgplugin "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin"
    30  	"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference"
    31  	metacacheplugin "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/metacache"
    32  	metricemitter "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/metric-emitter"
    33  	"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/overcommitmentaware"
    34  	"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/qosaware"
    35  	"github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types"
    36  	"github.com/kubewharf/katalyst-core/pkg/config"
    37  	"github.com/kubewharf/katalyst-core/pkg/metaserver"
    38  	metricspool "github.com/kubewharf/katalyst-core/pkg/metrics/metrics-pool"
    39  	"github.com/kubewharf/katalyst-core/pkg/util/general"
    40  )
    41  
// initTimeout is the per-plugin deadline applied by AdvisorAgent.init: a plugin
// whose Init has not returned within this duration is logged and not started.
const initTimeout = 10 * time.Second
    43  
    44  func init() {
    45  	pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameQoSAware, qosaware.NewQoSAwarePlugin)
    46  	pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameMetaCache, metacacheplugin.NewMetaCachePlugin)
    47  	pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameMetricEmitter, metricemitter.NewCustomMetricEmitter)
    48  	pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameInference, inference.NewInferencePlugin)
    49  	pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameOvercommitAware, overcommitmentaware.NewOvercommitmentAwarePlugin)
    50  }
    51  
// AdvisorAgent for sysadvisor. It owns the set of enabled sysadvisor plugins,
// initializes them in NewAdvisorAgent and starts the successfully initialized
// ones in Run.
type AdvisorAgent struct {
	// those are parameters that are passed to sysadvisor when starting agents;
	// they are handed unchanged to every plugin initializer.
	config     *config.Configuration
	extraConf  interface{}
	metaServer *metaserver.MetaServer
	emitPool   metricspool.MetricsEmitterPool

	// plugins holds every enabled plugin created by getAdvisorPlugins.
	// pluginsToRun holds the subset whose Init returned nil before initTimeout
	// expired; only these are started by Run.
	plugins      []pkgplugin.SysAdvisorPlugin
	pluginsToRun []pkgplugin.SysAdvisorPlugin

	// wgInitPlugin is waited on by init until every plugin has either finished
	// initializing or timed out.
	// mutex guards the appends to pluginsToRun made by the concurrent
	// initialization goroutines.
	wgInitPlugin sync.WaitGroup
	mutex        sync.Mutex
}
    66  
    67  // NewAdvisorAgent initializes the sysadvisor agent logic.
    68  func NewAdvisorAgent(conf *config.Configuration, extraConf interface{}, metaServer *metaserver.MetaServer,
    69  	emitPool metricspool.MetricsEmitterPool,
    70  ) (*AdvisorAgent, error) {
    71  	agent := &AdvisorAgent{
    72  		config:     conf,
    73  		extraConf:  extraConf,
    74  		metaServer: metaServer,
    75  		emitPool:   emitPool,
    76  
    77  		plugins:      make([]pkgplugin.SysAdvisorPlugin, 0),
    78  		pluginsToRun: make([]pkgplugin.SysAdvisorPlugin, 0),
    79  	}
    80  
    81  	if err := agent.getAdvisorPlugins(pkgplugin.GetRegisteredAdvisorPlugins()); err != nil {
    82  		return nil, err
    83  	}
    84  
    85  	agent.init()
    86  	return agent, nil
    87  }
    88  
    89  func (m *AdvisorAgent) getAdvisorPlugins(SysAdvisorPluginInitializers map[string]pkgplugin.AdvisorPluginInitFunc) error {
    90  	metaCache, err := metacache.NewMetaCacheImp(m.config, m.emitPool, m.metaServer.MetricsFetcher)
    91  	if err != nil {
    92  		return fmt.Errorf("new metacache failed: %v", err)
    93  	}
    94  
    95  	for pluginName, initFn := range SysAdvisorPluginInitializers {
    96  		if !general.IsNameEnabled(pluginName, sets.NewString(), m.config.GenericSysAdvisorConfiguration.SysAdvisorPlugins) {
    97  			klog.Warningf("[sysadvisor] %s plugin is disabled", pluginName)
    98  			continue
    99  		}
   100  
   101  		klog.Infof("[sysadvisor] %s plugin is enabled", pluginName)
   102  		curPlugin, err := initFn(pluginName, m.config, m.extraConf, m.emitPool, m.metaServer, metaCache)
   103  		if err != nil {
   104  			return fmt.Errorf("failed to start sysadvisor plugin %v: %v", pluginName, err)
   105  		}
   106  
   107  		m.plugins = append(m.plugins, curPlugin)
   108  	}
   109  
   110  	return nil
   111  }
   112  
   113  // Asynchronous initialization with timeout. Timeout plugin will neither be killed nor started.
   114  func (m *AdvisorAgent) init() {
   115  	for _, plugin := range m.plugins {
   116  		p := context.TODO()
   117  		c, cancel := context.WithTimeout(p, initTimeout)
   118  		defer cancel()
   119  		m.wgInitPlugin.Add(1)
   120  
   121  		go func(ctx context.Context, plugin pkgplugin.SysAdvisorPlugin) {
   122  			defer m.wgInitPlugin.Done()
   123  
   124  			ch := make(chan error, 1)
   125  			go func(plugin pkgplugin.SysAdvisorPlugin) {
   126  				err := plugin.Init()
   127  				ch <- err
   128  			}(plugin)
   129  
   130  			for {
   131  				select {
   132  				case err := <-ch:
   133  					if err != nil {
   134  						klog.Errorf("[sysadvisor] initialize plugin %v with error: %v; do not start it", plugin.Name(), err)
   135  					} else {
   136  						m.mutex.Lock()
   137  						m.pluginsToRun = append(m.pluginsToRun, plugin)
   138  						m.mutex.Unlock()
   139  						klog.Infof("[sysadvisor] plugin %v initialized", plugin.Name())
   140  					}
   141  					return
   142  				case <-ctx.Done():
   143  					klog.Errorf("[sysadvisor] initialize plugin %v timeout, limit %v; ignore and do not start it", plugin.Name(), initTimeout)
   144  					return
   145  				}
   146  			}
   147  		}(c, plugin)
   148  	}
   149  	m.wgInitPlugin.Wait()
   150  }
   151  
   152  // Run starts sysadvisor agent
   153  func (m *AdvisorAgent) Run(ctx context.Context) {
   154  	wg := sync.WaitGroup{}
   155  	// sysadvisor plugin can both run synchronously or asynchronously
   156  	for _, plugin := range m.pluginsToRun {
   157  		wg.Add(1)
   158  		go func(plugin pkgplugin.SysAdvisorPlugin) {
   159  			defer wg.Done()
   160  			klog.Infof("[sysadvisor] start plugin %v", plugin.Name())
   161  			plugin.Run(ctx)
   162  		}(plugin)
   163  	}
   164  
   165  	wg.Wait()
   166  	<-ctx.Done()
   167  }