github.com/kubewharf/katalyst-core@v0.5.3/pkg/agent/sysadvisor/sysadvisor.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package sysadvisor 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 "k8s.io/apimachinery/pkg/util/sets" 26 "k8s.io/klog/v2" 27 28 "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/metacache" 29 pkgplugin "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin" 30 "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/inference" 31 metacacheplugin "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/metacache" 32 metricemitter "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/metric-emitter" 33 "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/overcommitmentaware" 34 "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/plugin/qosaware" 35 "github.com/kubewharf/katalyst-core/pkg/agent/sysadvisor/types" 36 "github.com/kubewharf/katalyst-core/pkg/config" 37 "github.com/kubewharf/katalyst-core/pkg/metaserver" 38 metricspool "github.com/kubewharf/katalyst-core/pkg/metrics/metrics-pool" 39 "github.com/kubewharf/katalyst-core/pkg/util/general" 40 ) 41 42 const initTimeout = 10 * time.Second 43 44 func init() { 45 pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameQoSAware, qosaware.NewQoSAwarePlugin) 46 pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameMetaCache, metacacheplugin.NewMetaCachePlugin) 47 pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameMetricEmitter, metricemitter.NewCustomMetricEmitter) 48 pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameInference, inference.NewInferencePlugin) 49 pkgplugin.RegisterAdvisorPlugin(types.AdvisorPluginNameOvercommitAware, overcommitmentaware.NewOvercommitmentAwarePlugin) 50 } 51 52 // AdvisorAgent for sysadvisor 53 type AdvisorAgent struct { 54 // those are parameters that be passed to sysadvisor when starting agents. 55 config *config.Configuration 56 extraConf interface{} 57 metaServer *metaserver.MetaServer 58 emitPool metricspool.MetricsEmitterPool 59 60 plugins []pkgplugin.SysAdvisorPlugin 61 pluginsToRun []pkgplugin.SysAdvisorPlugin 62 63 wgInitPlugin sync.WaitGroup 64 mutex sync.Mutex 65 } 66 67 // NewAdvisorAgent initializes the sysadvisor agent logic. 68 func NewAdvisorAgent(conf *config.Configuration, extraConf interface{}, metaServer *metaserver.MetaServer, 69 emitPool metricspool.MetricsEmitterPool, 70 ) (*AdvisorAgent, error) { 71 agent := &AdvisorAgent{ 72 config: conf, 73 extraConf: extraConf, 74 metaServer: metaServer, 75 emitPool: emitPool, 76 77 plugins: make([]pkgplugin.SysAdvisorPlugin, 0), 78 pluginsToRun: make([]pkgplugin.SysAdvisorPlugin, 0), 79 } 80 81 if err := agent.getAdvisorPlugins(pkgplugin.GetRegisteredAdvisorPlugins()); err != nil { 82 return nil, err 83 } 84 85 agent.init() 86 return agent, nil 87 } 88 89 func (m *AdvisorAgent) getAdvisorPlugins(SysAdvisorPluginInitializers map[string]pkgplugin.AdvisorPluginInitFunc) error { 90 metaCache, err := metacache.NewMetaCacheImp(m.config, m.emitPool, m.metaServer.MetricsFetcher) 91 if err != nil { 92 return fmt.Errorf("new metacache failed: %v", err) 93 } 94 95 for pluginName, initFn := range SysAdvisorPluginInitializers { 96 if !general.IsNameEnabled(pluginName, sets.NewString(), m.config.GenericSysAdvisorConfiguration.SysAdvisorPlugins) { 97 klog.Warningf("[sysadvisor] %s plugin is disabled", pluginName) 98 continue 99 } 100 101 klog.Infof("[sysadvisor] %s plugin is enabled", pluginName) 102 curPlugin, err := initFn(pluginName, m.config, m.extraConf, m.emitPool, m.metaServer, metaCache) 103 if err != nil { 104 return fmt.Errorf("failed to start sysadvisor plugin %v: %v", pluginName, err) 105 } 106 107 m.plugins = append(m.plugins, curPlugin) 108 } 109 110 return nil 111 } 112 113 // Asynchronous initialization with timeout. Timeout plugin will neither be killed nor started. 114 func (m *AdvisorAgent) init() { 115 for _, plugin := range m.plugins { 116 p := context.TODO() 117 c, cancel := context.WithTimeout(p, initTimeout) 118 defer cancel() 119 m.wgInitPlugin.Add(1) 120 121 go func(ctx context.Context, plugin pkgplugin.SysAdvisorPlugin) { 122 defer m.wgInitPlugin.Done() 123 124 ch := make(chan error, 1) 125 go func(plugin pkgplugin.SysAdvisorPlugin) { 126 err := plugin.Init() 127 ch <- err 128 }(plugin) 129 130 for { 131 select { 132 case err := <-ch: 133 if err != nil { 134 klog.Errorf("[sysadvisor] initialize plugin %v with error: %v; do not start it", plugin.Name(), err) 135 } else { 136 m.mutex.Lock() 137 m.pluginsToRun = append(m.pluginsToRun, plugin) 138 m.mutex.Unlock() 139 klog.Infof("[sysadvisor] plugin %v initialized", plugin.Name()) 140 } 141 return 142 case <-ctx.Done(): 143 klog.Errorf("[sysadvisor] initialize plugin %v timeout, limit %v; ignore and do not start it", plugin.Name(), initTimeout) 144 return 145 } 146 } 147 }(c, plugin) 148 } 149 m.wgInitPlugin.Wait() 150 } 151 152 // Run starts sysadvisor agent 153 func (m *AdvisorAgent) Run(ctx context.Context) { 154 wg := sync.WaitGroup{} 155 // sysadvisor plugin can both run synchronously or asynchronously 156 for _, plugin := range m.pluginsToRun { 157 wg.Add(1) 158 go func(plugin pkgplugin.SysAdvisorPlugin) { 159 defer wg.Done() 160 klog.Infof("[sysadvisor] start plugin %v", plugin.Name()) 161 plugin.Run(ctx) 162 }(plugin) 163 } 164 165 wg.Wait() 166 <-ctx.Done() 167 }