github.com/kubewharf/katalyst-core@v0.5.3/cmd/katalyst-agent/app/agent.go (about) 1 /* 2 Copyright 2022 The Katalyst Authors. 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 package app 18 19 import ( 20 "context" 21 "fmt" 22 "sync" 23 "time" 24 25 "k8s.io/klog/v2" 26 27 katalystbase "github.com/kubewharf/katalyst-core/cmd/base" 28 "github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/agent" 29 "github.com/kubewharf/katalyst-core/pkg/client" 30 "github.com/kubewharf/katalyst-core/pkg/config" 31 "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic/crd" 32 "github.com/kubewharf/katalyst-core/pkg/consts" 33 "github.com/kubewharf/katalyst-core/pkg/metrics" 34 "github.com/kubewharf/katalyst-core/pkg/util/general" 35 "github.com/kubewharf/katalyst-core/pkg/util/process" 36 ) 37 38 const healthzNameLockingFileAcquired = "LockingFileReady" 39 40 const ( 41 metricsNameLockingFailed = "get_lock_failed" 42 metricsNameAgentStarted = "agent_started" 43 ) 44 45 // Run starts common and uniformed agent components here, and starts other 46 // specific components in other separate repos (with common components as 47 // dependencies) 48 func Run(conf *config.Configuration, clientSet *client.GenericClientSet, genericOptions ...katalystbase.GenericOptions) error { 49 // Set up signals so that we handle the first shutdown signal gracefully. 50 ctx := process.SetupSignalHandler() 51 52 baseCtx, err := katalystbase.NewGenericContext(clientSet, "", nil, AgentsDisabledByDefault, 53 conf.GenericConfiguration, consts.KatalystComponentAgent, conf.DynamicAgentConfiguration) 54 if err != nil { 55 return err 56 } 57 58 genericCtx, err := agent.NewGenericContext(baseCtx, conf) 59 if err != nil { 60 return err 61 } 62 63 for _, genericOption := range genericOptions { 64 genericOption(genericCtx) 65 } 66 67 lock := acquireLock(genericCtx, conf) 68 defer func() { 69 // if the process panic in other place and the defer function isn't executed, 70 // OS will help to unlock. So the next process till get the lock successfully. 71 general.ReleaseUniqueLock(lock) 72 73 // wait async log sync to disk 74 time.Sleep(1 * time.Second) 75 }() 76 77 return startAgent(ctx, genericCtx, conf, GetAgentInitializers()) 78 } 79 80 // startAgent is used to initialize and start each component in katalyst-agent 81 func startAgent(ctx context.Context, genericCtx *agent.GenericContext, 82 conf *config.Configuration, agents map[string]AgentStarter, 83 ) error { 84 componentMap := make(map[string]agent.Component) 85 monitorAgentStart(genericCtx) 86 for agentName, starter := range agents { 87 if !genericCtx.IsEnabled(agentName, conf.Agents) { 88 klog.Warningf("%q is disabled", agentName) 89 continue 90 } 91 92 klog.Infof("initializing %q", agentName) 93 needToRun, component, err := starter.Init(genericCtx, conf, starter.ExtraConf, agentName) 94 if err != nil { 95 klog.Errorf("Error initializing %q", agentName) 96 return err 97 } else if !needToRun { 98 klog.Warningf("skip to call running functions %q", agentName) 99 continue 100 } 101 102 componentMap[agentName] = component 103 klog.Infof("needToRun %q", agentName) 104 } 105 106 // initialize dynamic config first before components run. 107 err := genericCtx.InitializeConfig(ctx) 108 if err != nil { 109 return fmt.Errorf("initialize dynamic config failed: %v", err) 110 } 111 112 wg := sync.WaitGroup{} 113 114 // start generic ctx first 115 wg.Add(1) 116 go func() { 117 defer wg.Done() 118 genericCtx.Run(ctx) 119 }() 120 121 // watch auth configuration 122 err = genericCtx.MetaServer.AddConfigWatcher(crd.AuthConfigurationGVR) 123 if err != nil { 124 return fmt.Errorf("add authconfiguration watcher failed") 125 } 126 127 // start all component and make sure them can be stopped completely 128 for agentName, component := range componentMap { 129 wg.Add(1) 130 runnable := component 131 go func() { 132 defer wg.Done() 133 runnable.Run(ctx) 134 klog.Infof("component %q stopped", agentName) 135 }() 136 137 klog.Infof("started %q", agentName) 138 } 139 140 wg.Wait() 141 return nil 142 } 143 144 func monitorAgentStart(genericCtx *agent.GenericContext) { 145 _ = genericCtx.EmitterPool.GetDefaultMetricsEmitter().StoreInt64(metricsNameAgentStarted, 1, metrics.MetricTypeNameCount) 146 } 147 148 // acquireLock makes sure only one process can handle socket files; 149 // any process that wants to enter main loop, should acquire file lock firstly. 150 func acquireLock(genericCtx *agent.GenericContext, conf *config.Configuration) *general.Flock { 151 // register a not-ready state for lock-acquiring when we starts 152 general.RegisterHeartbeatCheck(healthzNameLockingFileAcquired, 0, general.HealthzCheckStateNotReady, 0) 153 154 // set a ready state for lock-acquiring when we acquire locking successfully 155 defer func() { 156 _ = general.UpdateHealthzState(healthzNameLockingFileAcquired, general.HealthzCheckStateReady, "") 157 }() 158 159 for { 160 lock, err := general.GetUniqueLock(conf.LockFileName) 161 if err != nil { 162 _ = genericCtx.EmitterPool.GetDefaultMetricsEmitter().StoreInt64(metricsNameLockingFailed, 1, metrics.MetricTypeNameRaw) 163 // if waiting is enabled, we will always wait until lock has been obtained successfully; 164 if conf.LockWaitingEnabled { 165 continue 166 } 167 panic(err) 168 } else { 169 return lock 170 } 171 } 172 }