github.com/alibaba/ilogtail/pkg@v0.0.0-20250526110833-c53b480d046c/helper/containercenter/container_discover_controller.go (about) 1 // Copyright 2021 iLogtail Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package containercenter 16 17 import ( 18 "context" 19 "fmt" 20 "os" 21 "strconv" 22 "sync" 23 "time" 24 25 "github.com/alibaba/ilogtail/pkg/logger" 26 "github.com/alibaba/ilogtail/pkg/util" 27 ) 28 29 var FetchAllInterval = time.Second * time.Duration(300) 30 31 // fetchAllSuccessTimeout controls when to force timeout containers if fetchAll 32 // failed continuously. By default, 20 times of FetchAllInterval. 33 // current incremental discovery does not refresh container's lastUpdateTime, so this value must be greater than FetchAllInterval 34 var fetchAllSuccessTimeout = FetchAllInterval * 20 35 var ContainerCenterTimeout = time.Second * time.Duration(30) 36 var MaxFetchOneTriggerPerSecond int32 = 200 37 38 type ContainerDiscoverManager struct { 39 enableDockerDiscover bool // maybe changed 40 enableCRIDiscover bool 41 enableStaticDiscover bool 42 43 fetchOneCount int32 // only limit the frequency of FetchOne 44 lastFetchOneTime int64 45 fetchOneLock sync.Mutex 46 } 47 48 func NewContainerDiscoverManager() *ContainerDiscoverManager { 49 return &ContainerDiscoverManager{ 50 enableDockerDiscover: false, 51 enableCRIDiscover: false, 52 enableStaticDiscover: false, 53 } 54 } 55 56 // FetchAll 57 // Currently, there are 3 ways to find containers, which are docker interface, cri interface and static container info file. 58 func (c *ContainerDiscoverManager) FetchAll() { 59 if c.enableStaticDiscover { 60 c.fetchStatic() 61 } 62 63 var err error 64 if c.enableDockerDiscover { 65 if err = c.fetchDocker(); err != nil { 66 logger.Info(context.Background(), "container docker fetch all", err) 67 } 68 } 69 70 if c.enableCRIDiscover { 71 if err = c.fetchCRI(); err != nil { 72 logger.Info(context.Background(), "container CRIRuntime fetch all", err) 73 } 74 } 75 } 76 77 func (c *ContainerDiscoverManager) FetchOne(containerID string) error { 78 logger.Debug(context.Background(), "discover manager fetch one", containerID) 79 now := time.Now().Unix() 80 c.fetchOneLock.Lock() 81 if now > c.lastFetchOneTime { 82 c.lastFetchOneTime = now 83 c.fetchOneCount = 0 84 } 85 c.fetchOneCount++ 86 if c.fetchOneCount > MaxFetchOneTriggerPerSecond { 87 logger.Debug(context.Background(), "discover manager reject because of reaching the maximum fetch count", containerID) 88 c.fetchOneLock.Unlock() 89 return fmt.Errorf("cannot fetch %s because of reaching the maximum fetch count", containerID) 90 } 91 c.fetchOneLock.Unlock() 92 var err error 93 if c.enableCRIDiscover { 94 err = criRuntimeWrapper.fetchOne(containerID) 95 logger.Debug(context.Background(), "discover manager cri fetch one status", err == nil) 96 if err == nil { 97 return nil 98 } 99 } 100 if c.enableDockerDiscover { 101 err = containerCenterInstance.fetchOne(containerID, true) 102 logger.Debug(context.Background(), "discover manager docker fetch one status", err == nil) 103 } 104 return err 105 } 106 107 func (c *ContainerDiscoverManager) fetchDocker() error { 108 if containerCenterInstance == nil { 109 return nil 110 } 111 return containerCenterInstance.fetchAll() 112 } 113 114 func (c *ContainerDiscoverManager) fetchStatic() { 115 if containerCenterInstance == nil { 116 return 117 } 118 containerCenterInstance.readStaticConfig(true) 119 } 120 121 func (c *ContainerDiscoverManager) fetchCRI() error { 122 if criRuntimeWrapper == nil { 123 return nil 124 } 125 return criRuntimeWrapper.fetchAll() 126 } 127 128 func (c *ContainerDiscoverManager) StartSyncContainers() { 129 if c.enableCRIDiscover { 130 logger.Debug(context.Background(), "discover manager start sync containers goroutine", "cri") 131 go criRuntimeWrapper.loopSyncContainers() 132 } 133 if c.enableStaticDiscover { 134 logger.Debug(context.Background(), "discover manager start sync containers goroutine", "static") 135 go containerCenterInstance.flushStaticConfig() 136 } 137 if c.enableDockerDiscover { 138 logger.Debug(context.Background(), "discover manager start sync containers goroutine", "docker") 139 go containerCenterInstance.eventListener() 140 } 141 } 142 143 func (c *ContainerDiscoverManager) Clean() { 144 if criRuntimeWrapper != nil { 145 criRuntimeWrapper.sweepCache() 146 logger.Debug(context.Background(), "discover manager clean", "cri") 147 } 148 if containerCenterInstance != nil { 149 containerCenterInstance.sweepCache() 150 logger.Debug(context.Background(), "discover manager clean", "docker") 151 } 152 } 153 154 func (c *ContainerDiscoverManager) LogAlarm(err error, msg string) { 155 if err != nil { 156 logger.Warning(context.Background(), "DOCKER_CENTER_ALARM", "message", msg, "error found", err) 157 } else { 158 logger.Debug(context.Background(), "message", msg) 159 } 160 } 161 162 func (c *ContainerDiscoverManager) Init() bool { 163 defer containerCenterRecover() 164 165 // discover which runtime is valid 166 if wrapper, err := NewCRIRuntimeWrapper(containerCenterInstance); err != nil { 167 logger.Errorf(context.Background(), "DOCKER_CENTER_ALARM", "[CRIRuntime] creare cri-runtime client error: %v", err) 168 criRuntimeWrapper = nil 169 } else { 170 logger.Infof(context.Background(), "[CRIRuntime] create cri-runtime client successfully") 171 criRuntimeWrapper = wrapper 172 } 173 if ok, err := util.PathExists(DefaultLogtailMountPath); err == nil { 174 if !ok { 175 logger.Info(context.Background(), "no docker mount path", "set empty") 176 DefaultLogtailMountPath = "" 177 } 178 } else { 179 logger.Warning(context.Background(), "check docker mount path error", err.Error()) 180 } 181 c.enableCRIDiscover = criRuntimeWrapper != nil 182 c.enableDockerDiscover = containerCenterInstance.initClient() == nil 183 c.enableStaticDiscover = isStaticContainerInfoEnabled() 184 discoverdRuntime := c.enableCRIDiscover || c.enableDockerDiscover || c.enableStaticDiscover 185 if !discoverdRuntime { 186 return false 187 } 188 189 // try to connect to runtime 190 logger.Info(context.Background(), "input", "param", "docker discover", c.enableDockerDiscover, "cri discover", c.enableCRIDiscover, "static discover", c.enableStaticDiscover) 191 listenLoopIntervalSec := 0 192 // Get env in the same order as in C Logtail 193 listenLoopIntervalStr := os.Getenv("docker_config_update_interval") 194 if len(listenLoopIntervalStr) > 0 { 195 listenLoopIntervalSec, _ = strconv.Atoi(listenLoopIntervalStr) 196 } 197 listenLoopIntervalStr = os.Getenv("ALIYUN_LOGTAIL_DOCKER_CONFIG_UPDATE_INTERVAL") 198 if len(listenLoopIntervalStr) > 0 { 199 listenLoopIntervalSec, _ = strconv.Atoi(listenLoopIntervalStr) 200 } 201 // Keep this env var for compatibility 202 listenLoopIntervalStr = os.Getenv("CONTAINERD_LISTEN_LOOP_INTERVAL") 203 if len(listenLoopIntervalStr) > 0 { 204 listenLoopIntervalSec, _ = strconv.Atoi(listenLoopIntervalStr) 205 } 206 if listenLoopIntervalSec > 0 { 207 DefaultSyncContainersPeriod = time.Second * time.Duration(listenLoopIntervalSec) 208 } 209 // @note config for Fetch All Interval 210 fetchAllSec := (int)(FetchAllInterval.Seconds()) 211 if err := util.InitFromEnvInt("DOCKER_FETCH_ALL_INTERVAL", &fetchAllSec, fetchAllSec); err != nil { 212 c.LogAlarm(err, "initialize env DOCKER_FETCH_ALL_INTERVAL error") 213 } 214 if fetchAllSec > 0 && fetchAllSec < 3600*24 { 215 FetchAllInterval = time.Duration(fetchAllSec) * time.Second 216 } 217 logger.Info(context.Background(), "init docker center, fetch all seconds", FetchAllInterval.String()) 218 { 219 timeoutSec := int(fetchAllSuccessTimeout.Seconds()) 220 if err := util.InitFromEnvInt("DOCKER_FETCH_ALL_SUCCESS_TIMEOUT", &timeoutSec, timeoutSec); err != nil { 221 c.LogAlarm(err, "initialize env DOCKER_FETCH_ALL_SUCCESS_TIMEOUT error") 222 } 223 if timeoutSec > int(FetchAllInterval.Seconds()) && timeoutSec <= 3600*24 { 224 fetchAllSuccessTimeout = time.Duration(timeoutSec) * time.Second 225 } 226 } 227 logger.Info(context.Background(), "init docker center, fecth all success timeout", fetchAllSuccessTimeout.String()) 228 { 229 timeoutSec := int(ContainerCenterTimeout.Seconds()) 230 if err := util.InitFromEnvInt("DOCKER_CLIENT_REQUEST_TIMEOUT", &timeoutSec, timeoutSec); err != nil { 231 c.LogAlarm(err, "initialize env DOCKER_CLIENT_REQUEST_TIMEOUT error") 232 } 233 if timeoutSec > 0 { 234 ContainerCenterTimeout = time.Duration(timeoutSec) * time.Second 235 } 236 } 237 logger.Info(context.Background(), "init docker center, client request timeout", ContainerCenterTimeout.String()) 238 { 239 count := int(MaxFetchOneTriggerPerSecond) 240 if err := util.InitFromEnvInt("CONTAINER_FETCH_ONE_MAX_COUNT_PER_SECOND", &count, count); err != nil { 241 c.LogAlarm(err, "initialize env CONTAINER_FETCH_ONE_MAX_COUNT_PER_SECOND error") 242 } 243 if count > 0 { 244 MaxFetchOneTriggerPerSecond = int32(count) 245 } 246 } 247 logger.Info(context.Background(), "init docker center, max fetchOne count per second", MaxFetchOneTriggerPerSecond) 248 249 var err error 250 if c.enableDockerDiscover { 251 if err = c.fetchDocker(); err != nil { 252 c.enableDockerDiscover = false 253 logger.Errorf(context.Background(), "DOCKER_CENTER_ALARM", "fetch docker containers error, close docker discover, will retry") 254 } 255 } 256 if c.enableCRIDiscover { 257 if err = c.fetchCRI(); err != nil { 258 c.enableCRIDiscover = false 259 logger.Errorf(context.Background(), "DOCKER_CENTER_ALARM", "fetch cri containers error, close cri discover, will retry") 260 } 261 } 262 if c.enableStaticDiscover { 263 c.fetchStatic() 264 } 265 logger.Info(context.Background(), "final", "param", "docker discover", c.enableDockerDiscover, "cri discover", c.enableCRIDiscover, "static discover", c.enableStaticDiscover) 266 return c.enableCRIDiscover || c.enableDockerDiscover || c.enableStaticDiscover 267 } 268 269 func (c *ContainerDiscoverManager) TimerFetch() { 270 timerFetch := func() { 271 defer containerCenterRecover() 272 lastFetchAllTime := time.Now() 273 for { 274 time.Sleep(time.Duration(10) * time.Second) 275 logger.Debug(context.Background(), "container clean timeout container info", "start") 276 containerCenterInstance.cleanTimeoutContainer() 277 logger.Debug(context.Background(), "container clean timeout container info", "done") 278 if time.Since(lastFetchAllTime) >= FetchAllInterval { 279 logger.Info(context.Background(), "container fetch all", "start") 280 c.FetchAll() 281 lastFetchAllTime = time.Now() 282 c.Clean() 283 logger.Info(context.Background(), "container fetch all", "end") 284 } 285 286 } 287 } 288 go timerFetch() 289 }