github.com/alibaba/ilogtail/pkg@v0.0.0-20250526110833-c53b480d046c/helper/containercenter/container_discover_controller.go (about)

     1  // Copyright 2021 iLogtail Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package containercenter
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"os"
    21  	"strconv"
    22  	"sync"
    23  	"time"
    24  
    25  	"github.com/alibaba/ilogtail/pkg/logger"
    26  	"github.com/alibaba/ilogtail/pkg/util"
    27  )
    28  
    29  var FetchAllInterval = time.Second * time.Duration(300)
    30  
    31  // fetchAllSuccessTimeout controls when to force timeout containers if fetchAll
    32  // failed continuously. By default, 20 times of FetchAllInterval.
    33  // current incremental discovery does not refresh container's lastUpdateTime, so this value must be greater than FetchAllInterval
    34  var fetchAllSuccessTimeout = FetchAllInterval * 20
    35  var ContainerCenterTimeout = time.Second * time.Duration(30)
    36  var MaxFetchOneTriggerPerSecond int32 = 200
    37  
    38  type ContainerDiscoverManager struct {
    39  	enableDockerDiscover bool // maybe changed
    40  	enableCRIDiscover    bool
    41  	enableStaticDiscover bool
    42  
    43  	fetchOneCount    int32 // only limit the frequency of FetchOne
    44  	lastFetchOneTime int64
    45  	fetchOneLock     sync.Mutex
    46  }
    47  
    48  func NewContainerDiscoverManager() *ContainerDiscoverManager {
    49  	return &ContainerDiscoverManager{
    50  		enableDockerDiscover: false,
    51  		enableCRIDiscover:    false,
    52  		enableStaticDiscover: false,
    53  	}
    54  }
    55  
    56  // FetchAll
    57  // Currently, there are 3 ways to find containers, which are docker interface, cri interface and static container info file.
    58  func (c *ContainerDiscoverManager) FetchAll() {
    59  	if c.enableStaticDiscover {
    60  		c.fetchStatic()
    61  	}
    62  
    63  	var err error
    64  	if c.enableDockerDiscover {
    65  		if err = c.fetchDocker(); err != nil {
    66  			logger.Info(context.Background(), "container docker fetch all", err)
    67  		}
    68  	}
    69  
    70  	if c.enableCRIDiscover {
    71  		if err = c.fetchCRI(); err != nil {
    72  			logger.Info(context.Background(), "container CRIRuntime fetch all", err)
    73  		}
    74  	}
    75  }
    76  
    77  func (c *ContainerDiscoverManager) FetchOne(containerID string) error {
    78  	logger.Debug(context.Background(), "discover manager fetch one", containerID)
    79  	now := time.Now().Unix()
    80  	c.fetchOneLock.Lock()
    81  	if now > c.lastFetchOneTime {
    82  		c.lastFetchOneTime = now
    83  		c.fetchOneCount = 0
    84  	}
    85  	c.fetchOneCount++
    86  	if c.fetchOneCount > MaxFetchOneTriggerPerSecond {
    87  		logger.Debug(context.Background(), "discover manager reject because of reaching the maximum fetch count", containerID)
    88  		c.fetchOneLock.Unlock()
    89  		return fmt.Errorf("cannot fetch %s because of reaching the maximum fetch count", containerID)
    90  	}
    91  	c.fetchOneLock.Unlock()
    92  	var err error
    93  	if c.enableCRIDiscover {
    94  		err = criRuntimeWrapper.fetchOne(containerID)
    95  		logger.Debug(context.Background(), "discover manager cri fetch one status", err == nil)
    96  		if err == nil {
    97  			return nil
    98  		}
    99  	}
   100  	if c.enableDockerDiscover {
   101  		err = containerCenterInstance.fetchOne(containerID, true)
   102  		logger.Debug(context.Background(), "discover manager docker fetch one status", err == nil)
   103  	}
   104  	return err
   105  }
   106  
   107  func (c *ContainerDiscoverManager) fetchDocker() error {
   108  	if containerCenterInstance == nil {
   109  		return nil
   110  	}
   111  	return containerCenterInstance.fetchAll()
   112  }
   113  
   114  func (c *ContainerDiscoverManager) fetchStatic() {
   115  	if containerCenterInstance == nil {
   116  		return
   117  	}
   118  	containerCenterInstance.readStaticConfig(true)
   119  }
   120  
   121  func (c *ContainerDiscoverManager) fetchCRI() error {
   122  	if criRuntimeWrapper == nil {
   123  		return nil
   124  	}
   125  	return criRuntimeWrapper.fetchAll()
   126  }
   127  
   128  func (c *ContainerDiscoverManager) StartSyncContainers() {
   129  	if c.enableCRIDiscover {
   130  		logger.Debug(context.Background(), "discover manager start sync containers goroutine", "cri")
   131  		go criRuntimeWrapper.loopSyncContainers()
   132  	}
   133  	if c.enableStaticDiscover {
   134  		logger.Debug(context.Background(), "discover manager start sync containers goroutine", "static")
   135  		go containerCenterInstance.flushStaticConfig()
   136  	}
   137  	if c.enableDockerDiscover {
   138  		logger.Debug(context.Background(), "discover manager start sync containers goroutine", "docker")
   139  		go containerCenterInstance.eventListener()
   140  	}
   141  }
   142  
   143  func (c *ContainerDiscoverManager) Clean() {
   144  	if criRuntimeWrapper != nil {
   145  		criRuntimeWrapper.sweepCache()
   146  		logger.Debug(context.Background(), "discover manager clean", "cri")
   147  	}
   148  	if containerCenterInstance != nil {
   149  		containerCenterInstance.sweepCache()
   150  		logger.Debug(context.Background(), "discover manager clean", "docker")
   151  	}
   152  }
   153  
   154  func (c *ContainerDiscoverManager) LogAlarm(err error, msg string) {
   155  	if err != nil {
   156  		logger.Warning(context.Background(), "DOCKER_CENTER_ALARM", "message", msg, "error found", err)
   157  	} else {
   158  		logger.Debug(context.Background(), "message", msg)
   159  	}
   160  }
   161  
   162  func (c *ContainerDiscoverManager) Init() bool {
   163  	defer containerCenterRecover()
   164  
   165  	// discover which runtime is valid
   166  	if wrapper, err := NewCRIRuntimeWrapper(containerCenterInstance); err != nil {
   167  		logger.Errorf(context.Background(), "DOCKER_CENTER_ALARM", "[CRIRuntime] creare cri-runtime client error: %v", err)
   168  		criRuntimeWrapper = nil
   169  	} else {
   170  		logger.Infof(context.Background(), "[CRIRuntime] create cri-runtime client successfully")
   171  		criRuntimeWrapper = wrapper
   172  	}
   173  	if ok, err := util.PathExists(DefaultLogtailMountPath); err == nil {
   174  		if !ok {
   175  			logger.Info(context.Background(), "no docker mount path", "set empty")
   176  			DefaultLogtailMountPath = ""
   177  		}
   178  	} else {
   179  		logger.Warning(context.Background(), "check docker mount path error", err.Error())
   180  	}
   181  	c.enableCRIDiscover = criRuntimeWrapper != nil
   182  	c.enableDockerDiscover = containerCenterInstance.initClient() == nil
   183  	c.enableStaticDiscover = isStaticContainerInfoEnabled()
   184  	discoverdRuntime := c.enableCRIDiscover || c.enableDockerDiscover || c.enableStaticDiscover
   185  	if !discoverdRuntime {
   186  		return false
   187  	}
   188  
   189  	// try to connect to runtime
   190  	logger.Info(context.Background(), "input", "param", "docker discover", c.enableDockerDiscover, "cri discover", c.enableCRIDiscover, "static discover", c.enableStaticDiscover)
   191  	listenLoopIntervalSec := 0
   192  	// Get env in the same order as in C Logtail
   193  	listenLoopIntervalStr := os.Getenv("docker_config_update_interval")
   194  	if len(listenLoopIntervalStr) > 0 {
   195  		listenLoopIntervalSec, _ = strconv.Atoi(listenLoopIntervalStr)
   196  	}
   197  	listenLoopIntervalStr = os.Getenv("ALIYUN_LOGTAIL_DOCKER_CONFIG_UPDATE_INTERVAL")
   198  	if len(listenLoopIntervalStr) > 0 {
   199  		listenLoopIntervalSec, _ = strconv.Atoi(listenLoopIntervalStr)
   200  	}
   201  	// Keep this env var for compatibility
   202  	listenLoopIntervalStr = os.Getenv("CONTAINERD_LISTEN_LOOP_INTERVAL")
   203  	if len(listenLoopIntervalStr) > 0 {
   204  		listenLoopIntervalSec, _ = strconv.Atoi(listenLoopIntervalStr)
   205  	}
   206  	if listenLoopIntervalSec > 0 {
   207  		DefaultSyncContainersPeriod = time.Second * time.Duration(listenLoopIntervalSec)
   208  	}
   209  	// @note config for Fetch All Interval
   210  	fetchAllSec := (int)(FetchAllInterval.Seconds())
   211  	if err := util.InitFromEnvInt("DOCKER_FETCH_ALL_INTERVAL", &fetchAllSec, fetchAllSec); err != nil {
   212  		c.LogAlarm(err, "initialize env DOCKER_FETCH_ALL_INTERVAL error")
   213  	}
   214  	if fetchAllSec > 0 && fetchAllSec < 3600*24 {
   215  		FetchAllInterval = time.Duration(fetchAllSec) * time.Second
   216  	}
   217  	logger.Info(context.Background(), "init docker center, fetch all seconds", FetchAllInterval.String())
   218  	{
   219  		timeoutSec := int(fetchAllSuccessTimeout.Seconds())
   220  		if err := util.InitFromEnvInt("DOCKER_FETCH_ALL_SUCCESS_TIMEOUT", &timeoutSec, timeoutSec); err != nil {
   221  			c.LogAlarm(err, "initialize env DOCKER_FETCH_ALL_SUCCESS_TIMEOUT error")
   222  		}
   223  		if timeoutSec > int(FetchAllInterval.Seconds()) && timeoutSec <= 3600*24 {
   224  			fetchAllSuccessTimeout = time.Duration(timeoutSec) * time.Second
   225  		}
   226  	}
   227  	logger.Info(context.Background(), "init docker center, fecth all success timeout", fetchAllSuccessTimeout.String())
   228  	{
   229  		timeoutSec := int(ContainerCenterTimeout.Seconds())
   230  		if err := util.InitFromEnvInt("DOCKER_CLIENT_REQUEST_TIMEOUT", &timeoutSec, timeoutSec); err != nil {
   231  			c.LogAlarm(err, "initialize env DOCKER_CLIENT_REQUEST_TIMEOUT error")
   232  		}
   233  		if timeoutSec > 0 {
   234  			ContainerCenterTimeout = time.Duration(timeoutSec) * time.Second
   235  		}
   236  	}
   237  	logger.Info(context.Background(), "init docker center, client request timeout", ContainerCenterTimeout.String())
   238  	{
   239  		count := int(MaxFetchOneTriggerPerSecond)
   240  		if err := util.InitFromEnvInt("CONTAINER_FETCH_ONE_MAX_COUNT_PER_SECOND", &count, count); err != nil {
   241  			c.LogAlarm(err, "initialize env CONTAINER_FETCH_ONE_MAX_COUNT_PER_SECOND error")
   242  		}
   243  		if count > 0 {
   244  			MaxFetchOneTriggerPerSecond = int32(count)
   245  		}
   246  	}
   247  	logger.Info(context.Background(), "init docker center, max fetchOne count per second", MaxFetchOneTriggerPerSecond)
   248  
   249  	var err error
   250  	if c.enableDockerDiscover {
   251  		if err = c.fetchDocker(); err != nil {
   252  			c.enableDockerDiscover = false
   253  			logger.Errorf(context.Background(), "DOCKER_CENTER_ALARM", "fetch docker containers error, close docker discover, will retry")
   254  		}
   255  	}
   256  	if c.enableCRIDiscover {
   257  		if err = c.fetchCRI(); err != nil {
   258  			c.enableCRIDiscover = false
   259  			logger.Errorf(context.Background(), "DOCKER_CENTER_ALARM", "fetch cri containers error, close cri discover, will retry")
   260  		}
   261  	}
   262  	if c.enableStaticDiscover {
   263  		c.fetchStatic()
   264  	}
   265  	logger.Info(context.Background(), "final", "param", "docker discover", c.enableDockerDiscover, "cri discover", c.enableCRIDiscover, "static discover", c.enableStaticDiscover)
   266  	return c.enableCRIDiscover || c.enableDockerDiscover || c.enableStaticDiscover
   267  }
   268  
   269  func (c *ContainerDiscoverManager) TimerFetch() {
   270  	timerFetch := func() {
   271  		defer containerCenterRecover()
   272  		lastFetchAllTime := time.Now()
   273  		for {
   274  			time.Sleep(time.Duration(10) * time.Second)
   275  			logger.Debug(context.Background(), "container clean timeout container info", "start")
   276  			containerCenterInstance.cleanTimeoutContainer()
   277  			logger.Debug(context.Background(), "container clean timeout container info", "done")
   278  			if time.Since(lastFetchAllTime) >= FetchAllInterval {
   279  				logger.Info(context.Background(), "container fetch all", "start")
   280  				c.FetchAll()
   281  				lastFetchAllTime = time.Now()
   282  				c.Clean()
   283  				logger.Info(context.Background(), "container fetch all", "end")
   284  			}
   285  
   286  		}
   287  	}
   288  	go timerFetch()
   289  }