github.com/yankunsam/loki/v2@v2.6.3-0.20220817130409-389df5235c27/clients/pkg/promtail/targets/file/filetargetmanager.go

     1  package file
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"os"
     7  	"strings"
     8  	"sync"
     9  
    10  	"github.com/bmatcuk/doublestar"
    11  	"gopkg.in/fsnotify.v1"
    12  
    13  	"github.com/go-kit/log"
    14  	"github.com/go-kit/log/level"
    15  	"github.com/prometheus/client_golang/prometheus"
    16  	"github.com/prometheus/common/model"
    17  	"github.com/prometheus/prometheus/discovery"
    18  	"github.com/prometheus/prometheus/discovery/kubernetes"
    19  	"github.com/prometheus/prometheus/discovery/targetgroup"
    20  	"github.com/prometheus/prometheus/model/labels"
    21  	"github.com/prometheus/prometheus/model/relabel"
    22  
    23  	"github.com/grafana/loki/clients/pkg/logentry/stages"
    24  	"github.com/grafana/loki/clients/pkg/promtail/api"
    25  	"github.com/grafana/loki/clients/pkg/promtail/positions"
    26  	"github.com/grafana/loki/clients/pkg/promtail/scrapeconfig"
    27  	"github.com/grafana/loki/clients/pkg/promtail/targets/target"
    28  
    29  	"github.com/grafana/loki/pkg/util"
    30  )
    31  
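        // These internal labels drive file discovery: __path__ selects the files to tail
        // (doublestar globs such as /var/log/**/*.log are supported), __path_exclude__
        // filters matching files out again, and __host__ restricts a target to a single
        // host. kubernetesPodNodeField is the Kubernetes field selector used below to
        // limit pod discovery to the local node.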
    32  const (
    33  	pathLabel              = "__path__"
    34  	pathExcludeLabel       = "__path_exclude__"
    35  	hostLabel              = "__host__"
    36  	kubernetesPodNodeField = "spec.nodeName"
    37  )
    38  
    39  // FileTargetManager manages a set of file targets discovered through service discovery.
    40  // nolint:revive
    41  type FileTargetManager struct {
    42  	log     log.Logger
    43  	quit    context.CancelFunc
    44  	syncers map[string]*targetSyncer
    45  	manager *discovery.Manager
    46  
    47  	watcher            *fsnotify.Watcher
    48  	targetEventHandler chan fileTargetEvent
    49  
    50  	wg sync.WaitGroup
    51  }
    52  
    53  // NewFileTargetManager creates a new FileTargetManager.
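        //
        // A minimal usage sketch (a rough illustration only; the metrics, positions,
        // client and scrape configs are assumed to come from the caller's promtail setup):
        //
        //	tm, err := NewFileTargetManager(metrics, logger, pos, client, scrapeConfigs, &Config{})
        //	if err != nil {
        //		return err
        //	}
        //	defer tm.Stop()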
    54  func NewFileTargetManager(
    55  	metrics *Metrics,
    56  	logger log.Logger,
    57  	positions positions.Positions,
    58  	client api.EntryHandler,
    59  	scrapeConfigs []scrapeconfig.Config,
    60  	targetConfig *Config,
    61  ) (*FileTargetManager, error) {
    62  	reg := metrics.reg
    63  	if reg == nil {
    64  		reg = prometheus.DefaultRegisterer
    65  	}
    66  
    67  	watcher, err := fsnotify.NewWatcher()
    68  	if err != nil {
    69  		return nil, err
    70  	}
    71  	ctx, quit := context.WithCancel(context.Background())
    72  	tm := &FileTargetManager{
    73  		log:                logger,
    74  		quit:               quit,
    75  		watcher:            watcher,
    76  		targetEventHandler: make(chan fileTargetEvent),
    77  		syncers:            map[string]*targetSyncer{},
    78  		manager:            discovery.NewManager(ctx, log.With(logger, "component", "discovery")),
    79  	}
    80  
    81  	hostname, err := hostname()
    82  	if err != nil {
    83  		return nil, err
    84  	}
    85  
    86  	configs := map[string]discovery.Configs{}
    87  	for _, cfg := range scrapeConfigs {
    88  		if !cfg.HasServiceDiscoveryConfig() {
    89  			continue
    90  		}
    91  
    92  		pipeline, err := stages.NewPipeline(log.With(logger, "component", "file_pipeline"), cfg.PipelineStages, &cfg.JobName, reg)
    93  		if err != nil {
    94  			return nil, err
    95  		}
    96  
    97  		// Add a Source value to the static config target groups so they can be uniquely
    98  		// identified within the scrape pool, and default the target to localhost when no
    99  		// targets are defined in the promtail config. This keeps the Prometheus target
   100  		// group sync working correctly.
   101  		for i, tg := range cfg.ServiceDiscoveryConfig.StaticConfigs {
   102  			tg.Source = fmt.Sprintf("%d", i)
   103  			if len(tg.Targets) == 0 {
   104  				tg.Targets = []model.LabelSet{
   105  					{model.AddressLabel: "localhost"},
   106  				}
   107  			}
   108  		}
   109  
   110  		// Add additional API-level node filtering so we only fetch pod metadata for
   111  		// the pods running on the current node. Without this filter we would have to
   112  		// download metadata for every pod in the cluster, which can be a slow operation.
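        		// For example, on a node named "worker-1" (a hypothetical name) the resulting
        		// field selector is "spec.nodeName=worker-1".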
   113  		for _, kube := range cfg.ServiceDiscoveryConfig.KubernetesSDConfigs {
   114  			if kube.Role == kubernetes.RolePod {
   115  				selector := fmt.Sprintf("%s=%s", kubernetesPodNodeField, hostname)
   116  				kube.Selectors = []kubernetes.SelectorConfig{
   117  					{Role: kubernetes.RolePod, Field: selector},
   118  				}
   119  			}
   120  		}
   121  
   122  		s := &targetSyncer{
   123  			metrics:           metrics,
   124  			log:               logger,
   125  			positions:         positions,
   126  			relabelConfig:     cfg.RelabelConfigs,
   127  			targets:           map[string]*FileTarget{},
   128  			droppedTargets:    []target.Target{},
   129  			hostname:          hostname,
   130  			entryHandler:      pipeline.Wrap(client),
   131  			targetConfig:      targetConfig,
   132  			fileEventWatchers: map[string]chan fsnotify.Event{},
   133  			encoding:          cfg.Encoding,
   134  		}
   135  		tm.syncers[cfg.JobName] = s
   136  		configs[cfg.JobName] = cfg.ServiceDiscoveryConfig.Configs()
   137  	}
   138  
   139  	tm.wg.Add(3)
   140  	go tm.run(ctx)
   141  	go tm.watchTargetEvents(ctx)
   142  	go tm.watchFsEvents(ctx)
   143  
   144  	go util.LogError("running target manager", tm.manager.Run)
   145  
   146  	return tm, tm.manager.ApplyConfig(configs)
   147  }
   148  
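        // watchTargetEvents adds and removes directories on the shared fsnotify watcher
        // in response to watch start/stop events sent by the file targets.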
   149  func (tm *FileTargetManager) watchTargetEvents(ctx context.Context) {
   150  	defer tm.wg.Done()
   151  
   152  	for {
   153  		select {
   154  		case event := <-tm.targetEventHandler:
   155  			switch event.eventType {
   156  			case fileTargetEventWatchStart:
   157  				if err := tm.watcher.Add(event.path); err != nil {
   158  					level.Error(tm.log).Log("msg", "error adding directory to watcher", "error", err)
   159  				}
   160  			case fileTargetEventWatchStop:
   161  				if err := tm.watcher.Remove(event.path); err != nil {
   162  				level.Error(tm.log).Log("msg", "failed to remove directory from watcher", "error", err)
   163  				}
   164  			}
   165  		case <-ctx.Done():
   166  			return
   167  		}
   168  	}
   169  }
   170  
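        // watchFsEvents fans out Create events from the shared fsnotify watcher to every
        // syncer, which then forwards them to the targets whose path glob matches.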
   171  func (tm *FileTargetManager) watchFsEvents(ctx context.Context) {
   172  	defer tm.wg.Done()
   173  
   174  	for {
   175  		select {
   176  		case event := <-tm.watcher.Events:
   177  			// we only care about Create events
   178  			if event.Op == fsnotify.Create {
   179  				level.Info(tm.log).Log("msg", "received file watcher event", "name", event.Name, "op", event.Op.String())
   180  				for _, s := range tm.syncers {
   181  					s.sendFileCreateEvent(event)
   182  				}
   183  			}
   184  		case err := <-tm.watcher.Errors:
   185  			level.Error(tm.log).Log("msg", "error from fswatch", "error", err)
   186  		case <-ctx.Done():
   187  			return
   188  		}
   189  	}
   190  }
   191  
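        // run forwards target group updates from the discovery manager to the syncer
        // registered for the corresponding job.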
   192  func (tm *FileTargetManager) run(ctx context.Context) {
   193  	defer tm.wg.Done()
   194  
   195  	for {
   196  		select {
   197  		case targetGroups := <-tm.manager.SyncCh():
   198  			for jobName, groups := range targetGroups {
   199  				tm.syncers[jobName].sync(groups, tm.targetEventHandler)
   200  			}
   201  		case <-ctx.Done():
   202  			return
   203  		}
   204  	}
   205  }
   206  
   207  // Ready returns true if there is at least one active file target.
   208  func (tm *FileTargetManager) Ready() bool {
   209  	for _, s := range tm.syncers {
   210  		if s.ready() {
   211  			return true
   212  		}
   213  	}
   214  	return false
   215  }
   216  
   217  // Stop shuts down the FileTargetManager and all of its syncers.
   218  func (tm *FileTargetManager) Stop() {
   219  	tm.quit()
   220  	tm.wg.Wait()
   221  
   222  	for _, s := range tm.syncers {
   223  		s.stop()
   224  	}
   225  	util.LogError("closing watcher", tm.watcher.Close)
   226  	close(tm.targetEventHandler)
   227  }
   228  
   229  // ActiveTargets returns the active targets currently being scraped.
   230  func (tm *FileTargetManager) ActiveTargets() map[string][]target.Target {
   231  	result := map[string][]target.Target{}
   232  	for jobName, syncer := range tm.syncers {
   233  		result[jobName] = append(result[jobName], syncer.ActiveTargets()...)
   234  	}
   235  	return result
   236  }
   237  
   238  // AllTargets returns all targets, active and dropped.
   239  func (tm *FileTargetManager) AllTargets() map[string][]target.Target {
   240  	result := map[string][]target.Target{}
   241  	for jobName, syncer := range tm.syncers {
   242  		result[jobName] = append(result[jobName], syncer.ActiveTargets()...)
   243  		result[jobName] = append(result[jobName], syncer.DroppedTargets()...)
   244  	}
   245  	return result
   246  }
   247  
   248  // targetSyncer keeps a job's file targets in sync with service discovery changes.
   249  type targetSyncer struct {
   250  	metrics      *Metrics
   251  	log          log.Logger
   252  	positions    positions.Positions
   253  	entryHandler api.EntryHandler
   254  	hostname     string
   255  
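        	// fileEventWatchers maps a path glob to the channel on which file create
        	// events for that glob are delivered to the corresponding FileTarget.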
   256  	fileEventWatchers map[string]chan fsnotify.Event
   257  
   258  	droppedTargets []target.Target
   259  	targets        map[string]*FileTarget
   260  	mtx            sync.Mutex
   261  
   262  	relabelConfig []*relabel.Config
   263  	targetConfig  *Config
   264  
   265  	encoding string
   266  }
   267  
   268  // sync synchronizes the set of targets with the target groups received from service discovery.
   269  func (s *targetSyncer) sync(groups []*targetgroup.Group, targetEventHandler chan fileTargetEvent) {
   270  	s.mtx.Lock()
   271  	defer s.mtx.Unlock()
   272  
   273  	targets := map[string]struct{}{}
   274  	dropped := []target.Target{}
   275  
   276  	for _, group := range groups {
   277  		for _, t := range group.Targets {
   278  			level.Debug(s.log).Log("msg", "new target", "labels", t)
   279  
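        			// Convert the discovered model.LabelSet to labels.Labels for relabeling and
        			// back; a nil result from relabel.Process means the target was dropped by a
        			// relabel rule.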
   280  			discoveredLabels := group.Labels.Merge(t)
   281  			var labelMap = make(map[string]string)
   282  			for k, v := range discoveredLabels.Clone() {
   283  				labelMap[string(k)] = string(v)
   284  			}
   285  
   286  			processedLabels := relabel.Process(labels.FromMap(labelMap), s.relabelConfig...)
   287  
   288  			var labels = make(model.LabelSet)
   289  			for k, v := range processedLabels.Map() {
   290  				labels[model.LabelName(k)] = model.LabelValue(v)
   291  			}
   292  
   293  			// Drop targets whose labels were removed entirely by relabeling.
   294  			if processedLabels == nil {
   295  				dropped = append(dropped, target.NewDroppedTarget("dropping target, no labels", discoveredLabels))
   296  				level.Debug(s.log).Log("msg", "dropping target, no labels")
   297  				s.metrics.failedTargets.WithLabelValues("empty_labels").Inc()
   298  				continue
   299  			}
   300  
   301  			host, ok := labels[hostLabel]
   302  			if ok && string(host) != s.hostname {
   303  				dropped = append(dropped, target.NewDroppedTarget(fmt.Sprintf("ignoring target, wrong host (labels:%s hostname:%s)", labels.String(), s.hostname), discoveredLabels))
   304  				level.Debug(s.log).Log("msg", "ignoring target, wrong host", "labels", labels.String(), "hostname", s.hostname)
   305  				s.metrics.failedTargets.WithLabelValues("wrong_host").Inc()
   306  				continue
   307  			}
   308  
   309  			path, ok := labels[pathLabel]
   310  			if !ok {
   311  				dropped = append(dropped, target.NewDroppedTarget("no path for target", discoveredLabels))
   312  				level.Info(s.log).Log("msg", "no path for target", "labels", labels.String())
   313  				s.metrics.failedTargets.WithLabelValues("no_path").Inc()
   314  				continue
   315  			}
   316  
   317  			pathExclude := labels[pathExcludeLabel]
   318  
   319  			for k := range labels {
   320  				if strings.HasPrefix(string(k), "__") {
   321  					delete(labels, k)
   322  				}
   323  			}
   324  
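        			// The target key combines the path glob and the final label set (plus the
        			// exclude glob when present), so identical paths with different labels
        			// produce distinct targets.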
   325  			key := fmt.Sprintf("%s:%s", path, labels.String())
   326  			if pathExclude != "" {
   327  				key = fmt.Sprintf("%s:%s", key, pathExclude)
   328  			}
   329  
   330  			targets[key] = struct{}{}
   331  			if _, ok := s.targets[key]; ok {
   332  				dropped = append(dropped, target.NewDroppedTarget("ignoring target, already exists", discoveredLabels))
   333  				level.Debug(s.log).Log("msg", "ignoring target, already exists", "labels", labels.String())
   334  				s.metrics.failedTargets.WithLabelValues("exists").Inc()
   335  				continue
   336  			}
   337  
   338  			level.Info(s.log).Log("msg", "Adding target", "key", key)
   339  
   340  			wkey := string(path)
   341  			watcher, ok := s.fileEventWatchers[wkey]
   342  			if !ok {
   343  				watcher = make(chan fsnotify.Event)
   344  				s.fileEventWatchers[wkey] = watcher
   345  			}
   346  			t, err := s.newTarget(wkey, string(pathExclude), labels, discoveredLabels, watcher, targetEventHandler)
   347  			if err != nil {
   348  				dropped = append(dropped, target.NewDroppedTarget(fmt.Sprintf("Failed to create target: %s", err.Error()), discoveredLabels))
   349  				level.Error(s.log).Log("msg", "Failed to create target", "key", key, "error", err)
   350  				s.metrics.failedTargets.WithLabelValues("error").Inc()
   351  				continue
   352  			}
   353  
   354  			s.metrics.targetsActive.Add(1.)
   355  			s.targets[key] = t
   356  		}
   357  	}
   358  
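        	// Stop and remove any targets that are no longer present in the discovered
        	// set, and close their file event watchers.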
   359  	for key, target := range s.targets {
   360  		if _, ok := targets[key]; !ok {
   361  			level.Info(s.log).Log("msg", "Removing target", "key", key)
   362  			target.Stop()
   363  			s.metrics.targetsActive.Add(-1.)
   364  			delete(s.targets, key)
   365  
   366  			// close related file event watcher
   367  			k := target.path
   368  			if _, ok := s.fileEventWatchers[k]; ok {
   369  				close(s.fileEventWatchers[k])
   370  				delete(s.fileEventWatchers, k)
   371  			} else {
   372  				level.Warn(s.log).Log("msg", "failed to remove file event watcher", "path", k)
   373  			}
   374  		}
   375  	}
   376  	s.droppedTargets = dropped
   377  }
   378  
   379  // sendFileCreateEvent sends file creation events only to the targets whose path glob matches the created file.
   380  func (s *targetSyncer) sendFileCreateEvent(event fsnotify.Event) {
   381  	// Hold the mutex because other goroutines mutate s.fileEventWatchers; without it
   382  	// we could send on a channel that no longer has a listener and deadlock.
   383  	s.mtx.Lock()
   384  	defer s.mtx.Unlock()
   385  
   386  	for path, watcher := range s.fileEventWatchers {
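        		// doublestar treats the registered path as a glob, e.g. a watcher for
        		// "/var/log/**/*.log" matches a newly created "/var/log/nginx/access.log".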
   387  		matched, err := doublestar.Match(path, event.Name)
   388  		if err != nil {
   389  			level.Error(s.log).Log("msg", "failed to match file", "error", err, "filename", event.Name)
   390  			continue
   391  		}
   392  		if !matched {
   393  			level.Debug(s.log).Log("msg", "new file does not match glob", "filename", event.Name)
   394  			continue
   395  		}
   396  		watcher <- event
   397  	}
   398  }
   399  
   400  func (s *targetSyncer) newTarget(path, pathExclude string, labels model.LabelSet, discoveredLabels model.LabelSet, fileEventWatcher chan fsnotify.Event, targetEventHandler chan fileTargetEvent) (*FileTarget, error) {
   401  	return NewFileTarget(s.metrics, s.log, s.entryHandler, s.positions, path, pathExclude, labels, discoveredLabels, s.targetConfig, fileEventWatcher, targetEventHandler, s.encoding)
   402  }
   403  
   404  func (s *targetSyncer) DroppedTargets() []target.Target {
   405  	s.mtx.Lock()
   406  	defer s.mtx.Unlock()
   407  	return append([]target.Target(nil), s.droppedTargets...)
   408  }
   409  
   410  func (s *targetSyncer) ActiveTargets() []target.Target {
   411  	s.mtx.Lock()
   412  	defer s.mtx.Unlock()
   413  	actives := []target.Target{}
   414  	for _, t := range s.targets {
   415  		actives = append(actives, t)
   416  	}
   417  	return actives
   418  }
   419  
   420  func (s *targetSyncer) ready() bool {
   421  	s.mtx.Lock()
   422  	defer s.mtx.Unlock()
   423  
   424  	for _, target := range s.targets {
   425  		if target.Ready() {
   426  			return true
   427  		}
   428  	}
   429  	return false
   430  }
   431  
   432  func (s *targetSyncer) stop() {
   433  	s.mtx.Lock()
   434  	defer s.mtx.Unlock()
   435  
   436  	for key, target := range s.targets {
   437  		level.Info(s.log).Log("msg", "Removing target", "key", key)
   438  		target.Stop()
   439  		delete(s.targets, key)
   440  	}
   441  
   442  	for key, watcher := range s.fileEventWatchers {
   443  		close(watcher)
   444  		delete(s.fileEventWatchers, key)
   445  	}
   446  	s.entryHandler.Stop()
   447  }
   448  
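        // hostname returns the HOSTNAME environment variable if it is set (for example
        // when injected via the Kubernetes downward API), falling back to os.Hostname.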
   449  func hostname() (string, error) {
   450  	hostname := os.Getenv("HOSTNAME")
   451  	if hostname != "" {
   452  		return hostname, nil
   453  	}
   454  
   455  	return os.Hostname()
   456  }