github.imxd.top/hashicorp/consul@v1.4.5/agent/proxycfg/manager.go

package proxycfg

import (
	"errors"
	"log"
	"sync"

	"github.com/hashicorp/consul/agent/cache"
	"github.com/hashicorp/consul/agent/local"
	"github.com/hashicorp/consul/agent/structs"
)

var (
	// ErrStopped is returned from Run if the manager instance has already been
	// stopped.
	ErrStopped = errors.New("manager stopped")

	// ErrStarted is returned from Run if the manager instance has already run.
	ErrStarted = errors.New("manager was already run")
)

// CancelFunc is a type for a returned function that can be called to cancel a
// watch.
type CancelFunc func()

// Manager is a component that integrates into the agent and manages Connect
// proxy configuration state. This should not be confused with the deprecated
// "managed proxy" concept where the agent supervises the actual proxy process.
// proxycfg.Manager is oblivious to the distinction and manages state for any
// service registered with Kind == connect-proxy.
//
// The Manager ensures that any Connect proxy registered on the agent has all
// the state it needs cached locally via the agent cache. State includes
// certificates, intentions, and service discovery results for any declared
// upstreams. See package docs for more detail.
type Manager struct {
	ManagerConfig

	// stateCh is notified for any service changes in local state. We only use
	// this to trigger on _new_ service addition since it carries no data and we
	// don't want to maintain a full copy of the state in order to diff and figure
	// out what changed. Luckily each service has its own WatchCh so we can figure
	// out changes and removals with those efficiently.
	stateCh chan struct{}

	mu       sync.Mutex
	started  bool
	proxies  map[string]*state
	watchers map[string]map[uint64]chan *ConfigSnapshot
}

// ManagerConfig holds the required external dependencies for a Manager
// instance. All fields must be set to something valid or the manager will
// panic. The ManagerConfig is passed by value to NewManager so the passed value
// can be mutated safely.
type ManagerConfig struct {
	// Cache is the agent's cache instance that can be used to retrieve, store and
	// monitor state for the proxies.
	Cache *cache.Cache
	// State is the agent's local state to be watched for new proxy registrations.
	State *local.State
	// Source describes the current agent's identity. It's used directly for
	// prepared query discovery but also indirectly as a way to pass the current
	// Datacenter name into other request types that need it. This is sufficient
	// for now and cleaner than passing the entire RuntimeConfig.
	Source *structs.QuerySource
	// Logger is the agent's logger to be used for logging.
	Logger *log.Logger
}

// NewManager constructs a Manager from the provided dependencies.
func NewManager(cfg ManagerConfig) (*Manager, error) {
	if cfg.Cache == nil || cfg.State == nil || cfg.Source == nil ||
		cfg.Logger == nil {
		return nil, errors.New("all ManagerConfig fields must be provided")
	}
	m := &Manager{
		ManagerConfig: cfg,
		// A single-item buffer is enough since no data is transferred; this is
		// "level triggering" so we can't miss actual data.
		stateCh:  make(chan struct{}, 1),
		proxies:  make(map[string]*state),
		watchers: make(map[string]map[uint64]chan *ConfigSnapshot),
	}
	return m, nil
}

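// The sketch below is illustrative only and is not part of the original file.
// It shows how an agent-like caller might wire up and start a Manager; the
// function name and the "dc1" datacenter value are assumptions, and the cache,
// local state, and logger are expected to come from the agent itself.
func exampleStartManager(c *cache.Cache, state *local.State, logger *log.Logger) (*Manager, error) {
	m, err := NewManager(ManagerConfig{
		Cache:  c,
		State:  state,
		Source: &structs.QuerySource{Datacenter: "dc1"},
		Logger: logger,
	})
	if err != nil {
		return nil, err
	}

	// Run blocks until Close is called (or a fatal error occurs), so it gets
	// its own goroutine.
	go func() {
		if err := m.Run(); err != nil {
			logger.Printf("[ERR] proxycfg: manager run loop exited: %v", err)
		}
	}()
	return m, nil
}
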
// Run is the long-running method that handles state syncing. It should be run
// in its own goroutine and will continue until a fatal error is hit or Close
// is called. Run will return an error if it is called more than once, or called
// after Close.
func (m *Manager) Run() error {
	m.mu.Lock()
	alreadyStarted := m.started
	m.started = true
	stateCh := m.stateCh
	m.mu.Unlock()

	// Protect against multiple Run calls.
	if alreadyStarted {
		return ErrStarted
	}

	// Protect against being run after Close.
	if stateCh == nil {
		return ErrStopped
	}

	// Register for notifications about state changes
	m.State.Notify(stateCh)
	defer m.State.StopNotify(stateCh)

	for {
		m.syncState()

		// Wait for a state change
		_, ok := <-stateCh
		if !ok {
			// Stopped
			return nil
		}
	}
}

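// The sketch below is illustrative only and is not part of the original file.
// It shows the non-blocking, "level triggered" send that a notifier is assumed
// to use on a channel like stateCh: because the channel carries no data and
// has a one-element buffer, any number of changes coalesce into a single
// wakeup of Run without ever blocking the notifier. How local.State actually
// notifies is not shown in this file.
func exampleLevelTriggeredNotify(stateCh chan struct{}) {
	select {
	case stateCh <- struct{}{}:
		// Buffer was empty; Run will wake and call syncState.
	default:
		// A wakeup is already pending; Run will read the latest state when it
		// runs, so dropping this send loses nothing.
	}
}
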
// syncState is called whenever the local state notifies a change. It holds the
// lock while finding any new or updated proxies and removing deleted ones.
func (m *Manager) syncState() {
	m.mu.Lock()
	defer m.mu.Unlock()

	// Traverse the local state and ensure all proxy services are registered
	services := m.State.Services()
	for svcID, svc := range services {
		if svc.Kind != structs.ServiceKindConnectProxy {
			continue
		}
		// TODO(banks): need to work out when to default some stuff. For example
		// Proxy.LocalServicePort is practically necessary for any sidecar and can
		// default to the port of the sidecar service, but only if it's already
		// registered, and once we get past here we don't have enough context to
		// know that, so we'd need to set it here if not during registration of the
		// proxy service. Sidecar Service and managed proxies can do that in the
		// interim, but we should validate more generally that this is always
		// true.
		err := m.ensureProxyServiceLocked(svc, m.State.ServiceToken(svcID))
		if err != nil {
			m.Logger.Printf("[ERR] failed to watch proxy service %s: %s", svc.ID,
				err)
		}
	}

	// Now see if any proxies were removed
	for proxyID := range m.proxies {
		if _, ok := services[proxyID]; !ok {
			// Remove them
			m.removeProxyServiceLocked(proxyID)
		}
	}
}

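// The sketch below is illustrative only and is not part of the original file.
// It shows the shape of a registration that syncState would pick up: only
// services with Kind == connect-proxy are watched. All concrete values are
// hypothetical, and the exact NodeService/ConnectProxyConfig fields used here
// are assumptions about this Consul version's structs package.
func exampleSidecarRegistration() *structs.NodeService {
	return &structs.NodeService{
		Kind:    structs.ServiceKindConnectProxy,
		ID:      "web-sidecar-proxy",
		Service: "web-sidecar-proxy",
		Port:    21000,
		Proxy: structs.ConnectProxyConfig{
			// The service this proxy fronts and where to reach it locally; see
			// the TODO above about defaulting LocalServicePort.
			DestinationServiceName: "web",
			LocalServicePort:       8080,
		},
	}
}
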
// ensureProxyServiceLocked adds or updates the proxy in our state.
func (m *Manager) ensureProxyServiceLocked(ns *structs.NodeService, token string) error {
	state, ok := m.proxies[ns.ID]

	if ok {
		if !state.Changed(ns, token) {
			// No change
			return nil
		}

		// We are updating the proxy, close its old state
		state.Close()
	}

	var err error
	state, err = newState(ns, token)
	if err != nil {
		return err
	}

	// Set the necessary dependencies
	state.logger = m.Logger
	state.cache = m.Cache
	state.source = m.Source

	ch, err := state.Watch()
	if err != nil {
		return err
	}
	m.proxies[ns.ID] = state

	// Start a goroutine that will wait for changes and broadcast them to watchers.
	go func(ch <-chan ConfigSnapshot) {
		// Run until ch is closed
		for snap := range ch {
			m.notify(&snap)
		}
	}(ch)

	return nil
}

// removeProxyServiceLocked is called when a service deregisters and frees all
// resources for that service.
func (m *Manager) removeProxyServiceLocked(proxyID string) {
	state, ok := m.proxies[proxyID]
	if !ok {
		return
	}

	// Closing state will let the goroutine we started in ensureProxyServiceLocked
	// finish since its watch chan is closed.
	state.Close()
	delete(m.proxies, proxyID)

	// We intentionally leave potential watchers hanging here - there is no new
	// config for them and closing their channels might be indistinguishable from
	// an error that they should retry. We rely on them eventually giving up
	// (because the proxy is in fact not running any more) so the watches get
	// cleaned up naturally.
}

// notify broadcasts a new snapshot to every channel watching its proxy ID.
func (m *Manager) notify(snap *ConfigSnapshot) {
	m.mu.Lock()
	defer m.mu.Unlock()

	watchers, ok := m.watchers[snap.ProxyID]
	if !ok {
		return
	}

	for _, ch := range watchers {
		m.deliverLatest(snap, ch)
	}
}

// deliverLatest delivers the snapshot to a watch chan. If delivery would block,
// it drains the chan and then re-attempts delivery so that a slow consumer
// always receives the latest config rather than a stale backlog. This MUST be
// called from a method where m.mu is held to be safe since it assumes we are
// the only goroutine sending on ch.
func (m *Manager) deliverLatest(snap *ConfigSnapshot, ch chan *ConfigSnapshot) {
	// Send if chan is empty
	select {
	case ch <- snap:
		return
	default:
	}

	// Not empty, drain the chan of older snapshots and redeliver. For now we only
	// use 1-buffered chans but this will still work if we change that later.
OUTER:
	for {
		select {
		case <-ch:
			continue
		default:
			break OUTER
		}
	}

	// Now send again
	select {
	case ch <- snap:
		return
	default:
		// This should not be possible since we should be the only sender, enforced
		// by m.mu, but log an error and drop the update rather than panic.
		m.Logger.Printf("[ERR] proxycfg: failed to deliver ConfigSnapshot to %q",
			snap.ProxyID)
	}
}

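// The sketch below is illustrative only and is not part of the original file.
// It demonstrates the effect of deliverLatest from the consumer's side: if
// several snapshots are produced while a consumer is slow, only the newest one
// is left waiting on the 1-buffered channel. A single goroutine plays the
// sender role here, so the m.mu requirement above is trivially satisfied.
func exampleKeepLatest(m *Manager, snaps []*ConfigSnapshot) *ConfigSnapshot {
	if len(snaps) == 0 {
		return nil
	}
	ch := make(chan *ConfigSnapshot, 1)
	for _, snap := range snaps {
		m.deliverLatest(snap, ch)
	}
	// Only the last snapshot is buffered; earlier ones were drained.
	return <-ch
}
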
// Watch registers a watch on a proxy. The proxy may not be registered yet, in
// which case this will not fail, but no updates will be delivered until the
// proxy is registered. If there is already a valid snapshot in memory, it will
// be delivered immediately.
func (m *Manager) Watch(proxyID string) (<-chan *ConfigSnapshot, CancelFunc) {
	m.mu.Lock()
	defer m.mu.Unlock()

	// This buffering is crucial otherwise we'd block immediately trying to
	// deliver the current snapshot below if we already have one.
	ch := make(chan *ConfigSnapshot, 1)
	watchers, ok := m.watchers[proxyID]
	if !ok {
		watchers = make(map[uint64]chan *ConfigSnapshot)
	}
	idx := uint64(len(watchers))
	watchers[idx] = ch
	m.watchers[proxyID] = watchers

	// Deliver the current snapshot immediately if there is one ready
	if state, ok := m.proxies[proxyID]; ok {
		if snap := state.CurrentSnapshot(); snap != nil {
			// We rely on ch being buffered above and on the fact that it hasn't
			// been passed anywhere else yet, so we must be the only writer and
			// this send can never block or deadlock.
			ch <- snap
		}
	}

	return ch, func() {
		m.mu.Lock()
		defer m.mu.Unlock()
		m.closeWatchLocked(proxyID, idx)
	}
}

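// The sketch below is illustrative only and is not part of the original file.
// It shows how a consumer (for example an xDS-style server) might subscribe to
// snapshots for one proxy and clean up its watch on shutdown. The proxy ID and
// done channel are hypothetical.
func exampleWatchProxy(m *Manager, done <-chan struct{}, logger *log.Logger) {
	snapCh, cancel := m.Watch("web-sidecar-proxy")
	// cancel frees the watcher slot and closes snapCh via closeWatchLocked.
	defer cancel()

	for {
		select {
		case snap, ok := <-snapCh:
			if !ok {
				// Channel closed by Manager.Close (or our own cancel).
				return
			}
			logger.Printf("[DEBUG] proxycfg: new snapshot for %s", snap.ProxyID)
		case <-done:
			// Caller is shutting down; the deferred cancel cleans up the watch.
			return
		}
	}
}
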
// closeWatchLocked cleans up state related to a single watcher. It assumes the
// lock is held.
func (m *Manager) closeWatchLocked(proxyID string, watchIdx uint64) {
	if watchers, ok := m.watchers[proxyID]; ok {
		if ch, ok := watchers[watchIdx]; ok {
			delete(watchers, watchIdx)
			close(ch)
			if len(watchers) == 0 {
				delete(m.watchers, proxyID)
			}
		}
	}
}

// Close removes all state and stops all running goroutines.
func (m *Manager) Close() error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.stateCh != nil {
		close(m.stateCh)
		m.stateCh = nil
	}

	// Close all current watchers first
	for proxyID, watchers := range m.watchers {
		for idx := range watchers {
			m.closeWatchLocked(proxyID, idx)
		}
	}

	// Then close all states
	for proxyID, state := range m.proxies {
		state.Close()
		delete(m.proxies, proxyID)
	}
	return nil
}