github.com/kjdelisle/consul@v1.4.5/agent/local/state.go (about)

     1  package local
     2  
     3  import (
     4  	"fmt"
     5  	"log"
     6  	"math/rand"
     7  	"reflect"
     8  	"strconv"
     9  	"strings"
    10  	"sync"
    11  	"sync/atomic"
    12  	"time"
    13  
    14  	metrics "github.com/armon/go-metrics"
    15  
    16  	"github.com/hashicorp/consul/acl"
    17  	"github.com/hashicorp/consul/agent/structs"
    18  	"github.com/hashicorp/consul/agent/token"
    19  	"github.com/hashicorp/consul/api"
    20  	"github.com/hashicorp/consul/lib"
    21  	"github.com/hashicorp/consul/types"
    22  	uuid "github.com/hashicorp/go-uuid"
    23  )
    24  
// Config is the configuration for the State.
//
// Uses visible in this part of the file:
//   - CheckUpdateInterval: when positive, a check update that only changes
//     the output (not the status) is synced lazily via a timer instead of
//     immediately.
//   - DiscardCheckOutput: initial value of the runtime-adjustable setting
//     that blanks check output before it is stored.
//   - NodeName: written into the Node field of every check added to the
//     local state.
//   - ProxyBindMinPort, ProxyBindMaxPort: inclusive port range used to
//     auto-assign a bind port to managed proxies that did not get one
//     otherwise; auto-assignment is skipped when ProxyBindMinPort is not
//     positive or the range is empty.
//
// The remaining fields are not read by the code shown here.
type Config struct {
	AdvertiseAddr       string
	CheckUpdateInterval time.Duration
	Datacenter          string
	DiscardCheckOutput  bool
	NodeID              types.NodeID
	NodeName            string
	TaggedAddresses     map[string]string
	ProxyBindMinPort    int
	ProxyBindMaxPort    int
}
    37  
// ServiceState describes the state of a service record held in the local
// agent state.
type ServiceState struct {
	// Service is the local copy of the service record.
	Service *structs.NodeService

	// Token is the ACL token used to update or delete the service record on
	// the server.
	Token string

	// InSync contains whether the local state of the service record
	// is in sync with the remote state on the server.
	InSync bool

	// Deleted is true when the service record has been marked as deleted
	// but has not been removed on the server yet.
	Deleted bool

	// WatchCh is closed when the service state changes (it is replaced or
	// marked as deleted). It is suitable for use in a memdb.WatchSet when
	// watching agent local changes with hash-based blocking.
	WatchCh chan struct{}
}
    59  
    60  // Clone returns a shallow copy of the object. The service record still points
    61  // to the original service record and must not be modified. The WatchCh is also
    62  // still pointing to the original so the clone will be update when the original
    63  // is.
    64  func (s *ServiceState) Clone() *ServiceState {
    65  	s2 := new(ServiceState)
    66  	*s2 = *s
    67  	return s2
    68  }
    69  
// CheckState describes the state of a health check record held in the
// local agent state.
type CheckState struct {
	// Check is the local copy of the health check record.
	Check *structs.HealthCheck

	// Token is the ACL token used to update or delete the health check
	// record on the server.
	Token string

	// CriticalTime is the last time the health check status went
	// from non-critical to critical. When the health check is not
	// in critical state the value is the zero value.
	CriticalTime time.Time

	// DeferCheck is used to delay the sync of a health check when
	// only the output has changed. This rate limits changes which
	// do not affect the state of the node and/or service. It is nil
	// when no deferred sync is pending.
	DeferCheck *time.Timer

	// InSync contains whether the local state of the health check
	// record is in sync with the remote state on the server.
	InSync bool

	// Deleted is true when the health check record has been marked as
	// deleted but has not been removed on the server yet.
	Deleted bool
}
    97  
    98  // Clone returns a shallow copy of the object. The check record and the
    99  // defer timer still point to the original values and must not be
   100  // modified.
   101  func (c *CheckState) Clone() *CheckState {
   102  	c2 := new(CheckState)
   103  	*c2 = *c
   104  	return c2
   105  }
   106  
   107  // Critical returns true when the health check is in critical state.
   108  func (c *CheckState) Critical() bool {
   109  	return !c.CriticalTime.IsZero()
   110  }
   111  
   112  // CriticalFor returns the amount of time the service has been in critical
   113  // state. Its value is undefined when the service is not in critical state.
   114  func (c *CheckState) CriticalFor() time.Duration {
   115  	return time.Since(c.CriticalTime)
   116  }
   117  
// rpc is the interface the State requires from its Delegate in order to
// issue RPC calls: method names the endpoint, args carries the request and
// reply receives the response.
type rpc interface {
	RPC(method string, args interface{}, reply interface{}) error
}
   121  
// ManagedProxy represents the local state for a registered proxy instance:
// the proxy definition, its local-only token and a channel used to signal
// changes to watchers.
type ManagedProxy struct {
	// Proxy is the managed proxy definition. Its ProxyService field is filled
	// in by State.AddProxy.
	Proxy *structs.ConnectManagedProxy

	// ProxyToken is a special local-only security token that grants the bearer
	// access to the proxy's config as well as allowing it to request certificates
	// on behalf of the target service. Certain connect endpoints will validate
	// against this token and if it matches will then use the target service's
	// registration token to actually authenticate the upstream RPC on behalf of
	// the service. This token is passed securely to the proxy process via ENV
	// vars and should never be exposed any other way. Unmanaged proxies will
	// never see this and need to use service-scoped ACL tokens distributed
	// externally. It is persisted in the local state to allow authenticating
	// running proxies after the agent restarts.
	//
	// TODO(banks): In theory we only need to persist this at all to _validate_
	// which means we could keep only a hash in memory and on disk and only pass
	// the actual token to the process on startup. That would require a bit of
	// refactoring though to have the required interaction with the proxy manager.
	ProxyToken string

	// WatchCh is a close-only chan that is closed when the proxy is removed or
	// updated.
	WatchCh chan struct{}
}
   147  
// State is used to represent the node's services and checks. We use it to
// perform anti-entropy with the catalog representation.
//
// The embedded RWMutex guards all of the maps below; exported methods take
// the lock themselves while the *Locked helpers expect it to be held.
type State struct {
	sync.RWMutex

	// Delegate the RPC interface to the consul server or agent.
	//
	// It is set after both the state and the consul server/agent have
	// been created.
	Delegate rpc

	// TriggerSyncChanges is used to notify the state syncer that a
	// partial sync should be performed.
	//
	// It is set after both the state and the state syncer have been
	// created.
	TriggerSyncChanges func()

	logger *log.Logger

	// Config is the agent config
	config Config

	// nodeInfoInSync tracks whether the server has our correct top-level
	// node information in sync
	nodeInfoInSync bool

	// Services tracks the local services, keyed by service ID.
	services map[string]*ServiceState

	// Checks tracks the local checks. checkAliases maps a source service ID
	// to the notification channels of the alias checks watching it.
	checks       map[types.CheckID]*CheckState
	checkAliases map[string]map[types.CheckID]chan<- struct{}

	// metadata tracks the node metadata fields
	metadata map[string]string

	// discardCheckOutput stores whether the output of health checks
	// is stored in the raft log.
	discardCheckOutput atomic.Value // bool

	// tokens contains the ACL tokens
	tokens *token.Store

	// notifyHandlers is a map of registered channel listeners that are sent
	// messages whenever state changes occur. For now these events only include
	// service registration and deregistration since that is all that is needed
	// but the same mechanism could be used for other state changes.
	//
	// Note that we haven't refactored managedProxyHandlers into this mechanism
	// yet because that is soon to be deprecated and removed so it's easier to
	// just leave them separate until managed proxies are removed entirely. Any
	// future notifications should re-use this mechanism though.
	notifyHandlers map[chan<- struct{}]struct{}

	// managedProxies is a map of all managed connect proxies registered locally on
	// this agent. This is NOT kept in sync with servers since it's agent-local
	// config only. Proxy instances have separate service registrations in the
	// services map above which are kept in sync via anti-entropy. Un-managed
	// proxies (that registered themselves separately from the service
	// registration) do not appear here as the agent doesn't need to manage their
	// process nor config. They _do_ still exist in services above though as
	// services with Kind == connect-proxy.
	//
	// managedProxyHandlers is a map of registered channel listeners that
	// are sent a message each time a proxy changes via Add or RemoveProxy.
	managedProxies       map[string]*ManagedProxy
	managedProxyHandlers map[chan<- struct{}]struct{}
}
   218  
   219  // NewState creates a new local state for the agent.
   220  func NewState(c Config, lg *log.Logger, tokens *token.Store) *State {
   221  	l := &State{
   222  		config:               c,
   223  		logger:               lg,
   224  		services:             make(map[string]*ServiceState),
   225  		checks:               make(map[types.CheckID]*CheckState),
   226  		checkAliases:         make(map[string]map[types.CheckID]chan<- struct{}),
   227  		metadata:             make(map[string]string),
   228  		tokens:               tokens,
   229  		notifyHandlers:       make(map[chan<- struct{}]struct{}),
   230  		managedProxies:       make(map[string]*ManagedProxy),
   231  		managedProxyHandlers: make(map[chan<- struct{}]struct{}),
   232  	}
   233  	l.SetDiscardCheckOutput(c.DiscardCheckOutput)
   234  	return l
   235  }
   236  
   237  // SetDiscardCheckOutput configures whether the check output
   238  // is discarded. This can be changed at runtime.
   239  func (l *State) SetDiscardCheckOutput(b bool) {
   240  	l.discardCheckOutput.Store(b)
   241  }
   242  
   243  // ServiceToken returns the configured ACL token for the given
   244  // service ID. If none is present, the agent's token is returned.
   245  func (l *State) ServiceToken(id string) string {
   246  	l.RLock()
   247  	defer l.RUnlock()
   248  	return l.serviceToken(id)
   249  }
   250  
   251  // serviceToken returns an ACL token associated with a service.
   252  // This method is not synchronized and the lock must already be held.
   253  func (l *State) serviceToken(id string) string {
   254  	var token string
   255  	if s := l.services[id]; s != nil {
   256  		token = s.Token
   257  	}
   258  	if token == "" {
   259  		token = l.tokens.UserToken()
   260  	}
   261  	return token
   262  }
   263  
   264  // AddService is used to add a service entry to the local state.
   265  // This entry is persistent and the agent will make a best effort to
   266  // ensure it is registered
   267  func (l *State) AddService(service *structs.NodeService, token string) error {
   268  	l.Lock()
   269  	defer l.Unlock()
   270  	return l.addServiceLocked(service, token)
   271  }
   272  
   273  func (l *State) addServiceLocked(service *structs.NodeService, token string) error {
   274  	if service == nil {
   275  		return fmt.Errorf("no service")
   276  	}
   277  
   278  	// use the service name as id if the id was omitted
   279  	if service.ID == "" {
   280  		service.ID = service.Service
   281  	}
   282  
   283  	l.setServiceStateLocked(&ServiceState{
   284  		Service: service,
   285  		Token:   token,
   286  	})
   287  	return nil
   288  }
   289  
   290  // AddServiceWithChecks adds a service and its check tp the local state atomically
   291  func (l *State) AddServiceWithChecks(service *structs.NodeService, checks []*structs.HealthCheck, token string) error {
   292  	l.Lock()
   293  	defer l.Unlock()
   294  
   295  	if err := l.addServiceLocked(service, token); err != nil {
   296  		return err
   297  	}
   298  
   299  	for _, check := range checks {
   300  		if err := l.addCheckLocked(check, token); err != nil {
   301  			return err
   302  		}
   303  	}
   304  
   305  	return nil
   306  }
   307  
   308  // RemoveService is used to remove a service entry from the local state.
   309  // The agent will make a best effort to ensure it is deregistered.
   310  func (l *State) RemoveService(id string) error {
   311  	l.Lock()
   312  	defer l.Unlock()
   313  	return l.removeServiceLocked(id)
   314  }
   315  
   316  // RemoveServiceWithChecks removes a service and its check from the local state atomically
   317  func (l *State) RemoveServiceWithChecks(serviceID string, checkIDs []types.CheckID) error {
   318  	l.Lock()
   319  	defer l.Unlock()
   320  
   321  	if err := l.removeServiceLocked(serviceID); err != nil {
   322  		return err
   323  	}
   324  
   325  	for _, id := range checkIDs {
   326  		if err := l.removeCheckLocked(id); err != nil {
   327  			return err
   328  		}
   329  	}
   330  
   331  	return nil
   332  }
   333  
   334  func (l *State) removeServiceLocked(id string) error {
   335  
   336  	s := l.services[id]
   337  	if s == nil || s.Deleted {
   338  		return fmt.Errorf("Service %q does not exist", id)
   339  	}
   340  
   341  	// To remove the service on the server we need the token.
   342  	// Therefore, we mark the service as deleted and keep the
   343  	// entry around until it is actually removed.
   344  	s.InSync = false
   345  	s.Deleted = true
   346  	if s.WatchCh != nil {
   347  		close(s.WatchCh)
   348  		s.WatchCh = nil
   349  	}
   350  	l.TriggerSyncChanges()
   351  	l.broadcastUpdateLocked()
   352  
   353  	return nil
   354  }
   355  
   356  // Service returns the locally registered service that the
   357  // agent is aware of and are being kept in sync with the server
   358  func (l *State) Service(id string) *structs.NodeService {
   359  	l.RLock()
   360  	defer l.RUnlock()
   361  
   362  	s := l.services[id]
   363  	if s == nil || s.Deleted {
   364  		return nil
   365  	}
   366  	return s.Service
   367  }
   368  
   369  // Services returns the locally registered services that the
   370  // agent is aware of and are being kept in sync with the server
   371  func (l *State) Services() map[string]*structs.NodeService {
   372  	l.RLock()
   373  	defer l.RUnlock()
   374  
   375  	m := make(map[string]*structs.NodeService)
   376  	for id, s := range l.services {
   377  		if s.Deleted {
   378  			continue
   379  		}
   380  		m[id] = s.Service
   381  	}
   382  	return m
   383  }
   384  
   385  // ServiceState returns a shallow copy of the current service state record. The
   386  // service record still points to the original service record and must not be
   387  // modified. The WatchCh for the copy returned will also be closed when the
   388  // actual service state is changed.
   389  func (l *State) ServiceState(id string) *ServiceState {
   390  	l.RLock()
   391  	defer l.RUnlock()
   392  
   393  	s := l.services[id]
   394  	if s == nil || s.Deleted {
   395  		return nil
   396  	}
   397  	return s.Clone()
   398  }
   399  
   400  // SetServiceState is used to overwrite a raw service state with the given
   401  // state. This method is safe to be called concurrently but should only be used
   402  // during testing. You should most likely call AddService instead.
   403  func (l *State) SetServiceState(s *ServiceState) {
   404  	l.Lock()
   405  	defer l.Unlock()
   406  
   407  	l.setServiceStateLocked(s)
   408  }
   409  
   410  func (l *State) setServiceStateLocked(s *ServiceState) {
   411  	s.WatchCh = make(chan struct{})
   412  
   413  	old, hasOld := l.services[s.Service.ID]
   414  	l.services[s.Service.ID] = s
   415  
   416  	if hasOld && old.WatchCh != nil {
   417  		close(old.WatchCh)
   418  	}
   419  
   420  	l.TriggerSyncChanges()
   421  	l.broadcastUpdateLocked()
   422  }
   423  
   424  // ServiceStates returns a shallow copy of all service state records.
   425  // The service record still points to the original service record and
   426  // must not be modified.
   427  func (l *State) ServiceStates() map[string]*ServiceState {
   428  	l.RLock()
   429  	defer l.RUnlock()
   430  
   431  	m := make(map[string]*ServiceState)
   432  	for id, s := range l.services {
   433  		if s.Deleted {
   434  			continue
   435  		}
   436  		m[id] = s.Clone()
   437  	}
   438  	return m
   439  }
   440  
   441  // CheckToken is used to return the configured health check token for a
   442  // Check, or if none is configured, the default agent ACL token.
   443  func (l *State) CheckToken(checkID types.CheckID) string {
   444  	l.RLock()
   445  	defer l.RUnlock()
   446  	return l.checkToken(checkID)
   447  }
   448  
   449  // checkToken returns an ACL token associated with a check.
   450  // This method is not synchronized and the lock must already be held.
   451  func (l *State) checkToken(id types.CheckID) string {
   452  	var token string
   453  	c := l.checks[id]
   454  	if c != nil {
   455  		token = c.Token
   456  	}
   457  	if token == "" {
   458  		token = l.tokens.UserToken()
   459  	}
   460  	return token
   461  }
   462  
   463  // AddCheck is used to add a health check to the local state.
   464  // This entry is persistent and the agent will make a best effort to
   465  // ensure it is registered
   466  func (l *State) AddCheck(check *structs.HealthCheck, token string) error {
   467  	l.Lock()
   468  	defer l.Unlock()
   469  
   470  	return l.addCheckLocked(check, token)
   471  }
   472  
   473  func (l *State) addCheckLocked(check *structs.HealthCheck, token string) error {
   474  	if check == nil {
   475  		return fmt.Errorf("no check")
   476  	}
   477  
   478  	// clone the check since we will be modifying it.
   479  	check = check.Clone()
   480  
   481  	if l.discardCheckOutput.Load().(bool) {
   482  		check.Output = ""
   483  	}
   484  
   485  	// if there is a serviceID associated with the check, make sure it exists before adding it
   486  	// NOTE - This logic may be moved to be handled within the Agent's Addcheck method after a refactor
   487  	if _, ok := l.services[check.ServiceID]; check.ServiceID != "" && !ok {
   488  		return fmt.Errorf("Check %q refers to non-existent service %q", check.CheckID, check.ServiceID)
   489  	}
   490  
   491  	// hard-set the node name
   492  	check.Node = l.config.NodeName
   493  
   494  	l.setCheckStateLocked(&CheckState{
   495  		Check: check,
   496  		Token: token,
   497  	})
   498  	return nil
   499  }
   500  
   501  // AddAliasCheck creates an alias check. When any check for the srcServiceID is
   502  // changed, checkID will reflect that using the same semantics as
   503  // checks.CheckAlias.
   504  //
   505  // This is a local optimization so that the Alias check doesn't need to use
   506  // blocking queries against the remote server for check updates for local
   507  // services.
   508  func (l *State) AddAliasCheck(checkID types.CheckID, srcServiceID string, notifyCh chan<- struct{}) error {
   509  	l.Lock()
   510  	defer l.Unlock()
   511  
   512  	m, ok := l.checkAliases[srcServiceID]
   513  	if !ok {
   514  		m = make(map[types.CheckID]chan<- struct{})
   515  		l.checkAliases[srcServiceID] = m
   516  	}
   517  	m[checkID] = notifyCh
   518  
   519  	return nil
   520  }
   521  
   522  // RemoveAliasCheck removes the mapping for the alias check.
   523  func (l *State) RemoveAliasCheck(checkID types.CheckID, srcServiceID string) {
   524  	l.Lock()
   525  	defer l.Unlock()
   526  
   527  	if m, ok := l.checkAliases[srcServiceID]; ok {
   528  		delete(m, checkID)
   529  		if len(m) == 0 {
   530  			delete(l.checkAliases, srcServiceID)
   531  		}
   532  	}
   533  }
   534  
   535  // RemoveCheck is used to remove a health check from the local state.
   536  // The agent will make a best effort to ensure it is deregistered
   537  // todo(fs): RemoveService returns an error for a non-existent service. RemoveCheck should as well.
   538  // todo(fs): Check code that calls this to handle the error.
   539  func (l *State) RemoveCheck(id types.CheckID) error {
   540  	l.Lock()
   541  	defer l.Unlock()
   542  	return l.removeCheckLocked(id)
   543  }
   544  
   545  func (l *State) removeCheckLocked(id types.CheckID) error {
   546  	c := l.checks[id]
   547  	if c == nil || c.Deleted {
   548  		return fmt.Errorf("Check %q does not exist", id)
   549  	}
   550  
   551  	// To remove the check on the server we need the token.
   552  	// Therefore, we mark the service as deleted and keep the
   553  	// entry around until it is actually removed.
   554  	c.InSync = false
   555  	c.Deleted = true
   556  	l.TriggerSyncChanges()
   557  
   558  	return nil
   559  }
   560  
   561  // UpdateCheck is used to update the status of a check
   562  func (l *State) UpdateCheck(id types.CheckID, status, output string) {
   563  	l.Lock()
   564  	defer l.Unlock()
   565  
   566  	c := l.checks[id]
   567  	if c == nil || c.Deleted {
   568  		return
   569  	}
   570  
   571  	if l.discardCheckOutput.Load().(bool) {
   572  		output = ""
   573  	}
   574  
   575  	// Update the critical time tracking (this doesn't cause a server updates
   576  	// so we can always keep this up to date).
   577  	if status == api.HealthCritical {
   578  		if !c.Critical() {
   579  			c.CriticalTime = time.Now()
   580  		}
   581  	} else {
   582  		c.CriticalTime = time.Time{}
   583  	}
   584  
   585  	// Do nothing if update is idempotent
   586  	if c.Check.Status == status && c.Check.Output == output {
   587  		return
   588  	}
   589  
   590  	// Defer a sync if the output has changed. This is an optimization around
   591  	// frequent updates of output. Instead, we update the output internally,
   592  	// and periodically do a write-back to the servers. If there is a status
   593  	// change we do the write immediately.
   594  	if l.config.CheckUpdateInterval > 0 && c.Check.Status == status {
   595  		c.Check.Output = output
   596  		if c.DeferCheck == nil {
   597  			d := l.config.CheckUpdateInterval
   598  			intv := time.Duration(uint64(d)/2) + lib.RandomStagger(d)
   599  			c.DeferCheck = time.AfterFunc(intv, func() {
   600  				l.Lock()
   601  				defer l.Unlock()
   602  
   603  				c := l.checks[id]
   604  				if c == nil {
   605  					return
   606  				}
   607  				c.DeferCheck = nil
   608  				if c.Deleted {
   609  					return
   610  				}
   611  				c.InSync = false
   612  				l.TriggerSyncChanges()
   613  			})
   614  		}
   615  		return
   616  	}
   617  
   618  	// If this is a check for an aliased service, then notify the waiters.
   619  	if aliases, ok := l.checkAliases[c.Check.ServiceID]; ok && len(aliases) > 0 {
   620  		for _, notifyCh := range aliases {
   621  			// Do not block. All notify channels should be buffered to at
   622  			// least 1 in which case not-blocking does not result in loss
   623  			// of data because a failed send means a notification is
   624  			// already queued. This must be called with the lock held.
   625  			select {
   626  			case notifyCh <- struct{}{}:
   627  			default:
   628  			}
   629  		}
   630  	}
   631  
   632  	// Update status and mark out of sync
   633  	c.Check.Status = status
   634  	c.Check.Output = output
   635  	c.InSync = false
   636  	l.TriggerSyncChanges()
   637  }
   638  
   639  // Check returns the locally registered check that the
   640  // agent is aware of and are being kept in sync with the server
   641  func (l *State) Check(id types.CheckID) *structs.HealthCheck {
   642  	l.RLock()
   643  	defer l.RUnlock()
   644  
   645  	c := l.checks[id]
   646  	if c == nil || c.Deleted {
   647  		return nil
   648  	}
   649  	return c.Check
   650  }
   651  
   652  // Checks returns the locally registered checks that the
   653  // agent is aware of and are being kept in sync with the server
   654  func (l *State) Checks() map[types.CheckID]*structs.HealthCheck {
   655  	m := make(map[types.CheckID]*structs.HealthCheck)
   656  	for id, c := range l.CheckStates() {
   657  		m[id] = c.Check
   658  	}
   659  	return m
   660  }
   661  
   662  // CheckState returns a shallow copy of the current health check state
   663  // record. The health check record and the deferred check still point to
   664  // the original values and must not be modified.
   665  func (l *State) CheckState(id types.CheckID) *CheckState {
   666  	l.RLock()
   667  	defer l.RUnlock()
   668  
   669  	c := l.checks[id]
   670  	if c == nil || c.Deleted {
   671  		return nil
   672  	}
   673  	return c.Clone()
   674  }
   675  
   676  // SetCheckState is used to overwrite a raw check state with the given
   677  // state. This method is safe to be called concurrently but should only be used
   678  // during testing. You should most likely call AddCheck instead.
   679  func (l *State) SetCheckState(c *CheckState) {
   680  	l.Lock()
   681  	defer l.Unlock()
   682  
   683  	l.setCheckStateLocked(c)
   684  }
   685  
   686  func (l *State) setCheckStateLocked(c *CheckState) {
   687  	l.checks[c.Check.CheckID] = c
   688  	l.TriggerSyncChanges()
   689  }
   690  
   691  // CheckStates returns a shallow copy of all health check state records.
   692  // The health check records and the deferred checks still point to
   693  // the original values and must not be modified.
   694  func (l *State) CheckStates() map[types.CheckID]*CheckState {
   695  	l.RLock()
   696  	defer l.RUnlock()
   697  
   698  	m := make(map[types.CheckID]*CheckState)
   699  	for id, c := range l.checks {
   700  		if c.Deleted {
   701  			continue
   702  		}
   703  		m[id] = c.Clone()
   704  	}
   705  	return m
   706  }
   707  
   708  // CriticalCheckStates returns the locally registered checks that the
   709  // agent is aware of and are being kept in sync with the server.
   710  // The map contains a shallow copy of the current check states but
   711  // references to the actual check definition which must not be
   712  // modified.
   713  func (l *State) CriticalCheckStates() map[types.CheckID]*CheckState {
   714  	l.RLock()
   715  	defer l.RUnlock()
   716  
   717  	m := make(map[types.CheckID]*CheckState)
   718  	for id, c := range l.checks {
   719  		if c.Deleted || !c.Critical() {
   720  			continue
   721  		}
   722  		m[id] = c.Clone()
   723  	}
   724  	return m
   725  }
   726  
   727  // AddProxy is used to add a connect proxy entry to the local state. This
   728  // assumes the proxy's NodeService is already registered via Agent.AddService
   729  // (since that has to do other book keeping). The token passed here is the ACL
   730  // token the service used to register itself so must have write on service
   731  // record. AddProxy returns the newly added proxy and an error.
   732  //
   733  // The restoredProxyToken argument should only be used when restoring proxy
   734  // definitions from disk; new proxies must leave it blank to get a new token
   735  // assigned. We need to restore from disk to enable to continue authenticating
   736  // running proxies that already had that credential injected.
   737  func (l *State) AddProxy(proxy *structs.ConnectManagedProxy, token,
   738  	restoredProxyToken string) (*ManagedProxy, error) {
   739  	if proxy == nil {
   740  		return nil, fmt.Errorf("no proxy")
   741  	}
   742  
   743  	// Lookup the local service
   744  	target := l.Service(proxy.TargetServiceID)
   745  	if target == nil {
   746  		return nil, fmt.Errorf("target service ID %s not registered",
   747  			proxy.TargetServiceID)
   748  	}
   749  
   750  	// Get bind info from config
   751  	cfg, err := proxy.ParseConfig()
   752  	if err != nil {
   753  		return nil, err
   754  	}
   755  
   756  	// Construct almost all of the NodeService that needs to be registered by the
   757  	// caller outside of the lock.
   758  	svc := &structs.NodeService{
   759  		Kind:    structs.ServiceKindConnectProxy,
   760  		ID:      target.ID + "-proxy",
   761  		Service: target.Service + "-proxy",
   762  		Proxy: structs.ConnectProxyConfig{
   763  			DestinationServiceName: target.Service,
   764  			LocalServiceAddress:    cfg.LocalServiceAddress,
   765  			LocalServicePort:       cfg.LocalServicePort,
   766  		},
   767  		Address: cfg.BindAddress,
   768  		Port:    cfg.BindPort,
   769  	}
   770  
   771  	// Set default port now while the target is known
   772  	if svc.Proxy.LocalServicePort < 1 {
   773  		svc.Proxy.LocalServicePort = target.Port
   774  	}
   775  
   776  	// Lock now. We can't lock earlier as l.Service would deadlock and shouldn't
   777  	// anyway to minimize the critical section.
   778  	l.Lock()
   779  	defer l.Unlock()
   780  
   781  	pToken := restoredProxyToken
   782  
   783  	// Does this proxy instance already exist?
   784  	if existing, ok := l.managedProxies[svc.ID]; ok {
   785  		// Keep the existing proxy token so we don't have to restart proxy to
   786  		// re-inject token.
   787  		pToken = existing.ProxyToken
   788  		// If the user didn't explicitly change the port, use the old one instead of
   789  		// assigning new.
   790  		if svc.Port < 1 {
   791  			svc.Port = existing.Proxy.ProxyService.Port
   792  		}
   793  	} else if proxyService, ok := l.services[svc.ID]; ok {
   794  		// The proxy-service already exists so keep the port that got assigned. This
   795  		// happens on reload from disk since service definitions are reloaded first.
   796  		svc.Port = proxyService.Service.Port
   797  	}
   798  
   799  	// If this is a new instance, generate a token
   800  	if pToken == "" {
   801  		pToken, err = uuid.GenerateUUID()
   802  		if err != nil {
   803  			return nil, err
   804  		}
   805  	}
   806  
   807  	// Allocate port if needed (min and max inclusive).
   808  	rangeLen := l.config.ProxyBindMaxPort - l.config.ProxyBindMinPort + 1
   809  	if svc.Port < 1 && l.config.ProxyBindMinPort > 0 && rangeLen > 0 {
   810  		// This should be a really short list so don't bother optimizing lookup yet.
   811  	OUTER:
   812  		for _, offset := range rand.Perm(rangeLen) {
   813  			p := l.config.ProxyBindMinPort + offset
   814  			// See if this port was already allocated to another proxy
   815  			for _, other := range l.managedProxies {
   816  				if other.Proxy.ProxyService.Port == p {
   817  					// already taken, skip to next random pick in the range
   818  					continue OUTER
   819  				}
   820  			}
   821  			// We made it through all existing proxies without a match so claim this one
   822  			svc.Port = p
   823  			break
   824  		}
   825  	}
   826  	// If no ports left (or auto ports disabled) fail
   827  	if svc.Port < 1 {
   828  		return nil, fmt.Errorf("no port provided for proxy bind_port and none "+
   829  			" left in the allocated range [%d, %d]", l.config.ProxyBindMinPort,
   830  			l.config.ProxyBindMaxPort)
   831  	}
   832  
   833  	proxy.ProxyService = svc
   834  
   835  	// All set, add the proxy and return the service
   836  	if old, ok := l.managedProxies[svc.ID]; ok {
   837  		// Notify watchers of the existing proxy config that it's changing. Note
   838  		// this is safe here even before the map is updated since we still hold the
   839  		// state lock and the watcher can't re-read the new config until we return
   840  		// anyway.
   841  		close(old.WatchCh)
   842  	}
   843  	l.managedProxies[svc.ID] = &ManagedProxy{
   844  		Proxy:      proxy,
   845  		ProxyToken: pToken,
   846  		WatchCh:    make(chan struct{}),
   847  	}
   848  
   849  	// Notify
   850  	for ch := range l.managedProxyHandlers {
   851  		// Do not block
   852  		select {
   853  		case ch <- struct{}{}:
   854  		default:
   855  		}
   856  	}
   857  
   858  	// No need to trigger sync as proxy state is local only.
   859  	return l.managedProxies[svc.ID], nil
   860  }
   861  
   862  // RemoveProxy is used to remove a proxy entry from the local state.
   863  // This returns the proxy that was removed.
   864  func (l *State) RemoveProxy(id string) (*ManagedProxy, error) {
   865  	l.Lock()
   866  	defer l.Unlock()
   867  
   868  	p := l.managedProxies[id]
   869  	if p == nil {
   870  		return nil, fmt.Errorf("Proxy %s does not exist", id)
   871  	}
   872  	delete(l.managedProxies, id)
   873  
   874  	// Notify watchers of the existing proxy config that it's changed.
   875  	close(p.WatchCh)
   876  
   877  	// Notify
   878  	for ch := range l.managedProxyHandlers {
   879  		// Do not block
   880  		select {
   881  		case ch <- struct{}{}:
   882  		default:
   883  		}
   884  	}
   885  
   886  	// No need to trigger sync as proxy state is local only.
   887  	return p, nil
   888  }
   889  
   890  // Proxy returns the local proxy state.
   891  func (l *State) Proxy(id string) *ManagedProxy {
   892  	l.RLock()
   893  	defer l.RUnlock()
   894  	return l.managedProxies[id]
   895  }
   896  
   897  // Proxies returns the locally registered proxies.
   898  func (l *State) Proxies() map[string]*ManagedProxy {
   899  	l.RLock()
   900  	defer l.RUnlock()
   901  
   902  	m := make(map[string]*ManagedProxy)
   903  	for id, p := range l.managedProxies {
   904  		m[id] = p
   905  	}
   906  	return m
   907  }
   908  
   909  // broadcastUpdateLocked assumes l is locked and delivers an update to all
   910  // registered watchers.
   911  func (l *State) broadcastUpdateLocked() {
   912  	for ch := range l.notifyHandlers {
   913  		// Do not block
   914  		select {
   915  		case ch <- struct{}{}:
   916  		default:
   917  		}
   918  	}
   919  }
   920  
   921  // Notify will register a channel to receive messages when the local state
   922  // changes. Only service add/remove are supported for now. See notes on
   923  // l.notifyHandlers for more details.
   924  //
   925  // This will not block on channel send so ensure the channel has a buffer. Note
   926  // that any buffer size is generally fine since actual data is not sent over the
   927  // channel, so a dropped send due to a full buffer does not result in any loss
   928  // of data. The fact that a buffer already contains a notification means that
   929  // the receiver will still be notified that changes occurred.
   930  func (l *State) Notify(ch chan<- struct{}) {
   931  	l.Lock()
   932  	defer l.Unlock()
   933  	l.notifyHandlers[ch] = struct{}{}
   934  }
   935  
   936  // StopNotify will deregister a channel receiving state change notifications.
   937  // Pair this with all calls to Notify to clean up state.
   938  func (l *State) StopNotify(ch chan<- struct{}) {
   939  	l.Lock()
   940  	defer l.Unlock()
   941  	delete(l.notifyHandlers, ch)
   942  }
   943  
   944  // NotifyProxy will register a channel to receive messages when the
   945  // configuration or set of proxies changes. This will not block on
   946  // channel send so ensure the channel has a buffer. Note that any buffer
   947  // size is generally fine since actual data is not sent over the channel,
   948  // so a dropped send due to a full buffer does not result in any loss of
   949  // data. The fact that a buffer already contains a notification means that
   950  // the receiver will still be notified that changes occurred.
   951  //
   952  // NOTE(mitchellh): This could be more generalized but for my use case I
   953  // only needed proxy events. In the future if it were to be generalized I
   954  // would add a new Notify method and remove the proxy-specific ones.
   955  func (l *State) NotifyProxy(ch chan<- struct{}) {
   956  	l.Lock()
   957  	defer l.Unlock()
   958  	l.managedProxyHandlers[ch] = struct{}{}
   959  }
   960  
   961  // StopNotifyProxy will deregister a channel receiving proxy notifications.
   962  // Pair this with all calls to NotifyProxy to clean up state.
   963  func (l *State) StopNotifyProxy(ch chan<- struct{}) {
   964  	l.Lock()
   965  	defer l.Unlock()
   966  	delete(l.managedProxyHandlers, ch)
   967  }
   968  
   969  // Metadata returns the local node metadata fields that the
   970  // agent is aware of and are being kept in sync with the server
   971  func (l *State) Metadata() map[string]string {
   972  	l.RLock()
   973  	defer l.RUnlock()
   974  
   975  	m := make(map[string]string)
   976  	for k, v := range l.metadata {
   977  		m[k] = v
   978  	}
   979  	return m
   980  }
   981  
   982  // LoadMetadata loads node metadata fields from the agent config and
   983  // updates them on the local agent.
   984  func (l *State) LoadMetadata(data map[string]string) error {
   985  	l.Lock()
   986  	defer l.Unlock()
   987  
   988  	for k, v := range data {
   989  		l.metadata[k] = v
   990  	}
   991  	l.TriggerSyncChanges()
   992  	return nil
   993  }
   994  
   995  // UnloadMetadata resets the local metadata state
   996  func (l *State) UnloadMetadata() {
   997  	l.Lock()
   998  	defer l.Unlock()
   999  	l.metadata = make(map[string]string)
  1000  }
  1001  
  1002  // Stats is used to get various debugging state from the sub-systems
  1003  func (l *State) Stats() map[string]string {
  1004  	l.RLock()
  1005  	defer l.RUnlock()
  1006  
  1007  	services := 0
  1008  	for _, s := range l.services {
  1009  		if s.Deleted {
  1010  			continue
  1011  		}
  1012  		services++
  1013  	}
  1014  
  1015  	checks := 0
  1016  	for _, c := range l.checks {
  1017  		if c.Deleted {
  1018  			continue
  1019  		}
  1020  		checks++
  1021  	}
  1022  
  1023  	return map[string]string{
  1024  		"services": strconv.Itoa(services),
  1025  		"checks":   strconv.Itoa(checks),
  1026  	}
  1027  }
  1028  
  1029  // updateSyncState does a read of the server state, and updates
  1030  // the local sync status as appropriate
  1031  func (l *State) updateSyncState() error {
  1032  	// Get all checks and services from the master
  1033  	req := structs.NodeSpecificRequest{
  1034  		Datacenter:   l.config.Datacenter,
  1035  		Node:         l.config.NodeName,
  1036  		QueryOptions: structs.QueryOptions{Token: l.tokens.AgentToken()},
  1037  	}
  1038  
  1039  	var out1 structs.IndexedNodeServices
  1040  	if err := l.Delegate.RPC("Catalog.NodeServices", &req, &out1); err != nil {
  1041  		return err
  1042  	}
  1043  
  1044  	var out2 structs.IndexedHealthChecks
  1045  	if err := l.Delegate.RPC("Health.NodeChecks", &req, &out2); err != nil {
  1046  		return err
  1047  	}
  1048  
  1049  	// Create useful data structures for traversal
  1050  	remoteServices := make(map[string]*structs.NodeService)
  1051  	if out1.NodeServices != nil {
  1052  		remoteServices = out1.NodeServices.Services
  1053  	}
  1054  
  1055  	remoteChecks := make(map[types.CheckID]*structs.HealthCheck, len(out2.HealthChecks))
  1056  	for _, rc := range out2.HealthChecks {
  1057  		remoteChecks[rc.CheckID] = rc
  1058  	}
  1059  
  1060  	// Traverse all checks, services and the node info to determine
  1061  	// which entries need to be updated on or removed from the server
  1062  
  1063  	l.Lock()
  1064  	defer l.Unlock()
  1065  
  1066  	// Check if node info needs syncing
  1067  	if out1.NodeServices == nil || out1.NodeServices.Node == nil ||
  1068  		out1.NodeServices.Node.ID != l.config.NodeID ||
  1069  		!reflect.DeepEqual(out1.NodeServices.Node.TaggedAddresses, l.config.TaggedAddresses) ||
  1070  		!reflect.DeepEqual(out1.NodeServices.Node.Meta, l.metadata) {
  1071  		l.nodeInfoInSync = false
  1072  	}
  1073  
  1074  	// Check which services need syncing
  1075  
  1076  	// Look for local services that do not exist remotely and mark them for
  1077  	// syncing so that they will be pushed to the server later
  1078  	for id, s := range l.services {
  1079  		if remoteServices[id] == nil {
  1080  			s.InSync = false
  1081  		}
  1082  	}
  1083  
  1084  	// Traverse the list of services from the server.
  1085  	// Remote services which do not exist locally have been deregistered.
  1086  	// Otherwise, check whether the two definitions are still in sync.
  1087  	for id, rs := range remoteServices {
  1088  		ls := l.services[id]
  1089  		if ls == nil {
  1090  			// The consul service is managed automatically and does
  1091  			// not need to be deregistered
  1092  			if id == structs.ConsulServiceID {
  1093  				continue
  1094  			}
  1095  
  1096  			// Mark a remote service that does not exist locally as deleted so
  1097  			// that it will be removed on the server later.
  1098  			l.services[id] = &ServiceState{Deleted: true}
  1099  			continue
  1100  		}
  1101  
  1102  		// If the service is already scheduled for removal skip it
  1103  		if ls.Deleted {
  1104  			continue
  1105  		}
  1106  
  1107  		// If our definition is different, we need to update it. Make a
  1108  		// copy so that we don't retain a pointer to any actual state
  1109  		// store info for in-memory RPCs.
  1110  		if ls.Service.EnableTagOverride {
  1111  			ls.Service.Tags = make([]string, len(rs.Tags))
  1112  			copy(ls.Service.Tags, rs.Tags)
  1113  		}
  1114  		ls.InSync = ls.Service.IsSame(rs)
  1115  	}
  1116  
  1117  	// Check which checks need syncing
  1118  
  1119  	// Look for local checks that do not exist remotely and mark them for
  1120  	// syncing so that they will be pushed to the server later
  1121  	for id, c := range l.checks {
  1122  		if remoteChecks[id] == nil {
  1123  			c.InSync = false
  1124  		}
  1125  	}
  1126  
  1127  	// Traverse the list of checks from the server.
  1128  	// Remote checks which do not exist locally have been deregistered.
  1129  	// Otherwise, check whether the two definitions are still in sync.
  1130  	for id, rc := range remoteChecks {
  1131  		lc := l.checks[id]
  1132  
  1133  		if lc == nil {
  1134  			// The Serf check is created automatically and does not
  1135  			// need to be deregistered.
  1136  			if id == structs.SerfCheckID {
  1137  				l.logger.Printf("[DEBUG] agent: Skipping remote check %q since it is managed automatically", id)
  1138  				continue
  1139  			}
  1140  
  1141  			// Mark a remote check that does not exist locally as deleted so
  1142  			// that it will be removed on the server later.
  1143  			l.checks[id] = &CheckState{Deleted: true}
  1144  			continue
  1145  		}
  1146  
  1147  		// If the check is already scheduled for removal skip it.
  1148  		if lc.Deleted {
  1149  			continue
  1150  		}
  1151  
  1152  		// If our definition is different, we need to update it
  1153  		if l.config.CheckUpdateInterval == 0 {
  1154  			lc.InSync = lc.Check.IsSame(rc)
  1155  			continue
  1156  		}
  1157  
  1158  		// Copy the existing check before potentially modifying
  1159  		// it before the compare operation.
  1160  		lcCopy := lc.Check.Clone()
  1161  
  1162  		// Copy the server's check before modifying, otherwise
  1163  		// in-memory RPCs will have side effects.
  1164  		rcCopy := rc.Clone()
  1165  
  1166  		// If there's a defer timer active then we've got a
  1167  		// potentially spammy check so we don't sync the output
  1168  		// during this sweep since the timer will mark the check
  1169  		// out of sync for us. Otherwise, it is safe to sync the
  1170  		// output now. This is especially important for checks
  1171  		// that don't change state after they are created, in
  1172  		// which case we'd never see their output synced back ever.
  1173  		if lc.DeferCheck != nil {
  1174  			lcCopy.Output = ""
  1175  			rcCopy.Output = ""
  1176  		}
  1177  		lc.InSync = lcCopy.IsSame(rcCopy)
  1178  	}
  1179  	return nil
  1180  }
  1181  
  1182  // SyncFull determines the delta between the local and remote state
  1183  // and synchronizes the changes.
  1184  func (l *State) SyncFull() error {
  1185  	// note that we do not acquire the lock here since the methods
  1186  	// we are calling will do that themselves.
  1187  	//
  1188  	// Also note that we don't hold the lock for the entire operation
  1189  	// but release it between the two calls. This is not an issue since
  1190  	// the algorithm is best-effort to achieve eventual consistency.
  1191  	// SyncChanges will sync whatever updateSyncState() has determined
  1192  	// needs updating.
  1193  
  1194  	if err := l.updateSyncState(); err != nil {
  1195  		return err
  1196  	}
  1197  	return l.SyncChanges()
  1198  }
  1199  
  1200  // SyncChanges pushes checks, services and node info data which has been
  1201  // marked out of sync or deleted to the server.
  1202  func (l *State) SyncChanges() error {
  1203  	l.Lock()
  1204  	defer l.Unlock()
  1205  
  1206  	// We will do node-level info syncing at the end, since it will get
  1207  	// updated by a service or check sync anyway, given how the register
  1208  	// API works.
  1209  
  1210  	// Sync the services
  1211  	// (logging happens in the helper methods)
  1212  	for id, s := range l.services {
  1213  		var err error
  1214  		switch {
  1215  		case s.Deleted:
  1216  			err = l.deleteService(id)
  1217  		case !s.InSync:
  1218  			err = l.syncService(id)
  1219  		default:
  1220  			l.logger.Printf("[DEBUG] agent: Service %q in sync", id)
  1221  		}
  1222  		if err != nil {
  1223  			return err
  1224  		}
  1225  	}
  1226  
  1227  	// Sync the checks
  1228  	// (logging happens in the helper methods)
  1229  	for id, c := range l.checks {
  1230  		var err error
  1231  		switch {
  1232  		case c.Deleted:
  1233  			err = l.deleteCheck(id)
  1234  		case !c.InSync:
  1235  			if c.DeferCheck != nil {
  1236  				c.DeferCheck.Stop()
  1237  				c.DeferCheck = nil
  1238  			}
  1239  			err = l.syncCheck(id)
  1240  		default:
  1241  			l.logger.Printf("[DEBUG] agent: Check %q in sync", id)
  1242  		}
  1243  		if err != nil {
  1244  			return err
  1245  		}
  1246  	}
  1247  
  1248  	// Now sync the node level info if we need to, and didn't do any of
  1249  	// the other sync operations.
  1250  	if l.nodeInfoInSync {
  1251  		l.logger.Printf("[DEBUG] agent: Node info in sync")
  1252  		return nil
  1253  	}
  1254  	return l.syncNodeInfo()
  1255  }
  1256  
  1257  // deleteService is used to delete a service from the server
  1258  func (l *State) deleteService(id string) error {
  1259  	if id == "" {
  1260  		return fmt.Errorf("ServiceID missing")
  1261  	}
  1262  
  1263  	req := structs.DeregisterRequest{
  1264  		Datacenter:   l.config.Datacenter,
  1265  		Node:         l.config.NodeName,
  1266  		ServiceID:    id,
  1267  		WriteRequest: structs.WriteRequest{Token: l.serviceToken(id)},
  1268  	}
  1269  	var out struct{}
  1270  	err := l.Delegate.RPC("Catalog.Deregister", &req, &out)
  1271  	switch {
  1272  	case err == nil || strings.Contains(err.Error(), "Unknown service"):
  1273  		delete(l.services, id)
  1274  		l.logger.Printf("[INFO] agent: Deregistered service %q", id)
  1275  		return nil
  1276  
  1277  	case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err):
  1278  		// todo(fs): mark the service to be in sync to prevent excessive retrying before next full sync
  1279  		// todo(fs): some backoff strategy might be a better solution
  1280  		l.services[id].InSync = true
  1281  		l.logger.Printf("[WARN] agent: Service %q deregistration blocked by ACLs", id)
  1282  		metrics.IncrCounter([]string{"acl", "blocked", "service", "deregistration"}, 1)
  1283  		return nil
  1284  
  1285  	default:
  1286  		l.logger.Printf("[WARN] agent: Deregistering service %q failed. %s", id, err)
  1287  		return err
  1288  	}
  1289  }
  1290  
  1291  // deleteCheck is used to delete a check from the server
  1292  func (l *State) deleteCheck(id types.CheckID) error {
  1293  	if id == "" {
  1294  		return fmt.Errorf("CheckID missing")
  1295  	}
  1296  
  1297  	req := structs.DeregisterRequest{
  1298  		Datacenter:   l.config.Datacenter,
  1299  		Node:         l.config.NodeName,
  1300  		CheckID:      id,
  1301  		WriteRequest: structs.WriteRequest{Token: l.checkToken(id)},
  1302  	}
  1303  	var out struct{}
  1304  	err := l.Delegate.RPC("Catalog.Deregister", &req, &out)
  1305  	switch {
  1306  	case err == nil || strings.Contains(err.Error(), "Unknown check"):
  1307  		c := l.checks[id]
  1308  		if c != nil && c.DeferCheck != nil {
  1309  			c.DeferCheck.Stop()
  1310  		}
  1311  		delete(l.checks, id)
  1312  		l.logger.Printf("[INFO] agent: Deregistered check %q", id)
  1313  		return nil
  1314  
  1315  	case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err):
  1316  		// todo(fs): mark the check to be in sync to prevent excessive retrying before next full sync
  1317  		// todo(fs): some backoff strategy might be a better solution
  1318  		l.checks[id].InSync = true
  1319  		l.logger.Printf("[WARN] agent: Check %q deregistration blocked by ACLs", id)
  1320  		metrics.IncrCounter([]string{"acl", "blocked", "check", "deregistration"}, 1)
  1321  		return nil
  1322  
  1323  	default:
  1324  		l.logger.Printf("[WARN] agent: Deregistering check %q failed. %s", id, err)
  1325  		return err
  1326  	}
  1327  }
  1328  
  1329  // syncService is used to sync a service to the server
  1330  func (l *State) syncService(id string) error {
  1331  	// If the service has associated checks that are out of sync,
  1332  	// piggyback them on the service sync so they are part of the
  1333  	// same transaction and are registered atomically. We only let
  1334  	// checks ride on service registrations with the same token,
  1335  	// otherwise we need to register them separately so they don't
  1336  	// pick up privileges from the service token.
  1337  	var checks structs.HealthChecks
  1338  	for checkID, c := range l.checks {
  1339  		if c.Deleted || c.InSync {
  1340  			continue
  1341  		}
  1342  		if c.Check.ServiceID != id {
  1343  			continue
  1344  		}
  1345  		if l.serviceToken(id) != l.checkToken(checkID) {
  1346  			continue
  1347  		}
  1348  		checks = append(checks, c.Check)
  1349  	}
  1350  
  1351  	req := structs.RegisterRequest{
  1352  		Datacenter:      l.config.Datacenter,
  1353  		ID:              l.config.NodeID,
  1354  		Node:            l.config.NodeName,
  1355  		Address:         l.config.AdvertiseAddr,
  1356  		TaggedAddresses: l.config.TaggedAddresses,
  1357  		NodeMeta:        l.metadata,
  1358  		Service:         l.services[id].Service,
  1359  		WriteRequest:    structs.WriteRequest{Token: l.serviceToken(id)},
  1360  	}
  1361  
  1362  	// Backwards-compatibility for Consul < 0.5
  1363  	if len(checks) == 1 {
  1364  		req.Check = checks[0]
  1365  	} else {
  1366  		req.Checks = checks
  1367  	}
  1368  
  1369  	var out struct{}
  1370  	err := l.Delegate.RPC("Catalog.Register", &req, &out)
  1371  	switch {
  1372  	case err == nil:
  1373  		l.services[id].InSync = true
  1374  		// Given how the register API works, this info is also updated
  1375  		// every time we sync a service.
  1376  		l.nodeInfoInSync = true
  1377  		for _, check := range checks {
  1378  			l.checks[check.CheckID].InSync = true
  1379  		}
  1380  		l.logger.Printf("[INFO] agent: Synced service %q", id)
  1381  		return nil
  1382  
  1383  	case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err):
  1384  		// todo(fs): mark the service and the checks to be in sync to prevent excessive retrying before next full sync
  1385  		// todo(fs): some backoff strategy might be a better solution
  1386  		l.services[id].InSync = true
  1387  		for _, check := range checks {
  1388  			l.checks[check.CheckID].InSync = true
  1389  		}
  1390  		l.logger.Printf("[WARN] agent: Service %q registration blocked by ACLs", id)
  1391  		metrics.IncrCounter([]string{"acl", "blocked", "service", "registration"}, 1)
  1392  		return nil
  1393  
  1394  	default:
  1395  		l.logger.Printf("[WARN] agent: Syncing service %q failed. %s", id, err)
  1396  		return err
  1397  	}
  1398  }
  1399  
  1400  // syncCheck is used to sync a check to the server
  1401  func (l *State) syncCheck(id types.CheckID) error {
  1402  	c := l.checks[id]
  1403  
  1404  	req := structs.RegisterRequest{
  1405  		Datacenter:      l.config.Datacenter,
  1406  		ID:              l.config.NodeID,
  1407  		Node:            l.config.NodeName,
  1408  		Address:         l.config.AdvertiseAddr,
  1409  		TaggedAddresses: l.config.TaggedAddresses,
  1410  		NodeMeta:        l.metadata,
  1411  		Check:           c.Check,
  1412  		WriteRequest:    structs.WriteRequest{Token: l.checkToken(id)},
  1413  	}
  1414  
  1415  	// Pull in the associated service if any
  1416  	s := l.services[c.Check.ServiceID]
  1417  	if s != nil && !s.Deleted {
  1418  		req.Service = s.Service
  1419  	}
  1420  
  1421  	var out struct{}
  1422  	err := l.Delegate.RPC("Catalog.Register", &req, &out)
  1423  	switch {
  1424  	case err == nil:
  1425  		l.checks[id].InSync = true
  1426  		// Given how the register API works, this info is also updated
  1427  		// every time we sync a check.
  1428  		l.nodeInfoInSync = true
  1429  		l.logger.Printf("[INFO] agent: Synced check %q", id)
  1430  		return nil
  1431  
  1432  	case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err):
  1433  		// todo(fs): mark the check to be in sync to prevent excessive retrying before next full sync
  1434  		// todo(fs): some backoff strategy might be a better solution
  1435  		l.checks[id].InSync = true
  1436  		l.logger.Printf("[WARN] agent: Check %q registration blocked by ACLs", id)
  1437  		metrics.IncrCounter([]string{"acl", "blocked", "check", "registration"}, 1)
  1438  		return nil
  1439  
  1440  	default:
  1441  		l.logger.Printf("[WARN] agent: Syncing check %q failed. %s", id, err)
  1442  		return err
  1443  	}
  1444  }
  1445  
  1446  func (l *State) syncNodeInfo() error {
  1447  	req := structs.RegisterRequest{
  1448  		Datacenter:      l.config.Datacenter,
  1449  		ID:              l.config.NodeID,
  1450  		Node:            l.config.NodeName,
  1451  		Address:         l.config.AdvertiseAddr,
  1452  		TaggedAddresses: l.config.TaggedAddresses,
  1453  		NodeMeta:        l.metadata,
  1454  		WriteRequest:    structs.WriteRequest{Token: l.tokens.AgentToken()},
  1455  	}
  1456  	var out struct{}
  1457  	err := l.Delegate.RPC("Catalog.Register", &req, &out)
  1458  	switch {
  1459  	case err == nil:
  1460  		l.nodeInfoInSync = true
  1461  		l.logger.Printf("[INFO] agent: Synced node info")
  1462  		return nil
  1463  
  1464  	case acl.IsErrPermissionDenied(err), acl.IsErrNotFound(err):
  1465  		// todo(fs): mark the node info to be in sync to prevent excessive retrying before next full sync
  1466  		// todo(fs): some backoff strategy might be a better solution
  1467  		l.nodeInfoInSync = true
  1468  		l.logger.Printf("[WARN] agent: Node info update blocked by ACLs")
  1469  		metrics.IncrCounter([]string{"acl", "blocked", "node", "registration"}, 1)
  1470  		return nil
  1471  
  1472  	default:
  1473  		l.logger.Printf("[WARN] agent: Syncing node info failed. %s", err)
  1474  		return err
  1475  	}
  1476  }